1. Modify the recording format of the Hisilicon Kunpeng9xx common errors in the rasdaemon.
2. In the ras-mc-ctl,
2.1. Modify the error statistics for the HiSilicon Kunpeng9xx common errors to display
the statistics and error info based on the module and the error severity.
2.2. Add support to display the vendor-errors for a specified module.
2.3. Print the usage message if the necessary parameters are not passed for the
vendor-errors options.
2.4. Reformat error info of the HiSilicon Kunpeng920.
2.5. Relocate the reading and display of Kunpeng920 errors under Kunpeng9xx.
Signed-off-by: Shiju Jose <shiju.jose@huawei.com>
(cherry picked from commit ca01a3db7b2b002855070d02a095296680325354)
225 lines
8.4 KiB
Diff
225 lines
8.4 KiB
Diff
From 62218a9c3aec44330ce3b77f3634c788b6e6f60c Mon Sep 17 00:00:00 2001
|
|
From: Shiju Jose <shiju.jose@huawei.com>
|
|
Date: Wed, 2 Mar 2022 12:20:40 +0000
|
|
Subject: [PATCH 1/6] rasdaemon: Modify recording Hisilicon common error data
|
|
|
|
The error statistics for the Hisilicon common
|
|
error need to be computed based on module, error severity, etc.
|
|
|
|
Modify recording Hisilicon common error data as separate fields
|
|
in the sql db table instead of the combined single field.
|
|
|
|
Signed-off-by: Shiju Jose <shiju.jose@huawei.com>
|
|
---
|
|
non-standard-hisilicon.c | 122 ++++++++++++++++++++++++++++++++-------
|
|
1 file changed, 102 insertions(+), 20 deletions(-)
|
|
|
|
diff --git a/non-standard-hisilicon.c b/non-standard-hisilicon.c
|
|
index 1432163..dc69d46 100644
|
|
--- a/non-standard-hisilicon.c
|
|
+++ b/non-standard-hisilicon.c
|
|
@@ -17,6 +17,7 @@
|
|
#include "non-standard-hisilicon.h"
|
|
|
|
#define HISI_BUF_LEN 2048
|
|
+#define HISI_PCIE_INFO_BUF_LEN 256
|
|
|
|
struct hisi_common_error_section {
|
|
uint32_t val_bits;
|
|
@@ -63,12 +64,25 @@ enum {
|
|
enum {
|
|
HISI_COMMON_FIELD_ID,
|
|
HISI_COMMON_FIELD_TIMESTAMP,
|
|
- HISI_COMMON_FIELD_ERR_INFO,
|
|
+ HISI_COMMON_FIELD_VERSION,
|
|
+ HISI_COMMON_FIELD_SOC_ID,
|
|
+ HISI_COMMON_FIELD_SOCKET_ID,
|
|
+ HISI_COMMON_FIELD_TOTEM_ID,
|
|
+ HISI_COMMON_FIELD_NIMBUS_ID,
|
|
+ HISI_COMMON_FIELD_SUB_SYSTEM_ID,
|
|
+ HISI_COMMON_FIELD_MODULE_ID,
|
|
+ HISI_COMMON_FIELD_SUB_MODULE_ID,
|
|
+ HISI_COMMON_FIELD_CORE_ID,
|
|
+ HISI_COMMON_FIELD_PORT_ID,
|
|
+ HISI_COMMON_FIELD_ERR_TYPE,
|
|
+ HISI_COMMON_FIELD_PCIE_INFO,
|
|
+ HISI_COMMON_FIELD_ERR_SEVERITY,
|
|
HISI_COMMON_FIELD_REGS_DUMP,
|
|
};
|
|
|
|
struct hisi_event {
|
|
char error_msg[HISI_BUF_LEN];
|
|
+ char pcie_info[HISI_PCIE_INFO_BUF_LEN];
|
|
char reg_msg[HISI_BUF_LEN];
|
|
};
|
|
|
|
@@ -134,12 +148,24 @@ int step_vendor_data_tab(struct ras_ns_ev_decoder *ev_decoder, const char *name)
|
|
static const struct db_fields hisi_common_section_fields[] = {
|
|
{ .name = "id", .type = "INTEGER PRIMARY KEY" },
|
|
{ .name = "timestamp", .type = "TEXT" },
|
|
- { .name = "err_info", .type = "TEXT" },
|
|
+ { .name = "version", .type = "INTEGER" },
|
|
+ { .name = "soc_id", .type = "INTEGER" },
|
|
+ { .name = "socket_id", .type = "INTEGER" },
|
|
+ { .name = "totem_id", .type = "INTEGER" },
|
|
+ { .name = "nimbus_id", .type = "INTEGER" },
|
|
+ { .name = "sub_system_id", .type = "INTEGER" },
|
|
+ { .name = "module_id", .type = "TEXT" },
|
|
+ { .name = "sub_module_id", .type = "INTEGER" },
|
|
+ { .name = "core_id", .type = "INTEGER" },
|
|
+ { .name = "port_id", .type = "INTEGER" },
|
|
+ { .name = "err_type", .type = "INTEGER" },
|
|
+ { .name = "pcie_info", .type = "TEXT" },
|
|
+ { .name = "err_severity", .type = "TEXT" },
|
|
{ .name = "regs_dump", .type = "TEXT" },
|
|
};
|
|
|
|
static const struct db_table_descriptor hisi_common_section_tab = {
|
|
- .name = "hisi_common_section",
|
|
+ .name = "hisi_common_section_v2",
|
|
.fields = hisi_common_section_fields,
|
|
.num_fields = ARRAY_SIZE(hisi_common_section_fields),
|
|
};
|
|
@@ -199,12 +225,20 @@ static const char* get_soc_desc(uint8_t soc_id)
|
|
return soc_desc[soc_id];
|
|
}
|
|
|
|
-static void decode_module(struct hisi_event *event, uint8_t module_id)
|
|
+static void decode_module(struct ras_ns_ev_decoder *ev_decoder,
|
|
+ struct hisi_event *event, uint8_t module_id)
|
|
{
|
|
- if (module_id >= sizeof(module_name)/sizeof(char *))
|
|
+ if (module_id >= sizeof(module_name)/sizeof(char *)) {
|
|
HISI_SNPRINTF(event->error_msg, "module=unknown(id=%hhu) ", module_id);
|
|
- else
|
|
+ record_vendor_data(ev_decoder, HISI_OEM_DATA_TYPE_TEXT,
|
|
+ HISI_COMMON_FIELD_MODULE_ID,
|
|
+ 0, "unknown");
|
|
+ } else {
|
|
HISI_SNPRINTF(event->error_msg, "module=%s ", module_name[module_id]);
|
|
+ record_vendor_data(ev_decoder, HISI_OEM_DATA_TYPE_TEXT,
|
|
+ HISI_COMMON_FIELD_MODULE_ID,
|
|
+ 0, module_name[module_id]);
|
|
+ }
|
|
}
|
|
|
|
static void decode_hisi_common_section_hdr(struct ras_ns_ev_decoder *ev_decoder,
|
|
@@ -212,43 +246,93 @@ static void decode_hisi_common_section_hdr(struct ras_ns_ev_decoder *ev_decoder,
|
|
struct hisi_event *event)
|
|
{
|
|
HISI_SNPRINTF(event->error_msg, "[ table_version=%hhu", err->version);
|
|
- if (err->val_bits & BIT(HISI_COMMON_VALID_SOC_ID))
|
|
+ record_vendor_data(ev_decoder, HISI_OEM_DATA_TYPE_INT,
|
|
+ HISI_COMMON_FIELD_VERSION,
|
|
+ err->version, NULL);
|
|
+ if (err->val_bits & BIT(HISI_COMMON_VALID_SOC_ID)) {
|
|
HISI_SNPRINTF(event->error_msg, "soc=%s", get_soc_desc(err->soc_id));
|
|
+ record_vendor_data(ev_decoder, HISI_OEM_DATA_TYPE_INT,
|
|
+ HISI_COMMON_FIELD_SOC_ID,
|
|
+ err->soc_id, NULL);
|
|
+ }
|
|
|
|
- if (err->val_bits & BIT(HISI_COMMON_VALID_SOCKET_ID))
|
|
+ if (err->val_bits & BIT(HISI_COMMON_VALID_SOCKET_ID)) {
|
|
HISI_SNPRINTF(event->error_msg, "socket_id=%hhu", err->socket_id);
|
|
+ record_vendor_data(ev_decoder, HISI_OEM_DATA_TYPE_INT,
|
|
+ HISI_COMMON_FIELD_SOCKET_ID,
|
|
+ err->socket_id, NULL);
|
|
+ }
|
|
|
|
- if (err->val_bits & BIT(HISI_COMMON_VALID_TOTEM_ID))
|
|
+ if (err->val_bits & BIT(HISI_COMMON_VALID_TOTEM_ID)) {
|
|
HISI_SNPRINTF(event->error_msg, "totem_id=%hhu", err->totem_id);
|
|
+ record_vendor_data(ev_decoder, HISI_OEM_DATA_TYPE_INT,
|
|
+ HISI_COMMON_FIELD_TOTEM_ID,
|
|
+ err->totem_id, NULL);
|
|
+ }
|
|
|
|
- if (err->val_bits & BIT(HISI_COMMON_VALID_NIMBUS_ID))
|
|
+ if (err->val_bits & BIT(HISI_COMMON_VALID_NIMBUS_ID)) {
|
|
HISI_SNPRINTF(event->error_msg, "nimbus_id=%hhu", err->nimbus_id);
|
|
+ record_vendor_data(ev_decoder, HISI_OEM_DATA_TYPE_INT,
|
|
+ HISI_COMMON_FIELD_NIMBUS_ID,
|
|
+ err->nimbus_id, NULL);
|
|
+ }
|
|
|
|
- if (err->val_bits & BIT(HISI_COMMON_VALID_SUBSYSTEM_ID))
|
|
+ if (err->val_bits & BIT(HISI_COMMON_VALID_SUBSYSTEM_ID)) {
|
|
HISI_SNPRINTF(event->error_msg, "subsystem_id=%hhu", err->subsystem_id);
|
|
+ record_vendor_data(ev_decoder, HISI_OEM_DATA_TYPE_INT,
|
|
+ HISI_COMMON_FIELD_SUB_SYSTEM_ID,
|
|
+ err->subsystem_id, NULL);
|
|
+ }
|
|
|
|
if (err->val_bits & BIT(HISI_COMMON_VALID_MODULE_ID))
|
|
- decode_module(event, err->module_id);
|
|
+ decode_module(ev_decoder, event, err->module_id);
|
|
|
|
- if (err->val_bits & BIT(HISI_COMMON_VALID_SUBMODULE_ID))
|
|
+ if (err->val_bits & BIT(HISI_COMMON_VALID_SUBMODULE_ID)) {
|
|
HISI_SNPRINTF(event->error_msg, "submodule_id=%hhu", err->submodule_id);
|
|
+ record_vendor_data(ev_decoder, HISI_OEM_DATA_TYPE_INT,
|
|
+ HISI_COMMON_FIELD_SUB_MODULE_ID,
|
|
+ err->submodule_id, NULL);
|
|
+ }
|
|
|
|
- if (err->val_bits & BIT(HISI_COMMON_VALID_CORE_ID))
|
|
+ if (err->val_bits & BIT(HISI_COMMON_VALID_CORE_ID)) {
|
|
HISI_SNPRINTF(event->error_msg, "core_id=%hhu", err->core_id);
|
|
+ record_vendor_data(ev_decoder, HISI_OEM_DATA_TYPE_INT,
|
|
+ HISI_COMMON_FIELD_CORE_ID,
|
|
+ err->core_id, NULL);
|
|
+ }
|
|
|
|
- if (err->val_bits & BIT(HISI_COMMON_VALID_PORT_ID))
|
|
+ if (err->val_bits & BIT(HISI_COMMON_VALID_PORT_ID)) {
|
|
HISI_SNPRINTF(event->error_msg, "port_id=%hhu", err->port_id);
|
|
+ record_vendor_data(ev_decoder, HISI_OEM_DATA_TYPE_INT,
|
|
+ HISI_COMMON_FIELD_PORT_ID,
|
|
+ err->port_id, NULL);
|
|
+ }
|
|
|
|
- if (err->val_bits & BIT(HISI_COMMON_VALID_ERR_TYPE))
|
|
+ if (err->val_bits & BIT(HISI_COMMON_VALID_ERR_TYPE)) {
|
|
HISI_SNPRINTF(event->error_msg, "err_type=%hu", err->err_type);
|
|
+ record_vendor_data(ev_decoder, HISI_OEM_DATA_TYPE_INT,
|
|
+ HISI_COMMON_FIELD_ERR_TYPE,
|
|
+ err->err_type, NULL);
|
|
+ }
|
|
|
|
- if (err->val_bits & BIT(HISI_COMMON_VALID_PCIE_INFO))
|
|
+ if (err->val_bits & BIT(HISI_COMMON_VALID_PCIE_INFO)) {
|
|
HISI_SNPRINTF(event->error_msg, "pcie_device_id=%04x:%02x:%02x.%x",
|
|
err->pcie_info.segment, err->pcie_info.bus,
|
|
err->pcie_info.device, err->pcie_info.function);
|
|
+ HISI_SNPRINTF(event->pcie_info, "%04x:%02x:%02x.%x",
|
|
+ err->pcie_info.segment, err->pcie_info.bus,
|
|
+ err->pcie_info.device, err->pcie_info.function);
|
|
+ record_vendor_data(ev_decoder, HISI_OEM_DATA_TYPE_TEXT,
|
|
+ HISI_COMMON_FIELD_PCIE_INFO,
|
|
+ 0, event->pcie_info);
|
|
+ }
|
|
|
|
- if (err->val_bits & BIT(HISI_COMMON_VALID_ERR_SEVERITY))
|
|
+ if (err->val_bits & BIT(HISI_COMMON_VALID_ERR_SEVERITY)) {
|
|
HISI_SNPRINTF(event->error_msg, "err_severity=%s", err_severity(err->err_severity));
|
|
+ record_vendor_data(ev_decoder, HISI_OEM_DATA_TYPE_TEXT,
|
|
+ HISI_COMMON_FIELD_ERR_SEVERITY,
|
|
+ 0, err_severity(err->err_severity));
|
|
+ }
|
|
|
|
HISI_SNPRINTF(event->error_msg, "]");
|
|
}
|
|
@@ -293,8 +377,6 @@ static int decode_hisi_common_section(struct ras_events *ras,
|
|
record_vendor_data(ev_decoder, HISI_OEM_DATA_TYPE_TEXT,
|
|
HISI_COMMON_FIELD_TIMESTAMP,
|
|
0, event->timestamp);
|
|
- record_vendor_data(ev_decoder, HISI_OEM_DATA_TYPE_TEXT,
|
|
- HISI_COMMON_FIELD_ERR_INFO, 0, hevent.error_msg);
|
|
record_vendor_data(ev_decoder, HISI_OEM_DATA_TYPE_TEXT,
|
|
HISI_COMMON_FIELD_REGS_DUMP, 0, hevent.reg_msg);
|
|
step_vendor_data_tab(ev_decoder, "hisi_common_section_tab");
|
|
--
|
|
2.25.1
|
|
|