rasdaemon: Modify the format of the Hisilicon Kunpeng9xx common error records and improvements in the ras-mc-ctl for the Hisilicon Kunpeng9xx errors

1. Modify the recording format of the Hisilicon Kunpeng9xx common errors in the rasdaemon.
2. In the ras-mc-ctl,
2.1. Modify the error statistics for the HiSilicon Kunpeng9xx common errors to display
     the statistics and error info based on the module and the error severity.
2.2. Add support to display the vendor-errors for a specified module.
2.3. Add printing usage if the necessary parameters are not passed for the
     vendor-errors options.
2.4. Reformat error info of the HiSilicon Kunpeng920.
2.5. Relocate reading and display Kunpeng920 errors to under Kunpeng9xx.

Signed-off-by: Shiju Jose <shiju.jose@huawei.com>
(cherry picked from commit ca01a3db7b2b002855070d02a095296680325354)
This commit is contained in:
Shiju Jose 2022-03-07 13:33:34 +00:00 committed by openeuler-sync-bot
parent eab7751ce2
commit d12d3023a9
7 changed files with 779 additions and 1 deletions

View File

@ -0,0 +1,224 @@
From 62218a9c3aec44330ce3b77f3634c788b6e6f60c Mon Sep 17 00:00:00 2001
From: Shiju Jose <shiju.jose@huawei.com>
Date: Wed, 2 Mar 2022 12:20:40 +0000
Subject: [PATCH 1/6] rasdaemon: Modify recording Hisilicon common error data
The error statistics for the Hisilicon common
errors need to be based on module, error severity, etc.
Modify recording Hisilicon common error data as separate fields
in the sql db table instead of the combined single field.
Signed-off-by: Shiju Jose <shiju.jose@huawei.com>
---
non-standard-hisilicon.c | 122 ++++++++++++++++++++++++++++++++-------
1 file changed, 102 insertions(+), 20 deletions(-)
diff --git a/non-standard-hisilicon.c b/non-standard-hisilicon.c
index 1432163..dc69d46 100644
--- a/non-standard-hisilicon.c
+++ b/non-standard-hisilicon.c
@@ -17,6 +17,7 @@
#include "non-standard-hisilicon.h"
#define HISI_BUF_LEN 2048
+#define HISI_PCIE_INFO_BUF_LEN 256
struct hisi_common_error_section {
uint32_t val_bits;
@@ -63,12 +64,25 @@ enum {
enum {
HISI_COMMON_FIELD_ID,
HISI_COMMON_FIELD_TIMESTAMP,
- HISI_COMMON_FIELD_ERR_INFO,
+ HISI_COMMON_FIELD_VERSION,
+ HISI_COMMON_FIELD_SOC_ID,
+ HISI_COMMON_FIELD_SOCKET_ID,
+ HISI_COMMON_FIELD_TOTEM_ID,
+ HISI_COMMON_FIELD_NIMBUS_ID,
+ HISI_COMMON_FIELD_SUB_SYSTEM_ID,
+ HISI_COMMON_FIELD_MODULE_ID,
+ HISI_COMMON_FIELD_SUB_MODULE_ID,
+ HISI_COMMON_FIELD_CORE_ID,
+ HISI_COMMON_FIELD_PORT_ID,
+ HISI_COMMON_FIELD_ERR_TYPE,
+ HISI_COMMON_FIELD_PCIE_INFO,
+ HISI_COMMON_FIELD_ERR_SEVERITY,
HISI_COMMON_FIELD_REGS_DUMP,
};
struct hisi_event {
char error_msg[HISI_BUF_LEN];
+ char pcie_info[HISI_PCIE_INFO_BUF_LEN];
char reg_msg[HISI_BUF_LEN];
};
@@ -134,12 +148,24 @@ int step_vendor_data_tab(struct ras_ns_ev_decoder *ev_decoder, const char *name)
static const struct db_fields hisi_common_section_fields[] = {
{ .name = "id", .type = "INTEGER PRIMARY KEY" },
{ .name = "timestamp", .type = "TEXT" },
- { .name = "err_info", .type = "TEXT" },
+ { .name = "version", .type = "INTEGER" },
+ { .name = "soc_id", .type = "INTEGER" },
+ { .name = "socket_id", .type = "INTEGER" },
+ { .name = "totem_id", .type = "INTEGER" },
+ { .name = "nimbus_id", .type = "INTEGER" },
+ { .name = "sub_system_id", .type = "INTEGER" },
+ { .name = "module_id", .type = "TEXT" },
+ { .name = "sub_module_id", .type = "INTEGER" },
+ { .name = "core_id", .type = "INTEGER" },
+ { .name = "port_id", .type = "INTEGER" },
+ { .name = "err_type", .type = "INTEGER" },
+ { .name = "pcie_info", .type = "TEXT" },
+ { .name = "err_severity", .type = "TEXT" },
{ .name = "regs_dump", .type = "TEXT" },
};
static const struct db_table_descriptor hisi_common_section_tab = {
- .name = "hisi_common_section",
+ .name = "hisi_common_section_v2",
.fields = hisi_common_section_fields,
.num_fields = ARRAY_SIZE(hisi_common_section_fields),
};
@@ -199,12 +225,20 @@ static const char* get_soc_desc(uint8_t soc_id)
return soc_desc[soc_id];
}
-static void decode_module(struct hisi_event *event, uint8_t module_id)
+static void decode_module(struct ras_ns_ev_decoder *ev_decoder,
+ struct hisi_event *event, uint8_t module_id)
{
- if (module_id >= sizeof(module_name)/sizeof(char *))
+ if (module_id >= sizeof(module_name)/sizeof(char *)) {
HISI_SNPRINTF(event->error_msg, "module=unknown(id=%hhu) ", module_id);
- else
+ record_vendor_data(ev_decoder, HISI_OEM_DATA_TYPE_TEXT,
+ HISI_COMMON_FIELD_MODULE_ID,
+ 0, "unknown");
+ } else {
HISI_SNPRINTF(event->error_msg, "module=%s ", module_name[module_id]);
+ record_vendor_data(ev_decoder, HISI_OEM_DATA_TYPE_TEXT,
+ HISI_COMMON_FIELD_MODULE_ID,
+ 0, module_name[module_id]);
+ }
}
static void decode_hisi_common_section_hdr(struct ras_ns_ev_decoder *ev_decoder,
@@ -212,43 +246,93 @@ static void decode_hisi_common_section_hdr(struct ras_ns_ev_decoder *ev_decoder,
struct hisi_event *event)
{
HISI_SNPRINTF(event->error_msg, "[ table_version=%hhu", err->version);
- if (err->val_bits & BIT(HISI_COMMON_VALID_SOC_ID))
+ record_vendor_data(ev_decoder, HISI_OEM_DATA_TYPE_INT,
+ HISI_COMMON_FIELD_VERSION,
+ err->version, NULL);
+ if (err->val_bits & BIT(HISI_COMMON_VALID_SOC_ID)) {
HISI_SNPRINTF(event->error_msg, "soc=%s", get_soc_desc(err->soc_id));
+ record_vendor_data(ev_decoder, HISI_OEM_DATA_TYPE_INT,
+ HISI_COMMON_FIELD_SOC_ID,
+ err->soc_id, NULL);
+ }
- if (err->val_bits & BIT(HISI_COMMON_VALID_SOCKET_ID))
+ if (err->val_bits & BIT(HISI_COMMON_VALID_SOCKET_ID)) {
HISI_SNPRINTF(event->error_msg, "socket_id=%hhu", err->socket_id);
+ record_vendor_data(ev_decoder, HISI_OEM_DATA_TYPE_INT,
+ HISI_COMMON_FIELD_SOCKET_ID,
+ err->socket_id, NULL);
+ }
- if (err->val_bits & BIT(HISI_COMMON_VALID_TOTEM_ID))
+ if (err->val_bits & BIT(HISI_COMMON_VALID_TOTEM_ID)) {
HISI_SNPRINTF(event->error_msg, "totem_id=%hhu", err->totem_id);
+ record_vendor_data(ev_decoder, HISI_OEM_DATA_TYPE_INT,
+ HISI_COMMON_FIELD_TOTEM_ID,
+ err->totem_id, NULL);
+ }
- if (err->val_bits & BIT(HISI_COMMON_VALID_NIMBUS_ID))
+ if (err->val_bits & BIT(HISI_COMMON_VALID_NIMBUS_ID)) {
HISI_SNPRINTF(event->error_msg, "nimbus_id=%hhu", err->nimbus_id);
+ record_vendor_data(ev_decoder, HISI_OEM_DATA_TYPE_INT,
+ HISI_COMMON_FIELD_NIMBUS_ID,
+ err->nimbus_id, NULL);
+ }
- if (err->val_bits & BIT(HISI_COMMON_VALID_SUBSYSTEM_ID))
+ if (err->val_bits & BIT(HISI_COMMON_VALID_SUBSYSTEM_ID)) {
HISI_SNPRINTF(event->error_msg, "subsystem_id=%hhu", err->subsystem_id);
+ record_vendor_data(ev_decoder, HISI_OEM_DATA_TYPE_INT,
+ HISI_COMMON_FIELD_SUB_SYSTEM_ID,
+ err->subsystem_id, NULL);
+ }
if (err->val_bits & BIT(HISI_COMMON_VALID_MODULE_ID))
- decode_module(event, err->module_id);
+ decode_module(ev_decoder, event, err->module_id);
- if (err->val_bits & BIT(HISI_COMMON_VALID_SUBMODULE_ID))
+ if (err->val_bits & BIT(HISI_COMMON_VALID_SUBMODULE_ID)) {
HISI_SNPRINTF(event->error_msg, "submodule_id=%hhu", err->submodule_id);
+ record_vendor_data(ev_decoder, HISI_OEM_DATA_TYPE_INT,
+ HISI_COMMON_FIELD_SUB_MODULE_ID,
+ err->submodule_id, NULL);
+ }
- if (err->val_bits & BIT(HISI_COMMON_VALID_CORE_ID))
+ if (err->val_bits & BIT(HISI_COMMON_VALID_CORE_ID)) {
HISI_SNPRINTF(event->error_msg, "core_id=%hhu", err->core_id);
+ record_vendor_data(ev_decoder, HISI_OEM_DATA_TYPE_INT,
+ HISI_COMMON_FIELD_CORE_ID,
+ err->core_id, NULL);
+ }
- if (err->val_bits & BIT(HISI_COMMON_VALID_PORT_ID))
+ if (err->val_bits & BIT(HISI_COMMON_VALID_PORT_ID)) {
HISI_SNPRINTF(event->error_msg, "port_id=%hhu", err->port_id);
+ record_vendor_data(ev_decoder, HISI_OEM_DATA_TYPE_INT,
+ HISI_COMMON_FIELD_PORT_ID,
+ err->port_id, NULL);
+ }
- if (err->val_bits & BIT(HISI_COMMON_VALID_ERR_TYPE))
+ if (err->val_bits & BIT(HISI_COMMON_VALID_ERR_TYPE)) {
HISI_SNPRINTF(event->error_msg, "err_type=%hu", err->err_type);
+ record_vendor_data(ev_decoder, HISI_OEM_DATA_TYPE_INT,
+ HISI_COMMON_FIELD_ERR_TYPE,
+ err->err_type, NULL);
+ }
- if (err->val_bits & BIT(HISI_COMMON_VALID_PCIE_INFO))
+ if (err->val_bits & BIT(HISI_COMMON_VALID_PCIE_INFO)) {
HISI_SNPRINTF(event->error_msg, "pcie_device_id=%04x:%02x:%02x.%x",
err->pcie_info.segment, err->pcie_info.bus,
err->pcie_info.device, err->pcie_info.function);
+ HISI_SNPRINTF(event->pcie_info, "%04x:%02x:%02x.%x",
+ err->pcie_info.segment, err->pcie_info.bus,
+ err->pcie_info.device, err->pcie_info.function);
+ record_vendor_data(ev_decoder, HISI_OEM_DATA_TYPE_TEXT,
+ HISI_COMMON_FIELD_PCIE_INFO,
+ 0, event->pcie_info);
+ }
- if (err->val_bits & BIT(HISI_COMMON_VALID_ERR_SEVERITY))
+ if (err->val_bits & BIT(HISI_COMMON_VALID_ERR_SEVERITY)) {
HISI_SNPRINTF(event->error_msg, "err_severity=%s", err_severity(err->err_severity));
+ record_vendor_data(ev_decoder, HISI_OEM_DATA_TYPE_TEXT,
+ HISI_COMMON_FIELD_ERR_SEVERITY,
+ 0, err_severity(err->err_severity));
+ }
HISI_SNPRINTF(event->error_msg, "]");
}
@@ -293,8 +377,6 @@ static int decode_hisi_common_section(struct ras_events *ras,
record_vendor_data(ev_decoder, HISI_OEM_DATA_TYPE_TEXT,
HISI_COMMON_FIELD_TIMESTAMP,
0, event->timestamp);
- record_vendor_data(ev_decoder, HISI_OEM_DATA_TYPE_TEXT,
- HISI_COMMON_FIELD_ERR_INFO, 0, hevent.error_msg);
record_vendor_data(ev_decoder, HISI_OEM_DATA_TYPE_TEXT,
HISI_COMMON_FIELD_REGS_DUMP, 0, hevent.reg_msg);
step_vendor_data_tab(ev_decoder, "hisi_common_section_tab");
--
2.25.1

View File

@ -0,0 +1,97 @@
From 4d9f297028ce3116eaf574b2570d71a4ed666b7d Mon Sep 17 00:00:00 2001
From: Shiju Jose <shiju.jose@huawei.com>
Date: Thu, 24 Feb 2022 18:02:14 +0000
Subject: [PATCH 2/6] rasdaemon: ras-mc-ctl: Modify error statistics for
HiSilicon Kunpeng9xx common errors
Modify the error statistics for the HiSilicon Kunpeng9xx platforms common errors
to display the statistics and error info based on the module and the error severity.
Signed-off-by: Shiju Jose <shiju.jose@huawei.com>
---
util/ras-mc-ctl.in | 40 +++++++++++++++++++++++++++++-----------
1 file changed, 29 insertions(+), 11 deletions(-)
diff --git a/util/ras-mc-ctl.in b/util/ras-mc-ctl.in
index 1e3aeb7..22ba1fd 100755
--- a/util/ras-mc-ctl.in
+++ b/util/ras-mc-ctl.in
@@ -1535,7 +1535,7 @@ sub vendor_errors_summary
require DBI;
my ($num_args, $platform_id);
my ($query, $query_handle, $count, $out);
- my ($module_id, $sub_module_id, $err_severity, $err_sev, $err_info);
+ my ($module_id, $sub_module_id, $err_severity, $err_sev);
$num_args = $#ARGV + 1;
$platform_id = 0;
@@ -1612,13 +1612,18 @@ sub vendor_errors_summary
# HiSilicon Kunpeng9xx common errors
if ($platform_id eq HISILICON_KUNPENG_9XX) {
- $query = "select err_info, count(*) from hisi_common_section";
+ $query = "select err_severity, module_id, count(*) from hisi_common_section_v2 group by err_severity, module_id";
$query_handle = $dbh->prepare($query);
$query_handle->execute();
- $query_handle->bind_columns(\($err_info, $count));
+ $query_handle->bind_columns(\($err_severity, $module_id, $count));
$out = "";
+ $err_sev = "";
while($query_handle->fetch()) {
- $out .= "\terrors: $count\n";
+ if ($err_severity ne $err_sev) {
+ $out .= "$err_severity errors:\n";
+ $err_sev = $err_severity;
+ }
+ $out .= "\t$module_id: $count\n";
}
if ($out ne "") {
print "HiSilicon Kunpeng9xx common error events summary:\n$out\n";
@@ -1636,8 +1641,8 @@ sub vendor_errors
require DBI;
my ($num_args, $platform_id);
my ($query, $query_handle, $id, $timestamp, $out);
- my ($version, $soc_id, $socket_id, $nimbus_id, $core_id, $port_id);
- my ($module_id, $sub_module_id, $err_severity, $err_type, $err_info, $regs);
+ my ($version, $soc_id, $socket_id, $totem_id, $nimbus_id, $sub_system_id, $core_id, $port_id);
+ my ($module_id, $sub_module_id, $err_severity, $err_type, $pcie_info, $regs);
$num_args = $#ARGV + 1;
$platform_id = 0;
@@ -1725,15 +1730,28 @@ sub vendor_errors
# HiSilicon Kunpeng9xx common errors
if ($platform_id eq HISILICON_KUNPENG_9XX) {
- $query = "select id, timestamp, err_info, regs_dump from hisi_common_section order by id";
+ $query = "select id, timestamp, version, soc_id, socket_id, totem_id, nimbus_id, sub_system_id, module_id, sub_module_id, core_id, port_id, err_type, pcie_info, err_severity, regs_dump from hisi_common_section_v2 order by id, module_id, err_severity";
$query_handle = $dbh->prepare($query);
$query_handle->execute();
- $query_handle->bind_columns(\($id, $timestamp, $err_info, $regs));
+ $query_handle->bind_columns(\($id, $timestamp, $version, $soc_id, $socket_id, $totem_id, $nimbus_id, $sub_system_id, $module_id, $sub_module_id, $core_id, $port_id, $err_type, $pcie_info, $err_severity, $regs));
$out = "";
while($query_handle->fetch()) {
- $out .= "$id. $timestamp ";
- $out .= "Error Info:$err_info \n" if ($err_info);
- $out .= "Error Registers: $regs\n\n" if ($regs);
+ $out .= "$id. $timestamp Error Info: ";
+ $out .= "version=$version, ";
+ $out .= "soc_id=$soc_id, " if ($soc_id);
+ $out .= "socket_id=$socket_id, " if ($socket_id);
+ $out .= "totem_id=$totem_id, " if ($totem_id);
+ $out .= "nimbus_id=$nimbus_id, " if ($nimbus_id);
+ $out .= "sub_system_id=$sub_system_id, " if ($sub_system_id);
+ $out .= "module_id=$module_id, " if ($module_id);
+ $out .= "sub_module_id=$sub_module_id, " if ($sub_module_id);
+ $out .= "core_id=$core_id, " if ($core_id);
+ $out .= "port_id=$port_id, " if ($port_id);
+ $out .= "err_type=$err_type, " if ($err_type);
+ $out .= "pcie_info=$pcie_info, " if ($pcie_info);
+ $out .= "err_severity=$err_severity, " if ($err_severity);
+ $out .= "Error Registers: $regs" if ($regs);
+ $out .= "\n\n";
}
if ($out ne "") {
print "HiSilicon Kunpeng9xx common error events:\n$out\n";
--
2.25.1

View File

@ -0,0 +1,56 @@
From eb93d77b417b58cba27799ae85747b8a193cf063 Mon Sep 17 00:00:00 2001
From: Shiju Jose <shiju.jose@huawei.com>
Date: Sat, 5 Mar 2022 16:18:55 +0000
Subject: [PATCH 3/6] rasdaemon: ras-mc-ctl: Reformat error info of the
HiSilicon Kunpeng920
Reformat the code to display the error info of HiSilicon Kunpeng920.
Signed-off-by: Shiju Jose <shiju.jose@huawei.com>
---
util/ras-mc-ctl.in | 15 +++++++++------
1 file changed, 9 insertions(+), 6 deletions(-)
diff --git a/util/ras-mc-ctl.in b/util/ras-mc-ctl.in
index 22ba1fd..eeaf885 100755
--- a/util/ras-mc-ctl.in
+++ b/util/ras-mc-ctl.in
@@ -1669,8 +1669,9 @@ sub vendor_errors
$out .= "nimbus_id=$nimbus_id, " if ($nimbus_id);
$out .= "module_id=$module_id, " if ($module_id);
$out .= "sub_module_id=$sub_module_id, " if ($sub_module_id);
- $out .= "err_severity=$err_severity, \n" if ($err_severity);
- $out .= "Error Registers: $regs\n\n" if ($regs);
+ $out .= "err_severity=$err_severity, " if ($err_severity);
+ $out .= "Error Registers: $regs " if ($regs);
+ $out .= "\n\n";
}
if ($out ne "") {
print "HiSilicon Kunpeng920 OEM type1 error events:\n$out\n";
@@ -1692,8 +1693,9 @@ sub vendor_errors
$out .= "nimbus_id=$nimbus_id, " if ($nimbus_id);
$out .= "module_id=$module_id, " if ($module_id);
$out .= "sub_module_id=$sub_module_id, " if ($sub_module_id);
- $out .= "err_severity=$err_severity, \n" if ($err_severity);
- $out .= "Error Registers: $regs\n\n" if ($regs);
+ $out .= "err_severity=$err_severity, " if ($err_severity);
+ $out .= "Error Registers: $regs " if ($regs);
+ $out .= "\n\n";
}
if ($out ne "") {
print "HiSilicon Kunpeng920 OEM type2 error events:\n$out\n";
@@ -1717,8 +1719,9 @@ sub vendor_errors
$out .= "core_id=$core_id, " if ($core_id);
$out .= "port_id=$port_id, " if ($port_id);
$out .= "err_severity=$err_severity, " if ($err_severity);
- $out .= "err_type=$err_type, \n" if ($err_type);
- $out .= "Error Registers: $regs\n\n" if ($regs);
+ $out .= "err_type=$err_type, " if ($err_type);
+ $out .= "Error Registers: $regs " if ($regs);
+ $out .= "\n\n";
}
if ($out ne "") {
print "HiSilicon Kunpeng920 PCIe controller error events:\n$out\n";
--
2.25.1

View File

@ -0,0 +1,36 @@
From 623e85c07ab21ccc89ffe2bb444eb000a2664a9d Mon Sep 17 00:00:00 2001
From: Shiju Jose <shiju.jose@huawei.com>
Date: Sat, 5 Mar 2022 17:01:35 +0000
Subject: [PATCH 4/6] rasdaemon: ras-mc-ctl: Add printing usage if necessary
parameters are not passed for the HiSilicon vendor-errors options
Add printing usage if necessary parameters are not passed for the HiSilicon vendor-errors options of the ras-mc-ctl.
Signed-off-by: Shiju Jose <shiju.jose@huawei.com>
---
util/ras-mc-ctl.in | 2 ++
1 file changed, 2 insertions(+)
diff --git a/util/ras-mc-ctl.in b/util/ras-mc-ctl.in
index eeaf885..0e32cb1 100755
--- a/util/ras-mc-ctl.in
+++ b/util/ras-mc-ctl.in
@@ -1542,6 +1542,7 @@ sub vendor_errors_summary
if ($num_args ne 0) {
$platform_id = $ARGV[0];
} else {
+ usage(1);
return;
}
@@ -1649,6 +1650,7 @@ sub vendor_errors
if ($num_args ne 0) {
$platform_id = $ARGV[0];
} else {
+ usage(1);
return;
}
--
2.25.1

View File

@ -0,0 +1,198 @@
From 4007c95f8a8d570542ffc11676b619ea5649d0e7 Mon Sep 17 00:00:00 2001
From: Shiju Jose <shiju.jose@huawei.com>
Date: Sat, 5 Mar 2022 18:19:38 +0000
Subject: [PATCH 5/6] rasdaemon: ras-mc-ctl: Add support to display the
HiSilicon vendor errors for a specified module
Add support to display the HiSilicon vendor errors for a specified module.
Signed-off-by: Shiju Jose <shiju.jose@huawei.com>
---
util/ras-mc-ctl.in | 119 ++++++++++++++++++++++++---------------------
1 file changed, 63 insertions(+), 56 deletions(-)
diff --git a/util/ras-mc-ctl.in b/util/ras-mc-ctl.in
index 0e32cb1..d728300 100755
--- a/util/ras-mc-ctl.in
+++ b/util/ras-mc-ctl.in
@@ -96,7 +96,8 @@ Usage: $prog [OPTIONS...]
--errors Shows the errors stored at the error database.
--error-count Shows the corrected and uncorrected error counts using sysfs.
--vendor-errors-summary <platform-id> Presents a summary of the vendor-specific logged errors.
- --vendor-errors <platform-id> Shows the vendor-specific errors stored in the error database.
+ --vendor-errors <platform-id> Shows the vendor-specific errors stored in the error database.
+ --vendor-errors <platform-id> <module-name> Shows the vendor-specific errors for a specific module stored in the error database.
--vendor-platforms Shows the supported platforms with platform-ids for the vendor-specific errors.
--help This help message.
EOF
@@ -1640,15 +1641,19 @@ sub vendor_errors_summary
sub vendor_errors
{
require DBI;
- my ($num_args, $platform_id);
+ my ($num_args, $platform_id, $module);
my ($query, $query_handle, $id, $timestamp, $out);
my ($version, $soc_id, $socket_id, $totem_id, $nimbus_id, $sub_system_id, $core_id, $port_id);
my ($module_id, $sub_module_id, $err_severity, $err_type, $pcie_info, $regs);
$num_args = $#ARGV + 1;
$platform_id = 0;
+ $module = 0;
if ($num_args ne 0) {
$platform_id = $ARGV[0];
+ if ($num_args gt 1) {
+ $module = $ARGV[1];
+ }
} else {
usage(1);
return;
@@ -1664,21 +1669,21 @@ sub vendor_errors
$query_handle->bind_columns(\($id, $timestamp, $version, $soc_id, $socket_id, $nimbus_id, $module_id, $sub_module_id, $err_severity, $regs));
$out = "";
while($query_handle->fetch()) {
- $out .= "$id. $timestamp Error Info: ";
- $out .= "version=$version, ";
- $out .= "soc_id=$soc_id, " if ($soc_id);
- $out .= "socket_id=$socket_id, " if ($socket_id);
- $out .= "nimbus_id=$nimbus_id, " if ($nimbus_id);
- $out .= "module_id=$module_id, " if ($module_id);
- $out .= "sub_module_id=$sub_module_id, " if ($sub_module_id);
- $out .= "err_severity=$err_severity, " if ($err_severity);
- $out .= "Error Registers: $regs " if ($regs);
- $out .= "\n\n";
+ if ($module eq 0 || ($module_id && ($module eq $module_id))) {
+ $out .= "$id. $timestamp Error Info: ";
+ $out .= "version=$version, ";
+ $out .= "soc_id=$soc_id, " if ($soc_id);
+ $out .= "socket_id=$socket_id, " if ($socket_id);
+ $out .= "nimbus_id=$nimbus_id, " if ($nimbus_id);
+ $out .= "module_id=$module_id, " if ($module_id);
+ $out .= "sub_module_id=$sub_module_id, " if ($sub_module_id);
+ $out .= "err_severity=$err_severity, " if ($err_severity);
+ $out .= "Error Registers: $regs " if ($regs);
+ $out .= "\n\n";
+ }
}
if ($out ne "") {
print "HiSilicon Kunpeng920 OEM type1 error events:\n$out\n";
- } else {
- print "No HiSilicon Kunpeng920 OEM type1 errors.\n";
}
$query_handle->finish;
@@ -1688,21 +1693,21 @@ sub vendor_errors
$query_handle->bind_columns(\($id, $timestamp, $version, $soc_id, $socket_id, $nimbus_id, $module_id, $sub_module_id, $err_severity, $regs));
$out = "";
while($query_handle->fetch()) {
- $out .= "$id. $timestamp Error Info: ";
- $out .= "version=$version, ";
- $out .= "soc_id=$soc_id, " if ($soc_id);
- $out .= "socket_id=$socket_id, " if ($socket_id);
- $out .= "nimbus_id=$nimbus_id, " if ($nimbus_id);
- $out .= "module_id=$module_id, " if ($module_id);
- $out .= "sub_module_id=$sub_module_id, " if ($sub_module_id);
- $out .= "err_severity=$err_severity, " if ($err_severity);
- $out .= "Error Registers: $regs " if ($regs);
- $out .= "\n\n";
+ if ($module eq 0 || ($module_id && ($module eq $module_id))) {
+ $out .= "$id. $timestamp Error Info: ";
+ $out .= "version=$version, ";
+ $out .= "soc_id=$soc_id, " if ($soc_id);
+ $out .= "socket_id=$socket_id, " if ($socket_id);
+ $out .= "nimbus_id=$nimbus_id, " if ($nimbus_id);
+ $out .= "module_id=$module_id, " if ($module_id);
+ $out .= "sub_module_id=$sub_module_id, " if ($sub_module_id);
+ $out .= "err_severity=$err_severity, " if ($err_severity);
+ $out .= "Error Registers: $regs " if ($regs);
+ $out .= "\n\n";
+ }
}
if ($out ne "") {
print "HiSilicon Kunpeng920 OEM type2 error events:\n$out\n";
- } else {
- print "No HiSilicon Kunpeng920 OEM type2 errors.\n";
}
$query_handle->finish;
@@ -1712,23 +1717,23 @@ sub vendor_errors
$query_handle->bind_columns(\($id, $timestamp, $version, $soc_id, $socket_id, $nimbus_id, $sub_module_id, $core_id, $port_id, $err_severity, $err_type, $regs));
$out = "";
while($query_handle->fetch()) {
- $out .= "$id. $timestamp Error Info: ";
- $out .= "version=$version, ";
- $out .= "soc_id=$soc_id, " if ($soc_id);
- $out .= "socket_id=$socket_id, " if ($socket_id);
- $out .= "nimbus_id=$nimbus_id, " if ($nimbus_id);
- $out .= "sub_module_id=$sub_module_id, " if ($sub_module_id);
- $out .= "core_id=$core_id, " if ($core_id);
- $out .= "port_id=$port_id, " if ($port_id);
- $out .= "err_severity=$err_severity, " if ($err_severity);
- $out .= "err_type=$err_type, " if ($err_type);
- $out .= "Error Registers: $regs " if ($regs);
- $out .= "\n\n";
+ if ($module eq 0 || ($sub_module_id && ($module eq $sub_module_id))) {
+ $out .= "$id. $timestamp Error Info: ";
+ $out .= "version=$version, ";
+ $out .= "soc_id=$soc_id, " if ($soc_id);
+ $out .= "socket_id=$socket_id, " if ($socket_id);
+ $out .= "nimbus_id=$nimbus_id, " if ($nimbus_id);
+ $out .= "sub_module_id=$sub_module_id, " if ($sub_module_id);
+ $out .= "core_id=$core_id, " if ($core_id);
+ $out .= "port_id=$port_id, " if ($port_id);
+ $out .= "err_severity=$err_severity, " if ($err_severity);
+ $out .= "err_type=$err_type, " if ($err_type);
+ $out .= "Error Registers: $regs " if ($regs);
+ $out .= "\n\n";
+ }
}
if ($out ne "") {
print "HiSilicon Kunpeng920 PCIe controller error events:\n$out\n";
- } else {
- print "No HiSilicon Kunpeng920 PCIe controller errors.\n";
}
$query_handle->finish;
}
@@ -1741,22 +1746,24 @@ sub vendor_errors
$query_handle->bind_columns(\($id, $timestamp, $version, $soc_id, $socket_id, $totem_id, $nimbus_id, $sub_system_id, $module_id, $sub_module_id, $core_id, $port_id, $err_type, $pcie_info, $err_severity, $regs));
$out = "";
while($query_handle->fetch()) {
- $out .= "$id. $timestamp Error Info: ";
- $out .= "version=$version, ";
- $out .= "soc_id=$soc_id, " if ($soc_id);
- $out .= "socket_id=$socket_id, " if ($socket_id);
- $out .= "totem_id=$totem_id, " if ($totem_id);
- $out .= "nimbus_id=$nimbus_id, " if ($nimbus_id);
- $out .= "sub_system_id=$sub_system_id, " if ($sub_system_id);
- $out .= "module_id=$module_id, " if ($module_id);
- $out .= "sub_module_id=$sub_module_id, " if ($sub_module_id);
- $out .= "core_id=$core_id, " if ($core_id);
- $out .= "port_id=$port_id, " if ($port_id);
- $out .= "err_type=$err_type, " if ($err_type);
- $out .= "pcie_info=$pcie_info, " if ($pcie_info);
- $out .= "err_severity=$err_severity, " if ($err_severity);
- $out .= "Error Registers: $regs" if ($regs);
- $out .= "\n\n";
+ if ($module eq 0 || ($module_id && ($module eq $module_id))) {
+ $out .= "$id. $timestamp Error Info: ";
+ $out .= "version=$version, ";
+ $out .= "soc_id=$soc_id, " if ($soc_id);
+ $out .= "socket_id=$socket_id, " if ($socket_id);
+ $out .= "totem_id=$totem_id, " if ($totem_id);
+ $out .= "nimbus_id=$nimbus_id, " if ($nimbus_id);
+ $out .= "sub_system_id=$sub_system_id, " if ($sub_system_id);
+ $out .= "module_id=$module_id, " if ($module_id);
+ $out .= "sub_module_id=$sub_module_id, " if ($sub_module_id);
+ $out .= "core_id=$core_id, " if ($core_id);
+ $out .= "port_id=$port_id, " if ($port_id);
+ $out .= "err_type=$err_type, " if ($err_type);
+ $out .= "pcie_info=$pcie_info, " if ($pcie_info);
+ $out .= "err_severity=$err_severity, " if ($err_severity);
+ $out .= "Error Registers: $regs" if ($regs);
+ $out .= "\n\n";
+ }
}
if ($out ne "") {
print "HiSilicon Kunpeng9xx common error events:\n$out\n";
--
2.25.1

View File

@ -0,0 +1,148 @@
From 88bf3126312645843152c6c3215b54b120bcc1ec Mon Sep 17 00:00:00 2001
From: Shiju Jose <shiju.jose@huawei.com>
Date: Mon, 7 Mar 2022 12:38:45 +0000
Subject: [PATCH 6/6] rasdaemon: ras-mc-ctl: Relocate reading and display
Kunpeng920 errors to under Kunpeng9xx
Relocate reading and display Kunpeng920 errors to under Kunpeng9xx.
Signed-off-by: Shiju Jose <shiju.jose@huawei.com>
---
util/ras-mc-ctl.in | 38 ++++++++++----------------------------
1 file changed, 10 insertions(+), 28 deletions(-)
diff --git a/util/ras-mc-ctl.in b/util/ras-mc-ctl.in
index d728300..2ab9602 100755
--- a/util/ras-mc-ctl.in
+++ b/util/ras-mc-ctl.in
@@ -1527,7 +1527,6 @@ sub errors
# Definitions of the vendor platform IDs.
use constant {
- HISILICON_KUNPENG_920 => "Kunpeng920",
HISILICON_KUNPENG_9XX => "Kunpeng9xx",
};
@@ -1549,8 +1548,8 @@ sub vendor_errors_summary
my $dbh = DBI->connect("dbi:SQLite:dbname=$dbname", "", "", {});
- # HiSilicon Kunpeng920 errors
- if ($platform_id eq HISILICON_KUNPENG_920) {
+ # HiSilicon Kunpeng9xx common errors
+ if ($platform_id eq HISILICON_KUNPENG_9XX) {
$query = "select err_severity, module_id, count(*) from hip08_oem_type1_event_v2 group by err_severity, module_id";
$query_handle = $dbh->prepare($query);
$query_handle->execute();
@@ -1565,9 +1564,7 @@ sub vendor_errors_summary
$out .= "\t$module_id: $count\n";
}
if ($out ne "") {
- print "HiSilicon Kunpeng920 OEM type1 error events summary:\n$out\n";
- } else {
- print "No HiSilicon Kunpeng920 OEM type1 errors.\n\n";
+ print "HiSilicon Kunpeng9xx OEM type1 error events summary:\n$out\n";
}
$query_handle->finish;
@@ -1585,9 +1582,7 @@ sub vendor_errors_summary
$out .= "\t$module_id: $count\n";
}
if ($out ne "") {
- print "HiSilicon Kunpeng920 OEM type2 error events summary:\n$out\n";
- } else {
- print "No HiSilicon Kunpeng920 OEM type2 errors.\n\n";
+ print "HiSilicon Kunpeng9xx OEM type2 error events summary:\n$out\n";
}
$query_handle->finish;
@@ -1605,15 +1600,10 @@ sub vendor_errors_summary
$out .= "\t$sub_module_id: $count\n";
}
if ($out ne "") {
- print "HiSilicon Kunpeng920 PCIe controller error events summary:\n$out\n";
- } else {
- print "No HiSilicon Kunpeng920 PCIe controller errors.\n\n";
+ print "HiSilicon Kunpeng9xx PCIe controller error events summary:\n$out\n";
}
$query_handle->finish;
- }
- # HiSilicon Kunpeng9xx common errors
- if ($platform_id eq HISILICON_KUNPENG_9XX) {
$query = "select err_severity, module_id, count(*) from hisi_common_section_v2 group by err_severity, module_id";
$query_handle = $dbh->prepare($query);
$query_handle->execute();
@@ -1629,8 +1619,6 @@ sub vendor_errors_summary
}
if ($out ne "") {
print "HiSilicon Kunpeng9xx common error events summary:\n$out\n";
- } else {
- print "No HiSilicon Kunpeng9xx common errors.\n\n";
}
$query_handle->finish;
}
@@ -1661,8 +1649,8 @@ sub vendor_errors
my $dbh = DBI->connect("dbi:SQLite:dbname=$dbname", "", "", {});
- # HiSilicon Kunpeng920 errors
- if ($platform_id eq HISILICON_KUNPENG_920) {
+ # HiSilicon Kunpeng9xx common errors
+ if ($platform_id eq HISILICON_KUNPENG_9XX) {
$query = "select id, timestamp, version, soc_id, socket_id, nimbus_id, module_id, sub_module_id, err_severity, regs_dump from hip08_oem_type1_event_v2 order by id, module_id, err_severity";
$query_handle = $dbh->prepare($query);
$query_handle->execute();
@@ -1683,7 +1671,7 @@ sub vendor_errors
}
}
if ($out ne "") {
- print "HiSilicon Kunpeng920 OEM type1 error events:\n$out\n";
+ print "HiSilicon Kunpeng9xx OEM type1 error events:\n$out\n";
}
$query_handle->finish;
@@ -1707,7 +1695,7 @@ sub vendor_errors
}
}
if ($out ne "") {
- print "HiSilicon Kunpeng920 OEM type2 error events:\n$out\n";
+ print "HiSilicon Kunpeng9xx OEM type2 error events:\n$out\n";
}
$query_handle->finish;
@@ -1733,13 +1721,10 @@ sub vendor_errors
}
}
if ($out ne "") {
- print "HiSilicon Kunpeng920 PCIe controller error events:\n$out\n";
+ print "HiSilicon Kunpeng9xx PCIe controller error events:\n$out\n";
}
$query_handle->finish;
- }
- # HiSilicon Kunpeng9xx common errors
- if ($platform_id eq HISILICON_KUNPENG_9XX) {
$query = "select id, timestamp, version, soc_id, socket_id, totem_id, nimbus_id, sub_system_id, module_id, sub_module_id, core_id, port_id, err_type, pcie_info, err_severity, regs_dump from hisi_common_section_v2 order by id, module_id, err_severity";
$query_handle = $dbh->prepare($query);
$query_handle->execute();
@@ -1767,8 +1752,6 @@ sub vendor_errors
}
if ($out ne "") {
print "HiSilicon Kunpeng9xx common error events:\n$out\n";
- } else {
- print "No HiSilicon Kunpeng9xx common errors.\n";
}
$query_handle->finish;
}
@@ -1779,7 +1762,6 @@ sub vendor_errors
sub vendor_platforms
{
print "\nSupported platforms for the vendor-specific errors:\n";
- print "\tHiSilicon Kunpeng920, platform-id=\"", HISILICON_KUNPENG_920, "\"\n";
print "\tHiSilicon Kunpeng9xx, platform-id=\"", HISILICON_KUNPENG_9XX, "\"\n";
print "\n";
}
--
2.25.1

View File

@ -1,6 +1,6 @@
Name: rasdaemon
Version: 0.6.7
Release: 3
Release: 4
License: GPLv2
Summary: Utility to get Platform Reliability, Availability and Serviceability (RAS) reports via the Kernel tracing events
URL: https://github.com/mchehab/rasdaemon.git
@ -29,6 +29,12 @@ Patch7: 0001-rasdaemon-Fix-the-issue-of-sprintf-data-type-mismatc.patch
Patch8: 0002-rasdaemon-Fix-the-issue-of-command-option-r-for-hip0.patch
Patch9: 0003-rasdaemon-Fix-some-print-format-issues-for-hisi-comm.patch
Patch10: 0004-rasdaemon-Add-some-modules-supported-by-hisi-common-.patch
Patch11: 0001-rasdaemon-Modify-recording-Hisilicon-common-error-da.patch
Patch12: 0002-rasdaemon-ras-mc-ctl-Modify-error-statistics-for-HiS.patch
Patch13: 0003-rasdaemon-ras-mc-ctl-Reformat-error-info-of-the-HiSi.patch
Patch14: 0004-rasdaemon-ras-mc-ctl-Add-printing-usage-if-necessary.patch
Patch15: 0005-rasdaemon-ras-mc-ctl-Add-support-to-display-the-HiSi.patch
Patch16: 0006-rasdaemon-ras-mc-ctl-Relocate-reading-and-display-Ku.patch
%description
The rasdaemon program is a daemon which monitors the platform
@ -74,6 +80,19 @@ rm INSTALL %{buildroot}/usr/include/*.h
/usr/bin/systemctl enable rasdaemon.service >/dev/null 2>&1 || :
%changelog
* Mon Mar 07 2022 Shiju Jose<shiju.jose@huawei.com> - 0.6.7-4
- Type:feature
- ID:NA
- SUG:NA
- DESC:
1. Modify recording Hisilicon common error data in the rasdaemon.
2. In the ras-mc-ctl,
2.1. Improve Hisilicon common error statistics.
2.2. Add support to display the HiSilicon vendor-errors for a specified module.
2.3. Add printing usage if necessary parameters are not passed for the HiSilicon vendor-errors options.
2.4. Reformat error info of the HiSilicon Kunpeng920.
2.5. Relocate reading and display Kunpeng920 errors to under Kunpeng9xx.
* Wed Mar 2 2022 tanxiaofei<tanxiaofei@huawei.com> - 0.6.7-3
- Type:bugfix
- ID:NA