rasdaemon: Fix for vendor errors are not recorded in the SQLite database if some cpus are offline
Fix vendor errors not being recorded in the SQLite database if some CPUs are offline at system start. Signed-off-by: Bing Xia <xiabing12@h-partners.com>
This commit is contained in:
parent
5f9abe58c8
commit
4cdf0a2c6b
103
0001-rasdaemon-Fix-for-vendor-errors-are-not-recorded-in-.patch
Normal file
103
0001-rasdaemon-Fix-for-vendor-errors-are-not-recorded-in-.patch
Normal file
@ -0,0 +1,103 @@
|
||||
From 370ac83b39f09eda0fb8a5cfa40ecfc71846eb0d Mon Sep 17 00:00:00 2001
|
||||
From: Shiju Jose <shiju.jose@huawei.com>
|
||||
Date: Wed, 20 Mar 2024 12:16:05 +0000
|
||||
Subject: [PATCH] rasdaemon: Fix for vendor errors are not recorded in the
|
||||
SQLite database if some cpus are offline
|
||||
|
||||
Fix vendor errors not being recorded in the SQLite database if some CPUs
|
||||
are offline at system start.
|
||||
|
||||
Issue:
|
||||
|
||||
This issue is reproducible by offlining some CPUs, running
|
||||
./rasdaemon -f --record & and then
|
||||
injecting a vendor-specific error supported by rasdaemon.
|
||||
|
||||
Reason:
|
||||
|
||||
When the system starts with some of the CPUs offline and rasdaemon is then
|
||||
run, read_ras_event_all_cpus() exits with an error and switches to
|
||||
the multi-threaded path. However, read() in read_ras_event() returns an error in
|
||||
the threads for each of the offline CPUs, and each does cleanup, including calling
|
||||
ras_ns_finalize_vendor_tables(), which invokes sqlite3_finalize() on the vendor
|
||||
tables created. Thus the vendor error data is not stored in the SQLite
|
||||
database when such an error is reported next time.
|
||||
|
||||
Solution:
|
||||
|
||||
In ras_ns_add_vendor_tables() and ras_ns_finalize_vendor_tables(), use a
|
||||
reference count, and close the vendor tables that were created in
|
||||
ras_ns_add_vendor_tables() based on the reference count.
|
||||
|
||||
Reported-by: Junhao He <hejunhao3@huawei.com>
|
||||
Signed-off-by: Shiju Jose <shiju.jose@huawei.com>
|
||||
Signed-off-by: Junhao He <hejunhao3@huawei.com>
|
||||
Signed-off-by: Bing Xia <xiabing12@h-partners.com>
|
||||
---
|
||||
ras-non-standard-handler.c | 16 ++++++++++++++++
|
||||
ras-non-standard-handler.h | 1 +
|
||||
2 files changed, 17 insertions(+)
|
||||
|
||||
diff --git a/ras-non-standard-handler.c b/ras-non-standard-handler.c
|
||||
index 20d514b..13e2acf 100644
|
||||
--- a/ras-non-standard-handler.c
|
||||
+++ b/ras-non-standard-handler.c
|
||||
@@ -65,6 +65,7 @@ int register_ns_ev_decoder(struct ras_ns_ev_decoder *ns_ev_decoder)
|
||||
#endif
|
||||
if (!ras_ns_ev_dec_list) {
|
||||
ras_ns_ev_dec_list = ns_ev_decoder;
|
||||
+ ras_ns_ev_dec_list->ref_count = 0;
|
||||
} else {
|
||||
list = ras_ns_ev_dec_list;
|
||||
while (list->next)
|
||||
@@ -85,6 +86,8 @@ int ras_ns_add_vendor_tables(struct ras_events *ras)
|
||||
return -1;
|
||||
|
||||
ns_ev_decoder = ras_ns_ev_dec_list;
|
||||
+ if (ras_ns_ev_dec_list)
|
||||
+ ras_ns_ev_dec_list->ref_count++;
|
||||
while (ns_ev_decoder) {
|
||||
if (ns_ev_decoder->add_table && !ns_ev_decoder->stmt_dec_record) {
|
||||
error = ns_ev_decoder->add_table(ras, ns_ev_decoder);
|
||||
@@ -127,6 +130,16 @@ void ras_ns_finalize_vendor_tables(void)
|
||||
#ifdef HAVE_SQLITE3
|
||||
struct ras_ns_ev_decoder *ns_ev_decoder = ras_ns_ev_dec_list;
|
||||
|
||||
+ if (!ras_ns_ev_dec_list)
|
||||
+ return;
|
||||
+
|
||||
+ if (ras_ns_ev_dec_list->ref_count > 0)
|
||||
+ ras_ns_ev_dec_list->ref_count--;
|
||||
+ else
|
||||
+ return;
|
||||
+ if (ras_ns_ev_dec_list->ref_count > 0)
|
||||
+ return;
|
||||
+
|
||||
while (ns_ev_decoder) {
|
||||
if (ns_ev_decoder->stmt_dec_record) {
|
||||
ras_mc_finalize_vendor_table(ns_ev_decoder->stmt_dec_record);
|
||||
@@ -140,6 +153,9 @@ void ras_ns_finalize_vendor_tables(void)
|
||||
static void unregister_ns_ev_decoder(void)
|
||||
{
|
||||
#ifdef HAVE_SQLITE3
|
||||
+ if (!ras_ns_ev_dec_list)
|
||||
+ return;
|
||||
+ ras_ns_ev_dec_list->ref_count = 1;
|
||||
ras_ns_finalize_vendor_tables();
|
||||
#endif
|
||||
ras_ns_ev_dec_list = NULL;
|
||||
diff --git a/ras-non-standard-handler.h b/ras-non-standard-handler.h
|
||||
index 341206a..2777584 100644
|
||||
--- a/ras-non-standard-handler.h
|
||||
+++ b/ras-non-standard-handler.h
|
||||
@@ -22,6 +22,7 @@
|
||||
|
||||
struct ras_ns_ev_decoder {
|
||||
struct ras_ns_ev_decoder *next;
|
||||
+ uint16_t ref_count;
|
||||
const char *sec_type;
|
||||
int (*add_table)(struct ras_events *ras, struct ras_ns_ev_decoder *ev_decoder);
|
||||
int (*decode)(struct ras_events *ras, struct ras_ns_ev_decoder *ev_decoder,
|
||||
--
|
||||
2.30.0
|
||||
|
||||
@ -1,6 +1,6 @@
|
||||
Name: rasdaemon
|
||||
Version: 0.6.7
|
||||
Release: 19
|
||||
Release: 20
|
||||
License: GPLv2
|
||||
Summary: Utility to get Platform Reliability, Availability and Serviceability (RAS) reports via the Kernel tracing events
|
||||
URL: https://github.com/mchehab/rasdaemon.git
|
||||
@ -58,6 +58,7 @@ Patch9007: fix-ras-events-quit-loop-in-read_ras_event-when-kbuf-dat.patch
|
||||
Patch9008: 0001-rasdaemon-ras-mc-ctl-Modify-check-for-HiSilicon-KunP.patch
|
||||
Patch9009: add-dynamic-switch-of-ras-events-support-and-disable-block-rq-complete.patch
|
||||
Patch9010: fix-rasdaemon-print-loading-config-logs-multiple-times.patch
|
||||
Patch9011: 0001-rasdaemon-Fix-for-vendor-errors-are-not-recorded-in-.patch
|
||||
|
||||
%description
|
||||
The rasdaemon program is a daemon which monitors the platform
|
||||
@ -111,6 +112,13 @@ if [ $1 -eq 0 ] ; then
|
||||
fi
|
||||
|
||||
%changelog
|
||||
* Tue Apr 23 2024 Bing Xia <xiabing12@h-partners.com> - 0.6.7-20
|
||||
- Type:bugfix
|
||||
- ID:NA
|
||||
- SUG:NA
|
||||
- DESC:Fix vendor errors not being recorded in the SQLite database if
|
||||
some CPUs are offline at system start
|
||||
|
||||
* Mon Apr 8 2024 caixiaomeng <caixiaomeng2@huawei.com> - 0.6.7-19
|
||||
- Type:bugfix
|
||||
- ID:NA
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user