451 lines
14 KiB
Diff
451 lines
14 KiB
Diff
From b26f624fbe12203b12b65e0674fea60c70e48a21 Mon Sep 17 00:00:00 2001
|
|
From: caixiaomeng 00662745 <caixiaomeng2@huawei.com>
|
|
Date: Wed, 21 Feb 2024 15:25:11 +0800
|
|
Subject: [PATCH] BACKPORT-Add-Dynamic-Switch
|
|
|
|
---
|
|
misc/rasdaemon.env | 5 +-
|
|
ras-disabled-events.h | 10 ++
|
|
ras-events.c | 247 +++++++++++++++++++++++++++---------------
|
|
rasdaemon.c | 36 ++++++
|
|
4 files changed, 208 insertions(+), 90 deletions(-)
|
|
create mode 100644 ras-disabled-events.h
|
|
|
|
diff --git a/misc/rasdaemon.env b/misc/rasdaemon.env
|
|
index dc40af8..6780eb0 100644
|
|
--- a/misc/rasdaemon.env
|
|
+++ b/misc/rasdaemon.env
|
|
@@ -51,4 +51,7 @@ CPU_CE_THRESHOLD="18"
|
|
CPU_ISOLATION_CYCLE="24h"
|
|
|
|
# Prevent excessive isolation from causing an avalanche effect
|
|
-CPU_ISOLATION_LIMIT="10"
|
|
\ No newline at end of file
|
|
+CPU_ISOLATION_LIMIT="10"
|
|
+
|
|
+# Disable specified events by config (';'-separated list of group:event names)
|
|
+DISABLE="block:block_rq_complete"
|
|
\ No newline at end of file
|
|
diff --git a/ras-disabled-events.h b/ras-disabled-events.h
|
|
new file mode 100644
|
|
index 0000000..298a5f3
|
|
--- /dev/null
|
|
+++ b/ras-disabled-events.h
|
|
@@ -0,0 +1,10 @@
|
|
+#ifndef __RAS_DISABLED_EVENTS_H /* include guard */
|
|
+#define __RAS_DISABLED_EVENTS_H
|
|
+#define DISABLE "DISABLE" /* name of the environment variable that lists the tracepoints to disable */
|
|
+#define MAX_DISABLED_TRACEPOINTS_NUM 50 /* number of rows in choices_disable[] */
|
|
+#define MAX_DISABLED_TRACEPOINTS_STR_LENGTH 255 /* size of the buffer that holds a copy of the whole variable value, terminating NUL included */
|
|
+#define MAX_TRACEPOINTS_STR_LENGTH 50 /* size of one choices_disable[] row, terminating NUL included */
|
|
+
|
|
+extern char choices_disable[MAX_DISABLED_TRACEPOINTS_NUM][MAX_TRACEPOINTS_STR_LENGTH]; /* "group:event" names compared against by is_disabled_event() */
|
|
+extern int disabled_tracepoints_num; /* upper bound of the rows of choices_disable[] that are searched */
|
|
+#endif /* __RAS_DISABLED_EVENTS_H */
|
|
\ No newline at end of file
|
|
diff --git a/ras-events.c b/ras-events.c
|
|
index bc7da34..675d020 100644
|
|
--- a/ras-events.c
|
|
+++ b/ras-events.c
|
|
@@ -43,6 +43,7 @@
|
|
#include "ras-logger.h"
|
|
#include "ras-page-isolation.h"
|
|
#include "ras-cpu-isolation.h"
|
|
+#include "ras-disabled-events.h"
|
|
|
|
/*
|
|
* Polling time, if read() doesn't block. Currently, trace_pipe_raw never
|
|
@@ -172,6 +173,23 @@ static int get_tracing_dir(struct ras_events *ras)
|
|
return 0;
|
|
}
|
|
|
|
+/*
+ * Tell whether the tracepoint "group:event" was switched off by the user.
+ *
+ * The lookup key is built as "<group>:<event>" and compared, exactly and
+ * case-sensitively, against the first disabled_tracepoints_num rows of
+ * choices_disable[].
+ *
+ * @group: tracepoint group (subsystem) name, e.g. "ras"
+ * @event: tracepoint name inside that group, e.g. "mc_event"
+ *
+ * Returns true when a matching row exists; false otherwise, including when
+ * no tracepoint is disabled or the key does not fit in the local buffer.
+ */
+static bool is_disabled_event(const char *group, const char *event)
+{
+	char ras_event_name[MAX_PATH + 1];
+	int len, i;
+
+	/* Nothing configured: skip the formatting work altogether. */
+	if (disabled_tracepoints_num <= 0)
+		return false;
+
+	len = snprintf(ras_event_name, sizeof(ras_event_name), "%s:%s",
+		       group, event);
+	/* A truncated key must never be allowed to match a configured name. */
+	if (len < 0 || (size_t)len >= sizeof(ras_event_name))
+		return false;
+
+	for (i = 0; i < disabled_tracepoints_num; i++) {
+		if (strcmp(choices_disable[i], ras_event_name) == 0)
+			return true;
+	}
+
+	return false;
+}
|
|
+
|
|
/*
|
|
* Tracing enable/disable code
|
|
*/
|
|
@@ -228,40 +246,41 @@ int toggle_ras_mc_event(int enable)
|
|
goto free_ras;
|
|
}
|
|
|
|
- rc = __toggle_ras_mc_event(ras, "ras", "mc_event", enable);
|
|
+ rc = __toggle_ras_mc_event(ras, "ras", "mc_event", enable > 0 ? (is_disabled_event("ras", "mc_event") ? 0 : 1) : enable);
|
|
|
|
#ifdef HAVE_AER
|
|
- rc |= __toggle_ras_mc_event(ras, "ras", "aer_event", enable);
|
|
+ rc |= __toggle_ras_mc_event(ras, "ras", "aer_event", enable > 0 ? (is_disabled_event("ras", "aer_event") ? 0 : 1) : enable);
|
|
#endif
|
|
|
|
#ifdef HAVE_MCE
|
|
- rc |= __toggle_ras_mc_event(ras, "mce", "mce_record", enable);
|
|
+ rc |= __toggle_ras_mc_event(ras, "mce", "mce_record", enable > 0 ? (is_disabled_event("mce", "mce_record") ? 0 : 1) : enable);
|
|
#endif
|
|
|
|
#ifdef HAVE_EXTLOG
|
|
- rc |= __toggle_ras_mc_event(ras, "ras", "extlog_mem_event", enable);
|
|
+ rc |= __toggle_ras_mc_event(ras, "ras", "extlog_mem_event", enable > 0 ? (is_disabled_event("ras", "extlog_mem_event") ? 0 : 1) : enable);
|
|
#endif
|
|
|
|
#ifdef HAVE_NON_STANDARD
|
|
- rc |= __toggle_ras_mc_event(ras, "ras", "non_standard_event", enable);
|
|
+ rc |= __toggle_ras_mc_event(ras, "ras", "non_standard_event", enable > 0 ? (is_disabled_event("ras", "non_standard_event") ? 0 : 1) : enable);
|
|
#endif
|
|
|
|
#ifdef HAVE_ARM
|
|
- rc |= __toggle_ras_mc_event(ras, "ras", "arm_event", enable);
|
|
+ rc |= __toggle_ras_mc_event(ras, "ras", "arm_event", enable > 0 ? (is_disabled_event("ras", "arm_event") ? 0 : 1) : enable);
|
|
#endif
|
|
|
|
#ifdef HAVE_DEVLINK
|
|
- rc |= __toggle_ras_mc_event(ras, "devlink", "devlink_health_report", enable);
|
|
+ rc |= __toggle_ras_mc_event(ras, "devlink", "devlink_health_report", enable > 0 ? (is_disabled_event("devlink", "devlink_health_report") ? 0 : 1) : enable);
|
|
#endif
|
|
|
|
#ifdef HAVE_DISKERROR
|
|
- rc |= __toggle_ras_mc_event(ras, "block", "block_rq_complete", enable);
|
|
+ rc |= __toggle_ras_mc_event(ras, "block", "block_rq_complete", enable > 0 ? (is_disabled_event("block", "block_rq_complete") ? 0 : 1) : enable);
|
|
#endif
|
|
|
|
#ifdef HAVE_MEMORY_FAILURE
|
|
- rc |= __toggle_ras_mc_event(ras, "ras", "memory_failure_event", enable);
|
|
+ rc |= __toggle_ras_mc_event(ras, "ras", "memory_failure_event", enable > 0 ? (is_disabled_event("ras", "memory_failure_event") ? 0 : 1) : enable);
|
|
#endif
|
|
|
|
+
|
|
free_ras:
|
|
free(ras);
|
|
return rc;
|
|
@@ -870,42 +889,62 @@ int handle_ras_events(int record_events)
|
|
ras_page_account_init();
|
|
#endif
|
|
|
|
- rc = add_event_handler(ras, pevent, page_size, "ras", "mc_event",
|
|
- ras_mc_event_handler, NULL, MC_EVENT);
|
|
- if (!rc)
|
|
- num_events++;
|
|
- else
|
|
- log(ALL, LOG_ERR, "Can't get traces from %s:%s\n",
|
|
- "ras", "mc_event");
|
|
+ if (is_disabled_event("ras", "mc_event")) {
|
|
+ log(ALL, LOG_INFO, "Disabled %s:%s tracing from config\n",
|
|
+ "ras", "mc_event");
|
|
+ } else {
|
|
+ rc = add_event_handler(ras, pevent, page_size, "ras", "mc_event",
|
|
+ ras_mc_event_handler, NULL, MC_EVENT);
|
|
+ if (!rc)
|
|
+ num_events++;
|
|
+ else
|
|
+ log(ALL, LOG_ERR, "Can't get traces from %s:%s\n",
|
|
+ "ras", "mc_event");
|
|
+ }
|
|
|
|
#ifdef HAVE_AER
|
|
- rc = add_event_handler(ras, pevent, page_size, "ras", "aer_event",
|
|
- ras_aer_event_handler, NULL, AER_EVENT);
|
|
- if (!rc)
|
|
- num_events++;
|
|
- else
|
|
- log(ALL, LOG_ERR, "Can't get traces from %s:%s\n",
|
|
- "ras", "aer_event");
|
|
+ if (is_disabled_event("ras", "aer_event")) {
|
|
+ log(ALL, LOG_INFO, "Disabled %s:%s tracing from config\n",
|
|
+ "ras", "aer_event");
|
|
+ } else {
|
|
+ rc = add_event_handler(ras, pevent, page_size, "ras", "aer_event",
|
|
+ ras_aer_event_handler, NULL, AER_EVENT);
|
|
+ if (!rc)
|
|
+ num_events++;
|
|
+ else
|
|
+ log(ALL, LOG_ERR, "Can't get traces from %s:%s\n",
|
|
+ "ras", "aer_event");
|
|
+ }
|
|
#endif
|
|
|
|
#ifdef HAVE_NON_STANDARD
|
|
- rc = add_event_handler(ras, pevent, page_size, "ras", "non_standard_event",
|
|
- ras_non_standard_event_handler, NULL, NON_STANDARD_EVENT);
|
|
- if (!rc)
|
|
- num_events++;
|
|
- else
|
|
- log(ALL, LOG_ERR, "Can't get traces from %s:%s\n",
|
|
- "ras", "non_standard_event");
|
|
+ if (is_disabled_event("ras", "non_standard_event")) {
|
|
+ log(ALL, LOG_INFO, "Disabled %s:%s tracing from config\n",
|
|
+ "ras", "non_standard_event");
|
|
+ } else {
|
|
+ rc = add_event_handler(ras, pevent, page_size, "ras", "non_standard_event",
|
|
+ ras_non_standard_event_handler, NULL, NON_STANDARD_EVENT);
|
|
+ if (!rc)
|
|
+ num_events++;
|
|
+ else
|
|
+ log(ALL, LOG_ERR, "Can't get traces from %s:%s\n",
|
|
+ "ras", "non_standard_event");
|
|
+ }
|
|
#endif
|
|
|
|
#ifdef HAVE_ARM
|
|
- rc = add_event_handler(ras, pevent, page_size, "ras", "arm_event",
|
|
- ras_arm_event_handler, NULL, ARM_EVENT);
|
|
- if (!rc)
|
|
- num_events++;
|
|
- else
|
|
- log(ALL, LOG_ERR, "Can't get traces from %s:%s\n",
|
|
- "ras", "arm_event");
|
|
+ if (is_disabled_event("ras", "arm_event")) {
|
|
+ log(ALL, LOG_INFO, "Disabled %s:%s tracing from config\n",
|
|
+ "ras", "arm_event");
|
|
+ } else {
|
|
+ rc = add_event_handler(ras, pevent, page_size, "ras", "arm_event",
|
|
+ ras_arm_event_handler, NULL, ARM_EVENT);
|
|
+ if (!rc)
|
|
+ num_events++;
|
|
+ else
|
|
+ log(ALL, LOG_ERR, "Can't get traces from %s:%s\n",
|
|
+ "ras", "arm_event");
|
|
+ }
|
|
#endif
|
|
|
|
cpus = get_num_cpus(ras);
|
|
@@ -915,72 +954,102 @@ int handle_ras_events(int record_events)
|
|
#endif
|
|
|
|
#ifdef HAVE_MCE
|
|
- rc = register_mce_handler(ras, cpus);
|
|
- if (rc)
|
|
- log(ALL, LOG_INFO, "Can't register mce handler\n");
|
|
- if (ras->mce_priv) {
|
|
- rc = add_event_handler(ras, pevent, page_size,
|
|
- "mce", "mce_record",
|
|
- ras_mce_event_handler, NULL, MCE_EVENT);
|
|
- if (!rc)
|
|
- num_events++;
|
|
- else
|
|
- log(ALL, LOG_ERR, "Can't get traces from %s:%s\n",
|
|
- "mce", "mce_record");
|
|
+ if (is_disabled_event("mce", "mce_record")) {
|
|
+ log(ALL, LOG_INFO, "Disabled %s:%s tracing from config\n",
|
|
+ "mce", "mce_record");
|
|
+ } else {
|
|
+ rc = register_mce_handler(ras, cpus);
|
|
+ if (rc)
|
|
+ log(ALL, LOG_INFO, "Can't register mce handler\n");
|
|
+ if (ras->mce_priv) {
|
|
+ rc = add_event_handler(ras, pevent, page_size,
|
|
+ "mce", "mce_record",
|
|
+ ras_mce_event_handler, NULL, MCE_EVENT);
|
|
+ if (!rc)
|
|
+ num_events++;
|
|
+ else
|
|
+ log(ALL, LOG_ERR, "Can't get traces from %s:%s\n",
|
|
+ "mce", "mce_record");
|
|
+ }
|
|
}
|
|
#endif
|
|
|
|
#ifdef HAVE_EXTLOG
|
|
- rc = add_event_handler(ras, pevent, page_size, "ras", "extlog_mem_event",
|
|
- ras_extlog_mem_event_handler, NULL, EXTLOG_EVENT);
|
|
- if (!rc) {
|
|
- /* tell kernel we are listening, so don't printk to console */
|
|
- (void)open("/sys/kernel/debug/ras/daemon_active", 0);
|
|
- num_events++;
|
|
- } else
|
|
- log(ALL, LOG_ERR, "Can't get traces from %s:%s\n",
|
|
- "ras", "extlog_mem_event");
|
|
+ if (is_disabled_event("ras", "extlog_mem_event")) {
|
|
+ log(ALL, LOG_INFO, "Disabled %s:%s tracing from config\n",
|
|
+ "ras", "extlog_mem_event");
|
|
+ } else {
|
|
+ rc = add_event_handler(ras, pevent, page_size, "ras", "extlog_mem_event",
|
|
+ ras_extlog_mem_event_handler, NULL, EXTLOG_EVENT);
|
|
+ if (!rc) {
|
|
+ /* tell kernel we are listening, so don't printk to console */
|
|
+ (void)open("/sys/kernel/debug/ras/daemon_active", 0);
|
|
+ num_events++;
|
|
+ } else
|
|
+ log(ALL, LOG_ERR, "Can't get traces from %s:%s\n",
|
|
+ "ras", "extlog_mem_event");
|
|
+ }
|
|
#endif
|
|
|
|
#ifdef HAVE_DEVLINK
|
|
- rc = add_event_handler(ras, pevent, page_size, "net",
|
|
- "net_dev_xmit_timeout",
|
|
- ras_net_xmit_timeout_handler, NULL, DEVLINK_EVENT);
|
|
- if (!rc)
|
|
- filter_str = "devlink/devlink_health_report:msg=~\'TX timeout*\'";
|
|
-
|
|
- rc = add_event_handler(ras, pevent, page_size, "devlink",
|
|
- "devlink_health_report",
|
|
- ras_devlink_event_handler, filter_str, DEVLINK_EVENT);
|
|
- if (!rc)
|
|
- num_events++;
|
|
- else
|
|
- log(ALL, LOG_ERR, "Can't get traces from %s:%s\n",
|
|
- "devlink", "devlink_health_report");
|
|
+ if (is_disabled_event("net", "net_dev_xmit_timeout")) {
|
|
+ log(ALL, LOG_INFO, "Disabled %s:%s tracing from config\n",
|
|
+ "net", "net_dev_xmit_timeout");
|
|
+ } else {
|
|
+ rc = add_event_handler(ras, pevent, page_size, "net",
|
|
+ "net_dev_xmit_timeout",
|
|
+ ras_net_xmit_timeout_handler, NULL, DEVLINK_EVENT);
|
|
+ if (!rc)
|
|
+ filter_str = "devlink/devlink_health_report:msg=~\'TX timeout*\'";
|
|
+
|
|
+ if (is_disabled_event("devlink", "devlink_health_report")) {
|
|
+ log(ALL, LOG_INFO, "Disabled %s:%s tracing from config\n",
|
|
+ "devlink", "devlink_health_report");
|
|
+ } else {
|
|
+ rc = add_event_handler(ras, pevent, page_size, "devlink",
|
|
+ "devlink_health_report",
|
|
+ ras_devlink_event_handler, filter_str, DEVLINK_EVENT);
|
|
+ if (!rc)
|
|
+ num_events++;
|
|
+ else
|
|
+ log(ALL, LOG_ERR, "Can't get traces from %s:%s\n",
|
|
+ "devlink", "devlink_health_report");
|
|
+ }
|
|
+ }
|
|
#endif
|
|
|
|
#ifdef HAVE_DISKERROR
|
|
- rc = filter_ras_mc_event(ras, "block", "block_rq_complete", "error != 0");
|
|
- if (!rc) {
|
|
- rc = add_event_handler(ras, pevent, page_size, "block",
|
|
- "block_rq_complete", ras_diskerror_event_handler,
|
|
- NULL, DISKERROR_EVENT);
|
|
- if (!rc)
|
|
- num_events++;
|
|
- else
|
|
- log(ALL, LOG_ERR, "Can't get traces from %s:%s\n",
|
|
- "block", "block_rq_complete");
|
|
+ if (is_disabled_event("block", "block_rq_complete")) {
|
|
+ log(ALL, LOG_INFO, "Disabled %s:%s tracing from config\n",
|
|
+ "block", "block_rq_complete");
|
|
+ } else {
|
|
+ rc = filter_ras_mc_event(ras, "block", "block_rq_complete", "error != 0");
|
|
+ if (!rc) {
|
|
+ rc = add_event_handler(ras, pevent, page_size, "block",
|
|
+ "block_rq_complete", ras_diskerror_event_handler,
|
|
+ NULL, DISKERROR_EVENT);
|
|
+ if (!rc)
|
|
+ num_events++;
|
|
+ else
|
|
+ log(ALL, LOG_ERR, "Can't get traces from %s:%s\n",
|
|
+ "block", "block_rq_complete");
|
|
+ }
|
|
}
|
|
#endif
|
|
|
|
#ifdef HAVE_MEMORY_FAILURE
|
|
- rc = add_event_handler(ras, pevent, page_size, "ras", "memory_failure_event",
|
|
- ras_memory_failure_event_handler, NULL, MF_EVENT);
|
|
- if (!rc)
|
|
- num_events++;
|
|
- else
|
|
- log(ALL, LOG_ERR, "Can't get traces from %s:%s\n",
|
|
- "ras", "memory_failure_event");
|
|
+ if (is_disabled_event("ras", "memory_failure_event")) {
|
|
+ log(ALL, LOG_INFO, "Disabled %s:%s tracing from config\n",
|
|
+ "ras", "memory_failure_event");
|
|
+ } else {
|
|
+ rc = add_event_handler(ras, pevent, page_size, "ras", "memory_failure_event",
|
|
+ ras_memory_failure_event_handler, NULL, MF_EVENT);
|
|
+ if (!rc)
|
|
+ num_events++;
|
|
+ else
|
|
+ log(ALL, LOG_ERR, "Can't get traces from %s:%s\n",
|
|
+ "ras", "memory_failure_event");
|
|
+ }
|
|
#endif
|
|
|
|
if (!num_events) {
|
|
diff --git a/rasdaemon.c b/rasdaemon.c
|
|
index 66f4dea..0437662 100644
|
|
--- a/rasdaemon.c
|
|
+++ b/rasdaemon.c
|
|
@@ -25,6 +25,7 @@
|
|
#include "ras-record.h"
|
|
#include "ras-logger.h"
|
|
#include "ras-events.h"
|
|
+#include "ras-disabled-events.h"
|
|
|
|
/*
|
|
* Arguments(argp) handling logic and main
|
|
@@ -34,6 +35,9 @@
|
|
#define TOOL_DESCRIPTION "RAS daemon to log the RAS events."
|
|
#define ARGS_DOC "<options>"
|
|
|
|
+char choices_disable[MAX_DISABLED_TRACEPOINTS_NUM][MAX_TRACEPOINTS_STR_LENGTH]; /* "group:event" names of the tracepoints to disable, filled by parse_disabled_choices() */
|
|
+int disabled_tracepoints_num; /* number of rows of choices_disable[] filled by parse_disabled_choices() */
|
|
+
|
|
const char *argp_program_version = TOOL_NAME " " VERSION;
|
|
const char *argp_program_bug_address = "Mauro Carvalho Chehab <mchehab@kernel.org>";
|
|
|
|
@@ -43,6 +47,36 @@ struct arguments {
|
|
int foreground;
|
|
};
|
|
|
|
+/*
+ * Load the list of tracepoints to disable from the DISABLE environment
+ * variable into choices_disable[] and set disabled_tracepoints_num.
+ *
+ * The value is a ';'-separated list of "group:event" names. Blanks and tabs
+ * around each name are ignored and empty names are dropped. The whole value
+ * must be shorter than MAX_DISABLED_TRACEPOINTS_STR_LENGTH and each name
+ * shorter than MAX_TRACEPOINTS_STR_LENGTH; an oversized name is skipped with
+ * a warning. At most MAX_DISABLED_TRACEPOINTS_NUM names are kept; further
+ * ones are reported once and ignored.
+ *
+ * When the variable is unset or its value is too long, the globals are left
+ * untouched.
+ */
+static void parse_disabled_choices(void)
+{
+	char disabled_tracepoints_str[MAX_DISABLED_TRACEPOINTS_STR_LENGTH];
+	const char *sep = ";";
+	const char *config_disabled_tracepoints = getenv(DISABLE);
+	char *tracepoint_str;
+	int index = 0;
+
+	if (config_disabled_tracepoints == NULL)
+		return;
+
+	if (strlen(config_disabled_tracepoints) >= sizeof(disabled_tracepoints_str)) {
+		log(ALL, LOG_WARNING, "Failed to read disabled events config string, length exceeds %d characters.\n", MAX_DISABLED_TRACEPOINTS_STR_LENGTH - 1);
+		return;
+	}
+	/* strtok() writes into its input, so tokenize a private copy. */
+	strcpy(disabled_tracepoints_str, config_disabled_tracepoints);
+
+	for (tracepoint_str = strtok(disabled_tracepoints_str, sep);
+	     tracepoint_str != NULL;
+	     tracepoint_str = strtok(NULL, sep)) {
+		size_t len;
+
+		/* Drop surrounding blanks so that "a; b" still matches "b". */
+		while (*tracepoint_str == ' ' || *tracepoint_str == '\t')
+			tracepoint_str++;
+		len = strlen(tracepoint_str);
+		while (len > 0 && (tracepoint_str[len - 1] == ' ' ||
+				   tracepoint_str[len - 1] == '\t'))
+			tracepoint_str[--len] = '\0';
+		if (len == 0)
+			continue;
+
+		if (len >= MAX_TRACEPOINTS_STR_LENGTH) {
+			log(ALL, LOG_WARNING, "Failed to read disabled events config item %s string, length exceeds %d characters, skipped.\n", tracepoint_str, MAX_TRACEPOINTS_STR_LENGTH - 1);
+			continue;
+		}
+
+		/* Table full: say so instead of silently ignoring the rest. */
+		if (index >= MAX_DISABLED_TRACEPOINTS_NUM) {
+			log(ALL, LOG_WARNING, "Too many disabled events in config, only the first %d are used.\n", MAX_DISABLED_TRACEPOINTS_NUM);
+			break;
+		}
+
+		strcpy(choices_disable[index++], tracepoint_str);
+	}
+
+	disabled_tracepoints_num = index;
+}
|
|
+
|
|
static error_t parse_opt(int k, char *arg, struct argp_state *state)
|
|
{
|
|
struct arguments *args = state->input;
|
|
@@ -102,6 +136,8 @@ int main(int argc, char *argv[])
|
|
return -1;
|
|
}
|
|
|
|
+ parse_disabled_choices();
|
|
+
|
|
if (args.enable_ras) {
|
|
int enable;
|
|
|
|
--
|
|
2.33.0
|
|
|
|
|