From b26f624fbe12203b12b65e0674fea60c70e48a21 Mon Sep 17 00:00:00 2001 From: caixiaomeng 00662745 Date: Wed, 21 Feb 2024 15:25:11 +0800 Subject: [PATCH] BACKPORT-Add-Dynamic-Switch --- misc/rasdaemon.env | 5 +- ras-disabled-events.h | 10 ++ ras-events.c | 247 +++++++++++++++++++++++++++--------------- rasdaemon.c | 36 ++++++ 4 files changed, 208 insertions(+), 90 deletions(-) create mode 100644 ras-disabled-events.h diff --git a/misc/rasdaemon.env b/misc/rasdaemon.env index dc40af8..6780eb0 100644 --- a/misc/rasdaemon.env +++ b/misc/rasdaemon.env @@ -51,4 +51,7 @@ CPU_CE_THRESHOLD="18" CPU_ISOLATION_CYCLE="24h" # Prevent excessive isolation from causing an avalanche effect -CPU_ISOLATION_LIMIT="10" \ No newline at end of file +CPU_ISOLATION_LIMIT="10" + +# Disable specified events by config +DISABLE="block:block_rq_complete" \ No newline at end of file diff --git a/ras-disabled-events.h b/ras-disabled-events.h new file mode 100644 index 0000000..298a5f3 --- /dev/null +++ b/ras-disabled-events.h @@ -0,0 +1,10 @@ +#ifndef __RAS_DISABLED_EVENTS_H +#define __RAS_DISABLED_EVENTS_H +#define DISABLE "DISABLE" +#define MAX_DISABLED_TRACEPOINTS_NUM 50 +#define MAX_DISABLED_TRACEPOINTS_STR_LENGTH 255 +#define MAX_TRACEPOINTS_STR_LENGTH 50 + +extern char choices_disable[MAX_DISABLED_TRACEPOINTS_NUM][MAX_TRACEPOINTS_STR_LENGTH]; +extern int disabled_tracepoints_num; +#endif \ No newline at end of file diff --git a/ras-events.c b/ras-events.c index bc7da34..675d020 100644 --- a/ras-events.c +++ b/ras-events.c @@ -43,6 +43,7 @@ #include "ras-logger.h" #include "ras-page-isolation.h" #include "ras-cpu-isolation.h" +#include "ras-disabled-events.h" /* * Polling time, if read() doesn't block. Currently, trace_pipe_raw never @@ -172,6 +173,23 @@ static int get_tracing_dir(struct ras_events *ras) return 0; } +static bool is_disabled_event(char *group, char *event) { + char ras_event_name[MAX_PATH + 1]; + + snprintf(ras_event_name, sizeof(ras_event_name), "%s:%s", + group, event); + + if (disabled_tracepoints_num == 0) { + return false; + } + for (int i = 0; i < disabled_tracepoints_num; ++i) { + if (strcmp(choices_disable[i], ras_event_name) == 0) { + return true; + } + } + return false; +} + /* * Tracing enable/disable code */ @@ -228,40 +246,41 @@ int toggle_ras_mc_event(int enable) goto free_ras; } - rc = __toggle_ras_mc_event(ras, "ras", "mc_event", enable); + rc = __toggle_ras_mc_event(ras, "ras", "mc_event", enable > 0 ? (is_disabled_event("ras", "mc_event") ? 0 : 1) : enable); #ifdef HAVE_AER - rc |= __toggle_ras_mc_event(ras, "ras", "aer_event", enable); + rc |= __toggle_ras_mc_event(ras, "ras", "aer_event", enable > 0 ? (is_disabled_event("ras", "aer_event") ? 0 : 1) : enable); #endif #ifdef HAVE_MCE - rc |= __toggle_ras_mc_event(ras, "mce", "mce_record", enable); + rc |= __toggle_ras_mc_event(ras, "mce", "mce_record", enable > 0 ? (is_disabled_event("mce", "mce_record") ? 0 : 1) : enable); #endif #ifdef HAVE_EXTLOG - rc |= __toggle_ras_mc_event(ras, "ras", "extlog_mem_event", enable); + rc |= __toggle_ras_mc_event(ras, "ras", "extlog_mem_event", enable > 0 ? (is_disabled_event("ras", "extlog_mem_event") ? 0 : 1) : enable); #endif #ifdef HAVE_NON_STANDARD - rc |= __toggle_ras_mc_event(ras, "ras", "non_standard_event", enable); + rc |= __toggle_ras_mc_event(ras, "ras", "non_standard_event", enable > 0 ? (is_disabled_event("ras", "non_standard_event") ? 0 : 1) : enable); #endif #ifdef HAVE_ARM - rc |= __toggle_ras_mc_event(ras, "ras", "arm_event", enable); + rc |= __toggle_ras_mc_event(ras, "ras", "arm_event", enable > 0 ? (is_disabled_event("ras", "arm_event") ? 0 : 1) : enable); #endif #ifdef HAVE_DEVLINK - rc |= __toggle_ras_mc_event(ras, "devlink", "devlink_health_report", enable); + rc |= __toggle_ras_mc_event(ras, "devlink", "devlink_health_report", enable > 0 ? (is_disabled_event("devlink", "devlink_health_report") ? 0 : 1) : enable); #endif #ifdef HAVE_DISKERROR - rc |= __toggle_ras_mc_event(ras, "block", "block_rq_complete", enable); + rc |= __toggle_ras_mc_event(ras, "block", "block_rq_complete", enable > 0 ? (is_disabled_event("block", "block_rq_complete") ? 0 : 1) : enable); #endif #ifdef HAVE_MEMORY_FAILURE - rc |= __toggle_ras_mc_event(ras, "ras", "memory_failure_event", enable); + rc |= __toggle_ras_mc_event(ras, "ras", "memory_failure_event", enable > 0 ? (is_disabled_event("ras", "memory_failure_event") ? 0 : 1) : enable); #endif + free_ras: free(ras); return rc; @@ -870,42 +889,62 @@ int handle_ras_events(int record_events) ras_page_account_init(); #endif - rc = add_event_handler(ras, pevent, page_size, "ras", "mc_event", - ras_mc_event_handler, NULL, MC_EVENT); - if (!rc) - num_events++; - else - log(ALL, LOG_ERR, "Can't get traces from %s:%s\n", - "ras", "mc_event"); + if (is_disabled_event("ras", "mc_event")) { + log(ALL, LOG_INFO, "Disabled %s:%s tracing from config\n", + "ras", "mc_event"); + } else { + rc = add_event_handler(ras, pevent, page_size, "ras", "mc_event", + ras_mc_event_handler, NULL, MC_EVENT); + if (!rc) + num_events++; + else + log(ALL, LOG_ERR, "Can't get traces from %s:%s\n", + "ras", "mc_event"); + } #ifdef HAVE_AER - rc = add_event_handler(ras, pevent, page_size, "ras", "aer_event", - ras_aer_event_handler, NULL, AER_EVENT); - if (!rc) - num_events++; - else - log(ALL, LOG_ERR, "Can't get traces from %s:%s\n", - "ras", "aer_event"); + if (is_disabled_event("ras", "aer_event")) { + log(ALL, LOG_INFO, "Disabled %s:%s tracing from config\n", + "ras", "aer_event"); + } else { + rc = add_event_handler(ras, pevent, page_size, "ras", "aer_event", + ras_aer_event_handler, NULL, AER_EVENT); + if (!rc) + num_events++; + else + log(ALL, LOG_ERR, "Can't get traces from %s:%s\n", + "ras", "aer_event"); + } #endif #ifdef HAVE_NON_STANDARD - rc = add_event_handler(ras, pevent, page_size, "ras", "non_standard_event", - ras_non_standard_event_handler, NULL, NON_STANDARD_EVENT); - if (!rc) - num_events++; - else - log(ALL, LOG_ERR, "Can't get traces from %s:%s\n", - "ras", "non_standard_event"); + if (is_disabled_event("ras", "non_standard_event")) { + log(ALL, LOG_INFO, "Disabled %s:%s tracing from config\n", + "ras", "non_standard_event"); + } else { + rc = add_event_handler(ras, pevent, page_size, "ras", "non_standard_event", + ras_non_standard_event_handler, NULL, NON_STANDARD_EVENT); + if (!rc) + num_events++; + else + log(ALL, LOG_ERR, "Can't get traces from %s:%s\n", + "ras", "non_standard_event"); + } #endif #ifdef HAVE_ARM - rc = add_event_handler(ras, pevent, page_size, "ras", "arm_event", - ras_arm_event_handler, NULL, ARM_EVENT); - if (!rc) - num_events++; - else - log(ALL, LOG_ERR, "Can't get traces from %s:%s\n", - "ras", "arm_event"); + if (is_disabled_event("ras", "arm_event")) { + log(ALL, LOG_INFO, "Disabled %s:%s tracing from config\n", + "ras", "arm_event"); + } else { + rc = add_event_handler(ras, pevent, page_size, "ras", "arm_event", + ras_arm_event_handler, NULL, ARM_EVENT); + if (!rc) + num_events++; + else + log(ALL, LOG_ERR, "Can't get traces from %s:%s\n", + "ras", "arm_event"); + } #endif cpus = get_num_cpus(ras); @@ -915,72 +954,102 @@ int handle_ras_events(int record_events) #endif #ifdef HAVE_MCE - rc = register_mce_handler(ras, cpus); - if (rc) - log(ALL, LOG_INFO, "Can't register mce handler\n"); - if (ras->mce_priv) { - rc = add_event_handler(ras, pevent, page_size, - "mce", "mce_record", - ras_mce_event_handler, NULL, MCE_EVENT); - if (!rc) - num_events++; - else - log(ALL, LOG_ERR, "Can't get traces from %s:%s\n", - "mce", "mce_record"); + if (is_disabled_event("mce", "mce_record")) { + log(ALL, LOG_INFO, "Disabled %s:%s tracing from config\n", + "mce", "mce_record"); + } else { + rc = register_mce_handler(ras, cpus); + if (rc) + log(ALL, LOG_INFO, "Can't register mce handler\n"); + if (ras->mce_priv) { + rc = add_event_handler(ras, pevent, page_size, + "mce", "mce_record", + ras_mce_event_handler, NULL, MCE_EVENT); + if (!rc) + num_events++; + else + log(ALL, LOG_ERR, "Can't get traces from %s:%s\n", + "mce", "mce_record"); + } } #endif #ifdef HAVE_EXTLOG - rc = add_event_handler(ras, pevent, page_size, "ras", "extlog_mem_event", - ras_extlog_mem_event_handler, NULL, EXTLOG_EVENT); - if (!rc) { - /* tell kernel we are listening, so don't printk to console */ - (void)open("/sys/kernel/debug/ras/daemon_active", 0); - num_events++; - } else - log(ALL, LOG_ERR, "Can't get traces from %s:%s\n", - "ras", "extlog_mem_event"); + if (is_disabled_event("ras", "extlog_mem_event")) { + log(ALL, LOG_INFO, "Disabled %s:%s tracing from config\n", + "ras", "extlog_mem_event"); + } else { + rc = add_event_handler(ras, pevent, page_size, "ras", "extlog_mem_event", + ras_extlog_mem_event_handler, NULL, EXTLOG_EVENT); + if (!rc) { + /* tell kernel we are listening, so don't printk to console */ + (void)open("/sys/kernel/debug/ras/daemon_active", 0); + num_events++; + } else + log(ALL, LOG_ERR, "Can't get traces from %s:%s\n", + "ras", "extlog_mem_event"); + } #endif #ifdef HAVE_DEVLINK - rc = add_event_handler(ras, pevent, page_size, "net", - "net_dev_xmit_timeout", - ras_net_xmit_timeout_handler, NULL, DEVLINK_EVENT); - if (!rc) - filter_str = "devlink/devlink_health_report:msg=~\'TX timeout*\'"; - - rc = add_event_handler(ras, pevent, page_size, "devlink", - "devlink_health_report", - ras_devlink_event_handler, filter_str, DEVLINK_EVENT); - if (!rc) - num_events++; - else - log(ALL, LOG_ERR, "Can't get traces from %s:%s\n", - "devlink", "devlink_health_report"); + if (is_disabled_event("net", "net_dev_xmit_timeout")) { + log(ALL, LOG_INFO, "Disabled %s:%s tracing from config\n", + "net", "net_dev_xmit_timeout"); + } else { + rc = add_event_handler(ras, pevent, page_size, "net", + "net_dev_xmit_timeout", + ras_net_xmit_timeout_handler, NULL, DEVLINK_EVENT); + if (!rc) + filter_str = "devlink/devlink_health_report:msg=~\'TX timeout*\'"; + + if (is_disabled_event("devlink", "devlink_health_report")) { + log(ALL, LOG_INFO, "Disabled %s:%s tracing from config\n", + "devlink", "devlink_health_report"); + } else { + rc = add_event_handler(ras, pevent, page_size, "devlink", + "devlink_health_report", + ras_devlink_event_handler, filter_str, DEVLINK_EVENT); + if (!rc) + num_events++; + else + log(ALL, LOG_ERR, "Can't get traces from %s:%s\n", + "devlink", "devlink_health_report"); + } + } #endif #ifdef HAVE_DISKERROR - rc = filter_ras_mc_event(ras, "block", "block_rq_complete", "error != 0"); - if (!rc) { - rc = add_event_handler(ras, pevent, page_size, "block", - "block_rq_complete", ras_diskerror_event_handler, - NULL, DISKERROR_EVENT); - if (!rc) - num_events++; - else - log(ALL, LOG_ERR, "Can't get traces from %s:%s\n", - "block", "block_rq_complete"); + if (is_disabled_event("block", "block_rq_complete")) { + log(ALL, LOG_INFO, "Disabled %s:%s tracing from config\n", + "block", "block_rq_complete"); + } else { + rc = filter_ras_mc_event(ras, "block", "block_rq_complete", "error != 0"); + if (!rc) { + rc = add_event_handler(ras, pevent, page_size, "block", + "block_rq_complete", ras_diskerror_event_handler, + NULL, DISKERROR_EVENT); + if (!rc) + num_events++; + else + log(ALL, LOG_ERR, "Can't get traces from %s:%s\n", + "block", "block_rq_complete"); + } } #endif #ifdef HAVE_MEMORY_FAILURE - rc = add_event_handler(ras, pevent, page_size, "ras", "memory_failure_event", - ras_memory_failure_event_handler, NULL, MF_EVENT); - if (!rc) - num_events++; - else - log(ALL, LOG_ERR, "Can't get traces from %s:%s\n", - "ras", "memory_failure_event"); + if (is_disabled_event("ras", "memory_failure_event")) { + log(ALL, LOG_INFO, "Disabled %s:%s tracing from config\n", + "ras", "memory_failure_event"); + } else { + rc = add_event_handler(ras, pevent, page_size, "ras", "memory_failure_event", + ras_memory_failure_event_handler, NULL, MF_EVENT); + if (!rc) + num_events++; + else + log(ALL, LOG_ERR, "Can't get traces from %s:%s\n", + "ras", "memory_failure_event"); + } #endif if (!num_events) { diff --git a/rasdaemon.c b/rasdaemon.c index 66f4dea..0437662 100644 --- a/rasdaemon.c +++ b/rasdaemon.c @@ -25,6 +25,7 @@ #include "ras-record.h" #include "ras-logger.h" #include "ras-events.h" +#include "ras-disabled-events.h" /* * Arguments(argp) handling logic and main @@ -34,6 +35,9 @@ #define TOOL_DESCRIPTION "RAS daemon to log the RAS events." #define ARGS_DOC "" +char choices_disable[MAX_DISABLED_TRACEPOINTS_NUM][MAX_TRACEPOINTS_STR_LENGTH]; +int disabled_tracepoints_num; + const char *argp_program_version = TOOL_NAME " " VERSION; const char *argp_program_bug_address = "Mauro Carvalho Chehab "; @@ -43,6 +47,36 @@ struct arguments { int foreground; }; +static void parse_disabled_choices() { + char disabled_tracepoints_str[MAX_DISABLED_TRACEPOINTS_STR_LENGTH]; + const char* sep = ";"; + char* tracepoint_str; + char* config_disabled_tracepoints = getenv(DISABLE); + if (config_disabled_tracepoints == NULL) { + return; + } + + if (strlen(config_disabled_tracepoints) >= MAX_DISABLED_TRACEPOINTS_STR_LENGTH) { + log(ALL, LOG_WARNING, "Failed to read disabled events config string, length exceeds %d characters.\n", MAX_DISABLED_TRACEPOINTS_STR_LENGTH); + return; + } + strcpy(disabled_tracepoints_str, config_disabled_tracepoints); + + tracepoint_str = strtok(disabled_tracepoints_str, sep); + int index = 0; + + while(tracepoint_str != NULL && index < MAX_DISABLED_TRACEPOINTS_NUM) { + if (strlen(tracepoint_str) >= MAX_TRACEPOINTS_STR_LENGTH) { + log(ALL, LOG_WARNING, "Failed to read disabled events config item %s string, length exceeds %d characters, skipped.\n", tracepoint_str, MAX_TRACEPOINTS_STR_LENGTH); + } + else { + strcpy(choices_disable[index++], tracepoint_str); + } + tracepoint_str = strtok(NULL, sep); + } + disabled_tracepoints_num = index; +} + static error_t parse_opt(int k, char *arg, struct argp_state *state) { struct arguments *args = state->input; @@ -102,6 +136,8 @@ int main(int argc, char *argv[]) return -1; } + parse_disabled_choices(); + if (args.enable_ras) { int enable; -- 2.33.0