From ef8824010d5eca69167b71f572de953de7faeaa5 Mon Sep 17 00:00:00 2001 From: volcanodragon Date: Sun, 29 Oct 2023 16:02:07 +0800 Subject: [PATCH] fix activate_mapping bug (cherry picked from commit aa3cbff97fa1749029db426c3b43d00d80c1b1f7) --- ...-avoid-use-after-free-when-affinity-.patch | 151 ++++++++++++++++++ ...apping-make-sure-to-catch-all-errors.patch | 48 ++++++ ...activate_mapping-report-error-reason.patch | 64 ++++++++ ...-only-blacklist-irq-if-error-is-cons.patch | 51 ++++++ ...-avoid-logging-error-when-there-is-n.patch | 28 ++++ irqbalance.spec | 13 +- 6 files changed, 354 insertions(+), 1 deletion(-) create mode 100644 backport-0001-activate_mapping-avoid-use-after-free-when-affinity-.patch create mode 100644 backport-0002-activate_mapping-make-sure-to-catch-all-errors.patch create mode 100644 backport-0003-activate_mapping-report-error-reason.patch create mode 100644 backport-0004-activate_mapping-only-blacklist-irq-if-error-is-cons.patch create mode 100644 backport-0005-activate_mapping-avoid-logging-error-when-there-is-n.patch diff --git a/backport-0001-activate_mapping-avoid-use-after-free-when-affinity-.patch b/backport-0001-activate_mapping-avoid-use-after-free-when-affinity-.patch new file mode 100644 index 0000000..5797674 --- /dev/null +++ b/backport-0001-activate_mapping-avoid-use-after-free-when-affinity-.patch @@ -0,0 +1,151 @@ +From f589bdced6e1fe885969f2833fc3cacdfb60ea79 Mon Sep 17 00:00:00 2001 +From: Robin Jarry +Date: Tue, 11 Jul 2023 15:17:55 +0200 +Subject: [PATCH 1/5] activate_mapping: avoid use-after-free when affinity + cannot be set + +Conflict: 725d9b12888f0dcfce5038c24e2015a10b36a4e9 commit modified +check_for_irq_ban input paras in function parse_user_policy_key(),so +we modified this patch ensure that the patch can be installed. +The details are as follows: +check_for_irq_ban(hint,mod) changed to check_for_irq_ban(irq, +proc_interrupts) + +add_banned_irq appends the irq_info to the banned_irqs list. +remove_one_irq_from_db removes it from the interrupts_db and free()s it. + +This leaves an invalid pointer dangling in banned_irqs *and* potentially +in rebalance_irq_list which can cause use-after-free errors. + +Do not move the irq_info around. Only add a flag to indicate that this +irq's affinity cannot be managed and ignore the irq when this flag is +set. + +Link: https://github.com/Irqbalance/irqbalance/issues/267 +Fixes: 55c5c321c73e ("arm64: Add irq aff change check For aarch64...") +Signed-off-by: Robin Jarry +--- + activate.c | 8 +++++--- + classify.c | 28 +++------------------------- + irqbalance.h | 2 -- + types.h | 3 ++- + 4 files changed, 10 insertions(+), 31 deletions(-) + +diff --git a/activate.c b/activate.c +index 62cfd08..6f8af27 100644 +--- a/activate.c ++++ b/activate.c +@@ -60,6 +60,9 @@ static void activate_mapping(struct irq_info *info, void *data __attribute__((un + if (!info->assigned_obj) + return; + ++ if (info->flags & IRQ_FLAG_AFFINITY_UNMANAGED) ++ return; ++ + /* activate only online cpus, otherwise writing to procfs returns EOVERFLOW */ + cpus_and(applied_mask, cpu_online_map, info->assigned_obj->mask); + +@@ -77,9 +80,8 @@ static void activate_mapping(struct irq_info *info, void *data __attribute__((un + cpumask_scnprintf(buf, PATH_MAX, applied_mask); + ret = fprintf(file, "%s", buf); + if (ret < 0) { +- log(TO_ALL, LOG_WARNING, "cannot change irq %i's affinity, add it to banned list", info->irq); +- add_banned_irq(info->irq); +- remove_one_irq_from_db(info->irq); ++ log(TO_ALL, LOG_WARNING, "cannot change IRQ %i affinity, will never try again\n", info->irq); ++ info->flags |= IRQ_FLAG_AFFINITY_UNMANAGED; + } + fclose(file); + info->moved = 0; /*migration is done*/ +diff --git a/classify.c b/classify.c +index 105ecd6..6242e6c 100644 +--- a/classify.c ++++ b/classify.c +@@ -256,7 +256,7 @@ static gint compare_ints(gconstpointer a, gconstpointer b) + return ai->irq - bi->irq; + } + +-static void __add_banned_irq(int irq, GList **list) ++static void add_banned_irq(int irq, GList **list) + { + struct irq_info find, *new; + GList *entry; +@@ -280,14 +280,9 @@ static void __add_banned_irq(int irq, GList **list) + return; + } + +-void add_banned_irq(int irq) +-{ +- __add_banned_irq(irq, &banned_irqs); +-} +- + void add_cl_banned_irq(int irq) + { +- __add_banned_irq(irq, &cl_banned_irqs); ++ add_banned_irq(irq, &cl_banned_irqs); + } + + gint substr_find(gconstpointer a, gconstpointer b) +@@ -386,23 +381,6 @@ get_numa_node: + return new; + } + +-void remove_one_irq_from_db(int irq) +-{ +- struct irq_info find, *tmp; +- GList *entry = NULL; +- +- find.irq = irq; +- entry = g_list_find_custom(interrupts_db, &find, compare_ints); +- if (!entry) +- return; +- +- tmp = entry->data; +- interrupts_db = g_list_remove(interrupts_db, tmp); +- free(tmp); +- log(TO_CONSOLE, LOG_INFO, "IRQ %d was removed from db.\n", irq); +- return; +-} +- + static void parse_user_policy_key(char *buf, int irq, struct user_irq_policy *pol) + { + char *key, *value, *end; +@@ -612,7 +590,7 @@ static void add_new_irq(char *path, struct irq_info *hint, GList *proc_interrupt + /* Set NULL devpath for the irq has no sysfs entries */ + get_irq_user_policy(path, irq, &pol); + if ((pol.ban == 1) || check_for_irq_ban(irq, proc_interrupts)) { /*FIXME*/ +- __add_banned_irq(irq, &banned_irqs); ++ add_banned_irq(irq, &banned_irqs); + new = get_irq_info(irq); + } else + new = add_one_irq_to_db(path, hint, &pol); +diff --git a/irqbalance.h b/irqbalance.h +index e7f6b94..46e05ca 100644 +--- a/irqbalance.h ++++ b/irqbalance.h +@@ -109,8 +109,6 @@ extern struct irq_info *get_irq_info(int irq); + extern void migrate_irq(GList **from, GList **to, struct irq_info *info); + extern void free_cl_opts(void); + extern void add_cl_banned_module(char *modname); +-extern void add_banned_irq(int irq); +-extern void remove_one_irq_from_db(int irq); + #define irq_numa_node(irq) ((irq)->numa_node) + + +diff --git a/types.h b/types.h +index 9693cf4..c63cfea 100644 +--- a/types.h ++++ b/types.h +@@ -34,7 +34,8 @@ + /* + * IRQ Internal tracking flags + */ +-#define IRQ_FLAG_BANNED 1 ++#define IRQ_FLAG_BANNED (1ULL << 0) ++#define IRQ_FLAG_AFFINITY_UNMANAGED (1ULL << 1) + + enum obj_type_e { + OBJ_TYPE_CPU, +-- +2.33.0 + diff --git a/backport-0002-activate_mapping-make-sure-to-catch-all-errors.patch b/backport-0002-activate_mapping-make-sure-to-catch-all-errors.patch new file mode 100644 index 0000000..e77e1c8 --- /dev/null +++ b/backport-0002-activate_mapping-make-sure-to-catch-all-errors.patch @@ -0,0 +1,48 @@ +From 470a64b190628574c28a266bdcf8960291463191 Mon Sep 17 00:00:00 2001 +From: Robin Jarry +Date: Wed, 12 Jul 2023 08:51:08 +0200 +Subject: [PATCH 2/5] activate_mapping: make sure to catch all errors + +fprintf() is buffered and may not report an error which may be deferred +when fflush() is called (either explicitly or internally by fclose()). + +Check for errors returned by fopen(), fprintf() and fclose() and add +IRQ_FLAG_AFFINITY_UNMANAGED accordingly. + +Fixes: 55c5c321c73e ("arm64: Add irq aff change check For aarch64, ...") +Fixes: https://bugzilla.redhat.com/show_bug.cgi?id=2184735 +Signed-off-by: Robin Jarry +--- + activate.c | 12 ++++++------ + 1 file changed, 6 insertions(+), 6 deletions(-) + +diff --git a/activate.c b/activate.c +index 6f8af27..a4112e0 100644 +--- a/activate.c ++++ b/activate.c +@@ -75,16 +75,16 @@ static void activate_mapping(struct irq_info *info, void *data __attribute__((un + sprintf(buf, "/proc/irq/%i/smp_affinity", info->irq); + file = fopen(buf, "w"); + if (!file) +- return; ++ goto error; + + cpumask_scnprintf(buf, PATH_MAX, applied_mask); + ret = fprintf(file, "%s", buf); +- if (ret < 0) { +- log(TO_ALL, LOG_WARNING, "cannot change IRQ %i affinity, will never try again\n", info->irq); +- info->flags |= IRQ_FLAG_AFFINITY_UNMANAGED; +- } +- fclose(file); ++ if (fclose(file) || ret < 0) ++ goto error; + info->moved = 0; /*migration is done*/ ++error: ++ log(TO_ALL, LOG_WARNING, "cannot change IRQ %i affinity, will never try again\n", info->irq); ++ info->flags |= IRQ_FLAG_AFFINITY_UNMANAGED; + } + + void activate_mappings(void) +-- +2.33.0 + diff --git a/backport-0003-activate_mapping-report-error-reason.patch b/backport-0003-activate_mapping-report-error-reason.patch new file mode 100644 index 0000000..dd3a8ae --- /dev/null +++ b/backport-0003-activate_mapping-report-error-reason.patch @@ -0,0 +1,64 @@ +From 9a1fd29a82c9762c3676f613075d44a8d1fcbe82 Mon Sep 17 00:00:00 2001 +From: Robin Jarry +Date: Wed, 12 Jul 2023 08:59:45 +0200 +Subject: [PATCH 3/5] activate_mapping: report error reason + +If a given IRQ affinity cannot be set, include strerror in the warning +message. + +Fixes: https://bugzilla.redhat.com/show_bug.cgi?id=2184735 +Signed-off-by: Robin Jarry +--- + activate.c | 15 ++++++++++++--- + 1 file changed, 12 insertions(+), 3 deletions(-) + +diff --git a/activate.c b/activate.c +index a4112e0..4418cda 100644 +--- a/activate.c ++++ b/activate.c +@@ -25,10 +25,12 @@ + * of interrupts to the kernel. + */ + #include "config.h" ++#include + #include + #include + #include + #include ++#include + + #include "irqbalance.h" + +@@ -48,7 +50,7 @@ static void activate_mapping(struct irq_info *info, void *data __attribute__((un + { + char buf[PATH_MAX]; + FILE *file; +- int ret = 0; ++ int errsave, ret; + cpumask_t applied_mask; + + /* +@@ -79,11 +81,18 @@ static void activate_mapping(struct irq_info *info, void *data __attribute__((un + + cpumask_scnprintf(buf, PATH_MAX, applied_mask); + ret = fprintf(file, "%s", buf); +- if (fclose(file) || ret < 0) ++ errsave = errno; ++ if (fclose(file)) { ++ errsave = errno; ++ goto error; ++ } ++ if (ret < 0) + goto error; + info->moved = 0; /*migration is done*/ + error: +- log(TO_ALL, LOG_WARNING, "cannot change IRQ %i affinity, will never try again\n", info->irq); ++ log(TO_ALL, LOG_WARNING, ++ "Cannot change IRQ %i affinity: %s. Will never try again.\n", ++ info->irq, strerror(errsave)); + info->flags |= IRQ_FLAG_AFFINITY_UNMANAGED; + } + +-- +2.33.0 + diff --git a/backport-0004-activate_mapping-only-blacklist-irq-if-error-is-cons.patch b/backport-0004-activate_mapping-only-blacklist-irq-if-error-is-cons.patch new file mode 100644 index 0000000..2e79414 --- /dev/null +++ b/backport-0004-activate_mapping-only-blacklist-irq-if-error-is-cons.patch @@ -0,0 +1,51 @@ +From eee7917ef5272691b9d4ee6341463f3c78f7f909 Mon Sep 17 00:00:00 2001 +From: Robin Jarry +Date: Wed, 12 Jul 2023 17:49:13 +0200 +Subject: [PATCH 4/5] activate_mapping: only blacklist irq if error is + considered permanent + +Some errors reported when writing to smp_affinity are transient. For +example, when a CPU interrupt controller does not have enough room to +map the IRQ, the kernel will return "No space left on device". + +This kind of situation can change over time. Do not mark the IRQ +affinity as "unmanaged". Let irqbalance try again later. + +Signed-off-by: Robin Jarry +--- + activate.c | 18 ++++++++++++++++-- + 1 file changed, 16 insertions(+), 2 deletions(-) + +diff --git a/activate.c b/activate.c +index 4418cda..7353692 100644 +--- a/activate.c ++++ b/activate.c +@@ -91,9 +91,23 @@ static void activate_mapping(struct irq_info *info, void *data __attribute__((un + info->moved = 0; /*migration is done*/ + error: + log(TO_ALL, LOG_WARNING, +- "Cannot change IRQ %i affinity: %s. Will never try again.\n", ++ "Cannot change IRQ %i affinity: %s\n", + info->irq, strerror(errsave)); +- info->flags |= IRQ_FLAG_AFFINITY_UNMANAGED; ++ switch (errsave) { ++ case ENOSPC: /* Specified CPU APIC is full. */ ++ case EAGAIN: /* Interrupted by signal. */ ++ case EBUSY: /* Affinity change already in progress. */ ++ case EINVAL: /* IRQ would be bound to no CPU. */ ++ case ERANGE: /* CPU in mask is offline. */ ++ case ENOMEM: /* Kernel cannot allocate CPU mask. */ ++ /* Do not blacklist the IRQ on transient errors. */ ++ break; ++ default: ++ /* Any other error is considered permanent. */ ++ info->flags |= IRQ_FLAG_AFFINITY_UNMANAGED; ++ log(TO_ALL, LOG_WARNING, "IRQ %i affinity is now unmanaged\n", ++ info->irq); ++ } + } + + void activate_mappings(void) +-- +2.33.0 + diff --git a/backport-0005-activate_mapping-avoid-logging-error-when-there-is-n.patch b/backport-0005-activate_mapping-avoid-logging-error-when-there-is-n.patch new file mode 100644 index 0000000..44e5180 --- /dev/null +++ b/backport-0005-activate_mapping-avoid-logging-error-when-there-is-n.patch @@ -0,0 +1,28 @@ +From bc7794dc78474c463a26926749537f23abc4c082 Mon Sep 17 00:00:00 2001 +From: Robin Jarry +Date: Thu, 13 Jul 2023 20:49:16 +0200 +Subject: [PATCH 5/5] activate_mapping: avoid logging error when there is none + +Add missing return statement. + +Fixes: 470a64b19062 ("activate_mapping: make sure to catch all errors") +Signed-off-by: Robin Jarry +--- + activate.c | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/activate.c b/activate.c +index 7353692..548a401 100644 +--- a/activate.c ++++ b/activate.c +@@ -89,6 +89,7 @@ static void activate_mapping(struct irq_info *info, void *data __attribute__((un + if (ret < 0) + goto error; + info->moved = 0; /*migration is done*/ ++ return; + error: + log(TO_ALL, LOG_WARNING, + "Cannot change IRQ %i affinity: %s\n", +-- +2.33.0 + diff --git a/irqbalance.spec b/irqbalance.spec index 94f0a8e..e8465d7 100644 --- a/irqbalance.spec +++ b/irqbalance.spec @@ -1,7 +1,7 @@ Summary: A dynamic adaptive IRQ balancing daemon Name: irqbalance Version: 1.8.0 -Release: 9 +Release: 10 Epoch: 3 License: GPLv2 Source0: https://github.com/Irqbalance/irqbalance/archive/v%{version}.tar.gz#/irqbalance-%{version}.tar.gz @@ -29,6 +29,11 @@ Patch6004: backport-Fix-compile-issue-with-none-AARCH64-builds.patch Patch6005: backport-fix-opendir-fails-in-check_platform_device.patch Patch6006: backport-check-whether-savedptr-is-NULL-before-invoking-strle.patch Patch6007: backport-procinterrupts-Fix-IRQ-name-parsing-on-certain-arm64.patch +Patch6008: backport-0001-activate_mapping-avoid-use-after-free-when-affinity-.patch +Patch6009: backport-0002-activate_mapping-make-sure-to-catch-all-errors.patch +Patch6010: backport-0003-activate_mapping-report-error-reason.patch +Patch6011: backport-0004-activate_mapping-only-blacklist-irq-if-error-is-cons.patch +Patch6012: backport-0005-activate_mapping-avoid-logging-error-when-there-is-n.patch %description Irqbalance is a daemon to help balance the cpu load generated by @@ -86,6 +91,12 @@ fi /sbin/chkconfig --del %{name} >/dev/null 2>&1 || : %changelog +* Sun Oct 29 2023 volcanodragon - 3:1.8.0-10 +- Type:bugfix +- ID:NA +- SUG:restart +- DESC: fix activate_mapping bug + * Thu Dec 8 2022 qinyu - 3:1.8.0-9 - Type:bugfix - ID:NA