httpd/backport-Handle-children-killed-pathologically.patch
chengyechun b981a203d9 change default value of HeartbeatMaxServers
fix setting and comparison of IPs fileds
avoid voerflow in case of indecently large session
make ap_escape_quotes() work correctly
fix lua_request with cast first
Handle children killed pathologically
q
2022-12-14 10:54:40 +08:00

109 lines
4.6 KiB
Diff

From 5f33010a643ac7c67b7733484797d41366e328ecdb Mon Sep 17 00:00:00 2001
From: icing <icing@apache.org>
Date: Tue, 30 Aug 2022 14:47:19 +0800
Subject: [PATCH] Handle children killed pathologically
Conflict:NA
Reference:https://github.com/apache/httpd/commit/5f3010a643ac7c67b733484797d41366e328ecdb
---
server/mpm/event/event.c | 26 +++++++++++++++++++++++---
server/mpm/worker/worker.c | 26 +++++++++++++++++++++++---
2 files changed, 46 insertions(+), 6 deletions(-)
diff --git a/server/mpm/event/event.c b/server/mpm/event/event.c
index dddff35..5969c88 100644
--- a/server/mpm/event/event.c
+++ b/server/mpm/event/event.c
@@ -2983,6 +2983,7 @@ static void perform_idle_server_maintenance(int child_bucket, int num_buckets)
static void server_main_loop(int remaining_children_to_start, int num_buckets)
{
+ int successive_kills = 0;
int child_slot;
apr_exit_why_e exitwhy;
int status, processed_status;
@@ -3072,11 +3073,30 @@ static void server_main_loop(int remaining_children_to_start, int num_buckets)
/* Don't perform idle maintenance when a child dies,
* only do it when there's a timeout. Remember only a
* finite number of children can die, and it's pretty
- * pathological for a lot to die suddenly.
+ * pathological for a lot to die suddenly. If a child is
+ * killed by a signal (faulting) we want to restart it ASAP
+ * though, up to 3 successive faults or we stop this until
+ * a timeout happens again (to avoid the flood of fork()ed
+ * process that keep being killed early).
*/
- continue;
+ if (child_slot < 0 || !APR_PROC_CHECK_SIGNALED(exitwhy)) {
+ continue;
+ }
+ if (++successive_kills >= 3) {
+ if (successive_kills % 10 == 3) {
+ ap_log_error(APLOG_MARK, APLOG_WARNING, 0,
+ ap_server_conf, APLOGNO(10392)
+ "children are killed successively!");
+ }
+ continue;
+ }
+ ++remaining_children_to_start;
+ }
+ else {
+ successive_kills = 0;
}
- else if (remaining_children_to_start) {
+
+ if (remaining_children_to_start) {
/* we hit a 1 second timeout in which none of the previous
* generation of children needed to be reaped... so assume
* they're all done, and pick up the slack if any is left.
diff --git a/server/mpm/worker/worker.c b/server/mpm/worker/worker.c
index bd56f61..30d5aeb 100644
--- a/server/mpm/worker/worker.c
+++ b/server/mpm/worker/worker.c
@@ -1569,6 +1569,7 @@ static void perform_idle_server_maintenance(int child_bucket, int num_buckets)
static void server_main_loop(int remaining_children_to_start, int num_buckets)
{
+ int successive_kills = 0;
ap_generation_t old_gen;
int child_slot;
apr_exit_why_e exitwhy;
@@ -1663,11 +1664,30 @@ static void server_main_loop(int remaining_children_to_start, int num_buckets)
/* Don't perform idle maintenance when a child dies,
* only do it when there's a timeout. Remember only a
* finite number of children can die, and it's pretty
- * pathological for a lot to die suddenly.
+ * pathological for a lot to die suddenly. If a child is
+ * killed by a signal (faulting) we want to restart if ASAP
+ * though, up to 3 successive faults or we stop this until
+ * a timeout happens again (to avoid the flood of fork()ed
+ * processes that keep being killed early).
*/
- continue;
+ if (child_slot < 0 || !APR_PROC_CHECK_SIGNALED(exitwhy)) {
+ continue;
+ }
+ if (++successive_kills >= 3) {
+ if (successive_kills % 10 == 3) {
+ ap_log_error(APLOG_MARK, APLOG_WARNING, 0,
+ ap_server_conf, APLOGNO(10392)
+ "children are killed successively!");
+ }
+ continue;
+ }
+ ++remaining_children_to_start;
+ }
+ else {
+ successive_kills = 0;
}
- else if (remaining_children_to_start) {
+
+ if (remaining_children_to_start) {
/* we hit a 1 second timeout in which none of the previous
* generation of children needed to be reaped... so assume
* they're all done, and pick up the slack if any is left.
--
2.23.0