131 lines
5.7 KiB
Diff
131 lines
5.7 KiB
Diff
From 4981dd7c8771e83d625ee42dec709c5504cbad80 Mon Sep 17 00:00:00 2001
|
|
From: Lennart Poettering <lennart@poettering.net>
|
|
Date: Tue, 30 May 2023 15:33:59 +0200
|
|
Subject: [PATCH] pid1: when taking possession of passed fds check O_CLOEXEC
|
|
state first
|
|
|
|
So here's the thing. One library we use (libselinux) is opening fds
|
|
behind our back when we initialize it and keeps it open. On the other
|
|
hand we want to automatically pick up all fds passed in to us, so that
|
|
we can distribute them to our services and close the rest. We pick them
|
|
up very early in our code, to ensure that we don't get confused by open
|
|
fds at that point. Except that libselinux insists on being initialized
|
|
even earlier. So suddenly we might take possession of libselinux' fds,
|
|
and then close them later when we decide no service wants them. Then
|
|
during shutdown we close down selinux and selinux closes its fds, but
|
|
since already closed long ago this ight close our fds instead. Hilarity
|
|
ensues.
|
|
|
|
I wish low-level software wouldn't do such things behind our back, but
|
|
well, let's make the best of it.
|
|
|
|
This changes the fd pick-up logic to only pick up fds that have
|
|
O_CLOEXEC unset. O_CLOEXEC must be unset for any fds passed in to us
|
|
over execve() after all. And for all our own fds we should set O_CLOEXEC
|
|
since we generally don't want to litter fd tables for execve(). Also,
|
|
libselinux thankfully appears to set O_CLOEXEC correctly on its fds,
|
|
hence the filter works.
|
|
|
|
Fixes: #27491
|
|
|
|
(cherry picked from commit eb564f928e401def8d3aaa2a90f33cb09cdc1517)
|
|
Backport of the cloexec filter for v253, and for v252 (actually tested
|
|
with v252). Note that I've left the name _s of the function parameter as
|
|
it was before.
|
|
(cherry picked from commit 88bf6b5815d81cb6d29e9a41f752c70584fac062)
|
|
(cherry picked from commit 4dd3f8934a51d065d2b3df6d368989f9610a37b5)
|
|
|
|
Conflict:code context adaptation
|
|
Reference:https://github.com/systemd/systemd-stable/commit/4981dd7c8771e83d625ee42dec709c5504cbad80
|
|
---
|
|
src/core/main.c | 16 ++++++++++++----
|
|
src/shared/fdset.c | 18 +++++++++++++++++-
|
|
src/shared/fdset.h | 2 +-
|
|
3 files changed, 30 insertions(+), 6 deletions(-)
|
|
|
|
diff --git a/src/core/main.c b/src/core/main.c
|
|
index 46d6968b6b..9934bda1f3 100644
|
|
--- a/src/core/main.c
|
|
+++ b/src/core/main.c
|
|
@@ -2578,16 +2578,24 @@ static int collect_fds(FDSet **ret_fds, const char **ret_error_message) {
|
|
assert(ret_fds);
|
|
assert(ret_error_message);
|
|
|
|
- r = fdset_new_fill(ret_fds);
|
|
+ /* Pick up all fds passed to us. We apply a filter here: we only take the fds that have O_CLOEXEC
|
|
+ * off. All fds passed via execve() to us must have O_CLOEXEC off, and our own code and dependencies
|
|
+ * should be clean enough to set O_CLOEXEC universally. Thus checking the bit should be a safe
|
|
+ * mechanism to distinguish passed in fds from our own.
|
|
+ *
|
|
+ * Why bother? Some subsystems we initialize early, specifically selinux might keep fds open in our
|
|
+ * process behind our back. We should not take possession of that (and then accidentally close
|
|
+ * it). SELinux thankfully sets O_CLOEXEC on its fds, so this test should work. */
|
|
+ r = fdset_new_fill(/* filter_cloexec= */ 0, ret_fds);
|
|
if (r < 0) {
|
|
*ret_error_message = "Failed to allocate fd set";
|
|
return log_emergency_errno(r, "Failed to allocate fd set: %m");
|
|
}
|
|
|
|
- fdset_cloexec(*ret_fds, true);
|
|
+ (void) fdset_cloexec(*ret_fds, true);
|
|
|
|
- if (arg_serialization)
|
|
- assert_se(fdset_remove(*ret_fds, fileno(arg_serialization)) >= 0);
|
|
+ /* The serialization fd should have O_CLOEXEC turned on already, let's verify that we didn't pick it up here */
|
|
+ assert_se(!arg_serialization || !fdset_contains(*ret_fds, fileno(arg_serialization)));
|
|
|
|
return 0;
|
|
}
|
|
diff --git a/src/shared/fdset.c b/src/shared/fdset.c
|
|
index c621c14ba6..6f40c6aa0d 100644
|
|
--- a/src/shared/fdset.c
|
|
+++ b/src/shared/fdset.c
|
|
@@ -124,7 +124,9 @@ int fdset_remove(FDSet *s, int fd) {
|
|
return set_remove(MAKE_SET(s), FD_TO_PTR(fd)) ? fd : -ENOENT;
|
|
}
|
|
|
|
-int fdset_new_fill(FDSet **_s) {
|
|
+int fdset_new_fill(
|
|
+ int filter_cloexec, /* if < 0 takes all fds, otherwise only those with O_CLOEXEC set (1) or unset (0) */
|
|
+ FDSet **_s) {
|
|
_cleanup_closedir_ DIR *d = NULL;
|
|
struct dirent *de;
|
|
int r = 0;
|
|
@@ -157,6 +159,20 @@ int fdset_new_fill(FDSet **_s) {
|
|
if (fd == dirfd(d))
|
|
continue;
|
|
|
|
+ if (filter_cloexec >= 0) {
|
|
+ int fl;
|
|
+
|
|
+ /* If user asked for that filter by O_CLOEXEC. This is useful so that fds that have
|
|
+ * been passed in can be collected and fds which have been created locally can be
|
|
+ * ignored, under the assumption that only the latter have O_CLOEXEC set. */
|
|
+ fl = fcntl(fd, F_GETFD);
|
|
+ if (fl < 0)
|
|
+ return -errno;
|
|
+
|
|
+ if (FLAGS_SET(fl, FD_CLOEXEC) != !!filter_cloexec)
|
|
+ continue;
|
|
+ }
|
|
+
|
|
r = fdset_put(s, fd);
|
|
if (r < 0)
|
|
goto finish;
|
|
diff --git a/src/shared/fdset.h b/src/shared/fdset.h
|
|
index 39d15ee4aa..e8a6b4869d 100644
|
|
--- a/src/shared/fdset.h
|
|
+++ b/src/shared/fdset.h
|
|
@@ -19,7 +19,7 @@ bool fdset_contains(FDSet *s, int fd);
|
|
int fdset_remove(FDSet *s, int fd);
|
|
|
|
int fdset_new_array(FDSet **ret, const int *fds, size_t n_fds);
|
|
-int fdset_new_fill(FDSet **ret);
|
|
+int fdset_new_fill(int filter_cloexec, FDSet **ret);
|
|
int fdset_new_listen_fds(FDSet **ret, bool unset);
|
|
|
|
int fdset_cloexec(FDSet *fds, bool b);
|
|
--
|
|
2.33.0
|
|
|