From 715a1a920d06f5c7ee66629691ff892d7870ff04 Mon Sep 17 00:00:00 2001 From: Simon McVittie Date: Mon, 21 Feb 2022 15:29:46 +0000 Subject: [PATCH 1/6] spawn-unix: Correct indentation Signed-off-by: Simon McVittie --- dbus/dbus-spawn-unix.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbus/dbus-spawn-unix.c b/dbus/dbus-spawn-unix.c index 5550ffac..6a888eb5 100644 --- a/dbus/dbus-spawn-unix.c +++ b/dbus/dbus-spawn-unix.c @@ -1443,7 +1443,7 @@ _dbus_spawn_async_with_babysitter (DBusBabysitter **sitter_p, _dbus_assert_not_reached ("Got to code after write_err_and_exit()"); } else if (grandchild_pid == 0) - { + { #ifdef __linux__ int fd = -1; From f3ffe9a873708c679df88b9fc12b6b831539cf8a Mon Sep 17 00:00:00 2001 From: Simon McVittie Date: Mon, 21 Feb 2022 15:41:41 +0000 Subject: [PATCH 2/6] sysdeps-unix: Factor out _dbus_reset_oom_score_adj Signed-off-by: Simon McVittie --- dbus/dbus-spawn-unix.c | 20 +++-------- dbus/dbus-sysdeps-unix.h | 2 ++ dbus/dbus-sysdeps-util-unix.c | 65 +++++++++++++++++++++++++++++++++++ 3 files changed, 71 insertions(+), 16 deletions(-) diff --git a/dbus/dbus-spawn-unix.c b/dbus/dbus-spawn-unix.c index 6a888eb5..eecc3219 100644 --- a/dbus/dbus-spawn-unix.c +++ b/dbus/dbus-spawn-unix.c @@ -1444,26 +1444,14 @@ _dbus_spawn_async_with_babysitter (DBusBabysitter **sitter_p, } else if (grandchild_pid == 0) { -#ifdef __linux__ - int fd = -1; + const char *error_str = NULL; -#ifdef O_CLOEXEC - fd = open ("/proc/self/oom_score_adj", O_WRONLY | O_CLOEXEC); -#endif - - if (fd < 0) + if (!_dbus_reset_oom_score_adj (&error_str)) { - fd = open ("/proc/self/oom_score_adj", O_WRONLY); - _dbus_fd_set_close_on_exec (fd); + /* TODO: Strictly speaking, this is not async-signal-safe. */ + _dbus_warn ("%s: %s", error_str, strerror (errno)); } - if (fd >= 0) - { - if (write (fd, "0", sizeof (char)) < 0) - _dbus_warn ("writing oom_score_adj error: %s", strerror (errno)); - _dbus_close (fd, NULL); - } -#endif /* Go back to ignoring SIGPIPE, since it's evil */ signal (SIGPIPE, SIG_IGN); diff --git a/dbus/dbus-sysdeps-unix.h b/dbus/dbus-sysdeps-unix.h index e86de6d9..a051b542 100644 --- a/dbus/dbus-sysdeps-unix.h +++ b/dbus/dbus-sysdeps-unix.h @@ -174,6 +174,8 @@ typedef void (* DBusSignalHandler) (int sig); void _dbus_set_signal_handler (int sig, DBusSignalHandler handler); +dbus_bool_t _dbus_reset_oom_score_adj (const char **error_str_p); + /** @} */ DBUS_END_DECLS diff --git a/dbus/dbus-sysdeps-util-unix.c b/dbus/dbus-sysdeps-util-unix.c index 878ca2ea..799c6892 100644 --- a/dbus/dbus-sysdeps-util-unix.c +++ b/dbus/dbus-sysdeps-util-unix.c @@ -1603,3 +1603,68 @@ _dbus_daemon_report_stopping (void) sd_notify (0, "STOPPING=1"); #endif } + +/** + * If the current process has been protected from the Linux OOM killer + * (the oom_score_adj process parameter is negative), reset it to the + * default level of protection from the OOM killer (set oom_score_adj + * to zero). + * + * This function does not use DBusError, to avoid calling malloc(), so + * that it can be used in contexts where an async-signal-safe function + * is required (for example after fork()). Instead, on failure it sets + * errno and returns something like "Failed to open /dev/null" in + * *error_str_p. Callers are expected to combine *error_str_p + * with _dbus_strerror (errno) to get a full error report. + */ +dbus_bool_t +_dbus_reset_oom_score_adj (const char **error_str_p) +{ +#ifdef __linux__ + int fd = -1; + dbus_bool_t ret = FALSE; + int saved_errno = 0; + const char *error_str = NULL; + +#ifdef O_CLOEXEC + fd = open ("/proc/self/oom_score_adj", O_WRONLY | O_CLOEXEC); +#endif + + if (fd < 0) + { + fd = open ("/proc/self/oom_score_adj", O_WRONLY); + _dbus_fd_set_close_on_exec (fd); + } + + if (fd >= 0) + { + if (write (fd, "0", sizeof (char)) < 0) + { + ret = FALSE; + error_str = "writing oom_score_adj error"; + saved_errno = errno; + } + else + { + ret = TRUE; + } + + _dbus_close (fd, NULL); + } + else + { + /* TODO: Historically we ignored this error, although ideally we + * would diagnose it */ + ret = TRUE; + } + + if (error_str_p != NULL) + *error_str_p = error_str; + + errno = saved_errno; + return ret; +#else + /* nothing to do on this platform */ + return TRUE; +#endif +} From c42bb64457c3b31e561ad9885c618e051af1171a Mon Sep 17 00:00:00 2001 From: Simon McVittie Date: Mon, 21 Feb 2022 15:53:38 +0000 Subject: [PATCH 3/6] spawn-unix: On Linux, don't try to increase OOM-killer protection The oom_score_adj parameter is a signed integer, with increasingly positive values being more likely to be killed by the OOM-killer, and increasingly negative values being less likely. Previously, we assumed that oom_score_adj would be negative or zero, and reset it to zero, which does not require privileges because it meant we're voluntarily giving up our OOM-killer protection. In particular, bus/dbus.service.in has OOMScoreAdjust=-900, which we don't want system services to inherit. However, systemd >= 250 has started putting a positive oom_score_adj on user processes, to make it more likely that the OOM killer will kill a user process rather than a system process. Changing from a positive oom_score_adj to zero is increasing protection from the OOM-killer, which only a privileged process is allowed to do, resulting in warnings whenever we carry out traditional (non-systemd) service activation on the session bus. To avoid this, do the equivalent of: if (oom_score_adj < 0) oom_score_adj = 0; which is always allowed. Resolves: https://gitlab.freedesktop.org/dbus/dbus/-/issues/374 Signed-off-by: Simon McVittie --- dbus/dbus-sysdeps-util-unix.c | 59 ++++++++++++++++++++++++++++++----- 1 file changed, 51 insertions(+), 8 deletions(-) diff --git a/dbus/dbus-sysdeps-util-unix.c b/dbus/dbus-sysdeps-util-unix.c index 799c6892..397ed20a 100644 --- a/dbus/dbus-sysdeps-util-unix.c +++ b/dbus/dbus-sysdeps-util-unix.c @@ -1627,29 +1627,68 @@ _dbus_reset_oom_score_adj (const char **error_str_p) const char *error_str = NULL; #ifdef O_CLOEXEC - fd = open ("/proc/self/oom_score_adj", O_WRONLY | O_CLOEXEC); + fd = open ("/proc/self/oom_score_adj", O_RDWR | O_CLOEXEC); #endif if (fd < 0) { - fd = open ("/proc/self/oom_score_adj", O_WRONLY); + fd = open ("/proc/self/oom_score_adj", O_RDWR); _dbus_fd_set_close_on_exec (fd); } if (fd >= 0) { - if (write (fd, "0", sizeof (char)) < 0) + ssize_t read_result = -1; + /* It doesn't actually matter whether we read the whole file, + * as long as we get the presence or absence of the minus sign */ + char first_char = '\0'; + + read_result = read (fd, &first_char, 1); + + if (read_result < 0) + { + /* This probably can't actually happen in practice: if we can + * open it, then we can hopefully read from it */ + ret = FALSE; + error_str = "failed to read from /proc/self/oom_score_adj"; + saved_errno = errno; + goto out; + } + + /* If we are running with protection from the OOM killer + * (typical for the system dbus-daemon under systemd), then + * oom_score_adj will be negative. Drop that protection, + * returning to oom_score_adj = 0. + * + * Conversely, if we are running with increased susceptibility + * to the OOM killer (as user sessions typically do in + * systemd >= 250), oom_score_adj will be strictly positive, + * and we are not allowed to decrease it to 0 without privileges. + * + * If it's exactly 0 (typical for non-systemd systems, and + * user processes on older systemd) then there's no need to + * alter it. + * + * We shouldn't get an empty result, but if we do, assume it + * means zero and don't try to change it. */ + if (read_result == 0 || first_char != '-') + { + /* Nothing needs to be done: the OOM score adjustment is + * non-negative */ + ret = TRUE; + goto out; + } + + if (pwrite (fd, "0", sizeof (char), 0) < 0) { ret = FALSE; error_str = "writing oom_score_adj error"; saved_errno = errno; - } - else - { - ret = TRUE; + goto out; } - _dbus_close (fd, NULL); + /* Success */ + ret = TRUE; } else { @@ -1658,6 +1697,10 @@ _dbus_reset_oom_score_adj (const char **error_str_p) ret = TRUE; } +out: + if (fd >= 0) + _dbus_close (fd, NULL); + if (error_str_p != NULL) *error_str_p = error_str; From 2efb462466d628d47d7f80c5a8e864a62b6154cc Mon Sep 17 00:00:00 2001 From: Simon McVittie Date: Mon, 21 Feb 2022 16:00:42 +0000 Subject: [PATCH 4/6] dbus-daemon-launch-helper: Reset Linux OOM score adjustment here Previously, we were relying on the system bus being able to reset its OOM score adjustment after it forks, but before it execs the dbus-daemon-launch-helper. However, it can't actually do that (leading to dbus#378), because the system bus typically starts as root, uses its root privileges to adjust resource limits, and then drops privileges to the `@DBUS_USER@`, typically `dbus` or `messagebus`. This leaves the pseudo-files in /proc for its process parameters owned by root, and the `@DBUS_USER@` is not allowed to open them for writing. The dbus-daemon-launch-helper is setuid root, so it can certainly alter its OOM score adjustment before exec'ing the actual activated service. We need to do this before dropping privileges, because after dropping privileges we would be unable to write to this process parameter. This is a non-async-signal-safe context, so we can safely log errors here, unlike the fork-and-exec code paths. Resolves: https://gitlab.freedesktop.org/dbus/dbus/-/issues/378 Signed-off-by: Simon McVittie --- bus/activation-helper.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/bus/activation-helper.c b/bus/activation-helper.c index 8172b6cf..8a4fd732 100644 --- a/bus/activation-helper.c +++ b/bus/activation-helper.c @@ -32,6 +32,7 @@ #include "activation-helper.h" #include "activation-exit-codes.h" +#include #include #include #include @@ -43,6 +44,7 @@ #include #include #include +#include static BusDesktopFile * desktop_file_for_name (BusConfigParser *parser, @@ -337,11 +339,17 @@ exec_for_correct_user (char *exec, char *user, DBusError *error) char **argv; int argc; dbus_bool_t retval; + const char *error_str = NULL; argc = 0; retval = TRUE; argv = NULL; + /* Resetting the OOM score adjustment is best-effort, so we don't + * treat a failure to do so as fatal. */ + if (!_dbus_reset_oom_score_adj (&error_str)) + _dbus_warn ("%s: %s", error_str, strerror (errno)); + if (!switch_user (user, error)) return FALSE; From 7ee72a27957be5d3436beaa02ccd01b9ce042962 Mon Sep 17 00:00:00 2001 From: Simon McVittie Date: Mon, 21 Feb 2022 16:02:13 +0000 Subject: [PATCH 5/6] spawn-unix: Don't log an error if unable to reset Linux OOM score We cannot safely log between fork() and exec() because it isn't an async-signal-safe operation (in particular it might allocate memory). We also don't want to treat a failure here as a real problem, because it might legitimately not work: in a system dbus-daemon that has dropped privileges from root, the pseudo-file representing this process parameter remains owned by root and cannot be altered by the unprivileged user. For the main use-case for this operation, the system dbus-daemon, we have another opportunity to do this in the dbus-daemon-launch-helper (see the previous commit). Signed-off-by: Simon McVittie --- dbus/dbus-spawn-unix.c | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/dbus/dbus-spawn-unix.c b/dbus/dbus-spawn-unix.c index eecc3219..f3577e70 100644 --- a/dbus/dbus-spawn-unix.c +++ b/dbus/dbus-spawn-unix.c @@ -1444,13 +1444,11 @@ _dbus_spawn_async_with_babysitter (DBusBabysitter **sitter_p, } else if (grandchild_pid == 0) { - const char *error_str = NULL; - - if (!_dbus_reset_oom_score_adj (&error_str)) - { - /* TODO: Strictly speaking, this is not async-signal-safe. */ - _dbus_warn ("%s: %s", error_str, strerror (errno)); - } + /* This might not succeed in a dbus-daemon that started as root + * and dropped privileges, so don't log an error on failure. + * (Also, we can't safely log errors here anyway, because logging + * is not async-signal safe). */ + _dbus_reset_oom_score_adj (NULL); /* Go back to ignoring SIGPIPE, since it's evil */ From 226f24144a4db4898a1f5958293d200b975baee0 Mon Sep 17 00:00:00 2001 From: Simon McVittie Date: Mon, 21 Feb 2022 16:07:26 +0000 Subject: [PATCH 6/6] sysdeps-unix: Diagnose failure to open /proc/self/oom_score_adj Previously, we silently ignored this, but now that we're more careful about the contexts in which we try to reset the OOM score and whether we log failures as a warning, we can let the dbus-daemon-launch-helper show a message if it can't write there. Signed-off-by: Simon McVittie --- dbus/dbus-sysdeps-util-unix.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/dbus/dbus-sysdeps-util-unix.c b/dbus/dbus-sysdeps-util-unix.c index 397ed20a..314ce64b 100644 --- a/dbus/dbus-sysdeps-util-unix.c +++ b/dbus/dbus-sysdeps-util-unix.c @@ -1690,11 +1690,18 @@ _dbus_reset_oom_score_adj (const char **error_str_p) /* Success */ ret = TRUE; } + else if (errno == ENOENT) + { + /* If /proc/self/oom_score_adj doesn't exist, assume the kernel + * doesn't support this feature and ignore it. */ + ret = TRUE; + } else { - /* TODO: Historically we ignored this error, although ideally we - * would diagnose it */ - ret = TRUE; + ret = FALSE; + error_str = "open(/proc/self/oom_score_adj)"; + saved_errno = errno; + goto out; } out: