Merge branch 'oom-score-adj' into 'master'

Fix handling of /proc/self/oom_score_adj on Linux

Closes #374 and #378

See merge request dbus/dbus!255
This commit is contained in:
Simon McVittie 2022-02-23 12:29:22 +00:00
commit 10d39a649f
4 changed files with 131 additions and 20 deletions

View file

@ -32,6 +32,7 @@
#include "activation-helper.h"
#include "activation-exit-codes.h"
#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
@ -43,6 +44,7 @@
#include <dbus/dbus-misc.h>
#include <dbus/dbus-shell.h>
#include <dbus/dbus-marshal-validate.h>
#include <dbus/dbus-sysdeps-unix.h>
static BusDesktopFile *
desktop_file_for_name (BusConfigParser *parser,
@ -337,11 +339,17 @@ exec_for_correct_user (char *exec, char *user, DBusError *error)
char **argv;
int argc;
dbus_bool_t retval;
const char *error_str = NULL;
argc = 0;
retval = TRUE;
argv = NULL;
/* Resetting the OOM score adjustment is best-effort, so we don't
* treat a failure to do so as fatal. */
if (!_dbus_reset_oom_score_adj (&error_str))
_dbus_warn ("%s: %s", error_str, strerror (errno));
if (!switch_user (user, error))
return FALSE;

View file

@ -1443,27 +1443,13 @@ _dbus_spawn_async_with_babysitter (DBusBabysitter **sitter_p,
_dbus_assert_not_reached ("Got to code after write_err_and_exit()");
}
else if (grandchild_pid == 0)
{
#ifdef __linux__
int fd = -1;
{
/* This might not succeed in a dbus-daemon that started as root
* and dropped privileges, so don't log an error on failure.
* (Also, we can't safely log errors here anyway, because logging
* is not async-signal safe). */
_dbus_reset_oom_score_adj (NULL);
#ifdef O_CLOEXEC
fd = open ("/proc/self/oom_score_adj", O_WRONLY | O_CLOEXEC);
#endif
if (fd < 0)
{
fd = open ("/proc/self/oom_score_adj", O_WRONLY);
_dbus_fd_set_close_on_exec (fd);
}
if (fd >= 0)
{
if (write (fd, "0", sizeof (char)) < 0)
_dbus_warn ("writing oom_score_adj error: %s", strerror (errno));
_dbus_close (fd, NULL);
}
#endif
/* Go back to ignoring SIGPIPE, since it's evil
*/
signal (SIGPIPE, SIG_IGN);

View file

@ -174,6 +174,8 @@ typedef void (* DBusSignalHandler) (int sig);
void _dbus_set_signal_handler (int sig,
DBusSignalHandler handler);
dbus_bool_t _dbus_reset_oom_score_adj (const char **error_str_p);
/** @} */
DBUS_END_DECLS

View file

@ -1603,3 +1603,118 @@ _dbus_daemon_report_stopping (void)
sd_notify (0, "STOPPING=1");
#endif
}
/**
* If the current process has been protected from the Linux OOM killer
* (the oom_score_adj process parameter is negative), reset it to the
* default level of protection from the OOM killer (set oom_score_adj
* to zero).
*
* This function does not use DBusError, to avoid calling malloc(), so
* that it can be used in contexts where an async-signal-safe function
* is required (for example after fork()). Instead, on failure it sets
* errno and returns something like "Failed to open /dev/null" in
* *error_str_p. Callers are expected to combine *error_str_p
* with _dbus_strerror (errno) to get a full error report.
*/
dbus_bool_t
_dbus_reset_oom_score_adj (const char **error_str_p)
{
#ifdef __linux__
int fd = -1;
dbus_bool_t ret = FALSE;
int saved_errno = 0;
const char *error_str = NULL;
#ifdef O_CLOEXEC
fd = open ("/proc/self/oom_score_adj", O_RDWR | O_CLOEXEC);
#endif
if (fd < 0)
{
fd = open ("/proc/self/oom_score_adj", O_RDWR);
_dbus_fd_set_close_on_exec (fd);
}
if (fd >= 0)
{
ssize_t read_result = -1;
/* It doesn't actually matter whether we read the whole file,
* as long as we get the presence or absence of the minus sign */
char first_char = '\0';
read_result = read (fd, &first_char, 1);
if (read_result < 0)
{
/* This probably can't actually happen in practice: if we can
* open it, then we can hopefully read from it */
ret = FALSE;
error_str = "failed to read from /proc/self/oom_score_adj";
saved_errno = errno;
goto out;
}
/* If we are running with protection from the OOM killer
* (typical for the system dbus-daemon under systemd), then
* oom_score_adj will be negative. Drop that protection,
* returning to oom_score_adj = 0.
*
* Conversely, if we are running with increased susceptibility
* to the OOM killer (as user sessions typically do in
* systemd >= 250), oom_score_adj will be strictly positive,
* and we are not allowed to decrease it to 0 without privileges.
*
* If it's exactly 0 (typical for non-systemd systems, and
* user processes on older systemd) then there's no need to
* alter it.
*
* We shouldn't get an empty result, but if we do, assume it
* means zero and don't try to change it. */
if (read_result == 0 || first_char != '-')
{
/* Nothing needs to be done: the OOM score adjustment is
* non-negative */
ret = TRUE;
goto out;
}
if (pwrite (fd, "0", sizeof (char), 0) < 0)
{
ret = FALSE;
error_str = "writing oom_score_adj error";
saved_errno = errno;
goto out;
}
/* Success */
ret = TRUE;
}
else if (errno == ENOENT)
{
/* If /proc/self/oom_score_adj doesn't exist, assume the kernel
* doesn't support this feature and ignore it. */
ret = TRUE;
}
else
{
ret = FALSE;
error_str = "open(/proc/self/oom_score_adj)";
saved_errno = errno;
goto out;
}
out:
if (fd >= 0)
_dbus_close (fd, NULL);
if (error_str_p != NULL)
*error_str_p = error_str;
errno = saved_errno;
return ret;
#else
/* nothing to do on this platform */
return TRUE;
#endif
}