From c26294d6b081be24549d0c494261e34db93eae86 Mon Sep 17 00:00:00 2001 From: Thomas Haller Date: Tue, 30 Jun 2015 12:28:29 +0200 Subject: [PATCH] core: fix nm_utils_kill_process_sync() not to hang for a zombie process kill(pid,sig) can return success for zombie processes. This caused nm_utils_kill_process_sync() to hang indefinitely. Fix it by also checking the process state. (cherry picked from commit 69c98a336e32935d1032160c1ab2f8ea597a8342) --- src/NetworkManagerUtils.c | 38 +++++++++++++++++++++++++++---- src/NetworkManagerUtils.h | 2 +- src/dhcp-manager/nm-dhcp-client.c | 2 +- src/nm-auth-subject.c | 2 +- 4 files changed, 37 insertions(+), 7 deletions(-) diff --git a/src/NetworkManagerUtils.c b/src/NetworkManagerUtils.c index 7001bd936c..a4aa0e3699 100644 --- a/src/NetworkManagerUtils.c +++ b/src/NetworkManagerUtils.c @@ -303,6 +303,7 @@ nm_utils_modprobe (GError **error, gboolean suppress_error_logging, const char * /** * nm_utils_get_start_time_for_pid: * @pid: the process identifier + * @out_state: return the state character, like R, S, Z. See `man 5 proc`. * * Originally copied from polkit source (src/polkit/polkitunixprocess.c) * and adjusted. @@ -313,7 +314,7 @@ nm_utils_modprobe (GError **error, gboolean suppress_error_logging, const char * * The returned start time counts since boot, in the unit HZ (with HZ usually being (1/100) seconds) **/ guint64 -nm_utils_get_start_time_for_pid (pid_t pid) +nm_utils_get_start_time_for_pid (pid_t pid, char *out_state) { guint64 start_time; gchar *filename; @@ -323,6 +324,7 @@ nm_utils_get_start_time_for_pid (pid_t pid) guint num_tokens; gchar *p; gchar *endp; + char state = '\0'; start_time = 0; contents = NULL; @@ -345,6 +347,8 @@ nm_utils_get_start_time_for_pid (pid_t pid) if (p - contents >= (int) length) goto out; + state = p[0]; + tokens = g_strsplit (p, " ", 0); num_tokens = g_strv_length (tokens); @@ -359,6 +363,9 @@ nm_utils_get_start_time_for_pid (pid_t pid) g_strfreev (tokens); out: + if (out_state) + *out_state = state; + g_free (filename); g_free (contents); @@ -813,7 +820,7 @@ out: * Set to zero, to use the default (meaning 20 wakeups per seconds). * * Kill a non-child process synchronously and wait. This function will not return before the - * process with PID @pid is gone. + * process with PID @pid is gone or the process is a zombie. **/ void nm_utils_kill_process_sync (pid_t pid, guint64 start_time, int sig, guint64 log_domain, @@ -827,12 +834,13 @@ nm_utils_kill_process_sync (pid_t pid, guint64 start_time, int sig, guint64 log_ int loop_count = 0; gboolean was_waiting = FALSE; char buf_wait[KC_WAITED_TO_STRING]; + char p_state; g_return_if_fail (pid > 0); g_return_if_fail (log_name != NULL); g_return_if_fail (wait_before_kill_msec > 0); - start_time0 = nm_utils_get_start_time_for_pid (pid); + start_time0 = nm_utils_get_start_time_for_pid (pid, &p_state); if (start_time0 == 0) { nm_log_dbg (log_domain, LOG_NAME_PROCESS_FMT ": cannot kill process %ld because it seems already gone", LOG_NAME_ARGS, (long int) pid); @@ -844,6 +852,17 @@ nm_utils_kill_process_sync (pid_t pid, guint64 start_time, int sig, guint64 log_ return; } + switch (p_state) { + case 'Z': + case 'x': + case 'X': + nm_log_dbg (log_domain, LOG_NAME_PROCESS_FMT ": cannot kill process %ld because it is already a zombie (%c)", + LOG_NAME_ARGS, (long int) pid, p_state); + return; + default: + break; + } + if (kill (pid, sig) != 0) { errsv = errno; /* ESRCH means, process does not exist or is already a zombie. */ @@ -865,7 +884,7 @@ nm_utils_kill_process_sync (pid_t pid, guint64 start_time, int sig, guint64 log_ wait_until = wait_start_us + (((gint64) wait_before_kill_msec) * 1000L); while (TRUE) { - start_time = nm_utils_get_start_time_for_pid (pid); + start_time = nm_utils_get_start_time_for_pid (pid, &p_state); if (start_time != start_time0) { nm_log_dbg (log_domain, LOG_NAME_PROCESS_FMT ": process is gone after sending signal %s%s", @@ -873,6 +892,17 @@ nm_utils_kill_process_sync (pid_t pid, guint64 start_time, int sig, guint64 log_ was_waiting ? _kc_waited_to_string (buf_wait, wait_start_us) : ""); return; } + switch (p_state) { + case 'Z': + case 'x': + case 'X': + nm_log_dbg (log_domain, LOG_NAME_PROCESS_FMT ": process is a zombie (%c) after sending signal %s%s", + LOG_NAME_ARGS, p_state, _kc_signal_to_string (sig), + was_waiting ? _kc_waited_to_string (buf_wait, wait_start_us) : ""); + return; + default: + break; + } if (kill (pid, 0) != 0) { errsv = errno; diff --git a/src/NetworkManagerUtils.h b/src/NetworkManagerUtils.h index a9fc2fcd7c..089ffab280 100644 --- a/src/NetworkManagerUtils.h +++ b/src/NetworkManagerUtils.h @@ -69,7 +69,7 @@ str_if_set (const char *str, const char *fallback) return str ? str : fallback; } -guint64 nm_utils_get_start_time_for_pid (pid_t pid); +guint64 nm_utils_get_start_time_for_pid (pid_t pid, char *out_state); void nm_utils_kill_process_sync (pid_t pid, guint64 start_time, int sig, guint64 log_domain, const char *log_name, guint32 wait_before_kill_msec, diff --git a/src/dhcp-manager/nm-dhcp-client.c b/src/dhcp-manager/nm-dhcp-client.c index a39ffcca0d..9aa4809671 100644 --- a/src/dhcp-manager/nm-dhcp-client.c +++ b/src/dhcp-manager/nm-dhcp-client.c @@ -592,7 +592,7 @@ nm_dhcp_client_stop_existing (const char *pid_file, const char *binary_name) const char *exe; /* Ensure the process is a DHCP client */ - start_time = nm_utils_get_start_time_for_pid (tmp); + start_time = nm_utils_get_start_time_for_pid (tmp, NULL); proc_path = g_strdup_printf ("/proc/%ld/cmdline", tmp); if ( start_time && g_file_get_contents (proc_path, &proc_contents, NULL, NULL)) { diff --git a/src/nm-auth-subject.c b/src/nm-auth-subject.c index 13133f2777..3fc0e64a1d 100644 --- a/src/nm-auth-subject.c +++ b/src/nm-auth-subject.c @@ -360,7 +360,7 @@ constructed (GObject *object) if (!priv->unix_process.dbus_sender || !*priv->unix_process.dbus_sender) break; - priv->unix_process.start_time = nm_utils_get_start_time_for_pid (priv->unix_process.pid); + priv->unix_process.start_time = nm_utils_get_start_time_for_pid (priv->unix_process.pid, NULL); if (!priv->unix_process.start_time) { /* could not detect the process start time. The subject is invalid, but don't