core: fix nm_utils_kill_process_sync() not to hang for a zombie process

kill(pid,sig) can return success for zombie processes. This
caused nm_utils_kill_process_sync() to hang indefinitely.

Fix it by also checking the process state.

(cherry picked from commit 69c98a336e)
This commit is contained in:
Thomas Haller 2015-06-30 12:28:29 +02:00
parent dfff2265d3
commit c26294d6b0
4 changed files with 37 additions and 7 deletions

View file

@ -303,6 +303,7 @@ nm_utils_modprobe (GError **error, gboolean suppress_error_logging, const char *
/**
* nm_utils_get_start_time_for_pid:
* @pid: the process identifier
* @out_state: return the state character, like R, S, Z. See `man 5 proc`.
*
* Originally copied from polkit source (src/polkit/polkitunixprocess.c)
* and adjusted.
@ -313,7 +314,7 @@ nm_utils_modprobe (GError **error, gboolean suppress_error_logging, const char *
* The returned start time counts since boot, in the unit HZ (with HZ usually being (1/100) seconds)
**/
guint64
nm_utils_get_start_time_for_pid (pid_t pid)
nm_utils_get_start_time_for_pid (pid_t pid, char *out_state)
{
guint64 start_time;
gchar *filename;
@ -323,6 +324,7 @@ nm_utils_get_start_time_for_pid (pid_t pid)
guint num_tokens;
gchar *p;
gchar *endp;
char state = '\0';
start_time = 0;
contents = NULL;
@ -345,6 +347,8 @@ nm_utils_get_start_time_for_pid (pid_t pid)
if (p - contents >= (int) length)
goto out;
state = p[0];
tokens = g_strsplit (p, " ", 0);
num_tokens = g_strv_length (tokens);
@ -359,6 +363,9 @@ nm_utils_get_start_time_for_pid (pid_t pid)
g_strfreev (tokens);
out:
if (out_state)
*out_state = state;
g_free (filename);
g_free (contents);
@ -813,7 +820,7 @@ out:
* Set to zero, to use the default (meaning 20 wakeups per seconds).
*
* Kill a non-child process synchronously and wait. This function will not return before the
* process with PID @pid is gone.
* process with PID @pid is gone or the process is a zombie.
**/
void
nm_utils_kill_process_sync (pid_t pid, guint64 start_time, int sig, guint64 log_domain,
@ -827,12 +834,13 @@ nm_utils_kill_process_sync (pid_t pid, guint64 start_time, int sig, guint64 log_
int loop_count = 0;
gboolean was_waiting = FALSE;
char buf_wait[KC_WAITED_TO_STRING];
char p_state;
g_return_if_fail (pid > 0);
g_return_if_fail (log_name != NULL);
g_return_if_fail (wait_before_kill_msec > 0);
start_time0 = nm_utils_get_start_time_for_pid (pid);
start_time0 = nm_utils_get_start_time_for_pid (pid, &p_state);
if (start_time0 == 0) {
nm_log_dbg (log_domain, LOG_NAME_PROCESS_FMT ": cannot kill process %ld because it seems already gone",
LOG_NAME_ARGS, (long int) pid);
@ -844,6 +852,17 @@ nm_utils_kill_process_sync (pid_t pid, guint64 start_time, int sig, guint64 log_
return;
}
switch (p_state) {
case 'Z':
case 'x':
case 'X':
nm_log_dbg (log_domain, LOG_NAME_PROCESS_FMT ": cannot kill process %ld because it is already a zombie (%c)",
LOG_NAME_ARGS, (long int) pid, p_state);
return;
default:
break;
}
if (kill (pid, sig) != 0) {
errsv = errno;
/* ESRCH means, process does not exist or is already a zombie. */
@ -865,7 +884,7 @@ nm_utils_kill_process_sync (pid_t pid, guint64 start_time, int sig, guint64 log_
wait_until = wait_start_us + (((gint64) wait_before_kill_msec) * 1000L);
while (TRUE) {
start_time = nm_utils_get_start_time_for_pid (pid);
start_time = nm_utils_get_start_time_for_pid (pid, &p_state);
if (start_time != start_time0) {
nm_log_dbg (log_domain, LOG_NAME_PROCESS_FMT ": process is gone after sending signal %s%s",
@ -873,6 +892,17 @@ nm_utils_kill_process_sync (pid_t pid, guint64 start_time, int sig, guint64 log_
was_waiting ? _kc_waited_to_string (buf_wait, wait_start_us) : "");
return;
}
switch (p_state) {
case 'Z':
case 'x':
case 'X':
nm_log_dbg (log_domain, LOG_NAME_PROCESS_FMT ": process is a zombie (%c) after sending signal %s%s",
LOG_NAME_ARGS, p_state, _kc_signal_to_string (sig),
was_waiting ? _kc_waited_to_string (buf_wait, wait_start_us) : "");
return;
default:
break;
}
if (kill (pid, 0) != 0) {
errsv = errno;

View file

@ -69,7 +69,7 @@ str_if_set (const char *str, const char *fallback)
return str ? str : fallback;
}
guint64 nm_utils_get_start_time_for_pid (pid_t pid);
guint64 nm_utils_get_start_time_for_pid (pid_t pid, char *out_state);
void nm_utils_kill_process_sync (pid_t pid, guint64 start_time, int sig, guint64 log_domain,
const char *log_name, guint32 wait_before_kill_msec,

View file

@ -592,7 +592,7 @@ nm_dhcp_client_stop_existing (const char *pid_file, const char *binary_name)
const char *exe;
/* Ensure the process is a DHCP client */
start_time = nm_utils_get_start_time_for_pid (tmp);
start_time = nm_utils_get_start_time_for_pid (tmp, NULL);
proc_path = g_strdup_printf ("/proc/%ld/cmdline", tmp);
if ( start_time
&& g_file_get_contents (proc_path, &proc_contents, NULL, NULL)) {

View file

@ -360,7 +360,7 @@ constructed (GObject *object)
if (!priv->unix_process.dbus_sender || !*priv->unix_process.dbus_sender)
break;
priv->unix_process.start_time = nm_utils_get_start_time_for_pid (priv->unix_process.pid);
priv->unix_process.start_time = nm_utils_get_start_time_for_pid (priv->unix_process.pid, NULL);
if (!priv->unix_process.start_time) {
/* could not detect the process start time. The subject is invalid, but don't