From b5875ec79ee245a0576c8f3db252273b5aa1dd33 Mon Sep 17 00:00:00 2001 From: Thomas Haller Date: Tue, 28 Mar 2023 11:02:32 +0200 Subject: [PATCH 1/9] core: use nm_clear_fd() helper in nm_utils_spawn_helper() --- src/core/nm-core-utils.c | 17 +++++------------ 1 file changed, 5 insertions(+), 12 deletions(-) diff --git a/src/core/nm-core-utils.c b/src/core/nm-core-utils.c index 2686deef96..4ee0d12322 100644 --- a/src/core/nm-core-utils.c +++ b/src/core/nm-core-utils.c @@ -4913,13 +4913,9 @@ helper_info_free(gpointer data) nm_clear_g_source_inst(&info->input_source); nm_clear_g_source_inst(&info->output_source); nm_clear_g_source_inst(&info->error_source); - - if (info->child_stdout != -1) - nm_close(info->child_stdout); - if (info->child_stdin != -1) - nm_close(info->child_stdin); - if (info->child_stderr != -1) - nm_close(info->child_stderr); + nm_clear_fd(&info->child_stdout); + nm_clear_fd(&info->child_stdin); + nm_clear_fd(&info->child_stderr); if (info->pid != -1) { nm_assert(info->pid > 1); @@ -5015,8 +5011,7 @@ helper_have_data(int fd, GIOCondition condition, gpointer user_data) return G_SOURCE_CONTINUE; nm_clear_g_source_inst(&info->input_source); - nm_close(info->child_stdout); - info->child_stdout = -1; + nm_clear_fd(&info->child_stdout); _LOG2T(info, "stdout closed"); @@ -5044,9 +5039,7 @@ helper_have_err_data(int fd, GIOCondition condition, gpointer user_data) return G_SOURCE_CONTINUE; nm_clear_g_source_inst(&info->error_source); - nm_close(info->child_stderr); - info->child_stderr = -1; - + nm_clear_fd(&info->child_stderr); return G_SOURCE_CONTINUE; } From f74109e4b03f365ab72d7ff0035e264f38320e98 Mon Sep 17 00:00:00 2001 From: Thomas Haller Date: Tue, 28 Mar 2023 11:09:23 +0200 Subject: [PATCH 2/9] core: rename nmlog defines in "nm-core-utils.c" Those are related to _NMLOG2() macro. Rename. --- src/core/nm-core-utils.c | 46 +++++++++++++++++++++++----------------- 1 file changed, 26 insertions(+), 20 deletions(-) diff --git a/src/core/nm-core-utils.c b/src/core/nm-core-utils.c index 4ee0d12322..19027ef96f 100644 --- a/src/core/nm-core-utils.c +++ b/src/core/nm-core-utils.c @@ -4877,25 +4877,25 @@ typedef struct { gsize out_buffer_offset; } HelperInfo; -#define _NMLOG_PREFIX_NAME "helper" -#define _NMLOG_DOMAIN LOGD_CORE -#define _NMLOG2(level, info, ...) \ - G_STMT_START \ - { \ - if (nm_logging_enabled((level), (_NMLOG_DOMAIN))) { \ - HelperInfo *_info = (info); \ - \ - _nm_log((level), \ - (_NMLOG_DOMAIN), \ - 0, \ - NULL, \ - NULL, \ - _NMLOG_PREFIX_NAME "[" NM_HASH_OBFUSCATE_PTR_FMT \ - ",%d]: " _NM_UTILS_MACRO_FIRST(__VA_ARGS__), \ - NM_HASH_OBFUSCATE_PTR(_info), \ - _info->pid _NM_UTILS_MACRO_REST(__VA_ARGS__)); \ - } \ - } \ +#define _NMLOG2_PREFIX_NAME "helper" +#define _NMLOG2_DOMAIN LOGD_CORE +#define _NMLOG2(level, info, ...) \ + G_STMT_START \ + { \ + if (nm_logging_enabled((level), (_NMLOG2_DOMAIN))) { \ + HelperInfo *_info = (info); \ + \ + _nm_log((level), \ + (_NMLOG2_DOMAIN), \ + 0, \ + NULL, \ + NULL, \ + _NMLOG2_PREFIX_NAME "[" NM_HASH_OBFUSCATE_PTR_FMT \ + ",%d]: " _NM_UTILS_MACRO_FIRST(__VA_ARGS__), \ + NM_HASH_OBFUSCATE_PTR(_info), \ + _info->pid _NM_UTILS_MACRO_REST(__VA_ARGS__)); \ + } \ + } \ G_STMT_END static void @@ -4919,7 +4919,13 @@ helper_info_free(gpointer data) if (info->pid != -1) { nm_assert(info->pid > 1); - nm_utils_kill_child_async(info->pid, SIGKILL, LOGD_CORE, _NMLOG_PREFIX_NAME, 0, NULL, NULL); + nm_utils_kill_child_async(info->pid, + SIGKILL, + LOGD_CORE, + _NMLOG2_PREFIX_NAME, + 0, + NULL, + NULL); } g_free(info); From f9c409d34cfd3a18b520fd5ddeb6693837fc7ffc Mon Sep 17 00:00:00 2001 From: Thomas Haller Date: Tue, 28 Mar 2023 11:04:27 +0200 Subject: [PATCH 3/9] core: qualify logging lines related to helper with "nm-daemon-helper" Seems to be the better name, because that is also the name of the executable. --- src/core/nm-core-utils.c | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/src/core/nm-core-utils.c b/src/core/nm-core-utils.c index 19027ef96f..e3538a5079 100644 --- a/src/core/nm-core-utils.c +++ b/src/core/nm-core-utils.c @@ -4877,7 +4877,7 @@ typedef struct { gsize out_buffer_offset; } HelperInfo; -#define _NMLOG2_PREFIX_NAME "helper" +#define _NMLOG2_PREFIX_NAME "nm-daemon-helper" #define _NMLOG2_DOMAIN LOGD_CORE #define _NMLOG2(level, info, ...) \ G_STMT_START \ @@ -4919,13 +4919,7 @@ helper_info_free(gpointer data) if (info->pid != -1) { nm_assert(info->pid > 1); - nm_utils_kill_child_async(info->pid, - SIGKILL, - LOGD_CORE, - _NMLOG2_PREFIX_NAME, - 0, - NULL, - NULL); + nm_utils_kill_child_async(info->pid, SIGKILL, LOGD_CORE, "nm-daemon-helper", 0, NULL, NULL); } g_free(info); From 3411f4241892eb6953dd314e72a162fb38a03f11 Mon Sep 17 00:00:00 2001 From: Thomas Haller Date: Tue, 28 Mar 2023 13:29:43 +0200 Subject: [PATCH 4/9] core: store main context in variable in nm_utils_spawn_helper() There is no change in behavior, because the GTask's context is of course g_main_context_get_thread_default(). Still, not point in making that unclear. --- src/core/nm-core-utils.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/src/core/nm-core-utils.c b/src/core/nm-core-utils.c index e3538a5079..7e428a7f4f 100644 --- a/src/core/nm-core-utils.c +++ b/src/core/nm-core-utils.c @@ -5098,6 +5098,7 @@ nm_utils_spawn_helper(const char *const *args, HelperInfo *info; int fd_flags; const char *const *arg; + GMainContext *context; nm_assert(args && args[0]); @@ -5135,16 +5136,18 @@ nm_utils_spawn_helper(const char *const *args, _LOG2D(info, "spawned process with args: %s", (commands = g_strjoinv(" ", (char **) args))); + context = g_task_get_context(info->task); + info->child_watch_source = g_child_watch_source_new(info->pid); g_source_set_callback(info->child_watch_source, G_SOURCE_FUNC(helper_child_terminated), info, NULL); - g_source_attach(info->child_watch_source, g_main_context_get_thread_default()); + g_source_attach(info->child_watch_source, context); info->timeout_source = nm_g_timeout_source_new_seconds(20, G_PRIORITY_DEFAULT, helper_timeout, info, NULL); - g_source_attach(info->timeout_source, g_main_context_get_thread_default()); + g_source_attach(info->timeout_source, context); /* Set file descriptors as non-blocking */ fd_flags = fcntl(info->child_stdin, F_GETFD, 0); @@ -5166,7 +5169,7 @@ nm_utils_spawn_helper(const char *const *args, helper_can_write, info, NULL); - g_source_attach(info->output_source, g_main_context_get_thread_default()); + g_source_attach(info->output_source, context); /* Watch process stdout */ info->in_buffer = NM_STR_BUF_INIT(0, FALSE); @@ -5176,7 +5179,7 @@ nm_utils_spawn_helper(const char *const *args, helper_have_data, info, NULL); - g_source_attach(info->input_source, g_main_context_get_thread_default()); + g_source_attach(info->input_source, context); /* Watch process stderr */ info->err_buffer = NM_STR_BUF_INIT(0, FALSE); @@ -5186,7 +5189,7 @@ nm_utils_spawn_helper(const char *const *args, helper_have_err_data, info, NULL); - g_source_attach(info->error_source, g_main_context_get_thread_default()); + g_source_attach(info->error_source, context); if (cancellable) { gulong signal_id; From a52d620549acb419056342fc978914cffaf8bedb Mon Sep 17 00:00:00 2001 From: Thomas Haller Date: Thu, 30 Mar 2023 08:16:25 +0200 Subject: [PATCH 5/9] core: assert that nm_utils_spawn_helper() is used with default context --- src/core/nm-core-utils.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/core/nm-core-utils.c b/src/core/nm-core-utils.c index 7e428a7f4f..1af875d796 100644 --- a/src/core/nm-core-utils.c +++ b/src/core/nm-core-utils.c @@ -5138,6 +5138,12 @@ nm_utils_spawn_helper(const char *const *args, context = g_task_get_context(info->task); + /* The async function makes a lukewarm attempt to honor the current thread default + * context. However, it later uses nm_utils_kill_child_async() which always uses + * g_main_context_default(). For now, the function really can only be used with the + * main context. */ + nm_assert(context == g_main_context_default()); + info->child_watch_source = g_child_watch_source_new(info->pid); g_source_set_callback(info->child_watch_source, G_SOURCE_FUNC(helper_child_terminated), From 597e127bcbf36d6eb59e8348b0e28aee1413e639 Mon Sep 17 00:00:00 2001 From: Thomas Haller Date: Tue, 28 Mar 2023 13:37:13 +0200 Subject: [PATCH 6/9] core: use nm_g_child_watch_source_new() in nm_utils_spawn_helper() --- src/core/nm-core-utils.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/core/nm-core-utils.c b/src/core/nm-core-utils.c index 1af875d796..8b66b0ecc8 100644 --- a/src/core/nm-core-utils.c +++ b/src/core/nm-core-utils.c @@ -5144,11 +5144,11 @@ nm_utils_spawn_helper(const char *const *args, * main context. */ nm_assert(context == g_main_context_default()); - info->child_watch_source = g_child_watch_source_new(info->pid); - g_source_set_callback(info->child_watch_source, - G_SOURCE_FUNC(helper_child_terminated), - info, - NULL); + info->child_watch_source = nm_g_child_watch_source_new(info->pid, + G_PRIORITY_DEFAULT, + helper_child_terminated, + info, + NULL); g_source_attach(info->child_watch_source, context); info->timeout_source = From 346196792c36a87e6c2c521c16611467207d88b9 Mon Sep 17 00:00:00 2001 From: Thomas Haller Date: Tue, 28 Mar 2023 18:09:35 +0200 Subject: [PATCH 7/9] core/trivial: add code comment about using GChildWatchSource --- src/core/nm-core-utils.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/src/core/nm-core-utils.c b/src/core/nm-core-utils.c index 8b66b0ecc8..3dfdffbfba 100644 --- a/src/core/nm-core-utils.c +++ b/src/core/nm-core-utils.c @@ -5144,6 +5144,18 @@ nm_utils_spawn_helper(const char *const *args, * main context. */ nm_assert(context == g_main_context_default()); + /* We are using a GChildWatchSource in combination with kill()/waitpid() + * (where helper_info_free() clears the source and calls + * nm_utils_kill_child_async()). That leads to races where glib might have + * already reaped the process and our waitpid() call fails with: + * + * [TIMESTAMP] kill child process 'nm-daemon-helper' (PID): failed due to unexpected return value -1 by waitpid (No child processes, 10) after sending SIGKILL (9) + * + * This is a bug in glib, addressed by [1]. Maybe there should be a + * workaround here, and not using the child watcher? + * + * [1] https://gitlab.gnome.org/GNOME/glib/-/merge_requests/3353 + */ info->child_watch_source = nm_g_child_watch_source_new(info->pid, G_PRIORITY_DEFAULT, helper_child_terminated, From a65e80e8b6907c5666ff7f41316b0b00b78760f0 Mon Sep 17 00:00:00 2001 From: Thomas Haller Date: Tue, 28 Mar 2023 13:34:40 +0200 Subject: [PATCH 8/9] core: pre-allocate exact buffer size for output in nm_utils_spawn_helper() It's easy enough to know how many bytes are needed. Just allocate the right size (+1, because NMStrBuf really likes to reserve that extra byte for the trailing NUL, even if it's not needed in this case). --- src/core/nm-core-utils.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/core/nm-core-utils.c b/src/core/nm-core-utils.c index 3dfdffbfba..880483b817 100644 --- a/src/core/nm-core-utils.c +++ b/src/core/nm-core-utils.c @@ -5099,6 +5099,7 @@ nm_utils_spawn_helper(const char *const *args, int fd_flags; const char *const *arg; GMainContext *context; + gsize n; nm_assert(args && args[0]); @@ -5176,7 +5177,9 @@ nm_utils_spawn_helper(const char *const *args, fcntl(info->child_stderr, F_SETFL, fd_flags | O_NONBLOCK); /* Watch process stdin */ - info->out_buffer = NM_STR_BUF_INIT(NM_UTILS_GET_NEXT_REALLOC_SIZE_40, TRUE); + for (n = 1, arg = args; *arg; arg++) + n += strlen(*arg) + 1u; + info->out_buffer = NM_STR_BUF_INIT(n, TRUE); for (arg = args; *arg; arg++) { nm_str_buf_append(&info->out_buffer, *arg); nm_str_buf_append_c(&info->out_buffer, '\0'); From f58a69656a362a39e8826dd1027b3b5874f8f30a Mon Sep 17 00:00:00 2001 From: Thomas Haller Date: Wed, 29 Mar 2023 13:58:17 +0200 Subject: [PATCH 9/9] core: error out in nm_utils_kill_child_{sync,async}() if first waitpid() gives ECHILD It is wrong trying to send the signal still. Just error out. Note that ECHILD indicates that the process is either not a child or was already reaped. In both cases, that is a bug of the caller who must keep accurate track of the child's process ID. --- src/core/nm-core-utils.c | 32 +++++++++++--------------- src/core/tests/test-core-with-expect.c | 10 ++++---- 2 files changed, 17 insertions(+), 25 deletions(-) diff --git a/src/core/nm-core-utils.c b/src/core/nm-core-utils.c index 880483b817..7482292034 100644 --- a/src/core/nm-core-utils.c +++ b/src/core/nm-core-utils.c @@ -507,16 +507,13 @@ nm_utils_kill_child_async(pid_t pid, return; } else if (ret != 0) { errsv = errno; - /* ECHILD means, the process is not a child/does not exist or it has SIGCHILD blocked. */ - if (errsv != ECHILD) { - nm_log_err(LOGD_CORE | log_domain, - LOG_NAME_FMT ": unexpected error while waitpid: %s (%d)", - LOG_NAME_ARGS, - nm_strerror_native(errsv), - errsv); - _kc_invoke_callback(pid, log_domain, log_name, callback, user_data, FALSE, -1); - return; - } + nm_log_err(LOGD_CORE | log_domain, + LOG_NAME_FMT ": unexpected error while waitpid: %s (%d)", + LOG_NAME_ARGS, + nm_strerror_native(errsv), + errsv); + _kc_invoke_callback(pid, log_domain, log_name, callback, user_data, FALSE, -1); + return; } /* send the first signal. */ @@ -647,15 +644,12 @@ nm_utils_kill_child_sync(pid_t pid, goto out; } else if (ret != 0) { errsv = errno; - /* ECHILD means, the process is not a child/does not exist or it has SIGCHILD blocked. */ - if (errsv != ECHILD) { - nm_log_err(LOGD_CORE | log_domain, - LOG_NAME_FMT ": unexpected error while waitpid: %s (%d)", - LOG_NAME_ARGS, - nm_strerror_native(errsv), - errsv); - goto out; - } + nm_log_err(LOGD_CORE | log_domain, + LOG_NAME_FMT ": unexpected error while waitpid: %s (%d)", + LOG_NAME_ARGS, + nm_strerror_native(errsv), + errsv); + goto out; } /* send first signal @sig */ diff --git a/src/core/tests/test-core-with-expect.c b/src/core/tests/test-core-with-expect.c index b05144ac21..015101262c 100644 --- a/src/core/tests/test-core-with-expect.c +++ b/src/core/tests/test-core-with-expect.c @@ -333,9 +333,8 @@ do_test_nm_utils_kill_child(void) /* pid3s should not be a valid process, hence the call should fail. Note, that there * is a race here. */ - NMTST_EXPECT_NM_ERROR( - "kill child process 'test-s-3-2' (*): failed due to unexpected return value -1 by waitpid " - "(No child process*, 10) after sending no signal (0)"); + NMTST_EXPECT_NM_ERROR("kill child process 'test-s-3-2' (*): unexpected error while waitpid: No " + "child process* (10)"); test_nm_utils_kill_child_sync_do("test-s-3-2", pid3s, 0, 0, FALSE, NULL); NMTST_EXPECT_NM_DEBUG("kill child process 'test-s-4' (*): waiting up to 50 milliseconds for " @@ -396,9 +395,8 @@ do_test_nm_utils_kill_child(void) /* pid3a should not be a valid process, hence the call should fail. Note, that there * is a race here. */ - NMTST_EXPECT_NM_ERROR( - "kill child process 'test-a-3-2' (*): failed due to unexpected return value -1 by waitpid " - "(No child process*, 10) after sending no signal (0)"); + NMTST_EXPECT_NM_ERROR("kill child process 'test-a-3-2' (*): unexpected error while " + "waitpid: No child process* (10)"); NMTST_EXPECT_NM_DEBUG( "kill child process 'test-a-3-2' (*): invoke callback: killing child failed"); test_nm_utils_kill_child_async_do("test-a-3-2", pid3a, 0, 0, FALSE, NULL);