nm-dispatcher: split pre-up/pre-down requests into a separate queue

The dispatcher previously serialized all "wait" scripts globally through a
single current_request slot and FIFO requests_waiting queue. Pre-up and
pre-down actions look only at pre-up.d/ resp. pre-down.d/, which never
share script files with the regular dispatcher.d/* lookups, so they cannot
truly conflict with non-pre-* requests.

Yet the global FIFO meant that a slow or hung script for any unrelated
action (for example a misbehaving down or dhcp4-change handler) could
queue up in front of a pre-up dispatcher call. Because pre-up
synchronously gates the device state machine in IP_CHECK
(ip_check_pre_up() in src/core/devices/nm-device.c waits for
_dispatcher_complete_proceed_state), the activation of an unrelated
connection would stall by the full duration of the in-flight regular
script, up to SCRIPT_TIMEOUT (600s). Even an async event like
dhcp4-change inherits gating semantics this way if it is queued ahead
of a pre-up.

Split the queue: REQ_QUEUE_NORMAL for everything routed through
dispatcher.d/, REQ_QUEUE_PRE for pre-up/pre-down/vpn-pre-up/vpn-pre-down.
The two queues advance independently, so a hung regular script no longer
delays connection activation.

Signed-off-by: Stefan Agner <stefan@agner.ch>
This commit is contained in:
Stefan Agner 2026-05-01 00:25:44 +02:00
parent 19b065bc4a
commit 325b74fbe9
No known key found for this signature in database
GPG key ID: AE01353D1E44747D

View file

@ -34,6 +34,15 @@
typedef struct Request Request;
/* pre-up/pre-down requests scan only pre-up.d/ resp. pre-down.d/, so they
 * never share script files with regular dispatcher.d/ requests and must not
 * serialize against them. We keep two independent queues. */
typedef enum {
REQ_QUEUE_NORMAL, /* all actions dispatched via dispatcher.d/ */
REQ_QUEUE_PRE, /* pre-up/pre-down and vpn-pre-up/vpn-pre-down actions */
_REQ_QUEUE_NUM, /* sentinel: number of queues; used as array dimension */
} RequestQueueIdx;
typedef struct {
GDBusConnection *dbus_connection;
GCancellable *quit_cancellable;
@ -50,8 +59,8 @@ typedef struct {
gboolean persist;
Request *current_request;
GQueue *requests_waiting;
Request *current_request[_REQ_QUEUE_NUM];
GQueue *requests_waiting[_REQ_QUEUE_NUM];
int num_requests_pending;
bool exit_with_failure;
@ -253,43 +262,67 @@ _idle_timeout_restart(void)
/*****************************************************************************/
/* request_queue_idx:
 * Map @request to the queue it serializes on: pre-up/pre-down (and their
 * VPN variants) use REQ_QUEUE_PRE; every other action uses REQ_QUEUE_NORMAL.
 * The classification must stay stable for a request's lifetime, since it is
 * re-derived at enqueue and completion time. */
static RequestQueueIdx
request_queue_idx(const Request *request)
{
if (NM_IN_STRSET(request->action,
NMD_ACTION_PRE_UP,
NMD_ACTION_VPN_PRE_UP,
NMD_ACTION_PRE_DOWN,
NMD_ACTION_VPN_PRE_DOWN))
return REQ_QUEUE_PRE;
return REQ_QUEUE_NORMAL;
}
/**
* next_request:
* next_request_enqueue:
*
* @request: (nullable): the request to set as next. If %NULL, dequeue the next
* waiting request. Otherwise, try to set the given request.
* Try to set @request as the currently active request in its queue. The
* current request is a request that has at least one "wait" script;
* requests with only "no-wait" scripts are handled right away and not
* enqueued. Pre-up/pre-down requests use a separate queue from regular
* requests so a slow regular script cannot stall connection activation.
*
* Sets the currently active request (@current_request). The current request
is a request that has at least one "wait" script, because requests that only
* consist of "no-wait" scripts are handled right away and not enqueued to
* @requests_waiting nor set as @current_request.
*
* Returns: %TRUE, if there was currently no request in progress and it set
* a new request as current.
* Returns: %TRUE if the matching queue was idle and @request was set as
* current; %FALSE if it was appended to @requests_waiting instead.
*/
static gboolean
next_request(Request *request)
next_request_enqueue(Request *request)
{
if (request) {
if (gl.current_request) {
g_queue_push_tail(gl.requests_waiting, request);
return FALSE;
}
} else {
/* when calling next_request() without explicit @request, we always
* forcefully clear @current_request. That one is certainly
* handled already. */
gl.current_request = NULL;
RequestQueueIdx qidx = request_queue_idx(request);
request = g_queue_pop_head(gl.requests_waiting);
if (!request)
return FALSE;
if (gl.current_request[qidx]) {
g_queue_push_tail(gl.requests_waiting[qidx], request);
return FALSE;
}
_LOG_R_D(request, "start running ordered scripts...");
gl.current_request[qidx] = request;
return TRUE;
}
gl.current_request = request;
/**
 * next_request_advance:
 * @qidx: the queue (REQ_QUEUE_NORMAL or REQ_QUEUE_PRE) to advance.
 *
 * Forcefully clears the current request of the given queue (it must be
 * fully handled at this point) and pops the next waiting request as
 * current. The two queues advance independently of each other.
 *
 * Returns: %TRUE if there was a waiting request that became current.
 */
static gboolean
next_request_advance(RequestQueueIdx qidx)
{
Request *request;
gl.current_request[qidx] = NULL;
request = g_queue_pop_head(gl.requests_waiting[qidx]);
if (!request)
return FALSE;
_LOG_R_D(request, "start running ordered scripts...");
gl.current_request[qidx] = request;
return TRUE;
}
@ -420,14 +453,21 @@ complete_request(Request *request)
_LOG_R_T(request, "completed (%u scripts)", request->scripts->len);
if (gl.current_request == request)
gl.current_request = NULL;
{
RequestQueueIdx qidx = request_queue_idx(request);
if (gl.current_request[qidx] == request)
gl.current_request[qidx] = NULL;
}
request_free(request);
nm_assert(gl.num_requests_pending > 0);
if (--gl.num_requests_pending <= 0) {
nm_assert(!gl.current_request && !g_queue_peek_head(gl.requests_waiting));
nm_assert(!gl.current_request[REQ_QUEUE_NORMAL]);
nm_assert(!gl.current_request[REQ_QUEUE_PRE]);
nm_assert(!g_queue_peek_head(gl.requests_waiting[REQ_QUEUE_NORMAL]));
nm_assert(!g_queue_peek_head(gl.requests_waiting[REQ_QUEUE_PRE]));
_idle_timeout_restart();
}
}
@ -435,8 +475,9 @@ complete_request(Request *request)
static void
complete_script(ScriptInfo *script)
{
Request *request = script->request;
gboolean wait = script->wait;
Request *request = script->request;
gboolean wait = script->wait;
RequestQueueIdx qidx = request_queue_idx(request);
if (script->pid != -1 || script->stdout_fd != -1) {
/* Wait that process has terminated and stdout is closed */
@ -454,7 +495,7 @@ complete_script(ScriptInfo *script)
return;
}
nm_assert(!wait || gl.current_request == request);
nm_assert(!wait || gl.current_request[qidx] == request);
/* Try to complete the request. @request will be possibly free'd,
* making @script and @request a dangling pointer. */
@ -463,32 +504,34 @@ complete_script(ScriptInfo *script)
if (!wait) {
/* this was a "no-wait" script. We either completed the request,
* or there is nothing to do. Especially, there is no need to
* queue the next_request() -- because no-wait scripts don't block
* queue next_request_advance() -- because no-wait scripts don't block
* requests. However, if this was the last "no-wait" script and
* there are "wait" scripts ready to run, launch them.
*/
if (gl.current_request == request && gl.current_request->num_scripts_nowait == 0) {
if (dispatch_one_script(gl.current_request))
if (gl.current_request[qidx] == request
&& gl.current_request[qidx]->num_scripts_nowait == 0) {
if (dispatch_one_script(gl.current_request[qidx]))
return;
complete_request(gl.current_request);
complete_request(gl.current_request[qidx]);
} else
return;
} else {
/* if the script is a "wait" script, we already tried above to
* dispatch the next script. As we didn't do that, it means we
* just completed the last script of @request and we can continue
* with the next request...
* with the next request in the same queue...
*
* Also, it cannot be that there is another request currently being
* processed because only requests with "wait" scripts can become
* @current_request. As there can only be one "wait" script running
* at any time, it means complete_request() above completed @request. */
nm_assert(!gl.current_request);
* processed in this queue because only requests with "wait" scripts
* can become @current_request. As there can only be one "wait" script
* running at any time per queue, it means complete_request() above
* completed @request. */
nm_assert(!gl.current_request[qidx]);
}
while (next_request(NULL)) {
request = gl.current_request;
while (next_request_advance(qidx)) {
request = gl.current_request[qidx];
if (dispatch_one_script(request))
return;
@ -497,7 +540,7 @@ complete_script(ScriptInfo *script)
* now, or when all pending "no-wait" scripts return. */
complete_request(request);
/* We can immediately start next_request(), because our current
/* We can immediately start next_request_advance(), because our current
* @request has obviously no more "wait" scripts either.
* Repeat... */
}
@ -1090,21 +1133,23 @@ _handle_action(GDBusMethodInvocation *invocation, GVariant *parameters, gboolean
if (num_nowait < request->scripts->len) {
/* The request has at least one wait script.
* Try next_request() to schedule the request for
* Try next_request_enqueue() to schedule the request for
* execution. This either enqueues the request or
* sets it as gl.current_request. */
if (next_request(request)) {
/* @request is now @current_request. Go ahead and
* sets it as gl.current_request[qidx]. */
if (next_request_enqueue(request)) {
/* @request is now @current_request[qidx]. Go ahead and
* schedule the first wait script. */
if (!dispatch_one_script(request)) {
RequestQueueIdx qidx = request_queue_idx(request);
/* If that fails, we might be already finished with the
* request. Try complete_request(). */
complete_request(request);
if (next_request(NULL)) {
/* As @request was successfully scheduled as next_request(), there is no
if (next_request_advance(qidx)) {
/* As @request was successfully scheduled, there is no
* other request in queue that can be scheduled afterwards. Assert against
* that, but call next_request() to clear current_request. */
* that, but call next_request_advance() to clear current_request[qidx]. */
g_assert_not_reached();
}
}
@ -1114,8 +1159,8 @@ _handle_action(GDBusMethodInvocation *invocation, GVariant *parameters, gboolean
* the request right away (we might have failed to schedule any
* of the scripts). It will be either completed now, or later
* when the pending scripts return.
* We don't enqueue it to gl.requests_waiting.
* There is no need to handle next_request(), because @request is
* We don't enqueue it to gl.requests_waiting[].
* There is no need to handle next_request_advance(), because @request is
* not the current request anyway and does not interfere with requests
* that have any "wait" scripts. */
complete_request(request);
@ -1510,7 +1555,8 @@ main(int argc, char **argv)
_LOG_X_D("dbus: unique name: %s", g_dbus_connection_get_unique_name(gl.dbus_connection));
gl.requests_waiting = g_queue_new();
gl.requests_waiting[REQ_QUEUE_NORMAL] = g_queue_new();
gl.requests_waiting[REQ_QUEUE_PRE] = g_queue_new();
_idle_timeout_restart();
@ -1551,7 +1597,8 @@ done:
nm_steal_int(&gl.service_regist_id));
}
nm_clear_pointer(&gl.requests_waiting, g_queue_free);
nm_clear_pointer(&gl.requests_waiting[REQ_QUEUE_NORMAL], g_queue_free);
nm_clear_pointer(&gl.requests_waiting[REQ_QUEUE_PRE], g_queue_free);
nm_clear_g_source_inst(&gl.source_idle_timeout);