mirror of
https://gitlab.freedesktop.org/NetworkManager/NetworkManager.git
synced 2026-05-11 09:18:38 +02:00
nm-dispatcher: split pre-up/pre-down requests into a separate queue
The dispatcher previously serialized all "wait" scripts globally through a single current_request slot and FIFO requests_waiting queue. Pre-up and pre-down actions look only at pre-up.d/ resp. pre-down.d/, which never share script files with the regular dispatcher.d/* lookups, so they cannot truly conflict with non-pre-* requests. Yet the global FIFO meant that a slow or hung script for any unrelated action (for example a misbehaving down or dhcp4-change handler) could queue up in front of a pre-up dispatcher call. Because pre-up synchronously gates the device state machine in IP_CHECK (ip_check_pre_up() in src/core/devices/nm-device.c waits for _dispatcher_complete_proceed_state), the activation of an unrelated connection would stall by the full duration of the in-flight regular script, up to SCRIPT_TIMEOUT (600s). Even an async event like dhcp4-change inherits gating semantics this way if it is queued ahead of a pre-up. Split the queue: REQ_QUEUE_NORMAL for everything routed through dispatcher.d/, REQ_QUEUE_PRE for pre-up/pre-down/vpn-pre-up/vpn-pre-down. The two queues advance independently, so a hung regular script no longer delays connection activation. Signed-off-by: Stefan Agner <stefan@agner.ch>
This commit is contained in:
parent
19b065bc4a
commit
325b74fbe9
1 changed files with 103 additions and 56 deletions
|
|
@ -34,6 +34,15 @@
|
|||
|
||||
typedef struct Request Request;
|
||||
|
||||
/* pre-up/pre-down requests scan only pre-up.d/ resp. pre-down.d/, so they
|
||||
* never share script files with regular dispatcher.d/ requests and must not
|
||||
* serialize against them. We keep two independent queues. */
|
||||
typedef enum {
|
||||
REQ_QUEUE_NORMAL,
|
||||
REQ_QUEUE_PRE,
|
||||
_REQ_QUEUE_NUM,
|
||||
} RequestQueueIdx;
|
||||
|
||||
typedef struct {
|
||||
GDBusConnection *dbus_connection;
|
||||
GCancellable *quit_cancellable;
|
||||
|
|
@ -50,8 +59,8 @@ typedef struct {
|
|||
|
||||
gboolean persist;
|
||||
|
||||
Request *current_request;
|
||||
GQueue *requests_waiting;
|
||||
Request *current_request[_REQ_QUEUE_NUM];
|
||||
GQueue *requests_waiting[_REQ_QUEUE_NUM];
|
||||
int num_requests_pending;
|
||||
|
||||
bool exit_with_failure;
|
||||
|
|
@ -253,43 +262,67 @@ _idle_timeout_restart(void)
|
|||
|
||||
/*****************************************************************************/
|
||||
|
||||
static RequestQueueIdx
|
||||
request_queue_idx(const Request *request)
|
||||
{
|
||||
if (NM_IN_STRSET(request->action,
|
||||
NMD_ACTION_PRE_UP,
|
||||
NMD_ACTION_VPN_PRE_UP,
|
||||
NMD_ACTION_PRE_DOWN,
|
||||
NMD_ACTION_VPN_PRE_DOWN))
|
||||
return REQ_QUEUE_PRE;
|
||||
return REQ_QUEUE_NORMAL;
|
||||
}
|
||||
|
||||
/**
|
||||
* next_request:
|
||||
* next_request_enqueue:
|
||||
*
|
||||
* @request: (nullable): the request to set as next. If %NULL, dequeue the next
|
||||
* waiting request. Otherwise, try to set the given request.
|
||||
* Try to set @request as the currently active request in its queue. The
|
||||
* current request is a request that has at least one "wait" script;
|
||||
* requests with only "no-wait" scripts are handled right away and not
|
||||
* enqueued. Pre-up/pre-down requests use a separate queue from regular
|
||||
* requests so a slow regular script cannot stall connection activation.
|
||||
*
|
||||
* Sets the currently active request (@current_request). The current request
|
||||
* is a request that has at least on "wait" script, because requests that only
|
||||
* consist of "no-wait" scripts are handled right away and not enqueued to
|
||||
* @requests_waiting nor set as @current_request.
|
||||
*
|
||||
* Returns: %TRUE, if there was currently not request in process and it set
|
||||
* a new request as current.
|
||||
* Returns: %TRUE if the matching queue was idle and @request was set as
|
||||
* current; %FALSE if it was appended to @requests_waiting instead.
|
||||
*/
|
||||
static gboolean
|
||||
next_request(Request *request)
|
||||
next_request_enqueue(Request *request)
|
||||
{
|
||||
if (request) {
|
||||
if (gl.current_request) {
|
||||
g_queue_push_tail(gl.requests_waiting, request);
|
||||
return FALSE;
|
||||
}
|
||||
} else {
|
||||
/* when calling next_request() without explicit @request, we always
|
||||
* forcefully clear @current_request. That one is certainly
|
||||
* handled already. */
|
||||
gl.current_request = NULL;
|
||||
RequestQueueIdx qidx = request_queue_idx(request);
|
||||
|
||||
request = g_queue_pop_head(gl.requests_waiting);
|
||||
if (!request)
|
||||
return FALSE;
|
||||
if (gl.current_request[qidx]) {
|
||||
g_queue_push_tail(gl.requests_waiting[qidx], request);
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
_LOG_R_D(request, "start running ordered scripts...");
|
||||
gl.current_request[qidx] = request;
|
||||
return TRUE;
|
||||
}
|
||||
|
||||
gl.current_request = request;
|
||||
/**
|
||||
* next_request_advance:
|
||||
*
|
||||
* Forcefully clears the current request of the given queue (it must be
|
||||
* fully handled at this point) and pops the next waiting request as
|
||||
* current.
|
||||
*
|
||||
* Returns: %TRUE if there was a waiting request that became current.
|
||||
*/
|
||||
static gboolean
|
||||
next_request_advance(RequestQueueIdx qidx)
|
||||
{
|
||||
Request *request;
|
||||
|
||||
gl.current_request[qidx] = NULL;
|
||||
|
||||
request = g_queue_pop_head(gl.requests_waiting[qidx]);
|
||||
if (!request)
|
||||
return FALSE;
|
||||
|
||||
_LOG_R_D(request, "start running ordered scripts...");
|
||||
gl.current_request[qidx] = request;
|
||||
return TRUE;
|
||||
}
|
||||
|
||||
|
|
@ -420,14 +453,21 @@ complete_request(Request *request)
|
|||
|
||||
_LOG_R_T(request, "completed (%u scripts)", request->scripts->len);
|
||||
|
||||
if (gl.current_request == request)
|
||||
gl.current_request = NULL;
|
||||
{
|
||||
RequestQueueIdx qidx = request_queue_idx(request);
|
||||
|
||||
if (gl.current_request[qidx] == request)
|
||||
gl.current_request[qidx] = NULL;
|
||||
}
|
||||
|
||||
request_free(request);
|
||||
|
||||
nm_assert(gl.num_requests_pending > 0);
|
||||
if (--gl.num_requests_pending <= 0) {
|
||||
nm_assert(!gl.current_request && !g_queue_peek_head(gl.requests_waiting));
|
||||
nm_assert(!gl.current_request[REQ_QUEUE_NORMAL]);
|
||||
nm_assert(!gl.current_request[REQ_QUEUE_PRE]);
|
||||
nm_assert(!g_queue_peek_head(gl.requests_waiting[REQ_QUEUE_NORMAL]));
|
||||
nm_assert(!g_queue_peek_head(gl.requests_waiting[REQ_QUEUE_PRE]));
|
||||
_idle_timeout_restart();
|
||||
}
|
||||
}
|
||||
|
|
@ -435,8 +475,9 @@ complete_request(Request *request)
|
|||
static void
|
||||
complete_script(ScriptInfo *script)
|
||||
{
|
||||
Request *request = script->request;
|
||||
gboolean wait = script->wait;
|
||||
Request *request = script->request;
|
||||
gboolean wait = script->wait;
|
||||
RequestQueueIdx qidx = request_queue_idx(request);
|
||||
|
||||
if (script->pid != -1 || script->stdout_fd != -1) {
|
||||
/* Wait that process has terminated and stdout is closed */
|
||||
|
|
@ -454,7 +495,7 @@ complete_script(ScriptInfo *script)
|
|||
return;
|
||||
}
|
||||
|
||||
nm_assert(!wait || gl.current_request == request);
|
||||
nm_assert(!wait || gl.current_request[qidx] == request);
|
||||
|
||||
/* Try to complete the request. @request will be possibly free'd,
|
||||
* making @script and @request a dangling pointer. */
|
||||
|
|
@ -463,32 +504,34 @@ complete_script(ScriptInfo *script)
|
|||
if (!wait) {
|
||||
/* this was a "no-wait" script. We either completed the request,
|
||||
* or there is nothing to do. Especially, there is no need to
|
||||
* queue the next_request() -- because no-wait scripts don't block
|
||||
* queue next_request_advance() -- because no-wait scripts don't block
|
||||
* requests. However, if this was the last "no-wait" script and
|
||||
* there are "wait" scripts ready to run, launch them.
|
||||
*/
|
||||
if (gl.current_request == request && gl.current_request->num_scripts_nowait == 0) {
|
||||
if (dispatch_one_script(gl.current_request))
|
||||
if (gl.current_request[qidx] == request
|
||||
&& gl.current_request[qidx]->num_scripts_nowait == 0) {
|
||||
if (dispatch_one_script(gl.current_request[qidx]))
|
||||
return;
|
||||
|
||||
complete_request(gl.current_request);
|
||||
complete_request(gl.current_request[qidx]);
|
||||
} else
|
||||
return;
|
||||
} else {
|
||||
/* if the script is a "wait" script, we already tried above to
|
||||
* dispatch the next script. As we didn't do that, it means we
|
||||
* just completed the last script of @request and we can continue
|
||||
* with the next request...
|
||||
* with the next request in the same queue...
|
||||
*
|
||||
* Also, it cannot be that there is another request currently being
|
||||
* processed because only requests with "wait" scripts can become
|
||||
* @current_request. As there can only be one "wait" script running
|
||||
* at any time, it means complete_request() above completed @request. */
|
||||
nm_assert(!gl.current_request);
|
||||
* processed in this queue because only requests with "wait" scripts
|
||||
* can become @current_request. As there can only be one "wait" script
|
||||
* running at any time per queue, it means complete_request() above
|
||||
* completed @request. */
|
||||
nm_assert(!gl.current_request[qidx]);
|
||||
}
|
||||
|
||||
while (next_request(NULL)) {
|
||||
request = gl.current_request;
|
||||
while (next_request_advance(qidx)) {
|
||||
request = gl.current_request[qidx];
|
||||
|
||||
if (dispatch_one_script(request))
|
||||
return;
|
||||
|
|
@ -497,7 +540,7 @@ complete_script(ScriptInfo *script)
|
|||
* now, or when all pending "no-wait" scripts return. */
|
||||
complete_request(request);
|
||||
|
||||
/* We can immediately start next_request(), because our current
|
||||
/* We can immediately start next_request_advance(), because our current
|
||||
* @request has obviously no more "wait" scripts either.
|
||||
* Repeat... */
|
||||
}
|
||||
|
|
@ -1090,21 +1133,23 @@ _handle_action(GDBusMethodInvocation *invocation, GVariant *parameters, gboolean
|
|||
|
||||
if (num_nowait < request->scripts->len) {
|
||||
/* The request has at least one wait script.
|
||||
* Try next_request() to schedule the request for
|
||||
* Try next_request_enqueue() to schedule the request for
|
||||
* execution. This either enqueues the request or
|
||||
* sets it as gl.current_request. */
|
||||
if (next_request(request)) {
|
||||
/* @request is now @current_request. Go ahead and
|
||||
* sets it as gl.current_request[qidx]. */
|
||||
if (next_request_enqueue(request)) {
|
||||
/* @request is now @current_request[qidx]. Go ahead and
|
||||
* schedule the first wait script. */
|
||||
if (!dispatch_one_script(request)) {
|
||||
RequestQueueIdx qidx = request_queue_idx(request);
|
||||
|
||||
/* If that fails, we might be already finished with the
|
||||
* request. Try complete_request(). */
|
||||
complete_request(request);
|
||||
|
||||
if (next_request(NULL)) {
|
||||
/* As @request was successfully scheduled as next_request(), there is no
|
||||
if (next_request_advance(qidx)) {
|
||||
/* As @request was successfully scheduled, there is no
|
||||
* other request in queue that can be scheduled afterwards. Assert against
|
||||
* that, but call next_request() to clear current_request. */
|
||||
* that, but call next_request_advance() to clear current_request[qidx]. */
|
||||
g_assert_not_reached();
|
||||
}
|
||||
}
|
||||
|
|
@ -1114,8 +1159,8 @@ _handle_action(GDBusMethodInvocation *invocation, GVariant *parameters, gboolean
|
|||
* the request right away (we might have failed to schedule any
|
||||
* of the scripts). It will be either completed now, or later
|
||||
* when the pending scripts return.
|
||||
* We don't enqueue it to gl.requests_waiting.
|
||||
* There is no need to handle next_request(), because @request is
|
||||
* We don't enqueue it to gl.requests_waiting[].
|
||||
* There is no need to handle next_request_advance(), because @request is
|
||||
* not the current request anyway and does not interfere with requests
|
||||
* that have any "wait" scripts. */
|
||||
complete_request(request);
|
||||
|
|
@ -1510,7 +1555,8 @@ main(int argc, char **argv)
|
|||
|
||||
_LOG_X_D("dbus: unique name: %s", g_dbus_connection_get_unique_name(gl.dbus_connection));
|
||||
|
||||
gl.requests_waiting = g_queue_new();
|
||||
gl.requests_waiting[REQ_QUEUE_NORMAL] = g_queue_new();
|
||||
gl.requests_waiting[REQ_QUEUE_PRE] = g_queue_new();
|
||||
|
||||
_idle_timeout_restart();
|
||||
|
||||
|
|
@ -1551,7 +1597,8 @@ done:
|
|||
nm_steal_int(&gl.service_regist_id));
|
||||
}
|
||||
|
||||
nm_clear_pointer(&gl.requests_waiting, g_queue_free);
|
||||
nm_clear_pointer(&gl.requests_waiting[REQ_QUEUE_NORMAL], g_queue_free);
|
||||
nm_clear_pointer(&gl.requests_waiting[REQ_QUEUE_PRE], g_queue_free);
|
||||
|
||||
nm_clear_g_source_inst(&gl.source_idle_timeout);
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue