From d9334503d77167463edef8cf98445c3678271c5a Mon Sep 17 00:00:00 2001 From: Dan Winship Date: Mon, 31 Mar 2014 08:38:06 -0400 Subject: [PATCH 1/6] dispatcher: bump script timeout up to 20 seconds (rh #1048345) The dispatcher would kill scripts after 3 seconds, but on heavily-loaded machines, that was sometimes too short even for simple scripts. Bump the timeout up to 20 seconds instead (and change the 10-second quit-on-idle timer to not run when a script is running). Also change the D-Bus call timeout in the daemon to 30 seconds, so that it only triggers if something goes really wrong and the action timeout fails. --- callouts/nm-dispatcher-action.c | 24 ++++++++++++++++-------- src/nm-dispatcher.c | 2 +- 2 files changed, 17 insertions(+), 9 deletions(-) diff --git a/callouts/nm-dispatcher-action.c b/callouts/nm-dispatcher-action.c index f48ff0a23a..11aa5dc4f1 100644 --- a/callouts/nm-dispatcher-action.c +++ b/callouts/nm-dispatcher-action.c @@ -148,11 +148,19 @@ quit_timeout_cb (gpointer user_data) return FALSE; } +static void +quit_timeout_cancel (Handler *h) +{ + if (h->quit_id) { + g_source_remove (h->quit_id); + h->quit_id = 0; + } +} + static void quit_timeout_reschedule (Handler *h) { - if (h->quit_id) - g_source_remove (h->quit_id); + quit_timeout_cancel (h); if (!h->persist) h->quit_id = g_timeout_add_seconds (10, quit_timeout_cb, NULL); } @@ -165,8 +173,6 @@ next_script (gpointer user_data) GValueArray *item; guint i; - quit_timeout_reschedule (request->handler); - request->idx++; if (request->idx < request->scripts->len) { dispatch_one_script (request); @@ -203,9 +209,11 @@ next_script (gpointer user_data) } dbus_g_method_return (request->context, results); - - request_free (request); g_ptr_array_unref (results); + + quit_timeout_reschedule (request->handler); + request_free (request); + return FALSE; } @@ -353,7 +361,7 @@ dispatch_one_script (Request *request) if (g_spawn_async ("/", argv, request->envp, G_SPAWN_DO_NOT_REAP_CHILD, child_setup, request, &script->pid, &error)) { request->script_watch_id = g_child_watch_add (script->pid, (GChildWatchFunc) script_watch_cb, script); - request->script_timeout_id = g_timeout_add_seconds (3, script_timeout_cb, script); + request->script_timeout_id = g_timeout_add_seconds (20, script_timeout_cb, script); } else { g_warning ("Failed to execute script '%s': (%d) %s", script->script, error->code, error->message); @@ -435,7 +443,7 @@ impl_dispatch (Handler *h, return; } - quit_timeout_reschedule (h); + quit_timeout_cancel (h); request = g_malloc0 (sizeof (*request)); request->handler = h; diff --git a/src/nm-dispatcher.c b/src/nm-dispatcher.c index 7cf93f8411..a8fee1f3f3 100644 --- a/src/nm-dispatcher.c +++ b/src/nm-dispatcher.c @@ -342,7 +342,7 @@ _dispatcher_call (DispatcherAction action, dispatcher_done_cb, info, (GDestroyNotify) dispatcher_info_free, - 15000, + 30000, G_TYPE_STRING, action_to_string (action), DBUS_TYPE_G_MAP_OF_MAP_OF_VARIANT, connection_hash, DBUS_TYPE_G_MAP_OF_VARIANT, connection_props, From 731ca771f840d34dae1bbef7289d9a9eb51563bc Mon Sep 17 00:00:00 2001 From: Dan Winship Date: Mon, 31 Mar 2014 09:04:46 -0400 Subject: [PATCH 2/6] dispatcher: only dispatch one request at a time (rh #1061212) The dispatcher only runs one script at a time for any given request, but would dispatch multiple requests in parallel. So if a device was brought up and then back down quickly, it could end up dispatching the "down" scripts while the "up" scripts were still running. Or if two devices came up at the same time, two instances of the same "up" script might run at the same time, which could cause problems if they both tried to modify the same file. Fix this by only dispatching the scripts for a single request at a time. --- callouts/nm-dispatcher-action.c | 40 ++++++++++++++++++++++++++++----- 1 file changed, 34 insertions(+), 6 deletions(-) diff --git a/callouts/nm-dispatcher-action.c b/callouts/nm-dispatcher-action.c index 11aa5dc4f1..1a8c99fc00 100644 --- a/callouts/nm-dispatcher-action.c +++ b/callouts/nm-dispatcher-action.c @@ -46,10 +46,14 @@ static GMainLoop *loop = NULL; static gboolean debug = FALSE; +typedef struct Request Request; + typedef struct { GObject parent; /* Private data */ + Request *current_request; + GQueue *pending_requests; guint quit_id; gboolean persist; } Handler; @@ -94,8 +98,6 @@ handler_class_init (HandlerClass *h_class) { } -typedef struct Request Request; - static void dispatch_one_script (Request *request); typedef struct { @@ -165,10 +167,32 @@ quit_timeout_reschedule (Handler *h) h->quit_id = g_timeout_add_seconds (10, quit_timeout_cb, NULL); } +static void +start_request (Request *request) +{ + request->handler->current_request = request; + dispatch_one_script (request); +} + +static void +next_request (Handler *h) +{ + Request *request = g_queue_pop_head (h->pending_requests); + + if (request) { + start_request (request); + return; + } + + h->current_request = NULL; + quit_timeout_reschedule (h); +} + static gboolean next_script (gpointer user_data) { Request *request = user_data; + Handler *h = request->handler; GPtrArray *results; GValueArray *item; guint i; @@ -210,10 +234,9 @@ next_script (gpointer user_data) dbus_g_method_return (request->context, results); g_ptr_array_unref (results); - - quit_timeout_reschedule (request->handler); request_free (request); + next_request (h); return FALSE; } @@ -482,8 +505,10 @@ impl_dispatch (Handler *h, } g_slist_free (sorted_scripts); - /* start dispatching scripts */ - dispatch_one_script (request); + if (h->current_request) + g_queue_push_tail (h->pending_requests, request); + else + start_request (request); } static void @@ -661,6 +686,7 @@ main (int argc, char **argv) if (!handler) return 1; handler->persist = persist; + handler->pending_requests = g_queue_new (); dbus_g_object_type_install_info (HANDLER_TYPE, &dbus_glib_nm_dispatcher_object_info); dbus_g_connection_register_g_object (bus, @@ -672,7 +698,9 @@ main (int argc, char **argv) g_main_loop_run (loop); + g_queue_free (handler->pending_requests); g_object_unref (handler); + dbus_g_connection_unref (bus); if (!debug) From d53357f4202f46dbbbd8c67c73a5d50bd11aba38 Mon Sep 17 00:00:00 2001 From: Dan Winship Date: Mon, 31 Mar 2014 13:02:16 -0400 Subject: [PATCH 3/6] dispatcher: tell systemd to not kill dispatcher children You're supposed to be able to use dispatcher scripts to spawn long-running processes, but currently systemd will kill them when nm-dispatcher exits. Fix that. https://bugzilla.gnome.org/show_bug.cgi?id=725492 --- data/NetworkManager-dispatcher.service.in | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/data/NetworkManager-dispatcher.service.in b/data/NetworkManager-dispatcher.service.in index 442f67aaf8..e1b90036f3 100644 --- a/data/NetworkManager-dispatcher.service.in +++ b/data/NetworkManager-dispatcher.service.in @@ -6,6 +6,10 @@ Type=dbus BusName=org.freedesktop.nm_dispatcher ExecStart=@libexecdir@/nm-dispatcher.action +# We want to allow scripts to spawn long-running daemons, so tell +# systemd to not clean up when nm-dispatcher exits +KillMode=process + [Install] Alias=dbus-org.freedesktop.nm-dispatcher.service From 45ed459ae32976a120f607046021b48bf728c1d6 Mon Sep 17 00:00:00 2001 From: Dan Winship Date: Mon, 31 Mar 2014 13:15:19 -0400 Subject: [PATCH 4/6] dispatcher: Leave PATH set in the dispatcher script environment https://bugzilla.gnome.org/show_bug.cgi?id=724657 --- callouts/nm-dispatcher-utils.c | 25 ++++++++++++++++--------- 1 file changed, 16 insertions(+), 9 deletions(-) diff --git a/callouts/nm-dispatcher-utils.c b/callouts/nm-dispatcher-utils.c index 4b333d2638..1f99a0b6c1 100644 --- a/callouts/nm-dispatcher-utils.c +++ b/callouts/nm-dispatcher-utils.c @@ -345,10 +345,10 @@ nm_dispatcher_utils_construct_envp (const char *action, char **out_iface) { const char *iface = NULL, *ip_iface = NULL; - const char *uuid = NULL, *id = NULL; + const char *uuid = NULL, *id = NULL, *path; NMDeviceState dev_state = NM_DEVICE_STATE_UNKNOWN; GValue *value; - char **envp = NULL; + char **envp = NULL, *path_item; GSList *items = NULL, *iter; guint i; GHashTable *con_setting_hash; @@ -359,7 +359,7 @@ nm_dispatcher_utils_construct_envp (const char *action, /* Hostname changes don't require a device nor contain a connection */ if (!strcmp (action, "hostname")) - return g_new0 (char *, 1); + goto done; /* Canonicalize the VPN interface name; "" is used when passing it through * D-Bus so make sure that's fixed up here. @@ -445,12 +445,6 @@ nm_dispatcher_utils_construct_envp (const char *action, items = construct_ip6_items (items, vpn_ip6_props, "VPN_"); } - /* Convert the list to an environment pointer */ - envp = g_new0 (char *, g_slist_length (items) + 1); - for (iter = items, i = 0; iter; iter = g_slist_next (iter), i++) - envp[i] = (char *) iter->data; - g_slist_free (items); - /* Backwards compat: 'iface' is set in this order: * 1) VPN interface name * 2) Device IP interface name @@ -463,6 +457,19 @@ nm_dispatcher_utils_construct_envp (const char *action, else *out_iface = g_strdup (iface); + done: + path = g_getenv ("PATH"); + if (path) { + path_item = g_strdup_printf ("PATH=%s", path); + items = g_slist_prepend (items, path_item); + } + + /* Convert the list to an environment pointer */ + envp = g_new0 (char *, g_slist_length (items) + 1); + for (iter = items, i = 0; iter; iter = g_slist_next (iter), i++) + envp[i] = (char *) iter->data; + g_slist_free (items); + return envp; } From 3b197de59fb330d6a1aa40f76e3e7b1c72686fd7 Mon Sep 17 00:00:00 2001 From: Dan Winship Date: Mon, 31 Mar 2014 14:26:28 -0400 Subject: [PATCH 5/6] dispatcher: add better debugging Add LOGD_DISPATCH, and if it's set to DEBUG, tell nm-dispatcher to log additional information about each script it runs. --- callouts/nm-dispatcher-action.c | 28 ++++++++++++++++++++++++---- callouts/nm-dispatcher.xml | 6 ++++++ src/logging/nm-logging.c | 3 ++- src/logging/nm-logging.h | 1 + src/nm-dispatcher.c | 29 +++++++++++++++++++---------- 5 files changed, 52 insertions(+), 15 deletions(-) diff --git a/callouts/nm-dispatcher-action.c b/callouts/nm-dispatcher-action.c index 1a8c99fc00..23ae110772 100644 --- a/callouts/nm-dispatcher-action.c +++ b/callouts/nm-dispatcher-action.c @@ -83,6 +83,7 @@ impl_dispatch (Handler *h, const char *vpn_ip_iface, GHashTable *vpn_ip4_props, GHashTable *vpn_ip6_props, + gboolean request_debug, DBusGMethodInvocation *context); #include "nm-dispatcher-glue.h" @@ -116,6 +117,8 @@ struct Request { char *action; char *iface; char **envp; + gboolean debug; + GPtrArray *scripts; /* list of ScriptInfo */ guint idx; @@ -170,6 +173,11 @@ quit_timeout_reschedule (Handler *h) static void start_request (Request *request) { + if (request->iface) + g_message ("Dispatching action '%s' for %s", request->action, request->iface); + else + g_message ("Dispatching action '%s'", request->action); + request->handler->current_request = request; dispatch_one_script (request); } @@ -234,6 +242,13 @@ next_script (gpointer user_data) dbus_g_method_return (request->context, results); g_ptr_array_unref (results); + + if (request->debug) { + if (request->iface) + g_message ("Dispatch '%s' on %s complete", request->action, request->iface); + else + g_message ("Dispatch '%s' complete", request->action); + } request_free (request); next_request (h); @@ -271,7 +286,10 @@ script_watch_cb (GPid pid, gint status, gpointer user_data) script->script); } - if (script->result != DISPATCH_RESULT_SUCCESS) { + if (script->result == DISPATCH_RESULT_SUCCESS) { + if (script->request->debug) + g_message ("Script '%s' complete", script->script); + } else { script->result = DISPATCH_RESULT_FAILED; g_warning ("%s", script->error); } @@ -379,8 +397,8 @@ dispatch_one_script (Request *request) argv[2] = request->action; argv[3] = NULL; - if (debug) - g_message ("Script: %s %s %s", script->script, request->iface ? request->iface : "(none)", request->action); + if (request->debug) + g_message ("Running script '%s'", script->script); if (g_spawn_async ("/", argv, request->envp, G_SPAWN_DO_NOT_REAP_CHILD, child_setup, request, &script->pid, &error)) { request->script_watch_id = g_child_watch_add (script->pid, (GChildWatchFunc) script_watch_cb, script); @@ -451,6 +469,7 @@ impl_dispatch (Handler *h, const char *vpn_ip_iface, GHashTable *vpn_ip4_props, GHashTable *vpn_ip6_props, + gboolean request_debug, DBusGMethodInvocation *context) { GSList *sorted_scripts = NULL; @@ -470,6 +489,7 @@ impl_dispatch (Handler *h, request = g_malloc0 (sizeof (*request)); request->handler = h; + request->debug = request_debug || debug; request->context = context; request->action = g_strdup (str_action); @@ -486,7 +506,7 @@ impl_dispatch (Handler *h, vpn_ip6_props, &iface); - if (debug) { + if (request->debug) { g_message ("------------ Action ID %p '%s' Interface %s Environment ------------", context, str_action, iface ? iface : "(none)"); for (p = request->envp; *p; p++) diff --git a/callouts/nm-dispatcher.xml b/callouts/nm-dispatcher.xml index cba61508db..1ae138b35b 100644 --- a/callouts/nm-dispatcher.xml +++ b/callouts/nm-dispatcher.xml @@ -75,6 +75,12 @@ + + + Whether to log debug output. + + + Results of dispatching operations. Each element of the returned diff --git a/src/logging/nm-logging.c b/src/logging/nm-logging.c index 5ac2671679..a7cd19101e 100644 --- a/src/logging/nm-logging.c +++ b/src/logging/nm-logging.c @@ -50,7 +50,7 @@ nm_log_handler (const gchar *log_domain, LOGD_CORE | LOGD_DEVICE | LOGD_OLPC_MESH | LOGD_WIMAX | \ LOGD_INFINIBAND | LOGD_FIREWALL | LOGD_ADSL | LOGD_BOND | \ LOGD_VLAN | LOGD_BRIDGE | LOGD_DBUS_PROPS | LOGD_TEAM | LOGD_CONCHECK | \ - LOGD_DCB) + LOGD_DCB | LOGD_DISPATCH) #define LOGD_DEFAULT (LOGD_ALL & ~(LOGD_WIFI_SCAN | LOGD_DBUS_PROPS)) @@ -108,6 +108,7 @@ static const LogDesc domain_descs[] = { { LOGD_TEAM, "TEAM" }, { LOGD_CONCHECK, "CONCHECK" }, { LOGD_DCB, "DCB" }, + { LOGD_DISPATCH, "DISPATCH" }, { 0, NULL } }; diff --git a/src/logging/nm-logging.h b/src/logging/nm-logging.h index d9e483c744..8287ab9b99 100644 --- a/src/logging/nm-logging.h +++ b/src/logging/nm-logging.h @@ -62,6 +62,7 @@ enum { LOGD_TEAM = (1LL << 32), LOGD_CONCHECK = (1LL << 33), LOGD_DCB = (1LL << 34), /* Data Center Bridging */ + LOGD_DISPATCH = (1LL << 35), }; #define LOGD_DHCP (LOGD_DHCP4 | LOGD_DHCP6) diff --git a/src/nm-dispatcher.c b/src/nm-dispatcher.c index a8fee1f3f3..63773862c6 100644 --- a/src/nm-dispatcher.c +++ b/src/nm-dispatcher.c @@ -179,16 +179,16 @@ dispatcher_done_cb (DBusGProxy *proxy, DBusGProxyCall *call, gpointer user_data) DispatchResult result; if (item->n_values != 3) { - nm_log_dbg (LOGD_CORE, "Unexpected number of items in " - "dispatcher result (got %d, expectd 3)", - item->n_values); + nm_log_dbg (LOGD_DISPATCH, "Unexpected number of items in " + "dispatcher result (got %d, expectd 3)", + item->n_values); goto next; } /* Script */ tmp = g_value_array_get_nth (item, 0); if (G_VALUE_TYPE (tmp) != G_TYPE_STRING) { - nm_log_dbg (LOGD_CORE, "Dispatcher result %d element 0 invalid type %s", + nm_log_dbg (LOGD_DISPATCH, "Dispatcher result %d element 0 invalid type %s", i, G_VALUE_TYPE_NAME (tmp)); goto next; } @@ -197,7 +197,7 @@ dispatcher_done_cb (DBusGProxy *proxy, DBusGProxyCall *call, gpointer user_data) /* Result */ tmp = g_value_array_get_nth (item, 1); if (G_VALUE_TYPE (tmp) != G_TYPE_UINT) { - nm_log_dbg (LOGD_CORE, "Dispatcher result %d element 1 invalid type %s", + nm_log_dbg (LOGD_DISPATCH, "Dispatcher result %d element 1 invalid type %s", i, G_VALUE_TYPE_NAME (tmp)); goto next; } @@ -206,14 +206,14 @@ dispatcher_done_cb (DBusGProxy *proxy, DBusGProxyCall *call, gpointer user_data) /* Error */ tmp = g_value_array_get_nth (item, 2); if (G_VALUE_TYPE (tmp) != G_TYPE_STRING) { - nm_log_dbg (LOGD_CORE, "Dispatcher result %d element 2 invalid type %s", + nm_log_dbg (LOGD_DISPATCH, "Dispatcher result %d element 2 invalid type %s", i, G_VALUE_TYPE_NAME (tmp)); goto next; } err = g_value_get_string (tmp); if (result != DISPATCH_RESULT_SUCCESS) { - nm_log_warn (LOGD_CORE, "Dispatcher script \"%s\" failed with %s: %s", + nm_log_warn (LOGD_DISPATCH, "Dispatcher script \"%s\" failed with %s: %s", script, dispatch_result_to_string (result), err); } @@ -223,7 +223,7 @@ next: g_ptr_array_free (results, TRUE); } else { g_assert (error); - nm_log_warn (LOGD_CORE, "Dispatcher failed: (%d) %s", error->code, error->message); + nm_log_warn (LOGD_DISPATCH, "Dispatcher failed: (%d) %s", error->code, error->message); } if (info->callback) @@ -285,8 +285,16 @@ _dispatcher_call (DispatcherAction action, DispatchInfo *info; /* All actions except 'hostname' require a device */ - if (action != DISPATCHER_ACTION_HOSTNAME) + if (action == DISPATCHER_ACTION_HOSTNAME) { + nm_log_dbg (LOGD_DISPATCH, "dispatching action '%s'", + action_to_string (action)); + } else { g_return_val_if_fail (NM_IS_DEVICE (device), NULL); + + nm_log_dbg (LOGD_DISPATCH, "(%s) dispatching action '%s'", + nm_device_get_iface (device), action_to_string (action)); + } + /* VPN actions require at least an IPv4 config (for now) */ if (action == DISPATCHER_ACTION_VPN_UP) g_return_val_if_fail (vpn_ip4_config != NULL, NULL); @@ -297,7 +305,7 @@ _dispatcher_call (DispatcherAction action, NM_DISPATCHER_DBUS_PATH, NM_DISPATCHER_DBUS_IFACE); if (!proxy) { - nm_log_err (LOGD_CORE, "could not get dispatcher proxy!"); + nm_log_err (LOGD_DISPATCH, "could not get dispatcher proxy!"); return NULL; } @@ -354,6 +362,7 @@ _dispatcher_call (DispatcherAction action, G_TYPE_STRING, vpn_iface ? vpn_iface : "", DBUS_TYPE_G_MAP_OF_VARIANT, vpn_ip4_props, DBUS_TYPE_G_MAP_OF_VARIANT, vpn_ip6_props, + G_TYPE_BOOLEAN, nm_logging_enabled (LOGL_DEBUG, LOGD_DISPATCH), G_TYPE_INVALID); g_hash_table_destroy (connection_hash); g_hash_table_destroy (connection_props); From bb1c46f66d4b15329d2fb7613e3a08d5d52c721e Mon Sep 17 00:00:00 2001 From: Dan Winship Date: Mon, 31 Mar 2014 12:57:16 -0400 Subject: [PATCH 6/6] man: updates to the dispatcher section Mention that the dispatcher kills slow scripts (https://bugzilla.redhat.com/show_bug.cgi?id=982734) and that it runs all scripts that have been queued, even if later events make that "wrong". (Mentioned in https://bugzilla.gnome.org/show_bug.cgi?id=721971, although that bug proposes changing this behavior, not documenting it.) --- man/NetworkManager.xml | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/man/NetworkManager.xml b/man/NetworkManager.xml index 5ef19b890a..70e49aa16a 100644 --- a/man/NetworkManager.xml +++ b/man/NetworkManager.xml @@ -194,6 +194,15 @@ In case of VPN, VPN_IP_IFACE is set, and IP4_*, IP6_* variables with VPN prefix are exported too, like VPN_IP4_ADDRESS_0, VPN_IP4_NUM_ADDRESSES. + + Dispatcher scripts are run one at a time, but asynchronously from the main + NetworkManager process, and will be killed if they run for too long. If your script + might take arbitrarily long to complete, you should spawn a child process and have the + parent return immediately. Also beware that once a script is queued, it will always be + run, even if a later event renders it obsolete. (Eg, if an interface goes up, and then + back down again quickly, it is possible that one or more "up" scripts will be run + after the interface has gone down.) +