diff --git a/src/devices/team/nm-device-team.c b/src/devices/team/nm-device-team.c index c8f510b9b5..de0c2ee7ed 100644 --- a/src/devices/team/nm-device-team.c +++ b/src/devices/team/nm-device-team.c @@ -40,6 +40,7 @@ #include "nm-enum-types.h" #include "nm-team-enum-types.h" #include "nm-core-internal.h" +#include "gsystem-local-alloc.h" #include "nm-device-team-glue.h" @@ -65,6 +66,8 @@ enum { LAST_PROP }; +static gboolean teamd_start (NMDevice *device, NMSettingTeam *s_team); + /******************************************************************/ static guint32 @@ -259,18 +262,7 @@ master_update_slave_connection (NMDevice *self, /******************************************************************/ static void -teamd_timeout_remove (NMDevice *device) -{ - NMDeviceTeamPrivate *priv = NM_DEVICE_TEAM_GET_PRIVATE (device); - - if (priv->teamd_timeout) { - g_source_remove (priv->teamd_timeout); - priv->teamd_timeout = 0; - } -} - -static void -teamd_cleanup (NMDevice *device, gboolean device_state_failed) +teamd_cleanup (NMDevice *device, gboolean free_tdc) { NMDeviceTeamPrivate *priv = NM_DEVICE_TEAM_GET_PRIVATE (device); @@ -279,24 +271,21 @@ teamd_cleanup (NMDevice *device, gboolean device_state_failed) priv->teamd_process_watch = 0; } + if (priv->teamd_timeout) { + g_source_remove (priv->teamd_timeout); + priv->teamd_timeout = 0; + } + if (priv->teamd_pid > 0) { nm_utils_kill_child_async (priv->teamd_pid, SIGTERM, LOGD_TEAM, "teamd", 2000, NULL, NULL); priv->teamd_pid = 0; } - if (priv->tdc) { + if (priv->tdc && free_tdc) { teamdctl_disconnect (priv->tdc); teamdctl_free (priv->tdc); priv->tdc = NULL; } - - teamd_timeout_remove (device); - - if (device_state_failed) { - if (nm_device_is_activating (device) || - (nm_device_get_state (device) == NM_DEVICE_STATE_ACTIVATED)) - nm_device_state_changed (device, NM_DEVICE_STATE_FAILED, NM_DEVICE_STATE_REASON_TEAMD_CONTROL_FAILED); - } } static gboolean @@ -307,11 +296,18 @@ teamd_timeout_cb (gpointer user_data) NMDeviceTeamPrivate *priv = NM_DEVICE_TEAM_GET_PRIVATE (device); g_return_val_if_fail (priv->teamd_timeout, FALSE); + priv->teamd_timeout = 0; - _LOGI (LOGD_TEAM, "teamd timed out."); - teamd_cleanup (device, TRUE); + if (priv->teamd_pid && !priv->tdc) { + /* Timed out launching our own teamd process */ + _LOGW (LOGD_TEAM, "teamd timed out."); + teamd_cleanup (device, TRUE); - return FALSE; + g_warn_if_fail (nm_device_is_activating (device)); + nm_device_state_changed (device, NM_DEVICE_STATE_FAILED, NM_DEVICE_STATE_REASON_TEAMD_CONTROL_FAILED); + } + + return G_SOURCE_REMOVE; } static void @@ -328,27 +324,54 @@ teamd_dbus_appeared (GDBusConnection *connection, g_return_if_fail (priv->teamd_dbus_watch); _LOGI (LOGD_TEAM, "teamd appeared on D-Bus"); - teamd_timeout_remove (device); nm_device_queue_recheck_assume (device); + /* If another teamd grabbed the bus name while our teamd was starting, + * just ignore the death of our teamd and run with the existing one. + */ + if (priv->teamd_process_watch) { + gs_unref_variant GVariant *ret = NULL; + guint32 pid; + + ret = g_dbus_connection_call_sync (connection, + "org.freedesktop.DBus", + "/org/freedesktop/DBus", + "org.freedesktop.DBus", + "GetConnectionUnixProcessID", + g_variant_new ("(s)", name_owner), + NULL, + G_DBUS_CALL_FLAGS_NO_AUTO_START, + 2000, + NULL, + NULL); + g_variant_get (ret, "(u)", &pid); + + if (pid != priv->teamd_pid) + teamd_cleanup (device, FALSE); + } + + /* Grab a teamd control handle even if we aren't going to use it + * immediately. But if we are, and grabbing it failed, fail the + * device activation. + */ success = ensure_teamd_connection (device); if (nm_device_get_state (device) == NM_DEVICE_STATE_PREPARE) { if (success) nm_device_activate_schedule_stage2_device_config (device); else if (!nm_device_uses_assumed_connection (device)) nm_device_state_changed (device, NM_DEVICE_STATE_FAILED, NM_DEVICE_STATE_REASON_TEAMD_CONTROL_FAILED); - return; } } static void -teamd_dbus_vanished (GDBusConnection *connection, +teamd_dbus_vanished (GDBusConnection *dbus_connection, const gchar *name, gpointer user_data) { NMDeviceTeam *self = NM_DEVICE_TEAM (user_data); NMDeviceTeamPrivate *priv = NM_DEVICE_TEAM_GET_PRIVATE (self); NMDevice *device = NM_DEVICE (self); + NMDeviceState state = nm_device_get_state (device); g_return_if_fail (priv->teamd_dbus_watch); @@ -363,6 +386,15 @@ teamd_dbus_vanished (GDBusConnection *connection, _LOGI (LOGD_TEAM, "teamd vanished from D-Bus"); teamd_cleanup (device, TRUE); + + /* Attempt to respawn teamd */ + if (state >= NM_DEVICE_STATE_PREPARE && state <= NM_DEVICE_STATE_ACTIVATED) { + NMConnection *connection = nm_device_get_connection (device); + + g_assert (connection); + if (!teamd_start (device, nm_connection_get_setting_team (connection))) + nm_device_state_changed (device, NM_DEVICE_STATE_FAILED, NM_DEVICE_STATE_REASON_TEAMD_CONTROL_FAILED); + } } static void @@ -371,32 +403,49 @@ teamd_process_watch_cb (GPid pid, gint status, gpointer user_data) NMDeviceTeam *self = NM_DEVICE_TEAM (user_data); NMDeviceTeamPrivate *priv = NM_DEVICE_TEAM_GET_PRIVATE (self); NMDevice *device = NM_DEVICE (self); + NMDeviceState state = nm_device_get_state (device); g_return_if_fail (priv->teamd_process_watch); - _LOGI (LOGD_TEAM, "teamd died with status %d", status); - priv->teamd_process_watch = 0; + _LOGD (LOGD_TEAM, "teamd died with status %d", status); priv->teamd_pid = 0; - teamd_cleanup (device, TRUE); + priv->teamd_process_watch = 0; + + /* If teamd quit within 5 seconds of starting, it's probably hosed + * and will just die again, so fail the activation. + */ + if (priv->teamd_timeout && + (state >= NM_DEVICE_STATE_PREPARE) && + (state <= NM_DEVICE_STATE_ACTIVATED)) { + _LOGW (LOGD_TEAM, "teamd process quit unexpectedly; failing activation"); + teamd_cleanup (device, TRUE); + nm_device_state_changed (device, NM_DEVICE_STATE_FAILED, NM_DEVICE_STATE_REASON_TEAMD_CONTROL_FAILED); + } } -static void -nm_device_team_watch_dbus (NMDeviceTeam *self) +static gboolean +teamd_kill (NMDeviceTeam *self, const char *teamd_binary, GError **error) { - NMDeviceTeamPrivate *priv = NM_DEVICE_TEAM_GET_PRIVATE (self); - const char *iface = nm_device_get_ip_iface (NM_DEVICE (self)); - char *tmp_str = NULL; + gs_unref_ptrarray GPtrArray *argv = NULL; + gs_free char *tmp_str = NULL; - /* Register D-Bus name watcher */ - tmp_str = g_strdup_printf ("org.libteam.teamd.%s", iface); - priv->teamd_dbus_watch = g_bus_watch_name (G_BUS_TYPE_SYSTEM, - tmp_str, - G_BUS_NAME_WATCHER_FLAGS_NONE, - teamd_dbus_appeared, - teamd_dbus_vanished, - NM_DEVICE (self), - NULL); - g_free (tmp_str); + if (!teamd_binary) { + teamd_binary = nm_utils_find_helper ("teamd", NULL, NULL); + if (!teamd_binary) { + _LOGW (LOGD_TEAM, "Activation: (team) failed to start teamd: teamd binary not found"); + return FALSE; + } + } + + argv = g_ptr_array_new (); + g_ptr_array_add (argv, (gpointer) teamd_binary); + g_ptr_array_add (argv, (gpointer) "-k"); + g_ptr_array_add (argv, (gpointer) "-t"); + g_ptr_array_add (argv, (gpointer) nm_device_get_iface (NM_DEVICE (self))); + g_ptr_array_add (argv, NULL); + + _LOGD (LOGD_TEAM, "running: %s", (tmp_str = g_strjoinv (" ", (gchar **) argv->pdata))); + return g_spawn_sync ("/", (char **) argv->pdata, NULL, 0, NULL, NULL, NULL, NULL, NULL, error); } static gboolean @@ -405,23 +454,11 @@ teamd_start (NMDevice *device, NMSettingTeam *s_team) NMDeviceTeam *self = NM_DEVICE_TEAM (device); NMDeviceTeamPrivate *priv = NM_DEVICE_TEAM_GET_PRIVATE (self); const char *iface = nm_device_get_ip_iface (device); - char *tmp_str = NULL; - const char *config; + gs_unref_ptrarray GPtrArray *argv = NULL; + gs_free_error GError *error = NULL; + gs_free char *tmp_str = NULL; const char *teamd_binary; - GPtrArray *argv; - GError *error = NULL; - gboolean ret; - int status; - - if (priv->teamd_process_watch || - priv->teamd_pid > 0 || - priv->tdc || - priv->teamd_timeout) - { - /* FIXME g_assert that this never hits. For now, be more reluctant, and try to recover. */ - g_warn_if_reached (); - teamd_cleanup (device, FALSE); - } + const char *config; teamd_binary = nm_utils_find_helper ("teamd", NULL, NULL); if (!teamd_binary) { @@ -429,20 +466,12 @@ teamd_start (NMDevice *device, NMSettingTeam *s_team) return FALSE; } - /* Kill teamd for same named device first if it is there */ - argv = g_ptr_array_new (); - g_ptr_array_add (argv, (gpointer) teamd_binary); - g_ptr_array_add (argv, (gpointer) "-k"); - g_ptr_array_add (argv, (gpointer) "-t"); - g_ptr_array_add (argv, (gpointer) iface); - g_ptr_array_add (argv, NULL); - - _LOGD (LOGD_TEAM, "running: %s", - (tmp_str = g_strjoinv (" ", (gchar **) argv->pdata))); - g_clear_pointer (&tmp_str, g_free); - - ret = g_spawn_sync ("/", (char **) argv->pdata, NULL, 0, NULL, NULL, NULL, NULL, &status, &error); - g_ptr_array_free (argv, TRUE); + if (priv->teamd_process_watch || priv->teamd_pid > 0 || priv->tdc) { + g_warn_if_reached (); + if (!priv->teamd_pid) + teamd_kill (self, teamd_binary, NULL); + teamd_cleanup (device, TRUE); + } /* Start teamd now */ argv = g_ptr_array_new (); @@ -465,72 +494,90 @@ teamd_start (NMDevice *device, NMSettingTeam *s_team) g_ptr_array_add (argv, (gpointer) "-gg"); g_ptr_array_add (argv, NULL); - _LOGD (LOGD_TEAM, "running: %s", - (tmp_str = g_strjoinv (" ", (gchar **) argv->pdata))); - g_clear_pointer (&tmp_str, g_free); - - /* Start a timeout for teamd to appear at D-Bus */ - priv->teamd_timeout = g_timeout_add_seconds (5, teamd_timeout_cb, device); - - ret = g_spawn_async ("/", (char **) argv->pdata, NULL, G_SPAWN_DO_NOT_REAP_CHILD, - nm_utils_setpgid, NULL, &priv->teamd_pid, &error); - g_ptr_array_free (argv, TRUE); - if (!ret) { + _LOGD (LOGD_TEAM, "running: %s", (tmp_str = g_strjoinv (" ", (gchar **) argv->pdata))); + if (!g_spawn_async ("/", (char **) argv->pdata, NULL, G_SPAWN_DO_NOT_REAP_CHILD, + nm_utils_setpgid, NULL, &priv->teamd_pid, &error)) { _LOGW (LOGD_TEAM, "Activation: (team) failed to start teamd: %s", error->message); - g_clear_error (&error); - teamd_cleanup (device, FALSE); + teamd_cleanup (device, TRUE); return FALSE; } + /* Start a timeout for teamd to appear at D-Bus */ + if (!priv->teamd_timeout) + priv->teamd_timeout = g_timeout_add_seconds (5, teamd_timeout_cb, device); + /* Monitor the child process so we know when it dies */ priv->teamd_process_watch = g_child_watch_add (priv->teamd_pid, teamd_process_watch_cb, device); - _LOGI (LOGD_TEAM, "Activation: (team) started teamd..."); + _LOGI (LOGD_TEAM, "Activation: (team) started teamd [pid %u]...", (guint) priv->teamd_pid); return TRUE; } -static void -teamd_stop (NMDevice *device) -{ - NMDeviceTeam *self = NM_DEVICE_TEAM (device); - NMDeviceTeamPrivate *priv = NM_DEVICE_TEAM_GET_PRIVATE (self); - - if (priv->teamd_pid > 0) - _LOGI (LOGD_TEAM, "Deactivation: stopping teamd..."); - else - _LOGD (LOGD_TEAM, "Deactivation: stopping teamd (not started)..."); - teamd_cleanup (device, FALSE); -} - static NMActStageReturn act_stage1_prepare (NMDevice *device, NMDeviceStateReason *reason) { + NMDeviceTeam *self = NM_DEVICE_TEAM (device); + NMDeviceTeamPrivate *priv = NM_DEVICE_TEAM_GET_PRIVATE (self); NMActStageReturn ret = NM_ACT_STAGE_RETURN_SUCCESS; + gs_free_error GError *error = NULL; NMConnection *connection; NMSettingTeam *s_team; + const char *cfg; g_return_val_if_fail (reason != NULL, NM_ACT_STAGE_RETURN_FAILURE); ret = NM_DEVICE_CLASS (nm_device_team_parent_class)->act_stage1_prepare (device, reason); - if (ret == NM_ACT_STAGE_RETURN_SUCCESS) { - connection = nm_device_get_connection (device); - g_assert (connection); - s_team = nm_connection_get_setting_team (connection); - g_assert (s_team); - if (teamd_start (device, s_team)) - ret = NM_ACT_STAGE_RETURN_POSTPONE; - else - ret = NM_ACT_STAGE_RETURN_FAILURE; + if (ret != NM_ACT_STAGE_RETURN_SUCCESS) + return ret; + + connection = nm_device_get_connection (device); + g_assert (connection); + s_team = nm_connection_get_setting_team (connection); + g_assert (s_team); + + if (priv->tdc) { + /* If the existing teamd config is the same as we're about to use, + * then we can proceed. If it's not the same, and we have a PID, + * kill it so we can respawn it with the right config. If we don't + * have a PID, then we must fail. + */ + cfg = teamdctl_config_get_raw (priv->tdc); + if (cfg && strcmp (cfg, nm_setting_team_get_config (s_team)) == 0) { + _LOGD (LOGD_TEAM, "using existing matching teamd config"); + return NM_ACT_STAGE_RETURN_SUCCESS; + } + + if (!priv->teamd_pid) { + _LOGD (LOGD_TEAM, "existing teamd config mismatch; killing existing via teamdctl"); + if (!teamd_kill (self, NULL, &error)) { + _LOGW (LOGD_TEAM, "existing teamd config mismatch; failed to kill existing teamd: %s", error->message); + *reason = NM_DEVICE_STATE_REASON_TEAMD_CONTROL_FAILED; + return NM_ACT_STAGE_RETURN_FAILURE; + } + } + + _LOGD (LOGD_TEAM, "existing teamd config mismatch; respawning..."); + teamd_cleanup (device, TRUE); } - return ret; + + return teamd_start (device, s_team) ? + NM_ACT_STAGE_RETURN_POSTPONE : NM_ACT_STAGE_RETURN_FAILURE; } static void deactivate (NMDevice *device) { - teamd_stop (device); + NMDeviceTeam *self = NM_DEVICE_TEAM (device); + NMDeviceTeamPrivate *priv = NM_DEVICE_TEAM_GET_PRIVATE (self); + + if (priv->teamd_pid || priv->tdc) + _LOGI (LOGD_TEAM, "deactivation: stopping teamd..."); + + if (!priv->teamd_pid) + teamd_kill (self, NULL, NULL); + teamd_cleanup (device, TRUE); } static gboolean @@ -678,11 +725,22 @@ nm_device_team_init (NMDeviceTeam * self) static void constructed (GObject *object) { - NMDeviceTeam *self = NM_DEVICE_TEAM (object); + NMDevice *device = NM_DEVICE (object); + NMDeviceTeamPrivate *priv = NM_DEVICE_TEAM_GET_PRIVATE (object); + char *tmp_str = NULL; G_OBJECT_CLASS (nm_device_team_parent_class)->constructed (object); - nm_device_team_watch_dbus (self); + /* Register D-Bus name watcher */ + tmp_str = g_strdup_printf ("org.libteam.teamd.%s", nm_device_get_ip_iface (device)); + priv->teamd_dbus_watch = g_bus_watch_name (G_BUS_TYPE_SYSTEM, + tmp_str, + G_BUS_NAME_WATCHER_FLAGS_NONE, + teamd_dbus_appeared, + teamd_dbus_vanished, + NM_DEVICE (device), + NULL); + g_free (tmp_str); } static void @@ -722,15 +780,15 @@ set_property (GObject *object, guint prop_id, static void dispose (GObject *object) { - NMDeviceTeam *self = NM_DEVICE_TEAM (object); - NMDeviceTeamPrivate *priv = NM_DEVICE_TEAM_GET_PRIVATE (self); + NMDevice *device = NM_DEVICE (object); + NMDeviceTeamPrivate *priv = NM_DEVICE_TEAM_GET_PRIVATE (object); if (priv->teamd_dbus_watch) { g_bus_unwatch_name (priv->teamd_dbus_watch); priv->teamd_dbus_watch = 0; } - teamd_cleanup (NM_DEVICE (object), FALSE); + teamd_cleanup (device, TRUE); G_OBJECT_CLASS (nm_device_team_parent_class)->dispose (object); }