team: respawn teamd instead of exiting (rh #1145988) (bgo #745903)

This commit is contained in:
Dan Williams 2015-04-02 15:03:25 -05:00
commit 4f4e570a7d

View file

@ -41,6 +41,7 @@
#include "nm-team-enum-types.h" #include "nm-team-enum-types.h"
#include "nm-posix-signals.h" #include "nm-posix-signals.h"
#include "nm-core-internal.h" #include "nm-core-internal.h"
#include "gsystem-local-alloc.h"
#include "nm-device-team-glue.h" #include "nm-device-team-glue.h"
@ -66,6 +67,8 @@ enum {
LAST_PROP LAST_PROP
}; };
static gboolean teamd_start (NMDevice *device, NMSettingTeam *s_team);
/******************************************************************/ /******************************************************************/
static guint32 static guint32
@ -260,18 +263,7 @@ master_update_slave_connection (NMDevice *self,
/******************************************************************/ /******************************************************************/
static void static void
teamd_timeout_remove (NMDevice *device) teamd_cleanup (NMDevice *device, gboolean free_tdc)
{
NMDeviceTeamPrivate *priv = NM_DEVICE_TEAM_GET_PRIVATE (device);
if (priv->teamd_timeout) {
g_source_remove (priv->teamd_timeout);
priv->teamd_timeout = 0;
}
}
static void
teamd_cleanup (NMDevice *device, gboolean device_state_failed)
{ {
NMDeviceTeamPrivate *priv = NM_DEVICE_TEAM_GET_PRIVATE (device); NMDeviceTeamPrivate *priv = NM_DEVICE_TEAM_GET_PRIVATE (device);
@ -280,24 +272,21 @@ teamd_cleanup (NMDevice *device, gboolean device_state_failed)
priv->teamd_process_watch = 0; priv->teamd_process_watch = 0;
} }
if (priv->teamd_timeout) {
g_source_remove (priv->teamd_timeout);
priv->teamd_timeout = 0;
}
if (priv->teamd_pid > 0) { if (priv->teamd_pid > 0) {
nm_utils_kill_child_async (priv->teamd_pid, SIGTERM, LOGD_TEAM, "teamd", 2000, NULL, NULL); nm_utils_kill_child_async (priv->teamd_pid, SIGTERM, LOGD_TEAM, "teamd", 2000, NULL, NULL);
priv->teamd_pid = 0; priv->teamd_pid = 0;
} }
if (priv->tdc) { if (priv->tdc && free_tdc) {
teamdctl_disconnect (priv->tdc); teamdctl_disconnect (priv->tdc);
teamdctl_free (priv->tdc); teamdctl_free (priv->tdc);
priv->tdc = NULL; priv->tdc = NULL;
} }
teamd_timeout_remove (device);
if (device_state_failed) {
if (nm_device_is_activating (device) ||
(nm_device_get_state (device) == NM_DEVICE_STATE_ACTIVATED))
nm_device_state_changed (device, NM_DEVICE_STATE_FAILED, NM_DEVICE_STATE_REASON_TEAMD_CONTROL_FAILED);
}
} }
static gboolean static gboolean
@ -308,11 +297,18 @@ teamd_timeout_cb (gpointer user_data)
NMDeviceTeamPrivate *priv = NM_DEVICE_TEAM_GET_PRIVATE (device); NMDeviceTeamPrivate *priv = NM_DEVICE_TEAM_GET_PRIVATE (device);
g_return_val_if_fail (priv->teamd_timeout, FALSE); g_return_val_if_fail (priv->teamd_timeout, FALSE);
priv->teamd_timeout = 0;
_LOGI (LOGD_TEAM, "teamd timed out."); if (priv->teamd_pid && !priv->tdc) {
teamd_cleanup (device, TRUE); /* Timed out launching our own teamd process */
_LOGW (LOGD_TEAM, "teamd timed out.");
teamd_cleanup (device, TRUE);
return FALSE; g_warn_if_fail (nm_device_is_activating (device));
nm_device_state_changed (device, NM_DEVICE_STATE_FAILED, NM_DEVICE_STATE_REASON_TEAMD_CONTROL_FAILED);
}
return G_SOURCE_REMOVE;
} }
static void static void
@ -329,27 +325,54 @@ teamd_dbus_appeared (GDBusConnection *connection,
g_return_if_fail (priv->teamd_dbus_watch); g_return_if_fail (priv->teamd_dbus_watch);
_LOGI (LOGD_TEAM, "teamd appeared on D-Bus"); _LOGI (LOGD_TEAM, "teamd appeared on D-Bus");
teamd_timeout_remove (device);
nm_device_queue_recheck_assume (device); nm_device_queue_recheck_assume (device);
/* If another teamd grabbed the bus name while our teamd was starting,
* just ignore the death of our teamd and run with the existing one.
*/
if (priv->teamd_process_watch) {
gs_unref_variant GVariant *ret = NULL;
guint32 pid;
ret = g_dbus_connection_call_sync (connection,
"org.freedesktop.DBus",
"/org/freedesktop/DBus",
"org.freedesktop.DBus",
"GetConnectionUnixProcessID",
g_variant_new ("(s)", name_owner),
NULL,
G_DBUS_CALL_FLAGS_NO_AUTO_START,
2000,
NULL,
NULL);
g_variant_get (ret, "(u)", &pid);
if (pid != priv->teamd_pid)
teamd_cleanup (device, FALSE);
}
/* Grab a teamd control handle even if we aren't going to use it
* immediately. But if we are, and grabbing it failed, fail the
* device activation.
*/
success = ensure_teamd_connection (device); success = ensure_teamd_connection (device);
if (nm_device_get_state (device) == NM_DEVICE_STATE_PREPARE) { if (nm_device_get_state (device) == NM_DEVICE_STATE_PREPARE) {
if (success) if (success)
nm_device_activate_schedule_stage2_device_config (device); nm_device_activate_schedule_stage2_device_config (device);
else if (!nm_device_uses_assumed_connection (device)) else if (!nm_device_uses_assumed_connection (device))
nm_device_state_changed (device, NM_DEVICE_STATE_FAILED, NM_DEVICE_STATE_REASON_TEAMD_CONTROL_FAILED); nm_device_state_changed (device, NM_DEVICE_STATE_FAILED, NM_DEVICE_STATE_REASON_TEAMD_CONTROL_FAILED);
return;
} }
} }
static void static void
teamd_dbus_vanished (GDBusConnection *connection, teamd_dbus_vanished (GDBusConnection *dbus_connection,
const gchar *name, const gchar *name,
gpointer user_data) gpointer user_data)
{ {
NMDeviceTeam *self = NM_DEVICE_TEAM (user_data); NMDeviceTeam *self = NM_DEVICE_TEAM (user_data);
NMDeviceTeamPrivate *priv = NM_DEVICE_TEAM_GET_PRIVATE (self); NMDeviceTeamPrivate *priv = NM_DEVICE_TEAM_GET_PRIVATE (self);
NMDevice *device = NM_DEVICE (self); NMDevice *device = NM_DEVICE (self);
NMDeviceState state = nm_device_get_state (device);
g_return_if_fail (priv->teamd_dbus_watch); g_return_if_fail (priv->teamd_dbus_watch);
@ -364,6 +387,15 @@ teamd_dbus_vanished (GDBusConnection *connection,
_LOGI (LOGD_TEAM, "teamd vanished from D-Bus"); _LOGI (LOGD_TEAM, "teamd vanished from D-Bus");
teamd_cleanup (device, TRUE); teamd_cleanup (device, TRUE);
/* Attempt to respawn teamd */
if (state >= NM_DEVICE_STATE_PREPARE && state <= NM_DEVICE_STATE_ACTIVATED) {
NMConnection *connection = nm_device_get_connection (device);
g_assert (connection);
if (!teamd_start (device, nm_connection_get_setting_team (connection)))
nm_device_state_changed (device, NM_DEVICE_STATE_FAILED, NM_DEVICE_STATE_REASON_TEAMD_CONTROL_FAILED);
}
} }
static void static void
@ -372,13 +404,24 @@ teamd_process_watch_cb (GPid pid, gint status, gpointer user_data)
NMDeviceTeam *self = NM_DEVICE_TEAM (user_data); NMDeviceTeam *self = NM_DEVICE_TEAM (user_data);
NMDeviceTeamPrivate *priv = NM_DEVICE_TEAM_GET_PRIVATE (self); NMDeviceTeamPrivate *priv = NM_DEVICE_TEAM_GET_PRIVATE (self);
NMDevice *device = NM_DEVICE (self); NMDevice *device = NM_DEVICE (self);
NMDeviceState state = nm_device_get_state (device);
g_return_if_fail (priv->teamd_process_watch); g_return_if_fail (priv->teamd_process_watch);
_LOGI (LOGD_TEAM, "teamd died with status %d", status); _LOGD (LOGD_TEAM, "teamd died with status %d", status);
priv->teamd_process_watch = 0;
priv->teamd_pid = 0; priv->teamd_pid = 0;
teamd_cleanup (device, TRUE); priv->teamd_process_watch = 0;
/* If teamd quit within 5 seconds of starting, it's probably hosed
* and will just die again, so fail the activation.
*/
if (priv->teamd_timeout &&
(state >= NM_DEVICE_STATE_PREPARE) &&
(state <= NM_DEVICE_STATE_ACTIVATED)) {
_LOGW (LOGD_TEAM, "teamd process quit unexpectedly; failing activation");
teamd_cleanup (device, TRUE);
nm_device_state_changed (device, NM_DEVICE_STATE_FAILED, NM_DEVICE_STATE_REASON_TEAMD_CONTROL_FAILED);
}
} }
static void static void
@ -398,23 +441,29 @@ teamd_child_setup (gpointer user_data G_GNUC_UNUSED)
nm_unblock_posix_signals (NULL); nm_unblock_posix_signals (NULL);
} }
static void static gboolean
nm_device_team_watch_dbus (NMDeviceTeam *self) teamd_kill (NMDeviceTeam *self, const char *teamd_binary, GError **error)
{ {
NMDeviceTeamPrivate *priv = NM_DEVICE_TEAM_GET_PRIVATE (self); gs_unref_ptrarray GPtrArray *argv = NULL;
const char *iface = nm_device_get_ip_iface (NM_DEVICE (self)); gs_free char *tmp_str = NULL;
char *tmp_str = NULL;
/* Register D-Bus name watcher */ if (!teamd_binary) {
tmp_str = g_strdup_printf ("org.libteam.teamd.%s", iface); teamd_binary = nm_utils_find_helper ("teamd", NULL, NULL);
priv->teamd_dbus_watch = g_bus_watch_name (G_BUS_TYPE_SYSTEM, if (!teamd_binary) {
tmp_str, _LOGW (LOGD_TEAM, "Activation: (team) failed to start teamd: teamd binary not found");
G_BUS_NAME_WATCHER_FLAGS_NONE, return FALSE;
teamd_dbus_appeared, }
teamd_dbus_vanished, }
NM_DEVICE (self),
NULL); argv = g_ptr_array_new ();
g_free (tmp_str); g_ptr_array_add (argv, (gpointer) teamd_binary);
g_ptr_array_add (argv, (gpointer) "-k");
g_ptr_array_add (argv, (gpointer) "-t");
g_ptr_array_add (argv, (gpointer) nm_device_get_iface (NM_DEVICE (self)));
g_ptr_array_add (argv, NULL);
_LOGD (LOGD_TEAM, "running: %s", (tmp_str = g_strjoinv (" ", (gchar **) argv->pdata)));
return g_spawn_sync ("/", (char **) argv->pdata, NULL, 0, nm_unblock_posix_signals, NULL, NULL, NULL, NULL, error);
} }
static gboolean static gboolean
@ -423,23 +472,11 @@ teamd_start (NMDevice *device, NMSettingTeam *s_team)
NMDeviceTeam *self = NM_DEVICE_TEAM (device); NMDeviceTeam *self = NM_DEVICE_TEAM (device);
NMDeviceTeamPrivate *priv = NM_DEVICE_TEAM_GET_PRIVATE (self); NMDeviceTeamPrivate *priv = NM_DEVICE_TEAM_GET_PRIVATE (self);
const char *iface = nm_device_get_ip_iface (device); const char *iface = nm_device_get_ip_iface (device);
char *tmp_str = NULL; gs_unref_ptrarray GPtrArray *argv = NULL;
const char *config; gs_free_error GError *error = NULL;
gs_free char *tmp_str = NULL;
const char *teamd_binary; const char *teamd_binary;
GPtrArray *argv; const char *config;
GError *error = NULL;
gboolean ret;
int status;
if (priv->teamd_process_watch ||
priv->teamd_pid > 0 ||
priv->tdc ||
priv->teamd_timeout)
{
/* FIXME g_assert that this never hits. For now, be more reluctant, and try to recover. */
g_warn_if_reached ();
teamd_cleanup (device, FALSE);
}
teamd_binary = nm_utils_find_helper ("teamd", NULL, NULL); teamd_binary = nm_utils_find_helper ("teamd", NULL, NULL);
if (!teamd_binary) { if (!teamd_binary) {
@ -447,20 +484,12 @@ teamd_start (NMDevice *device, NMSettingTeam *s_team)
return FALSE; return FALSE;
} }
/* Kill teamd for same named device first if it is there */ if (priv->teamd_process_watch || priv->teamd_pid > 0 || priv->tdc) {
argv = g_ptr_array_new (); g_warn_if_reached ();
g_ptr_array_add (argv, (gpointer) teamd_binary); if (!priv->teamd_pid)
g_ptr_array_add (argv, (gpointer) "-k"); teamd_kill (self, teamd_binary, NULL);
g_ptr_array_add (argv, (gpointer) "-t"); teamd_cleanup (device, TRUE);
g_ptr_array_add (argv, (gpointer) iface); }
g_ptr_array_add (argv, NULL);
_LOGD (LOGD_TEAM, "running: %s",
(tmp_str = g_strjoinv (" ", (gchar **) argv->pdata)));
g_clear_pointer (&tmp_str, g_free);
ret = g_spawn_sync ("/", (char **) argv->pdata, NULL, 0, nm_unblock_posix_signals, NULL, NULL, NULL, &status, &error);
g_ptr_array_free (argv, TRUE);
/* Start teamd now */ /* Start teamd now */
argv = g_ptr_array_new (); argv = g_ptr_array_new ();
@ -483,72 +512,90 @@ teamd_start (NMDevice *device, NMSettingTeam *s_team)
g_ptr_array_add (argv, (gpointer) "-gg"); g_ptr_array_add (argv, (gpointer) "-gg");
g_ptr_array_add (argv, NULL); g_ptr_array_add (argv, NULL);
_LOGD (LOGD_TEAM, "running: %s", _LOGD (LOGD_TEAM, "running: %s", (tmp_str = g_strjoinv (" ", (gchar **) argv->pdata)));
(tmp_str = g_strjoinv (" ", (gchar **) argv->pdata))); if (!g_spawn_async ("/", (char **) argv->pdata, NULL, G_SPAWN_DO_NOT_REAP_CHILD,
g_clear_pointer (&tmp_str, g_free); teamd_child_setup, NULL, &priv->teamd_pid, &error)) {
/* Start a timeout for teamd to appear at D-Bus */
priv->teamd_timeout = g_timeout_add_seconds (5, teamd_timeout_cb, device);
ret = g_spawn_async ("/", (char **) argv->pdata, NULL, G_SPAWN_DO_NOT_REAP_CHILD,
&teamd_child_setup, NULL, &priv->teamd_pid, &error);
g_ptr_array_free (argv, TRUE);
if (!ret) {
_LOGW (LOGD_TEAM, "Activation: (team) failed to start teamd: %s", error->message); _LOGW (LOGD_TEAM, "Activation: (team) failed to start teamd: %s", error->message);
g_clear_error (&error); teamd_cleanup (device, TRUE);
teamd_cleanup (device, FALSE);
return FALSE; return FALSE;
} }
/* Start a timeout for teamd to appear at D-Bus */
if (!priv->teamd_timeout)
priv->teamd_timeout = g_timeout_add_seconds (5, teamd_timeout_cb, device);
/* Monitor the child process so we know when it dies */ /* Monitor the child process so we know when it dies */
priv->teamd_process_watch = g_child_watch_add (priv->teamd_pid, priv->teamd_process_watch = g_child_watch_add (priv->teamd_pid,
teamd_process_watch_cb, teamd_process_watch_cb,
device); device);
_LOGI (LOGD_TEAM, "Activation: (team) started teamd..."); _LOGI (LOGD_TEAM, "Activation: (team) started teamd [pid %u]...", (guint) priv->teamd_pid);
return TRUE; return TRUE;
} }
/*
 * teamd_stop:
 * @device: the team device being deactivated
 *
 * Logs whether a teamd child PID is currently recorded for the device and
 * then hands off to teamd_cleanup() with FALSE as its second argument.
 */
static void
teamd_stop (NMDevice *device)
{
	NMDeviceTeam *self = NM_DEVICE_TEAM (device);
	NMDeviceTeamPrivate *priv = NM_DEVICE_TEAM_GET_PRIVATE (self);
	const gboolean have_child = (priv->teamd_pid > 0);

	/* Log level depends on whether there is a child to stop. */
	if (!have_child)
		_LOGD (LOGD_TEAM, "Deactivation: stopping teamd (not started)...");
	else
		_LOGI (LOGD_TEAM, "Deactivation: stopping teamd...");

	teamd_cleanup (device, FALSE);
}
/*
 * act_stage1_prepare:
 * @device: the team device being activated
 * @reason: (out): failure reason; must not be NULL
 *
 * Chains up to the parent class and, if that succeeded, makes sure a teamd
 * instance with the connection's team config is available:
 *  - an existing teamd control handle whose raw config equals the
 *    setting's config is reused as-is (returns SUCCESS);
 *  - otherwise the existing instance is torn down and teamd_start() is
 *    called, in which case activation is POSTPONEd until teamd shows up.
 *
 * Returns: the parent's result if it was not SUCCESS, otherwise SUCCESS,
 * POSTPONE or FAILURE as described above. @reason is set on every FAILURE
 * produced by this function.
 */
static NMActStageReturn
act_stage1_prepare (NMDevice *device, NMDeviceStateReason *reason)
{
	NMDeviceTeam *self = NM_DEVICE_TEAM (device);
	NMDeviceTeamPrivate *priv = NM_DEVICE_TEAM_GET_PRIVATE (self);
	NMActStageReturn ret;
	gs_free_error GError *error = NULL;
	NMConnection *connection;
	NMSettingTeam *s_team;
	const char *cfg;

	g_return_val_if_fail (reason != NULL, NM_ACT_STAGE_RETURN_FAILURE);

	ret = NM_DEVICE_CLASS (nm_device_team_parent_class)->act_stage1_prepare (device, reason);
	if (ret != NM_ACT_STAGE_RETURN_SUCCESS)
		return ret;

	connection = nm_device_get_connection (device);
	g_assert (connection);
	s_team = nm_connection_get_setting_team (connection);
	g_assert (s_team);

	if (priv->tdc) {
		/* If the existing teamd config is the same as we're about to use,
		 * then we can proceed.  If it's not the same, and we have a PID,
		 * kill it so we can respawn it with the right config.  If we don't
		 * have a PID, then we must fail.
		 */
		cfg = teamdctl_config_get_raw (priv->tdc);
		/* g_strcmp0() rather than strcmp(): the setting's config may be
		 * NULL, which must count as a mismatch instead of crashing.
		 */
		if (cfg && g_strcmp0 (cfg, nm_setting_team_get_config (s_team)) == 0) {
			_LOGD (LOGD_TEAM, "using existing matching teamd config");
			return NM_ACT_STAGE_RETURN_SUCCESS;
		}

		if (!priv->teamd_pid) {
			_LOGD (LOGD_TEAM, "existing teamd config mismatch; killing existing via teamdctl");
			if (!teamd_kill (self, NULL, &error)) {
				/* teamd_kill() can fail without filling in the error. */
				_LOGW (LOGD_TEAM, "existing teamd config mismatch; failed to kill existing teamd: %s",
				       error ? error->message : "unknown error");
				*reason = NM_DEVICE_STATE_REASON_TEAMD_CONTROL_FAILED;
				return NM_ACT_STAGE_RETURN_FAILURE;
			}
		}

		_LOGD (LOGD_TEAM, "existing teamd config mismatch; respawning...");
		teamd_cleanup (device, TRUE);
	}

	if (!teamd_start (device, s_team)) {
		/* Report why the stage failed, like the other failure path. */
		*reason = NM_DEVICE_STATE_REASON_TEAMD_CONTROL_FAILED;
		return NM_ACT_STAGE_RETURN_FAILURE;
	}

	return NM_ACT_STAGE_RETURN_POSTPONE;
}
/*
 * deactivate:
 * @device: the team device being deactivated
 *
 * Stops teamd for this device.  When no child PID is recorded, teamd_kill()
 * is run first (best effort, its result is ignored); afterwards all local
 * teamd state, including the control handle, is released through
 * teamd_cleanup().
 */
static void
deactivate (NMDevice *device)
{
	NMDeviceTeam *self = NM_DEVICE_TEAM (device);
	NMDeviceTeamPrivate *priv = NM_DEVICE_TEAM_GET_PRIVATE (self);
	const gboolean have_pid = (priv->teamd_pid != 0);

	/* Only announce the stop when there is a child or a control handle. */
	if (have_pid || priv->tdc)
		_LOGI (LOGD_TEAM, "deactivation: stopping teamd...");

	/* With no recorded PID, fall back to the "teamd -k" helper. */
	if (!have_pid)
		teamd_kill (self, NULL, NULL);

	teamd_cleanup (device, TRUE);
}
static gboolean static gboolean
@ -696,11 +743,22 @@ nm_device_team_init (NMDeviceTeam * self)
/*
 * constructed:
 * @object: the NMDeviceTeam instance under construction
 *
 * Chains up to the parent class, then starts watching the system bus for
 * the name "org.libteam.teamd.<ip-iface>", with teamd_dbus_appeared() and
 * teamd_dbus_vanished() as the handlers.  The watch id is stored in the
 * private struct.
 */
static void
constructed (GObject *object)
{
	NMDeviceTeamPrivate *priv = NM_DEVICE_TEAM_GET_PRIVATE (object);
	NMDevice *device = NM_DEVICE (object);
	char *bus_name;

	G_OBJECT_CLASS (nm_device_team_parent_class)->constructed (object);

	/* The watched bus name is derived from this device's interface name */
	bus_name = g_strdup_printf ("org.libteam.teamd.%s", nm_device_get_ip_iface (device));

	priv->teamd_dbus_watch = g_bus_watch_name (G_BUS_TYPE_SYSTEM,
	                                           bus_name,
	                                           G_BUS_NAME_WATCHER_FLAGS_NONE,
	                                           teamd_dbus_appeared,
	                                           teamd_dbus_vanished,
	                                           device,
	                                           NULL);

	g_free (bus_name);
}
static void static void
@ -740,15 +798,15 @@ set_property (GObject *object, guint prop_id,
/*
 * dispose:
 * @object: the NMDeviceTeam instance being disposed
 *
 * Drops the D-Bus name watch (if one is registered), releases all teamd
 * state via teamd_cleanup() with TRUE as its second argument, and chains
 * up to the parent class.
 */
static void
dispose (GObject *object)
{
	NMDeviceTeamPrivate *priv = NM_DEVICE_TEAM_GET_PRIVATE (object);
	NMDevice *device = NM_DEVICE (object);
	guint watch_id = priv->teamd_dbus_watch;

	/* A zero id means no watch is registered. */
	if (watch_id != 0) {
		g_bus_unwatch_name (watch_id);
		priv->teamd_dbus_watch = 0;
	}

	teamd_cleanup (device, TRUE);

	G_OBJECT_CLASS (nm_device_team_parent_class)->dispose (object);
}