team: wait for the existing instance to be killed before starting teamd again

teamd uses a PID file to guarantee that a single instance is running for
each device. If we spawn a new teamd process without waiting for the
termination of the existing one, the new process can fail:

 <debug> [1486191713.2530] kill child process 'teamd' (2676): wait for process to terminate after sending SIGTERM (15) (send SIGKILL in 2000 milliseconds)...
 ...
 <debug> [1486191713.2539] device[0x7f737f5d7c40] (team1): running: /usr/bin/teamd -o -n -U -D -N -t team1 -c {"runner": {"name": "activebackup"}} -gg
 Using team device "team1".
 Using PID file "/var/run/teamd/team1.pid"
 This program is not intended to be run as root.
 Daemon already running on PID 2676.
 Failed: File exists

To avoid this, keep track of whether a kill is in progress and postpone
the start of teamd until the kill has completed.

https://bugzilla.redhat.com/show_bug.cgi?id=1415641
This commit is contained in:
Beniamino Galvani 2017-08-30 18:00:45 +02:00
parent 35f189f1eb
commit 4fe884ad7e

View file

@ -56,6 +56,8 @@ typedef struct {
guint teamd_read_timeout;
guint teamd_dbus_watch;
char *config;
gboolean kill_in_progress;
NMConnection *connection;
} NMDeviceTeamPrivate;
struct _NMDeviceTeam {
@ -288,6 +290,26 @@ master_update_slave_connection (NMDevice *self,
}
/*****************************************************************************/
/*
 * teamd_kill_cb:
 * @pid: PID of the killed child (unused here)
 * @success: kill outcome reported by the caller (unused here)
 * @child_status: exit status of the child (unused here)
 * @user_data: the NMDevice whose reference was taken when the
 *   asynchronous kill was requested; that reference is released here
 *
 * Completion callback for the asynchronous kill of teamd. Clears the
 * kill_in_progress flag and, if a connection was stored in
 * priv->connection while the kill was pending, starts teamd with it.
 * If teamd cannot be started the device is moved to the FAILED state.
 */
static void
teamd_kill_cb (pid_t pid, gboolean success, int child_status, void *user_data)
{
	NMDevice *dev = NM_DEVICE (user_data);
	NMDeviceTeam *self = (NMDeviceTeam *) dev;
	NMDeviceTeamPrivate *priv = NM_DEVICE_TEAM_GET_PRIVATE (self);
	gboolean started;

	/* The previous teamd instance is gone; new starts need not wait. */
	priv->kill_in_progress = FALSE;

	/* Nothing was queued while the kill was pending. */
	if (!priv->connection)
		goto out;

	_LOGT (LOGD_TEAM, "kill terminated, starting teamd...");

	started = teamd_start (dev, priv->connection);
	if (!started) {
		nm_device_state_changed (dev,
		                         NM_DEVICE_STATE_FAILED,
		                         NM_DEVICE_STATE_REASON_TEAMD_CONTROL_FAILED);
	}

	/* Drop the stored connection whether or not the start succeeded. */
	g_clear_object (&priv->connection);

out:
	/* Release the reference that kept the device alive for this callback. */
	g_object_unref (dev);
}
static void
teamd_cleanup (NMDevice *device, gboolean free_tdc)
@ -299,7 +321,12 @@ teamd_cleanup (NMDevice *device, gboolean free_tdc)
nm_clear_g_source (&priv->teamd_read_timeout);
if (priv->teamd_pid > 0) {
nm_utils_kill_child_async (priv->teamd_pid, SIGTERM, LOGD_TEAM, "teamd", 2000, NULL, NULL);
priv->kill_in_progress = TRUE;
nm_utils_kill_child_async (priv->teamd_pid, SIGTERM,
LOGD_TEAM, "teamd",
2000,
teamd_kill_cb,
g_object_ref (device));
priv->teamd_pid = 0;
}
@ -322,7 +349,7 @@ teamd_timeout_cb (gpointer user_data)
if (priv->teamd_pid && !priv->tdc) {
/* Timed out launching our own teamd process */
_LOGW (LOGD_TEAM, "teamd timed out.");
_LOGW (LOGD_TEAM, "teamd timed out");
teamd_cleanup (device, TRUE);
g_warn_if_fail (nm_device_is_activating (device));
@ -645,6 +672,12 @@ act_stage1_prepare (NMDevice *device, NMDeviceStateReason *out_failure_reason)
teamd_cleanup (device, TRUE);
}
if (priv->kill_in_progress) {
_LOGT (LOGD_TEAM, "kill in progress, wait before starting teamd");
priv->connection = g_object_ref (connection);
return NM_ACT_STAGE_RETURN_POSTPONE;
}
return teamd_start (device, connection) ?
NM_ACT_STAGE_RETURN_POSTPONE : NM_ACT_STAGE_RETURN_FAILURE;
}
@ -661,6 +694,7 @@ deactivate (NMDevice *device)
if (!priv->teamd_pid)
teamd_kill (self, NULL, NULL);
teamd_cleanup (device, TRUE);
g_clear_object (&priv->connection);
}
static gboolean