From ed849eadc1645739cd61b820217ab8795aacb6b8 Mon Sep 17 00:00:00 2001 From: Beniamino Galvani Date: Thu, 25 Jun 2020 11:43:14 +0200 Subject: [PATCH 1/4] platform: do not rely on the presence of sriov_totalvfs sysfs file The file doesn't exist for all interfaces that support SR-IOV. In particular, netdevsim devices support SR-IOV but don't expose the file. (cherry picked from commit 63a932b851cd569a77fbc20628d91fda0f7e01b7) --- src/platform/nm-linux-platform.c | 24 ++++++++---------------- 1 file changed, 8 insertions(+), 16 deletions(-) diff --git a/src/platform/nm-linux-platform.c b/src/platform/nm-linux-platform.c index a3ae3922f0..fbcc2b3136 100644 --- a/src/platform/nm-linux-platform.c +++ b/src/platform/nm-linux-platform.c @@ -7250,7 +7250,7 @@ link_supports_sriov (NMPlatform *platform, int ifindex) nm_auto_pop_netns NMPNetns *netns = NULL; nm_auto_close int dirfd = -1; char ifname[IFNAMSIZ]; - int total = -1; + int num = -1; if (!nm_platform_netns_push (platform, &netns)) return FALSE; @@ -7259,13 +7259,13 @@ link_supports_sriov (NMPlatform *platform, int ifindex) if (dirfd < 0) return FALSE; - total = nm_platform_sysctl_get_int32 (platform, - NMP_SYSCTL_PATHID_NETDIR (dirfd, - ifname, - "device/sriov_totalvfs"), - -1); + num = nm_platform_sysctl_get_int32 (platform, + NMP_SYSCTL_PATHID_NETDIR (dirfd, + ifname, + "device/sriov_numvfs"), + -1); - return total > 0; + return num != -1; } static int @@ -7408,15 +7408,7 @@ link_set_sriov_params_async (NMPlatform *platform, ifname, "device/sriov_totalvfs"), 10, 0, G_MAXUINT, 0); - if (errno) { - g_set_error (&error, - NM_UTILS_ERROR, - NM_UTILS_ERROR_UNKNOWN, - "failed reading sriov_totalvfs value: %s", - nm_strerror_native (errno)); - goto out_idle; - } - if (num_vfs > total) { + if (!errno && num_vfs > total) { _LOGW ("link: %d only supports %u VFs (requested %u)", ifindex, total, num_vfs); num_vfs = total; } From 01997b255035ed0c38ecf8b16883d0634f3b8b41 Mon Sep 17 00:00:00 2001 From: Beniamino Galvani Date: 
Thu, 25 Jun 2020 17:40:48 +0200 Subject: [PATCH 2/4] device: clear queued sriov operation on dispose When dispose() is called, there can't be any pending operation because they keep a reference to the device. Instead, there can be a queued operation not yet executed. Destroy it. (cherry picked from commit 6fcb077a98a4681af15d2431977d367256f8e667) --- src/devices/nm-device.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/devices/nm-device.c b/src/devices/nm-device.c index f3b682a780..1f745e642f 100644 --- a/src/devices/nm-device.c +++ b/src/devices/nm-device.c @@ -17892,6 +17892,12 @@ dispose (GObject *object) nm_clear_g_source (&priv->concheck_x[0].p_cur_id); nm_clear_g_source (&priv->concheck_x[1].p_cur_id); + nm_assert (!priv->sriov.pending); + if (priv->sriov.next) { + nm_g_slice_free (priv->sriov.next); + priv->sriov.next = NULL; + } + G_OBJECT_CLASS (nm_device_parent_class)->dispose (object); if (nm_clear_g_source (&priv->queued_state.id)) { From b140adc40dca68d9b6e7fee40084e0511e146b17 Mon Sep 17 00:00:00 2001 From: Beniamino Galvani Date: Wed, 1 Apr 2020 15:18:10 +0200 Subject: [PATCH 3/4] device: allow queuing SR-IOV operation from a callback Keep priv->sriov.pending set during the callback so that it becomes possible to insert a new operation from the callback itself. 
(cherry picked from commit 74ccda8a713f707fe09f218b737865db6579f955) --- src/devices/nm-device.c | 35 +++++++++++++++++------------------ 1 file changed, 17 insertions(+), 18 deletions(-) diff --git a/src/devices/nm-device.c b/src/devices/nm-device.c index 1f745e642f..7148bb1e31 100644 --- a/src/devices/nm-device.c +++ b/src/devices/nm-device.c @@ -614,6 +614,7 @@ typedef struct _NMDevicePrivate { SriovOp *pending; /* SR-IOV operation currently running */ SriovOp *next; /* next SR-IOV operation scheduled */ } sriov; + guint sriov_reset_pending; struct { guint timeout_id; @@ -4765,15 +4766,12 @@ sriov_op_cb (GError *error, gpointer user_data) nm_assert (op == priv->sriov.pending); - priv->sriov.pending = NULL; - g_clear_object (&op->cancellable); if (op->callback) op->callback (error, op->callback_data); - nm_assert (!priv->sriov.pending); - + priv->sriov.pending = NULL; nm_g_slice_free (op); if (priv->sriov.next) { @@ -4791,6 +4789,8 @@ sriov_op_queue_op (NMDevice *self, if (priv->sriov.next) { SriovOp *op_next = g_steal_pointer (&priv->sriov.next); + priv->sriov.next = op; + /* Cancel the next operation immediately */ if (op_next->callback) { gs_free_error GError *error = NULL; @@ -4800,17 +4800,10 @@ sriov_op_queue_op (NMDevice *self, } nm_g_slice_free (op_next); + return; + } - if (!priv->sriov.pending) { - /* This (having "next" set but "pending" not) can only happen if we are - * called from inside the callback again. - * - * That means we append the new request as "next" and return. Once - * the callback returns, it will schedule the request. 
*/ - priv->sriov.next = op; - return; - } - } else if (priv->sriov.pending) { + if (priv->sriov.pending) { priv->sriov.next = op; g_cancellable_cancel (priv->sriov.pending->cancellable); return; @@ -15927,23 +15920,28 @@ deactivate_ready (NMDevice *self, NMDeviceStateReason reason) if (priv->dispatcher.call_id) return; - if ( priv->sriov.pending - || priv->sriov.next) + if (priv->sriov_reset_pending > 0) return; - nm_device_queue_state (self, NM_DEVICE_STATE_DISCONNECTED, reason); + if (priv->state == NM_DEVICE_STATE_DEACTIVATING) + nm_device_queue_state (self, NM_DEVICE_STATE_DISCONNECTED, reason); } static void sriov_deactivate_cb (GError *error, gpointer user_data) { NMDevice *self; + NMDevicePrivate *priv; gpointer reason; + nm_utils_user_data_unpack (user_data, &self, &reason); + priv = NM_DEVICE_GET_PRIVATE (self); + nm_assert (priv->sriov_reset_pending > 0); + priv->sriov_reset_pending--; + if (nm_utils_error_is_cancelled_or_disposing (error)) return; - nm_utils_user_data_unpack (user_data, &self, &reason); deactivate_ready (self, (NMDeviceStateReason) reason); } @@ -16273,6 +16271,7 @@ _set_state_full (NMDevice *self, if ( priv->ifindex > 0 && (s_sriov = nm_device_get_applied_setting (self, NM_TYPE_SETTING_SRIOV))) { + priv->sriov_reset_pending++; sriov_op_queue (self, 0, NM_TERNARY_TRUE, From ef9f26a1bf534c7a28dae3a2dbd5f667d363ee74 Mon Sep 17 00:00:00 2001 From: Beniamino Galvani Date: Wed, 1 Apr 2020 11:23:15 +0200 Subject: [PATCH 4/4] device: reset SR-IOV parameters on activation failure SR-IOV parameters are reset when deactivating a connection; do the same also on failure. 
https://bugzilla.redhat.com/show_bug.cgi?id=1819587 (cherry picked from commit 4d6ea18de4b170f730f71cf49461474619c00947) --- src/devices/nm-device.c | 35 ++++++++++++++++++++++++++++++++--- 1 file changed, 32 insertions(+), 3 deletions(-) diff --git a/src/devices/nm-device.c b/src/devices/nm-device.c index 7148bb1e31..de09e48072 100644 --- a/src/devices/nm-device.c +++ b/src/devices/nm-device.c @@ -15928,7 +15928,7 @@ deactivate_ready (NMDevice *self, NMDeviceStateReason reason) } static void -sriov_deactivate_cb (GError *error, gpointer user_data) +sriov_reset_on_deactivate_cb (GError *error, gpointer user_data) { NMDevice *self; NMDevicePrivate *priv; @@ -15939,12 +15939,31 @@ sriov_deactivate_cb (GError *error, gpointer user_data) nm_assert (priv->sriov_reset_pending > 0); priv->sriov_reset_pending--; - if (nm_utils_error_is_cancelled_or_disposing (error)) + if (nm_utils_error_is_cancelled (error)) return; deactivate_ready (self, (NMDeviceStateReason) reason); } +static void +sriov_reset_on_failure_cb (GError *error, gpointer user_data) +{ + NMDevice *self = user_data; + NMDevicePrivate *priv = NM_DEVICE_GET_PRIVATE (self); + + nm_assert (priv->sriov_reset_pending > 0); + priv->sriov_reset_pending--; + + if (nm_utils_error_is_cancelled (error)) + return; + + if (priv->state == NM_DEVICE_STATE_FAILED) { + nm_device_queue_state (self, + NM_DEVICE_STATE_DISCONNECTED, + NM_DEVICE_STATE_REASON_NONE); + } +} + static void deactivate_async_ready (NMDevice *self, GError *error, @@ -16275,7 +16294,7 @@ _set_state_full (NMDevice *self, sriov_op_queue (self, 0, NM_TERNARY_TRUE, - sriov_deactivate_cb, + sriov_reset_on_deactivate_cb, nm_utils_user_data_pack (self, (gpointer) reason)); } } @@ -16327,6 +16346,16 @@ _set_state_full (NMDevice *self, if (sett_conn && !nm_settings_connection_get_timestamp (sett_conn, NULL)) nm_settings_connection_update_timestamp (sett_conn, (guint64) 0); + if ( priv->ifindex > 0 + && (s_sriov = nm_device_get_applied_setting (self, 
NM_TYPE_SETTING_SRIOV))) { + priv->sriov_reset_pending++; + sriov_op_queue (self, + 0, + NM_TERNARY_TRUE, + sriov_reset_on_failure_cb, + self); + break; + } /* Schedule the transition to DISCONNECTED. The device can't transition * immediately because we can't change states again from the state * handler for a variety of reasons.