From 799175d51cb2ad2a79c90287a536b2c9144d5c3a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Vladim=C3=ADr=20Bene=C5=A1?= Date: Fri, 27 Mar 2026 10:57:17 +0100 Subject: [PATCH] policy: try next auto-activate candidate immediately on failure When auto-activating a port connection, if the activation fails (e.g., because the port's controller connection is not active on the controller device), block the failed connection and immediately try the next compatible candidate in the same pass over the candidate list, instead of scheduling an asynchronous recheck. This fixes a race condition with conflicting controller profiles: 1. "nmcli con up bond0b" deactivates bond0a, activates bond0b on bond0 2. dummy0 (port of bond0a) gets deactivated and deleted 3. Auto-activate picks dummy0a (first match) for the re-created dummy0 4. dummy0a activation fails: controller bond0a is not active 5. dummy0a is blocked, async recheck is scheduled 6. Before the recheck runs, bond0b completes activation without ports 7. Recheck finally picks dummy0b, but bond0b already moved to "activated" without any ports attached By trying the next candidate (dummy0b) synchronously right away, the correct port profile is activated in time for the controller to complete its activation with the port properly attached. Reproduced with the NetworkManager-ci test bond_conflicting_device_names under CPU stress on a 2-core VM with NM 1.57.3~dev. Also observed in Beaker CI without artificial stress on loaded machines. --- src/core/nm-policy.c | 85 +++++++++++++++++++++++++------------------- 1 file changed, 49 insertions(+), 36 deletions(-) diff --git a/src/core/nm-policy.c b/src/core/nm-policy.c index f7be1a9f87..67a18f0f7c 100644 --- a/src/core/nm-policy.c +++ b/src/core/nm-policy.c @@ -1536,7 +1536,17 @@ _auto_activate_device(NMPolicy *self, NMDevice *device) if (!connections[0]) return; - /* Find the first connection that should be auto-activated */ + /* Find the first connection that should be auto-activated. 
+ * + * Try to activate each candidate immediately. If activation fails + * (e.g. because a port's controller connection is not active), + * block the failed candidate and try the next one right away + * instead of scheduling an asynchronous recheck. + * + * This avoids a race where the asynchronous recheck arrives too + * late: by that time the controller device may have already + * transitioned to the "activated" state without any ports attached. + */ best_connection = NULL; for (i = 0; i < len; i++) { NMSettingsConnection *candidate = connections[i]; @@ -1557,47 +1567,50 @@ _auto_activate_device(NMPolicy *self, NMDevice *device) if (permission && !nm_settings_connection_check_permission(candidate, permission)) continue; - if (nm_device_can_auto_connect(device, candidate, &specific_object)) { - best_connection = candidate; - break; + if (!nm_device_can_auto_connect(device, candidate, &specific_object)) + continue; + + _LOGI(LOGD_DEVICE, + "auto-activating connection '%s' (%s)", + nm_settings_connection_get_id(candidate), + nm_settings_connection_get_uuid(candidate)); + + subject = nm_auth_subject_new_internal(); + ac = nm_manager_activate_connection( + priv->manager, + candidate, + NULL, + specific_object, + device, + subject, + NM_ACTIVATION_TYPE_MANAGED, + NM_ACTIVATION_REASON_AUTOCONNECT, + NM_ACTIVATION_STATE_FLAG_LIFETIME_BOUND_TO_PROFILE_VISIBILITY, + &error); + if (!ac) { + _LOGI(LOGD_DEVICE, + "connection '%s' auto-activation failed: %s", + nm_settings_connection_get_id(candidate), + error->message); + nm_manager_devcon_autoconnect_blocked_reason_set( + priv->manager, + device, + candidate, + NM_SETTINGS_AUTOCONNECT_BLOCKED_REASON_FAILED, + TRUE); + g_clear_object(&subject); + g_clear_error(&error); + nm_clear_g_free(&specific_object); + continue; } + + best_connection = candidate; + break; } if (!best_connection) return; - _LOGI(LOGD_DEVICE, - "auto-activating connection '%s' (%s)", - nm_settings_connection_get_id(best_connection), - 
nm_settings_connection_get_uuid(best_connection)); - - subject = nm_auth_subject_new_internal(); - ac = nm_manager_activate_connection( - priv->manager, - best_connection, - NULL, - specific_object, - device, - subject, - NM_ACTIVATION_TYPE_MANAGED, - NM_ACTIVATION_REASON_AUTOCONNECT, - NM_ACTIVATION_STATE_FLAG_LIFETIME_BOUND_TO_PROFILE_VISIBILITY, - &error); - if (!ac) { - _LOGI(LOGD_DEVICE, - "connection '%s' auto-activation failed: %s", - nm_settings_connection_get_id(best_connection), - error->message); - nm_manager_devcon_autoconnect_blocked_reason_set( - priv->manager, - device, - best_connection, - NM_SETTINGS_AUTOCONNECT_BLOCKED_REASON_FAILED, - TRUE); - nm_policy_device_recheck_auto_activate_schedule(self, device); - return; - } - /* Subscribe to AC state-changed signal to detect when the * activation fails in early stages without changing device * state.