From 6991333bc0c40ff08070f43dfb398ed69c8dd2f6 Mon Sep 17 00:00:00 2001 From: Dominique Martinet Date: Thu, 9 Jun 2022 16:51:31 +0900 Subject: [PATCH 01/10] ppp-manager: ip6: fix dns not being used ipv6 DNS received on ppp interface were being ignored because their priority was not set. Fix this by using default priority in impl_ppp_manager_set_ip6_config(), as was done for ip4_config in b2e559fab2fa ("core: initialize l3cd dns-priority for ppp and wwan") Fixes: 58287cbcc0c8 ('core: rework IP configuration in NetworkManager using layer 3 configuration') https://gitlab.freedesktop.org/NetworkManager/NetworkManager/-/issues/1022 --- src/core/ppp/nm-ppp-manager.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/core/ppp/nm-ppp-manager.c b/src/core/ppp/nm-ppp-manager.c index 896c233d6d..9759b9b397 100644 --- a/src/core/ppp/nm-ppp-manager.c +++ b/src/core/ppp/nm-ppp-manager.c @@ -659,6 +659,8 @@ impl_ppp_manager_set_ip6_config(NMDBusObject *obj, priv->ifindex, NM_IP_CONFIG_SOURCE_PPP); + nm_l3_config_data_set_dns_priority(l3cd, AF_INET6, 0); + address = (NMPlatformIP6Address){ .plen = 64, .addr_source = NM_IP_CONFIG_SOURCE_PPP, From 4d7b494eb3b39f9ff6d20a862a4487200d3954de Mon Sep 17 00:00:00 2001 From: Dominique Martinet Date: Thu, 9 Jun 2022 16:54:17 +0900 Subject: [PATCH 02/10] ppp-manager: ip6: set interface mtu based on ppp config impl_ppp_manager_set_ip4_config always has been setting interface mtu based on ppp configuration: do the same for ip6 in case it matters. --- src/core/ppp/nm-ppp-manager.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/core/ppp/nm-ppp-manager.c b/src/core/ppp/nm-ppp-manager.c index 9759b9b397..5c2b7681bb 100644 --- a/src/core/ppp/nm-ppp-manager.c +++ b/src/core/ppp/nm-ppp-manager.c @@ -642,6 +642,7 @@ impl_ppp_manager_set_ip6_config(NMDBusObject *obj, nm_auto_unref_l3cd_init NML3ConfigData *l3cd = NULL; NMPlatformIP6Address address; struct in6_addr a; + guint32 mtu; NMUtilsIPv6IfaceId iid = NM_UTILS_IPV6_IFACE_ID_INIT; gboolean has_peer = FALSE; gs_unref_variant GVariant *config_dict = NULL; @@ -652,13 +653,14 @@ impl_ppp_manager_set_ip6_config(NMDBusObject *obj, nm_clear_g_source(&priv->ppp_timeout_handler); - if (!set_ip_config_common(self, config_dict, NULL)) + if (!set_ip_config_common(self, config_dict, &mtu)) goto out; l3cd = nm_l3_config_data_new(nm_platform_get_multi_idx(NM_PLATFORM_GET), priv->ifindex, NM_IP_CONFIG_SOURCE_PPP); + nm_l3_config_data_set_mtu(l3cd, mtu); nm_l3_config_data_set_dns_priority(l3cd, AF_INET6, 0); address = (NMPlatformIP6Address){ From c6228a5815c5b2cb8a1016d5a1bee7a4f8cbbdb4 Mon Sep 17 00:00:00 2001 From: Thomas Haller Date: Thu, 9 Jun 2022 15:10:09 +0200 Subject: [PATCH 03/10] contrib: install iproute-tc in "nm-in-container.sh" --- contrib/scripts/nm-in-container.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/contrib/scripts/nm-in-container.sh b/contrib/scripts/nm-in-container.sh index c594fe402a..d5331e4c55 100755 --- a/contrib/scripts/nm-in-container.sh +++ b/contrib/scripts/nm-in-container.sh @@ -293,6 +293,7 @@ RUN dnf install -y \\ gtk-doc \\ intltool \\ iproute \\ + iproute-tc \\ iptables \\ jansson-devel \\ libasan \\ From d15f5420c71aab6a705ff86775d070d3cc64e449 Mon Sep 17 00:00:00 2001 From: Thomas Haller Date: Fri, 10 Jun 2022 08:24:58 +0200 Subject: [PATCH 04/10] contrib: use `less -f` in NM-log to avoid prompt for non-text input --- contrib/scripts/NM-log | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/contrib/scripts/NM-log b/contrib/scripts/NM-log index 05b7c4b940..85faea8639 100755 --- a/contrib/scripts/NM-log +++ b/contrib/scripts/NM-log @@ -73,11 +73,11 @@ NM-log() { else a="${1--}" shift - /usr/bin/less "$a" "$@" + /usr/bin/less -f "$a" "$@" fi ) | \ NM_LOG_GREP="$NM_LOG_GREP" NM-colorize | \ - LESS=FRSXM less -R --shift=5 + LESS=FRSXM less -f -R --shift=5 } if [[ "$NM_not_sourced" != "" ]]; then From d6429d3ddbc7a7426dc2e199fec7a0910ebe5033 Mon Sep 17 00:00:00 2001 From: Beniamino Galvani Date: Mon, 6 Jun 2022 15:43:05 +0200 Subject: [PATCH 05/10] device: ensure DHCP is restarted every time the link goes up Currently we call nm_device_update_dynamic_ip_setup() in carrier_changed() every time the carrier goes up again and the device is activating, to kick a restart of DHCP. Since we process link events in a idle handler, it can happen that the handler is called only once for different events; in particular device_link_changed() might be called once for a link-down/link-up sequence. carrier_changed() is "level-triggered" - it cares only about the current carrier state. nm_device_update_dynamic_ip_setup() should instead be "edge-triggered" - invoked every time the link goes from down to up. We have a mechanism for that in device_link_changed(), use it. Fixes-test: @ipv4_spurious_leftover_route https://bugzilla.redhat.com/show_bug.cgi?id=2079406 https://gitlab.freedesktop.org/NetworkManager/NetworkManager/-/merge_requests/1250 --- src/core/devices/nm-device.c | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) diff --git a/src/core/devices/nm-device.c b/src/core/devices/nm-device.c index e6c79e994c..94d37e4a8a 100644 --- a/src/core/devices/nm-device.c +++ b/src/core/devices/nm-device.c @@ -6276,10 +6276,6 @@ carrier_changed(NMDevice *self, gboolean carrier) if (nm_device_is_master(self)) { if (carrier) { - /* Force master to retry getting ip addresses when carrier - * is restored. */ - if (priv->state == NM_DEVICE_STATE_ACTIVATED) - nm_device_update_dynamic_ip_setup(self); /* If needed, also resume IP configuration that is * waiting for carrier. */ if (priv->state == NM_DEVICE_STATE_IP_CONFIG) @@ -6307,12 +6303,6 @@ carrier_changed(NMDevice *self, gboolean carrier) * the device. */ nm_device_emit_recheck_auto_activate(self); - } else if (priv->state == NM_DEVICE_STATE_ACTIVATED) { - /* If the device is active without a carrier (probably because it is - * tagged for carrier ignore) ensure that when the carrier appears we - * renew DHCP leases and such. - */ - nm_device_update_dynamic_ip_setup(self); } } else { if (priv->state == NM_DEVICE_STATE_UNAVAILABLE) { @@ -6700,6 +6690,14 @@ device_link_changed(gpointer user_data) if (priv->state >= NM_DEVICE_STATE_IP_CONFIG && priv->state <= NM_DEVICE_STATE_ACTIVATED && !nm_device_sys_iface_state_is_external(self)) nm_device_l3cfg_commit(self, NM_L3_CFG_COMMIT_TYPE_REAPPLY, FALSE); + + /* If the device is active without a carrier (probably because it is + * tagged for carrier ignore) ensure that when the carrier appears we + * renew DHCP leases and such. + */ + if (priv->state == NM_DEVICE_STATE_ACTIVATED) { + nm_device_update_dynamic_ip_setup(self); + } } if (update_unmanaged_specs) From e8275d713921a8f663d16bee0607756c50dcfa5a Mon Sep 17 00:00:00 2001 From: Beniamino Galvani Date: Fri, 27 May 2022 17:11:02 +0200 Subject: [PATCH 06/10] core: add nm_l3cfg_block_obj_pruning() Add a function prevent the removal of addresses and routes from the interface for a given address family. --- src/core/nm-l3cfg.c | 95 ++++++++++++++++++++++++++++++++++++++------- src/core/nm-l3cfg.h | 5 +++ 2 files changed, 87 insertions(+), 13 deletions(-) diff --git a/src/core/nm-l3cfg.c b/src/core/nm-l3cfg.c index dff6959215..bbb42d722f 100644 --- a/src/core/nm-l3cfg.c +++ b/src/core/nm-l3cfg.c @@ -207,6 +207,12 @@ typedef struct { gboolean force_commit_once : 1; } L3ConfigData; +struct _NML3CfgBlockHandle { + NML3Cfg *self; + CList lst; + gboolean is_ipv4; +}; + /*****************************************************************************/ NM_GOBJECT_PROPERTIES_DEFINE(NML3Cfg, PROP_NETNS, PROP_IFINDEX, ); @@ -248,6 +254,14 @@ typedef struct _NML3CfgPrivate { GSource *nacd_event_down_source; gint64 nacd_event_down_ratelimited_until_msec; + union { + struct { + CList blocked_lst_head_6; + CList blocked_lst_head_4; + }; + CList blocked_lst_head_x[2]; + }; + union { struct { NMIPConfig *ipconfig_6; @@ -4239,20 +4253,29 @@ _l3_commit_one(NML3Cfg *self, g_array_append_val(ipv6_temp_addrs_keep, addr->address); } } - addresses_prune = - nm_platform_ip_address_get_prune_list(self->priv.platform, - addr_family, - self->priv.ifindex, - nm_g_array_data(ipv6_temp_addrs_keep), - nm_g_array_len(ipv6_temp_addrs_keep)); - routes_prune = nm_platform_ip_route_get_prune_list(self->priv.platform, - addr_family, - self->priv.ifindex, - route_table_sync); - _obj_state_zombie_lst_prune_all(self, addr_family); - } else - _obj_state_zombie_lst_get_prune_lists(self, addr_family, &addresses_prune, &routes_prune); + if (c_list_is_empty(&self->priv.p->blocked_lst_head_x[IS_IPv4])) { + addresses_prune = + nm_platform_ip_address_get_prune_list(self->priv.platform, + addr_family, + self->priv.ifindex, + nm_g_array_data(ipv6_temp_addrs_keep), + nm_g_array_len(ipv6_temp_addrs_keep)); + + routes_prune = nm_platform_ip_route_get_prune_list(self->priv.platform, + addr_family, + self->priv.ifindex, + route_table_sync); + _obj_state_zombie_lst_prune_all(self, addr_family); + } + } else { + if (c_list_is_empty(&self->priv.p->blocked_lst_head_x[IS_IPv4])) { + _obj_state_zombie_lst_get_prune_lists(self, + addr_family, + &addresses_prune, + &routes_prune); + } + } /* FIXME(l3cfg): need to honor and set nm_l3_config_data_get_ndisc_*(). */ /* FIXME(l3cfg): need to honor and set nm_l3_config_data_get_mtu(). */ @@ -4373,6 +4396,48 @@ _l3_commit(NML3Cfg *self, NML3CfgCommitType commit_type, gboolean is_idle) _nm_l3cfg_emit_signal_notify_simple(self, NM_L3_CONFIG_NOTIFY_TYPE_POST_COMMIT); } +NML3CfgBlockHandle * +nm_l3cfg_block_obj_pruning(NML3Cfg *self, int addr_family) +{ + const int IS_IPv4 = NM_IS_IPv4(addr_family); + NML3CfgBlockHandle *handle; + + if (!self) + return NULL; + + nm_assert(NM_IS_L3CFG(self)); + + handle = g_slice_new(NML3CfgBlockHandle); + handle->self = g_object_ref(self); + handle->is_ipv4 = IS_IPv4; + c_list_link_tail(&self->priv.p->blocked_lst_head_x[IS_IPv4], &handle->lst); + + _LOGT("obj-pruning for IPv%c: blocked (%zu)", + nm_utils_addr_family_to_char(addr_family), + c_list_length(&self->priv.p->blocked_lst_head_x[IS_IPv4])); + + return handle; +} + +void +nm_l3cfg_unblock_obj_pruning(NML3CfgBlockHandle *handle) +{ + gs_unref_object NML3Cfg *self = handle->self; + const int IS_IPv4 = handle->is_ipv4; + + nm_assert(NM_IS_L3CFG(self)); + nm_assert(c_list_is_linked(&handle->lst)); + nm_assert(c_list_contains(&self->priv.p->blocked_lst_head_x[IS_IPv4], &handle->lst)); + + c_list_unlink_stale(&handle->lst); + + _LOGT("obj-pruning for IPv%c: unblocked (%zu)", + IS_IPv4 ? '4' : '6', + c_list_length(&self->priv.p->blocked_lst_head_x[IS_IPv4])); + + nm_g_slice_free(handle); +} + /* See DOC(l3cfg:commit-type) */ void nm_l3cfg_commit(NML3Cfg *self, NML3CfgCommitType commit_type) @@ -4677,6 +4742,8 @@ nm_l3cfg_init(NML3Cfg *self) c_list_init(&self->priv.p->obj_state_lst_head); c_list_init(&self->priv.p->obj_state_temporary_not_available_lst_head); c_list_init(&self->priv.p->obj_state_zombie_lst_head); + c_list_init(&self->priv.p->blocked_lst_head_4); + c_list_init(&self->priv.p->blocked_lst_head_6); self->priv.p->obj_state_hash = g_hash_table_new_full(nmp_object_indirect_id_hash, nmp_object_indirect_id_equal, @@ -4725,6 +4792,8 @@ finalize(GObject *object) nm_assert(!self->priv.p->ipv4ll); nm_assert(c_list_is_empty(&self->priv.p->commit_type_lst_head)); + nm_assert(c_list_is_empty(&self->priv.p->blocked_lst_head_4)); + nm_assert(c_list_is_empty(&self->priv.p->blocked_lst_head_6)); nm_assert(!self->priv.p->commit_on_idle_source); diff --git a/src/core/nm-l3cfg.h b/src/core/nm-l3cfg.h index 63ccd5a141..bead34933e 100644 --- a/src/core/nm-l3cfg.h +++ b/src/core/nm-l3cfg.h @@ -456,4 +456,9 @@ struct _NMIPConfig *nm_l3cfg_ipconfig_acquire(NML3Cfg *self, int addr_family); /*****************************************************************************/ +typedef struct _NML3CfgBlockHandle NML3CfgBlockHandle; + +NML3CfgBlockHandle *nm_l3cfg_block_obj_pruning(NML3Cfg *self, int addr_family); +void nm_l3cfg_unblock_obj_pruning(NML3CfgBlockHandle *handle); + #endif /* __NM_L3CFG_H__ */ From b41b11d613a2ed466975eaf4b7a13ff67662a772 Mon Sep 17 00:00:00 2001 From: Beniamino Galvani Date: Mon, 13 Jun 2022 11:21:15 +0200 Subject: [PATCH 07/10] ppp: don't remove addresses from interface while IPCP/IPV6CP is running pppd also tries to configure addresses by itself through some ioctls. If we remove between those calls an address that was added, pppd fails and quits. To avoid this race condition, don't remove addresses while IPCP and IPV6CP are running. Once pppd sends an IP configuration, it has finished configuring the interface and we can proceed normally. https://bugzilla.redhat.com/show_bug.cgi?id=2085382 --- src/core/devices/nm-device-ppp.c | 37 +++++++++++++++++++++++++++----- 1 file changed, 32 insertions(+), 5 deletions(-) diff --git a/src/core/devices/nm-device-ppp.c b/src/core/devices/nm-device-ppp.c index 61e32348c4..6615b65e02 100644 --- a/src/core/devices/nm-device-ppp.c +++ b/src/core/devices/nm-device-ppp.c @@ -23,6 +23,13 @@ typedef struct _NMDevicePppPrivate { NMPppMgr *ppp_mgr; + union { + struct { + NML3CfgBlockHandle *l3cfg_block_handle_6; + NML3CfgBlockHandle *l3cfg_block_handle_4; + }; + NML3CfgBlockHandle *l3cfg_block_handle_x[2]; + }; } NMDevicePppPrivate; struct _NMDevicePpp { @@ -69,8 +76,10 @@ _ppp_mgr_stage3_maybe_ready(NMDevicePpp *self) const NMPppMgrIPData *ip_data; ip_data = nm_ppp_mgr_get_ip_data(priv->ppp_mgr, addr_family); - if (ip_data->ip_received) + if (ip_data->ip_received) { + nm_clear_pointer(&priv->l3cfg_block_handle_x[IS_IPv4], nm_l3cfg_unblock_obj_pruning); nm_device_devip_set_state(device, addr_family, NM_DEVICE_IP_STATE_READY, ip_data->l3cd); + } } if (nm_ppp_mgr_get_state(priv->ppp_mgr) >= NM_PPP_MGR_STATE_HAVE_IP_CONFIG) @@ -80,9 +89,10 @@ _ppp_mgr_stage3_maybe_ready(NMDevicePpp *self) static void _ppp_mgr_callback(NMPppMgr *ppp_mgr, const NMPppMgrCallbackData *callback_data, gpointer user_data) { - NMDevicePpp *self = NM_DEVICE_PPP(user_data); - NMDevice *device = NM_DEVICE(self); - NMDeviceState device_state; + NMDevicePpp *self = NM_DEVICE_PPP(user_data); + NMDevice *device = NM_DEVICE(self); + NMDevicePppPrivate *priv = NM_DEVICE_PPP_GET_PRIVATE(self); + NMDeviceState device_state; if (callback_data->callback_type != NM_PPP_MGR_CALLBACK_TYPE_STATE_CHANGED) return; @@ -112,6 +122,19 @@ _ppp_mgr_callback(NMPppMgr *ppp_mgr, const NMPppMgrCallbackData *callback_data, return; } + /* pppd also tries to configure addresses by itself through some + * ioctls. If we remove between those calls an address that was added, + * pppd fails and quits. Temporarily block the removal of addresses + * and routes. */ + if (!priv->l3cfg_block_handle_4) { + priv->l3cfg_block_handle_4 = + nm_l3cfg_block_obj_pruning(nm_device_get_l3cfg(device), AF_INET); + } + if (!priv->l3cfg_block_handle_6) { + priv->l3cfg_block_handle_6 = + nm_l3cfg_block_obj_pruning(nm_device_get_l3cfg(device), AF_INET6); + } + if (old_name) nm_manager_remove_device(NM_MANAGER_GET, old_name, NM_DEVICE_TYPE_PPP); @@ -257,7 +280,11 @@ create_and_realize(NMDevice *device, static void deactivate(NMDevice *device) { - NMDevicePpp *self = NM_DEVICE_PPP(device); + NMDevicePpp *self = NM_DEVICE_PPP(device); + NMDevicePppPrivate *priv = NM_DEVICE_PPP_GET_PRIVATE(self); + + nm_clear_pointer(&priv->l3cfg_block_handle_4, nm_l3cfg_unblock_obj_pruning); + nm_clear_pointer(&priv->l3cfg_block_handle_6, nm_l3cfg_unblock_obj_pruning); _ppp_mgr_cleanup(self); } From 0fa8c5f94c1ed653d0d4c0f1ac31f23efc3b5954 Mon Sep 17 00:00:00 2001 From: Lubomir Rintel Date: Sun, 29 May 2022 22:40:10 +0200 Subject: [PATCH 08/10] device: stop checking the IP configuration state when cancelling activation The @bond_mode_8023ad test has been seen failing, with a log like this: [...3.0484] device[...] (eth1): Activation: connection 'bond0.0' master deactivated [...3.0484] device[...] (eth1): add_pending_action (2): 'queued-state-change-deactivating' [...3.0484] device[...] (eth1): queue-state[deactivating, reason:new-activation, id:709]: queue state change What happened is that eth1 has been activating. It was already enslaved to a bond and was in an ip-config state when the bond was removed. A change to "deactivating" state has been enqueued. But then this happened: [...3.0942] device[...] (eth1): ip4: check-state: state done => done, is_failed=0, is_pending=0, is_started=0 temp_na=0, may-fail-4=1, may-fail-6=1; disabled4; manualip4=done; ignore6 manualip6=done [...3.0942] device[...] (eth1): ip: check-state: (combined) state pending => done [...3.0943] device[...] (eth1): ip: set (combined) state done (was pending, reason: check-ip-state) [...3.0943] device (eth1): state change: ip-config -> ip-check (reason 'none', sys-iface-state: 'managed') [...3.0943] device[...] (eth1): add_pending_action (3): 'in-state-change' [...3.0943] device[...] (eth1): queue-state[deactivating, reason:new-activation, id:709]: clear queued state change The IP config succeeded and the queued "deactivating" change was overriden by the IP4 check result, prompting a change to "ip-check". With the master still missing. Not good. Let's terminate the appempts to check the IP state when we cancel the activation, so that it doesn't override the enqueued state change. Fixes-test: @bond_mode_8023ad https://bugzilla.redhat.com/show_bug.cgi?id=2080928 https://gitlab.freedesktop.org/NetworkManager/NetworkManager/-/merge_requests/1245 --- src/core/devices/nm-device.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/core/devices/nm-device.c b/src/core/devices/nm-device.c index 94d37e4a8a..8a1c80f313 100644 --- a/src/core/devices/nm-device.c +++ b/src/core/devices/nm-device.c @@ -15108,6 +15108,9 @@ _cancel_activation(NMDevice *self) _dispatcher_cleanup(self); ip_check_gw_ping_cleanup(self); + _dev_ip_state_cleanup(self, AF_INET, FALSE); + _dev_ip_state_cleanup(self, AF_INET6, FALSE); + /* Break the activation chain */ activation_source_clear(self); } From 1fe8166fc9fb93dc64992325e31e7611725aaeb2 Mon Sep 17 00:00:00 2001 From: Lubomir Rintel Date: Thu, 2 Jun 2022 13:26:10 +0200 Subject: [PATCH 09/10] device: only deactivate when the master we've enslaved to goes away Sometimes weird things happen. Let dummy0 be an externally created device that has a master. We decide to activate a connection that has no master on it: active-connection[0x55ed7ba78400]: constructed (NMActRequest, version-id 4, type managed) device[0a458361f9fed8f5] (dummy0): sys-iface-state: external -> managed device[0a458361f9fed8f5] (dummy0): queue activation request waiting for currently active connection to disconnect device (dummy0): disconnecting for new activation request. device (dummy0): state change: activated -> deactivating (reason 'new-activation', sys-iface-state: 'managed') device (br0): master: release one slave 0a458361f9fed8f5/dummy0 (enslaved)(no-config) Note the "no-config" above. We'set priv->master = NULL, but didn't communicate the change to the platform. I believe this is not good. device (br0): bridge port dummy0 was detached device (dummy0): released from master device br0 active-connection[0x55ed7ba782e0]: set state deactivating (was activated) device (dummy0): ip4: set state none (was done, reason: ip-state-clear) device (dummy0): ip6: set state none (was done, reason: ip-state-clear) device (dummy0): state change: deactivating -> disconnected (reason 'new-activation', sys-iface-state: 'managed') platform: (dummy0) emit signal link-changed changed: 102: dummy0 mtu 1500 master 101 arp 1 dummy* init addrgenmode none addr EA:8D:DD:DF:1F:B7 brd FF:FF:FF:FF:FF:FF driver dummy rx:0,0 tx:39,4746 Now the platform sent us a new link, the "master" property is still set. device[0a458361f9fed8f5] (dummy0): queued link change for ifindex 102 device[0a458361f9fed8f5] (dummy0): deactivating device (reason 'new-activation') [60] device (dummy0): ip: set (combined) state none (was done, reason: ip-state-clear) config: device-state: write #102 (/run/NetworkManager/devices/102); managed=managed, perm-hw-addr-fake=EA:8D:DD:DF:1F:B7, route-metric-default=0-0 active-connection[0x55ed7ba782e0]: set state deactivated (was deactivating) active-connection[0x55ed7ba782e0]: check-master-ready: already signalled (state deactivated, master 0x55ed7ba781c0 is in state activated) device (dummy0): Activation: starting connection 'dummy1' (ec6fca51-84e6-4a5b-a297-f602252c9f69) device[0a458361f9fed8f5] (dummy0): activation-stage: schedule activate_stage1_device_prepare l3cfg[ae290b5c1f585d6c,ifindex=102]: emit signal (platform-change-on-idle, obj-type-flags=0x2a) device (br0): master: add one slave 0a458361f9fed8f5/dummy0 Amidst the new activation we're processing the netlink message we got. We set priv->master back, effectively nullifying the release above. device (dummy0): state change: disconnected -> prepare (reason 'none', sys-iface-state: 'managed') device[0a458361f9fed8f5] (dummy0): add_pending_action (2): 'in-state-change' active-connection[0x55ed7ba78400]: set state activating (was unknown) manager: NetworkManager state is now CONNECTING active-connection[0x55ed7ba78400]: check-master-ready: not signalling (state activating, no master) device[8fff58d61c7686ce] (br0): slave dummy0 state change 30 (disconnected) -> 40 (prepare) device[0a458361f9fed8f5] (dummy0): remove_pending_action (1): 'in-state-change' device (br0): master: release one slave 0a458361f9fed8f5/dummy0 (not enslaved) (force-configure) platform: (dummy0) link: releasing 102 from master 'br0' (101) device (br0): detached bridge port dummy0 Now stage1 cleans the device up, removing it from the master. device[0a458361f9fed8f5] (dummy0): Activation: connection 'dummy1' master deactivated device (dummy0): ip4: set state none (was pending, reason: ip-state-clear) device (dummy0): ip6: set state none (was pending, reason: ip-state-clear) device[0a458361f9fed8f5] (dummy0): add_pending_action (2): 'queued-state-change-deactivating' We decide to deal with this by enqueuing a deactivation. That is not great -- we shouldn't even have had this master! This patch takes the deactivation path only if we were willingly enslaved to the master in question. --- src/core/devices/nm-device.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/src/core/devices/nm-device.c b/src/core/devices/nm-device.c index 8a1c80f313..753e320066 100644 --- a/src/core/devices/nm-device.c +++ b/src/core/devices/nm-device.c @@ -7920,6 +7920,9 @@ nm_device_slave_notify_release(NMDevice *self, NMDeviceStateReason reason) g_return_if_fail(priv->master); + if (!priv->is_enslaved) + return; + if (priv->state > NM_DEVICE_STATE_DISCONNECTED && priv->state <= NM_DEVICE_STATE_ACTIVATED) { switch (nm_device_state_reason_check(reason)) { case NM_DEVICE_STATE_REASON_DEPENDENCY_FAILED: @@ -7949,14 +7952,12 @@ nm_device_slave_notify_release(NMDevice *self, NMDeviceStateReason reason) } else _LOGI(LOGD_DEVICE, "released from master device %s", nm_device_get_iface(priv->master)); - if (priv->is_enslaved) { - priv->is_enslaved = FALSE; + priv->is_enslaved = FALSE; - _notify(self, PROP_MASTER); + _notify(self, PROP_MASTER); - nm_clear_pointer(&NM_DEVICE_GET_PRIVATE(priv->master)->ports_variant, g_variant_unref); - nm_gobject_notify_together(priv->master, PROP_PORTS, PROP_SLAVES); - } + nm_clear_pointer(&NM_DEVICE_GET_PRIVATE(priv->master)->ports_variant, g_variant_unref); + nm_gobject_notify_together(priv->master, PROP_PORTS, PROP_SLAVES); } /** From 1f61f3f2398d11004cdb2f67a05bcb4385f310fe Mon Sep 17 00:00:00 2001 From: Lubomir Rintel Date: Thu, 2 Jun 2022 13:26:17 +0200 Subject: [PATCH 10/10] device: release slaves when an external device is going managed When we're deactivating an externally created device that has a master because we're activating a connection on it, actually remove the device from the master. Otherwise unpleasant things happen: active-connection[0x55ed7ba78400]: constructed (NMActRequest, version-id 4, type managed) device[0a458361f9fed8f5] (dummy0): sys-iface-state: external -> managed device[0a458361f9fed8f5] (dummy0): queue activation request waiting for currently active connection to disconnect device (dummy0): disconnecting for new activation request. device (dummy0): state change: activated -> deactivating (reason 'new-activation', sys-iface-state: 'managed') device (br0): master: release one slave 0a458361f9fed8f5/dummy0 (enslaved)(no-config) Note the "no-config" above. We'set priv->master = NULL, but didn't communicate the change to the platform. I believe this is not good. This patch changes that. device (br0): bridge port dummy0 was detached device (dummy0): released from master device br0 active-connection[0x55ed7ba782e0]: set state deactivating (was activated) device (dummy0): ip4: set state none (was done, reason: ip-state-clear) device (dummy0): ip6: set state none (was done, reason: ip-state-clear) device (dummy0): state change: deactivating -> disconnected (reason 'new-activation', sys-iface-state: 'managed') platform: (dummy0) emit signal link-changed changed: 102: dummy0 mtu 1500 master 101 arp 1 dummy* init addrgenmode none addr EA:8D:DD:DF:1F:B7 brd FF:FF:FF:FF:FF:FF driver dummy rx:0,0 tx:39,4746 Now the platform sent us a new link, the "master" property is still set. device[0a458361f9fed8f5] (dummy0): queued link change for ifindex 102 device[0a458361f9fed8f5] (dummy0): deactivating device (reason 'new-activation') [60] device (dummy0): ip: set (combined) state none (was done, reason: ip-state-clear) config: device-state: write #102 (/run/NetworkManager/devices/102); managed=managed, perm-hw-addr-fake=EA:8D:DD:DF:1F:B7, route-metric-default=0-0 active-connection[0x55ed7ba782e0]: set state deactivated (was deactivating) active-connection[0x55ed7ba782e0]: check-master-ready: already signalled (state deactivated, master 0x55ed7ba781c0 is in state activated) device (dummy0): Activation: starting connection 'dummy1' (ec6fca51-84e6-4a5b-a297-f602252c9f69) device[0a458361f9fed8f5] (dummy0): activation-stage: schedule activate_stage1_device_prepare l3cfg[ae290b5c1f585d6c,ifindex=102]: emit signal (platform-change-on-idle, obj-type-flags=0x2a) device (br0): master: add one slave 0a458361f9fed8f5/dummy0 Amidst the new activation we're processing the netlink message we got. We set priv->master back, effectively nullifying the release above. Sad. device (dummy0): state change: disconnected -> prepare (reason 'none', sys-iface-state: 'managed') device[0a458361f9fed8f5] (dummy0): add_pending_action (2): 'in-state-change' active-connection[0x55ed7ba78400]: set state activating (was unknown) manager: NetworkManager state is now CONNECTING active-connection[0x55ed7ba78400]: check-master-ready: not signalling (state activating, no master) device[8fff58d61c7686ce] (br0): slave dummy0 state change 30 (disconnected) -> 40 (prepare) device[0a458361f9fed8f5] (dummy0): remove_pending_action (1): 'in-state-change' device (br0): master: release one slave 0a458361f9fed8f5/dummy0 (not enslaved) (force-configure) platform: (dummy0) link: releasing 102 from master 'br0' (101) device (br0): detached bridge port dummy0 Now things go south. The stage1 cleans the device up, removing it from the master and the device itself decides it should deactivate itself because it lots its master regardless of the fact that it should not have one and it's in fact an unwanted carryover from previous activation. I believe this is also wrong. device[0a458361f9fed8f5] (dummy0): Activation: connection 'dummy1' master deactivated device (dummy0): ip4: set state none (was pending, reason: ip-state-clear) device (dummy0): ip6: set state none (was pending, reason: ip-state-clear) device[0a458361f9fed8f5] (dummy0): add_pending_action (2): 'queued-state-change-deactivating' device[0a458361f9fed8f5] (dummy0): queue-state[deactivating, reason:connection-assumed, id:298]: queue state change device[0a458361f9fed8f5] (dummy0): activation-stage: synchronously invoke activate_stage2_device_config device (dummy0): state change: prepare -> config (reason 'none', sys-iface-state: 'managed') Now things are really weird. We synchronously go to config, effectively overriding the queued deactivation. We've really messed up. --- src/core/devices/nm-device.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/core/devices/nm-device.c b/src/core/devices/nm-device.c index 753e320066..21828f88b8 100644 --- a/src/core/devices/nm-device.c +++ b/src/core/devices/nm-device.c @@ -7654,8 +7654,9 @@ slave_state_changed(NMDevice *slave, } if (release) { - configure = priv->sys_iface_state == NM_DEVICE_SYS_IFACE_STATE_MANAGED - && nm_device_sys_iface_state_get(slave) != NM_DEVICE_SYS_IFACE_STATE_EXTERNAL; + configure = (priv->sys_iface_state == NM_DEVICE_SYS_IFACE_STATE_MANAGED + && nm_device_sys_iface_state_get(slave) != NM_DEVICE_SYS_IFACE_STATE_EXTERNAL) + || nm_device_sys_iface_state_get(slave) == NM_DEVICE_SYS_IFACE_STATE_MANAGED; nm_device_master_release_slave(self, slave,