From bc53ad4976eb736a5e7781d0a8aeb0e5549c117a Mon Sep 17 00:00:00 2001 From: Thomas Haller Date: Tue, 28 Nov 2023 12:37:01 +0100 Subject: [PATCH 1/6] platform: fix handling "weight" for IPv4 routes The hash/cmp functions were wrong with respect to IPv4 routes. Fix that. - the weight was cast to a guint8, which is too small to hold all values. - NM_PLATFORM_IP_ROUTE_CMP_TYPE_ID comparison would normalize a zero weight to one NM_CMP_DIRECT(NM_MAX(a->weight, 1u), NM_MAX(b->weight, 1u)); That was very wrong. - the handling of the weight depends on the n_nexthops parameter. See _ip4_route_weight_normalize(). The remarkable thing is that upper layers find it useful to track IPv4 single-hop routes with a non-zero weight. Consequently, this is honored by NM_PLATFORM_IP_ROUTE_CMP_TYPE_ID to treat single-hop routes different, when their weight differs. However, adding such a route in kernel will not work. To kernel, single-hop routes have no weight. While the route exists as distinct in our hash tables, according to the implemented identity, it never exists in kernel (or NMPlatform cache). Well, you can call nm_platform_ip_route_add() with such a route, but the result will have a weight of zero (making it a different route). See also nm_platform_ip_route_normalize(). This works all mostly fine. The only thing to take care is that you cannot look into the NMPlatform cache and ever find a IPv4 single-hop route with a non-zero weight. If you preform such a lookup, realize that such routes don't exist in platform. You can however normalize the weight to zero first (nm_platform_ip_route_normalize()) and look for a similar route with a zero weight. Fixes: 1bbdecf5e125 ('platform: manage ECMP routes') --- src/libnm-platform/nm-platform.c | 116 ++++++++++++++++++++++++------- src/libnm-platform/nm-platform.h | 37 ++++++---- 2 files changed, 113 insertions(+), 40 deletions(-) diff --git a/src/libnm-platform/nm-platform.c b/src/libnm-platform/nm-platform.c index a4751113da..8b4c137eb5 100644 --- a/src/libnm-platform/nm-platform.c +++ b/src/libnm-platform/nm-platform.c @@ -5274,6 +5274,33 @@ _route_pref_normalize(guint8 pref) : NM_ICMPV6_ROUTER_PREF_MEDIUM; } +static guint16 +_ip4_route_weight_normalize(guint n_nexthops, guint16 weight, gboolean normalize_ecmp_weight) +{ + if (n_nexthops > 1u) { + /* This is a multihop-route. The weight is relevant. + * + * We only normalize a zero to one (because in kernel such weights + * don't exist. */ + return NM_MAX(weight, 1u); + } + if (n_nexthops == 0) { + /* This route has no next-hop (e.g. blackhole type). The weight is + * always irrelevant. Normalize to zero. */ + return 0; + } + + /* We have a IPv4 single-hop route. In kernel, the weight does not exist. + * It's always zero. + * + * For upper layers, we find it useful to track such routes with a positive + * weight. They are candidates to be merged into a multi-hop ECMP route. + * + * Depending on what the caller requests, we normalize it (or leave it + * unchanged). */ + return normalize_ecmp_weight ? 0u : weight; +} + /** * nm_platform_ip_route_normalize: * @addr_family: AF_INET or AF_INET6 @@ -5286,6 +5313,15 @@ _route_pref_normalize(guint8 pref) * Note that this function is related to NM_PLATFORM_IP_ROUTE_CMP_TYPE_SEMANTICALLY * in that if two routes compare semantically equal, after normalizing they also shall * compare equal with NM_PLATFORM_IP_ROUTE_CMP_TYPE_FULL. + * + * Note that a positive "weight" of IPv4 single hop routes is not meaningful in + * kernel. While we track such routes at upper layers, they don't exist in + * kernel (well, they exist, with their weight set to zero, which makes them a + * different route according to NM_PLATFORM_IP_ROUTE_CMP_TYPE_ID. It will + * be normalized to zero too, making basically it a different route. + * + * Also, "metric_any" is normalized to FALSE. This also makes it a different route + * according to NM_PLATFORM_IP_ROUTE_CMP_TYPE_ID. */ void nm_platform_ip_route_normalize(int addr_family, NMPlatformIPRoute *route) @@ -5299,6 +5335,33 @@ nm_platform_ip_route_normalize(int addr_family, NMPlatformIPRoute *route) route->rt_source = nmp_utils_ip_config_source_round_trip_rtprot(route->rt_source); + /* For the most part, nm_platform_ip_route_normalize() tries to normalize some fields + * as it happens when they go through kernel. + * + * In most cases, NM_PLATFORM_IP_ROUTE_CMP_TYPE_ID comparison performs the same + * relaxed comparison. For example, normalize() will normalize "scope_inv", and also + * the NM_PLATFORM_IP_ROUTE_CMP_TYPE_ID comparison will do that on-the-fly. Optimally, + * looking into a hash table gives you the same result, whether you normalize the + * needle first (or whether the entries in the hash table are normalized). + * + * Unfortunately, that's not always the case. Examples: + * + * - "metric": we have a "metric_any" field. This is used by higher layers + * to indicate that the metric is dynamically chosen (e.g. by the default + * metric of the default route). As such, as far as NM_PLATFORM_IP_ROUTE_CMP_TYPE_ID + * is concerned, the "metric_any" and "metric" values are treated as distinguishing + * properties. But when we add a route in kernel, "metric_any" no longer exist. + * It becomes a fixed metric. Normalize will fix the metric. + * - "weight": for IPv4 single-hop routes, the weight does not exist in kernel. We however + * use the field to track ECMP information in higher layers. Consequently, + * NM_PLATFORM_IP_ROUTE_CMP_TYPE_ID treats the weight as-is, while normalization + * (and adding it to kernel) will mangle it. + * + * You thus must be careful when you track NMPlatformIP4Route that make use of such + * higher-level features, which cannot be represented in kernel or the NMPlatform + * cache. + */ + switch (addr_family) { case AF_INET: r4 = (NMPlatformIP4Route *) route; @@ -5306,6 +5369,8 @@ nm_platform_ip_route_normalize(int addr_family, NMPlatformIPRoute *route) route->metric_any = FALSE; r4->network = nm_ip4_addr_clear_host_address(r4->network, r4->plen); r4->scope_inv = _ip_route_scope_inv_get_normalized(r4); + r4->n_nexthops = nm_platform_ip4_route_get_n_nexthops(r4); + r4->weight = _ip4_route_weight_normalize(r4->n_nexthops, r4->weight, TRUE); break; case AF_INET6: r6 = (NMPlatformIP6Route *) route; @@ -7039,7 +7104,7 @@ nm_platform_ip4_route_to_string_full(const NMPlatformIP4Route *route, route->plen, n_nexthops <= 1 && s_gateway[0] ? " via " : "", n_nexthops <= 1 ? s_gateway : "", - NM_PRINT_FMT_QUOTED2(n_nexthops <= 1 && route->weight != 0, + NM_PRINT_FMT_QUOTED2(n_nexthops <= 1 && route->weight != 0u, " weight ", nm_sprintf_buf(weight_str, "%u", route->weight), ""), @@ -8596,12 +8661,11 @@ nm_platform_ip4_rt_nexthop_hash_update(const NMPlatformIP4RtNextHop *obj, gboolean for_id, NMHashState *h) { - guint8 w; + guint16 w; nm_assert(obj); w = for_id ? NM_MAX(obj->weight, 1u) : obj->weight; - nm_hash_update_vals(h, obj->ifindex, obj->gateway, w); } @@ -8610,6 +8674,8 @@ nm_platform_ip4_route_hash_update(const NMPlatformIP4Route *obj, NMPlatformIPRouteCmpType cmp_type, NMHashState *h) { + guint n_nexthops; + switch (cmp_type) { case NM_PLATFORM_IP_ROUTE_CMP_TYPE_WEAK_ID: case NM_PLATFORM_IP_ROUTE_CMP_TYPE_ECMP_ID: @@ -8647,15 +8713,17 @@ nm_platform_ip4_route_hash_update(const NMPlatformIP4Route *obj, obj->lock_mtu, obj->lock_mss)); if (cmp_type == NM_PLATFORM_IP_ROUTE_CMP_TYPE_ID) { + n_nexthops = nm_platform_ip4_route_get_n_nexthops(obj); nm_hash_update_vals(h, obj->ifindex, - nm_platform_ip4_route_get_n_nexthops(obj), + n_nexthops, obj->gateway, - (guint8) NM_MAX(obj->weight, 1u)); + _ip4_route_weight_normalize(n_nexthops, obj->weight, FALSE)); } } break; case NM_PLATFORM_IP_ROUTE_CMP_TYPE_SEMANTICALLY: + n_nexthops = nm_platform_ip4_route_get_n_nexthops(obj); nm_hash_update_vals( h, obj->type_coerced, @@ -8664,9 +8732,9 @@ nm_platform_ip4_route_hash_update(const NMPlatformIP4Route *obj, nm_ip4_addr_clear_host_address(obj->network, obj->plen), obj->plen, obj->metric, - nm_platform_ip4_route_get_n_nexthops(obj), + n_nexthops, obj->gateway, - (guint8) NM_MAX(obj->weight, 1u), + _ip4_route_weight_normalize(n_nexthops, obj->weight, FALSE), nmp_utils_ip_config_source_round_trip_rtprot(obj->rt_source), _ip_route_scope_inv_get_normalized(obj), obj->tos, @@ -8732,14 +8800,9 @@ nm_platform_ip4_rt_nexthop_cmp(const NMPlatformIP4RtNextHop *a, const NMPlatformIP4RtNextHop *b, gboolean for_id) { - guint8 w_a; - guint8 w_b; + guint16 w_a; + guint16 w_b; - /* Note that weight zero is not valid (in kernel). We thus treat - * weight zero usually the same as 1. - * - * Not here for cmp/hash_update functions. These functions check for the exact - * bit-pattern, and not the it means at other places. */ NM_CMP_SELF(a, b); NM_CMP_FIELD(a, b, ifindex); NM_CMP_FIELD(a, b, gateway); @@ -8756,6 +8819,8 @@ nm_platform_ip4_route_cmp(const NMPlatformIP4Route *a, const NMPlatformIP4Route *b, NMPlatformIPRouteCmpType cmp_type) { + guint n_nexthops; + NM_CMP_SELF(a, b); switch (cmp_type) { case NM_PLATFORM_IP_ROUTE_CMP_TYPE_ECMP_ID: @@ -8802,9 +8867,10 @@ nm_platform_ip4_route_cmp(const NMPlatformIP4Route *a, if (cmp_type == NM_PLATFORM_IP_ROUTE_CMP_TYPE_ID) { NM_CMP_FIELD(a, b, ifindex); NM_CMP_FIELD(a, b, gateway); - NM_CMP_DIRECT(NM_MAX(a->weight, 1u), NM_MAX(b->weight, 1u)); - NM_CMP_DIRECT(nm_platform_ip4_route_get_n_nexthops(a), - nm_platform_ip4_route_get_n_nexthops(b)); + n_nexthops = nm_platform_ip4_route_get_n_nexthops(a); + NM_CMP_DIRECT(n_nexthops, nm_platform_ip4_route_get_n_nexthops(b)); + NM_CMP_DIRECT(_ip4_route_weight_normalize(n_nexthops, a->weight, FALSE), + _ip4_route_weight_normalize(n_nexthops, b->weight, FALSE)); } } break; @@ -8825,16 +8891,16 @@ nm_platform_ip4_route_cmp(const NMPlatformIP4Route *a, NM_CMP_FIELD(a, b, plen); NM_CMP_FIELD_UNSAFE(a, b, metric_any); NM_CMP_FIELD(a, b, metric); - if (cmp_type == NM_PLATFORM_IP_ROUTE_CMP_TYPE_SEMANTICALLY) { - NM_CMP_DIRECT(nm_platform_ip4_route_get_n_nexthops(a), - nm_platform_ip4_route_get_n_nexthops(b)); - } else - NM_CMP_FIELD(a, b, n_nexthops); NM_CMP_FIELD(a, b, gateway); - if (cmp_type == NM_PLATFORM_IP_ROUTE_CMP_TYPE_SEMANTICALLY) - NM_CMP_DIRECT(NM_MAX(a->weight, 1u), NM_MAX(b->weight, 1u)); - else + if (cmp_type == NM_PLATFORM_IP_ROUTE_CMP_TYPE_SEMANTICALLY) { + n_nexthops = nm_platform_ip4_route_get_n_nexthops(a); + NM_CMP_DIRECT(n_nexthops, nm_platform_ip4_route_get_n_nexthops(b)); + NM_CMP_DIRECT(_ip4_route_weight_normalize(n_nexthops, a->weight, FALSE), + _ip4_route_weight_normalize(n_nexthops, b->weight, FALSE)); + } else { + NM_CMP_FIELD(a, b, n_nexthops); NM_CMP_FIELD(a, b, weight); + } if (cmp_type == NM_PLATFORM_IP_ROUTE_CMP_TYPE_SEMANTICALLY) { NM_CMP_DIRECT(nmp_utils_ip_config_source_round_trip_rtprot(a->rt_source), nmp_utils_ip_config_source_round_trip_rtprot(b->rt_source)); diff --git a/src/libnm-platform/nm-platform.h b/src/libnm-platform/nm-platform.h index 557b4190a8..2794d4b36f 100644 --- a/src/libnm-platform/nm-platform.h +++ b/src/libnm-platform/nm-platform.h @@ -444,18 +444,27 @@ struct _NMPlatformIP4Route { * pref_src must match, unless set to 0.0.0.0 to match any. */ in_addr_t pref_src; - /* This is the weight of for the first next-hop, in case of n_nexthops > 1. + /* This is the weight of for the first next-hop. * - * If n_nexthops is zero, this value is undefined (should be zero). - * If n_nexthops is 1, this also doesn't matter, but it's usually set to - * zero. - * If n_nexthops is greater or equal to one, this is the weight of - * the first hop. + * For multi-hop routes (n_nexthops > 1) this is the weight of the first + * hop. Note that the valid range is from 1-256. Zero is treated the same + * as 1 (for NM_PLATFORM_IP_ROUTE_CMP_TYPE_ID comparison). * - * Note that upper layers (nm_utils_ip_route_attribute_to_platform()) use this flag to indicate - * whether this is a multihop route. Single-hop, non-ECMP routes will have a weight of zero. + * For routes without next-hop (e.g. blackhole type), the weight is + * meaningless. It should be set to zero. NM_PLATFORM_IP_ROUTE_CMP_TYPE_ID + * will treat it as zero. * - * The valid range for weight in kernel is 1-256. */ + * For single-hop routes, in kernel they don't have a weight. That means, + * all routes in the platform cache have a weight of zero. For tracking + * purposes, we find it useful that upper layers have single-hop routes + * with a positive weight. Such routes can never exist in kernel. Trying + * to add such a route will somewhat work, because + * nm_platform_ip_route_normalize() normalizes the weight to zero + * (effectively adding another route, according to + * NM_PLATFORM_IP_ROUTE_CMP_TYPE_ID). A lookup in the platform cache with + * such a route will not yield a result. It does not exist there. If you + * want to find such a route, normalize it first. + */ guint16 weight; /* rtm_tos (iproute2: tos) @@ -704,13 +713,11 @@ typedef struct { typedef struct { int ifindex; in_addr_t gateway; - /* The valid range for weight is 1-256. Single hop routes in kernel - * don't have a weight, we assign them weight zero (to indicate the - * weight is missing). + + /* The weight of the next hop. The valid range for weight is 1-256. * - * Upper layers (nm_utils_ip_route_attribute_to_platform()) care about - * the distinction of unset weight (no-ECMP). They express no-ECMP as - * zero. + * Zero is allowed too, but treated as 1 (by + * NM_PLATFORM_IP_ROUTE_CMP_TYPE_ID comparison). */ guint16 weight; From 8d205faa4650f3115f66699b9d2c07df865186ec Mon Sep 17 00:00:00 2001 From: Thomas Haller Date: Wed, 29 Nov 2023 12:01:08 +0100 Subject: [PATCH 2/6] core: assert that tracked objects in NML3Cfg don't have metric_any It wouldn't work otherwise. The object state is used to track routes and compare them to what we find in platform. A "metric_any" is useful at higher layers, to track a route where the metric is decided by somebody else. But at the point when we add such an object to the object-state, a fixed metric must be chosen. Assert for that. --- src/core/nm-l3cfg.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/src/core/nm-l3cfg.c b/src/core/nm-l3cfg.c index 752febfaaf..48f4a84a8a 100644 --- a/src/core/nm-l3cfg.c +++ b/src/core/nm-l3cfg.c @@ -797,6 +797,14 @@ _nm_n_acd_data_probe_new(NML3Cfg *self, in_addr_t addr, guint32 timeout_msec, gp nm_assert(_self->priv.p->combined_l3cd_commited); \ \ if (NM_MORE_ASSERTS > 5) { \ + /* metric-any must be resolved before adding the object. Otherwise, + * their real metric is not known, and they cannot be compared to objects + * from NMPlatform cache. */ \ + nm_assert(!NM_IN_SET(NMP_OBJECT_GET_TYPE(_obj_state->obj), \ + NMP_OBJECT_TYPE_IP4_ROUTE, \ + NMP_OBJECT_TYPE_IP6_ROUTE) \ + || !NMP_OBJECT_CAST_IP_ROUTE(_obj_state->obj)->metric_any); \ + \ nm_assert(c_list_contains(&_self->priv.p->obj_state_lst_head, \ &_obj_state->os_lst)); \ nm_assert(_obj_state->os_plobj \ From 7a748ae556ba34a7e6c35ddde1fe06eedeb5b94b Mon Sep 17 00:00:00 2001 From: Thomas Haller Date: Tue, 28 Nov 2023 15:20:33 +0100 Subject: [PATCH 3/6] core: fix nm_netns_ip_route_ecmp_commit() to return regular single-hop route nm_netns_ip_route_ecmp_commit() returns the ECMP routes that didn't find a merge-partner. It expects NML3Cfg to configure such routes in platform. Note that these routes have a positive "weight", which was used for tracking the ECMP information in NML3Cfg and NMNetns. But in kernel, single-hop routes don't have a weight. All routes in NMPlatform will have a zero weight. While it somewhat works to call nm_platform_ip_route_add() with a single-hop route of non-zero weight, the result will be a different(!) route. This means for example that nm_platform_ip_route_sync() will search the NMPlatform cache for whether the routes exist, but in the cache single-hop routes with positive weight never exist. Previously this happened and nm_platform_ip_route_sync() would not delete those routes when it should. It's also important because NML3Cfg tracks the object states of routes that it wants to configure, vs routes in kernel (NMPlatform cache). The route in kernel has a weight of zero. The route we want to configure must also have a weight of zero. The solution is that nm_netns_ip_route_ecmp_commit() does not tell NML3Cfg to add a route, that cannot be added. Instead, it must first normalize the weight to zero, to have a "regular" single-hop route. Fixes: 5b5ce4268211 ('nm-netns: track ECMP routes') --- src/core/nm-netns.c | 25 ++++++++++++++++++++++++- 1 file changed, 24 insertions(+), 1 deletion(-) diff --git a/src/core/nm-netns.c b/src/core/nm-netns.c index ad156f99ed..9cb441531b 100644 --- a/src/core/nm-netns.c +++ b/src/core/nm-netns.c @@ -910,6 +910,8 @@ nm_netns_ip_route_ecmp_commit(NMNetns *self, } if (route->n_nexthops <= 1) { + NMPObject *route_clone; + /* This is a single hop route. Return it to the caller. */ if (!*out_singlehop_routes) { /* Note that the returned array does not own a reference. This @@ -918,7 +920,28 @@ nm_netns_ip_route_ecmp_commit(NMNetns *self, *out_singlehop_routes = g_ptr_array_new_with_free_func((GDestroyNotify) nmp_object_unref); } - g_ptr_array_add(*out_singlehop_routes, (gpointer) nmp_object_ref(route_obj)); + + /* We have here a IPv4 single-hop route. For internal tracking purposes, + * this route has a positive "weight" (which was used to mark it as a candidate + * for ECMP merging). Now we want to return this route to NML3Cfg and add it + * as regular single-hop routes. + * + * A single-hop route in kernel always has a "weight" of zero. This route + * cannot be added as-is. Well, if we would, then the result would be + * a different(!) route (with a zero "weight"). + * + * Anticipate that and normalize the route now to be a regular single-hop + * route (with weight zero). nm_platform_ip_route_normalize() does that. + * We really want to return a regular route here, not the route with a positive + * weight that exists for internal tracking purposes. + */ + nm_assert(NMP_OBJECT_GET_TYPE(route_obj) == NMP_OBJECT_TYPE_IP4_ROUTE); + nm_assert(route_obj->ip4_route.weight > 0u); + + route_clone = nmp_object_clone(route_obj, FALSE); + nm_platform_ip_route_normalize(AF_INET, NMP_OBJECT_CAST_IP_ROUTE(route_clone)); + g_ptr_array_add(*out_singlehop_routes, route_clone); + if (changed) { _LOGT("ecmp-route: single-hop %s", nmp_object_to_string(route_obj, From 02efd9639617d690829dae8d1dadacb75b7886b4 Mon Sep 17 00:00:00 2001 From: Fernando Fernandez Mancera Date: Wed, 18 Oct 2023 23:29:55 +0200 Subject: [PATCH 4/6] netns: schedule a commit when a single-hop route is merged When a single-hop route is merged with another single-hop route from a different interface creating a new ECMP route, NetworkManager needs to schedule a commit on the l3cfg that is managing the first single-hop route. Otherwise, the single-hop will continue to be there until a new commit is scheduled. Fixes: d9d33e2accdf ('netns: fix configuring onlink routes for ECMP routes') --- src/core/nm-netns.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/src/core/nm-netns.c b/src/core/nm-netns.c index 9cb441531b..ed33d33676 100644 --- a/src/core/nm-netns.c +++ b/src/core/nm-netns.c @@ -905,8 +905,14 @@ nm_netns_ip_route_ecmp_commit(NMNetns *self, if (obj_del) { if (NMP_OBJECT_CAST_IP4_ROUTE(obj_del)->n_nexthops > 1) nm_platform_object_delete(priv->platform, obj_del); - else if (track_obj->l3cfg != l3cfg) - nm_l3cfg_commit_on_idle_schedule(track_obj->l3cfg, NM_L3_CFG_COMMIT_TYPE_AUTO); + else if (NMP_OBJECT_CAST_IP4_ROUTE(obj_del)->ifindex != nm_l3cfg_get_ifindex(l3cfg)) { + /* A single-hop route from a different interface was merged + * into a ECMP route. Now, it is time to notify the l3cfg that + * is managing that single-hop route to remove it. */ + nm_l3cfg_commit_on_idle_schedule( + nm_netns_l3cfg_get(self, NMP_OBJECT_CAST_IP4_ROUTE(obj_del)->ifindex), + NM_L3_CFG_COMMIT_TYPE_UPDATE); + } } if (route->n_nexthops <= 1) { From ffe22c498f03f4a9370337cdeccf33b40a60e87b Mon Sep 17 00:00:00 2001 From: Fernando Fernandez Mancera Date: Fri, 24 Nov 2023 14:46:27 +0100 Subject: [PATCH 5/6] l3cfg: factor out _obj_state_data_new() for creating object state Will be used next. --- src/core/nm-l3cfg.c | 26 ++++++++++++++++---------- 1 file changed, 16 insertions(+), 10 deletions(-) diff --git a/src/core/nm-l3cfg.c b/src/core/nm-l3cfg.c index 48f4a84a8a..1dc9b766ed 100644 --- a/src/core/nm-l3cfg.c +++ b/src/core/nm-l3cfg.c @@ -1008,6 +1008,21 @@ out: nm_assert_obj_state(self, obj_state); } +static void +_obj_states_track_new(NML3Cfg *self, const NMPObject *obj) +{ + char sbuf[NM_UTILS_TO_STRING_BUFFER_SIZE]; + ObjStateData *obj_state; + + obj_state = _obj_state_data_new( + obj, + nm_platform_lookup_obj(self->priv.platform, NMP_CACHE_ID_TYPE_OBJECT_TYPE, obj)); + c_list_link_tail(&self->priv.p->obj_state_lst_head, &obj_state->os_lst); + g_hash_table_add(self->priv.p->obj_state_hash, obj_state); + _LOGD("obj-state: track: %s", _obj_state_data_to_string(obj_state, sbuf, sizeof(sbuf))); + nm_assert_obj_state(self, obj_state); +} + static void _obj_states_update_all(NML3Cfg *self) { @@ -1052,16 +1067,7 @@ _obj_states_update_all(NML3Cfg *self) obj_state = g_hash_table_lookup(self->priv.p->obj_state_hash, &obj); if (!obj_state) { - obj_state = - _obj_state_data_new(obj, - nm_platform_lookup_obj(self->priv.platform, - NMP_CACHE_ID_TYPE_OBJECT_TYPE, - obj)); - c_list_link_tail(&self->priv.p->obj_state_lst_head, &obj_state->os_lst); - g_hash_table_add(self->priv.p->obj_state_hash, obj_state); - _LOGD("obj-state: track: %s", - _obj_state_data_to_string(obj_state, sbuf, sizeof(sbuf))); - nm_assert_obj_state(self, obj_state); + _obj_states_track_new(self, obj); continue; } From 5d26452da5cb151723561cb8db5466bb8df4dec5 Mon Sep 17 00:00:00 2001 From: Thomas Haller Date: Fri, 24 Nov 2023 14:46:27 +0100 Subject: [PATCH 6/6] l3cfg: handle dynamic added routes tracking and deletion Dynamic added routes i.e ECMP single-hop routes, are not managed by l3cd as the other ones. Therefore, they need to be tracked properly and marked as dirty when they are. Without this, the state of those ECMP single hop routes is not properly tracked, and they are for example not removed by NML3Cfg when they should. Usually, routes to be configured originate from the combined NML3ConfigData and are resolved early during a commit. For example, _obj_states_update_all() creates for each such route an obj_state_hash entry. Let's call those static, or "non-dynamic". Later, we can receive additional routes. We get them back from NMNetns during nm_netns_ip_route_ecmp_commit() (_commit_collect_routes()). Let's call them "dynamic". For those routes, we also must track an obj-state. Now we have two reasons why an obj-state is tracked. The "non-dynamic" and the "dynamic" one. Add two flags "os_dynamic" and "os_non_dynamic" to the ObjStateData and consider the flags at the necessary places. Co-authored-by: Fernando Fernandez Mancera --- src/core/nm-l3cfg.c | 363 ++++++++++++++++++++++++++++---------------- 1 file changed, 235 insertions(+), 128 deletions(-) diff --git a/src/core/nm-l3cfg.c b/src/core/nm-l3cfg.c index 1dc9b766ed..eb13968196 100644 --- a/src/core/nm-l3cfg.c +++ b/src/core/nm-l3cfg.c @@ -165,7 +165,18 @@ typedef struct { /* This flag is only used temporarily to do a bulk update and * clear all the ones that are no longer in used. */ - bool os_dirty : 1; + bool os_non_dynamic_dirty : 1; + + /* Indicates that we have a object in combined_l3cd_commited that keeps the + * object state alive. */ + bool os_non_dynamic : 1; + + /* Indicates that there is a dynamic route from _commit_collect_routes(), that keeps the + * object state alive. */ + bool os_dynamic : 1; + + /* Indicates that this dynamic obj-state is marked as dirty. */ + bool os_dynamic_dirty : 1; } ObjStateData; G_STATIC_ASSERT(G_STRUCT_OFFSET(ObjStateData, obj) == 0); @@ -772,59 +783,70 @@ _nm_n_acd_data_probe_new(NML3Cfg *self, in_addr_t addr, guint32 timeout_msec, gp /*****************************************************************************/ -#define nm_assert_obj_state(self, obj_state) \ - G_STMT_START \ - { \ - if (NM_MORE_ASSERTS > 0) { \ - const NML3Cfg *_self = (self); \ - const ObjStateData *_obj_state = (obj_state); \ - \ - nm_assert(_obj_state); \ - nm_assert(NM_IN_SET(NMP_OBJECT_GET_TYPE(_obj_state->obj), \ - NMP_OBJECT_TYPE_IP4_ADDRESS, \ - NMP_OBJECT_TYPE_IP6_ADDRESS, \ - NMP_OBJECT_TYPE_IP4_ROUTE, \ - NMP_OBJECT_TYPE_IP6_ROUTE)); \ - nm_assert(!_obj_state->os_plobj || _obj_state->os_was_in_platform); \ - nm_assert(_obj_state->os_failedobj_expiry_msec != 0 \ - || _obj_state->os_failedobj_prioq_idx == NM_PRIOQ_IDX_NULL); \ - nm_assert(_obj_state->os_failedobj_expiry_msec == 0 || !_obj_state->os_plobj); \ - nm_assert(_obj_state->os_failedobj_expiry_msec == 0 \ - || c_list_is_empty(&_obj_state->os_zombie_lst)); \ - nm_assert(_obj_state->os_failedobj_expiry_msec == 0 || _obj_state->obj); \ - if (_self) { \ - if (c_list_is_empty(&_obj_state->os_zombie_lst)) { \ - nm_assert(_self->priv.p->combined_l3cd_commited); \ - \ - if (NM_MORE_ASSERTS > 5) { \ +#define nm_assert_obj_state(self, obj_state) \ + G_STMT_START \ + { \ + if (NM_MORE_ASSERTS > 0) { \ + const NML3Cfg *_self = (self); \ + const ObjStateData *_obj_state = (obj_state); \ + \ + nm_assert(_obj_state); \ + nm_assert(NM_IN_SET(NMP_OBJECT_GET_TYPE(_obj_state->obj), \ + NMP_OBJECT_TYPE_IP4_ADDRESS, \ + NMP_OBJECT_TYPE_IP6_ADDRESS, \ + NMP_OBJECT_TYPE_IP4_ROUTE, \ + NMP_OBJECT_TYPE_IP6_ROUTE)); \ + nm_assert(!_obj_state->os_plobj || _obj_state->os_was_in_platform); \ + nm_assert(_obj_state->os_failedobj_expiry_msec != 0 \ + || _obj_state->os_failedobj_prioq_idx == NM_PRIOQ_IDX_NULL); \ + nm_assert(_obj_state->os_failedobj_expiry_msec == 0 || !_obj_state->os_plobj); \ + nm_assert(_obj_state->os_failedobj_expiry_msec == 0 \ + || c_list_is_empty(&_obj_state->os_zombie_lst)); \ + nm_assert(_obj_state->os_failedobj_expiry_msec == 0 || _obj_state->obj); \ + nm_assert(!_obj_state->os_plobj \ + || NMP_OBJECT_GET_TYPE(_obj_state->obj) \ + == NMP_OBJECT_GET_TYPE(_obj_state->os_plobj)); \ + if (_self) { \ + if (c_list_is_empty(&_obj_state->os_zombie_lst)) { \ + nm_assert(_self->priv.p->combined_l3cd_commited); \ + \ + if (NM_MORE_ASSERTS > 5) { \ /* metric-any must be resolved before adding the object. Otherwise, * their real metric is not known, and they cannot be compared to objects - * from NMPlatform cache. */ \ - nm_assert(!NM_IN_SET(NMP_OBJECT_GET_TYPE(_obj_state->obj), \ - NMP_OBJECT_TYPE_IP4_ROUTE, \ - NMP_OBJECT_TYPE_IP6_ROUTE) \ - || !NMP_OBJECT_CAST_IP_ROUTE(_obj_state->obj)->metric_any); \ - \ - nm_assert(c_list_contains(&_self->priv.p->obj_state_lst_head, \ - &_obj_state->os_lst)); \ - nm_assert(_obj_state->os_plobj \ - == nm_platform_lookup_obj(_self->priv.platform, \ - NMP_CACHE_ID_TYPE_OBJECT_TYPE, \ - _obj_state->obj)); \ - nm_assert( \ - c_list_is_empty(&obj_state->os_zombie_lst) \ - ? (_obj_state->obj \ - == nm_dedup_multi_entry_get_obj(nm_l3_config_data_lookup_obj( \ - _self->priv.p->combined_l3cd_commited, \ - _obj_state->obj))) \ - : (!nm_l3_config_data_lookup_obj( \ - _self->priv.p->combined_l3cd_commited, \ - _obj_state->obj))); \ - } \ - } \ - } \ - } \ - } \ + * from NMPlatform cache. */ \ + nm_assert(!NM_IN_SET(NMP_OBJECT_GET_TYPE(_obj_state->obj), \ + NMP_OBJECT_TYPE_IP4_ROUTE, \ + NMP_OBJECT_TYPE_IP6_ROUTE) \ + || !NMP_OBJECT_CAST_IP_ROUTE(_obj_state->obj)->metric_any); \ + \ + nm_assert(c_list_contains(&_self->priv.p->obj_state_lst_head, \ + &_obj_state->os_lst)); \ + nm_assert(_obj_state->os_plobj \ + == nm_platform_lookup_obj(_self->priv.platform, \ + NMP_CACHE_ID_TYPE_OBJECT_TYPE, \ + _obj_state->obj)); \ + if (!c_list_is_empty(&obj_state->os_zombie_lst)) { \ + nm_assert(!obj_state->os_non_dynamic); \ + nm_assert(!obj_state->os_non_dynamic_dirty); \ + nm_assert(!obj_state->os_dynamic); \ + nm_assert(!obj_state->os_dynamic_dirty); \ + } \ + if (obj_state->os_non_dynamic) { \ + nm_assert( \ + _obj_state->obj \ + == nm_dedup_multi_entry_get_obj(nm_l3_config_data_lookup_obj( \ + _self->priv.p->combined_l3cd_commited, \ + _obj_state->obj))); \ + } else { \ + nm_assert(!nm_l3_config_data_lookup_obj( \ + _self->priv.p->combined_l3cd_commited, \ + _obj_state->obj)); \ + } \ + } \ + } \ + } \ + } \ + } \ G_STMT_END static ObjStateData * @@ -838,7 +860,10 @@ _obj_state_data_new(const NMPObject *obj, const NMPObject *plobj) .os_plobj = nmp_object_ref(plobj), .os_was_in_platform = !!plobj, .os_nm_configured = FALSE, - .os_dirty = FALSE, + .os_non_dynamic_dirty = FALSE, + .os_non_dynamic = FALSE, + .os_dynamic = FALSE, + .os_dynamic_dirty = FALSE, .os_failedobj_expiry_msec = 0, .os_failedobj_prioq_idx = NM_PRIOQ_IDX_NULL, .os_zombie_lst = C_LIST_INIT(obj_state->os_zombie_lst), @@ -907,34 +932,6 @@ _obj_state_data_to_string(const ObjStateData *obj_state, char *buf, gsize buf_si return buf0; } -static gboolean -_obj_state_data_update(ObjStateData *obj_state, const NMPObject *obj) -{ - gboolean changed = FALSE; - - nm_assert_obj_state(NULL, obj_state); - nm_assert(obj); - nm_assert(nmp_object_id_equal(obj_state->obj, obj)); - - obj_state->os_dirty = FALSE; - - if (obj_state->obj != obj) { - nm_auto_nmpobj const NMPObject *obj_old = NULL; - - if (!nmp_object_equal(obj_state->obj, obj)) - changed = TRUE; - obj_old = g_steal_pointer(&obj_state->obj); - obj_state->obj = nmp_object_ref(obj); - } - - if (!c_list_is_empty(&obj_state->os_zombie_lst)) { - c_list_unlink(&obj_state->os_zombie_lst); - changed = TRUE; - } - - return changed; -} - /*****************************************************************************/ static void @@ -1009,7 +1006,7 @@ out: } static void -_obj_states_track_new(NML3Cfg *self, const NMPObject *obj) +_obj_states_track_new(NML3Cfg *self, const NMPObject *obj, gboolean dynamic) { char sbuf[NM_UTILS_TO_STRING_BUFFER_SIZE]; ObjStateData *obj_state; @@ -1017,12 +1014,142 @@ _obj_states_track_new(NML3Cfg *self, const NMPObject *obj) obj_state = _obj_state_data_new( obj, nm_platform_lookup_obj(self->priv.platform, NMP_CACHE_ID_TYPE_OBJECT_TYPE, obj)); + obj_state->os_dynamic = dynamic; + obj_state->os_non_dynamic = !dynamic; c_list_link_tail(&self->priv.p->obj_state_lst_head, &obj_state->os_lst); g_hash_table_add(self->priv.p->obj_state_hash, obj_state); - _LOGD("obj-state: track: %s", _obj_state_data_to_string(obj_state, sbuf, sizeof(sbuf))); + _LOGD("obj-state: track%s: %s", + dynamic ? " (dynamic)" : "", + _obj_state_data_to_string(obj_state, sbuf, sizeof(sbuf))); nm_assert_obj_state(self, obj_state); } +static gboolean +_obj_states_track_update(NML3Cfg *self, + ObjStateData *obj_state, + const NMPObject *obj, + gboolean dynamic) +{ + gboolean changed = FALSE; + + nm_assert_obj_state(NULL, obj_state); + nm_assert(obj); + nm_assert(nmp_object_id_equal(obj_state->obj, obj)); + + if (dynamic) { + if (!obj_state->os_dynamic) + changed = TRUE; + obj_state->os_dynamic_dirty = FALSE; + obj_state->os_dynamic = TRUE; + } else { + if (!obj_state->os_non_dynamic) + changed = TRUE; + obj_state->os_non_dynamic_dirty = FALSE; + obj_state->os_non_dynamic = TRUE; + } + + if (obj_state->obj != obj && (!dynamic || !obj_state->os_non_dynamic)) { + nm_auto_nmpobj const NMPObject *obj_old = NULL; + + if (!nmp_object_equal(obj_state->obj, obj)) + changed = TRUE; + obj_old = g_steal_pointer(&obj_state->obj); + obj_state->obj = nmp_object_ref(obj); + } + + if (!c_list_is_empty(&obj_state->os_zombie_lst)) { + c_list_unlink(&obj_state->os_zombie_lst); + changed = TRUE; + } + + if (changed) { + char sbuf[NM_UTILS_TO_STRING_BUFFER_SIZE]; + + _LOGD("obj-state: update: %s (static: %d, dynamic: %d)", + _obj_state_data_to_string(obj_state, sbuf, sizeof(sbuf)), + !!obj_state->os_non_dynamic, + !!obj_state->os_dynamic); + } + + nm_assert_obj_state(self, obj_state); + return changed; +} + +static gboolean +_obj_states_track_mark_dirty(NML3Cfg *self, gboolean dynamic) +{ + ObjStateData *obj_state; + gboolean any_dirty = FALSE; + + c_list_for_each_entry (obj_state, &self->priv.p->obj_state_lst_head, os_lst) { + if (!c_list_is_empty(&obj_state->os_zombie_lst)) { + /* we can ignore zombies. */ + continue; + } + if (dynamic) { + if (!obj_state->os_dynamic) + continue; + obj_state->os_dynamic_dirty = TRUE; + } else { + if (!obj_state->os_non_dynamic) + continue; + obj_state->os_non_dynamic_dirty = TRUE; + } + any_dirty = TRUE; + } + + return any_dirty; +} + +static void +_obj_states_track_prune_dirty(NML3Cfg *self, gboolean also_dynamic) +{ + GHashTableIter h_iter; + ObjStateData *obj_state; + char sbuf[NM_UTILS_TO_STRING_BUFFER_SIZE]; + + g_hash_table_iter_init(&h_iter, self->priv.p->obj_state_hash); + while (g_hash_table_iter_next(&h_iter, (gpointer *) &obj_state, NULL)) { + if (!c_list_is_empty(&obj_state->os_zombie_lst)) { + /* The object is half-dead already and only kept for cleanup. But + * it does not need to be untracked. */ + continue; + } + + /* Resolve the "dirty" flags. */ + if (obj_state->os_non_dynamic_dirty) { + obj_state->os_non_dynamic = FALSE; + obj_state->os_non_dynamic_dirty = FALSE; + } + if (also_dynamic) { + if (obj_state->os_dynamic_dirty) { + obj_state->os_dynamic = FALSE; + obj_state->os_dynamic_dirty = FALSE; + } + } + + if (obj_state->os_non_dynamic || obj_state->os_dynamic) { + /* This obj-state is still alive. Keep it. */ + continue; + } + + if (obj_state->os_plobj && obj_state->os_nm_configured) { + nm_assert(obj_state->os_failedobj_prioq_idx == NM_PRIOQ_IDX_NULL); + c_list_link_tail(&self->priv.p->obj_state_zombie_lst_head, &obj_state->os_zombie_lst); + obj_state->os_zombie_count = ZOMBIE_COUNT_START; + _LOGD("obj-state: now zombie: %s", + _obj_state_data_to_string(obj_state, sbuf, sizeof(sbuf))); + continue; + } + + _LOGD("obj-state: untrack: %s", _obj_state_data_to_string(obj_state, sbuf, sizeof(sbuf))); + nm_prioq_remove(&self->priv.p->failedobj_prioq, + obj_state, + &obj_state->os_failedobj_prioq_idx); + g_hash_table_iter_remove(&h_iter); + } +} + static void _obj_states_update_all(NML3Cfg *self) { @@ -1032,21 +1159,13 @@ _obj_states_update_all(NML3Cfg *self) NMP_OBJECT_TYPE_IP4_ROUTE, NMP_OBJECT_TYPE_IP6_ROUTE, }; - char sbuf[NM_UTILS_TO_STRING_BUFFER_SIZE]; ObjStateData *obj_state; int i; gboolean any_dirty = FALSE; nm_assert(NM_IS_L3CFG(self)); - c_list_for_each_entry (obj_state, &self->priv.p->obj_state_lst_head, os_lst) { - if (!c_list_is_empty(&obj_state->os_zombie_lst)) { - /* we can ignore zombies. */ - continue; - } - any_dirty = TRUE; - obj_state->os_dirty = TRUE; - } + any_dirty = _obj_states_track_mark_dirty(self, FALSE); for (i = 0; i < (int) G_N_ELEMENTS(obj_types); i++) { const NMPObjectType obj_type = obj_types[i]; @@ -1064,50 +1183,24 @@ _obj_states_update_all(NML3Cfg *self) /* this is a nodev route. We don't track an obj-state for this. */ continue; } - + if (obj_type == NMP_OBJECT_TYPE_IP4_ROUTE + && NMP_OBJECT_CAST_IP4_ROUTE(obj)->weight > 0) { + /* this route weight is bigger than 0, that means we don't know + * which kind of route this will be. It can only be determined during commit. */ + continue; + } obj_state = g_hash_table_lookup(self->priv.p->obj_state_hash, &obj); if (!obj_state) { - _obj_states_track_new(self, obj); + _obj_states_track_new(self, obj, FALSE); continue; } - if (_obj_state_data_update(obj_state, obj)) { - _LOGD("obj-state: update: %s", - _obj_state_data_to_string(obj_state, sbuf, sizeof(sbuf))); - } - - nm_assert_obj_state(self, obj_state); + _obj_states_track_update(self, obj_state, obj, FALSE); } } - if (any_dirty) { - GHashTableIter h_iter; - - g_hash_table_iter_init(&h_iter, self->priv.p->obj_state_hash); - while (g_hash_table_iter_next(&h_iter, (gpointer *) &obj_state, NULL)) { - if (!c_list_is_empty(&obj_state->os_zombie_lst)) - continue; - if (!obj_state->os_dirty) - continue; - - if (obj_state->os_plobj && obj_state->os_nm_configured) { - nm_assert(obj_state->os_failedobj_prioq_idx == NM_PRIOQ_IDX_NULL); - c_list_link_tail(&self->priv.p->obj_state_zombie_lst_head, - &obj_state->os_zombie_lst); - obj_state->os_zombie_count = ZOMBIE_COUNT_START; - _LOGD("obj-state: now zombie: %s", - _obj_state_data_to_string(obj_state, sbuf, sizeof(sbuf))); - continue; - } - - _LOGD("obj-state: untrack: %s", - _obj_state_data_to_string(obj_state, sbuf, sizeof(sbuf))); - nm_prioq_remove(&self->priv.p->failedobj_prioq, - obj_state, - &obj_state->os_failedobj_prioq_idx); - g_hash_table_iter_remove(&h_iter); - } - } + if (any_dirty) + _obj_states_track_prune_dirty(self, FALSE); } typedef struct { @@ -1279,6 +1372,13 @@ loop_done: if (singlehop_routes) { for (i = 0; i < singlehop_routes->len; i++) { const NMPObject *obj = singlehop_routes->pdata[i]; + ObjStateData *obj_state; + + obj_state = g_hash_table_lookup(self->priv.p->obj_state_hash, &obj); + if (!obj_state) + _obj_states_track_new(self, obj, TRUE); + else + _obj_states_track_update(self, obj_state, obj, TRUE); if (!_obj_states_sync_filter(self, obj, commit_type)) continue; @@ -4855,6 +4955,7 @@ _l3_commit_one(NML3Cfg *self, NMIPRouteTableSyncMode route_table_sync; char sbuf_commit_type[50]; guint i; + gboolean any_dirty = FALSE; nm_assert(NM_IS_L3CFG(self)); nm_assert(NM_IN_SET(commit_type, @@ -4867,6 +4968,9 @@ _l3_commit_one(NML3Cfg *self, nm_utils_addr_family_to_char(addr_family), _l3_cfg_commit_type_to_string(commit_type, sbuf_commit_type, sizeof(sbuf_commit_type))); + if (IS_IPv4) + any_dirty = _obj_states_track_mark_dirty(self, TRUE); + addresses = _commit_collect_addresses(self, addr_family, commit_type); _commit_collect_routes(self, @@ -4891,6 +4995,9 @@ _l3_commit_one(NML3Cfg *self, if (route_table_sync == NM_IP_ROUTE_TABLE_SYNC_MODE_NONE) route_table_sync = NM_IP_ROUTE_TABLE_SYNC_MODE_MAIN; + if (any_dirty) + _obj_states_track_prune_dirty(self, TRUE); + if (commit_type == NM_L3_CFG_COMMIT_TYPE_REAPPLY) { gs_unref_array GArray *ipv6_temp_addrs_keep = NULL;