diff --git a/src/core/nm-l3cfg.c b/src/core/nm-l3cfg.c index 752febfaaf..eb13968196 100644 --- a/src/core/nm-l3cfg.c +++ b/src/core/nm-l3cfg.c @@ -165,7 +165,18 @@ typedef struct { /* This flag is only used temporarily to do a bulk update and * clear all the ones that are no longer in used. */ - bool os_dirty : 1; + bool os_non_dynamic_dirty : 1; + + /* Indicates that we have a object in combined_l3cd_commited that keeps the + * object state alive. */ + bool os_non_dynamic : 1; + + /* Indicates that there is a dynamic route from _commit_collect_routes(), that keeps the + * object state alive. */ + bool os_dynamic : 1; + + /* Indicates that this dynamic obj-state is marked as dirty. */ + bool os_dynamic_dirty : 1; } ObjStateData; G_STATIC_ASSERT(G_STRUCT_OFFSET(ObjStateData, obj) == 0); @@ -772,51 +783,70 @@ _nm_n_acd_data_probe_new(NML3Cfg *self, in_addr_t addr, guint32 timeout_msec, gp /*****************************************************************************/ -#define nm_assert_obj_state(self, obj_state) \ - G_STMT_START \ - { \ - if (NM_MORE_ASSERTS > 0) { \ - const NML3Cfg *_self = (self); \ - const ObjStateData *_obj_state = (obj_state); \ - \ - nm_assert(_obj_state); \ - nm_assert(NM_IN_SET(NMP_OBJECT_GET_TYPE(_obj_state->obj), \ - NMP_OBJECT_TYPE_IP4_ADDRESS, \ - NMP_OBJECT_TYPE_IP6_ADDRESS, \ - NMP_OBJECT_TYPE_IP4_ROUTE, \ - NMP_OBJECT_TYPE_IP6_ROUTE)); \ - nm_assert(!_obj_state->os_plobj || _obj_state->os_was_in_platform); \ - nm_assert(_obj_state->os_failedobj_expiry_msec != 0 \ - || _obj_state->os_failedobj_prioq_idx == NM_PRIOQ_IDX_NULL); \ - nm_assert(_obj_state->os_failedobj_expiry_msec == 0 || !_obj_state->os_plobj); \ - nm_assert(_obj_state->os_failedobj_expiry_msec == 0 \ - || c_list_is_empty(&_obj_state->os_zombie_lst)); \ - nm_assert(_obj_state->os_failedobj_expiry_msec == 0 || _obj_state->obj); \ - if (_self) { \ - if (c_list_is_empty(&_obj_state->os_zombie_lst)) { \ - nm_assert(_self->priv.p->combined_l3cd_commited); \ - \ - if (NM_MORE_ASSERTS > 5) { \ - nm_assert(c_list_contains(&_self->priv.p->obj_state_lst_head, \ - &_obj_state->os_lst)); \ - nm_assert(_obj_state->os_plobj \ - == nm_platform_lookup_obj(_self->priv.platform, \ - NMP_CACHE_ID_TYPE_OBJECT_TYPE, \ - _obj_state->obj)); \ - nm_assert( \ - c_list_is_empty(&obj_state->os_zombie_lst) \ - ? (_obj_state->obj \ - == nm_dedup_multi_entry_get_obj(nm_l3_config_data_lookup_obj( \ - _self->priv.p->combined_l3cd_commited, \ - _obj_state->obj))) \ - : (!nm_l3_config_data_lookup_obj( \ - _self->priv.p->combined_l3cd_commited, \ - _obj_state->obj))); \ - } \ - } \ - } \ - } \ - } \ +#define nm_assert_obj_state(self, obj_state) \ + G_STMT_START \ + { \ + if (NM_MORE_ASSERTS > 0) { \ + const NML3Cfg *_self = (self); \ + const ObjStateData *_obj_state = (obj_state); \ + \ + nm_assert(_obj_state); \ + nm_assert(NM_IN_SET(NMP_OBJECT_GET_TYPE(_obj_state->obj), \ + NMP_OBJECT_TYPE_IP4_ADDRESS, \ + NMP_OBJECT_TYPE_IP6_ADDRESS, \ + NMP_OBJECT_TYPE_IP4_ROUTE, \ + NMP_OBJECT_TYPE_IP6_ROUTE)); \ + nm_assert(!_obj_state->os_plobj || _obj_state->os_was_in_platform); \ + nm_assert(_obj_state->os_failedobj_expiry_msec != 0 \ + || _obj_state->os_failedobj_prioq_idx == NM_PRIOQ_IDX_NULL); \ + nm_assert(_obj_state->os_failedobj_expiry_msec == 0 || !_obj_state->os_plobj); \ + nm_assert(_obj_state->os_failedobj_expiry_msec == 0 \ + || c_list_is_empty(&_obj_state->os_zombie_lst)); \ + nm_assert(_obj_state->os_failedobj_expiry_msec == 0 || _obj_state->obj); \ + nm_assert(!_obj_state->os_plobj \ + || NMP_OBJECT_GET_TYPE(_obj_state->obj) \ + == NMP_OBJECT_GET_TYPE(_obj_state->os_plobj)); \ + if (_self) { \ + if (c_list_is_empty(&_obj_state->os_zombie_lst)) { \ + nm_assert(_self->priv.p->combined_l3cd_commited); \ + \ + if (NM_MORE_ASSERTS > 5) { \ + /* metric-any must be resolved before adding the object. Otherwise, + * their real metric is not known, and they cannot be compared to objects + * from NMPlatform cache. */ \ + nm_assert(!NM_IN_SET(NMP_OBJECT_GET_TYPE(_obj_state->obj), \ + NMP_OBJECT_TYPE_IP4_ROUTE, \ + NMP_OBJECT_TYPE_IP6_ROUTE) \ + || !NMP_OBJECT_CAST_IP_ROUTE(_obj_state->obj)->metric_any); \ + \ + nm_assert(c_list_contains(&_self->priv.p->obj_state_lst_head, \ + &_obj_state->os_lst)); \ + nm_assert(_obj_state->os_plobj \ + == nm_platform_lookup_obj(_self->priv.platform, \ + NMP_CACHE_ID_TYPE_OBJECT_TYPE, \ + _obj_state->obj)); \ + if (!c_list_is_empty(&obj_state->os_zombie_lst)) { \ + nm_assert(!obj_state->os_non_dynamic); \ + nm_assert(!obj_state->os_non_dynamic_dirty); \ + nm_assert(!obj_state->os_dynamic); \ + nm_assert(!obj_state->os_dynamic_dirty); \ + } \ + if (obj_state->os_non_dynamic) { \ + nm_assert( \ + _obj_state->obj \ + == nm_dedup_multi_entry_get_obj(nm_l3_config_data_lookup_obj( \ + _self->priv.p->combined_l3cd_commited, \ + _obj_state->obj))); \ + } else { \ + nm_assert(!nm_l3_config_data_lookup_obj( \ + _self->priv.p->combined_l3cd_commited, \ + _obj_state->obj)); \ + } \ + } \ + } \ + } \ + } \ + } \ G_STMT_END static ObjStateData * @@ -830,7 +860,10 @@ _obj_state_data_new(const NMPObject *obj, const NMPObject *plobj) .os_plobj = nmp_object_ref(plobj), .os_was_in_platform = !!plobj, .os_nm_configured = FALSE, - .os_dirty = FALSE, + .os_non_dynamic_dirty = FALSE, + .os_non_dynamic = FALSE, + .os_dynamic = FALSE, + .os_dynamic_dirty = FALSE, .os_failedobj_expiry_msec = 0, .os_failedobj_prioq_idx = NM_PRIOQ_IDX_NULL, .os_zombie_lst = C_LIST_INIT(obj_state->os_zombie_lst), @@ -899,34 +932,6 @@ _obj_state_data_to_string(const ObjStateData *obj_state, char *buf, gsize buf_si return buf0; } -static gboolean -_obj_state_data_update(ObjStateData *obj_state, const NMPObject *obj) -{ - gboolean changed = FALSE; - - nm_assert_obj_state(NULL, obj_state); - nm_assert(obj); - nm_assert(nmp_object_id_equal(obj_state->obj, obj)); - - obj_state->os_dirty = FALSE; - - if (obj_state->obj != obj) { - nm_auto_nmpobj const NMPObject *obj_old = NULL; - - if (!nmp_object_equal(obj_state->obj, obj)) - changed = TRUE; - obj_old = g_steal_pointer(&obj_state->obj); - obj_state->obj = nmp_object_ref(obj); - } - - if (!c_list_is_empty(&obj_state->os_zombie_lst)) { - c_list_unlink(&obj_state->os_zombie_lst); - changed = TRUE; - } - - return changed; -} - /*****************************************************************************/ static void @@ -1000,6 +1005,151 @@ out: nm_assert_obj_state(self, obj_state); } +static void +_obj_states_track_new(NML3Cfg *self, const NMPObject *obj, gboolean dynamic) +{ + char sbuf[NM_UTILS_TO_STRING_BUFFER_SIZE]; + ObjStateData *obj_state; + + obj_state = _obj_state_data_new( + obj, + nm_platform_lookup_obj(self->priv.platform, NMP_CACHE_ID_TYPE_OBJECT_TYPE, obj)); + obj_state->os_dynamic = dynamic; + obj_state->os_non_dynamic = !dynamic; + c_list_link_tail(&self->priv.p->obj_state_lst_head, &obj_state->os_lst); + g_hash_table_add(self->priv.p->obj_state_hash, obj_state); + _LOGD("obj-state: track%s: %s", + dynamic ? " (dynamic)" : "", + _obj_state_data_to_string(obj_state, sbuf, sizeof(sbuf))); + nm_assert_obj_state(self, obj_state); +} + +static gboolean +_obj_states_track_update(NML3Cfg *self, + ObjStateData *obj_state, + const NMPObject *obj, + gboolean dynamic) +{ + gboolean changed = FALSE; + + nm_assert_obj_state(NULL, obj_state); + nm_assert(obj); + nm_assert(nmp_object_id_equal(obj_state->obj, obj)); + + if (dynamic) { + if (!obj_state->os_dynamic) + changed = TRUE; + obj_state->os_dynamic_dirty = FALSE; + obj_state->os_dynamic = TRUE; + } else { + if (!obj_state->os_non_dynamic) + changed = TRUE; + obj_state->os_non_dynamic_dirty = FALSE; + obj_state->os_non_dynamic = TRUE; + } + + if (obj_state->obj != obj && (!dynamic || !obj_state->os_non_dynamic)) { + nm_auto_nmpobj const NMPObject *obj_old = NULL; + + if (!nmp_object_equal(obj_state->obj, obj)) + changed = TRUE; + obj_old = g_steal_pointer(&obj_state->obj); + obj_state->obj = nmp_object_ref(obj); + } + + if (!c_list_is_empty(&obj_state->os_zombie_lst)) { + c_list_unlink(&obj_state->os_zombie_lst); + changed = TRUE; + } + + if (changed) { + char sbuf[NM_UTILS_TO_STRING_BUFFER_SIZE]; + + _LOGD("obj-state: update: %s (static: %d, dynamic: %d)", + _obj_state_data_to_string(obj_state, sbuf, sizeof(sbuf)), + !!obj_state->os_non_dynamic, + !!obj_state->os_dynamic); + } + + nm_assert_obj_state(self, obj_state); + return changed; +} + +static gboolean +_obj_states_track_mark_dirty(NML3Cfg *self, gboolean dynamic) +{ + ObjStateData *obj_state; + gboolean any_dirty = FALSE; + + c_list_for_each_entry (obj_state, &self->priv.p->obj_state_lst_head, os_lst) { + if (!c_list_is_empty(&obj_state->os_zombie_lst)) { + /* we can ignore zombies. */ + continue; + } + if (dynamic) { + if (!obj_state->os_dynamic) + continue; + obj_state->os_dynamic_dirty = TRUE; + } else { + if (!obj_state->os_non_dynamic) + continue; + obj_state->os_non_dynamic_dirty = TRUE; + } + any_dirty = TRUE; + } + + return any_dirty; +} + +static void +_obj_states_track_prune_dirty(NML3Cfg *self, gboolean also_dynamic) +{ + GHashTableIter h_iter; + ObjStateData *obj_state; + char sbuf[NM_UTILS_TO_STRING_BUFFER_SIZE]; + + g_hash_table_iter_init(&h_iter, self->priv.p->obj_state_hash); + while (g_hash_table_iter_next(&h_iter, (gpointer *) &obj_state, NULL)) { + if (!c_list_is_empty(&obj_state->os_zombie_lst)) { + /* The object is half-dead already and only kept for cleanup. But + * it does not need to be untracked. */ + continue; + } + + /* Resolve the "dirty" flags. */ + if (obj_state->os_non_dynamic_dirty) { + obj_state->os_non_dynamic = FALSE; + obj_state->os_non_dynamic_dirty = FALSE; + } + if (also_dynamic) { + if (obj_state->os_dynamic_dirty) { + obj_state->os_dynamic = FALSE; + obj_state->os_dynamic_dirty = FALSE; + } + } + + if (obj_state->os_non_dynamic || obj_state->os_dynamic) { + /* This obj-state is still alive. Keep it. */ + continue; + } + + if (obj_state->os_plobj && obj_state->os_nm_configured) { + nm_assert(obj_state->os_failedobj_prioq_idx == NM_PRIOQ_IDX_NULL); + c_list_link_tail(&self->priv.p->obj_state_zombie_lst_head, &obj_state->os_zombie_lst); + obj_state->os_zombie_count = ZOMBIE_COUNT_START; + _LOGD("obj-state: now zombie: %s", + _obj_state_data_to_string(obj_state, sbuf, sizeof(sbuf))); + continue; + } + + _LOGD("obj-state: untrack: %s", _obj_state_data_to_string(obj_state, sbuf, sizeof(sbuf))); + nm_prioq_remove(&self->priv.p->failedobj_prioq, + obj_state, + &obj_state->os_failedobj_prioq_idx); + g_hash_table_iter_remove(&h_iter); + } +} + static void _obj_states_update_all(NML3Cfg *self) { @@ -1009,21 +1159,13 @@ _obj_states_update_all(NML3Cfg *self) NMP_OBJECT_TYPE_IP4_ROUTE, NMP_OBJECT_TYPE_IP6_ROUTE, }; - char sbuf[NM_UTILS_TO_STRING_BUFFER_SIZE]; ObjStateData *obj_state; int i; gboolean any_dirty = FALSE; nm_assert(NM_IS_L3CFG(self)); - c_list_for_each_entry (obj_state, &self->priv.p->obj_state_lst_head, os_lst) { - if (!c_list_is_empty(&obj_state->os_zombie_lst)) { - /* we can ignore zombies. */ - continue; - } - any_dirty = TRUE; - obj_state->os_dirty = TRUE; - } + any_dirty = _obj_states_track_mark_dirty(self, FALSE); for (i = 0; i < (int) G_N_ELEMENTS(obj_types); i++) { const NMPObjectType obj_type = obj_types[i]; @@ -1041,59 +1183,24 @@ _obj_states_update_all(NML3Cfg *self) /* this is a nodev route. We don't track an obj-state for this. */ continue; } - + if (obj_type == NMP_OBJECT_TYPE_IP4_ROUTE + && NMP_OBJECT_CAST_IP4_ROUTE(obj)->weight > 0) { + /* this route weight is bigger than 0, that means we don't know + * which kind of route this will be. It can only be determined during commit. */ + continue; + } obj_state = g_hash_table_lookup(self->priv.p->obj_state_hash, &obj); if (!obj_state) { - obj_state = - _obj_state_data_new(obj, - nm_platform_lookup_obj(self->priv.platform, - NMP_CACHE_ID_TYPE_OBJECT_TYPE, - obj)); - c_list_link_tail(&self->priv.p->obj_state_lst_head, &obj_state->os_lst); - g_hash_table_add(self->priv.p->obj_state_hash, obj_state); - _LOGD("obj-state: track: %s", - _obj_state_data_to_string(obj_state, sbuf, sizeof(sbuf))); - nm_assert_obj_state(self, obj_state); + _obj_states_track_new(self, obj, FALSE); continue; } - if (_obj_state_data_update(obj_state, obj)) { - _LOGD("obj-state: update: %s", - _obj_state_data_to_string(obj_state, sbuf, sizeof(sbuf))); - } - - nm_assert_obj_state(self, obj_state); + _obj_states_track_update(self, obj_state, obj, FALSE); } } - if (any_dirty) { - GHashTableIter h_iter; - - g_hash_table_iter_init(&h_iter, self->priv.p->obj_state_hash); - while (g_hash_table_iter_next(&h_iter, (gpointer *) &obj_state, NULL)) { - if (!c_list_is_empty(&obj_state->os_zombie_lst)) - continue; - if (!obj_state->os_dirty) - continue; - - if (obj_state->os_plobj && obj_state->os_nm_configured) { - nm_assert(obj_state->os_failedobj_prioq_idx == NM_PRIOQ_IDX_NULL); - c_list_link_tail(&self->priv.p->obj_state_zombie_lst_head, - &obj_state->os_zombie_lst); - obj_state->os_zombie_count = ZOMBIE_COUNT_START; - _LOGD("obj-state: now zombie: %s", - _obj_state_data_to_string(obj_state, sbuf, sizeof(sbuf))); - continue; - } - - _LOGD("obj-state: untrack: %s", - _obj_state_data_to_string(obj_state, sbuf, sizeof(sbuf))); - nm_prioq_remove(&self->priv.p->failedobj_prioq, - obj_state, - &obj_state->os_failedobj_prioq_idx); - g_hash_table_iter_remove(&h_iter); - } - } + if (any_dirty) + _obj_states_track_prune_dirty(self, FALSE); } typedef struct { @@ -1265,6 +1372,13 @@ loop_done: if (singlehop_routes) { for (i = 0; i < singlehop_routes->len; i++) { const NMPObject *obj = singlehop_routes->pdata[i]; + ObjStateData *obj_state; + + obj_state = g_hash_table_lookup(self->priv.p->obj_state_hash, &obj); + if (!obj_state) + _obj_states_track_new(self, obj, TRUE); + else + _obj_states_track_update(self, obj_state, obj, TRUE); if (!_obj_states_sync_filter(self, obj, commit_type)) continue; @@ -4841,6 +4955,7 @@ _l3_commit_one(NML3Cfg *self, NMIPRouteTableSyncMode route_table_sync; char sbuf_commit_type[50]; guint i; + gboolean any_dirty = FALSE; nm_assert(NM_IS_L3CFG(self)); nm_assert(NM_IN_SET(commit_type, @@ -4853,6 +4968,9 @@ _l3_commit_one(NML3Cfg *self, nm_utils_addr_family_to_char(addr_family), _l3_cfg_commit_type_to_string(commit_type, sbuf_commit_type, sizeof(sbuf_commit_type))); + if (IS_IPv4) + any_dirty = _obj_states_track_mark_dirty(self, TRUE); + addresses = _commit_collect_addresses(self, addr_family, commit_type); _commit_collect_routes(self, @@ -4877,6 +4995,9 @@ _l3_commit_one(NML3Cfg *self, if (route_table_sync == NM_IP_ROUTE_TABLE_SYNC_MODE_NONE) route_table_sync = NM_IP_ROUTE_TABLE_SYNC_MODE_MAIN; + if (any_dirty) + _obj_states_track_prune_dirty(self, TRUE); + if (commit_type == NM_L3_CFG_COMMIT_TYPE_REAPPLY) { gs_unref_array GArray *ipv6_temp_addrs_keep = NULL; diff --git a/src/core/nm-netns.c b/src/core/nm-netns.c index ad156f99ed..ed33d33676 100644 --- a/src/core/nm-netns.c +++ b/src/core/nm-netns.c @@ -905,11 +905,19 @@ nm_netns_ip_route_ecmp_commit(NMNetns *self, if (obj_del) { if (NMP_OBJECT_CAST_IP4_ROUTE(obj_del)->n_nexthops > 1) nm_platform_object_delete(priv->platform, obj_del); - else if (track_obj->l3cfg != l3cfg) - nm_l3cfg_commit_on_idle_schedule(track_obj->l3cfg, NM_L3_CFG_COMMIT_TYPE_AUTO); + else if (NMP_OBJECT_CAST_IP4_ROUTE(obj_del)->ifindex != nm_l3cfg_get_ifindex(l3cfg)) { + /* A single-hop route from a different interface was merged + * into a ECMP route. Now, it is time to notify the l3cfg that + * is managing that single-hop route to remove it. */ + nm_l3cfg_commit_on_idle_schedule( + nm_netns_l3cfg_get(self, NMP_OBJECT_CAST_IP4_ROUTE(obj_del)->ifindex), + NM_L3_CFG_COMMIT_TYPE_UPDATE); + } } if (route->n_nexthops <= 1) { + NMPObject *route_clone; + /* This is a single hop route. Return it to the caller. */ if (!*out_singlehop_routes) { /* Note that the returned array does not own a reference. This @@ -918,7 +926,28 @@ nm_netns_ip_route_ecmp_commit(NMNetns *self, *out_singlehop_routes = g_ptr_array_new_with_free_func((GDestroyNotify) nmp_object_unref); } - g_ptr_array_add(*out_singlehop_routes, (gpointer) nmp_object_ref(route_obj)); + + /* We have here a IPv4 single-hop route. For internal tracking purposes, + * this route has a positive "weight" (which was used to mark it as a candidate + * for ECMP merging). Now we want to return this route to NML3Cfg and add it + * as regular single-hop routes. + * + * A single-hop route in kernel always has a "weight" of zero. This route + * cannot be added as-is. Well, if we would, then the result would be + * a different(!) route (with a zero "weight"). + * + * Anticipate that and normalize the route now to be a regular single-hop + * route (with weight zero). nm_platform_ip_route_normalize() does that. + * We really want to return a regular route here, not the route with a positive + * weight that exists for internal tracking purposes. + */ + nm_assert(NMP_OBJECT_GET_TYPE(route_obj) == NMP_OBJECT_TYPE_IP4_ROUTE); + nm_assert(route_obj->ip4_route.weight > 0u); + + route_clone = nmp_object_clone(route_obj, FALSE); + nm_platform_ip_route_normalize(AF_INET, NMP_OBJECT_CAST_IP_ROUTE(route_clone)); + g_ptr_array_add(*out_singlehop_routes, route_clone); + if (changed) { _LOGT("ecmp-route: single-hop %s", nmp_object_to_string(route_obj, diff --git a/src/libnm-platform/nm-platform.c b/src/libnm-platform/nm-platform.c index a4751113da..8b4c137eb5 100644 --- a/src/libnm-platform/nm-platform.c +++ b/src/libnm-platform/nm-platform.c @@ -5274,6 +5274,33 @@ _route_pref_normalize(guint8 pref) : NM_ICMPV6_ROUTER_PREF_MEDIUM; } +static guint16 +_ip4_route_weight_normalize(guint n_nexthops, guint16 weight, gboolean normalize_ecmp_weight) +{ + if (n_nexthops > 1u) { + /* This is a multihop-route. The weight is relevant. + * + * We only normalize a zero to one (because in kernel such weights + * don't exist. */ + return NM_MAX(weight, 1u); + } + if (n_nexthops == 0) { + /* This route has no next-hop (e.g. blackhole type). The weight is + * always irrelevant. Normalize to zero. */ + return 0; + } + + /* We have a IPv4 single-hop route. In kernel, the weight does not exist. + * It's always zero. + * + * For upper layers, we find it useful to track such routes with a positive + * weight. They are candidates to be merged into a multi-hop ECMP route. + * + * Depending on what the caller requests, we normalize it (or leave it + * unchanged). */ + return normalize_ecmp_weight ? 0u : weight; +} + /** * nm_platform_ip_route_normalize: * @addr_family: AF_INET or AF_INET6 @@ -5286,6 +5313,15 @@ _route_pref_normalize(guint8 pref) * Note that this function is related to NM_PLATFORM_IP_ROUTE_CMP_TYPE_SEMANTICALLY * in that if two routes compare semantically equal, after normalizing they also shall * compare equal with NM_PLATFORM_IP_ROUTE_CMP_TYPE_FULL. + * + * Note that a positive "weight" of IPv4 single hop routes is not meaningful in + * kernel. While we track such routes at upper layers, they don't exist in + * kernel (well, they exist, with their weight set to zero, which makes them a + * different route according to NM_PLATFORM_IP_ROUTE_CMP_TYPE_ID. It will + * be normalized to zero too, making basically it a different route. + * + * Also, "metric_any" is normalized to FALSE. This also makes it a different route + * according to NM_PLATFORM_IP_ROUTE_CMP_TYPE_ID. */ void nm_platform_ip_route_normalize(int addr_family, NMPlatformIPRoute *route) @@ -5299,6 +5335,33 @@ nm_platform_ip_route_normalize(int addr_family, NMPlatformIPRoute *route) route->rt_source = nmp_utils_ip_config_source_round_trip_rtprot(route->rt_source); + /* For the most part, nm_platform_ip_route_normalize() tries to normalize some fields + * as it happens when they go through kernel. + * + * In most cases, NM_PLATFORM_IP_ROUTE_CMP_TYPE_ID comparison performs the same + * relaxed comparison. For example, normalize() will normalize "scope_inv", and also + * the NM_PLATFORM_IP_ROUTE_CMP_TYPE_ID comparison will do that on-the-fly. Optimally, + * looking into a hash table gives you the same result, whether you normalize the + * needle first (or whether the entries in the hash table are normalized). + * + * Unfortunately, that's not always the case. Examples: + * + * - "metric": we have a "metric_any" field. This is used by higher layers + * to indicate that the metric is dynamically chosen (e.g. by the default + * metric of the default route). As such, as far as NM_PLATFORM_IP_ROUTE_CMP_TYPE_ID + * is concerned, the "metric_any" and "metric" values are treated as distinguishing + * properties. But when we add a route in kernel, "metric_any" no longer exist. + * It becomes a fixed metric. Normalize will fix the metric. + * - "weight": for IPv4 single-hop routes, the weight does not exist in kernel. We however + * use the field to track ECMP information in higher layers. Consequently, + * NM_PLATFORM_IP_ROUTE_CMP_TYPE_ID treats the weight as-is, while normalization + * (and adding it to kernel) will mangle it. + * + * You thus must be careful when you track NMPlatformIP4Route that make use of such + * higher-level features, which cannot be represented in kernel or the NMPlatform + * cache. + */ + switch (addr_family) { case AF_INET: r4 = (NMPlatformIP4Route *) route; @@ -5306,6 +5369,8 @@ nm_platform_ip_route_normalize(int addr_family, NMPlatformIPRoute *route) route->metric_any = FALSE; r4->network = nm_ip4_addr_clear_host_address(r4->network, r4->plen); r4->scope_inv = _ip_route_scope_inv_get_normalized(r4); + r4->n_nexthops = nm_platform_ip4_route_get_n_nexthops(r4); + r4->weight = _ip4_route_weight_normalize(r4->n_nexthops, r4->weight, TRUE); break; case AF_INET6: r6 = (NMPlatformIP6Route *) route; @@ -7039,7 +7104,7 @@ nm_platform_ip4_route_to_string_full(const NMPlatformIP4Route *route, route->plen, n_nexthops <= 1 && s_gateway[0] ? " via " : "", n_nexthops <= 1 ? s_gateway : "", - NM_PRINT_FMT_QUOTED2(n_nexthops <= 1 && route->weight != 0, + NM_PRINT_FMT_QUOTED2(n_nexthops <= 1 && route->weight != 0u, " weight ", nm_sprintf_buf(weight_str, "%u", route->weight), ""), @@ -8596,12 +8661,11 @@ nm_platform_ip4_rt_nexthop_hash_update(const NMPlatformIP4RtNextHop *obj, gboolean for_id, NMHashState *h) { - guint8 w; + guint16 w; nm_assert(obj); w = for_id ? NM_MAX(obj->weight, 1u) : obj->weight; - nm_hash_update_vals(h, obj->ifindex, obj->gateway, w); } @@ -8610,6 +8674,8 @@ nm_platform_ip4_route_hash_update(const NMPlatformIP4Route *obj, NMPlatformIPRouteCmpType cmp_type, NMHashState *h) { + guint n_nexthops; + switch (cmp_type) { case NM_PLATFORM_IP_ROUTE_CMP_TYPE_WEAK_ID: case NM_PLATFORM_IP_ROUTE_CMP_TYPE_ECMP_ID: @@ -8647,15 +8713,17 @@ nm_platform_ip4_route_hash_update(const NMPlatformIP4Route *obj, obj->lock_mtu, obj->lock_mss)); if (cmp_type == NM_PLATFORM_IP_ROUTE_CMP_TYPE_ID) { + n_nexthops = nm_platform_ip4_route_get_n_nexthops(obj); nm_hash_update_vals(h, obj->ifindex, - nm_platform_ip4_route_get_n_nexthops(obj), + n_nexthops, obj->gateway, - (guint8) NM_MAX(obj->weight, 1u)); + _ip4_route_weight_normalize(n_nexthops, obj->weight, FALSE)); } } break; case NM_PLATFORM_IP_ROUTE_CMP_TYPE_SEMANTICALLY: + n_nexthops = nm_platform_ip4_route_get_n_nexthops(obj); nm_hash_update_vals( h, obj->type_coerced, @@ -8664,9 +8732,9 @@ nm_platform_ip4_route_hash_update(const NMPlatformIP4Route *obj, nm_ip4_addr_clear_host_address(obj->network, obj->plen), obj->plen, obj->metric, - nm_platform_ip4_route_get_n_nexthops(obj), + n_nexthops, obj->gateway, - (guint8) NM_MAX(obj->weight, 1u), + _ip4_route_weight_normalize(n_nexthops, obj->weight, FALSE), nmp_utils_ip_config_source_round_trip_rtprot(obj->rt_source), _ip_route_scope_inv_get_normalized(obj), obj->tos, @@ -8732,14 +8800,9 @@ nm_platform_ip4_rt_nexthop_cmp(const NMPlatformIP4RtNextHop *a, const NMPlatformIP4RtNextHop *b, gboolean for_id) { - guint8 w_a; - guint8 w_b; + guint16 w_a; + guint16 w_b; - /* Note that weight zero is not valid (in kernel). We thus treat - * weight zero usually the same as 1. - * - * Not here for cmp/hash_update functions. These functions check for the exact - * bit-pattern, and not the it means at other places. */ NM_CMP_SELF(a, b); NM_CMP_FIELD(a, b, ifindex); NM_CMP_FIELD(a, b, gateway); @@ -8756,6 +8819,8 @@ nm_platform_ip4_route_cmp(const NMPlatformIP4Route *a, const NMPlatformIP4Route *b, NMPlatformIPRouteCmpType cmp_type) { + guint n_nexthops; + NM_CMP_SELF(a, b); switch (cmp_type) { case NM_PLATFORM_IP_ROUTE_CMP_TYPE_ECMP_ID: @@ -8802,9 +8867,10 @@ nm_platform_ip4_route_cmp(const NMPlatformIP4Route *a, if (cmp_type == NM_PLATFORM_IP_ROUTE_CMP_TYPE_ID) { NM_CMP_FIELD(a, b, ifindex); NM_CMP_FIELD(a, b, gateway); - NM_CMP_DIRECT(NM_MAX(a->weight, 1u), NM_MAX(b->weight, 1u)); - NM_CMP_DIRECT(nm_platform_ip4_route_get_n_nexthops(a), - nm_platform_ip4_route_get_n_nexthops(b)); + n_nexthops = nm_platform_ip4_route_get_n_nexthops(a); + NM_CMP_DIRECT(n_nexthops, nm_platform_ip4_route_get_n_nexthops(b)); + NM_CMP_DIRECT(_ip4_route_weight_normalize(n_nexthops, a->weight, FALSE), + _ip4_route_weight_normalize(n_nexthops, b->weight, FALSE)); } } break; @@ -8825,16 +8891,16 @@ nm_platform_ip4_route_cmp(const NMPlatformIP4Route *a, NM_CMP_FIELD(a, b, plen); NM_CMP_FIELD_UNSAFE(a, b, metric_any); NM_CMP_FIELD(a, b, metric); - if (cmp_type == NM_PLATFORM_IP_ROUTE_CMP_TYPE_SEMANTICALLY) { - NM_CMP_DIRECT(nm_platform_ip4_route_get_n_nexthops(a), - nm_platform_ip4_route_get_n_nexthops(b)); - } else - NM_CMP_FIELD(a, b, n_nexthops); NM_CMP_FIELD(a, b, gateway); - if (cmp_type == NM_PLATFORM_IP_ROUTE_CMP_TYPE_SEMANTICALLY) - NM_CMP_DIRECT(NM_MAX(a->weight, 1u), NM_MAX(b->weight, 1u)); - else + if (cmp_type == NM_PLATFORM_IP_ROUTE_CMP_TYPE_SEMANTICALLY) { + n_nexthops = nm_platform_ip4_route_get_n_nexthops(a); + NM_CMP_DIRECT(n_nexthops, nm_platform_ip4_route_get_n_nexthops(b)); + NM_CMP_DIRECT(_ip4_route_weight_normalize(n_nexthops, a->weight, FALSE), + _ip4_route_weight_normalize(n_nexthops, b->weight, FALSE)); + } else { + NM_CMP_FIELD(a, b, n_nexthops); NM_CMP_FIELD(a, b, weight); + } if (cmp_type == NM_PLATFORM_IP_ROUTE_CMP_TYPE_SEMANTICALLY) { NM_CMP_DIRECT(nmp_utils_ip_config_source_round_trip_rtprot(a->rt_source), nmp_utils_ip_config_source_round_trip_rtprot(b->rt_source)); diff --git a/src/libnm-platform/nm-platform.h b/src/libnm-platform/nm-platform.h index 557b4190a8..2794d4b36f 100644 --- a/src/libnm-platform/nm-platform.h +++ b/src/libnm-platform/nm-platform.h @@ -444,18 +444,27 @@ struct _NMPlatformIP4Route { * pref_src must match, unless set to 0.0.0.0 to match any. */ in_addr_t pref_src; - /* This is the weight of for the first next-hop, in case of n_nexthops > 1. + /* This is the weight of for the first next-hop. * - * If n_nexthops is zero, this value is undefined (should be zero). - * If n_nexthops is 1, this also doesn't matter, but it's usually set to - * zero. - * If n_nexthops is greater or equal to one, this is the weight of - * the first hop. + * For multi-hop routes (n_nexthops > 1) this is the weight of the first + * hop. Note that the valid range is from 1-256. Zero is treated the same + * as 1 (for NM_PLATFORM_IP_ROUTE_CMP_TYPE_ID comparison). * - * Note that upper layers (nm_utils_ip_route_attribute_to_platform()) use this flag to indicate - * whether this is a multihop route. Single-hop, non-ECMP routes will have a weight of zero. + * For routes without next-hop (e.g. blackhole type), the weight is + * meaningless. It should be set to zero. NM_PLATFORM_IP_ROUTE_CMP_TYPE_ID + * will treat it as zero. * - * The valid range for weight in kernel is 1-256. */ + * For single-hop routes, in kernel they don't have a weight. That means, + * all routes in the platform cache have a weight of zero. For tracking + * purposes, we find it useful that upper layers have single-hop routes + * with a positive weight. Such routes can never exist in kernel. Trying + * to add such a route will somewhat work, because + * nm_platform_ip_route_normalize() normalizes the weight to zero + * (effectively adding another route, according to + * NM_PLATFORM_IP_ROUTE_CMP_TYPE_ID). A lookup in the platform cache with + * such a route will not yield a result. It does not exist there. If you + * want to find such a route, normalize it first. + */ guint16 weight; /* rtm_tos (iproute2: tos) @@ -704,13 +713,11 @@ typedef struct { typedef struct { int ifindex; in_addr_t gateway; - /* The valid range for weight is 1-256. Single hop routes in kernel - * don't have a weight, we assign them weight zero (to indicate the - * weight is missing). + + /* The weight of the next hop. The valid range for weight is 1-256. * - * Upper layers (nm_utils_ip_route_attribute_to_platform()) care about - * the distinction of unset weight (no-ECMP). They express no-ECMP as - * zero. + * Zero is allowed too, but treated as 1 (by + * NM_PLATFORM_IP_ROUTE_CMP_TYPE_ID comparison). */ guint16 weight;