From 15b820b4e8b2c604c8aabca1ecac170a9112591f Mon Sep 17 00:00:00 2001 From: Jan Vaclav Date: Tue, 24 Feb 2026 11:51:50 +0100 Subject: [PATCH] platform: track onlink flag per-nexthop for IPv4 routes In kernel, the onlink flag (RTNH_F_ONLINK) is associated with each nexthop (rtnh_flags) rather than the route as a whole. NM previously stored it only per-route in NMPlatformIPRoute.r_rtm_flags, which meant that two nexthops only differing with the onlink flag were combined as one entry in the platform cache. Fix this by tracking the onlink flag per-nexthop. Resolves: https://issues.redhat.com/browse/NMT-1486 --- src/core/NetworkManagerUtils.c | 9 +++--- src/core/nm-l3-config-data.c | 9 ++---- src/core/nm-netns.c | 8 +++-- src/libnm-platform/nm-linux-platform.c | 45 +++++++++++++++++--------- src/libnm-platform/nm-platform.c | 30 +++++++++++------ src/libnm-platform/nm-platform.h | 21 ++++++++---- 6 files changed, 76 insertions(+), 46 deletions(-) diff --git a/src/core/NetworkManagerUtils.c b/src/core/NetworkManagerUtils.c index 32df3d6dfd..95ba20f167 100644 --- a/src/core/NetworkManagerUtils.c +++ b/src/core/NetworkManagerUtils.c @@ -1495,11 +1495,10 @@ nm_utils_ip_route_attribute_to_platform(int addr_family, r4->scope_inv = nm_platform_route_scope_inv(scope); } - /* Note that for IPv4 routes in kernel, the onlink flag can be set for - * each next hop separately (rtnh_flags). Not for NetworkManager. We can - * only merge routes as ECMP routes (when setting a weight) if they all - * share the same onlink flag. See NM_PLATFORM_IP_ROUTE_CMP_TYPE_ECMP_ID. - * That simplifies the code. */ + /* For IPv4 routes in kernel, the onlink flag is per-nexthop (rtnh_flags). + * Here we set the flag on r_rtm_flags which represents the first nexthop's + * flags. For ECMP routes, each nexthop carries its own onlink flag, so + * routes with different onlink settings per-nexthop can be merged. */ GET_ATTR(NM_IP_ROUTE_ATTRIBUTE_ONLINK, onlink, BOOLEAN, boolean, FALSE); r->r_rtm_flags = ((onlink) ? (unsigned) RTNH_F_ONLINK : 0u); diff --git a/src/core/nm-l3-config-data.c b/src/core/nm-l3-config-data.c index 0b2804fa72..9e9066c3ed 100644 --- a/src/core/nm-l3-config-data.c +++ b/src/core/nm-l3-config-data.c @@ -3208,12 +3208,9 @@ _init_from_connection_ip(NML3ConfigData *self, int addr_family, NMConnection *co * the one we create here (because the "onlink" flag is part of the * identifier of a route, see nm_platform_ip4_route_cmp()). * - * Note however that for ECMP routes we currently can only merge routes - * that agree in their onlink flag. So a route without gateway cannot - * merge with an onlink route that has a gateway. That needs fixing, - * by not treating the onlink flag as for the entire route, but allowing - * to merge ECMP routes with different onlink flag. And first, we need - * to track the onlink flag for the nexthop (NMPlatformIP4RtNextHop). */ + * The onlink flag is tracked per-nexthop (in NMPlatformIP4RtNextHop.rtnh_flags + * for extra nexthops, and in r_rtm_flags for the first nexthop). ECMP routes + * can be merged regardless of per-nexthop onlink flags. */ r.r4.r_rtm_flags &= ~((unsigned) RTNH_F_ONLINK); } diff --git a/src/core/nm-netns.c b/src/core/nm-netns.c index faae72670d..65142f4f34 100644 --- a/src/core/nm-netns.c +++ b/src/core/nm-netns.c @@ -207,6 +207,7 @@ _ecmp_track_sort_lst_cmp(const CList *a, const CList *b, const void *user_data) NM_CMP_FIELD(route_a, route_b, ifindex); NM_CMP_FIELD(route_b, route_a, weight); NM_CMP_DIRECT(htonl(route_a->gateway), htonl(route_b->gateway)); + NM_CMP_DIRECT(route_a->r_rtm_flags & RTNH_F_ONLINK, route_b->r_rtm_flags & RTNH_F_ONLINK); return nm_assert_unreachable_val( nm_platform_ip4_route_cmp(route_a, route_b, NM_PLATFORM_IP_ROUTE_CMP_TYPE_ID)); @@ -275,9 +276,10 @@ _ecmp_track_init_merged_obj(EcmpTrackEcmpid *track_ecmpid, const NMPObject **out NMPlatformIP4RtNextHop *nh = (gpointer) &obj_new->_ip4_route.extra_nexthops[i - 1]; *nh = (NMPlatformIP4RtNextHop) { - .ifindex = r->ifindex, - .gateway = r->gateway, - .weight = r->weight, + .ifindex = r->ifindex, + .gateway = r->gateway, + .weight = r->weight, + .rtnh_flags = r->r_rtm_flags & RTNH_F_ONLINK, }; } i++; diff --git a/src/libnm-platform/nm-linux-platform.c b/src/libnm-platform/nm-linux-platform.c index cde96326ca..b5df8b17d1 100644 --- a/src/libnm-platform/nm-linux-platform.c +++ b/src/libnm-platform/nm-linux-platform.c @@ -4090,6 +4090,7 @@ _new_from_nl_route(const struct nlmsghdr *nlh, gboolean id_only, ParseNlmsgIter int ifindex; NMIPAddr gateway; gboolean is_via; + unsigned rtnh_flags; } nh = { .found = FALSE, .has_more = FALSE, @@ -4199,9 +4200,10 @@ _new_from_nl_route(const struct nlmsghdr *nlh, gboolean id_only, ParseNlmsgIter v4_nh_extra_nexthops = v4_nh_extra_nexthops_heap; } nm_assert(v4_n_nexthops - 1u < v4_nh_extra_alloc); - new_nexthop = &v4_nh_extra_nexthops[v4_n_nexthops - 1u]; - new_nexthop->ifindex = rtnh->rtnh_ifindex; - new_nexthop->weight = NM_MAX(((guint) rtnh->rtnh_hops) + 1u, 1u); + new_nexthop = &v4_nh_extra_nexthops[v4_n_nexthops - 1u]; + new_nexthop->ifindex = rtnh->rtnh_ifindex; + new_nexthop->weight = NM_MAX(((guint) rtnh->rtnh_hops) + 1u, 1u); + new_nexthop->rtnh_flags = rtnh->rtnh_flags; if (rtnh->rtnh_len > sizeof(*rtnh)) { struct nlattr *ntb[RTA_MAX + 1]; @@ -4216,9 +4218,10 @@ _new_from_nl_route(const struct nlmsghdr *nlh, gboolean id_only, ParseNlmsgIter memcpy(&new_nexthop->gateway, nla_data(ntb[RTA_GATEWAY]), addr_len); } } else if (IS_IPv4 || idx == multihop_idx) { - nh.found = TRUE; - nh.ifindex = rtnh->rtnh_ifindex; - nh.weight = NM_MAX(((guint) rtnh->rtnh_hops) + 1u, 1u); + nh.found = TRUE; + nh.ifindex = rtnh->rtnh_ifindex; + nh.weight = NM_MAX(((guint) rtnh->rtnh_hops) + 1u, 1u); + nh.rtnh_flags = rtnh->rtnh_flags; if (rtnh->rtnh_len > sizeof(*rtnh)) { struct nlattr *ntb[RTA_MAX + 1]; @@ -4466,7 +4469,16 @@ rta_multipath_done: } obj->ip_route.r_rtm_flags = rtm->rtm_flags; - obj->ip_route.rt_source = nmp_utils_ip_config_source_from_rtprot(rtm->rtm_protocol); + + if (IS_IPv4 && v4_n_nexthops > 1u) { + /* For multipath routes, rtm_flags at the route level does not contain + * RTNH_F_ONLINK. Instead, each nexthop has its own rtnh_flags. Merge the + * first nexthop's RTNH_F_ONLINK into r_rtm_flags, since the first nexthop + * is embedded in the route struct. */ + obj->ip_route.r_rtm_flags |= (nh.rtnh_flags & RTNH_F_ONLINK); + } + + obj->ip_route.rt_source = nmp_utils_ip_config_source_from_rtprot(rtm->rtm_protocol); if (nh.has_more) { parse_nlmsg_iter->iter_more = TRUE; @@ -5899,15 +5911,18 @@ _nl_msg_new_route(uint16_t nlmsg_type, uint16_t nlmsg_flags, const NMPObject *ob } NLA_PUT_U32(msg, RTA_GATEWAY, gw); - rtnh->rtnh_flags = 0; + if (i == 0u) { + rtnh->rtnh_flags = + (gw != 0 && NM_FLAGS_HAS(obj->ip_route.r_rtm_flags, (unsigned) RTNH_F_ONLINK)) + ? RTNH_F_ONLINK + : 0; + } else { + const NMPlatformIP4RtNextHop *n2 = &obj->_ip4_route.extra_nexthops[i - 1u]; - if (obj->ip4_route.n_nexthops > 1 - && NM_FLAGS_HAS(obj->ip_route.r_rtm_flags, (unsigned) (RTNH_F_ONLINK)) && gw != 0) { - /* Unlike kernel, we only track the onlink flag per NMPlatformIP4Address, and - * not per nexthop. That is fine for NetworkManager configuring addresses. - * It is not fine for tracking addresses from kernel in platform cache, - * because the rtnh_flags of the nexthops need to be part of nmp_object_id_cmp(). */ - rtnh->rtnh_flags |= RTNH_F_ONLINK; + rtnh->rtnh_flags = + (gw != 0 && NM_FLAGS_HAS(n2->rtnh_flags, (unsigned) RTNH_F_ONLINK)) + ? RTNH_F_ONLINK + : 0; } rtnh->rtnh_len = (char *) nlmsg_tail(nlmsg_hdr(msg)) - (char *) rtnh; diff --git a/src/libnm-platform/nm-platform.c b/src/libnm-platform/nm-platform.c index 1f98a9a9c3..914dcd8654 100644 --- a/src/libnm-platform/nm-platform.c +++ b/src/libnm-platform/nm-platform.c @@ -7424,13 +7424,16 @@ nm_platform_ip4_route_to_string_full(const NMPlatformIP4Route *route, "%s" /* ifindex */ "%s%s" /* gateway */ " weight %s" /* weight */ + "%s" /* onlink */ "", NM_PRINT_FMT_QUOTED2(nexthop->gateway != 0 || nexthop->ifindex <= 0, " via ", nm_inet4_ntop(nexthop->gateway, s_gateway), ""), _to_string_dev(str_dev, nexthop->ifindex), - nm_sprintf_buf(weight_str, "%u", nexthop->weight)); + nm_sprintf_buf(weight_str, "%u", nexthop->weight), + NM_FLAGS_HAS(nexthop->rtnh_flags, (unsigned) RTNH_F_ONLINK) ? " onlink" + : ""); } } } @@ -9010,7 +9013,11 @@ nm_platform_ip4_rt_nexthop_hash_update(const NMPlatformIP4RtNextHop *obj, nm_assert(obj); w = for_id ? NM_MAX(obj->weight, 1u) : obj->weight; - nm_hash_update_vals(h, obj->ifindex, obj->gateway, w); + nm_hash_update_vals(h, + obj->ifindex, + obj->gateway, + w, + for_id ? (obj->rtnh_flags & RTNH_F_ONLINK) : obj->rtnh_flags); } void @@ -9047,7 +9054,6 @@ nm_platform_ip4_route_hash_update(const NMPlatformIP4Route *obj, obj->initrwnd, obj->mtu, obj->rto_min, - obj->r_rtm_flags & RTNH_F_ONLINK, NM_HASH_COMBINE_BOOLS(guint16, obj->quickack, obj->lock_window, @@ -9065,7 +9071,8 @@ nm_platform_ip4_route_hash_update(const NMPlatformIP4Route *obj, obj->via.addr_family == AF_INET6 ? obj->via.addr.addr6 : in6addr_any, obj->gateway, - _ip4_route_weight_normalize(n_nexthops, obj->weight, FALSE)); + _ip4_route_weight_normalize(n_nexthops, obj->weight, FALSE), + obj->r_rtm_flags & RTNH_F_ONLINK); } } break; @@ -9158,6 +9165,11 @@ nm_platform_ip4_rt_nexthop_cmp(const NMPlatformIP4RtNextHop *a, w_b = for_id ? NM_MAX(b->weight, 1u) : b->weight; NM_CMP_DIRECT(w_a, w_b); + if (for_id) + NM_CMP_DIRECT(a->rtnh_flags & RTNH_F_ONLINK, b->rtnh_flags & RTNH_F_ONLINK); + else + NM_CMP_FIELD(a, b, rtnh_flags); + return 0; } @@ -9198,12 +9210,6 @@ nm_platform_ip4_route_cmp(const NMPlatformIP4Route *a, NM_CMP_FIELD(a, b, mtu); NM_CMP_FIELD(a, b, rto_min); - /* Note that for NetworkManager, the onlink flag is only part of the entire route. - * For kernel, each next hop has it's own onlink flag (rtnh_flags). This means, - * we can only merge ECMP routes, if they agree with their onlink flag, and then - * all next hops are onlink (or not). */ - NM_CMP_DIRECT(a->r_rtm_flags & RTNH_F_ONLINK, b->r_rtm_flags & RTNH_F_ONLINK); - NM_CMP_FIELD_UNSAFE(a, b, quickack); NM_CMP_FIELD_UNSAFE(a, b, lock_window); NM_CMP_FIELD_UNSAFE(a, b, lock_cwnd); @@ -9222,6 +9228,10 @@ nm_platform_ip4_route_cmp(const NMPlatformIP4Route *a, NM_CMP_DIRECT(n_nexthops, nm_platform_ip4_route_get_n_nexthops(b)); NM_CMP_DIRECT(_ip4_route_weight_normalize(n_nexthops, a->weight, FALSE), _ip4_route_weight_normalize(n_nexthops, b->weight, FALSE)); + /* The onlink flag is per-nexthop. For the first nexthop it is in + * r_rtm_flags. For extra nexthops, it's compared via + * nm_platform_ip4_rt_nexthop_cmp(). */ + NM_CMP_DIRECT(a->r_rtm_flags & RTNH_F_ONLINK, b->r_rtm_flags & RTNH_F_ONLINK); } } break; diff --git a/src/libnm-platform/nm-platform.h b/src/libnm-platform/nm-platform.h index 1cdd160114..ba37d3a270 100644 --- a/src/libnm-platform/nm-platform.h +++ b/src/libnm-platform/nm-platform.h @@ -287,7 +287,7 @@ guint _nm_platform_signal_id_get(NMPlatformSignalIdType signal_type); #define __NMPlatformIPRoute_COMMON \ __NMPlatformObjWithIfindex_COMMON; \ \ - /* rtnh_flags + /* rtm_flags and rtnh_flags * * Routes with rtm_flags RTM_F_CLONED are hidden by platform and * do not exist from the point-of-view of platform users. @@ -295,8 +295,15 @@ guint _nm_platform_signal_id_get(NMPlatformSignalIdType signal_type); * * NOTE: currently we ignore all flags except RTM_F_CLONED * and RTNH_F_ONLINK. - * We also may not properly consider the flags as part of the ID - * in route-cmp. */ \ + * + * For IPv4 routes, the RTNH_F_ONLINK flag here applies to the + * first nexthop (which is embedded in the route struct). Extra + * nexthops (NMPlatformIP4RtNextHop) each have their own + * rtnh_flags field. + * + * For single-hop routes, this field comes directly from + * rtm_flags. For multi-hop routes from kernel, the first + * nexthop's RTNH_F_ONLINK from rtnh_flags is merged here. */ \ unsigned r_rtm_flags; \ \ /* RTA_METRICS.RTAX_ADVMSS (iproute2: advmss) */ \ @@ -733,10 +740,10 @@ typedef struct { */ guint16 weight; - /* FIXME: each next hop in kernel also has a rtnh_flags (for example to - * set RTNH_F_ONLINK). As the next hop is part of the identifier of an - * IPv4 route, so is their flags. We must also track the flag, otherwise - * two routes that look different for kernel, get merged by platform cache. */ + /* Each next hop in kernel has its own rtnh_flags (for example to + * set RTNH_F_ONLINK). The flags are part of the identifier of a + * route. */ + unsigned rtnh_flags; } NMPlatformIP4RtNextHop; typedef struct {