From 6ed966258c0acc5bcf0c1dfa7d0cbcdb65b84b56 Mon Sep 17 00:00:00 2001 From: Thomas Haller Date: Wed, 1 Feb 2023 08:58:15 +0100 Subject: [PATCH] platform,core: better handle onlink flag for ECMP routes The onlink flag is part of each next hop. When NetworkManager configures ECMP routes, we won't support that. All next hops of an ECMP route must share the same onlink flag. That is fine and fixed by this commit. What is not fine, is that we don't track the rtnh_flags flags in NMPlatformIP4RtNextHop, and consequently our nmp_object_id_cmp() is wrong. Fixes: 5b5ce4268211 ('nm-netns: track ECMP routes') --- src/core/NetworkManagerUtils.c | 6 +++++- src/core/nm-netns.c | 6 +++++- src/libnm-core-impl/nm-setting-ip4-config.c | 14 +++++++++----- src/libnm-platform/nm-linux-platform.c | 18 +++++++++++++++--- src/libnm-platform/nm-platform.c | 6 ++++++ src/libnm-platform/nm-platform.h | 5 +++++ 6 files changed, 45 insertions(+), 10 deletions(-) diff --git a/src/core/NetworkManagerUtils.c b/src/core/NetworkManagerUtils.c index 15a3e5ddcf..6f4c60f876 100644 --- a/src/core/NetworkManagerUtils.c +++ b/src/core/NetworkManagerUtils.c @@ -1386,8 +1386,12 @@ nm_utils_ip_route_attribute_to_platform(int addr_family, r4->scope_inv = nm_platform_route_scope_inv(scope); } + /* Note that for IPv4 routes in kernel, the onlink flag can be set for + * each next hop separately (rtnh_flags). Not for NetworkManager. We can + * only merge routes as ECMP routes (when setting a weight) if they all + * share the same onlink flag. See NM_PLATFORM_IP_ROUTE_CMP_TYPE_ECMP_ID. + * That simplifies the code. */ GET_ATTR(NM_IP_ROUTE_ATTRIBUTE_ONLINK, onlink, BOOLEAN, boolean, FALSE); - r->r_rtm_flags = ((onlink) ? (unsigned) RTNH_F_ONLINK : 0u); GET_ATTR(NM_IP_ROUTE_ATTRIBUTE_ADVMSS, r->mss, UINT32, uint32, 0); diff --git a/src/core/nm-netns.c b/src/core/nm-netns.c index f273c0885f..12ca850898 100644 --- a/src/core/nm-netns.c +++ b/src/core/nm-netns.c @@ -7,6 +7,8 @@ #include "nm-netns.h" +#include + #include "libnm-glib-aux/nm-dedup-multi.h" #include "libnm-glib-aux/nm-c-list.h" @@ -687,7 +689,9 @@ nm_netns_ip_route_ecmp_commit(NMNetns *self, * after l3cfg configured that route. We achieve that by * scheduling another idle commit on "l3cfg". */ track_obj->is_new = FALSE; - if (route && route->gateway == 0) { + if (route + && (route->gateway == 0 + || NM_FLAGS_HAS(route->r_rtm_flags, (unsigned) RTNH_F_ONLINK))) { /* This route is onlink. We don't need to configure an onlink route * to the gateway, and the route is immediately ready for configuration. */ track_obj->is_ready = TRUE; diff --git a/src/libnm-core-impl/nm-setting-ip4-config.c b/src/libnm-core-impl/nm-setting-ip4-config.c index db1416483b..d2ca88241f 100644 --- a/src/libnm-core-impl/nm-setting-ip4-config.c +++ b/src/libnm-core-impl/nm-setting-ip4-config.c @@ -1143,11 +1143,15 @@ nm_setting_ip4_config_class_init(NMSettingIP4ConfigClass *klass) * The default is unicast. * * - * "weight" - an unsigned 32 bit integer ranging from 0 to 256. This indicates - * that the IPv4 route is a ECMP route and is only allowed for routes that specify a next-hop. - * NetworkManager will automatically merge compatible routes into ECMP multi-hop routes. - * Setting to zero or omitting the attribute configures single hop routes that won't get - * merged. If the route finds no merge partner, it is configured as single hop route. + * "weight" - an unsigned 32 bit integer + * ranging from 0 to 256. A non-zero weight indicates that the IPv4 + * route is an ECMP IPv4 route. NetworkManager will automatically + * merge compatible ECMP routes into multi-hop routes. Setting to + * zero or omitting the attribute configures single hop routes that + * won't get merged. If the route finds no merge partner, it is + * configured as single hop route. Note that in + * NetworkManager, currently all nexthops of a ECMP route must share + * the same "onlink" flag in order to be mergable. * * * "window" - an unsigned 32 bit integer. diff --git a/src/libnm-platform/nm-linux-platform.c b/src/libnm-platform/nm-linux-platform.c index 7dc0f81a40..d4ab36f93d 100644 --- a/src/libnm-platform/nm-linux-platform.c +++ b/src/libnm-platform/nm-linux-platform.c @@ -5419,6 +5419,7 @@ _nl_msg_new_route(uint16_t nlmsg_type, uint16_t nlmsg_flags, const NMPObject *ob for (i = 0u; i < obj->ip4_route.n_nexthops; i++) { struct rtnexthop *rtnh; + in_addr_t gw; rtnh = nlmsg_reserve(msg, sizeof(*rtnh), NLMSG_ALIGNTO); if (!rtnh) @@ -5430,17 +5431,28 @@ _nl_msg_new_route(uint16_t nlmsg_type, uint16_t nlmsg_flags, const NMPObject *ob if (i == 0u) { rtnh->rtnh_hops = NM_MAX(obj->ip4_route.weight, 1u) - 1u; rtnh->rtnh_ifindex = obj->ip4_route.ifindex; - NLA_PUT_U32(msg, RTA_GATEWAY, obj->ip4_route.gateway); + gw = obj->ip4_route.gateway; } else { const NMPlatformIP4RtNextHop *n = &obj->_ip4_route.extra_nexthops[i - 1u]; rtnh->rtnh_hops = NM_MAX(n->weight, 1u) - 1u; rtnh->rtnh_ifindex = n->ifindex; - NLA_PUT_U32(msg, RTA_GATEWAY, n->gateway); + gw = n->gateway; } + NLA_PUT_U32(msg, RTA_GATEWAY, gw); rtnh->rtnh_flags = 0; - rtnh->rtnh_len = (char *) nlmsg_tail(nlmsg_hdr(msg)) - (char *) rtnh; + + if (obj->ip4_route.n_nexthops > 1 + && NM_FLAGS_HAS(obj->ip_route.r_rtm_flags, (unsigned) (RTNH_F_ONLINK)) && gw != 0) { + /* Unlike kernel, we only track the onlink flag per NMPlatformIP4Address, and + * not per nexthop. That is fine for NetworkManager configuring addresses. + * It is not fine for tracking addresses from kernel in platform cache, + * because the rtnh_flags of the nexthops need to be part of nmp_object_id_cmp(). */ + rtnh->rtnh_flags |= RTNH_F_ONLINK; + } + + rtnh->rtnh_len = (char *) nlmsg_tail(nlmsg_hdr(msg)) - (char *) rtnh; } nla_nest_end(msg, multipath); diff --git a/src/libnm-platform/nm-platform.c b/src/libnm-platform/nm-platform.c index 6fad736cce..c80d964851 100644 --- a/src/libnm-platform/nm-platform.c +++ b/src/libnm-platform/nm-platform.c @@ -8601,7 +8601,13 @@ nm_platform_ip4_route_cmp(const NMPlatformIP4Route *a, NM_CMP_FIELD(a, b, initrwnd); NM_CMP_FIELD(a, b, mtu); NM_CMP_FIELD(a, b, rto_min); + + /* Note that for NetworkManager, the onlink flag is only part of the entire route. + * For kernel, each next hop has it's own onlink flag (rtnh_flags). This means, + * we can only merge ECMP routes, if they agree with their onlink flag, and then + * all next hops are onlink (or not). */ NM_CMP_DIRECT(a->r_rtm_flags & RTNH_F_ONLINK, b->r_rtm_flags & RTNH_F_ONLINK); + NM_CMP_FIELD_UNSAFE(a, b, quickack); NM_CMP_FIELD_UNSAFE(a, b, lock_window); NM_CMP_FIELD_UNSAFE(a, b, lock_cwnd); diff --git a/src/libnm-platform/nm-platform.h b/src/libnm-platform/nm-platform.h index dec38c1c93..1cd9c6c6f9 100644 --- a/src/libnm-platform/nm-platform.h +++ b/src/libnm-platform/nm-platform.h @@ -680,6 +680,11 @@ typedef struct { * zero. */ guint16 weight; + + /* FIXME: each next hop in kernel also has a rtnh_flags (for example to + * set RTNH_F_ONLINK). As the next hop is part of the identifier of an + * IPv4 route, so is their flags. We must also track the flag, otherwise + * two routes that look different for kernel, get merged by platform cache. */ } NMPlatformIP4RtNextHop; typedef struct {