diff --git a/src/core/NetworkManagerUtils.c b/src/core/NetworkManagerUtils.c index 15a3e5ddcf..6f4c60f876 100644 --- a/src/core/NetworkManagerUtils.c +++ b/src/core/NetworkManagerUtils.c @@ -1386,8 +1386,12 @@ nm_utils_ip_route_attribute_to_platform(int addr_family, r4->scope_inv = nm_platform_route_scope_inv(scope); } + /* Note that for IPv4 routes in kernel, the onlink flag can be set for + * each next hop separately (rtnh_flags). Not for NetworkManager. We can + * only merge routes as ECMP routes (when setting a weight) if they all + * share the same onlink flag. See NM_PLATFORM_IP_ROUTE_CMP_TYPE_ECMP_ID. + * That simplifies the code. */ GET_ATTR(NM_IP_ROUTE_ATTRIBUTE_ONLINK, onlink, BOOLEAN, boolean, FALSE); - r->r_rtm_flags = ((onlink) ? (unsigned) RTNH_F_ONLINK : 0u); GET_ATTR(NM_IP_ROUTE_ATTRIBUTE_ADVMSS, r->mss, UINT32, uint32, 0); diff --git a/src/core/nm-netns.c b/src/core/nm-netns.c index f273c0885f..12ca850898 100644 --- a/src/core/nm-netns.c +++ b/src/core/nm-netns.c @@ -7,6 +7,8 @@ #include "nm-netns.h" +#include + #include "libnm-glib-aux/nm-dedup-multi.h" #include "libnm-glib-aux/nm-c-list.h" @@ -687,7 +689,9 @@ nm_netns_ip_route_ecmp_commit(NMNetns *self, * after l3cfg configured that route. We achieve that by * scheduling another idle commit on "l3cfg". */ track_obj->is_new = FALSE; - if (route && route->gateway == 0) { + if (route + && (route->gateway == 0 + || NM_FLAGS_HAS(route->r_rtm_flags, (unsigned) RTNH_F_ONLINK))) { /* This route is onlink. We don't need to configure an onlink route * to the gateway, and the route is immediately ready for configuration. */ track_obj->is_ready = TRUE; diff --git a/src/libnm-core-impl/nm-setting-ip4-config.c b/src/libnm-core-impl/nm-setting-ip4-config.c index db1416483b..d2ca88241f 100644 --- a/src/libnm-core-impl/nm-setting-ip4-config.c +++ b/src/libnm-core-impl/nm-setting-ip4-config.c @@ -1143,11 +1143,15 @@ nm_setting_ip4_config_class_init(NMSettingIP4ConfigClass *klass) * The default is unicast. * * - * "weight" - an unsigned 32 bit integer ranging from 0 to 256. This indicates - * that the IPv4 route is a ECMP route and is only allowed for routes that specify a next-hop. - * NetworkManager will automatically merge compatible routes into ECMP multi-hop routes. - * Setting to zero or omitting the attribute configures single hop routes that won't get - * merged. If the route finds no merge partner, it is configured as single hop route. + * "weight" - an unsigned 32 bit integer + * ranging from 0 to 256. A non-zero weight indicates that the IPv4 + * route is an ECMP IPv4 route. NetworkManager will automatically + * merge compatible ECMP routes into multi-hop routes. Setting to + * zero or omitting the attribute configures single hop routes that + * won't get merged. If the route finds no merge partner, it is + * configured as single hop route. Note that in + * NetworkManager, currently all nexthops of a ECMP route must share + * the same "onlink" flag in order to be mergable. * * * "window" - an unsigned 32 bit integer. diff --git a/src/libnm-platform/nm-linux-platform.c b/src/libnm-platform/nm-linux-platform.c index 7dc0f81a40..d4ab36f93d 100644 --- a/src/libnm-platform/nm-linux-platform.c +++ b/src/libnm-platform/nm-linux-platform.c @@ -5419,6 +5419,7 @@ _nl_msg_new_route(uint16_t nlmsg_type, uint16_t nlmsg_flags, const NMPObject *ob for (i = 0u; i < obj->ip4_route.n_nexthops; i++) { struct rtnexthop *rtnh; + in_addr_t gw; rtnh = nlmsg_reserve(msg, sizeof(*rtnh), NLMSG_ALIGNTO); if (!rtnh) @@ -5430,17 +5431,28 @@ _nl_msg_new_route(uint16_t nlmsg_type, uint16_t nlmsg_flags, const NMPObject *ob if (i == 0u) { rtnh->rtnh_hops = NM_MAX(obj->ip4_route.weight, 1u) - 1u; rtnh->rtnh_ifindex = obj->ip4_route.ifindex; - NLA_PUT_U32(msg, RTA_GATEWAY, obj->ip4_route.gateway); + gw = obj->ip4_route.gateway; } else { const NMPlatformIP4RtNextHop *n = &obj->_ip4_route.extra_nexthops[i - 1u]; rtnh->rtnh_hops = NM_MAX(n->weight, 1u) - 1u; rtnh->rtnh_ifindex = n->ifindex; - NLA_PUT_U32(msg, RTA_GATEWAY, n->gateway); + gw = n->gateway; } + NLA_PUT_U32(msg, RTA_GATEWAY, gw); rtnh->rtnh_flags = 0; - rtnh->rtnh_len = (char *) nlmsg_tail(nlmsg_hdr(msg)) - (char *) rtnh; + + if (obj->ip4_route.n_nexthops > 1 + && NM_FLAGS_HAS(obj->ip_route.r_rtm_flags, (unsigned) (RTNH_F_ONLINK)) && gw != 0) { + /* Unlike kernel, we only track the onlink flag per NMPlatformIP4Address, and + * not per nexthop. That is fine for NetworkManager configuring addresses. + * It is not fine for tracking addresses from kernel in platform cache, + * because the rtnh_flags of the nexthops need to be part of nmp_object_id_cmp(). */ + rtnh->rtnh_flags |= RTNH_F_ONLINK; + } + + rtnh->rtnh_len = (char *) nlmsg_tail(nlmsg_hdr(msg)) - (char *) rtnh; } nla_nest_end(msg, multipath); diff --git a/src/libnm-platform/nm-platform.c b/src/libnm-platform/nm-platform.c index 6fad736cce..c80d964851 100644 --- a/src/libnm-platform/nm-platform.c +++ b/src/libnm-platform/nm-platform.c @@ -8601,7 +8601,13 @@ nm_platform_ip4_route_cmp(const NMPlatformIP4Route *a, NM_CMP_FIELD(a, b, initrwnd); NM_CMP_FIELD(a, b, mtu); NM_CMP_FIELD(a, b, rto_min); + + /* Note that for NetworkManager, the onlink flag is only part of the entire route. + * For kernel, each next hop has it's own onlink flag (rtnh_flags). This means, + * we can only merge ECMP routes, if they agree with their onlink flag, and then + * all next hops are onlink (or not). */ NM_CMP_DIRECT(a->r_rtm_flags & RTNH_F_ONLINK, b->r_rtm_flags & RTNH_F_ONLINK); + NM_CMP_FIELD_UNSAFE(a, b, quickack); NM_CMP_FIELD_UNSAFE(a, b, lock_window); NM_CMP_FIELD_UNSAFE(a, b, lock_cwnd); diff --git a/src/libnm-platform/nm-platform.h b/src/libnm-platform/nm-platform.h index dec38c1c93..1cd9c6c6f9 100644 --- a/src/libnm-platform/nm-platform.h +++ b/src/libnm-platform/nm-platform.h @@ -680,6 +680,11 @@ typedef struct { * zero. */ guint16 weight; + + /* FIXME: each next hop in kernel also has a rtnh_flags (for example to + * set RTNH_F_ONLINK). As the next hop is part of the identifier of an + * IPv4 route, so is their flags. We must also track the flag, otherwise + * two routes that look different for kernel, get merged by platform cache. */ } NMPlatformIP4RtNextHop; typedef struct {