platform,core: better handle onlink flag for ECMP routes

The onlink flag is part of each next hop.

When NetworkManager configures ECMP routes, we won't support that. All
next hops of an ECMP route must share the same onlink flag. That is fine
and fixed by this commit.

What is not fine, is that we don't track the rtnh_flags flags in
NMPlatformIP4RtNextHop, and consequently our nmp_object_id_cmp() is
wrong.

Fixes: 5b5ce42682 ('nm-netns: track ECMP routes')
This commit is contained in:
Thomas Haller 2023-02-01 08:58:15 +01:00
parent 6081e61d91
commit 6ed966258c
No known key found for this signature in database
GPG key ID: 29C2366E4DFC5728
6 changed files with 45 additions and 10 deletions

View file

@ -1386,8 +1386,12 @@ nm_utils_ip_route_attribute_to_platform(int addr_family,
r4->scope_inv = nm_platform_route_scope_inv(scope);
}
/* Note that for IPv4 routes in kernel, the onlink flag can be set for
* each next hop separately (rtnh_flags). Not for NetworkManager. We can
* only merge routes as ECMP routes (when setting a weight) if they all
* share the same onlink flag. See NM_PLATFORM_IP_ROUTE_CMP_TYPE_ECMP_ID.
* That simplifies the code. */
GET_ATTR(NM_IP_ROUTE_ATTRIBUTE_ONLINK, onlink, BOOLEAN, boolean, FALSE);
r->r_rtm_flags = ((onlink) ? (unsigned) RTNH_F_ONLINK : 0u);
GET_ATTR(NM_IP_ROUTE_ATTRIBUTE_ADVMSS, r->mss, UINT32, uint32, 0);

View file

@ -7,6 +7,8 @@
#include "nm-netns.h"
#include <linux/rtnetlink.h>
#include "libnm-glib-aux/nm-dedup-multi.h"
#include "libnm-glib-aux/nm-c-list.h"
@ -687,7 +689,9 @@ nm_netns_ip_route_ecmp_commit(NMNetns *self,
* after l3cfg configured that route. We achieve that by
* scheduling another idle commit on "l3cfg". */
track_obj->is_new = FALSE;
if (route && route->gateway == 0) {
if (route
&& (route->gateway == 0
|| NM_FLAGS_HAS(route->r_rtm_flags, (unsigned) RTNH_F_ONLINK))) {
/* This route is onlink. We don't need to configure an onlink route
* to the gateway, and the route is immediately ready for configuration. */
track_obj->is_ready = TRUE;

View file

@ -1143,11 +1143,15 @@ nm_setting_ip4_config_class_init(NMSettingIP4ConfigClass *klass)
* The default is <literal>unicast</literal>.</para>
* </listitem>
* <listitem>
* <para><literal>"weight"</literal> - an unsigned 32 bit integer ranging from 0 to 256. This indicates
* that the IPv4 route is a ECMP route and is only allowed for routes that specify a next-hop.
* NetworkManager will automatically merge compatible routes into ECMP multi-hop routes.
* Setting to zero or omitting the attribute configures single hop routes that won't get
* merged. If the route finds no merge partner, it is configured as single hop route.</para>
* <para><literal>"weight"</literal> - an unsigned 32 bit integer
* ranging from 0 to 256. A non-zero weight indicates that the IPv4
* route is an ECMP IPv4 route. NetworkManager will automatically
* merge compatible ECMP routes into multi-hop routes. Setting to
* zero or omitting the attribute configures single hop routes that
* won't get merged. If the route finds no merge partner, it is
* configured as single hop route.</para> <para>Note that in
* NetworkManager, currently all nexthops of a ECMP route must share
* the same "onlink" flag in order to be mergable.</para>
* </listitem>
* <listitem>
* <para><literal>"window"</literal> - an unsigned 32 bit integer.</para>

View file

@ -5419,6 +5419,7 @@ _nl_msg_new_route(uint16_t nlmsg_type, uint16_t nlmsg_flags, const NMPObject *ob
for (i = 0u; i < obj->ip4_route.n_nexthops; i++) {
struct rtnexthop *rtnh;
in_addr_t gw;
rtnh = nlmsg_reserve(msg, sizeof(*rtnh), NLMSG_ALIGNTO);
if (!rtnh)
@ -5430,17 +5431,28 @@ _nl_msg_new_route(uint16_t nlmsg_type, uint16_t nlmsg_flags, const NMPObject *ob
if (i == 0u) {
rtnh->rtnh_hops = NM_MAX(obj->ip4_route.weight, 1u) - 1u;
rtnh->rtnh_ifindex = obj->ip4_route.ifindex;
NLA_PUT_U32(msg, RTA_GATEWAY, obj->ip4_route.gateway);
gw = obj->ip4_route.gateway;
} else {
const NMPlatformIP4RtNextHop *n = &obj->_ip4_route.extra_nexthops[i - 1u];
rtnh->rtnh_hops = NM_MAX(n->weight, 1u) - 1u;
rtnh->rtnh_ifindex = n->ifindex;
NLA_PUT_U32(msg, RTA_GATEWAY, n->gateway);
gw = n->gateway;
}
NLA_PUT_U32(msg, RTA_GATEWAY, gw);
rtnh->rtnh_flags = 0;
rtnh->rtnh_len = (char *) nlmsg_tail(nlmsg_hdr(msg)) - (char *) rtnh;
if (obj->ip4_route.n_nexthops > 1
&& NM_FLAGS_HAS(obj->ip_route.r_rtm_flags, (unsigned) (RTNH_F_ONLINK)) && gw != 0) {
/* Unlike kernel, we only track the onlink flag per NMPlatformIP4Address, and
* not per nexthop. That is fine for NetworkManager configuring addresses.
* It is not fine for tracking addresses from kernel in platform cache,
* because the rtnh_flags of the nexthops need to be part of nmp_object_id_cmp(). */
rtnh->rtnh_flags |= RTNH_F_ONLINK;
}
rtnh->rtnh_len = (char *) nlmsg_tail(nlmsg_hdr(msg)) - (char *) rtnh;
}
nla_nest_end(msg, multipath);

View file

@ -8601,7 +8601,13 @@ nm_platform_ip4_route_cmp(const NMPlatformIP4Route *a,
NM_CMP_FIELD(a, b, initrwnd);
NM_CMP_FIELD(a, b, mtu);
NM_CMP_FIELD(a, b, rto_min);
/* Note that for NetworkManager, the onlink flag is only part of the entire route.
* For kernel, each next hop has it's own onlink flag (rtnh_flags). This means,
* we can only merge ECMP routes, if they agree with their onlink flag, and then
* all next hops are onlink (or not). */
NM_CMP_DIRECT(a->r_rtm_flags & RTNH_F_ONLINK, b->r_rtm_flags & RTNH_F_ONLINK);
NM_CMP_FIELD_UNSAFE(a, b, quickack);
NM_CMP_FIELD_UNSAFE(a, b, lock_window);
NM_CMP_FIELD_UNSAFE(a, b, lock_cwnd);

View file

@ -680,6 +680,11 @@ typedef struct {
* zero.
*/
guint16 weight;
/* FIXME: each next hop in kernel also has a rtnh_flags (for example to
* set RTNH_F_ONLINK). As the next hop is part of the identifier of an
* IPv4 route, so is their flags. We must also track the flag, otherwise
* two routes that look different for kernel, get merged by platform cache. */
} NMPlatformIP4RtNextHop;
typedef struct {