platform: fix handling weight for IPv4 single hop routes

In kernel, a IPv4 single hop route has no weight. That means, all such
routes inside the platform cache will have it set to zero.

On the other hand, on the upper layers we use the weight field to
indicate whether this route should be merged to a single hop routes.
In that case, the weight zero and any positive weight is relevant to
make the route distinct.

We have dictionaries of NMPObject, and those use the
NM_PLATFORM_IP_ROUTE_CMP_TYPE_ID comparison type. When we lookup a
route in the dictionary, the hash and comparison must know whether the
weight should be considered.

Fix that by adding a field "is_kernel" to NMPlatformIP4Route. The field
determines how the "weight" is treated for comparison. Basically, the
weight is ignored for comparison with ID type, if at least one of the operands
is "is_kernel". This brings an odd asymmetry to the behavior, but it's
not a real problem because our hash tables either contain only objects
from kernel or not.

An example of the oddity is when you have a single-hop is-kernel route
(obviously with a weight of zero), and you look into a hash with two
non-is-kernel routes with different weights. It's undefined which route
you are going to find, and there would be two entries in the hash table
that compare equal to the lookup needle. Still this is preferable,
because for the most part it means we can ignore the weight, and it will
mostly just work.

There is another problem. Previously, the hash/cmp functions always
compared "NM_MAX(a->weight, 1u)". That is again wrong for the NM-meaning
of the weight. You can add two (distinct) routes that are identical
except one has weight 1 and one has weight 0. The former means that the
route is eligible for ECMP merging. Fix that too.

Fixes: 1bbdecf5e1 ('platform: manage ECMP routes')
This commit is contained in:
Thomas Haller 2023-11-28 12:49:31 +01:00 committed by Fernando Fernandez Mancera
parent 796fe4f8ed
commit a1e4f034d3
4 changed files with 114 additions and 40 deletions

View file

@ -342,6 +342,7 @@ test_ip4_route(void)
rts[0].mss = mss;
rts[0].scope_inv = nm_platform_route_scope_inv(RT_SCOPE_LINK);
rts[0].n_nexthops = 1;
rts[0].is_kernel = TRUE;
rts[1].rt_source = nmp_utils_ip_config_source_round_trip_rtprot(NM_IP_CONFIG_SOURCE_USER);
rts[1].network = network;
rts[1].plen = plen;
@ -351,6 +352,7 @@ test_ip4_route(void)
rts[1].mss = mss;
rts[1].scope_inv = nm_platform_route_scope_inv(RT_SCOPE_UNIVERSE);
rts[1].n_nexthops = 1;
rts[1].is_kernel = TRUE;
rts[2].rt_source = nmp_utils_ip_config_source_round_trip_rtprot(NM_IP_CONFIG_SOURCE_USER);
rts[2].network = 0;
rts[2].plen = 0;
@ -360,6 +362,7 @@ test_ip4_route(void)
rts[2].mss = mss;
rts[2].scope_inv = nm_platform_route_scope_inv(RT_SCOPE_UNIVERSE);
rts[2].n_nexthops = 1;
rts[2].is_kernel = TRUE;
g_assert_cmpint(routes->len, ==, 3);
nmtst_platform_ip4_routes_equal_aptr((const NMPObject *const *) routes->pdata,
rts,

View file

@ -4131,6 +4131,8 @@ rta_multipath_done:
obj->ip_route.ifindex = nh.ifindex;
if (IS_IPv4) {
obj->ip4_route.is_kernel = TRUE;
nm_assert((!!nh.found) == (v4_n_nexthops > 0u));
obj->ip4_route.n_nexthops = v4_n_nexthops;
if (v4_n_nexthops > 1) {

View file

@ -5306,6 +5306,12 @@ nm_platform_ip_route_normalize(int addr_family, NMPlatformIPRoute *route)
route->metric_any = FALSE;
r4->network = nm_ip4_addr_clear_host_address(r4->network, r4->plen);
r4->scope_inv = _ip_route_scope_inv_get_normalized(r4);
r4->is_kernel = TRUE;
if (r4->ifindex > 0u) {
r4->n_nexthops = NM_MAX(r4->n_nexthops, 1u);
if (r4->n_nexthops <= 1u)
r4->weight = 0;
}
break;
case AF_INET6:
r6 = (NMPlatformIP6Route *) route;
@ -7025,6 +7031,7 @@ nm_platform_ip4_route_to_string_full(const NMPlatformIP4Route *route,
"%s" /* rto_min */
"%s" /* quickack */
"%s" /* mtu */
"%s" /* is_kernel */
"",
nm_net_aux_rtnl_rtntype_n2a_maybe_buf(nm_platform_route_type_uncoerce(route->type_coerced),
str_type),
@ -7091,7 +7098,8 @@ nm_platform_ip4_route_to_string_full(const NMPlatformIP4Route *route,
" mtu %s%" G_GUINT32_FORMAT,
route->lock_mtu ? "lock " : "",
route->mtu)
: "");
: "",
route->is_kernel ? " is-kernel" : "");
if ((n_nexthops == 1 && route->ifindex > 0) || n_nexthops == 0) {
/* A plain single hop route. Nothing extra to remark. */
@ -8600,6 +8608,14 @@ nm_platform_ip4_rt_nexthop_hash_update(const NMPlatformIP4RtNextHop *obj,
nm_assert(obj);
/* Note that NMPlatformIP4Route.weight has some special handling with
* what zero/one means.
*
* Here we compare the next-hop of an (obviously) multi-hop route. Such
* considerations don't apply here.
*
* All that we do, is to normalize a 0 to 1, with "for_id" comparison. */
w = for_id ? NM_MAX(obj->weight, 1u) : obj->weight;
nm_hash_update_vals(h, obj->ifindex, obj->gateway, w);
@ -8610,6 +8626,8 @@ nm_platform_ip4_route_hash_update(const NMPlatformIP4Route *obj,
NMPlatformIPRouteCmpType cmp_type,
NMHashState *h)
{
guint n_nexthops;
switch (cmp_type) {
case NM_PLATFORM_IP_ROUTE_CMP_TYPE_WEAK_ID:
case NM_PLATFORM_IP_ROUTE_CMP_TYPE_ECMP_ID:
@ -8647,15 +8665,15 @@ nm_platform_ip4_route_hash_update(const NMPlatformIP4Route *obj,
obj->lock_mtu,
obj->lock_mss));
if (cmp_type == NM_PLATFORM_IP_ROUTE_CMP_TYPE_ID) {
nm_hash_update_vals(h,
obj->ifindex,
nm_platform_ip4_route_get_n_nexthops(obj),
obj->gateway,
(guint16) NM_MAX(obj->weight, 1u));
n_nexthops = nm_platform_ip4_route_get_n_nexthops(obj);
/* obj->weight must be ignored for ID hashing, so that cmp()
* below works correctly. */
nm_hash_update_vals(h, obj->ifindex, n_nexthops, obj->gateway);
}
}
break;
case NM_PLATFORM_IP_ROUTE_CMP_TYPE_SEMANTICALLY:
n_nexthops = nm_platform_ip4_route_get_n_nexthops(obj);
nm_hash_update_vals(
h,
obj->type_coerced,
@ -8664,9 +8682,10 @@ nm_platform_ip4_route_hash_update(const NMPlatformIP4Route *obj,
nm_ip4_addr_clear_host_address(obj->network, obj->plen),
obj->plen,
obj->metric,
nm_platform_ip4_route_get_n_nexthops(obj),
n_nexthops,
obj->gateway,
(guint16) NM_MAX(obj->weight, 1u),
/* obj->weight must be ignored for ID hashing, so that cmp()
* below works correctly. */
nmp_utils_ip_config_source_round_trip_rtprot(obj->rt_source),
_ip_route_scope_inv_get_normalized(obj),
obj->tos,
@ -8722,7 +8741,8 @@ nm_platform_ip4_route_hash_update(const NMPlatformIP4Route *obj,
obj->lock_initcwnd,
obj->lock_initrwnd,
obj->lock_mtu,
obj->lock_mss));
obj->lock_mss,
obj->is_kernel));
break;
}
}
@ -8735,15 +8755,17 @@ nm_platform_ip4_rt_nexthop_cmp(const NMPlatformIP4RtNextHop *a,
guint16 w_a;
guint16 w_b;
/* Note that weight zero is not valid (in kernel). We thus treat
* weight zero usually the same as 1.
*
* Not here for cmp/hash_update functions. These functions check for the exact
* bit-pattern, and not the it means at other places. */
NM_CMP_SELF(a, b);
NM_CMP_FIELD(a, b, ifindex);
NM_CMP_FIELD(a, b, gateway);
/* Note that NMPlatformIP4Route.weight has some special handling with
* what zero/one means.
*
* Here we compare the next-hop of an (obviously) multi-hop route. Such
* considerations don't apply here.
*
* All that we do, is to normalize a 0 to 1, with "for_id" comparison. */
w_a = for_id ? NM_MAX(a->weight, 1u) : a->weight;
w_b = for_id ? NM_MAX(b->weight, 1u) : b->weight;
NM_CMP_DIRECT(w_a, w_b);
@ -8756,6 +8778,41 @@ nm_platform_ip4_route_cmp(const NMPlatformIP4Route *a,
const NMPlatformIP4Route *b,
NMPlatformIPRouteCmpType cmp_type)
{
guint n_nexthops;
#define _CMP_WEIGHT_SEMANTICALLY(n_nexthops, a, b) \
/* NMPlatformIP4Route.weight has special semantics.
*
* In particular, whether the objects are from kernel or not.
*
* Note that this brings an odd asymmetry to the comparison, as it now
* depends on whether one of the operands is from kernel. In practice,
* this works fine because our hash tables either have all elements from
* kernel or not. So one of the operands is the needle to lookup and the
* other is inside the hash table. */ \
G_STMT_START \
{ \
const NMPlatformIP4Route *const _ra = (a); \
const NMPlatformIP4Route *const _rb = (b); \
\
if ((n_nexthops) > 1u) { \
/* Both routes are multi-hop. The weight is considered, but
* zero is normalized to one. */ \
NM_CMP_DIRECT(NM_MAX(_ra->weight, 1u), NM_MAX(_rb->weight, 1u)); \
} else if (_ra->is_kernel || _rb->is_kernel) { \
/* At least one of the routes is received from kernel .The
* weight is meaningless for single-hop routes and is ignored. */ \
} else { \
/* Both routes are single-hop and neither are is-kernel.
* This means, we compare the weight directly. In
* particular, we need to differentiate a zero, because
* that determines whether the route is eligible for ECMP
* merging. */ \
NM_CMP_DIRECT(_ra->weight, _rb->weight); \
} \
} \
G_STMT_END
NM_CMP_SELF(a, b);
switch (cmp_type) {
case NM_PLATFORM_IP_ROUTE_CMP_TYPE_ECMP_ID:
@ -8802,9 +8859,9 @@ nm_platform_ip4_route_cmp(const NMPlatformIP4Route *a,
if (cmp_type == NM_PLATFORM_IP_ROUTE_CMP_TYPE_ID) {
NM_CMP_FIELD(a, b, ifindex);
NM_CMP_FIELD(a, b, gateway);
NM_CMP_DIRECT(NM_MAX(a->weight, 1u), NM_MAX(b->weight, 1u));
NM_CMP_DIRECT(nm_platform_ip4_route_get_n_nexthops(a),
nm_platform_ip4_route_get_n_nexthops(b));
n_nexthops = nm_platform_ip4_route_get_n_nexthops(a);
NM_CMP_DIRECT(n_nexthops, nm_platform_ip4_route_get_n_nexthops(b));
_CMP_WEIGHT_SEMANTICALLY(n_nexthops, a, b);
}
}
break;
@ -8825,16 +8882,16 @@ nm_platform_ip4_route_cmp(const NMPlatformIP4Route *a,
NM_CMP_FIELD(a, b, plen);
NM_CMP_FIELD_UNSAFE(a, b, metric_any);
NM_CMP_FIELD(a, b, metric);
if (cmp_type == NM_PLATFORM_IP_ROUTE_CMP_TYPE_SEMANTICALLY) {
NM_CMP_DIRECT(nm_platform_ip4_route_get_n_nexthops(a),
nm_platform_ip4_route_get_n_nexthops(b));
} else
NM_CMP_FIELD(a, b, n_nexthops);
NM_CMP_FIELD(a, b, gateway);
if (cmp_type == NM_PLATFORM_IP_ROUTE_CMP_TYPE_SEMANTICALLY)
NM_CMP_DIRECT(NM_MAX(a->weight, 1u), NM_MAX(b->weight, 1u));
else
if (cmp_type == NM_PLATFORM_IP_ROUTE_CMP_TYPE_SEMANTICALLY) {
n_nexthops = nm_platform_ip4_route_get_n_nexthops(a);
NM_CMP_DIRECT(n_nexthops, nm_platform_ip4_route_get_n_nexthops(b));
_CMP_WEIGHT_SEMANTICALLY(n_nexthops, a, b);
} else {
NM_CMP_FIELD(a, b, n_nexthops);
NM_CMP_FIELD(a, b, weight);
NM_CMP_FIELD_BOOL(a, b, is_kernel);
}
if (cmp_type == NM_PLATFORM_IP_ROUTE_CMP_TYPE_SEMANTICALLY) {
NM_CMP_DIRECT(nmp_utils_ip_config_source_round_trip_rtprot(a->rt_source),
nmp_utils_ip_config_source_round_trip_rtprot(b->rt_source));

View file

@ -446,16 +446,21 @@ struct _NMPlatformIP4Route {
/* This is the weight of for the first next-hop, in case of n_nexthops > 1.
*
* If n_nexthops is zero, this value is undefined (should be zero).
* If n_nexthops is 1, this also doesn't matter, but it's usually set to
* zero.
* If n_nexthops is greater or equal to one, this is the weight of
* the first hop.
* For kernel:
* If n_nexthops is zero, this value is undefined (should be zero).
* If n_nexthops is 1, this also doesn't matter, but it's usually set to
* zero.
* If n_nexthops is greater or equal to one, this is the weight of
* the first hop.
* The valid range for weight in kernel is 1-256.
*
* Note that upper layers (nm_utils_ip_route_attribute_to_platform()) use this flag to indicate
* whether this is a multihop route. Single-hop, non-ECMP routes will have a weight of zero.
*
* The valid range for weight in kernel is 1-256. */
* Note that upper layers (nm_utils_ip_route_attribute_to_platform()) use
* this field to indicate whether this is a multihop route. Single-hop,
* non-ECMP routes will have a weight of zero. In that case, the "weight"
* does matter also for single hop routes. See also _CMP_WEIGHT_SEMANTICALLY().
* See also the field "is_kernel", which determines how we treat the weight of
* single hop routes.
*/
guint16 weight;
/* rtm_tos (iproute2: tos)
@ -477,6 +482,12 @@ struct _NMPlatformIP4Route {
* For IPv6 routes, the scope is ignored and kernel always assumes global scope.
* Hence, this field is only in NMPlatformIP4Route. */
guint8 scope_inv;
/* If true, this route was received from kernel and is in the platform cache.
*
* This indicates how the "weight" of single-hop routes is treated.
*/
bool is_kernel : 1;
} _nm_alignas(NMPlatformObject);
struct _NMPlatformIP6Route {
@ -704,13 +715,14 @@ typedef struct {
typedef struct {
int ifindex;
in_addr_t gateway;
/* The valid range for weight is 1-256. Single hop routes in kernel
* don't have a weight, we assign them weight zero (to indicate the
* weight is missing).
/* The valid range for weight is 1-256.
*
* Upper layers (nm_utils_ip_route_attribute_to_platform()) care about
* the distinction of unset weight (no-ECMP). They express no-ECMP as
* zero.
* Single hop routes in kernel don't have a weight, we assign them weight
* zero (to indicate the weight is missing).
*
* But note that here we have a next-hop for a NON-single-hop route. The
* weight works just as you'd expect, except that 0 is an alias for 1 (for
* convenience)..
*/
guint16 weight;