mirror of
https://gitlab.freedesktop.org/NetworkManager/NetworkManager.git
synced 2026-02-25 07:10:39 +01:00
ndisc: generate IPv6 routes with nexthops to avoid merging
Previously, the NDisc code would generate multiple IPv6 default routes with the same metric. The kernel would then merge them into a single ECMP route, which is forbidden by RFCs as it breaks NUD and other use cases. When the kernel itself is managing IPv6 on the interface, it is able to add multiple independent default routes because it sets the internal flag RTF_ADDRCONF on them, which prevents ECMP merging. This flag can't be set from userspace, and proposals to expose it were rejected: https://lore.kernel.org/netdev/20241105031841.10730-2-Matt.Muggeridge@hpe.com/ The only way to avoid the merge is to use the nexthop API. Now, when there are multiple default routes, NDisc uses a nexthop for each of them. The nexthop id is generated by hashing the route destination (::/0 for default routes), the interface name and the gateway, so that the id remains stable across restarts. In case of a collision with another nexthop created by NetworkManager, or with a nexthop configured externally, a different id is chosen. To reduce the chance of collisions with external nexthops, we only choose ids with the high bit set.
This commit is contained in:
parent
13f83149e1
commit
359762b2c9
2 changed files with 153 additions and 22 deletions
|
|
@ -18,6 +18,7 @@
|
|||
#include "nm-l3-config-data.h"
|
||||
#include "nm-l3cfg.h"
|
||||
#include "nm-ndisc-private.h"
|
||||
#include "nm-netns.h"
|
||||
#include "nm-setting-ip6-config.h"
|
||||
#include "nm-utils.h"
|
||||
|
||||
|
|
@ -105,6 +106,77 @@ NM_UTILS_LOOKUP_STR_DEFINE(nm_ndisc_dhcp_level_to_string,
|
|||
|
||||
/*****************************************************************************/
|
||||
|
||||
#define NEXTHOP_ID_RETRIES 400
|
||||
|
||||
static guint32
|
||||
nexthop_id_alloc(NMNDisc *ndisc,
|
||||
const struct in6_addr *dest,
|
||||
guint plen,
|
||||
const struct in6_addr *gateway)
|
||||
{
|
||||
NMNDiscPrivate *priv = NM_NDISC_GET_PRIVATE(ndisc);
|
||||
NMNetns *netns = nm_l3cfg_get_netns(priv->config.l3cfg);
|
||||
const char *ifname = nm_l3cfg_get_ifname(priv->config.l3cfg, FALSE);
|
||||
NMPlatform *platform = nm_l3cfg_get_platform(priv->config.l3cfg);
|
||||
int ifindex = nm_l3cfg_get_ifindex(priv->config.l3cfg);
|
||||
nm_auto_nmpobj NMPObject *obj = NULL;
|
||||
CSipHash state;
|
||||
guint64 id64;
|
||||
guint32 id;
|
||||
guint i;
|
||||
|
||||
/* Determine a stable nexthop ID by hashing the interface name, the destination
|
||||
* and the gateway. We set the high bit to decrease the chance of collisions with
|
||||
* external (manually added) nexthops. */
|
||||
c_siphash_init(&state, NM_HASH_SEED_16_U64(725697701u));
|
||||
c_siphash_append(&state, (const uint8_t *) ifname, strlen(ifname) + 1);
|
||||
c_siphash_append(&state, (const uint8_t *) dest, sizeof(struct in6_addr));
|
||||
c_siphash_append(&state, (const uint8_t *) &plen, sizeof(guint));
|
||||
c_siphash_append(&state, (const uint8_t *) gateway, sizeof(struct in6_addr));
|
||||
id64 = c_siphash_finalize(&state);
|
||||
id = ((guint32) (id64 >> 32u)) | (1u << 31);
|
||||
|
||||
for (i = 0; i < NEXTHOP_ID_RETRIES; i++, id++) {
|
||||
if (i < NEXTHOP_ID_RETRIES * 3 / 4) {
|
||||
id |= (1u << 31);
|
||||
} else {
|
||||
/* After many collisions, start probing random ids */
|
||||
id = (guint32) nm_random_u64_range(1u << 31, G_MAXUINT32);
|
||||
}
|
||||
|
||||
if (nm_netns_nexthop_id_is_reserved(netns, id))
|
||||
continue;
|
||||
|
||||
if (nm_platform_ip_nexthop_get(platform, id, &obj)) {
|
||||
/* The id already exists in platform. We can reuse it only
|
||||
* if it's an IPv6 RA nexthop on the same interface. */
|
||||
if (NMP_OBJECT_GET_TYPE(obj) != NMP_OBJECT_TYPE_IP6_NEXTHOP
|
||||
|| obj->ip6_nexthop.nh_source != NM_IP_CONFIG_SOURCE_RTPROT_RA
|
||||
|| obj->ip6_nexthop.ifindex != ifindex)
|
||||
continue;
|
||||
}
|
||||
|
||||
nm_netns_nexthop_id_reserve(netns, id, ndisc);
|
||||
return id;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void
|
||||
_nexthop_id_release_one(NMNDisc *ndisc, guint32 nexthop_id)
|
||||
{
|
||||
NMNDiscPrivate *priv;
|
||||
|
||||
if (nexthop_id == 0)
|
||||
return;
|
||||
|
||||
priv = NM_NDISC_GET_PRIVATE(ndisc);
|
||||
nm_netns_nexthop_id_release(nm_l3cfg_get_netns(priv->config.l3cfg), nexthop_id);
|
||||
}
|
||||
|
||||
/*****************************************************************************/
|
||||
|
||||
NML3ConfigData *
|
||||
nm_ndisc_data_to_l3cd(NMDedupMultiIndex *multi_idx,
|
||||
int ifindex,
|
||||
|
|
@ -172,9 +244,6 @@ nm_ndisc_data_to_l3cd(NMDedupMultiIndex *multi_idx,
|
|||
}
|
||||
|
||||
if (rdata->gateways_n > 0) {
|
||||
guint metric_offset = 0;
|
||||
NMIcmpv6RouterPref prev_pref = NM_ICMPV6_ROUTER_PREF_INVALID;
|
||||
|
||||
NMPlatformIP6Route r = {
|
||||
.rt_source = NM_IP_CONFIG_SOURCE_NDISC,
|
||||
.ifindex = ifindex,
|
||||
|
|
@ -185,24 +254,38 @@ nm_ndisc_data_to_l3cd(NMDedupMultiIndex *multi_idx,
|
|||
};
|
||||
|
||||
for (i = 0; i < rdata->gateways_n; i++) {
|
||||
/* If we add multiple default routes with the same metric and
|
||||
* different preferences, kernel merges them into a single ECMP
|
||||
* route, with overall preference equal to the preference of the
|
||||
* first route added. Therefore, the preference of individual routes
|
||||
* is not respected.
|
||||
* To avoid that, add routes with different metrics if they have
|
||||
* different preferences, so that they are not merged together. Here
|
||||
* the gateways are already ordered by increasing preference. */
|
||||
if (i != 0 && rdata->gateways[i].preference != prev_pref) {
|
||||
metric_offset++;
|
||||
}
|
||||
NMPlatformIP6NextHop nh;
|
||||
|
||||
prev_pref = rdata->gateways[i].preference;
|
||||
r.metric = metric_offset;
|
||||
r.gateway = rdata->gateways[i].address;
|
||||
r.rt_pref = rdata->gateways[i].preference;
|
||||
nm_assert((NMIcmpv6RouterPref) r.rt_pref == rdata->gateways[i].preference);
|
||||
|
||||
/* If we add multiple routes with the same destination (in this case, the
|
||||
* default route) and the same metric, the kernel merges them into a single
|
||||
* ECMP route, which is forbidden by RFCs as it breaks NUD and other use cases.
|
||||
* Use nexthop objects to avoid this merging behavior.
|
||||
*
|
||||
* We could use nexthops only when there are multiple default routes on this
|
||||
* interface. But that is not enough, because there can be multiple profiles
|
||||
* with the same ipv6.route-metric value, and their default routes would still
|
||||
* be merged. We need to always use nexthops.
|
||||
*/
|
||||
|
||||
if (rdata->gateways[i].nexthop_id == 0) {
|
||||
/* The nexthop id could not be reserved and we already emitted a warning */
|
||||
continue;
|
||||
}
|
||||
|
||||
nh = (NMPlatformIP6NextHop) {
|
||||
.ifindex = ifindex,
|
||||
.nh_source = NM_IP_CONFIG_SOURCE_NDISC,
|
||||
.gateway = rdata->gateways[i].address,
|
||||
.id = rdata->gateways[i].nexthop_id,
|
||||
};
|
||||
|
||||
r.nhid = nh.id;
|
||||
r.gateway = nh.gateway;
|
||||
nm_l3_config_data_add_route_6(l3cd, &r);
|
||||
nm_l3_config_data_add_nexthop(l3cd, AF_INET6, NULL, (const NMPlatformIPNextHop *) &nh);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -465,23 +548,38 @@ nm_ndisc_add_gateway(NMNDisc *ndisc, const NMNDiscGateway *new_item, gint64 now_
|
|||
{
|
||||
NMNDiscDataInternal *rdata = &NM_NDISC_GET_PRIVATE(ndisc)->rdata;
|
||||
guint i;
|
||||
guint insert_idx = G_MAXUINT;
|
||||
guint insert_idx = G_MAXUINT;
|
||||
guint32 old_nexthop_id = 0;
|
||||
NMNDiscGateway gw;
|
||||
|
||||
for (i = 0; i < rdata->gateways->len;) {
|
||||
NMNDiscGateway *item = &nm_g_array_index(rdata->gateways, NMNDiscGateway, i);
|
||||
|
||||
if (IN6_ARE_ADDR_EQUAL(&item->address, &new_item->address)) {
|
||||
if (new_item->expiry_msec <= now_msec) {
|
||||
_nexthop_id_release_one(ndisc, item->nexthop_id);
|
||||
g_array_remove_index(rdata->gateways, i);
|
||||
_ASSERT_data_gateways(rdata);
|
||||
return TRUE;
|
||||
}
|
||||
|
||||
if (item->preference != new_item->preference) {
|
||||
/* Preference changed: save the nexthop ID so that we can
|
||||
* reuse it when re-inserting at the correct position. */
|
||||
old_nexthop_id = item->nexthop_id;
|
||||
g_array_remove_index(rdata->gateways, i);
|
||||
continue;
|
||||
}
|
||||
|
||||
if (item->nexthop_id == 0) {
|
||||
/* We failed to allocate the nexthop id previously; retry. */
|
||||
item->nexthop_id = nexthop_id_alloc(ndisc, &in6addr_any, 0, &new_item->address);
|
||||
if (item->nexthop_id > 0) {
|
||||
item->expiry_msec = new_item->expiry_msec;
|
||||
return TRUE;
|
||||
}
|
||||
}
|
||||
|
||||
if (item->expiry_msec == new_item->expiry_msec)
|
||||
return FALSE;
|
||||
|
||||
|
|
@ -505,9 +603,26 @@ nm_ndisc_add_gateway(NMNDisc *ndisc, const NMNDiscGateway *new_item, gint64 now_
|
|||
if (new_item->expiry_msec <= now_msec)
|
||||
return FALSE;
|
||||
|
||||
/* Make a copy of the gateway and assign a nexthop id, reusing the existing
|
||||
* one if possible */
|
||||
gw = *new_item;
|
||||
if (old_nexthop_id != 0) {
|
||||
gw.nexthop_id = old_nexthop_id;
|
||||
} else {
|
||||
gw.nexthop_id = nexthop_id_alloc(ndisc, &in6addr_any, 0, &new_item->address);
|
||||
if (gw.nexthop_id == 0) {
|
||||
char buf[INET6_ADDRSTRLEN];
|
||||
|
||||
_LOGW("failed to find a free nexthop id for gateway %s",
|
||||
nm_inet6_ntop(&new_item->address, buf));
|
||||
return FALSE;
|
||||
}
|
||||
}
|
||||
|
||||
g_array_insert_val(rdata->gateways,
|
||||
insert_idx == G_MAXUINT ? rdata->gateways->len : insert_idx,
|
||||
*new_item);
|
||||
gw);
|
||||
|
||||
_ASSERT_data_gateways(rdata);
|
||||
return TRUE;
|
||||
}
|
||||
|
|
@ -1330,6 +1445,9 @@ nm_ndisc_stop(NMNDisc *ndisc)
|
|||
|
||||
NM_NDISC_GET_CLASS(ndisc)->stop(ndisc);
|
||||
|
||||
/* Release all nexthop IDs reserved by this ndisc instance. */
|
||||
nm_netns_nexthop_id_release_all(nm_l3cfg_get_netns(priv->config.l3cfg), ndisc);
|
||||
|
||||
rdata = &priv->rdata;
|
||||
|
||||
g_array_set_size(rdata->gateways, 0);
|
||||
|
|
@ -1442,9 +1560,10 @@ _config_changed_log(NMNDisc *ndisc, NMNDiscConfigMap changed)
|
|||
for (i = 0; i < rdata->gateways->len; i++) {
|
||||
const NMNDiscGateway *gateway = &nm_g_array_index(rdata->gateways, NMNDiscGateway, i);
|
||||
|
||||
_LOGD(" gateway %s pref %s exp %s",
|
||||
_LOGD(" gateway %s pref %s nhid %u exp %s",
|
||||
nm_inet6_ntop(&gateway->address, addrstr),
|
||||
nm_icmpv6_router_pref_to_string(gateway->preference, str_pref, sizeof(str_pref)),
|
||||
gateway->nexthop_id,
|
||||
get_exp(str_exp, now_msec, gateway));
|
||||
}
|
||||
for (i = 0; i < rdata->addresses->len; i++) {
|
||||
|
|
@ -1521,8 +1640,11 @@ clean_gateways(NMNDisc *ndisc, gint64 now_msec, NMNDiscConfigMap *changed, gint6
|
|||
arr = &nm_g_array_first(rdata->gateways, NMNDiscGateway);
|
||||
|
||||
for (i = 0, j = 0; i < rdata->gateways->len; i++) {
|
||||
if (!expiry_next(now_msec, arr[i].expiry_msec, next_msec))
|
||||
if (!expiry_next(now_msec, arr[i].expiry_msec, next_msec)) {
|
||||
/* Gateway expired. Release its nexthop ID. */
|
||||
_nexthop_id_release_one(ndisc, arr[i].nexthop_id);
|
||||
continue;
|
||||
}
|
||||
if (i != j)
|
||||
arr[j] = arr[i];
|
||||
j++;
|
||||
|
|
@ -1533,8 +1655,14 @@ clean_gateways(NMNDisc *ndisc, gint64 now_msec, NMNDiscConfigMap *changed, gint6
|
|||
g_array_set_size(rdata->gateways, j);
|
||||
}
|
||||
|
||||
if (_array_set_size_max(rdata->gateways, _SIZE_MAX_GATEWAYS))
|
||||
if (rdata->gateways->len > _SIZE_MAX_GATEWAYS) {
|
||||
for (i = _SIZE_MAX_GATEWAYS; i < rdata->gateways->len; i++)
|
||||
_nexthop_id_release_one(
|
||||
ndisc,
|
||||
nm_g_array_index(rdata->gateways, NMNDiscGateway, i).nexthop_id);
|
||||
g_array_set_size(rdata->gateways, _SIZE_MAX_GATEWAYS);
|
||||
*changed |= NM_NDISC_CONFIG_GATEWAYS;
|
||||
}
|
||||
|
||||
_ASSERT_data_gateways(rdata);
|
||||
}
|
||||
|
|
@ -2065,6 +2193,8 @@ finalize(GObject *object)
|
|||
NMNDiscPrivate *priv = NM_NDISC_GET_PRIVATE(ndisc);
|
||||
NMNDiscDataInternal *rdata = &priv->rdata;
|
||||
|
||||
nm_netns_nexthop_id_release_all(nm_l3cfg_get_netns(priv->config.l3cfg), ndisc);
|
||||
|
||||
g_array_unref(rdata->gateways);
|
||||
g_array_unref(rdata->addresses);
|
||||
g_array_unref(rdata->routes);
|
||||
|
|
|
|||
|
|
@ -100,6 +100,7 @@ typedef struct _NMNDiscGateway {
|
|||
struct in6_addr address;
|
||||
gint64 expiry_msec;
|
||||
NMIcmpv6RouterPref preference;
|
||||
guint32 nexthop_id;
|
||||
} NMNDiscGateway;
|
||||
|
||||
typedef struct _NMNDiscAddress {
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue