Thomas Haller 2022-02-16 09:59:56 +01:00
commit 1971e6a7bb
No known key found for this signature in database
GPG key ID: 29C2366E4DFC5728
3 changed files with 131 additions and 105 deletions

5
TODO
View file

@ -2,11 +2,6 @@ So you're interested in hacking on NetworkManager? Here's some cool
stuff you could do...
* IPv6 Multihop Routes in Platform
See src/libnm-platform/README.md
* Use netlink API instead of ioctl based ethtool.
NetworkManager uses ethtool API to set/obtain certain settings of network

View file

@ -20,59 +20,3 @@ This depends on the following helper libraries
- [../libnm-udev-aux/](../libnm-udev-aux/)
- [../libnm-log-core/](../libnm-log-core/)
- [../linux-headers/](../linux-headers/)
TODO and Bugs
=============
IPv6 Multi-hop Routes
---------------------
NMPlatform has a cache (dictionary) with netlink objects, which can also be augmented
with additional information like the WifiData or the udev device. A dictionary requires
that objects have an identity, by which they can be compared and hashed. In other words,
a set of properties that determines that the object is something distinctly recognizable.
For routes and routing policy rules, from the point of view of kernel there is not
a simple set of properties/attributes that determine the identity of the route/rule. Rather,
most attributes are part of the ID, but not all. See `NM_PLATFORM_IP_ROUTE_CMP_TYPE_ID`
and `NM_PLATFORM_ROUTING_RULE_CMP_TYPE_ID`.
For routes, we currently ignore all multi-hop routes (ECMP). For IPv4, that is fine because
kernel also treats the next hops (of any number) as part of the ID of a route. For
example, you can see in `ip route` two IPv4 routes that only differ by their next-hops.
As NetworkManager currently doesn't configure multi-hop routes, ignoring those routes and
not caching them is no problem.
For IPv6 routes that is different. When you add two IPv6 routes that only differ by their
next hops, then kernel will merge them into a multi-hop route (as you can see in `ip -6 route`).
Likewise, if you remove a (single or multi-hop) route, then kernel will "subtract" those
hops from the multi-hop route. In a way, kernel always mangles the result into a multi-hop route.
If you logically consider the hops of an IPv6 route part of the identity of the route, then adding a route
can create a new (because distinct as by their ID) route while removing the previously existing route
(without sending a RTM_DELROUTE message). As NetworkManager currently ignores all multi-hop routes,
this easily leads to an inconsistent cache, because NetworkManager does not understand that the
addition/removal of an IPv6 route interferes with an entirely different route (from point of view of
the identity).
So you could say the problem is that the ID of a route changes (by merging the next hops). But that
makes no sense, because the ID identifies the route; it cannot change without creating a different
route. So the alternative way to see this problem is that adding a route can create a different route
and delete the previous one, but there are not sufficient netlink events to understand which
route got mangled (short of searching the cache). But also, the RTM_NEWROUTE command no longer
necessarily results in the addition of the route we requested and a RTM_DELROUTE event does
not necessarily notify about the route that was removed (rather, it notifies about the part
that got subtracted).
Another way to see kernel's bogus behavior is to pretend that there are only single-hop routes.
That makes everything simple, the only special case is that an RTM_NEWROUTE now can contain
(with this point of view of the identity) multiple routes, one for each hop.
To solve the problem of platform cache inconsistencies for IPv6 routes, NetworkManager should
only honor IPv6 single-path routes, but with the twist that one RTM_NEWROUTE can announce multiple
routes at once.
This alternative view that we should implement is possibly a deviation from kernel's view.
Usually we avoid modelling things differently than kernel, but in this case it makes more
sense as this is more how it appears on the netlink API (based on the events that we get).
See also: https://bugzilla.redhat.com/show_bug.cgi?id=1837254#c20

View file

@ -1210,6 +1210,21 @@ _linktype_get_type(NMPlatform *platform,
* libnl utility functions and wrappers
******************************************************************/
/* Iterator state for parsing netlink messages that can expand into more than
 * one NMPObject. Currently this is only used for IPv6 multi-hop (ECMP) routes,
 * which get split into one single-hop route per next-hop (see
 * _new_from_nl_route()). */
typedef struct {
/* The first time, we are called with "iter_more" false. If there is only
 * one message to parse, the callee can leave this at false and be done
 * (meaning, it can just ignore the potential parsing of multiple messages).
 * If there are multiple messages, then set this to TRUE. We will continue
 * the parsing as long as this flag stays TRUE and an object gets returned. */
bool iter_more;
union {
struct {
/* The index of the next-hop to parse on the next iteration.
 * Zero on the first call; incremented by _new_from_nl_route()
 * while more hops remain in the RTA_MULTIPATH attribute. */
guint next_multihop;
} ip6_route;
};
} ParseNlmsgIter;
#define NLMSG_TAIL(nmsg) ((struct rtattr *) (((char *) (nmsg)) + NLMSG_ALIGN((nmsg)->nlmsg_len)))
/* copied from iproute2's addattr_l(). */
@ -3374,7 +3389,7 @@ _new_from_nl_addr(struct nlmsghdr *nlh, gboolean id_only)
/* Copied and heavily modified from libnl3's rtnl_route_parse() and parse_multipath(). */
static NMPObject *
_new_from_nl_route(struct nlmsghdr *nlh, gboolean id_only)
_new_from_nl_route(struct nlmsghdr *nlh, gboolean id_only, ParseNlmsgIter *parse_nlmsg_iter)
{
static const struct nla_policy policy[] = {
[RTA_TABLE] = {.type = NLA_U32},
@ -3387,17 +3402,21 @@ _new_from_nl_route(struct nlmsghdr *nlh, gboolean id_only)
[RTA_METRICS] = {.type = NLA_NESTED},
[RTA_MULTIPATH] = {.type = NLA_NESTED},
};
guint multihop_idx;
const struct rtmsg *rtm;
struct nlattr *tb[G_N_ELEMENTS(policy)];
int addr_family;
gboolean IS_IPv4;
nm_auto_nmpobj NMPObject *obj = NULL;
int addr_len;
struct {
gboolean is_present;
gboolean found;
gboolean has_more;
int ifindex;
NMIPAddr gateway;
} nh = {
.is_present = FALSE,
.found = FALSE,
.has_more = FALSE,
};
guint32 mss;
guint32 window = 0;
@ -3407,6 +3426,10 @@ _new_from_nl_route(struct nlmsghdr *nlh, gboolean id_only)
guint32 mtu = 0;
guint32 lock = 0;
nm_assert((parse_nlmsg_iter->iter_more && parse_nlmsg_iter->ip6_route.next_multihop > 0)
|| (!parse_nlmsg_iter->iter_more && parse_nlmsg_iter->ip6_route.next_multihop == 0));
multihop_idx = parse_nlmsg_iter->ip6_route.next_multihop;
if (!nlmsg_valid_hdr(nlh, sizeof(*rtm)))
return NULL;
@ -3416,9 +3439,11 @@ _new_from_nl_route(struct nlmsghdr *nlh, gboolean id_only)
* only handle ~supported~ routes.
*****************************************************************/
if (rtm->rtm_family == AF_INET)
addr_family = rtm->rtm_family;
if (addr_family == AF_INET)
IS_IPv4 = TRUE;
else if (rtm->rtm_family == AF_INET6)
else if (addr_family == AF_INET6)
IS_IPv4 = FALSE;
else
return NULL;
@ -3436,57 +3461,76 @@ _new_from_nl_route(struct nlmsghdr *nlh, gboolean id_only)
/*****************************************************************/
addr_len = IS_IPv4 ? sizeof(in_addr_t) : sizeof(struct in6_addr);
addr_len = nm_utils_addr_family_to_size(addr_family);
if (rtm->rtm_dst_len > (IS_IPv4 ? 32 : 128))
return NULL;
/*****************************************************************
* parse nexthops. Only handle routes with one nh.
*****************************************************************/
if (tb[RTA_MULTIPATH]) {
size_t tlen = nla_len(tb[RTA_MULTIPATH]);
size_t tlen;
struct rtnexthop *rtnh;
guint idx;
tlen = nla_len(tb[RTA_MULTIPATH]);
if (tlen < sizeof(*rtnh))
goto rta_multipath_done;
rtnh = nla_data_as(struct rtnexthop, tb[RTA_MULTIPATH]);
if (tlen < rtnh->rtnh_len)
goto rta_multipath_done;
idx = 0;
while (TRUE) {
if (nh.is_present) {
/* we don't support multipath routes. */
return NULL;
}
if (idx == multihop_idx) {
nh.found = TRUE;
nh.ifindex = rtnh->rtnh_ifindex;
if (rtnh->rtnh_len > sizeof(*rtnh)) {
struct nlattr *ntb[RTA_MAX + 1];
nh.is_present = TRUE;
nh.ifindex = rtnh->rtnh_ifindex;
if (nla_parse_arr(ntb,
(struct nlattr *) RTNH_DATA(rtnh),
rtnh->rtnh_len - sizeof(*rtnh),
NULL)
< 0)
return NULL;
if (rtnh->rtnh_len > sizeof(*rtnh)) {
struct nlattr *ntb[G_N_ELEMENTS(policy)];
if (nla_parse_arr(ntb,
(struct nlattr *) RTNH_DATA(rtnh),
rtnh->rtnh_len - sizeof(*rtnh),
policy)
< 0)
if (_check_addr_or_return_null(ntb, RTA_GATEWAY, addr_len))
memcpy(&nh.gateway, nla_data(ntb[RTA_GATEWAY]), addr_len);
}
} else if (nh.found) {
/* we just parsed a nexthop, but there is yet another hop afterwards. */
nm_assert(idx == multihop_idx + 1);
if (IS_IPv4) {
/* for IPv4, multihop routes are currently not supported.
*
* If we ever support them, then the next-hop list is part of the NMPlatformIPRoute,
* that is, for IPv4 we truly have multihop routes. Unlike for IPv6.
*
* For now, just error out. */
return NULL;
}
if (_check_addr_or_return_null(ntb, RTA_GATEWAY, addr_len))
memcpy(&nh.gateway, nla_data(ntb[RTA_GATEWAY]), addr_len);
/* For IPv6 multihop routes, we need to remember to iterate again.
* For each next-hop, we will create a distinct single-hop NMPlatformIP6Route. */
nh.has_more = TRUE;
break;
}
if (tlen < RTNH_ALIGN(rtnh->rtnh_len) + sizeof(*rtnh))
goto rta_multipath_done;
break;
tlen -= RTNH_ALIGN(rtnh->rtnh_len);
rtnh = RTNH_NEXT(rtnh);
idx++;
}
rta_multipath_done:;
}
rta_multipath_done:
if (!nh.found && multihop_idx > 0) {
/* something is wrong. We are called back to collect multihop_idx, but the index
* is not there. We messed up the bookkeeping. */
return nm_assert_unreachable_val(NULL);
}
if (tb[RTA_OIF] || tb[RTA_GATEWAY] || tb[RTA_FLOW]) {
@ -3498,18 +3542,21 @@ rta_multipath_done:;
if (_check_addr_or_return_null(tb, RTA_GATEWAY, addr_len))
memcpy(&gateway, nla_data(tb[RTA_GATEWAY]), addr_len);
if (!nh.is_present) {
if (!nh.found) {
/* If no nexthops have been provided via RTA_MULTIPATH
* we add it as regular nexthop to maintain backwards
* compatibility */
nh.ifindex = ifindex;
nh.gateway = gateway;
nh.is_present = TRUE;
nh.ifindex = ifindex;
nh.gateway = gateway;
nh.found = TRUE;
} else {
/* Kernel supports new style nexthop configuration,
* verify that it is a duplicate and ignore old-style nexthop. */
if (nh.ifindex != ifindex || memcmp(&nh.gateway, &gateway, addr_len) != 0)
if (nh.ifindex != ifindex || memcmp(&nh.gateway, &gateway, addr_len) != 0) {
/* we have a RTA_MULTIPATH attribute that does not agree.
* That seems not right. Error out. */
return NULL;
}
}
}
@ -3518,7 +3565,7 @@ rta_multipath_done:;
*
* Well, actually, for IPv6 kernel will always say that the device is
* 1 (lo). Of course it does!! */
if (nh.is_present) {
if (nh.found) {
if (IS_IPv4) {
if (nh.ifindex != 0 || nh.gateway.addr4 != 0) {
/* we only accept kernel to notify about the ifindex/gateway, if it
@ -3536,7 +3583,7 @@ rta_multipath_done:;
}
}
} else {
if (!nh.is_present) {
if (!nh.found) {
/* a "normal" route needs a device. This is not the route we are looking for. */
return NULL;
}
@ -3639,6 +3686,12 @@ rta_multipath_done:;
obj->ip_route.r_rtm_flags = rtm->rtm_flags;
obj->ip_route.rt_source = nmp_utils_ip_config_source_from_rtprot(rtm->rtm_protocol);
if (nh.has_more) {
parse_nlmsg_iter->iter_more = TRUE;
parse_nlmsg_iter->ip6_route.next_multihop = multihop_idx + 1;
} else
parse_nlmsg_iter->iter_more = FALSE;
return g_steal_pointer(&obj);
}
@ -4080,7 +4133,8 @@ static NMPObject *
nmp_object_new_from_nl(NMPlatform *platform,
const NMPCache *cache,
struct nl_msg *msg,
gboolean id_only)
gboolean id_only,
ParseNlmsgIter *parse_nlmsg_iter)
{
struct nlmsghdr *msghdr;
@ -4102,7 +4156,7 @@ nmp_object_new_from_nl(NMPlatform *platform,
case RTM_NEWROUTE:
case RTM_DELROUTE:
case RTM_GETROUTE:
return _new_from_nl_route(msghdr, id_only);
return _new_from_nl_route(msghdr, id_only, parse_nlmsg_iter);
case RTM_NEWRULE:
case RTM_DELRULE:
case RTM_GETRULE:
@ -6977,12 +7031,13 @@ event_valid_msg(NMPlatform *platform, struct nl_msg *msg, gboolean handle_events
gboolean is_del = FALSE;
gboolean is_dump = FALSE;
NMPCache *cache = nm_platform_get_cache(platform);
msghdr = nlmsg_hdr(msg);
ParseNlmsgIter parse_nlmsg_iter;
if (!handle_events)
return;
msghdr = nlmsg_hdr(msg);
if (NM_IN_SET(msghdr->nlmsg_type,
RTM_DELLINK,
RTM_DELADDR,
@ -6995,7 +7050,11 @@ event_valid_msg(NMPlatform *platform, struct nl_msg *msg, gboolean handle_events
is_del = TRUE;
}
obj = nmp_object_new_from_nl(platform, cache, msg, is_del);
parse_nlmsg_iter = (ParseNlmsgIter){
.iter_more = FALSE,
};
obj = nmp_object_new_from_nl(platform, cache, msg, is_del, &parse_nlmsg_iter);
if (!obj) {
_LOGT("event-notification: %s: ignore",
nl_nlmsghdr_to_str(msghdr, buf_nlmsghdr, sizeof(buf_nlmsghdr)));
@ -7023,7 +7082,7 @@ event_valid_msg(NMPlatform *platform, struct nl_msg *msg, gboolean handle_events
NULL,
0));
{
while (TRUE) {
nm_auto_nmpobj const NMPObject *obj_old = NULL;
nm_auto_nmpobj const NMPObject *obj_new = NULL;
@ -7085,7 +7144,10 @@ event_valid_msg(NMPlatform *platform, struct nl_msg *msg, gboolean handle_events
*
* This is to help with the performance overhead of a huge number of
* routes, for example with the bird BGP software, that adds routes
* with RTPROT_BIRD protocol. */
* with RTPROT_BIRD protocol.
*
* Even if this is a IPv6 multipath route, we abort (parse_nlmsg_iter). There
* is nothing for us to do. */
return;
}
@ -7158,6 +7220,31 @@ event_valid_msg(NMPlatform *platform, struct nl_msg *msg, gboolean handle_events
default:
break;
}
if (!parse_nlmsg_iter.iter_more) {
/* we are done. */
return;
}
/* There is a special case here. For IPv6 routes, kernel will merge/mangle routes
* that only differ by their next-hop, and pretend they are multi-hop routes. We
* untangle them, and pretend there are only single-hop routes. Hence, one RTM_{NEW,DEL}ROUTE
* message might be about multiple IPv6 routes (NMPObject). So, now let's parse the next... */
nm_assert(NM_IN_SET(msghdr->nlmsg_type, RTM_NEWROUTE, RTM_DELROUTE));
nm_clear_pointer(&obj, nmp_object_unref);
obj = nmp_object_new_from_nl(platform, cache, msg, is_del, &parse_nlmsg_iter);
if (!obj) {
/* we are done. Usually we don't expect this, because we were told that
* there would be another object to collect, but there isn't one. Something
* unusual happened.
*
* the only reason why this actually could happen is if the next-hop data
* is invalid -- we didn't verify that it would be valid when we set iter_more. */
return;
}
}
}