From c0ac920f9c3acf6b7e1112e327b1d551b8a592b8 Mon Sep 17 00:00:00 2001 From: Beniamino Galvani Date: Thu, 4 Jul 2024 10:52:32 +0200 Subject: [PATCH 1/5] platform: introduce array of tracked protocols Introduce an array of tracked route protocols that will be used in the next commit. To have the list of protocols defined in a single place, define a macro. --- src/libnm-platform/nm-linux-platform.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/src/libnm-platform/nm-linux-platform.c b/src/libnm-platform/nm-linux-platform.c index 8836f3d70b..39093659c5 100644 --- a/src/libnm-platform/nm-linux-platform.c +++ b/src/libnm-platform/nm-linux-platform.c @@ -3908,10 +3908,16 @@ _new_from_nl_addr(const struct nlmsghdr *nlh, gboolean id_only) return g_steal_pointer(&obj); } +#define IP_ROUTE_TRACKED_PROTOCOLS \ + RTPROT_UNSPEC, RTPROT_REDIRECT, RTPROT_KERNEL, RTPROT_BOOT, RTPROT_STATIC, RTPROT_RA, \ + RTPROT_DHCP + +_nm_unused static const guint8 ip_route_tracked_protocols[] = {IP_ROUTE_TRACKED_PROTOCOLS}; + static gboolean ip_route_is_tracked(guint8 proto, guint8 type) { - if (proto > RTPROT_STATIC && !NM_IN_SET(proto, RTPROT_DHCP, RTPROT_RA)) { + if (!NM_IN_SET(proto, IP_ROUTE_TRACKED_PROTOCOLS)) { /* We ignore certain rtm_protocol, because NetworkManager would only ever * configure certain protocols. Other routes are not configured by NetworkManager * and we don't track them in the platform cache. From f6411ed941c569057a6c092a3b16d9246c1a5acf Mon Sep 17 00:00:00 2001 From: Beniamino Galvani Date: Wed, 26 Jun 2024 16:44:16 +0200 Subject: [PATCH 2/5] platform: dump only selected route protocols When doing a dump of routes, we want to exclude routes having protocols we do not care about. Since the netlink socket has STRICT_CHK enabled, we can request multiple dumps for the protocols we need. While doing 6 dumps is less efficient than doing 1, it normally doesn't matter. However, the new implementation is more efficient when there are e.g. millions of BGP routes that can be excluded from the results. --- src/libnm-platform/nm-linux-platform.c | 93 +++++++++++++++++++------- 1 file changed, 70 insertions(+), 23 deletions(-) diff --git a/src/libnm-platform/nm-linux-platform.c b/src/libnm-platform/nm-linux-platform.c index 39093659c5..615589742b 100644 --- a/src/libnm-platform/nm-linux-platform.c +++ b/src/libnm-platform/nm-linux-platform.c @@ -3912,7 +3912,7 @@ _new_from_nl_addr(const struct nlmsghdr *nlh, gboolean id_only) RTPROT_UNSPEC, RTPROT_REDIRECT, RTPROT_KERNEL, RTPROT_BOOT, RTPROT_STATIC, RTPROT_RA, \ RTPROT_DHCP -_nm_unused static const guint8 ip_route_tracked_protocols[] = {IP_ROUTE_TRACKED_PROTOCOLS}; +static const guint8 ip_route_tracked_protocols[] = {IP_ROUTE_TRACKED_PROTOCOLS}; static gboolean ip_route_is_tracked(guint8 proto, guint8 type) @@ -7905,13 +7905,11 @@ do_request_all_no_delayed_actions(NMPlatform *platform, DelayedActionType action FOR_EACH_DELAYED_ACTION (iflags, action_type) { RefreshAllType refresh_all_type = delayed_action_type_to_refresh_all_type(iflags); const RefreshAllInfo *refresh_all_info = refresh_all_type_get_info(refresh_all_type); - nm_auto_nlmsg struct nl_msg *nlmsg = NULL; - int *out_refresh_all_in_progress; + int *out_refresh_all_in_progress; out_refresh_all_in_progress = &priv->delayed_action.refresh_all_in_progress[refresh_all_type]; nm_assert(*out_refresh_all_in_progress >= 0); - *out_refresh_all_in_progress += 1; /* clear any delayed action that request a refresh of this object type. */ priv->delayed_action.flags &= ~iflags; @@ -7930,29 +7928,78 @@ do_request_all_no_delayed_actions(NMPlatform *platform, DelayedActionType action } } - event_handler_read_netlink(platform, refresh_all_info->protocol, FALSE); + /* Routes are handled specially because we want to request only routes + * for protocols we track. The reason is that there might be millions of + * BGP routes we don't track and it would be very inefficient to dump them + * all. Therefore, perform separate dumps, each for a specific protocol we + * track. */ + if (NM_IN_SET(refresh_all_type, + REFRESH_ALL_TYPE_RTNL_IP4_ROUTES, + REFRESH_ALL_TYPE_RTNL_IP6_ROUTES)) { + struct rtmsg rtm = { + .rtm_family = refresh_all_info->addr_family_for_dump, + }; + guint i; - if (refresh_all_info->protocol == NMP_NETLINK_ROUTE) { - nlmsg = _nl_msg_new_dump_rtnl(refresh_all_info->obj_type, - refresh_all_info->addr_family_for_dump); + for (i = 0; i < G_N_ELEMENTS(ip_route_tracked_protocols); i++) { + nm_auto_nlmsg struct nl_msg *nlmsg = NULL; + + /* If we try to request a new dump while the previous is still + * in progress, kernel returns -EBUSY. Complete the previous + * dump by reading from the socket. */ + event_handler_read_netlink(platform, refresh_all_info->protocol, FALSE); + + nlmsg = nlmsg_alloc_new(0, RTM_GETROUTE, NLM_F_DUMP); + if (!nlmsg) + goto next_after_fail; + + rtm.rtm_protocol = ip_route_tracked_protocols[i]; + + if (nlmsg_append_struct(nlmsg, &rtm) < 0) + g_return_if_fail(FALSE); + + *out_refresh_all_in_progress += 1; + + if (_netlink_send_nlmsg(platform, + refresh_all_info->protocol, + nlmsg, + NULL, + NULL, + DELAYED_ACTION_RESPONSE_TYPE_REFRESH_ALL_IN_PROGRESS, + out_refresh_all_in_progress) + < 0) { + *out_refresh_all_in_progress -= 1; + /* Try to refresh other route protocols ... */ + } + } } else { - nm_assert(refresh_all_type == REFRESH_ALL_TYPE_GENL_FAMILIES); - nlmsg = _nl_msg_new_dump_genl_families(); + nm_auto_nlmsg struct nl_msg *nlmsg = NULL; + + *out_refresh_all_in_progress += 1; + event_handler_read_netlink(platform, refresh_all_info->protocol, FALSE); + + if (refresh_all_info->protocol == NMP_NETLINK_ROUTE) { + nlmsg = _nl_msg_new_dump_rtnl(refresh_all_info->obj_type, + refresh_all_info->addr_family_for_dump); + } else { + nm_assert(refresh_all_type == REFRESH_ALL_TYPE_GENL_FAMILIES); + nlmsg = _nl_msg_new_dump_genl_families(); + } + + if (!nlmsg) + goto next_after_fail; + + if (_netlink_send_nlmsg(platform, + refresh_all_info->protocol, + nlmsg, + NULL, + NULL, + DELAYED_ACTION_RESPONSE_TYPE_REFRESH_ALL_IN_PROGRESS, + out_refresh_all_in_progress) + < 0) + goto next_after_fail; } - if (!nlmsg) - goto next_after_fail; - - if (_netlink_send_nlmsg(platform, - refresh_all_info->protocol, - nlmsg, - NULL, - NULL, - DELAYED_ACTION_RESPONSE_TYPE_REFRESH_ALL_IN_PROGRESS, - out_refresh_all_in_progress) - < 0) - goto next_after_fail; - continue; next_after_fail: nm_assert(*out_refresh_all_in_progress > 0); From b2635d3461284f81b3ac3d8ac8347a73fa82c541 Mon Sep 17 00:00:00 2001 From: Beniamino Galvani Date: Thu, 4 Jul 2024 15:24:07 +0200 Subject: [PATCH 3/5] platform: assert that we only generate route message of tracked proto --- src/libnm-platform/nm-linux-platform.c | 1 + 1 file changed, 1 insertion(+) diff --git a/src/libnm-platform/nm-linux-platform.c b/src/libnm-platform/nm-linux-platform.c index 615589742b..e5b6ccb9f6 100644 --- a/src/libnm-platform/nm-linux-platform.c +++ b/src/libnm-platform/nm-linux-platform.c @@ -5610,6 +5610,7 @@ _nl_msg_new_route(uint16_t nlmsg_type, uint16_t nlmsg_flags, const NMPObject *ob nm_assert( NM_IN_SET(NMP_OBJECT_GET_TYPE(obj), NMP_OBJECT_TYPE_IP4_ROUTE, NMP_OBJECT_TYPE_IP6_ROUTE)); nm_assert(NM_IN_SET(nlmsg_type, RTM_NEWROUTE, RTM_DELROUTE)); + nm_assert(NM_IN_SET(rtmsg.rtm_protocol, IP_ROUTE_TRACKED_PROTOCOLS)); if (NM_FLAGS_HAS(obj->ip_route.r_rtm_flags, ((unsigned) (RTNH_F_ONLINK)))) { if (IS_IPv4 && obj->ip4_route.gateway == 0) { From 7efab8baeb71ba312db861563cd894aa13ee8f6f Mon Sep 17 00:00:00 2001 From: Beniamino Galvani Date: Mon, 29 Jul 2024 14:21:00 +0200 Subject: [PATCH 4/5] platform: add a retry mechanism in case route dump fails In case the platform fails dumping a specific route protocol, retry multiple times. If all attempts fail, emit a warning and proceed as there is nothing more to do. --- src/libnm-platform/nm-linux-platform.c | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/src/libnm-platform/nm-linux-platform.c b/src/libnm-platform/nm-linux-platform.c index e5b6ccb9f6..29bf60ca47 100644 --- a/src/libnm-platform/nm-linux-platform.c +++ b/src/libnm-platform/nm-linux-platform.c @@ -7940,11 +7940,17 @@ do_request_all_no_delayed_actions(NMPlatform *platform, DelayedActionType action struct rtmsg rtm = { .rtm_family = refresh_all_info->addr_family_for_dump, }; + guint retry_count = 0; guint i; for (i = 0; i < G_N_ELEMENTS(ip_route_tracked_protocols); i++) { nm_auto_nlmsg struct nl_msg *nlmsg = NULL; + if (retry_count > 0) { + /* Try again previous protocol */ + i--; + } + /* If we try to request a new dump while the previous is still * in progress, kernel returns -EBUSY. Complete the previous * dump by reading from the socket. */ @@ -7970,7 +7976,17 @@ do_request_all_no_delayed_actions(NMPlatform *platform, DelayedActionType action out_refresh_all_in_progress) < 0) { *out_refresh_all_in_progress -= 1; - /* Try to refresh other route protocols ... */ + retry_count++; + if (retry_count > 4) { + _LOGW("failed dumping IPv%c routes with protocol %u, cache might be " + "inconsistent", + nm_utils_addr_family_to_char(rtm.rtm_family), + rtm.rtm_protocol); + retry_count = 0; + /* Give up and try the next protocol */ + } + } else { + retry_count = 0; } } } else { From d219277f1aaa97442fe1a9e1c75e6dcfe2557f08 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=8D=C3=B1igo=20Huguet?= Date: Mon, 5 Aug 2024 12:45:15 +0200 Subject: [PATCH 5/5] platform: log routes dump failure as error --- src/libnm-platform/nm-linux-platform.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/libnm-platform/nm-linux-platform.c b/src/libnm-platform/nm-linux-platform.c index 29bf60ca47..36d200953a 100644 --- a/src/libnm-platform/nm-linux-platform.c +++ b/src/libnm-platform/nm-linux-platform.c @@ -7978,7 +7978,7 @@ do_request_all_no_delayed_actions(NMPlatform *platform, DelayedActionType action *out_refresh_all_in_progress -= 1; retry_count++; if (retry_count > 4) { - _LOGW("failed dumping IPv%c routes with protocol %u, cache might be " + _LOGE("failed dumping IPv%c routes with protocol %u, cache might be " "inconsistent", nm_utils_addr_family_to_char(rtm.rtm_family), rtm.rtm_protocol);