diff --git a/NEWS b/NEWS index 6420f894c3..ef0cee8f69 100644 --- a/NEWS +++ b/NEWS @@ -18,6 +18,8 @@ USE AT YOUR OWN RISK. NOT RECOMMENDED FOR PRODUCTION USE! * Fix detection of 6 GHz band capability for WiFi devices * Allow IPv6 SLAAC and static IPv6 DNS server assignment for modem broadband when IPv6 device address was not explicitly passed on by ModemManager +* Fix a performance issue that was leading to 100% CPU usage by NetworkManager + if external programs were doing a big amount of routes updates. ============================================= NetworkManager-1.46 diff --git a/src/libnm-platform/nm-linux-platform.c b/src/libnm-platform/nm-linux-platform.c index 9ecac2d9b3..5b595a9b71 100644 --- a/src/libnm-platform/nm-linux-platform.c +++ b/src/libnm-platform/nm-linux-platform.c @@ -3903,6 +3903,34 @@ _new_from_nl_addr(const struct nlmsghdr *nlh, gboolean id_only) return g_steal_pointer(&obj); } +static gboolean +ip_route_is_tracked(guint8 proto, guint8 type) +{ + if (proto > RTPROT_STATIC && !NM_IN_SET(proto, RTPROT_DHCP, RTPROT_RA)) { + /* We ignore certain rtm_protocol, because NetworkManager would only ever + * configure certain protocols. Other routes are not configured by NetworkManager + * and we don't track them in the platform cache. + * + * This is to help with the performance overhead of a huge number of + * routes, for example with the bird BGP software, that adds routes + * with RTPROT_BIRD protocol. */ + return FALSE; + } + + if (!NM_IN_SET(type, + RTN_UNICAST, + RTN_LOCAL, + RTN_BLACKHOLE, + RTN_UNREACHABLE, + RTN_PROHIBIT, + RTN_THROW)) { + /* Certain route types are ignored and not placed into the cache. */ + return FALSE; + } + + return TRUE; +} + /* Copied and heavily modified from libnl3's rtnl_route_parse() and parse_multipath(). */ static NMPObject * _new_from_nl_route(const struct nlmsghdr *nlh, gboolean id_only, ParseNlmsgIter *parse_nlmsg_iter) @@ -3963,6 +3991,16 @@ _new_from_nl_route(const struct nlmsghdr *nlh, gboolean id_only, ParseNlmsgIter * only handle ~supported~ routes. *****************************************************************/ + /* If it's a route that we don't need to track, abort here to avoid unnecessary + * memory allocations to create the nmp_object. However, if the message has the + * NLM_F_REPLACE flag, it might be replacing a route that we were tracking so we + * have to stop tracking it. That means that we have to process all messages with + * NLM_F_REPLACE. See nmp_cache_update_netlink_route(). + */ + if (!ip_route_is_tracked(rtm->rtm_protocol, rtm->rtm_type) + && !(nlh->nlmsg_flags & NLM_F_REPLACE)) + return NULL; + addr_family = rtm->rtm_family; if (addr_family == AF_INET) @@ -5519,39 +5557,18 @@ ip_route_get_lock_flag(const NMPlatformIPRoute *route) static gboolean ip_route_is_alive(const NMPlatformIPRoute *route) { - guint8 prot; + guint8 proto, type; nm_assert(route); nm_assert(route->rt_source >= NM_IP_CONFIG_SOURCE_RTPROT_UNSPEC && route->rt_source <= _NM_IP_CONFIG_SOURCE_RTPROT_LAST); - prot = route->rt_source - 1; + proto = route->rt_source - 1; + type = nm_platform_route_type_uncoerce(route->type_coerced); - nm_assert(nmp_utils_ip_config_source_from_rtprot(prot) == route->rt_source); + nm_assert(nmp_utils_ip_config_source_from_rtprot(proto) == route->rt_source); - if (prot > RTPROT_STATIC && !NM_IN_SET(prot, RTPROT_DHCP, RTPROT_RA)) { - /* We ignore certain rtm_protocol, because NetworkManager would only ever - * configure certain protocols. Other routes are not configured by NetworkManager - * and we don't track them in the platform cache. - * - * This is to help with the performance overhead of a huge number of - * routes, for example with the bird BGP software, that adds routes - * with RTPROT_BIRD protocol. */ - return FALSE; - } - - if (!NM_IN_SET(nm_platform_route_type_uncoerce(route->type_coerced), - RTN_UNICAST, - RTN_LOCAL, - RTN_BLACKHOLE, - RTN_UNREACHABLE, - RTN_PROHIBIT, - RTN_THROW)) { - /* Certain route types are ignored and not placed into the cache. */ - return FALSE; - } - - return TRUE; + return ip_route_is_tracked(proto, type); } /* Copied and modified from libnl3's build_route_msg() and rtnl_route_build_msg(). */ diff --git a/src/libnm-platform/nmp-object.c b/src/libnm-platform/nmp-object.c index 4090da71a3..cb4e9764d1 100644 --- a/src/libnm-platform/nmp-object.c +++ b/src/libnm-platform/nmp-object.c @@ -2988,6 +2988,13 @@ nmp_cache_update_netlink_route(NMPCache *cache, * Since we don't cache all routes (see "route_is_alive"), we cannot know * with certainty which route was replaced. * + * For example, the kernel might have 3 similar routes (same WEAK_ID), one + * of which is not tracked by us so we don't have it into the cache. If we + * receive a route replace message, we don't know to what of the 3 routes + * it affects (one of the 3 we don't even know that exists). Moreover, if + * we only have one route on cache, we don't know if the replace is for a + * different one that we don't track. + * * Even if we would cache *all* routes (which we cannot, if kernel adds new * routing features that modify the known nmp_object_id_equal()), it would * be hard to find the right route that was replaced. Well, probably we @@ -3002,15 +3009,14 @@ nmp_cache_update_netlink_route(NMPCache *cache, * [2] https://bugzilla.redhat.com/show_bug.cgi?id=1337860 * * We need to resync. + * + * However, a resync is expensive. Think of a routing daemon that updates + * hundreds of routes per second, the performance penalty is huge. We can + * optimize it: if we don't have any matching route on cache (by WEAK_ID), + * we don't have anything to replace and we don't need a full resync, but + * only to add or discard the new route as usual. */ - if (NMP_OBJECT_GET_TYPE(obj_hand_over) == NMP_OBJECT_TYPE_IP4_ROUTE - && !nmp_cache_lookup_all(cache, NMP_CACHE_ID_TYPE_ROUTES_BY_WEAK_ID, obj_hand_over)) { - /* For IPv4, we can do a small optimization. We skip the resync, if we have - * no conflicting routes (by weak-id). - * - * This optimization does not work for IPv6 (maybe should be fixed). - */ - } else { + if (nmp_cache_lookup_all(cache, NMP_CACHE_ID_TYPE_ROUTES_BY_WEAK_ID, obj_hand_over)) { entry_replace = NULL; resync_required = TRUE; goto out;