platform: resync route cache upon NLM_F_REPLACE flag

There really is no way around this. As we don't cache all the routes (e.g. ignored based on rtm_protocol or rtm_type), we cannot know which route was replaced, when we get a NLM_F_REPLACE message. We need to request a new dump in that case, which can be expensive, if there are a lot of routes or if replace happens frequently. The only possible solutions would be: 1) NetworkManager caches all routes, but it also needs to make sure to get *everything* right. In particular, to understand every relevant route attribute (including those added in the future, which is impossible). 2) kernel provides a reasonable API (rhbz#1337855, rhbz#1337860) that allows to sufficiently understand what is going on based on the netlink notifications.
2026-02-13 15:10:29 +01:00 · 2022-10-28 20:17:57 +02:00 · 2022-10-28 20:17:57 +02:00 · 6fc0dc3fcb
commit 6fc0dc3fcb
parent 4ec2123aa2
1 changed files with 37 additions and 14 deletions
--- a/src/libnm-platform/nmp-object.c
+++ b/src/libnm-platform/nmp-object.c
@ -2978,14 +2978,45 @@ nmp_cache_update_netlink_route(NMPCache         *cache,
    nm_assert(cache);
    nm_assert(NMP_OBJECT_IS_VALID(obj_hand_over));
    nm_assert(!NMP_OBJECT_IS_STACKINIT(obj_hand_over));
-    /* A link object from netlink must have the udev related fields unset.
-     * We could implement to handle that, but there is no need to support such
-     * a use-case */
    nm_assert(NM_IN_SET(NMP_OBJECT_GET_TYPE(obj_hand_over),
                        NMP_OBJECT_TYPE_IP4_ROUTE,
                        NMP_OBJECT_TYPE_IP6_ROUTE));
    nm_assert(nm_dedup_multi_index_obj_find(cache->multi_idx, obj_hand_over) != obj_hand_over);

+    if (NM_FLAGS_HAS(nlmsgflags, NLM_F_REPLACE)) {
+        /* This means, that the message indicates that another route was replaced.
+         * Since we don't cache all routes (see "route_is_alive"), we cannot know
+         * with certainty which route was replaced.
+         *
+         * Even if we would cache *all* routes (which we cannot, if kernel adds new
+         * routing features that modify the known nmp_object_id_equal()), it would
+         * be hard to find the right route that was replaced. Well, probably we
+         * would have to keep NMP_CACHE_ID_TYPE_ROUTES_BY_WEAK_ID sorted by order
+         * of notifications, which is hard. The code below actually makes an effort
+         * to do that, but it's not actually used, because we just resync.
+         *
+         * The only proper solution for this would be to improve kernel with [1]
+         * and [2].
+         *
+         * [1] https://bugzilla.redhat.com/show_bug.cgi?id=1337855
+         * [2] https://bugzilla.redhat.com/show_bug.cgi?id=1337860
+         *
+         * We need to resync.
+         */
+        if (NMP_OBJECT_GET_TYPE(obj_hand_over) == NMP_OBJECT_TYPE_IP4_ROUTE
+            && !nmp_cache_lookup_all(cache, NMP_CACHE_ID_TYPE_ROUTES_BY_WEAK_ID, obj_hand_over)) {
+            /* For IPv4, we can do a small optimization. We skip the resync, if we have
+             * no conflicting routes (by weak-id).
+             *
+             * This optimization does not work for IPv6 (maybe should be fixed).
+             */
+        } else {
+            entry_replace   = NULL;
+            resync_required = TRUE;
+            goto out;
+        }
+    }
+
    entry_old = _lookup_entry(cache, obj_hand_over);
    entry_new = NULL;

@ -3033,19 +3064,11 @@ update_done:
    if (is_dump)
        goto out;

-    if (!entry_new) {
-        if (NM_FLAGS_HAS(nlmsgflags, NLM_F_REPLACE)
-            && nmp_cache_lookup_all(cache, NMP_CACHE_ID_TYPE_ROUTES_BY_WEAK_ID, obj_hand_over)) {
-            /* hm. @obj_hand_over was not added, meaning it was not alive.
-             * However, we track some other objects with the same weak-id.
-             * It's unclear what that means. To be sure, resync. */
-            resync_required = TRUE;
-        }
+    if (!entry_new)
        goto out;
-    }

-    /* FIXME: for routes, we only maintain the order correctly for the BY_WEAK_ID
-     * index. For all other indexes their order becomes messed up. */
+    /* For routes, we only maintain the order correctly for the BY_WEAK_ID
+     * index. For all other indexes, their order is not preserved. */
    entry_cur =
        _lookup_entry_with_idx_type(cache, NMP_CACHE_ID_TYPE_ROUTES_BY_WEAK_ID, entry_new->obj);
    if (!entry_cur) {