From 623012c14a192f0a4f7fe98e26224ff2055b7650 Mon Sep 17 00:00:00 2001 From: Thomas Haller Date: Mon, 20 Nov 2023 12:09:13 +0100 Subject: [PATCH] core: rate-limit updates to IP addresses/routes on D-Bus API It doesn't scale. If you add 100k routes one-by-one, then upon each change from platform, we will send the growing list of the routes on D-Bus. That is too expensive. Especially, if you imagine that the receiving end is a NMClient instance. There is a D-Bus worker thread that queues the received GVariant messages, while the main thread may not be able to process them fast enough. In that case, the memory keeps growing very fast and due to fragmentation it is not freed. Instead, rate limit updates to 3 per second. Note that the receive buffer of the netlink socket can fill up and we loose messages. Therefore, already on the lowest level, we may miss addresses/routes. Next, on top of NMPlatform, NMIPConfig listens to NM_L3_CONFIG_NOTIFY_TYPE_PLATFORM_CHANGE_ON_IDLE events. Thereby it further will miss intermediate state (e.g. a route that exists only for a short moment). Now adding another delay and rate limiting on top of that, does not make that fundamentally different, we anyway didn't get all intermediate states from netlink. We may miss addresses/routes that only exist for a short amount of time. This makes "the problem" worse, but not fundamentally new. We can only get a (correct) settled state, after all events are processed. And we never know, whether there isn't the next event just waiting to be received. Rate limiting is important to not overwhelm D-Bus clients. In reality, none of the users really need this information, because it's also incomplete. Users who really need to know addresses/routes should use netlink or find another way (a way that scales and where they explicitly request this information). --- src/core/nm-ip-config.c | 75 ++++++++++++++++++++++++++++++++++++++++- src/core/nm-ip-config.h | 5 ++- 2 files changed, 78 insertions(+), 2 deletions(-) diff --git a/src/core/nm-ip-config.c b/src/core/nm-ip-config.c index bec67d9a46..48f59bd427 100644 --- a/src/core/nm-ip-config.c +++ b/src/core/nm-ip-config.c @@ -16,6 +16,10 @@ /*****************************************************************************/ +#define NOTIFY_PLATFORM_RATELIMIT_MSEC 333 + +/*****************************************************************************/ + GType nm_ip4_config_get_type(void); GType nm_ip6_config_get_type(void); @@ -67,6 +71,73 @@ _value_set_variant_as(GValue *value, const char *const *strv, guint len) /*****************************************************************************/ +static void +_notify_platform_handle(NMIPConfig *self, gint64 now_msec) +{ + NMIPConfigPrivate *priv = NM_IP_CONFIG_GET_PRIVATE(self); + guint32 obj_type_flags; + + nm_clear_g_source_inst(&priv->notify_platform_timeout_source); + + priv->notify_platform_rlimited_until_msec = now_msec + NOTIFY_PLATFORM_RATELIMIT_MSEC; + + obj_type_flags = nm_steal_int(&priv->notify_platform_obj_type_flags); + + nm_assert(obj_type_flags != 0u); + + _handle_platform_change(self, obj_type_flags, FALSE); +} + +static gboolean +_notify_platform_cb(gpointer user_data) +{ + _notify_platform_handle(user_data, nm_utils_get_monotonic_timestamp_msec()); + return G_SOURCE_CONTINUE; +} + +static void +_notify_platform(NMIPConfig *self, guint32 obj_type_flags) +{ + const int addr_family = nm_ip_config_get_addr_family(self); + const int IS_IPv4 = NM_IS_IPv4(addr_family); + NMIPConfigPrivate *priv = NM_IP_CONFIG_GET_PRIVATE(self); + gint64 now_msec; + + obj_type_flags &= (nmp_object_type_to_flags(NMP_OBJECT_TYPE_IP_ADDRESS(IS_IPv4)) + | nmp_object_type_to_flags(NMP_OBJECT_TYPE_IP_ROUTE(IS_IPv4))); + + if (obj_type_flags == 0u) + return; + + priv->notify_platform_obj_type_flags |= obj_type_flags; + + if (priv->notify_platform_timeout_source) { + /* We are currently rate limited. Don't bother to check whether + * (now_msec < priv->notify_platform_rlimited_until_msec), just always + * delegate to the timeout handler. It is scheduled with a lower idle + * priority, so we want that additional backoff. */ + return; + } + + now_msec = nm_utils_get_monotonic_timestamp_msec(); + + if (now_msec < priv->notify_platform_rlimited_until_msec) { + priv->notify_platform_timeout_source = nm_g_source_attach( + /* Schedule with a low G_PRIORITY_LOW. */ + nm_g_timeout_source_new(priv->notify_platform_rlimited_until_msec - now_msec, + G_PRIORITY_LOW - 10, + _notify_platform_cb, + self, + NULL), + NULL); + return; + } + + _notify_platform_handle(self, now_msec); +} + +/*****************************************************************************/ + static void _l3cfg_notify_cb(NML3Cfg *l3cfg, const NML3ConfigNotifyData *notify_data, NMIPConfig *self) { @@ -76,7 +147,7 @@ _l3cfg_notify_cb(NML3Cfg *l3cfg, const NML3ConfigNotifyData *notify_data, NMIPCo _handle_l3cd_changed(self, notify_data->l3cd_changed.l3cd_new); break; case NM_L3_CONFIG_NOTIFY_TYPE_PLATFORM_CHANGE_ON_IDLE: - _handle_platform_change(self, notify_data->platform_change_on_idle.obj_type_flags, FALSE); + _notify_platform(self, notify_data->platform_change_on_idle.obj_type_flags); break; default: break; @@ -207,6 +278,8 @@ finalize(GObject *object) NMIPConfig *self = NM_IP_CONFIG(object); NMIPConfigPrivate *priv = NM_IP_CONFIG_GET_PRIVATE(self); + nm_clear_g_source_inst(&priv->notify_platform_timeout_source); + nm_clear_g_signal_handler(priv->l3cfg, &priv->l3cfg_notify_id); g_object_unref(priv->l3cfg); diff --git a/src/core/nm-ip-config.h b/src/core/nm-ip-config.h index 0dcea83b51..47a40bd223 100644 --- a/src/core/nm-ip-config.h +++ b/src/core/nm-ip-config.h @@ -32,7 +32,10 @@ struct _NMIPConfigPrivate { struct { const NMPObject *best_default_route; } v_gateway; - gulong l3cfg_notify_id; + GSource *notify_platform_timeout_source; + gint64 notify_platform_rlimited_until_msec; + gulong l3cfg_notify_id; + guint32 notify_platform_obj_type_flags; }; struct _NMIPConfig {