core: rate-limit updates to IP addresses/routes on D-Bus API

It doesn't scale. If you add 100k routes one-by-one, then upon each
change from platform, we will send the growing list of the routes on
D-Bus.

That is too expensive. Especially, if you imagine that the receiving end
is a NMClient instance. There is a D-Bus worker thread that queues the
received GVariant messages, while the main thread may not be able to
process them fast enough. In that case, the memory keeps growing very
fast and due to fragmentation it is not freed.

Instead, rate limit updates to 3 per second.

Note that the receive buffer of the netlink socket can fill up and we
loose messages. Therefore, already on the lowest level, we may miss
addresses/routes. Next, on top of NMPlatform, NMIPConfig listens to
NM_L3_CONFIG_NOTIFY_TYPE_PLATFORM_CHANGE_ON_IDLE events. Thereby it
further will miss intermediate state (e.g. a route that exists only for
a short moment).

Now adding another delay and rate limiting on top of that, does not make
that fundamentally different, we anyway didn't get all intermediate states
from netlink. We may miss addresses/routes that only exist for a short
amount of time. This makes "the problem" worse, but not fundamentally
new.

We can only get a (correct) settled state, after all events are
processed. And we never know, whether there isn't the next event
just waiting to be received.

Rate limiting is important to not overwhelm D-Bus clients. In reality,
none of the users really need this information, because it's also
incomplete. Users who really need to know addresses/routes should use
netlink or find another way (a way that scales and where they explicitly
request this information).
This commit is contained in:
Thomas Haller 2023-11-20 12:09:13 +01:00
parent a7a36cde83
commit 623012c14a
No known key found for this signature in database
GPG key ID: 29C2366E4DFC5728
2 changed files with 78 additions and 2 deletions

View file

@ -16,6 +16,10 @@
/*****************************************************************************/
#define NOTIFY_PLATFORM_RATELIMIT_MSEC 333
/*****************************************************************************/
GType nm_ip4_config_get_type(void);
GType nm_ip6_config_get_type(void);
@ -67,6 +71,73 @@ _value_set_variant_as(GValue *value, const char *const *strv, guint len)
/*****************************************************************************/
static void
_notify_platform_handle(NMIPConfig *self, gint64 now_msec)
{
NMIPConfigPrivate *priv = NM_IP_CONFIG_GET_PRIVATE(self);
guint32 obj_type_flags;
nm_clear_g_source_inst(&priv->notify_platform_timeout_source);
priv->notify_platform_rlimited_until_msec = now_msec + NOTIFY_PLATFORM_RATELIMIT_MSEC;
obj_type_flags = nm_steal_int(&priv->notify_platform_obj_type_flags);
nm_assert(obj_type_flags != 0u);
_handle_platform_change(self, obj_type_flags, FALSE);
}
static gboolean
_notify_platform_cb(gpointer user_data)
{
_notify_platform_handle(user_data, nm_utils_get_monotonic_timestamp_msec());
return G_SOURCE_CONTINUE;
}
static void
_notify_platform(NMIPConfig *self, guint32 obj_type_flags)
{
const int addr_family = nm_ip_config_get_addr_family(self);
const int IS_IPv4 = NM_IS_IPv4(addr_family);
NMIPConfigPrivate *priv = NM_IP_CONFIG_GET_PRIVATE(self);
gint64 now_msec;
obj_type_flags &= (nmp_object_type_to_flags(NMP_OBJECT_TYPE_IP_ADDRESS(IS_IPv4))
| nmp_object_type_to_flags(NMP_OBJECT_TYPE_IP_ROUTE(IS_IPv4)));
if (obj_type_flags == 0u)
return;
priv->notify_platform_obj_type_flags |= obj_type_flags;
if (priv->notify_platform_timeout_source) {
/* We are currently rate limited. Don't bother to check whether
* (now_msec < priv->notify_platform_rlimited_until_msec), just always
* delegate to the timeout handler. It is scheduled with a lower idle
* priority, so we want that additional backoff. */
return;
}
now_msec = nm_utils_get_monotonic_timestamp_msec();
if (now_msec < priv->notify_platform_rlimited_until_msec) {
priv->notify_platform_timeout_source = nm_g_source_attach(
/* Schedule with a low G_PRIORITY_LOW. */
nm_g_timeout_source_new(priv->notify_platform_rlimited_until_msec - now_msec,
G_PRIORITY_LOW - 10,
_notify_platform_cb,
self,
NULL),
NULL);
return;
}
_notify_platform_handle(self, now_msec);
}
/*****************************************************************************/
static void
_l3cfg_notify_cb(NML3Cfg *l3cfg, const NML3ConfigNotifyData *notify_data, NMIPConfig *self)
{
@ -76,7 +147,7 @@ _l3cfg_notify_cb(NML3Cfg *l3cfg, const NML3ConfigNotifyData *notify_data, NMIPCo
_handle_l3cd_changed(self, notify_data->l3cd_changed.l3cd_new);
break;
case NM_L3_CONFIG_NOTIFY_TYPE_PLATFORM_CHANGE_ON_IDLE:
_handle_platform_change(self, notify_data->platform_change_on_idle.obj_type_flags, FALSE);
_notify_platform(self, notify_data->platform_change_on_idle.obj_type_flags);
break;
default:
break;
@ -207,6 +278,8 @@ finalize(GObject *object)
NMIPConfig *self = NM_IP_CONFIG(object);
NMIPConfigPrivate *priv = NM_IP_CONFIG_GET_PRIVATE(self);
nm_clear_g_source_inst(&priv->notify_platform_timeout_source);
nm_clear_g_signal_handler(priv->l3cfg, &priv->l3cfg_notify_id);
g_object_unref(priv->l3cfg);

View file

@ -32,7 +32,10 @@ struct _NMIPConfigPrivate {
struct {
const NMPObject *best_default_route;
} v_gateway;
gulong l3cfg_notify_id;
GSource *notify_platform_timeout_source;
gint64 notify_platform_rlimited_until_msec;
gulong l3cfg_notify_id;
guint32 notify_platform_obj_type_flags;
};
struct _NMIPConfig {