From 830dd4ad9c9f93b7dcc3781ee842c3bf3ce313ba Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=8D=C3=B1igo=20Huguet?= Date: Mon, 12 Aug 2024 12:04:04 +0200 Subject: [PATCH] platform: add small backoff time before resync If the socket's RX buffer is full, it's probably because another process is making a lot of changes very quickly, faster than we can process them. Let's give the writer a little time to finish: 1. Avoid contending for the kernel's RTNL lock, so we don't make the whole situation even worse and the writer can finish earlier. 2. Avoid having to resync again and again because we try to resync while the writer is still making quick changes and we are not yet able to catch up. This won't help if this situation lasts a long time or is continuous, but that's unlikely to happen, and if it does, it's the writer's fault for starving the whole system. There is no need to progressively increase the backoff time, for the same reason: if this situation lasts a long time, it's the writer's fault. Nor would it be a good idea, because the whole NM process would end up sleeping for long periods, not doing anything at all, and unable to react when the burst of Netlink messages stops. --- src/libnm-platform/nm-linux-platform.c | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/src/libnm-platform/nm-linux-platform.c b/src/libnm-platform/nm-linux-platform.c index 83fc980748..bd495fe27e 100644 --- a/src/libnm-platform/nm-linux-platform.c +++ b/src/libnm-platform/nm-linux-platform.c @@ -361,7 +361,8 @@ struct _ifla_vf_vlan_info { /*****************************************************************************/ -#define RESYNC_RETRIES 50 +#define RESYNC_RETRIES 50 +#define RESYNC_BACKOFF_SECONDS 1 /*****************************************************************************/ @@ -11203,6 +11204,20 @@ event_handler_read_netlink(NMPlatform *platform, } _reason; })); + + if (nle == -ENOBUFS) { + /* Netlink notifications are coming faster than what + * we can process them. 
Back off a bit so we give some + * time for this burst to finish, and we don't + * contribute to starving the system by contending for the + * kernel's RTNL lock. + */ + _LOGI("netlink[%s]: backoff for %d seconds before the resync.", + nmp_netlink_protocol_info(netlink_protocol)->name, + RESYNC_BACKOFF_SECONDS); + sleep(RESYNC_BACKOFF_SECONDS); + } + _netlink_recv_handle(platform, netlink_protocol, FALSE); delayed_action_wait_for_nl_response_complete_all( platform,