From 87f24157ef33d68f07cdd51b10490e45ab2eaf6a Mon Sep 17 00:00:00 2001 From: Beniamino Galvani Date: Thu, 16 Apr 2020 22:37:27 +0200 Subject: [PATCH] dhcp-helper: retry in case of failure connecting to D-Bus unix socket Connecting to the unix socket can fail with EAGAIN if there are too many pending connections and the server can't accept them in time before reaching backlog capacity. Ideally the server should increase the backlog length, but GLib doesn't provide a way to change it for a GDBus server. Retry for up to 5 seconds in case of failure. https://bugzilla.redhat.com/show_bug.cgi?id=1821594 https://gitlab.freedesktop.org/NetworkManager/NetworkManager/-/merge_requests/471 (cherry picked from commit eefe5dacaa90e8a4e63f34168fafb1a29c8a2c64) (cherry picked from commit 4cf63dfa1515d5aa89f3f2b94f316d3483b54c91) (cherry picked from commit 2b7908a1c6831c9e3bdbdacd44263d477a1a4d8a) --- src/dhcp/nm-dhcp-helper.c | 45 +++++++++++++++++++++++++++++++-------- 1 file changed, 36 insertions(+), 9 deletions(-) diff --git a/src/dhcp/nm-dhcp-helper.c b/src/dhcp/nm-dhcp-helper.c index 9acc40451d..aaca644ee3 100644 --- a/src/dhcp/nm-dhcp-helper.c +++ b/src/dhcp/nm-dhcp-helper.c @@ -129,19 +129,42 @@ main (int argc, char *argv[]) gs_unref_variant GVariant *parameters = NULL; gs_unref_variant GVariant *result = NULL; gboolean success = FALSE; - guint try_count = 0; + guint try_count; + gint64 time_start; gint64 time_end; - /* FIXME: g_dbus_connection_new_for_address_sync() tries to connect to the socket in - * non-blocking mode, which can easily fail with EAGAIN, causing the creation of the - * socket to fail with "Could not connect: Resource temporarily unavailable". - * - * We should instead create the GIOStream ourself and block on connecting to - * the socket. */ + /* Connecting to the unix socket can fail with EAGAIN if there are too + * many pending connections and the server can't accept them in time + * before reaching backlog capacity. Ideally the server should increase + * the backlog length, but GLib doesn't provide a way to change it for a + * GDBus server. Retry for up to 5 seconds in case of failure. */ + time_start = g_get_monotonic_time (); + time_end = time_start + (5000 * 1000L); + try_count = 0; + +do_connect: + try_count++; connection = g_dbus_connection_new_for_address_sync ("unix:path=" NMRUNDIR "/private-dhcp", G_DBUS_CONNECTION_FLAGS_AUTHENTICATION_CLIENT, NULL, NULL, &error); if (!connection) { + if (g_error_matches (error, G_IO_ERROR, G_IO_ERROR_WOULD_BLOCK)) { + gint64 time_remaining = time_end - g_get_monotonic_time (); + gint64 interval; + + if (time_remaining > 0) { + _LOGi ("failure to connect: %s (retry %u, waited %lld ms)", + error->message, try_count, + (long long) (time_end - time_remaining - time_start) / 1000); + interval = NM_CLAMP ((gint64) (100L * (1L << NM_MIN (try_count, 31))), + 5000, + 100000); + g_usleep (NM_MIN (interval, time_remaining)); + g_clear_error (&error); + goto do_connect; + } + } + g_dbus_error_strip_remote_error (error); _LOGE ("could not connect to NetworkManager D-Bus socket: %s", error->message); @@ -149,8 +172,8 @@ main (int argc, char *argv[]) } parameters = build_signal_parameters (); - time_end = g_get_monotonic_time () + (200 * 1000L); /* retry for at most 200 milliseconds */ + try_count = 0; do_notify: try_count++; @@ -172,6 +195,7 @@ do_notify: s_err = g_dbus_error_get_remote_error (error); if (NM_IN_STRSET (s_err, "org.freedesktop.DBus.Error.UnknownMethod")) { gint64 remaining_time = time_end - g_get_monotonic_time (); + gint64 interval; /* I am not sure that a race can actually happen, as we register the object * on the server side during GDBusServer:new-connection signal. @@ -180,7 +204,10 @@ do_notify: * do some retry. */ if (remaining_time > 0) { _LOGi ("failure to call notify: %s (retry %u)", error->message, try_count); - g_usleep (NM_MIN (NM_CLAMP ((gint64) (100L * (1L << try_count)), 5000, 25000), remaining_time)); + interval = NM_CLAMP ((gint64) (100L * (1L << NM_MIN (try_count, 31))), + 5000, + 25000); + g_usleep (NM_MIN (interval, remaining_time)); g_clear_error (&error); goto do_notify; }