dhcp-helper: retry in case of failure connecting to D-Bus unix socket

Connecting to the Unix socket can fail with EAGAIN if there are too
many pending connections and the server can't accept them before the
listen backlog fills up. Ideally the server should increase the
backlog length, but GLib doesn't provide a way to change it for a
GDBus server. Instead, retry for up to 5 seconds when the connection
attempt fails with EAGAIN.

https://bugzilla.redhat.com/show_bug.cgi?id=1821594
https://gitlab.freedesktop.org/NetworkManager/NetworkManager/-/merge_requests/471
(cherry picked from commit eefe5dacaa)
(cherry picked from commit 4cf63dfa15)
(cherry picked from commit 2b7908a1c6)
This commit is contained in:
Beniamino Galvani 2020-04-16 22:37:27 +02:00
parent 3ca48236e4
commit 87f24157ef

View file

@ -129,19 +129,42 @@ main (int argc, char *argv[])
gs_unref_variant GVariant *parameters = NULL;
gs_unref_variant GVariant *result = NULL;
gboolean success = FALSE;
guint try_count = 0;
guint try_count;
gint64 time_start;
gint64 time_end;
/* FIXME: g_dbus_connection_new_for_address_sync() tries to connect to the socket in
* non-blocking mode, which can easily fail with EAGAIN, causing the creation of the
* socket to fail with "Could not connect: Resource temporarily unavailable".
*
* We should instead create the GIOStream ourself and block on connecting to
* the socket. */
/* Connecting to the unix socket can fail with EAGAIN if there are too
* many pending connections and the server can't accept them in time
* before reaching backlog capacity. Ideally the server should increase
* the backlog length, but GLib doesn't provide a way to change it for a
* GDBus server. Retry for up to 5 seconds in case of failure. */
time_start = g_get_monotonic_time ();
time_end = time_start + (5000 * 1000L);
try_count = 0;
do_connect:
try_count++;
connection = g_dbus_connection_new_for_address_sync ("unix:path=" NMRUNDIR "/private-dhcp",
G_DBUS_CONNECTION_FLAGS_AUTHENTICATION_CLIENT,
NULL, NULL, &error);
if (!connection) {
if (g_error_matches (error, G_IO_ERROR, G_IO_ERROR_WOULD_BLOCK)) {
gint64 time_remaining = time_end - g_get_monotonic_time ();
gint64 interval;
if (time_remaining > 0) {
_LOGi ("failure to connect: %s (retry %u, waited %lld ms)",
error->message, try_count,
(long long) (time_end - time_remaining - time_start) / 1000);
interval = NM_CLAMP ((gint64) (100L * (1L << NM_MIN (try_count, 31))),
5000,
100000);
g_usleep (NM_MIN (interval, time_remaining));
g_clear_error (&error);
goto do_connect;
}
}
g_dbus_error_strip_remote_error (error);
_LOGE ("could not connect to NetworkManager D-Bus socket: %s",
error->message);
@ -149,8 +172,8 @@ main (int argc, char *argv[])
}
parameters = build_signal_parameters ();
time_end = g_get_monotonic_time () + (200 * 1000L); /* retry for at most 200 milliseconds */
try_count = 0;
do_notify:
try_count++;
@ -172,6 +195,7 @@ do_notify:
s_err = g_dbus_error_get_remote_error (error);
if (NM_IN_STRSET (s_err, "org.freedesktop.DBus.Error.UnknownMethod")) {
gint64 remaining_time = time_end - g_get_monotonic_time ();
gint64 interval;
/* I am not sure that a race can actually happen, as we register the object
* on the server side during GDBusServer:new-connection signal.
@ -180,7 +204,10 @@ do_notify:
* do some retry. */
if (remaining_time > 0) {
_LOGi ("failure to call notify: %s (retry %u)", error->message, try_count);
g_usleep (NM_MIN (NM_CLAMP ((gint64) (100L * (1L << try_count)), 5000, 25000), remaining_time));
interval = NM_CLAMP ((gint64) (100L * (1L << NM_MIN (try_count, 31))),
5000,
25000);
g_usleep (NM_MIN (interval, remaining_time));
g_clear_error (&error);
goto do_notify;
}