ndisc: rework sending router solicitations to follow RFC7559

RFC4861 describes how to solicit routers, but this was later extended
by RFC7559 (Packet-Loss Resiliency for Router Solicitations).

Rework the scheduling of router solicitations to follow RFC7559.

Differences from RFC7559:

- the initial random delay before sending the first RS is only
  up to 250 milliseconds (instead of 1 second).

- we never completely stop sending RS, even after we receive a valid
  RA. We will still send RS every hour.

We no longer honor the sysctl variables

  - /proc/sys/net/ipv6/conf/$IFNAME/router_solicitations
  - /proc/sys/net/ipv6/conf/$IFNAME/router_solicitation_interval

I don't think having the autoconf algorithm configurable is useful,
at least not if the configuration happens via sysctl variables.

https://tools.ietf.org/html/rfc4861#section-6.3.7
https://tools.ietf.org/html/rfc7559#section-2.1
This commit is contained in:
Thomas Haller 2021-01-22 17:53:23 +01:00
parent 6dad4a315f
commit 5ba2f81d2d
No known key found for this signature in database
GPG key ID: 29C2366E4DFC5728
2 changed files with 131 additions and 105 deletions

View file

@ -20,6 +20,9 @@
#define _NMLOG_PREFIX_NAME "ndisc"
#define RFC7559_IRT ((gint32) 4) /* RFC7559, Initial Retransmission Time, in seconds */
#define RFC7559_MRT ((gint32) 3600) /* RFC7559, Maximum Retransmission Time, in seconds */
/*****************************************************************************/
struct _NMNDiscPrivate {
@ -29,18 +32,13 @@ struct _NMNDiscPrivate {
char * last_error;
GSource *ra_timeout_source;
union {
gint32 solicitations_left;
gint32 announcements_left;
};
union {
guint send_rs_id;
guint send_ra_id;
};
union {
gint32 last_rs;
gint32 last_ra;
};
gint32 announcements_left;
guint send_ra_id;
gint32 last_ra;
gint32 solicit_retransmit_time_msec;
GSource *solicit_timer_source;
GSource *timeout_expire_source;
@ -773,62 +771,111 @@ nm_ndisc_add_dns_domain(NMNDisc *ndisc, const NMNDiscDNSDomain *new_item, gint64
} \
G_STMT_END
static gboolean
send_rs_timeout(NMNDisc *ndisc)
static gint32
solicit_retransmit_time_jitter(gint32 solicit_retransmit_time_msec)
{
nm_auto_pop_netns NMPNetns *netns = NULL;
NMNDiscClass * klass = NM_NDISC_GET_CLASS(ndisc);
NMNDiscPrivate * priv = NM_NDISC_GET_PRIVATE(ndisc);
GError * error = NULL;
gint32 ten_percent;
priv->send_rs_id = 0;
nm_assert(solicit_retransmit_time_msec > 0);
nm_assert(solicit_retransmit_time_msec < 3 * RFC7559_MRT * 1000);
if (!nm_ndisc_netns_push(ndisc, &netns))
return G_SOURCE_REMOVE;
/* Add a ±10% jitter.
*
* This is the "RAND" parameter from https://tools.ietf.org/html/rfc3315#section-14
* as requested by RFC7559. */
if (klass->send_rs(ndisc, &error)) {
_LOGD("router solicitation sent");
priv->solicitations_left--;
ten_percent = NM_MAX(1, solicit_retransmit_time_msec / 10);
return solicit_retransmit_time_msec - ten_percent
+ ((gint32)(g_random_int() % (2u * ((guint32) ten_percent))));
}
static gboolean
solicit_timer_cb(gpointer user_data)
{
const gint32 TIMEOUT_APPROX_THRESHOLD_SEC = 10000;
nm_auto_pop_netns NMPNetns *netns = NULL;
NMNDisc * ndisc = user_data;
NMNDiscClass * klass = NM_NDISC_GET_CLASS(ndisc);
NMNDiscPrivate * priv = NM_NDISC_GET_PRIVATE(ndisc);
gs_free_error GError *error = NULL;
gint32 timeout_msec;
if (!nm_ndisc_netns_push(ndisc, &netns)) {
nm_utils_error_set(&error,
NM_UTILS_ERROR_UNKNOWN,
"failure to switch netns for soliciting routers");
} else
klass->send_rs(ndisc, &error);
if (error)
_MAYBE_WARN("solicit: failure sending router solicitation: %s", error->message);
else {
_LOGT("solicit: router solicitation sent");
nm_clear_g_free(&priv->last_error);
} else {
_MAYBE_WARN("failure sending router solicitation: %s", error->message);
g_clear_error(&error);
}
priv->last_rs = nm_utils_get_monotonic_timestamp_sec();
if (priv->solicitations_left > 0) {
_LOGD("scheduling router solicitation retry in %d seconds.",
(int) priv->router_solicitation_interval);
priv->send_rs_id = g_timeout_add_seconds(priv->router_solicitation_interval,
(GSourceFunc) send_rs_timeout,
ndisc);
/* https://tools.ietf.org/html/rfc4861#section-6.3.7 describes how to send solicitations:
*
* > ... a host SHOULD transmit up to MAX_RTR_SOLICITATIONS Router Solicitation messages,
* > each separated by at least RTR_SOLICITATION_INTERVAL seconds.
*
* but this was extended by https://tools.ietf.org/html/rfc7559#section-2 to send continuously
* and with exponential backoff (the algorithm is detailed in https://tools.ietf.org/html/rfc3315#section-14).
* We do RFC7559.
*/
if (priv->solicit_retransmit_time_msec == 0) {
G_STATIC_ASSERT(RFC7559_IRT == NM_NDISC_RFC4861_RTR_SOLICITATION_INTERVAL);
priv->solicit_retransmit_time_msec = solicit_retransmit_time_jitter(RFC7559_IRT * 1000);
timeout_msec = priv->solicit_retransmit_time_msec;
} else {
_LOGD("did not receive a router advertisement after %d solicitations.",
(int) priv->router_solicitations);
priv->solicit_retransmit_time_msec +=
solicit_retransmit_time_jitter(priv->solicit_retransmit_time_msec);
timeout_msec = priv->solicit_retransmit_time_msec;
if (priv->solicit_retransmit_time_msec > RFC7559_MRT * 1000) {
priv->solicit_retransmit_time_msec = RFC7559_MRT * 1000;
timeout_msec = solicit_retransmit_time_jitter(priv->solicit_retransmit_time_msec);
}
}
return G_SOURCE_REMOVE;
_LOGD("solicit: schedule sending next solicitation in%s %.3f seconds",
timeout_msec / 1000 >= TIMEOUT_APPROX_THRESHOLD_SEC ? " about" : "",
((double) timeout_msec) / 1000);
nm_clear_g_source_inst(&priv->solicit_timer_source);
priv->solicit_timer_source = nm_g_timeout_add_source_approx(timeout_msec,
TIMEOUT_APPROX_THRESHOLD_SEC,
solicit_timer_cb,
ndisc);
return G_SOURCE_CONTINUE;
}
static void
solicit_routers(NMNDisc *ndisc)
solicit_timer_start(NMNDisc *ndisc)
{
NMNDiscPrivate *priv = NM_NDISC_GET_PRIVATE(ndisc);
gint32 now, next;
gint64 t;
gint32 delay_msec;
if (priv->send_rs_id)
return;
/* rfc4861, Section 6.3.7:
*
* We should randomly wait up to NM_NDISC_RFC4861_MAX_RTR_SOLICITATION_DELAY (1 second)
* before sending the first RS. RFC4861 is from 2007, I don't think 1 second is
* a suitable delay in 2021. Wait only up to 250 msec instead. */
now = nm_utils_get_monotonic_timestamp_sec();
priv->solicitations_left = priv->router_solicitations;
delay_msec =
g_random_int() % ((guint32)(NM_NDISC_RFC4861_MAX_RTR_SOLICITATION_DELAY * 1000 / 4));
t = (((gint64) priv->last_rs) + priv->router_solicitation_interval) - now;
next = CLAMP(t, 0, G_MAXINT32);
_LOGD("scheduling explicit router solicitation request in %" G_GINT32_FORMAT " seconds.", next);
priv->send_rs_id = g_timeout_add_seconds((guint32) next, (GSourceFunc) send_rs_timeout, ndisc);
_LOGD("solicit: schedule sending first solicitation (of %d) in %.3f seconds",
priv->router_solicitations,
((double) delay_msec) / 1000);
priv->solicit_retransmit_time_msec = 0;
nm_clear_g_source_inst(&priv->solicit_timer_source);
priv->solicit_timer_source = nm_g_timeout_add_source(delay_msec, solicit_timer_cb, ndisc);
}
/*****************************************************************************/
static gboolean
announce_router(NMNDisc *ndisc)
{
@ -988,7 +1035,7 @@ nm_ndisc_set_iid(NMNDisc *ndisc, const NMUtilsIPv6IfaceId iid)
_LOGD("IPv6 interface identifier changed, flushing addresses");
g_array_remove_range(rdata->addresses, 0, rdata->addresses->len);
nm_ndisc_emit_config_change(ndisc, NM_NDISC_CONFIG_ADDRESSES);
solicit_routers(ndisc);
solicit_timer_start(ndisc);
}
return TRUE;
}
@ -1050,7 +1097,7 @@ nm_ndisc_start(NMNDisc *ndisc)
g_source_attach(priv->ra_timeout_source, NULL);
}
solicit_routers(ndisc);
solicit_timer_start(ndisc);
return;
}
@ -1088,20 +1135,16 @@ nm_ndisc_stop(NMNDisc *ndisc)
g_array_set_size(rdata->dns_domains, 0);
priv->rdata.public.hop_limit = 64;
/* Start at very low number so that last_rs - router_solicitation_interval
* is much lower than nm_utils_get_monotonic_timestamp_sec() at startup.
*/
priv->last_rs = G_MININT32;
nm_clear_g_source_inst(&priv->ra_timeout_source);
nm_clear_g_source(&priv->send_rs_id);
nm_clear_g_source(&priv->send_ra_id);
nm_clear_g_free(&priv->last_error);
nm_clear_g_source_inst(&priv->timeout_expire_source);
priv->solicitations_left = 0;
priv->solicit_retransmit_time_msec = 0;
nm_clear_g_source_inst(&priv->solicit_timer_source);
priv->announcements_left = 0;
priv->last_rs = G_MININT32;
priv->last_ra = G_MININT32;
}
@ -1422,6 +1465,33 @@ check_timestamps(NMNDisc *ndisc, gint64 now_msec, NMNDiscConfigMap changed)
ndisc);
}
/* When we receive an RA, we don't disable solicitations entirely. Instead,
* we set the interval to the maximum (RFC7559_MRT).
*
* This contradicts https://tools.ietf.org/html/rfc7559#section-2.1, which says
* that we SHOULD stop sending RS if we receive an RA -- but only on a multicast
* capable link and if the RA has a valid router lifetime.
*
* But we really want to recover from a dead router on the network, so we
* don't want to cease sending RS entirely.
*
* But we only re-schedule the timer if the current interval is not already
* "RFC7559_MRT * 1000". Otherwise, we already have a slow interval counter
* pending. */
if (priv->solicit_retransmit_time_msec != RFC7559_MRT * 1000) {
gint32 timeout_msec;
priv->solicit_retransmit_time_msec = RFC7559_MRT * 1000;
timeout_msec = solicit_retransmit_time_jitter(priv->solicit_retransmit_time_msec);
_LOGD("solicit: schedule sending next (slow) solicitation in about %.3f seconds",
((double) timeout_msec) / 1000);
nm_clear_g_source_inst(&priv->solicit_timer_source);
priv->solicit_timer_source =
nm_g_timeout_add_source_approx(timeout_msec, 0, solicit_timer_cb, ndisc);
}
if (changed != NM_NDISC_CONFIG_NONE)
nm_ndisc_emit_config_change(ndisc, changed);
}
@ -1439,7 +1509,6 @@ nm_ndisc_ra_received(NMNDisc *ndisc, gint64 now_msec, NMNDiscConfigMap changed)
NMNDiscPrivate *priv = NM_NDISC_GET_PRIVATE(ndisc);
nm_clear_g_source_inst(&priv->ra_timeout_source);
nm_clear_g_source(&priv->send_rs_id);
nm_clear_g_free(&priv->last_error);
check_timestamps(ndisc, now_msec, changed);
}
@ -1551,11 +1620,6 @@ nm_ndisc_init(NMNDisc *ndisc)
rdata->dns_domains = g_array_new(FALSE, FALSE, sizeof(NMNDiscDNSDomain));
g_array_set_clear_func(rdata->dns_domains, dns_domain_free);
priv->rdata.public.hop_limit = 64;
/* Start at very low number so that last_rs - router_solicitation_interval
* is much lower than nm_utils_get_monotonic_timestamp_sec() at startup.
*/
priv->last_rs = G_MININT32;
}
static void
@ -1565,7 +1629,7 @@ dispose(GObject *object)
NMNDiscPrivate *priv = NM_NDISC_GET_PRIVATE(ndisc);
nm_clear_g_source_inst(&priv->ra_timeout_source);
nm_clear_g_source(&priv->send_rs_id);
nm_clear_g_source_inst(&priv->solicit_timer_source);
nm_clear_g_source(&priv->send_ra_id);
nm_clear_g_free(&priv->last_error);

View file

@ -584,46 +584,9 @@ _test_dns_solicit_loop_changed(NMNDisc * ndisc,
data->counter++;
}
static gboolean
success_timeout(TestData *data)
{
data->timeout_id = 0;
g_main_loop_quit(data->loop);
return G_SOURCE_REMOVE;
}
static void
_test_dns_solicit_loop_rs_sent(NMFakeNDisc *ndisc, TestData *data)
{
const gint64 now_msec = nm_utils_get_monotonic_timestamp_msec();
guint id;
if (data->rs_counter > 0 && data->rs_counter < 6) {
if (data->rs_counter == 1) {
data->first_solicit_msec = now_msec;
/* Kill the test after 10 seconds if it hasn't failed yet */
data->timeout_id = g_timeout_add_seconds(10, (GSourceFunc) success_timeout, data);
}
/* On all but the first solicitation, which should be triggered by the
* DNS servers reaching 1/2 lifetime, emit a new RA without the DNS
* servers again.
*/
id = nm_fake_ndisc_add_ra(ndisc, 0, NM_NDISC_DHCP_LEVEL_NONE, 4, 1500);
g_assert(id);
nm_fake_ndisc_add_gateway(ndisc,
id,
"fe80::1",
now_msec + 10000,
NM_ICMPV6_ROUTER_PREF_MEDIUM);
nm_fake_ndisc_emit_new_ras(ndisc);
} else if (data->rs_counter >= 6) {
/* Fail if we've sent too many solicitations in the past 4 seconds */
g_assert_cmpint(now_msec - data->first_solicit_msec, >, 4000);
g_source_remove(data->timeout_id);
g_main_loop_quit(data->loop);
}
data->rs_counter++;
}
@ -639,9 +602,6 @@ test_dns_solicit_loop(void)
};
guint id;
g_test_skip("The solicitation behavior is wrong and need fixing. This test is not working too");
return;
/* Ensure that no solicitation loop happens when DNS servers or domains
* stop being sent in advertisements. This can happen if two routers
* send RAs, but the one sending DNS info stops responding, or if one
@ -664,8 +624,10 @@ test_dns_solicit_loop(void)
&data);
nm_ndisc_start(NM_NDISC(ndisc));
nmtst_main_loop_run_assert(data.loop, 20000);
if (nmtst_main_loop_run(data.loop, 10000))
g_error("we expect to run the loop until timeout. What is wrong?");
g_assert_cmpint(data.counter, ==, 3);
g_assert_cmpint(data.rs_counter, ==, 1);
}
/*****************************************************************************/