l3cfg: merge branch 'th/l3cfg-acd-crash'

https://bugzilla.redhat.com/show_bug.cgi?id=2047788 https://gitlab.freedesktop.org/NetworkManager/NetworkManager/-/merge_requests/1077
2026-03-04 03:40:38 +01:00 · 2022-02-02 11:10:36 +01:00 · 2022-02-02 11:10:36 +01:00 · dc1092e22b
commit dc1092e22b
parent 49fe45b155 db0d84f13a
1 changed files with 76 additions and 74 deletions
--- a/src/core/nm-l3cfg.c
+++ b/src/core/nm-l3cfg.c
@ -1334,6 +1334,15 @@ _acd_data_collect_tracks_data(const AcdData     *acd_data,
    guint             n                     = 0;
    guint             i;

+    /* We do a simple search over all track-infos for the best, which determines
+     * our ACD state. That is, we prefer ACD disabled, and otherwise the
+     * shortest configured timeout.
+     *
+     * This linear search is probably fast enough, because we expect that each
+     * address/acd_data has few trackers.
+     * The alternative would be caching the best result, but that is more complicated,
+     * so not done. */
+
    for (i = 0; i < acd_data->info.n_track_infos; i++) {
        const NML3AcdAddrTrackInfo *acd_track = &acd_data->info.track_infos[i];

@ -1517,12 +1526,7 @@ _l3_acd_nacd_event(int fd, GIOCondition condition, gpointer user_data)
                      "in %u msec)",
                      timeout_msec);
                self->priv.p->nacd_event_down_source =
-                    nm_g_timeout_source_new(timeout_msec,
-                                            G_PRIORITY_DEFAULT,
-                                            _l3_acd_nacd_event_down_timeout_cb,
-                                            self,
-                                            NULL);
-                g_source_attach(self->priv.p->nacd_event_down_source, NULL);
+                    nm_g_timeout_add_source(timeout_msec, _l3_acd_nacd_event_down_timeout_cb, self);
            }
            break;
        default:
@ -1583,12 +1587,9 @@ _l3_acd_nacd_instance_reset(NML3Cfg *self, NMTernary start_timer, gboolean acd_d
        break;
    case NM_TERNARY_TRUE:
        self->priv.p->nacd_instance_ensure_retry =
-            nm_g_timeout_source_new_seconds(ACD_ENSURE_RATELIMIT_MSEC / 1000u,
-                                            G_PRIORITY_DEFAULT,
+            nm_g_timeout_add_seconds_source(ACD_ENSURE_RATELIMIT_MSEC / 1000u,
                                            _l3_acd_nacd_instance_ensure_retry_cb,
-                                            self,
-                                            NULL);
-        g_source_attach(self->priv.p->nacd_instance_ensure_retry, NULL);
+                                            self);
        break;
    case NM_TERNARY_DEFAULT:
        break;
@ -1937,14 +1938,24 @@ _l3_acd_data_timeout_cb(gpointer user_data)
 static void
 _l3_acd_data_timeout_schedule(AcdData *acd_data, gint64 timeout_msec)
 {
+    /* in _l3_acd_data_state_set_full() we clear the timer. At the same time,
+     * in _l3_acd_data_state_change(ACD_STATE_CHANGE_MODE_TIMEOUT) we only
+     * expect timeouts in certain states.
+     *
+     * That means, scheduling a timeout is only correct if we are in a certain
+     * state, which allows to handle timeouts. This assert checks for that to
+     * ensure we don't call a timeout in an unexpected state. */
+    nm_assert(NM_IN_SET(acd_data->info.state,
+                        NM_L3_ACD_ADDR_STATE_PROBING,
+                        NM_L3_ACD_ADDR_STATE_USED,
+                        NM_L3_ACD_ADDR_STATE_DEFENDING,
+                        NM_L3_ACD_ADDR_STATE_CONFLICT));
+
    nm_clear_g_source_inst(&acd_data->acd_data_timeout_source);
    acd_data->acd_data_timeout_source =
-        nm_g_timeout_source_new(NM_CLAMP((gint64) 0, timeout_msec, (gint64) G_MAXUINT),
-                                G_PRIORITY_DEFAULT,
+        nm_g_timeout_add_source(NM_CLAMP((gint64) 0, timeout_msec, (gint64) G_MAXUINT),
                                _l3_acd_data_timeout_cb,
-                                acd_data,
-                                NULL);
-    g_source_attach(acd_data->acd_data_timeout_source, NULL);
+                                acd_data);
 }

 static void
@ -2071,17 +2082,19 @@ _nm_printf(5, 6) static void _l3_acd_data_state_set_full(NML3Cfg         *self,
    else
        changed = FALSE;

-    if (format) {
-        gs_free char *msg = NULL;
-        va_list       args;
+    if (_LOGT_ENABLED()) {
+        if (format) {
+            gs_free char *msg = NULL;
+            va_list       args;

-        va_start(args, format);
-        msg = g_strdup_vprintf(format, args);
-        va_end(args);
+            va_start(args, format);
+            msg = g_strdup_vprintf(format, args);
+            va_end(args);

-        _LOGT_acd(acd_data, "set state to %s (%s)", _l3_acd_addr_state_to_string(state), msg);
-    } else
-        _LOGT_acd(acd_data, "set state to %s", _l3_acd_addr_state_to_string(state));
+            _LOGT_acd(acd_data, "set state to %s (%s)", _l3_acd_addr_state_to_string(state), msg);
+        } else
+            _LOGT_acd(acd_data, "set state to %s", _l3_acd_addr_state_to_string(state));
+    }

    if (changed && allow_commit) {
        /* The availability of an address just changed (and we are instructed to
@ -2338,22 +2351,7 @@ handle_init:
                /* we are already probing. There is nothing to do for this timeout. */
                return;
            }
-
-            nm_utils_get_monotonic_timestamp_msec_cached(p_now_msec);
-
-            if (acd_data->probing_timestamp_msec + ACD_WAIT_PROBING_EXTRA_TIME_MSEC
-                    + ACD_WAIT_PROBING_EXTRA_TIME2_MSEC
-                >= (*p_now_msec)) {
-                /* hm. We failed to create a new probe too long. Something is really wrong
-                 * internally, but let's ignore the issue and assume the address is good. What
-                 * else would we do? Assume the address is USED? */
-                _LOGT_acd(acd_data,
-                          "probe-good (waiting for creating probe timed out. Assume good)");
-                goto handle_start_defending;
-            }
-
-            log_reason = "retry probing on timeout";
-            goto handle_start_probing;
+            /* fall-through */

        case NM_L3_ACD_ADDR_STATE_USED:
        case NM_L3_ACD_ADDR_STATE_CONFLICT:
@ -2373,7 +2371,10 @@ handle_init:
            acd_data->acd_defend_type_desired = acd_defend_type;

            if (acd_timeout_msec <= 0) {
-                log_reason = "acd disabled by configuration (restart after previous conflict)";
+                if (acd_data->info.state == NM_L3_ACD_ADDR_STATE_PROBING)
+                    log_reason = "acd disabled by configuration (timeout during probing)";
+                else
+                    log_reason = "acd disabled by configuration (restart after previous conflict)";
                goto handle_probing_done;
            }

@ -2383,6 +2384,23 @@ handle_init:
            }

            nm_utils_get_monotonic_timestamp_msec_cached(p_now_msec);
+
+            if (acd_data->info.state == NM_L3_ACD_ADDR_STATE_PROBING) {
+                if (acd_data->probing_timestamp_msec + ACD_WAIT_PROBING_EXTRA_TIME_MSEC
+                        + ACD_WAIT_PROBING_EXTRA_TIME2_MSEC
+                    >= (*p_now_msec)) {
+                    /* hm. We failed to create a new probe too long. Something is really wrong
+                     * internally, but let's ignore the issue and assume the address is good. What
+                     * else would we do? Assume the address is USED? */
+                    _LOGT_acd(acd_data,
+                              "probe-good (waiting for creating probe timed out. Assume good)");
+                    goto handle_start_defending;
+                }
+
+                log_reason = "retry probing on timeout";
+                goto handle_start_probing;
+            }
+
            acd_data->probing_timestamp_msec = (*p_now_msec);
            acd_data->probing_timeout_msec   = acd_timeout_msec;
            if (acd_data->info.state == NM_L3_ACD_ADDR_STATE_USED)
@ -2603,39 +2621,23 @@ handle_init:
        return;

    case ACD_STATE_CHANGE_MODE_INSTANCE_RESET:
+        nm_assert(NM_IN_SET(acd_data->info.state,
+                            NM_L3_ACD_ADDR_STATE_PROBING,
+                            NM_L3_ACD_ADDR_STATE_DEFENDING,
+                            NM_L3_ACD_ADDR_STATE_READY,
+                            NM_L3_ACD_ADDR_STATE_USED,
+                            NM_L3_ACD_ADDR_STATE_CONFLICT,
+                            NM_L3_ACD_ADDR_STATE_EXTERNAL_REMOVED));

-        switch (acd_data->info.state) {
-        case NM_L3_ACD_ADDR_STATE_INIT:
-            nm_assert_not_reached();
-            return;
-        case NM_L3_ACD_ADDR_STATE_PROBING:
-        case NM_L3_ACD_ADDR_STATE_DEFENDING:
-        case NM_L3_ACD_ADDR_STATE_READY:
-            if (!acd_data->nacd_probe) {
-                /* we failed starting to probe before and have a timer running to
-                 * restart. We don't do anything now, but let the timer handle it.
-                 * This also implements some rate limiting for us. */
-                _LOGT_acd(acd_data,
-                          "n-acd instance reset. Ignore event while in state %s",
-                          _l3_acd_addr_state_to_string(acd_data->info.state));
-                return;
-            }
-
-            _LOGT_acd(acd_data,
-                      "n-acd instance reset. Trigger a restart while in state %s",
-                      _l3_acd_addr_state_to_string(acd_data->info.state));
-            acd_data->nacd_probe = n_acd_probe_free(acd_data->nacd_probe);
-            _l3_acd_data_timeout_schedule(acd_data, 0);
-            return;
-        case NM_L3_ACD_ADDR_STATE_USED:
-        case NM_L3_ACD_ADDR_STATE_CONFLICT:
-        case NM_L3_ACD_ADDR_STATE_EXTERNAL_REMOVED:
-            /* as we have no probe, we are already handling this (e.g. by having
-             * a retry timer). Nothing to do. */
-            nm_assert(!acd_data->nacd_probe);
-            return;
-        }
-        nm_assert_not_reached();
+        /* an instance-reset is a dramatic event. We start over with probing. */
+        _LOGT_acd(acd_data,
+                  "n-acd instance reset. Reset to probing while in state %s",
+                  _l3_acd_addr_state_to_string(acd_data->info.state));
+        acd_data->nacd_probe                         = n_acd_probe_free(acd_data->nacd_probe);
+        acd_data->last_defendconflict_timestamp_msec = 0;
+        acd_data->probing_timestamp_msec = nm_utils_get_monotonic_timestamp_msec_cached(p_now_msec);
+        _l3_acd_data_state_set(self, acd_data, NM_L3_ACD_ADDR_STATE_PROBING, FALSE);
+        _l3_acd_data_timeout_schedule(acd_data, 0);
        return;
    }