From e9268e392418404cddabbcbbc4981337a96f0942 Mon Sep 17 00:00:00 2001 From: Thomas Haller Date: Tue, 13 Sep 2022 20:17:20 +0200 Subject: [PATCH 1/3] firewall: add mlag firewall utils for multi chassis link aggregation (MLAG) for bonding-slb Add a way to configure MLAG NFT rules for SLB bonding. OVS supports "bonding-slb" (source load balancing, [1]). This is basically setting "mode=balance-xor" and "xmit_hash_policy=vlan+srcmac", which requires no special switch configuration (like LACP). For that to work, we need to filter out packets that the switch sends back on the other port, for which we configure some NFT rules. The rules are taken from mlag.sh at [2] or [3]. See-also: https://bugzilla.redhat.com/show_bug.cgi?id=1724795 [1] https://docs.openvswitch.org/en/latest/topics/bonding/#slb-bonding [2] https://gitlab.com/egarver/virtual-networking [3] https://gitlab.com/jtoppins_redhat/bond-slb-nft --- src/core/nm-firewall-utils.c | 248 +++++++++++++++++++++++++++++++++++ src/core/nm-firewall-utils.h | 6 + 2 files changed, 254 insertions(+) diff --git a/src/core/nm-firewall-utils.c b/src/core/nm-firewall-utils.c index 92c9fd814e..7b5d2f47b6 100644 --- a/src/core/nm-firewall-utils.c +++ b/src/core/nm-firewall-utils.c @@ -39,6 +39,71 @@ static const struct { /*****************************************************************************/ +static const char * +_nft_ifname_valid(const char *str) +{ + gsize i; + + /* `nft -f -` takes certain strings, like "device $IFNAME", but + * those strings are from a limited character set. Check that + * @str is valid according to those rules. + * + * src/scanner.l: + * digit [0-9] + * letter [a-zA-Z] + * string ({letter}|[_.])({letter}|{digit}|[/\-_\.])* + **/ + + if (!str || !str[0]) + return NULL; + + for (i = 0; str[i]; i++) { + switch (str[i]) { + case 'a' ... 'z': + case 'A' ... 'Z': + case '_': + case '.': + continue; + case '0' ...
'9': + case '/': + case '-': + if (i == 0) + return NULL; + continue; + default: + return NULL; + } + } + if (i >= NMP_IFNAMSIZ) + return NULL; + + return str; +} + +static const char * +_strbuf_set_sanitized(NMStrBuf *strbuf, const char *prefix, const char *str_to_sanitize) +{ + nm_str_buf_reset(strbuf); + + if (prefix) + nm_str_buf_append(strbuf, prefix); + + for (; str_to_sanitize[0] != '\0'; str_to_sanitize++) { + const char ch = str_to_sanitize[0]; + + if (g_ascii_isalpha(ch) || g_ascii_isdigit(ch)) { + nm_str_buf_append_c(strbuf, ch); + continue; + } + nm_str_buf_append_c(strbuf, '_'); + nm_str_buf_append_c_hex(strbuf, ch, FALSE); + } + + return nm_str_buf_get_str(strbuf); +} + +/*****************************************************************************/ + #define _SHARE_IPTABLES_SUBNET_TO_STR_LEN (INET_ADDRSTRLEN + 1 + 2 + 1) static const char * @@ -701,6 +766,189 @@ _fw_nft_set_shared_construct(gboolean up, const char *ip_iface, in_addr_t addr, /*****************************************************************************/ +GBytes * +nm_firewall_nft_stdio_mlag(gboolean up, + const char *bond_ifname, + const char *const *bond_ifnames_down, + const char *const *active_members, + const char *const *previous_members) +{ + nm_auto_str_buf NMStrBuf strbuf_table_name = + NM_STR_BUF_INIT_A(NM_UTILS_GET_NEXT_REALLOC_SIZE_32, FALSE); + nm_auto_str_buf NMStrBuf strbuf = NM_STR_BUF_INIT(NM_UTILS_GET_NEXT_REALLOC_SIZE_1000, FALSE); + const char *table_name; + gsize i; + + if (NM_MORE_ASSERTS > 10 && active_members) { + /* No duplicates. We make certain assumptions here, and we don't + * want to check that there are no duplicates. The caller must take + * care of this. */ + for (i = 0; active_members[i]; i++) + nm_assert(!nm_strv_contains(&active_members[i + 1], -1, active_members[i])); + } + + /* If an interface gets renamed, we need to update the nft tables. 
Since one nft + * invocation is atomic, it is reasonable to drop the previous table(s) at the + * same time when creating the new one. */ + for (; bond_ifnames_down && bond_ifnames_down[0]; bond_ifnames_down++) { + if (nm_streq(bond_ifname, bond_ifnames_down[0])) + continue; + table_name = _strbuf_set_sanitized(&strbuf_table_name, "nm-mlag-", bond_ifnames_down[0]); + _fw_nft_append_cmd_table(&strbuf, "netdev", table_name, FALSE); + } + + table_name = _strbuf_set_sanitized(&strbuf_table_name, "nm-mlag-", bond_ifname); + + _fw_nft_append_cmd_table(&strbuf, "netdev", table_name, up); + + if (up) { + nm_auto_str_buf NMStrBuf strbuf_1 = + NM_STR_BUF_INIT_A(NM_UTILS_GET_NEXT_REALLOC_SIZE_232, FALSE); + const gsize n_active_members = NM_PTRARRAY_LEN(active_members); + + if (!_nft_ifname_valid(bond_ifname)) { + /* We cannot meaningfully express this interface name. Ignore all chains + * and only create an empty table. */ + goto out; + } + + for (; previous_members && previous_members[0]; previous_members++) { + const char *previous_member = previous_members[0]; + const char *chain_name; + + /* The caller already ensures that the previous member is not part of the new + * active members. Avoid the overhead of checking, and assert against that. */ + nm_assert(!nm_strv_contains(active_members, n_active_members, previous_member)); + + if (!_nft_ifname_valid(previous_member)) + continue; + + chain_name = _strbuf_set_sanitized(&strbuf_1, "rx-drop-bc-mc-", previous_member); + + /* We want to atomically update our table, however, we don't want to delete + * and recreate it, because then the sets get lost (which we don't want). + * + * Instead, we only "add && flush" the table, which removes all rules from + * the chain. However, as our active-members change, we want to delete + * the obsolete chains too. + * + * nft has no way to delete all chains in a table, we have to name + * them one by one.
So we keep track of active members that we had + * in the past, and which are now no longer in use. For those previous + * members we delete the chains (again, with the "add && delete" dance + * to avoid failure deleting a non-existing chain (in case our tracking + * is wrong or somebody else modified the table in the meantime). + * + * We need to track the previous members, because we don't want to first + * ask nft which chains exist. Doing that would be cumbersome as we would + * have to do one async program invocation and parse stdout. */ + _append(&strbuf, + "add chain netdev %s %s {" + " type filter hook ingress device %s priority filter; " + "}", + table_name, + chain_name, + previous_member); + _append(&strbuf, "delete chain netdev %s %s", table_name, chain_name); + } + + /* OVS SLB rule 1 + * + * "Open vSwitch avoids packet duplication by accepting multicast and broadcast + * packets on only the active member, and dropping multicast and broadcast + * packets on all other members." + * + * primary is first member, we drop on all others */ + for (i = 0; i < n_active_members; i++) { + const char *active_member = active_members[i]; + const char *chain_name; + + if (!_nft_ifname_valid(active_member)) + continue; + + chain_name = _strbuf_set_sanitized(&strbuf_1, "rx-drop-bc-mc-", active_member); + + _append(&strbuf, + "add chain netdev %s %s {" + " type filter hook ingress device %s priority filter; " + "}", + table_name, + chain_name, + active_member); + + if (i == 0) { + _append(&strbuf, "delete chain netdev %s %s", table_name, chain_name); + continue; + } + + _append(&strbuf, + "add rule netdev %s %s pkttype {" + " broadcast, multicast " + "} counter drop", + table_name, + chain_name); + } + + /* OVS SLB rule 2 + * + * "Open vSwitch deals with this case by dropping packets received on any SLB + * bonded link that have a source MAC+VLAN that has been learned on any other + * port." 
+ */ + _append(&strbuf, + "add set netdev %s macset-tagged {" + " typeof ether saddr . vlan id; flags timeout; " + "}", + table_name); + _append(&strbuf, + "add set netdev %s macset-untagged {" + " typeof ether saddr; flags timeout;" + "}", + table_name); + + _append(&strbuf, + "add chain netdev %s tx-snoop-source-mac {" + " type filter hook egress device %s priority filter; " + "}", + table_name, + bond_ifname); + _append(&strbuf, + "add rule netdev %s tx-snoop-source-mac set update ether saddr . vlan id" + " timeout 5s @macset-tagged counter return" + "", /* tagged */ + table_name); + _append(&strbuf, + "add rule netdev %s tx-snoop-source-mac set update ether saddr" + " timeout 5s @macset-untagged counter" + "", /* untagged*/ + table_name); + + _append(&strbuf, + "add chain netdev %s rx-drop-looped-packets {" + " type filter hook ingress device %s priority filter; " + "}", + table_name, + bond_ifname); + _append(&strbuf, + "add rule netdev %s rx-drop-looped-packets ether saddr . vlan id" + " @macset-tagged counter drop", + table_name); + _append(&strbuf, + "add rule netdev %s rx-drop-looped-packets ether type vlan counter return" + "", /* avoid looking up tagged packets in untagged table */ + table_name); + _append(&strbuf, + "add rule netdev %s rx-drop-looped-packets ether saddr @macset-untagged" + " counter drop", + table_name); + } + +out: + return nm_str_buf_finalize_to_gbytes(&strbuf); +} + +/*****************************************************************************/ + struct _NMFirewallConfig { char *ip_iface; in_addr_t addr; diff --git a/src/core/nm-firewall-utils.h b/src/core/nm-firewall-utils.h index 9d883fea7b..ca138ccf78 100644 --- a/src/core/nm-firewall-utils.h +++ b/src/core/nm-firewall-utils.h @@ -35,4 +35,10 @@ void nm_firewall_nft_call(GBytes *stdin_buf, gboolean nm_firewall_nft_call_finish(GAsyncResult *result, GError **error); +GBytes *nm_firewall_nft_stdio_mlag(gboolean up, + const char *bond_ifname, + const char *const *bond_ifnames_down, + 
const char *const *active_members, + const char *const *previous_members); + #endif /* __NM_FIREWALL_UTILS_H__ */ From 5450be31d9aca76a4f08f3f636c3bacc6ac2fbe0 Mon Sep 17 00:00:00 2001 From: Thomas Haller Date: Wed, 14 Sep 2022 17:37:55 +0200 Subject: [PATCH 2/3] core: add NMBondManager to handle NFT rules for balance-slb (MLAG) --- Makefile.am | 2 + src/core/meson.build | 1 + src/core/nm-bond-manager.c | 967 +++++++++++++++++++++++++++++++++++++ src/core/nm-bond-manager.h | 32 ++ 4 files changed, 1002 insertions(+) create mode 100644 src/core/nm-bond-manager.c create mode 100644 src/core/nm-bond-manager.h diff --git a/Makefile.am b/Makefile.am index 6b9b82876b..13cadec6dc 100644 --- a/Makefile.am +++ b/Makefile.am @@ -2462,6 +2462,8 @@ src_core_libNetworkManagerBase_la_SOURCES = \ src/core/nm-l3cfg.h \ src/core/nm-ip-config.c \ src/core/nm-ip-config.h \ + src/core/nm-bond-manager.c \ + src/core/nm-bond-manager.h \ \ src/core/dhcp/nm-dhcp-client.c \ src/core/dhcp/nm-dhcp-client.h \ diff --git a/src/core/meson.build b/src/core/meson.build index f3359ad0f5..6f11595aa0 100644 --- a/src/core/meson.build +++ b/src/core/meson.build @@ -53,6 +53,7 @@ libNetworkManagerBase = static_library( 'nm-l3-ipv4ll.c', 'nm-l3-ipv6ll.c', 'nm-l3cfg.c', + 'nm-bond-manager.c', 'nm-ip-config.c', ), dependencies: [ diff --git a/src/core/nm-bond-manager.c b/src/core/nm-bond-manager.c new file mode 100644 index 0000000000..2d15b0b5a0 --- /dev/null +++ b/src/core/nm-bond-manager.c @@ -0,0 +1,967 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#include "src/core/nm-default-daemon.h" + +#include "nm-bond-manager.h" + +#include + +#include "NetworkManagerUtils.h" +#include "libnm-core-aux-intern/nm-libnm-core-utils.h" +#include "libnm-glib-aux/nm-str-buf.h" +#include "libnm-platform/nm-platform.h" +#include "libnm-platform/nmp-object.h" +#include "nm-firewall-utils.h" + +/*****************************************************************************/ + +typedef enum _nm_packed { +
REGISTRATION_STATE_NONE, + REGISTRATION_STATE_UPPING, + REGISTRATION_STATE_UP, + REGISTRATION_STATE_DOWNING, +} RegistrationState; + +struct _NMBondManager { + NMPlatform *platform; + + NMBondManagerCallback callback; + gpointer user_data; + + /* This is only used for structured logging. */ + char *connection_uuid; + + GSource *reconfigure_on_idle_source; + + /* During _reconfigure_check() we remember all ifindexes that are part + * of the current SLB bond. This is used during _link_changed_cb() to + * figure out whether a change on the interface might be relevant to + * trigger a _reconfigure_check() on idle. */ + GHashTable *previous_ifindexes; + + /* We need to keep track of active members that we configured in NFT. + * That is, because on update we use "add && flush" to reset the table, + * however that leaves empty chains around. If we previously had an active + * member, a chain for it was created that we need to clean up. + * + * Before every NFT call we use this to generate the list of members that + * are to be cleaned up. Thereby also adding the new active-members to + * the list. When the NFT call returns with success, we can prune the + * now deleted member/chain. */ + GHashTable *previous_members; + + GCancellable *cancellable; + + struct { + char *bond_ifname_curr; + char *bond_ifname_next; + const char **active_members_curr; + const char **active_members_next; + } dat; + + gulong link_changed_id; + int ifindex; + RegistrationState reg_state; + bool destroyed : 1; + + /* Whether we noticed some changes that require us to _reconfigure_check(). + * Note that while a NFT call is pending, we postpone the check. */ + bool reconfigure_check : 1; + + /* Whether a `nft` call is in progress. Usually this corresponds to + * having a cancellable, however, we may also cancel and clear the + * cancellable while the call is still in progress. */ + bool nft_in_progress : 1; + + /* Whether the last NFT invocation was good. If not, we may have + * an invalid state.
Actually unused, so far because it's not + * clear what to do about failure to configure NFT (aside logging + * a warning). */ + bool nft_good : 1; + + /* The overall state. DEFAULT means that an update is pending. + * FALSE means that the last "nft" command failed. + * TRUE means that the last "nft" command was good. */ + NMOptionBool state : 3; +}; + +#define NM_IS_BOND_MANAGER(self) \ + ({ \ + const NMBondManager *_self = (self); \ + \ + (_self && NM_IS_PLATFORM(_self->platform)); \ + }) + +/*****************************************************************************/ + +static void _nft_call(NMBondManager *self, + gboolean up, + const char *bond_ifname, + const char *const *bond_ifnames_down, + const char *const *active_members); + +static void _bond_manager_destroy(NMBondManager *self); + +static void _reconfigure_check(NMBondManager *self, gboolean reapply); + +/*****************************************************************************/ + +#define _NMLOG_DOMAIN LOGD_DEVICE +#define _NMLOG_PREFIX_NAME "mlag" +#define _NMLOG(level, ...) 
\ + G_STMT_START \ + { \ + const NMLogLevel _level = (level); \ + \ + if (nm_logging_enabled(_level, _NMLOG_DOMAIN)) { \ + NMBondManager *const _self = (self); \ + const char *_ifname = nm_platform_link_get_name(_self->platform, _self->ifindex); \ + char _sbuf[30]; \ + \ + _nm_log(_level, \ + _NMLOG_DOMAIN, \ + 0, \ + _ifname, \ + _self->connection_uuid, \ + "%s[" NM_HASH_OBFUSCATE_PTR_FMT ", %s]: " _NM_UTILS_MACRO_FIRST(__VA_ARGS__), \ + _NMLOG_PREFIX_NAME, \ + NM_HASH_OBFUSCATE_PTR(_self), \ + (_ifname ?: nm_sprintf_buf(_sbuf, "(%d)", _self->ifindex)) \ + _NM_UTILS_MACRO_REST(__VA_ARGS__)); \ + } \ + } \ + G_STMT_END + +static const char * +_log_info(NMStrBuf *strbuf, + const char *bond_ifname, + const char *const *active_members, + const char *const *previous_members) +{ + gsize i; + + nm_str_buf_reset(strbuf); + + if (!bond_ifname) + nm_str_buf_append(strbuf, "(disabled)"); + else { + nm_str_buf_append_printf(strbuf, "(enabled, \"%s\"", bond_ifname); + + for (i = 0; active_members && active_members[i]; i++) { + if (i == 0) + nm_str_buf_append(strbuf, ", active-members=[ \""); + else + nm_str_buf_append(strbuf, "\", \""); + nm_str_buf_append(strbuf, active_members[i]); + } + if (i > 0) + nm_str_buf_append(strbuf, "\" ]"); + + for (i = 0; previous_members && previous_members[i]; i++) { + nm_assert(!nm_strv_contains(active_members, -1, previous_members[i])); + if (i == 0) + nm_str_buf_append(strbuf, ", previous-members=[ \""); + else + nm_str_buf_append(strbuf, "\", \""); + nm_str_buf_append(strbuf, previous_members[i]); + } + if (i > 0) + nm_str_buf_append(strbuf, "\" ]"); + + nm_str_buf_append(strbuf, ")"); + } + + return nm_str_buf_get_str(strbuf); +} + +/*****************************************************************************/ + +static gboolean +_nm_assert_self_(NMBondManager *self) +{ + nm_assert(self); + nm_assert(NM_IS_PLATFORM(self->platform)); + nm_assert(!self->cancellable || G_IS_CANCELLABLE(self->cancellable)); + nm_assert(!self->cancellable || 
!g_cancellable_is_cancelled(self->cancellable)); + nm_assert(!self->dat.active_members_curr || self->dat.bond_ifname_curr); + nm_assert(!self->dat.active_members_next || self->dat.bond_ifname_next); + nm_assert(!self->cancellable || self->nft_in_progress); + nm_assert(!self->reconfigure_on_idle_source || self->reconfigure_check); + nm_assert(!self->nft_in_progress || !self->reconfigure_on_idle_source); + + nm_assert(!self->dat.active_members_curr || self->dat.bond_ifname_curr[0]); + nm_assert(!self->dat.active_members_next || self->dat.bond_ifname_next[0]); + + nm_assert(!self->destroyed || !self->dat.bond_ifname_next); + nm_assert(!self->destroyed + || NM_IN_SET((RegistrationState) self->reg_state, + REGISTRATION_STATE_UPPING, + REGISTRATION_STATE_DOWNING)); + + switch (self->reg_state) { + case REGISTRATION_STATE_NONE: + nm_assert(!self->nft_in_progress); + nm_assert(!self->cancellable); + nm_assert(!self->dat.bond_ifname_curr); + nm_assert(!self->dat.bond_ifname_next); + break; + case REGISTRATION_STATE_UPPING: + nm_assert(self->nft_in_progress); + nm_assert(self->dat.bond_ifname_curr); + break; + case REGISTRATION_STATE_UP: + nm_assert(!self->nft_in_progress); + nm_assert(!self->cancellable); + nm_assert(self->dat.bond_ifname_curr); + nm_assert(!self->dat.bond_ifname_next); + break; + case REGISTRATION_STATE_DOWNING: + nm_assert(self->nft_in_progress); + nm_assert(self->dat.bond_ifname_curr); + break; + default: + nm_assert_not_reached(); + break; + } + + return TRUE; +} + +#define _nm_assert_self(self) nm_assert(_nm_assert_self_(self)) + +/*****************************************************************************/ + +static void +_callback_invoke(NMBondManager *self, NMBondManagerEventType event_type) +{ + if (!self->callback) + return; + + self->callback(self, event_type, self->user_data); +} + +static void +_notify_state_change(NMBondManager *self) +{ + NMOptionBool state; + + if (self->nft_in_progress) + state = NM_OPTION_BOOL_DEFAULT; + else + state = 
!!self->nft_good; + + if (state == self->state) + return; + + self->state = state; + _callback_invoke(self, NM_BOND_MANAGER_EVENT_TYPE_STATE); +} + +/*****************************************************************************/ + +static void +_nft_call_cb(GObject *source, GAsyncResult *result, gpointer user_data) +{ + nm_auto_str_buf NMStrBuf strbuf = NM_STR_BUF_INIT_A(NM_UTILS_GET_NEXT_REALLOC_SIZE_232, FALSE); + NMBondManager *self; + gpointer ptr_up; + gs_free const char **previous_members = NULL; + gs_free_error GError *error = NULL; + + nm_utils_user_data_unpack(user_data, &self, &ptr_up, &previous_members); + + _nm_assert_self(self); + + self->nft_in_progress = FALSE; + + nm_firewall_nft_call_finish(result, &error); + + if (!error) { + gsize i; + + /* On success, we can forget about our previous members that we successfully + * deleted. */ + if (!GPOINTER_TO_INT(ptr_up)) { + /* We successfully deleted the NFT table. Forget all previous members. */ + g_hash_table_remove_all(self->previous_members); + } else if (previous_members) { + /* These previous members are now forgotten for good. */ + for (i = 0; previous_members[i]; i++) + g_hash_table_remove(self->previous_members, previous_members[i]); + } + } else { + /* If all our NFT calls keep failing, we never actually prune entries from + * self->previous_members. That is a problem, however, under normal operation + * NFT calls should not continuously fail, and we would have a small fixed + * number of active-members. */ + } + + nm_clear_g_cancellable(&self->cancellable); + + if (nm_utils_error_is_cancelled(error)) { + switch (self->reg_state) { + case REGISTRATION_STATE_NONE: + case REGISTRATION_STATE_UP: + case REGISTRATION_STATE_DOWNING: + /* It is not expected that we cancel anything in this state. */ + nm_assert_not_reached(); + goto out; + case REGISTRATION_STATE_UPPING: + nm_assert(self->dat.bond_ifname_curr); + /* We cancelled while upping. 
We need to issue another down, + * to make sure the data is gone. */ + if (!self->dat.bond_ifname_next) { + /* There is no other name to configure. We just need to down + * the current one. */ + _LOGT("reconfigure: configuration cancelled, deconfigure %s", + self->dat.bond_ifname_curr); + _nft_call(self, FALSE, self->dat.bond_ifname_curr, NULL, NULL); + self->reg_state = REGISTRATION_STATE_DOWNING; + goto out; + } + /* There is already another configuration. UPPING again. */ + _LOGT("reconfigure: configuration cancelled, configure %s", + _log_info(&strbuf, + self->dat.bond_ifname_next, + self->dat.active_members_next, + NULL)); + _nft_call(self, + TRUE, + self->dat.bond_ifname_next, + NM_MAKE_STRV(self->dat.bond_ifname_curr), + self->dat.active_members_next); + self->reg_state = REGISTRATION_STATE_UPPING; + nm_clear_g_free(&self->dat.bond_ifname_curr); + nm_clear_g_free(&self->dat.active_members_curr); + self->dat.bond_ifname_curr = g_steal_pointer(&self->dat.bond_ifname_next); + self->dat.active_members_curr = g_steal_pointer(&self->dat.active_members_next); + goto out; + } + nm_assert_not_reached(); + goto out; + } + + if (error) { + self->nft_good = FALSE; + } else { + /* Technically, if a previous downing failed, we cannot know that + * we were able to fix this bug with a successful run now. That is, because + * if the interface got renamed, and the downing for the previous + * interface name failed, we leak that table and the success now doesn't + * fix that. + * + * That is a bug, but probably not severe because: + * - interfaces are not supposed to be renamed. + * - if this NFT command succeeds, we expect that also the previous downings worked. + * + * The problem here is only that nft_good might lie and indicate + * no problem. However, when a downing fails, we anyway leak the table already + * and the bad thing happened. We cannot fix it if the `nft` command fails.
+ */ + self->nft_good = TRUE; + } + + switch (self->reg_state) { + case REGISTRATION_STATE_NONE: + case REGISTRATION_STATE_UP: + /* Unexpected to get a callback completion in these states. */ + nm_assert_not_reached(); + goto out; + case REGISTRATION_STATE_UPPING: + nm_assert(!self->dat.bond_ifname_next); + if (error) { + /* Unclear what to do about this error. Just log about it, nothing else. */ + _LOGW("reconfigure: nft configuration for balance-slb failed: %s", error->message); + } else + _LOGT("reconfigure: configuration completed"); + self->reg_state = REGISTRATION_STATE_UP; + goto out; + case REGISTRATION_STATE_DOWNING: + nm_assert(self->dat.bond_ifname_curr); + if (!self->dat.bond_ifname_next) { + if (error) { + /* Unclear what to do about this error. Just log about it, nothing else. */ + _LOGW("reconfigure: nft deconfiguration for balance-slb failed: %s", + error->message); + } else + _LOGT("reconfigure: deconfiguration completed"); + nm_clear_g_free(&self->dat.bond_ifname_curr); + nm_clear_g_free(&self->dat.active_members_curr); + self->reg_state = REGISTRATION_STATE_NONE; + + if (self->destroyed) { + _bond_manager_destroy(self); + return; + } + + goto out; + } + if (error) { + /* Unclear what to do about this error. Just log about it, nothing else. 
*/ + _LOGW("reconfigure: nft deconfiguration failed before restart: %s", error->message); + } else + _LOGT("reconfigure: deconfiguration completed before restart"); + _nft_call(self, + TRUE, + self->dat.bond_ifname_next, + NM_MAKE_STRV(self->dat.bond_ifname_curr), + self->dat.active_members_next); + nm_clear_g_free(&self->dat.bond_ifname_curr); + nm_clear_g_free(&self->dat.active_members_curr); + self->dat.bond_ifname_curr = g_steal_pointer(&self->dat.bond_ifname_next); + self->dat.active_members_curr = g_steal_pointer(&self->dat.active_members_next); + self->reg_state = REGISTRATION_STATE_UPPING; + goto out; + } + + nm_assert_not_reached(); + +out: + if (self->reconfigure_check) { + if (self->destroyed) + nm_assert_not_reached(); + else if (!self->nft_in_progress) { + nm_assert(!self->reconfigure_on_idle_source); + _reconfigure_check(self, FALSE); + } + } + + _notify_state_change(self); +} + +static void +_nft_call(NMBondManager *self, + gboolean up, + const char *bond_ifname, + const char *const *bond_ifnames_down, + const char *const *active_members) +{ + gs_unref_bytes GBytes *stdin_buf = NULL; + gs_free const char *const *previous_members_strv = NULL; + + if (up) { + gs_unref_ptrarray GPtrArray *arr = NULL; + GHashTableIter iter; + const char *n; + gsize i; + + /* We need to track the active-members that we add, because, when we update the + * NFT table without the member from previously, we use "add && flush", which + * leaves empty chains for the previous members around. We need to cleanup those + * chains, hence the need to track which members we ever added. + * + * Before making an UP call, we add the newly configured active_members to the list + * of previous_members. All the while, passing a list of previous_members_strv + * which we currently no longer configure. + * + * Only when the call succeeds (in _nft_call_cb()), we will forget about previously added + * members. 
This is done by passing the list of members that we are forgetting now + * on to the callback below. */ + + /* Get the list of previous members that are no longer in the current + * active list. */ + g_hash_table_iter_init(&iter, self->previous_members); + while (g_hash_table_iter_next(&iter, (gpointer *) &n, NULL)) { + if (nm_strv_contains(active_members, -1, n)) + continue; + if (!arr) + arr = g_ptr_array_new(); + g_ptr_array_add(arr, (gpointer) n); + } + if (arr) { + nm_strv_sort((const char **) arr->pdata, arr->len); + previous_members_strv = nm_strv_dup_packed((const char *const *) arr->pdata, arr->len); + } + + /* The now active member also get tracked as previous members for the future. */ + if (active_members) { + for (i = 0; active_members[i]; i++) + g_hash_table_add(self->previous_members, g_strdup(active_members[i])); + } + } + + stdin_buf = nm_firewall_nft_stdio_mlag(up, + bond_ifname, + bond_ifnames_down, + active_members, + previous_members_strv); + + nm_clear_g_cancellable(&self->cancellable); + self->cancellable = g_cancellable_new(); + + nm_shutdown_wait_obj_register_cancellable(self->cancellable, "nft-mlag"); + + if (_LOGT_ENABLED()) { + if (up) { + nm_auto_str_buf NMStrBuf strbuf = + NM_STR_BUF_INIT_A(NM_UTILS_GET_NEXT_REALLOC_SIZE_232, FALSE); + + _LOGT("reconfigure: call nft: %s", + _log_info(&strbuf, bond_ifname, active_members, previous_members_strv)); + } else + _LOGT("reconfigure: call nft: disable on \"%s\"", bond_ifname); + } + + self->nft_in_progress = TRUE; + + if (self->reconfigure_check) + nm_clear_g_source_inst(&self->reconfigure_on_idle_source); + + nm_firewall_nft_call(stdin_buf, + self->cancellable, + _nft_call_cb, + nm_utils_user_data_pack(self, + GINT_TO_POINTER(up), + g_steal_pointer(&previous_members_strv))); +} + +/*****************************************************************************/ + +static void +_reconfigure_do(NMBondManager *self, + gboolean reapply, + const char *bond_ifname, + const char **active_members_take) 
+{ + nm_auto_str_buf NMStrBuf strbuf = NM_STR_BUF_INIT_A(NM_UTILS_GET_NEXT_REALLOC_SIZE_232, FALSE); + gs_free const char **active_members = g_steal_pointer(&active_members_take); + + _nm_assert_self(self); + nm_assert(!active_members || bond_ifname); + nm_assert(!active_members || active_members[0]); + + /* The difficulty of all of this is "state". In particular, since we make the nft call + * async, we need to handle all the possible cases, how an update event can invalidate + * a currently pending call. */ + + switch (self->reg_state) { + case REGISTRATION_STATE_NONE: + nm_assert(!self->dat.bond_ifname_curr); + nm_assert(!self->dat.active_members_curr); + nm_assert(!self->dat.bond_ifname_next); + nm_assert(!self->dat.active_members_next); + nm_assert(!self->cancellable); + nm_assert(!self->nft_in_progress); + + if (!bond_ifname) { + /* No configuration done. Nothing to do. */ + goto out; + } + + _LOGT("reconfigure: start configuring (%s)", + _log_info(&strbuf, bond_ifname, active_members, NULL)); + self->dat.bond_ifname_curr = g_strdup(bond_ifname); + self->dat.active_members_curr = nm_strv_dup_packed(active_members, -1); + _nft_call(self, TRUE, self->dat.bond_ifname_curr, NULL, self->dat.active_members_curr); + self->reg_state = REGISTRATION_STATE_UPPING; + goto out; + case REGISTRATION_STATE_UPPING: + nm_assert(self->dat.bond_ifname_curr); + nm_assert(self->nft_in_progress); + + /* We are UPPING, we cancel the pending operation and will + * handle the rest when the callback completes. */ + if (!bond_ifname) { + if (self->cancellable || self->dat.bond_ifname_next) + _LOGT("reconfigure: aborting configuring"); + nm_clear_g_free(&self->dat.bond_ifname_next); + nm_clear_g_free(&self->dat.active_members_next); + nm_clear_g_cancellable(&self->cancellable); + goto out; + } + if (!reapply && self->cancellable && nm_streq0(bond_ifname, self->dat.bond_ifname_curr) + && nm_strv_equal(active_members, self->dat.active_members_curr)) { + /* Nothing to do. 
We are already upping this setup. */ + nm_assert(!self->dat.bond_ifname_next); + nm_assert(!self->dat.active_members_next); + goto out; + } + if (!reapply && !self->cancellable && nm_streq0(bond_ifname, self->dat.bond_ifname_next) + && nm_strv_equal(active_members, self->dat.active_members_next)) { + /* We already cancelled the current upping, and have scheduled another + * (identical) run. Nothing to do. */ + goto out; + } + _LOGT("reconfigure: abort configuring to configure %s", + _log_info(&strbuf, bond_ifname, active_members, NULL)); + nm_clear_g_free(&self->dat.bond_ifname_next); + nm_clear_g_free(&self->dat.active_members_next); + self->dat.bond_ifname_next = g_strdup(bond_ifname); + self->dat.active_members_next = nm_strv_dup_packed(active_members, -1); + nm_clear_g_cancellable(&self->cancellable); + goto out; + case REGISTRATION_STATE_UP: + nm_assert(self->dat.bond_ifname_curr); + nm_assert(!self->dat.bond_ifname_next); + nm_assert(!self->dat.active_members_next); + nm_assert(!self->cancellable); + nm_assert(!self->nft_in_progress); + + if (!bond_ifname) { + _LOGT("reconfigure: deconfigure to disable"); + _nft_call(self, FALSE, self->dat.bond_ifname_curr, NULL, NULL); + self->reg_state = REGISTRATION_STATE_DOWNING; + goto out; + } + if (!reapply && nm_streq0(bond_ifname, self->dat.bond_ifname_curr) + && nm_strv_equal(active_members, self->dat.active_members_curr)) { + /* Nothing to do. The current configuration is already active. 
*/ + goto out; + } + _LOGT("reconfigure: configure, update to %s", + _log_info(&strbuf, bond_ifname, active_members, NULL)); + _nft_call(self, + TRUE, + bond_ifname, + NM_MAKE_STRV(self->dat.bond_ifname_curr), + active_members); + self->reg_state = REGISTRATION_STATE_UPPING; + nm_clear_g_free(&self->dat.bond_ifname_curr); + nm_clear_g_free(&self->dat.active_members_curr); + self->dat.bond_ifname_curr = g_strdup(bond_ifname); + self->dat.active_members_curr = nm_strv_dup_packed(active_members, -1); + goto out; + case REGISTRATION_STATE_DOWNING: + nm_assert(self->dat.bond_ifname_curr); + nm_assert(self->nft_in_progress); + + /* we are already DOWNING. It suffices to clear the scheduled "next" + * config and wait, and reset the "next" configuration. */ + if (nm_streq0(bond_ifname, self->dat.bond_ifname_next) + && nm_strv_equal(active_members, self->dat.active_members_next)) { + /* Nothing to do. */ + goto out; + } + _LOGT("reconfigure: deconfiguring and waiting for %s", + _log_info(&strbuf, bond_ifname, active_members, NULL)); + nm_clear_g_free(&self->dat.bond_ifname_next); + nm_clear_g_free(&self->dat.active_members_next); + if (bond_ifname) { + self->dat.bond_ifname_next = g_strdup(bond_ifname); + self->dat.active_members_next = nm_strv_dup_packed(active_members, -1); + } + goto out; + } + nm_assert_not_reached(); + +out: + _notify_state_change(self); +} + +static void +_reconfigure_check(NMBondManager *self, gboolean reapply) +{ + const NMPlatformLink *plink_ctrl; + const NMPlatformLink *plink_port; + const NMPlatformLnkBond *plnkbond_ctrl; + NMDedupMultiIter pliter; + const NMDedupMultiHeadEntry *pl_links_head_entry; + const char *active_members_lst_stack[16]; + gs_free const char **active_members_lst_heap = NULL; + const char **active_members_lst = active_members_lst_stack; + gsize active_members_alloc = G_N_ELEMENTS(active_members_lst_stack); + gsize active_members_n = 0; + gs_free const char **active_members_result = NULL; + const char *bond_ifname = NULL; + + 
_nm_assert_self(self); + nm_assert(!self->destroyed); + + self->reconfigure_check = FALSE; + nm_clear_g_source_inst(&self->reconfigure_on_idle_source); + + g_hash_table_remove_all(self->previous_ifindexes); + + plnkbond_ctrl = nm_platform_link_get_lnk_bond(self->platform, self->ifindex, &plink_ctrl); + + /* We only do bonding-slb MLAG handling if our ifindex is a bond with + * mode=balance-xor && xmit_hash_policy=vlan+srcmac. */ + if (!plnkbond_ctrl) + goto out; + if (!plink_ctrl) + goto out; + if (plink_ctrl->type != NM_LINK_TYPE_BOND) + goto out; + if (plnkbond_ctrl->mode != NM_BOND_MODE_XOR) + goto out; + if (plnkbond_ctrl->xmit_hash_policy != NM_BOND_XMIT_HASH_POLICY_VLAN_SRCMAC) + goto out; + + /* Find all the connected ports that are IFF_RUNNING. */ + pl_links_head_entry = nm_platform_lookup_obj_type(self->platform, NMP_OBJECT_TYPE_LINK); + nmp_cache_iter_for_each_link (&pliter, pl_links_head_entry, &plink_port) { + if (plink_port->master != self->ifindex) + continue; + if (!NM_FLAGS_HAS(plink_port->n_ifi_flags, IFF_RUNNING)) + continue; + + g_hash_table_add(self->previous_ifindexes, GINT_TO_POINTER(plink_port->ifindex)); + + if (active_members_n == active_members_alloc) { + active_members_alloc *= 2; + active_members_lst_heap = + g_renew(const char *, active_members_lst_heap, active_members_alloc); + if (active_members_lst == active_members_lst_stack) { + memcpy(active_members_lst_heap, + active_members_lst_stack, + sizeof(const char *) * active_members_n); + } + active_members_lst = active_members_lst_heap; + } + + active_members_lst[active_members_n++] = plink_port->name; + } + + if (active_members_n > 0) { + gsize i; + gsize j; + + /* We sort the active members by name */ + g_qsort_with_data(active_members_lst, + active_members_n, + sizeof(const char *), + nm_strcmp_p_with_data, + NULL); + + /* There really shouldn't be any duplicates. Nonetheless, check + * and drop them. They must be unique, because nm_firewall_nft_stdio_mlag() + * relies on that. 
*/ + for (j = 1, i = 1; i < active_members_n; i++) { + if (nm_streq(active_members_lst[j - 1], active_members_lst[i])) { + /* Repeated. Skip. */ + continue; + } + if (j != i) + active_members_lst[j] = active_members_lst[i]; + j++; + } + active_members_n = j; + + active_members_result = g_new(const char *, active_members_n + 1u); + j = 0; + + if (self->dat.active_members_curr) { + /* We configured a list earlier. We want to preserve the sort order + * from before. Prefer entries that we already had, in their previous + * order. */ + for (i = 0; self->dat.active_members_curr[i]; i++) { + gssize idx; + + /* We cannot use binary search, because we steal the elements we found + * already. Hence this is O(n^2). We could use binary search if we would + * not modify active_members_lst, but then we would need to remember + * somehow which elements are already consumed. */ + idx = nm_strv_find_first(active_members_lst, + active_members_n, + self->dat.active_members_curr[i]); + if (idx >= 0) + active_members_result[j++] = g_steal_pointer(&active_members_lst[idx]); + } + } + + /* append the remaining entries, which are sorted by name. 
*/ + for (i = 0; i < active_members_n; i++) { + if (active_members_lst[i]) + active_members_result[j++] = active_members_lst[i]; + } + + nm_assert(j == active_members_n); + active_members_result[j] = NULL; + } + + bond_ifname = plink_ctrl->name; + +out: + _reconfigure_do(self, reapply, bond_ifname, g_steal_pointer(&active_members_result)); +} + +static gboolean +_reconfigure_check_on_idle_cb(gpointer user_data) +{ + NMBondManager *self = user_data; + + nm_assert(!self->nft_in_progress); + _reconfigure_check(self, FALSE); + return G_SOURCE_CONTINUE; +} + +/*****************************************************************************/ + +static void +_link_changed_cb(NMPlatform *platform, + int obj_type_i, + int ifindex, + const NMPlatformLink *plink, + int change_type_i, + NMBondManager *self) +{ + if (self->reconfigure_check) { + /* Recheck already scheduled. */ + return; + } + + if (self->destroyed) { + /* We should not get another event at this point. Anyway, ignore. */ + return; + } + + if (ifindex == self->ifindex) + goto schedule; + + if (plink->master == self->ifindex) + goto schedule; + + if (g_hash_table_contains(self->previous_ifindexes, GINT_TO_POINTER(ifindex))) + goto schedule; + + /* This event is not relevant. Skip. 
*/ + return; +/* The flag is always set; the idle source is only queued while no nft call is in progress. */ +schedule: + self->reconfigure_check = TRUE; + if (!self->nft_in_progress) { + self->reconfigure_on_idle_source = + nm_g_idle_add_source(_reconfigure_check_on_idle_cb, self); + } +} + +/*****************************************************************************/ +/* Re-runs the platform check right away by calling _reconfigure_check() with reapply=TRUE. */ +void +nm_bond_manager_reapply(NMBondManager *self) +{ + _reconfigure_check(self, TRUE); +} + +/*****************************************************************************/ +/* Plain accessors: each one asserts NM_IS_BOND_MANAGER(self) and returns the stored field. */ +int +nm_bond_manager_get_ifindex(NMBondManager *self) +{ + nm_assert(NM_IS_BOND_MANAGER(self)); + + return self->ifindex; +} + +const char * +nm_bond_manager_get_connection_uuid(NMBondManager *self) +{ + nm_assert(NM_IS_BOND_MANAGER(self)); + + return self->connection_uuid; +} + +NMOptionBool +nm_bond_manager_get_state(NMBondManager *self) +{ + nm_assert(NM_IS_BOND_MANAGER(self)); + + return self->state; +} + +/*****************************************************************************/ +/* Allocates a manager for @ifindex. It takes a reference on @platform, copies @connection_uuid, starts with state NM_OPTION_BOOL_DEFAULT, subscribes to link-changed events and runs a first _reconfigure_check() with reapply=TRUE before returning. */ +NMBondManager * +nm_bond_manager_new(struct _NMPlatform *platform, + int ifindex, + const char *connection_uuid, + NMBondManagerCallback callback, + gpointer user_data) +{ + NMBondManager *self; + + nm_assert(NM_IS_PLATFORM(platform)); + nm_assert(ifindex > 0); + + self = g_slice_new(NMBondManager); + *self = (NMBondManager){ + .platform = g_object_ref(platform), + .ifindex = ifindex, + .reg_state = REGISTRATION_STATE_NONE, + .destroyed = FALSE, + .nft_good = TRUE, + .callback = callback, + .user_data = user_data, + .previous_ifindexes = g_hash_table_new(nm_direct_hash, NULL), + .previous_members = g_hash_table_new_full(nm_str_hash, g_str_equal, g_free, NULL), + .connection_uuid = g_strdup(connection_uuid), + .state = NM_OPTION_BOOL_DEFAULT, + }; + + self->link_changed_id = g_signal_connect(self->platform, + NM_PLATFORM_SIGNAL_LINK_CHANGED, + G_CALLBACK(_link_changed_cb), + self); + + _LOGT("new balance-slb (MLAG) manager for interface %d", self->ifindex); + + _reconfigure_check(self, TRUE); + + return self; +} +/* Marks the manager destroyed and drops the callback, the signal handler, the idle source and the "next" configuration. Only reg_state NONE frees the object here; the UPPING, UP and DOWNING branches return early (NOTE(review): the final free for those cases presumably happens when the nft call completes -- not visible in this function). */ +void
+nm_bond_manager_destroy(NMBondManager *self) +{ + g_return_if_fail(self); + g_return_if_fail(!self->destroyed); + + self->destroyed = TRUE; + + self->callback = NULL; + self->user_data = NULL; + + nm_clear_g_signal_handler(self->platform, &self->link_changed_id); + + nm_clear_g_source_inst(&self->reconfigure_on_idle_source); + self->reconfigure_check = FALSE; + + nm_clear_g_free(&self->dat.bond_ifname_next); + nm_clear_g_free(&self->dat.active_members_next); + + switch (self->reg_state) { + case REGISTRATION_STATE_NONE: + break; + case REGISTRATION_STATE_UPPING: + /* We still have some nfts registered. We need to wrap them up. */ + _LOGT("destroying but deconfigure pending configuration first"); + nm_clear_g_free(&self->dat.bond_ifname_next); + nm_clear_g_free(&self->dat.active_members_next); + nm_clear_g_cancellable(&self->cancellable); + return; + case REGISTRATION_STATE_UP: + _LOGT("destroying but deconfigure first"); + _nft_call(self, FALSE, self->dat.bond_ifname_curr, NULL, NULL); + self->reg_state = REGISTRATION_STATE_DOWNING; + return; + case REGISTRATION_STATE_DOWNING: + _LOGT("destroying but wait for deconfiguring"); + return; + } + + _bond_manager_destroy(self); +} +/* Final teardown. Asserts that the manager is destroyed, unregistered and has nothing pending, then releases the platform reference, both hash tables, the UUID copy and the struct itself. */ +static void +_bond_manager_destroy(NMBondManager *self) +{ + _LOGT("destroyed"); + + nm_assert(self); + nm_assert(self->destroyed); + nm_assert(self->reg_state == REGISTRATION_STATE_NONE); + nm_assert(self->link_changed_id == 0); + nm_assert(!self->cancellable); + nm_assert(!self->dat.bond_ifname_curr); + nm_assert(!self->dat.active_members_curr); + nm_assert(!self->reconfigure_on_idle_source); + + nm_clear_g_free(&self->dat.bond_ifname_next); + nm_clear_g_free(&self->dat.active_members_next); + + g_object_unref(self->platform); + g_hash_table_unref(self->previous_ifindexes); + g_hash_table_unref(self->previous_members); + g_free(self->connection_uuid); + nm_g_slice_free(self); +} diff --git a/src/core/nm-bond-manager.h b/src/core/nm-bond-manager.h new file mode 100644 index
0000000000..92a89f0b92 --- /dev/null +++ b/src/core/nm-bond-manager.h @@ -0,0 +1,32 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#ifndef __NM_BOND_MANAGER_H__ +#define __NM_BOND_MANAGER_H__ + +typedef struct _NMBondManager NMBondManager; + +struct _NMPlatform; + +typedef enum { + NM_BOND_MANAGER_EVENT_TYPE_STATE, +} NMBondManagerEventType; + +typedef void (*NMBondManagerCallback)(NMBondManager *self, + NMBondManagerEventType event_type, + gpointer user_data); + +NMBondManager *nm_bond_manager_new(struct _NMPlatform *platform, + int ifindex, + const char *connection_uuid, + NMBondManagerCallback callback, + gpointer user_data); + +void nm_bond_manager_reapply(NMBondManager *self); + +void nm_bond_manager_destroy(NMBondManager *self); + +int nm_bond_manager_get_ifindex(NMBondManager *self); +const char *nm_bond_manager_get_connection_uuid(NMBondManager *self); +NMOptionBool nm_bond_manager_get_state(NMBondManager *self); + +#endif /* __NM_BOND_MANAGER_H__ */ From 22f670687a5b91784bf04cedaf0a7f11c4eb2dd5 Mon Sep 17 00:00:00 2001 From: Thomas Haller Date: Fri, 16 Sep 2022 13:32:25 +0200 Subject: [PATCH 3/3] libnm,core: support "bond.balance-slb" option --- NEWS | 4 + src/core/devices/nm-device-bond.c | 123 ++++++++++++++++++++++-- src/libnm-core-impl/nm-setting-bond.c | 40 ++++++++ src/libnm-core-public/nm-setting-bond.h | 1 + 4 files changed, 162 insertions(+), 6 deletions(-) diff --git a/NEWS b/NEWS index aa2eb464b4..4bd63ab496 100644 --- a/NEWS +++ b/NEWS @@ -14,6 +14,10 @@ USE AT YOUR OWN RISK. NOT RECOMMENDED FOR PRODUCTION USE! in `nmcli connection $operator uuid $uuid`. * nmtui now supports editing Wi-Fi WPA-Enterprise, Ethernet with 802.1X authentication and MACsec connection profiles. +* bond: add "balance-slb" option which implements source load balancing + with "balance-xor" mode and "vlan+srcmac" xmit_hash_policy. In this + mode, NetworkManager configures nftables to prevent loops in the + switch.
============================================= NetworkManager-1.40 diff --git a/src/core/devices/nm-device-bond.c b/src/core/devices/nm-device-bond.c index dc5e1d5c7e..72ede7c19c 100644 --- a/src/core/devices/nm-device-bond.c +++ b/src/core/devices/nm-device-bond.c @@ -20,6 +20,7 @@ #include "libnm-core-intern/nm-core-internal.h" #include "nm-manager.h" #include "nm-setting-bond-port.h" +#include "nm-bond-manager.h" #define _NMLOG_DEVICE_TYPE NMDeviceBond #include "nm-device-logging.h" @@ -59,7 +60,8 @@ /*****************************************************************************/ struct _NMDeviceBond { - NMDevice parent; + NMDevice parent; + NMBondManager *bond_manager; }; struct _NMDeviceBondClass { @@ -178,7 +180,9 @@ update_connection(NMDevice *device, NMConnection *connection) gs_free char *value = NULL; char *p; - if (NM_IN_STRSET(option, NM_SETTING_BOND_OPTION_ACTIVE_SLAVE)) + if (NM_IN_STRSET(option, + NM_SETTING_BOND_OPTION_ACTIVE_SLAVE, + NM_SETTING_BOND_OPTION_BALANCE_SLB)) continue; value = @@ -460,10 +464,97 @@ _platform_lnk_bond_init_from_setting(NMSettingBond *s_bond, NMPlatformLnkBond *p props->tlb_dynamic_lb_has = NM_IN_SET(props->mode, NM_BOND_MODE_TLB, NM_BOND_MODE_ALB); } +static void +_balance_slb_cb(NMBondManager *bond_manager, NMBondManagerEventType event_type, gpointer user_data) +{ + NMDevice *device = user_data; + NMDeviceBond *self = NM_DEVICE_BOND(device); +/* Callback handed to nm_bond_manager_new(). It maps the manager state onto the device: FALSE fails the device with CONFIG_FAILED, TRUE moves the AF_UNSPEC devip state to READY if it is at most PENDING, DEFAULT moves it from READY back to PENDING. All three only act while the device state is at most ACTIVATED. */ + nm_assert(NM_IS_DEVICE_BOND(self)); + nm_assert(self->bond_manager == bond_manager); + + switch (event_type) { + case NM_BOND_MANAGER_EVENT_TYPE_STATE: + switch (nm_bond_manager_get_state(bond_manager)) { + case NM_OPTION_BOOL_FALSE: + if (nm_device_get_state(device) <= NM_DEVICE_STATE_ACTIVATED) { + _LOGD(LOGD_BOND, "balance-slb: failed"); + nm_device_state_changed(device, + NM_DEVICE_STATE_FAILED, + NM_DEVICE_STATE_REASON_CONFIG_FAILED); + } + return; + case NM_OPTION_BOOL_TRUE: + if (nm_device_get_state(device) <= NM_DEVICE_STATE_ACTIVATED + && nm_device_devip_get_state(device,
AF_UNSPEC) <= NM_DEVICE_IP_STATE_PENDING) { + nm_device_devip_set_state(device, AF_UNSPEC, NM_DEVICE_IP_STATE_READY, NULL); + } + return; + case NM_OPTION_BOOL_DEFAULT: + if (nm_device_get_state(device) <= NM_DEVICE_STATE_ACTIVATED + && nm_device_devip_get_state(device, AF_UNSPEC) == NM_DEVICE_IP_STATE_READY) { + /* We are again busy. We can also go back to "pending" from "ready". + * If ip-config state is not yet complete, this will further delay it. + * Otherwise, it should have no effect. */ + nm_device_devip_set_state(device, AF_UNSPEC, NM_DEVICE_IP_STATE_PENDING, NULL); + } + return; + } + nm_assert_not_reached(); + return; + } + + nm_assert_not_reached(); +} +/* Brings self->bond_manager in line with the "balance-slb" option of @connection. Without an ifindex, a connection or a bond setting, or with the option off, an existing manager is destroyed. A manager with the same ifindex and connection UUID is reapplied; any other existing manager is destroyed and a new one is created. */ +static void +_balance_slb_setup(NMDeviceBond *self, NMConnection *connection) +{ + int ifindex = nm_device_get_ifindex(NM_DEVICE(self)); + gboolean balance_slb = FALSE; + const char *uuid; + NMSettingBond *s_bond; + + if (ifindex > 0 && connection && (s_bond = nm_connection_get_setting_bond(connection))) + balance_slb = _v_intbool(s_bond, NM_SETTING_BOND_OPTION_BALANCE_SLB); + + if (!balance_slb) { + if (nm_clear_pointer(&self->bond_manager, nm_bond_manager_destroy)) { + _LOGD(LOGD_BOND, "balance-slb: stopped"); + nm_device_devip_set_state(NM_DEVICE(self), AF_UNSPEC, NM_DEVICE_IP_STATE_NONE, NULL); + } + return; + } + + uuid = nm_connection_get_uuid(connection); + + if (self->bond_manager) { + if (nm_bond_manager_get_ifindex(self->bond_manager) == ifindex + && nm_streq0(nm_bond_manager_get_connection_uuid(self->bond_manager), uuid)) { + _LOGD(LOGD_BOND, "balance-slb: reapply"); + nm_bond_manager_reapply(self->bond_manager); + return; + } + nm_clear_pointer(&self->bond_manager, nm_bond_manager_destroy); + _LOGD(LOGD_BOND, "balance-slb: restart"); + } + + _LOGD(LOGD_BOND, "balance-slb: start"); + if (nm_device_devip_get_state(NM_DEVICE(self), AF_UNSPEC) < NM_DEVICE_IP_STATE_PENDING) + nm_device_devip_set_state(NM_DEVICE(self), AF_UNSPEC, NM_DEVICE_IP_STATE_PENDING, NULL); + self->bond_manager =
nm_bond_manager_new(nm_device_get_platform(NM_DEVICE(self)), + ifindex, + uuid, + _balance_slb_cb, + self); + nm_assert(nm_bond_manager_get_state(self->bond_manager) == NM_OPTION_BOOL_DEFAULT); +} + static NMActStageReturn act_stage1_prepare(NMDevice *device, NMDeviceStateReason *out_failure_reason) { - NMActStageReturn ret = NM_ACT_STAGE_RETURN_SUCCESS; + NMDeviceBond *self = NM_DEVICE_BOND(device); + NMActStageReturn ret = NM_ACT_STAGE_RETURN_SUCCESS; NMConnection *connection; NMSettingBond *s_bond; NMPlatformLnkBond props; @@ -476,6 +567,14 @@ act_stage1_prepare(NMDevice *device, NMDeviceStateReason *out_failure_reason) s_bond = nm_connection_get_setting_bond(connection); g_return_val_if_fail(s_bond, NM_ACT_STAGE_RETURN_FAILURE); + if (nm_device_sys_iface_state_is_external(device)) + return NM_ACT_STAGE_RETURN_SUCCESS; + + _balance_slb_setup(self, connection); + + if (nm_device_sys_iface_state_is_external_or_assume(device)) + return NM_ACT_STAGE_RETURN_SUCCESS; + _platform_lnk_bond_init_from_setting(s_bond, &props); /* Interface must be down to set bond options */ @@ -684,7 +783,7 @@ can_reapply_change(NMDevice *device, const char *name = *option_list; /* We support changes to these */ - if (NM_IN_STRSET(name, OPTIONS_REAPPLY_FULL)) + if (NM_IN_STRSET(name, OPTIONS_REAPPLY_FULL, NM_SETTING_BOND_OPTION_BALANCE_SLB)) continue; /* Reject any other changes */ @@ -730,6 +829,16 @@ reapply_connection(NMDevice *device, NMConnection *con_old, NMConnection *con_ne set_bond_arp_ip_targets(device, s_bond); set_bond_attrs_or_default(device, s_bond, NM_MAKE_STRV(OPTIONS_REAPPLY_SUBSET)); + + _balance_slb_setup(self, con_new); +} +/* Device deactivate hook: runs the balance-slb setup without a connection, which destroys an existing bond manager. */ +static void +deactivate(NMDevice *device) +{ + NMDeviceBond *self = NM_DEVICE_BOND(device); + + _balance_slb_setup(self, NULL); } /*****************************************************************************/ @@ -768,13 +877,15 @@ nm_device_bond_class_init(NMDeviceBondClass *klass) device_class->update_connection = update_connection;
device_class->master_update_slave_connection = controller_update_port_connection; - device_class->create_and_realize = create_and_realize; - device_class->act_stage1_prepare = act_stage1_prepare; + device_class->create_and_realize = create_and_realize; + device_class->act_stage1_prepare = act_stage1_prepare; + device_class->act_stage1_prepare_also_for_external_or_assume = TRUE; device_class->get_configured_mtu = nm_device_get_configured_mtu_for_wired; device_class->attach_port = attach_port; device_class->detach_port = detach_port; device_class->can_reapply_change = can_reapply_change; device_class->reapply_connection = reapply_connection; + device_class->deactivate = deactivate; } /*****************************************************************************/ diff --git a/src/libnm-core-impl/nm-setting-bond.c b/src/libnm-core-impl/nm-setting-bond.c index a7f64393b9..b03cc455a7 100644 --- a/src/libnm-core-impl/nm-setting-bond.c +++ b/src/libnm-core-impl/nm-setting-bond.c @@ -70,6 +70,7 @@ static const char *const valid_options_lst[] = { NM_SETTING_BOND_OPTION_ARP_INTERVAL, NM_SETTING_BOND_OPTION_ARP_IP_TARGET, NM_SETTING_BOND_OPTION_ARP_VALIDATE, + NM_SETTING_BOND_OPTION_BALANCE_SLB, NM_SETTING_BOND_OPTION_PRIMARY, NM_SETTING_BOND_OPTION_PRIMARY_RESELECT, NM_SETTING_BOND_OPTION_FAIL_OVER_MAC, @@ -195,6 +196,7 @@ static NM_UTILS_STRING_TABLE_LOOKUP_STRUCT_DEFINE( {NM_SETTING_BOND_OPTION_ARP_IP_TARGET, {"", NM_BOND_OPTION_TYPE_IP}}, {NM_SETTING_BOND_OPTION_ARP_VALIDATE, {"none", NM_BOND_OPTION_TYPE_BOTH, 0, 6, _option_default_strv_arp_validate}}, + {NM_SETTING_BOND_OPTION_BALANCE_SLB, {"0", NM_BOND_OPTION_TYPE_INT, 0, 1}}, {NM_SETTING_BOND_OPTION_DOWNDELAY, {"0", NM_BOND_OPTION_TYPE_INT, 0, G_MAXINT}}, {NM_SETTING_BOND_OPTION_FAIL_OVER_MAC, {"none", NM_BOND_OPTION_TYPE_BOTH, 0, 2, _option_default_strv_fail_over_mac}}, @@ -344,6 +346,17 @@ _bond_get_option_normalized(NMSettingBond *self, const char *option, gboolean ge value = _bond_get_option(self, 
NM_SETTING_BOND_OPTION_PRIMARY); if (!value) value = _bond_get_option(self, NM_SETTING_BOND_OPTION_ACTIVE_SLAVE); + } else if (nm_streq(option, NM_SETTING_BOND_OPTION_XMIT_HASH_POLICY)) { + if (_nm_utils_ascii_str_to_int64( + _bond_get_option(self, NM_SETTING_BOND_OPTION_BALANCE_SLB), + 10, + 0, + 1, + -1) + == 1) { + /* balance-slb implies vlan+srcmac */ + return "5"; + } } else value = _bond_get_option(self, option); @@ -840,6 +853,7 @@ verify(NMSetting *setting, NMConnection *connection, GError **error) const char *arp_ip_target = NULL; const char *lacp_rate; const char *primary; + const char *s; NMBondMode bond_mode; guint i; const NMUtilsNamedValue *n; @@ -1067,6 +1081,32 @@ verify(NMSetting *setting, NMConnection *connection, GError **error) return FALSE; } + s = _bond_get_option(self, NM_SETTING_BOND_OPTION_BALANCE_SLB); + if (s && _atoi(s) > 0) { + if (bond_mode != NM_BOND_MODE_XOR) { + g_set_error(error, + NM_CONNECTION_ERROR, + NM_CONNECTION_ERROR_INVALID_PROPERTY, + _("%s requires bond mode \"%s\""), + NM_SETTING_BOND_OPTION_BALANCE_SLB, + "balance-xor"); + g_prefix_error(error, "%s.%s: ", NM_SETTING_BOND_SETTING_NAME, NM_SETTING_BOND_OPTIONS); + return FALSE; + } + s = _bond_get_option(self, NM_SETTING_BOND_OPTION_XMIT_HASH_POLICY); + if (s + && _nm_setting_bond_xmit_hash_policy_from_string(s) + != NM_BOND_XMIT_HASH_POLICY_VLAN_SRCMAC) { + g_set_error(error, + NM_CONNECTION_ERROR, + NM_CONNECTION_ERROR_INVALID_PROPERTY, + _("%s requires xmit_hash_policy \"vlan+srcmac\""), + NM_SETTING_BOND_OPTION_BALANCE_SLB); + g_prefix_error(error, "%s.%s: ", NM_SETTING_BOND_SETTING_NAME, NM_SETTING_BOND_OPTIONS); + return FALSE; + } + } + if (!_nm_connection_verify_required_interface_name(connection, error)) return FALSE; diff --git a/src/libnm-core-public/nm-setting-bond.h b/src/libnm-core-public/nm-setting-bond.h index ed44abbff1..10d703bcaa 100644 --- a/src/libnm-core-public/nm-setting-bond.h +++ b/src/libnm-core-public/nm-setting-bond.h @@ -37,6 +37,7 @@ 
G_BEGIN_DECLS #define NM_SETTING_BOND_OPTION_ARP_IP_TARGET "arp_ip_target" #define NM_SETTING_BOND_OPTION_ARP_VALIDATE "arp_validate" #define NM_SETTING_BOND_OPTION_PRIMARY "primary" +#define NM_SETTING_BOND_OPTION_BALANCE_SLB "balance-slb" #define NM_SETTING_BOND_OPTION_PRIMARY_RESELECT "primary_reselect" #define NM_SETTING_BOND_OPTION_FAIL_OVER_MAC "fail_over_mac" #define NM_SETTING_BOND_OPTION_USE_CARRIER "use_carrier"