From 23cbce4bc9c70fc33d3413fc1b9a5f3303498036 Mon Sep 17 00:00:00 2001 From: Beniamino Galvani Date: Wed, 18 Apr 2018 15:21:19 +0200 Subject: [PATCH] Squashed 'shared/n-acd/' content from commit a68b55992 git-subtree-dir: shared/n-acd git-subtree-split: a68b55992dd7b38bdb9dbbdba4a9284ff2c2cce3 --- .editorconfig | 16 + .gitmodules | 6 + .travis.yml | 18 + COPYING | 19 + LICENSE | 201 +++++++ README | 50 ++ meson.build | 19 + src/libnacd.sym | 13 + src/meson.build | 76 +++ src/n-acd.c | 1240 +++++++++++++++++++++++++++++++++++++++++ src/n-acd.h | 94 ++++ src/test-api.c | 84 +++ src/test-basic.c | 13 + src/test-loopback.c | 66 +++ src/test-twice.c | 97 ++++ src/test-unplug.c | 84 +++ src/test-unused.c | 63 +++ src/test.h | 97 ++++ subprojects/c-list | 1 + subprojects/c-siphash | 1 + 20 files changed, 2258 insertions(+) create mode 100644 .editorconfig create mode 100644 .gitmodules create mode 100644 .travis.yml create mode 100644 COPYING create mode 100644 LICENSE create mode 100644 README create mode 100644 meson.build create mode 100644 src/libnacd.sym create mode 100644 src/meson.build create mode 100644 src/n-acd.c create mode 100644 src/n-acd.h create mode 100644 src/test-api.c create mode 100644 src/test-basic.c create mode 100644 src/test-loopback.c create mode 100644 src/test-twice.c create mode 100644 src/test-unplug.c create mode 100644 src/test-unused.c create mode 100644 src/test.h create mode 160000 subprojects/c-list create mode 160000 subprojects/c-siphash diff --git a/.editorconfig b/.editorconfig new file mode 100644 index 0000000000..b41176962d --- /dev/null +++ b/.editorconfig @@ -0,0 +1,16 @@ +# http://EditorConfig.org + +# top-most EditorConfig file +root = true + +# Unix-style newlines with a newline ending every file, utf-8 charset +[*] +end_of_line = lf +insert_final_newline = true +trim_trailing_whitespace = true +charset = utf-8 + +# match config files, set indent to spaces with width of eight +[*.{c,h}] +indent_style = space +indent_size = 
8 diff --git a/.gitmodules b/.gitmodules new file mode 100644 index 0000000000..ec8b866d2f --- /dev/null +++ b/.gitmodules @@ -0,0 +1,6 @@ +[submodule "subprojects/c-list"] + path = subprojects/c-list + url = https://github.com/c-util/c-list.git +[submodule "subprojects/c-siphash"] + path = subprojects/c-siphash + url = https://github.com/c-util/c-siphash.git diff --git a/.travis.yml b/.travis.yml new file mode 100644 index 0000000000..ed0bcf38c4 --- /dev/null +++ b/.travis.yml @@ -0,0 +1,18 @@ +dist: trusty +sudo: required +os: linux +language: c +compiler: + - gcc + - clang + +install: + - curl -L "https://github.com/ninja-build/ninja/releases/download/v1.7.2/ninja-linux.zip" -o "ninja-linux.zip" + - sudo unzip "ninja-linux.zip" -d "/usr/local/bin" + - sudo chmod 755 "/usr/local/bin/ninja" + - pip3 install meson + +script: + - meson "build" + - ninja -C "build" + - sudo MESON_TESTTHREADS=64 ninja -C "build" test diff --git a/COPYING b/COPYING new file mode 100644 index 0000000000..81c0566b88 --- /dev/null +++ b/COPYING @@ -0,0 +1,19 @@ +LICENSE: + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + +COPYRIGHT: (ordered alphabetically) + Copyright (C) 2015-2017 Red Hat, Inc. 
+ +AUTHORS: (ordered alphabetically) + David Herrmann + Tom Gundersen diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000000..5d501a7284 --- /dev/null +++ b/LICENSE @@ -0,0 +1,201 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + +TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + +1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). 
+ + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + +2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + +3. 
Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + +4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, 
in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + +5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + +6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + +7. Disclaimer of Warranty. 
Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + +8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + +9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. 
+ +END OF TERMS AND CONDITIONS + +APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "{}" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + +Copyright {yyyy} {name of copyright owner} + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. diff --git a/README b/README new file mode 100644 index 0000000000..4077cba05e --- /dev/null +++ b/README @@ -0,0 +1,50 @@ +n-acd - IPv4 Address Conflict Detection + +ABOUT: + The n-acd project implements the IPv4 Address Conflict Detection + standard as defined in RFC-5227. The state machine is implemented in a + shared library and provides a stable ISO-C11 API. The implementation is + linux-only and relies heavily on the API behavior of recent linux + kernel releases. + +DETAILS: + https://github.com/nettools/n-acd/wiki + +BUG REPORTS: + https://github.com/nettools/n-acd/issues + +GIT: + git@github.com:nettools/n-acd.git + https://github.com/nettools/n-acd.git + +GITWEB: + https://github.com/nettools/n-acd + +LICENSE: + Apache Software License 2.0 (LICENSE) + See COPYING for details. 
+ +REQUIREMENTS: + The requirements for n-acd are: + + Linux kernel >= 3.0 + libc (e.g., glibc >= 2.16) + + At build-time, the following software is required: + + meson >= 0.41 + pkg-config >= 0.29 + +INSTALL: + The meson build-system is used for n-acd. Contact upstream + documentation for detailed help. In most situations the following + commands are sufficient to build and install n-acd from source: + + $ mkdir build + $ cd build + $ meson setup . .. + $ ninja + $ ninja test + # ninja install + + No custom configuration options are available. diff --git a/meson.build b/meson.build new file mode 100644 index 0000000000..da923c288d --- /dev/null +++ b/meson.build @@ -0,0 +1,19 @@ +project('n-acd', + 'c', + version: '1', + license: 'Apache', + default_options: [ + 'buildtype=release', + 'c_std=c11', + ]) + +add_project_arguments('-D_GNU_SOURCE', language: 'c') +mod_pkgconfig = import('pkgconfig') + +sub_clist = subproject('c-list') +sub_csiphash = subproject('c-siphash') + +dep_clist = sub_clist.get_variable('libclist_dep') +dep_csiphash = sub_csiphash.get_variable('libcsiphash_dep') + +subdir('src') diff --git a/src/libnacd.sym b/src/libnacd.sym new file mode 100644 index 0000000000..c9bd487533 --- /dev/null +++ b/src/libnacd.sym @@ -0,0 +1,13 @@ +LIBNACD_1 { +global: + n_acd_new; + n_acd_free; + n_acd_get_fd; + n_acd_dispatch; + n_acd_pop_event; + n_acd_start; + n_acd_stop; + n_acd_announce; +local: + *; +}; diff --git a/src/meson.build b/src/meson.build new file mode 100644 index 0000000000..ba09d1323b --- /dev/null +++ b/src/meson.build @@ -0,0 +1,76 @@ +# +# target: libnacd.so +# We build both, a static and a shared library. We want our tests to get access +# to internals, so we link them statically. 
+# + +libnacd_private = static_library('nacd-private', + ['n-acd.c'], + c_args: [ + '-fvisibility=hidden', + '-fno-common' + ], + dependencies: [ + dep_clist, + dep_csiphash, + ], + pic: true) +install_headers('n-acd.h') +libnacd_symfile = join_paths(meson.current_source_dir(), 'libnacd.sym') +libnacd_shared = shared_library('nacd', + dependencies: dep_csiphash, + objects: libnacd_private.extract_all_objects(), + install: true, + soversion: 0, + link_depends: libnacd_symfile, + link_args: [ + '-Wl,--no-undefined', + '-Wl,--version-script=@0@'.format(libnacd_symfile) + ]) +mod_pkgconfig.generate(libraries: libnacd_shared, + version: meson.project_version(), + name: 'libnacd', + filebase: 'libnacd', + description: 'IPv4 Address Conflict Detection') + +# +# target: test-api +# The test-api program explicitly links against the shared library, since it +# tests for symbol visibility. +# + +test_api = executable('test-api', + ['test-api.c'], + link_with: libnacd_shared) +test('API Symbol Visibility', test_api) + +# +# target: test-* +# All other tests are listed here. They link against the static library, so +# they can access internals for verification. 
+# + +test_basic = executable('test-basic', + ['test-basic.c'], + link_with: libnacd_private) +test('Basic API Behavior', test_basic) + +test_loopback = executable('test-loopback', + ['test-loopback.c'], + link_with: libnacd_private) +test('Echo Suppression via Loopback', test_loopback) + +test_twice = executable('test-twice', + ['test-twice.c'], + link_with: libnacd_private) +test('Two ACD in Parallel', test_twice) + +test_unplug = executable('test-unplug', + ['test-unplug.c'], + link_with: libnacd_private) +test('Async Interface Hotplug', test_unplug) +# The binary is named after its source file, test-unused.c, like the other tests. +test_unused = executable('test-unused', + ['test-unused.c'], + link_with: libnacd_private) +test('Unconflicted ACD', test_unused) diff --git a/src/n-acd.c b/src/n-acd.c new file mode 100644 index 0000000000..266e5d6f2a --- /dev/null +++ b/src/n-acd.c @@ -0,0 +1,1240 @@ +/* + * IPv4 Address Conflict Detection + * + * This implements the main n-acd API. It is built around an epoll-fd to + * encapsulate a timerfd+socket. The n-acd context has quite straightforward + * lifetime rules. The parameters must be set when the engine is started, and + * they can only be changed by stopping and restarting the engine. The engine + * is started on demand and stopped when no longer needed. + * During the entire lifetime the context can be dispatched. That is, the + * dispatcher does not have to be aware of the context state. After each call + * to dispatch(), the caller must pop all pending events until -EAGAIN is + * returned. + * + * If a conflict is detected, the ACD engine reports to the caller and stops + * the engine. The caller can now modify parameters and restart the engine, if + * required.
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "n-acd.h" + +#define _public_ __attribute__((__visibility__("default"))) + +/* + * These parameters and timing intervals are specified in RFC-5227. The original + * values are: + * + * PROBE_NUM 3 + * PROBE_WAIT 1s + * PROBE_MIN 1s + * PROBE_MAX 3s + * ANNOUNCE_NUM 3 + * ANNOUNCE_WAIT 2s + * ANNOUNCE_INTERVAL 2s + * MAX_CONFLICTS 10 + * RATE_LIMIT_INTERVAL 60s + * DEFEND_INTERVAL 10s + * + * If we assume a best-case and worst-case scenario for non-conflicted runs, we + * end up with a runtime between 4s and 9s to finish the probe. Then it still + * takes a fixed 4s to finish the announcements. + * + * RFC 5227 section 1.1: + * [...] (Note that the values listed here are fixed constants; they are + * not intended to be modifiable by implementers, operators, or end users. + * These constants are given symbolic names here to facilitate the writing + * of future standards that may want to reference this document with + * different values for these named constants; however, at the present time + * no such future standards exist.) [...] + * + * Unfortunately, no-one ever stepped up to write a "future standard" to revise + * the timings. A 9s timeout for successful link setups is not acceptable today. + * Hence, we will just go forward and ignore the proposed values. On both + * wired and wireless local links round-trip latencies of below 3ms are common, + * while latencies above 10ms are rarely seen. We require the caller to set a + * timeout multiplier, where 1 corresponds to a total probe time between 0.5 ms and + * 1.0 ms. On modern networks a multiplier of about 100 should be a reasonable + * default. To comply with the RFC select a multiplier of 9000.
+ */ +#define N_ACD_RFC_PROBE_NUM (3) +#define N_ACD_RFC_PROBE_WAIT_USEC (UINT64_C(111)) /* 111us (1s / 9000) */ +#define N_ACD_RFC_PROBE_MIN_USEC (UINT64_C(111)) /* 111us (1s / 9000) */ +#define N_ACD_RFC_PROBE_MAX_USEC (UINT64_C(333)) /* 333us (3s / 9000) */ +#define N_ACD_RFC_ANNOUNCE_NUM (3) +#define N_ACD_RFC_ANNOUNCE_WAIT_USEC (UINT64_C(222)) /* 222us (2s / 9000) */ +#define N_ACD_RFC_ANNOUNCE_INTERVAL_USEC (UINT64_C(222)) /* 222us (2s / 9000) */ +#define N_ACD_RFC_MAX_CONFLICTS (10) +#define N_ACD_RFC_RATE_LIMIT_INTERVAL_USEC (UINT64_C(60000000)) /* 60s */ +#define N_ACD_RFC_DEFEND_INTERVAL_USEC (UINT64_C(10000000)) /* 10s */ + +/* + * Fake ENETDOWN error-code. We use this as replacement for known EFOOBAR error + * codes. It is explicitly chosen to be outside the known error-code range. + * Whenever we are deep down in a call-stack and notice an ENETDOWN error, we + * return this instead. It is caught by the top-level dispatcher and then + * properly handled. + * This avoids gracefully handling ENETDOWN in call-stacks, but then continuing + * with some work in the callers without noticing the soft failure.
+ */ +#define N_ACD_E_DOWN (INT_MAX) + +#define TIME_INFINITY ((uint64_t) -1) +/* Tags for epoll_event.data.u32: N_ACD_EPOLL_TIMER marks the timerfd; N_ACD_EPOLL_SOCKET presumably marks the packet socket (registration not visible here). */ +enum { + N_ACD_EPOLL_TIMER, + N_ACD_EPOLL_SOCKET, +}; +/* Engine states. The timeout handler only acts in PROBING and ANNOUNCING and rejects the others with -EIO. */ +enum { + N_ACD_STATE_INIT, + N_ACD_STATE_PROBING, + N_ACD_STATE_CONFIGURING, + N_ACD_STATE_ANNOUNCING, +}; +/* Queued event, plus the storage that the event's sender pointer refers to. */ +typedef struct NAcdEventNode { + NAcdEvent event; + uint8_t sender[ETH_ALEN]; + CList link; +} NAcdEventNode; +/* ACD context. All file descriptors are -1 while they are not open. */ +struct NAcd { + /* context */ + unsigned int seed; + int fd_epoll; + int fd_timer; + + /* configuration */ + NAcdConfig config; + uint8_t mac[ETH_ALEN]; + uint64_t timeout_multiplier; + + /* runtime */ + int fd_socket; + unsigned int state; + unsigned int n_iteration; + unsigned int n_conflicts; + unsigned int defend; + uint64_t last_defend; + uint64_t last_conflict; + + /* pending events */ + CList events; + NAcdEventNode *current; +}; +/* Return errno as a strictly positive value; EIO is substituted when errno is 0. */ +static int n_acd_errno(void) { + /* + * Compilers continuously warn about uninitialized variables since they + * cannot deduce that `return -errno;` will always be negative. This + * small wrapper makes sure compilers figure that out. Use it as + * replacement for `errno` read access. Yes, it generates worse code, + * but only marginally and only affects slow-paths. + */ + return abs(errno) ? : EIO; +} +/* Allocate a zeroed, unlinked node for @event and return it in @nodep. Returns 0, or -ENOMEM if the allocation fails. */ +static int n_acd_event_node_new(NAcdEventNode **nodep, unsigned int event) { + NAcdEventNode *node; + + node = calloc(1, sizeof(*node)); + if (!node) + return -ENOMEM; + + node->event.event = event; + node->link = (CList)C_LIST_INIT(node->link); + + *nodep = node; + + return 0; +} +/* Unlink @node from its list and free it. NULL is accepted; the result is always NULL. */ +static NAcdEventNode *n_acd_event_node_free(NAcdEventNode *node) { + if (!node) + return NULL; + + c_list_unlink(&node->link); + free(node); + + return NULL; +} +/* Derive a seed for rand_r() and store it in @random. Returns 0, or a negative errno if clock_gettime() fails. */ +static int n_acd_get_random(unsigned int *random) { + uint8_t hash_seed[] = { 0x3a, 0x0c, 0xa6, 0xdd, 0x44, 0xef, 0x5f, 0x7a, 0x5e, 0xd7, 0x25, 0x37, 0xbf, 0x4e, 0x80, 0xa1 }; + CSipHash hash = C_SIPHASH_NULL; + struct timespec ts; + const uint8_t *p; + int r; + + /* + * We need random jitter for all timeouts when handling ARP probes.
Use + * AT_RANDOM to get a seed for rand_r(3p), if available (should always + * be available on linux). See the time-out scheduler for details. + * Additionally, we include the current time in the seed. This avoids + * using the same jitter in case you run multiple ACD engines in the + * same process. Lastly, the seed is hashed with SipHash24 to avoid + * exposing the value of AT_RANDOM on the network. + */ + c_siphash_init(&hash, hash_seed); + + p = (const uint8_t *)getauxval(AT_RANDOM); + if (p) + c_siphash_append(&hash, p, 16); + + r = clock_gettime(CLOCK_BOOTTIME, &ts); + if (r < 0) + return -n_acd_errno(); + + c_siphash_append(&hash, (const uint8_t *)&ts.tv_sec, sizeof(ts.tv_sec)); + c_siphash_append(&hash, (const uint8_t *)&ts.tv_nsec, sizeof(ts.tv_nsec)); + + *random = c_siphash_finalize(&hash); + return 0; +} +/* Put the engine back into INIT: clear state, defend policy, iteration count and last_defend, disarm the timer and close the socket if open. Queued events, n_conflicts and last_conflict are not touched. */ +static void n_acd_reset(NAcd *acd) { + acd->state = N_ACD_STATE_INIT; + acd->defend = N_ACD_DEFEND_NEVER; + acd->n_iteration = 0; + acd->last_defend = 0; + timerfd_settime(acd->fd_timer, 0, &(struct itimerspec){}, NULL); + + if (acd->fd_socket >= 0) { + assert(acd->fd_epoll >= 0); + epoll_ctl(acd->fd_epoll, EPOLL_CTL_DEL, acd->fd_socket, NULL); + close(acd->fd_socket); + acd->fd_socket = -1; + } +} + +/** + * n_acd_new() - create a new ACD context + * @acdp: output argument for context + * + * Create a new ACD context and return it in @acdp. + * + * Return: 0 on success, or a negative error code on failure.
+ */ +_public_ int n_acd_new(NAcd **acdp) { + NAcd *acd; + int r; + + acd = calloc(1, sizeof(*acd)); + if (!acd) + return -ENOMEM; + + acd->fd_epoll = -1; + acd->fd_timer = -1; + acd->fd_socket = -1; + acd->state = N_ACD_STATE_INIT; + acd->defend = N_ACD_DEFEND_NEVER; + acd->events = (CList)C_LIST_INIT(acd->events); + acd->last_conflict = TIME_INFINITY; + + r = n_acd_get_random(&acd->seed); + if (r < 0) + return r; + + acd->fd_epoll = epoll_create1(EPOLL_CLOEXEC); + if (acd->fd_epoll < 0) { + r = -n_acd_errno(); + goto error; + } + + acd->fd_timer = timerfd_create(CLOCK_BOOTTIME, TFD_CLOEXEC | TFD_NONBLOCK); + if (acd->fd_timer < 0) { + r = -n_acd_errno(); + goto error; + } + + r = epoll_ctl(acd->fd_epoll, EPOLL_CTL_ADD, acd->fd_timer, + &(struct epoll_event){ + .events = EPOLLIN, + .data.u32 = N_ACD_EPOLL_TIMER, + }); + if (r < 0) { + r = -n_acd_errno(); + goto error; + } + + *acdp = acd; + return 0; + +error: + n_acd_free(acd); + return r; +} + +/** + * n_acd_free() - free an ACD context + * + * Frees all resources held by the context. This may be called at any time, + * but doing so invalidates all data owned by the context. + * + * Return: NULL. + */ +_public_ NAcd *n_acd_free(NAcd *acd) { + NAcdEventNode *node; + + if (!acd) + return NULL; + + n_acd_reset(acd); + + acd->current = n_acd_event_node_free(acd->current); + + while ((node = c_list_first_entry(&acd->events, NAcdEventNode, link))) + n_acd_event_node_free(node); + + assert(acd->fd_socket < 0); + + if (acd->fd_timer >= 0) { + assert(acd->fd_epoll >= 0); + epoll_ctl(acd->fd_epoll, EPOLL_CTL_DEL, acd->fd_timer, NULL); + close(acd->fd_timer); + acd->fd_timer = -1; + } + + if (acd->fd_epoll >= 0) { + close(acd->fd_epoll); + acd->fd_epoll = -1; + } + + free(acd); + + return NULL; +} + +/** + * n_acd_get_fd() - get pollable file descriptor + * @acd: ACD context + * @fdp: output argument for file descriptor + * + * Returns a file descriptor in @fdp. 
This filedescriptor can be polled by + * the caller to indicate when the ACD context can be dispatched. + */ +_public_ void n_acd_get_fd(NAcd *acd, int *fdp) { + *fdp = acd->fd_epoll; +} +/* Append @event to acd->events. USED, CONFLICT and DEFENDED copy the ARP operation (converted from big-endian), the sender hardware address and the target address into the node; READY and DOWN carry no payload. Returns 0 or the allocation error. NOTE(review): sender/n_sender are always written through the `used` member, also for CONFLICT and DEFENDED - presumably all three members share one layout; confirm against n-acd.h. */ +static int n_acd_push_event(NAcd *acd, unsigned int event, uint16_t *operation, uint8_t (*sender)[6], uint8_t (*target)[4]) { + NAcdEventNode *node; + int r; + + r = n_acd_event_node_new(&node, event); + if (r < 0) + return r; + + switch (event) { + case N_ACD_EVENT_USED: + node->event.used.operation = be16toh(*operation); + memcpy(node->sender, sender, sizeof(node->sender)); + node->event.used.sender = node->sender; + node->event.used.n_sender = sizeof(node->sender); + memcpy(&node->event.used.target, target, sizeof(node->event.used.target)); + break; + case N_ACD_EVENT_CONFLICT: + node->event.conflict.operation = be16toh(*operation); + memcpy(node->sender, sender, sizeof(node->sender)); + node->event.used.sender = node->sender; + node->event.used.n_sender = sizeof(node->sender); + memcpy(&node->event.conflict.target, target, sizeof(node->event.conflict.target)); + break; + case N_ACD_EVENT_DEFENDED: + node->event.defended.operation = be16toh(*operation); + memcpy(node->sender, sender, sizeof(node->sender)); + node->event.used.sender = node->sender; + node->event.used.n_sender = sizeof(node->sender); + memcpy(&node->event.defended.target, target, sizeof(node->event.defended.target)); + break; + case N_ACD_EVENT_READY: + case N_ACD_EVENT_DOWN: + break; + default: + assert(0); + } + + c_list_link_tail(&acd->events, &node->link); + + return 0; +} +/* Store the current CLOCK_BOOTTIME time, in microseconds, in @nowp. Returns 0 or a negative errno. */ +static int n_acd_now(uint64_t *nowp) { + struct timespec ts; + int r; + + r = clock_gettime(CLOCK_BOOTTIME, &ts); + if (r < 0) + return -n_acd_errno(); + + *nowp = ts.tv_sec * UINT64_C(1000000) + ts.tv_nsec / UINT64_C(1000); + return 0; +} +/* Arm the timerfd to fire once, @u_timeout microseconds from now plus a pseudo-random jitter below @u_jitter microseconds. Returns 0 or a negative errno. */ +static int n_acd_schedule(NAcd *acd, uint64_t u_timeout, unsigned int u_jitter) { + uint64_t u_next = u_timeout; + int r; + + /* + * ACD specifies jitter values to reduce packet storms on the
local + * link. This call accepts the maximum relative jitter value in + * microseconds as @u_jitter. We then use rand_r(3p) to get a + * pseudo-random jitter on top of the real timeout given as @u_timeout. + * Note that rand_r() is fine for this. Before you try to improve the + * RNG, you better spend some time securing ARP. + */ + if (u_jitter) + u_next += rand_r(&acd->seed) % u_jitter; + + /* + * Setting .it_value to 0 in timerfd_settime() disarms the timer. Avoid + * this and always schedule at least 1us. Otherwise, we'd have to + * recursively call into the time-out handler, which we really want to + * avoid. No reason to optimize performance here. + */ + if (!u_next) + u_next = 1; + + r = timerfd_settime(acd->fd_timer, 0, + &(struct itimerspec){ .it_value = { + .tv_sec = u_next / UINT64_C(1000000), + .tv_nsec = u_next % UINT64_C(1000000) * UINT64_C(1000), + } }, NULL); + if (r < 0) + return -n_acd_errno(); + + return 0; +} +/* Broadcast an ARP request whose target is the configured address. @spa is the sender protocol address to put into the packet; with NULL that field stays all-zero. Returns 0 if the packet was sent or can be treated as dropped, -N_ACD_E_DOWN if the device is gone, or another negative errno. */ +static int n_acd_send(NAcd *acd, const struct in_addr *spa) { + struct sockaddr_ll address = { + .sll_family = AF_PACKET, + .sll_protocol = htobe16(ETH_P_ARP), + .sll_ifindex = acd->config.ifindex, + .sll_halen = ETH_ALEN, + .sll_addr = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }, + }; + struct ether_arp arp = { + .ea_hdr.ar_hrd = htobe16(ARPHRD_ETHER), + .ea_hdr.ar_pro = htobe16(ETHERTYPE_IP), + .ea_hdr.ar_hln = sizeof(acd->mac), + .ea_hdr.ar_pln = sizeof(uint32_t), + .ea_hdr.ar_op = htobe16(ARPOP_REQUEST), + }; + ssize_t l; + + memcpy(arp.arp_sha, acd->mac, sizeof(acd->mac)); + memcpy(arp.arp_tpa, &acd->config.ip.s_addr, sizeof(uint32_t)); + + if (spa) + memcpy(arp.arp_spa, &spa->s_addr, sizeof(spa->s_addr)); + + l = sendto(acd->fd_socket, &arp, sizeof(arp), MSG_NOSIGNAL, (struct sockaddr *)&address, sizeof(address)); + if (l == (ssize_t)sizeof(arp)) { + /* Packet was properly sent. */ + return 0; + } else if (l >= 0) { + /* + * Ugh. The packet was truncated. This should not happen, but + * let's just pretend the packet was dropped.
+ */ + return 0; + } else if (errno == EAGAIN || errno == ENOBUFS) { + /* + * In case the output buffer is full, the packet is silently + * dropped. This is just as if the physical layer happened to + * drop the packet. We are not on a reliable medium, so no + * reason to pretend we are. + */ + return 0; + } else if (errno == ENETDOWN || errno == ENXIO) { + /* + * We get ENETDOWN if the network-device goes down or is + * removed. ENXIO might happen on async send-operations if the + * network-device was unplugged and thus the kernel is no + * longer aware of it. + * In any case, we do not allow proceeding with this socket. We + * stop the engine and notify the user gracefully. + */ + return -N_ACD_E_DOWN; + } + + return -n_acd_errno(); +} +/* Count one more conflict. The counter saturates at N_ACD_RFC_MAX_CONFLICTS; once it is saturated, every call stores @now in last_conflict. */ +static void n_acd_remember_conflict(NAcd *acd, uint64_t now) { + if (++acd->n_conflicts >= N_ACD_RFC_MAX_CONFLICTS) { + acd->n_conflicts = N_ACD_RFC_MAX_CONFLICTS; + acd->last_conflict = now; + } +} +/* Handle an expired timer: advance the PROBING or ANNOUNCING sequence by one step. Returns 0 on success, -EIO for a timeout in a state without timers, or the error of the failing helper. */ +static int n_acd_handle_timeout(NAcd *acd) { + int r; + + switch (acd->state) { + case N_ACD_STATE_PROBING: + /* + * We are still PROBING. We send 3 probes with a random timeout + * scheduled between each. If, after a fixed timeout, we did + * not receive any conflict we consider the probing successful. + */ + if (acd->n_iteration >= N_ACD_RFC_PROBE_NUM) { + /* + * All 3 probes succeeded and we waited enough to + * consider this address usable by now. Do not announce + * the address, yet. We must first give the caller a + * chance to configure the address (so they can answer + * ARP requests), before announcing it. But our + * callbacks are not necessarily synchronous (we want + * to allow IPC there), so just notify the caller and + * wait for further instructions, thus effectively + * increasing the probe-wait. + */ + r = n_acd_push_event(acd, N_ACD_EVENT_READY, NULL, NULL, NULL); + if (r) + return r; + + acd->state = N_ACD_STATE_CONFIGURING; + } else { + /* + * We have not sent all 3 probes, yet.
A timer fired, + * so we are ready to send the next probe. If this is + * the third probe, schedule a timer for ANNOUNCE_WAIT + * to give other peers a chance to answer. If this is + * not the third probe, wait between PROBE_MIN and + * PROBE_MAX for the next probe. + * NOTE(review): the jitter argument below is a uint64_t + * product that n_acd_schedule() receives as unsigned int, + * so a very large timeout_multiplier would be truncated. + */ + + r = n_acd_send(acd, NULL); + if (r < 0) + return r; + + if (++acd->n_iteration >= N_ACD_RFC_PROBE_NUM) + r = n_acd_schedule(acd, acd->timeout_multiplier * N_ACD_RFC_ANNOUNCE_WAIT_USEC, 0); + else + r = n_acd_schedule(acd, acd->timeout_multiplier * N_ACD_RFC_PROBE_MIN_USEC, + acd->timeout_multiplier * (N_ACD_RFC_PROBE_MAX_USEC - N_ACD_RFC_PROBE_MIN_USEC)); + if (r < 0) + return r; + } + + break; + + case N_ACD_STATE_ANNOUNCING: + /* + * We are ANNOUNCING, meaning the caller configured the address + * on the interface and is actively using it. We send 3 + * announcements out, in a short interval, and then just + * perform passive conflict detection. + * Note that once all 3 announcements are sent, we no longer + * schedule a timer, so this part should not trigger, anymore. + */ + + r = n_acd_send(acd, &acd->config.ip); + if (r < 0) + return r; + + if (++acd->n_iteration < N_ACD_RFC_ANNOUNCE_NUM) { + r = n_acd_schedule(acd, acd->timeout_multiplier * N_ACD_RFC_ANNOUNCE_INTERVAL_USEC, 0); + if (r < 0) + return r; + } + + break; + + case N_ACD_STATE_INIT: + case N_ACD_STATE_CONFIGURING: + default: + /* + * There are no timeouts in these states. If we trigger one, + * something is fishy. Let the caller deal with this. + */ + return -EIO; + } + + return 0; +} + +static int n_acd_handle_packet(NAcd *acd, struct ether_arp *packet) { + bool hard_conflict; + uint64_t now; + int r; + + /* + * Via BPF we discard any non-conflict packets. There are only 2 types + * that can pass: A conflict on the Sender Protocol Address, or a + * conflict on the Target Protocol Address. + * + * The former we call a hard-conflict. It implies that the sender uses + * the address already.
We must always catch this and in some way react + * to it. Any kind, REQUEST or REPLY must be caught (though it is + * unlikely that we ever catch REPLIES since they tend to be unicasts). + * + * However, in case the Target Protocol Address matches, we just know + * that somebody is looking for the address. Hence, we must also check + * that the packet is an ARP-Probe (Sender Protocol Address is 0). If + * it is, it means someone else does ACD on our address. We call this a + * soft conflict. + */ + if (!memcmp(packet->arp_spa, (uint8_t[4]){ }, sizeof(packet->arp_spa)) && + !memcmp(packet->arp_tpa, &acd->config.ip.s_addr, sizeof(packet->arp_tpa)) && + packet->ea_hdr.ar_op == htobe16(ARPOP_REQUEST)) { + hard_conflict = false; + } else if (!memcmp(packet->arp_spa, &acd->config.ip.s_addr, sizeof(packet->arp_spa))) { + hard_conflict = true; + } else { + /* + * Ignore anything that is specific enough to match the BPF + * filter, but is none of the conflicts described above. + */ + return 0; + } + + r = n_acd_now(&now); + if (r < 0) + return r; + + switch (acd->state) { + case N_ACD_STATE_PROBING: + /* + * Regardless whether this is a hard or soft conflict, we must + * treat this as a probe failure. That is, notify the caller of + * the conflict and wait for further instructions. We do not + * react to this, until the caller tells us what to do. But we + * immediately stop the engine, since there is no point in + * continuing the probing. + */ + n_acd_remember_conflict(acd, now); + n_acd_reset(acd); + r = n_acd_push_event(acd, N_ACD_EVENT_USED, &packet->ea_hdr.ar_op, &packet->arp_sha, &packet->arp_tpa); + if (r) + return r; + + break; + + case N_ACD_STATE_CONFIGURING: + /* + * We are waiting for the caller to configure the interface and + * start ANNOUNCING. 
In this state, we cannot defend the address + * as that would indicate that it is ready to be used, and we + * cannot signal CONFLICT or USED as the caller may already have + * started to use the address (and may have configured the engine + * to always defend it, which means they should be able to rely on + * never losing it after READY). Simply drop the event, and rely + * on the anticipated ANNOUNCE to trigger it again. + */ + + break; + + case N_ACD_STATE_ANNOUNCING: + /* + * We were already instructed to announce the address, which + * means the address is configured and in use. Hence, the + * caller is responsible to serve regular ARP queries. Meaning, + * we can ignore any soft conflicts (other peers doing ACD). + * + * But if we see a hard-conflict, we either defend the address + * according to the caller's instructions, or we report the + * conflict and bail out. + */ + + if (!hard_conflict) + break; + + if (acd->defend == N_ACD_DEFEND_NEVER) { + n_acd_remember_conflict(acd, now); + n_acd_reset(acd); + r = n_acd_push_event(acd, N_ACD_EVENT_CONFLICT, &packet->ea_hdr.ar_op, &packet->arp_sha, &packet->arp_tpa); + if (r) + return r; + } else { + if (now > acd->last_defend + N_ACD_RFC_DEFEND_INTERVAL_USEC) { + r = n_acd_send(acd, &acd->config.ip); + if (r < 0) + return r; + + acd->last_defend = now; + r = n_acd_push_event(acd, N_ACD_EVENT_DEFENDED, &packet->ea_hdr.ar_op, &packet->arp_sha, &packet->arp_tpa); + if (r) + return r; + } else if (acd->defend == N_ACD_DEFEND_ONCE) { + n_acd_remember_conflict(acd, now); + n_acd_reset(acd); + r = n_acd_push_event(acd, N_ACD_EVENT_CONFLICT, &packet->ea_hdr.ar_op, &packet->arp_sha, &packet->arp_tpa); + if (r) + return r; + } else { + r = n_acd_push_event(acd, N_ACD_EVENT_DEFENDED, &packet->ea_hdr.ar_op, &packet->arp_sha, &packet->arp_tpa); + if (r) + return r; + } + } + + break; + + case N_ACD_STATE_INIT: + default: + /* + * The socket should not be dispatched in those states, since + * it is neither allocated nor added 
to epoll. Fail hard if we + * trigger this somehow. + */ + return -EIO; + } + + return 0; +} + +static int n_acd_dispatch_timer(NAcd *acd, struct epoll_event *event) { + uint64_t v; + int r; + + if (event->events & (EPOLLHUP | EPOLLERR)) { + /* + * There is no way to handle either gracefully. If we ignored + * them, we would busy-loop, so lets rather forward the error + * to the caller. + */ + return -EIO; + } + + if (event->events & EPOLLIN) { + for (unsigned int i = 0; i < 128; ++i) { + r = read(acd->fd_timer, &v, sizeof(v)); + if (r == sizeof(v)) { + /* + * We successfully read a timer-value. Handle it and + * return. We do NOT fall-through to EPOLLHUP handling, + * as we always must drain buffers first. + */ + return n_acd_handle_timeout(acd); + } else if (r >= 0) { + /* + * Kernel guarantees 8-byte reads; fail hard if it + * suddenly starts doing weird shit. No clue what to do + * with those values, anyway. + */ + return -EIO; + } else if (errno == EAGAIN) { + /* + * No more pending events. + */ + return 0; + } else { + /* + * Something failed. We use CLOCK_BOOTTIME, so + * ECANCELED cannot happen. Hence, there is no error + * that we could gracefully handle. Fail hard and let + * the caller deal with it. + */ + return -n_acd_errno(); + } + } + + return N_ACD_E_PREEMPTED; + } + + return 0; +} + +static int n_acd_dispatch_socket(NAcd *acd, struct epoll_event *event) { + struct ether_arp packet; + ssize_t l; + + for (unsigned int i = 0; i < 128; ++i) { + /* + * Regardless whether EPOLLIN is set in @event->events, we always + * invoke recv(2). This is a safety-net for sockets, which always fetch + * queued errors on all syscalls. That means, if anything failed on the + * socket, we will be notified via recv(2). This simplifies the code + * and avoid magic EPOLLIN/ERR/HUP juggling. + * + * Note that we must use recv(2) over read(2), since the latter cannot + * deal with empty packets properly. + * + * We explicitly skip passing MSG_TRUNC here. 
We *WANT* + * overlong packets to be retrieved and truncated. Ethernet + * frames might not have byte-granular lengths. Real hardware + * does add trailing padding/garbage, so we must discard this + * here. + */ + l = recv(acd->fd_socket, &packet, sizeof(packet), 0); + if (l == (ssize_t)sizeof(packet)) { + /* + * We read a full ARP packet. We never fall-through to EPOLLHUP + * handling, as we always must drain buffers first. + */ + return n_acd_handle_packet(acd, &packet); + } else if (l >= 0) { + /* + * The BPF filter discards short packets, so error out + * if something slips through for any reason. Don't silently + * ignore it, since we explicitly want to know if something + * went fishy. + */ + return -EIO; + } else if (errno == ENETDOWN || errno == ENXIO) { + /* + * We get ENETDOWN if the network-device goes down or is + * removed. ENXIO might happen on async send-operations if the + * network-device was unplugged and thus the kernel is no + * longer aware of it. + * In any case, we do not allow proceeding with this socket. We + * stop the engine and notify the user gracefully. + */ + return -N_ACD_E_DOWN; + } else if (errno == EAGAIN) { + /* + * We cannot read data from the socket (we got EAGAIN). As a safety net + * check for EPOLLHUP/ERR. Those cannot be disabled with epoll, so we + * must make sure to not busy-loop by ignoring them. Note that we know + * recv(2) on sockets to return an error if either of these epoll-flags + * is set. Hence, if we did not handle it above, we have no other way + * but treating those flags as fatal errors and returning them to the + * caller. + */ + if (event->events & (EPOLLHUP | EPOLLERR)) + return -EIO; + + return 0; + } else { + /* + * Cannot dispatch the packet. This might be due to OOM, HUP, + * or something else. We cannot handle it gracefully so forward + * to the caller. 
+ */ + return -n_acd_errno(); + } + } + + return N_ACD_E_PREEMPTED; +} + +/** + * n_acd_dispatch() - dispatch ACD context + * @acd: ACD context + * + * Return: 0 on successful dispatch of all pending events, N_ACD_E_PREEMPTED in + * case there are still more events to be dispatched, or a + * negative error code on failure. + */ +_public_ int n_acd_dispatch(NAcd *acd) { + struct epoll_event events[2]; + int n, i, r = 0; + bool preempted = false; + + n = epoll_wait(acd->fd_epoll, events, sizeof(events) / sizeof(*events), 0); + if (n < 0) { + return -n_acd_errno(); + } + + for (i = 0; i < n; ++i) { + switch (events[i].data.u32) { + case N_ACD_EPOLL_TIMER: + r = n_acd_dispatch_timer(acd, events + i); + break; + case N_ACD_EPOLL_SOCKET: + r = n_acd_dispatch_socket(acd, events + i); + break; + default: + r = 0; + break; + } + + if (r == N_ACD_E_PREEMPTED) + preempted = true; + else if (r != 0) + break; + } + + if (r == -N_ACD_E_DOWN) { + /* + * N_ACD_E_DOWN is synthesized whenever we notice + * ENETDOWN-related errors on the network interface. This + * allows bailing out of deep call-paths and then handling the + * error gracefully here. + */ + n_acd_reset(acd); + r = n_acd_push_event(acd, N_ACD_EVENT_DOWN, NULL, NULL, NULL); + if (r) + return r; + + return 0; + } + + if (preempted) + return N_ACD_E_PREEMPTED; + else + return r; +} + +/** + * n_acd_pop_event() - get the next pending event + * @acd: ACD context + * @eventp: output argument for the event + * + * Returns a pointer to the next pending event. The event is still owned by + * the context, and is only valid until the next call to n_acd_pop_event() + * or until the context is freed. + * + * The possible events are: + * * N_ACD_EVENT_READY: The configured IP address was probed successfully + * and is ready to be used. Once configured on the + * interface, the caller must call n_acd_announce() + * to announce and start defending the address.
+ * No further events may be received before + * n_acd_announce() has been called. + * * N_ACD_EVENT_USED: Someone is already using the IP address being + * probed. The engine was stopped, and the caller + * may restart it to try again. + * * N_ACD_EVENT_DEFENDED: A conflict was detected for the announced IP + * address, and the engine attempted to defend it. + * This is purely informational, and no action is + * required by the caller. + * * N_ACD_EVENT_CONFLICT: A conflict was detected for the announced IP + * address, and the engine was not able to defend + * it (according to the configured policy). The + * engine has stopped, the caller must stop using + * the address immediately, and may restart the + * engine to retry. + * * N_ACD_EVENT_DOWN: A network error was detected. The engine was + * stopped and it is the responsibility of the + * caller to restart it once the network may be + * functional again. + * + * Returns: 0 on success, N_ACD_E_STOPPED if there are no more events and + * the engine has been stopped, N_ACD_E_DONE if there are no more + * events, but the engine is still running, or a negative error + * code on failure. + */ +_public_ int n_acd_pop_event(NAcd *acd, NAcdEvent **eventp) { + acd->current = n_acd_event_node_free(acd->current); + + if (c_list_is_empty(&acd->events)) { + if (acd->state == N_ACD_STATE_INIT) + return N_ACD_E_STOPPED; + else + return N_ACD_E_DONE; + } + + acd->current = c_list_first_entry(&acd->events, NAcdEventNode, link); + c_list_unlink(&acd->current->link); + + if (eventp) + *eventp = &acd->current->event; + + return 0; +} + +static int n_acd_bind_socket(NAcd *acd, int s) { + /* + * Due to strict aliasing, we cannot get uint32_t/uint16_t pointers to + * acd->config.mac, so provide a union accessor.
+ */ + const union { + uint8_t u8[6]; + uint16_t u16[3]; + uint32_t u32[1]; + } mac = { + .u8 = { + acd->mac[0], + acd->mac[1], + acd->mac[2], + acd->mac[3], + acd->mac[4], + acd->mac[5], + }, + }; + struct sock_filter filter[] = { + /* + * Basic ARP header validation. Make sure the packet-length, + * wire type, protocol type, and address lengths are correct. + */ + BPF_STMT(BPF_LD + BPF_W + BPF_LEN, 0), /* A <- packet length */ + BPF_JUMP(BPF_JMP + BPF_JGE + BPF_K, sizeof(struct ether_arp), 1, 0), /* #packet >= #arp-packet ? */ + BPF_STMT(BPF_RET + BPF_K, 0), /* ignore */ + BPF_STMT(BPF_LD + BPF_H + BPF_ABS, offsetof(struct ether_arp, ea_hdr.ar_hrd)), /* A <- header */ + BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, ARPHRD_ETHER, 1, 0), /* header == ethernet ? */ + BPF_STMT(BPF_RET + BPF_K, 0), /* ignore */ + BPF_STMT(BPF_LD + BPF_H + BPF_ABS, offsetof(struct ether_arp, ea_hdr.ar_pro)), /* A <- protocol */ + BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, ETHERTYPE_IP, 1, 0), /* protocol == IP ? */ + BPF_STMT(BPF_RET + BPF_K, 0), /* ignore */ + BPF_STMT(BPF_LD + BPF_B + BPF_ABS, offsetof(struct ether_arp, ea_hdr.ar_hln)), /* A <- hardware address length */ + BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, sizeof(struct ether_addr), 1, 0), /* length == sizeof(ether_addr)? */ + BPF_STMT(BPF_RET + BPF_K, 0), /* ignore */ + BPF_STMT(BPF_LD + BPF_B + BPF_ABS, offsetof(struct ether_arp, ea_hdr.ar_pln)), /* A <- protocol address length */ + BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, sizeof(struct in_addr), 1, 0), /* length == sizeof(in_addr) ? */ + BPF_STMT(BPF_RET + BPF_K, 0), /* ignore */ + BPF_STMT(BPF_LD + BPF_H + BPF_ABS, offsetof(struct ether_arp, ea_hdr.ar_op)), /* A <- operation */ + BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, ARPOP_REQUEST, 2, 0), /* protocol == request ? */ + BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, ARPOP_REPLY, 1, 0), /* protocol == reply ? */ + BPF_STMT(BPF_RET + BPF_K, 0), /* ignore */ + + /* + * Sender hardware address must be different from ours. 
Note + * that BPF runs in big-endian mode, but assumes immediates are + * given in native-endian. This might look weird on 6-byte mac + * addresses, but is needed to revert the BPF magic. + */ + BPF_STMT(BPF_LD + BPF_IMM, be32toh(mac.u32[0])), /* A <- 4 bytes of client's MAC */ + BPF_STMT(BPF_MISC + BPF_TAX, 0), /* X <- A */ + BPF_STMT(BPF_LD + BPF_W + BPF_ABS, offsetof(struct ether_arp, arp_sha)), /* A <- 4 bytes of SHA */ + BPF_STMT(BPF_ALU + BPF_XOR + BPF_X, 0), /* A xor X */ + BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, 0, 0, 6), /* A == 0 ? */ + BPF_STMT(BPF_LD + BPF_IMM, be16toh(mac.u16[2])), /* A <- remainder of client's MAC */ + BPF_STMT(BPF_MISC + BPF_TAX, 0), /* X <- A */ + BPF_STMT(BPF_LD + BPF_H + BPF_ABS, offsetof(struct ether_arp, arp_sha) + 4), /* A <- remainder of SHA */ + BPF_STMT(BPF_ALU + BPF_XOR + BPF_X, 0), /* A xor X */ + BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, 0, 0, 1), /* A == 0 ? */ + BPF_STMT(BPF_RET + BPF_K, 0), /* ignore */ + + /* + * Sender protocol address or target protocol address must be + * equal to the one we care about. Again, immediates must be + * given in native-endian. + */ + BPF_STMT(BPF_LD + BPF_IMM, be32toh(acd->config.ip.s_addr)), /* A <- clients IP */ + BPF_STMT(BPF_MISC + BPF_TAX, 0), /* X <- A */ + BPF_STMT(BPF_LD + BPF_W + BPF_ABS, offsetof(struct ether_arp, arp_spa)), /* A <- SPA */ + BPF_STMT(BPF_ALU + BPF_XOR + BPF_X, 0), /* X xor A */ + BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, 0, 0, 1), /* A == 0 ? */ + BPF_STMT(BPF_RET + BPF_K, 65535), /* return all */ + BPF_STMT(BPF_LD + BPF_IMM, be32toh(acd->config.ip.s_addr)), /* A <- clients IP */ + BPF_STMT(BPF_MISC + BPF_TAX, 0), /* X <- A */ + BPF_STMT(BPF_LD + BPF_W + BPF_ABS, offsetof(struct ether_arp, arp_tpa)), /* A <- TPA */ + BPF_STMT(BPF_ALU + BPF_XOR + BPF_X, 0), /* X xor A */ + BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, 0, 0, 1), /* A == 0 ? 
*/ + BPF_STMT(BPF_RET + BPF_K, 65535), /* return all */ + BPF_STMT(BPF_RET + BPF_K, 0), /* ignore */ + }; + const struct sock_fprog fprog = { + .len = sizeof(filter) / sizeof(*filter), + .filter = filter, + }; + const struct sockaddr_ll address = { + .sll_family = AF_PACKET, + .sll_protocol = htobe16(ETH_P_ARP), + .sll_ifindex = acd->config.ifindex, + .sll_halen = ETH_ALEN, + .sll_addr = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }, + }; + int r; + + /* + * Install a packet filter that matches on the ARP header and + * addresses, to reduce the number of wake-ups to a minimum. + */ + r = setsockopt(s, SOL_SOCKET, SO_ATTACH_FILTER, &fprog, sizeof(fprog)); + if (r < 0) + return -n_acd_errno(); + + /* + * Bind the packet-socket to ETH_P_ARP and the specified network + * interface. + */ + r = bind(s, (struct sockaddr *)&address, sizeof(address)); + if (r < 0) + return -n_acd_errno(); + + return 0; +} + +static int n_acd_setup_socket(NAcd *acd) { + int r, s; + + s = socket(PF_PACKET, SOCK_DGRAM | SOCK_CLOEXEC | SOCK_NONBLOCK, 0); + if (s < 0) + return -n_acd_errno(); + + r = n_acd_bind_socket(acd, s); + if (r < 0) + goto error; + + r = epoll_ctl(acd->fd_epoll, EPOLL_CTL_ADD, s, + &(struct epoll_event){ + .events = EPOLLIN, + .data.u32 = N_ACD_EPOLL_SOCKET, + }); + if (r < 0) { + r = -n_acd_errno(); + goto error; + } + + acd->fd_socket = s; + return 0; + +error: + close(s); + return r; +} + +/** + * n_acd_start() - start the ACD engine + * @acd: ACD context + * @config: description of interface and desired IP address + * + * Start probing the given address on the given interface. + * + * The engine must not already be running, and there must not be + * any pending events. + * + * Returns: 0 on success, N_ACD_E_INVALID_ARGUMENT in case the configuration + * was invalid, N_ACD_E_BUSY if the engine is running or there are + * pending events, or a negative error code on failure. 
+ */ +_public_ int n_acd_start(NAcd *acd, NAcdConfig *config) { + uint64_t now, delay; + int r; + + if (config->ifindex <= 0 || + config->transport != N_ACD_TRANSPORT_ETHERNET || + config->n_mac != ETH_ALEN || + !memcmp(config->mac, (uint8_t[ETH_ALEN]){ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }, ETH_ALEN) || + !config->ip.s_addr) + return N_ACD_E_INVALID_ARGUMENT; + + if (acd->state != N_ACD_STATE_INIT || !c_list_is_empty(&acd->events)) + return N_ACD_E_BUSY; + + acd->config = *config; + memcpy(acd->mac, config->mac, config->n_mac); + acd->config.mac = acd->mac; + acd->timeout_multiplier = config->timeout_msec; + + r = n_acd_setup_socket(acd); + if (r < 0) + goto error; + + if (acd->timeout_multiplier) { + delay = 0; + acd->n_iteration = 0; + + if (acd->last_conflict != TIME_INFINITY) { + r = n_acd_now(&now); + if (r < 0) + goto error; + + if (now < acd->last_conflict + N_ACD_RFC_RATE_LIMIT_INTERVAL_USEC) + delay = acd->last_conflict + N_ACD_RFC_RATE_LIMIT_INTERVAL_USEC - now; + } + + r = n_acd_schedule(acd, delay, acd->timeout_multiplier * N_ACD_RFC_PROBE_WAIT_USEC); + if (r < 0) + goto error; + } else { + /* + * A zero timeout means we drop the probing altogether, and behave as if + * the last probe succeeded immediately. + */ + acd->n_iteration = N_ACD_RFC_PROBE_NUM; + + r = n_acd_schedule(acd, 0, 0); + if (r < 0) + goto error; + } + + acd->state = N_ACD_STATE_PROBING; + acd->defend = N_ACD_DEFEND_NEVER; + acd->last_defend = 0; + return 0; + +error: + n_acd_reset(acd); + return r; +} + +/** + * n_acd_stop() - stop the ACD engine + * @acd: ACD context + * + * Stop the engine. No new events may be triggered, but pending events are not + * flushed. Before calling n_acd_start() again all pending events must be popped. + * + * Return: 0 on success, negative error code on failure.
+ */ +_public_ int n_acd_stop(NAcd *acd) { + n_acd_reset(acd); + return 0; +} + +/** + * n_acd_announce() - announce the configured IP address + * @acd: ACD context + * @defend: defence policy + * + * Announce the IP address on the local link, and start defending it according + * to the given policy, which must be one of N_ACD_DEFEND_ONCE, + * N_ACD_DEFEND_NEVER, or N_ACD_DEFEND_ALWAYS. + * + * This must be called in response to an N_ACD_EVENT_READY + * event, and only after the given address has been configured on the given + * interface. + * + * Return: 0 on success, N_ACD_E_INVALID_ARGUMENT in case the defence policy + * is invalid, N_ACD_E_BUSY if this is not in response to an + * N_ACD_EVENT_READY event, or a negative error code on failure. + */ +_public_ int n_acd_announce(NAcd *acd, unsigned int defend) { + uint64_t now; + int r; + + if (defend >= _N_ACD_DEFEND_N) + return N_ACD_E_INVALID_ARGUMENT; + if (acd->state != N_ACD_STATE_CONFIGURING) + return N_ACD_E_BUSY; + + /* + * Sending announcements means we finished probing and use the address + * now. We therefore reset the conflict counter in case we adhered to + * the rate-limit. Since probing is properly delayed, a well-behaving + * client will always reset the conflict counter here. However, if you + * force-use an address regardless of conflicts, then this will not + * trigger and the conflict counter stays untouched. + */ + if (acd->last_conflict != TIME_INFINITY) { + r = n_acd_now(&now); + if (r < 0) + return r; + + if (now >= acd->last_conflict + N_ACD_RFC_RATE_LIMIT_INTERVAL_USEC) + acd->n_conflicts = 0; + } + + /* + * Instead of sending the first announcement here, we schedule an idle + * timer. This avoids possibly recursing into the user callback. We + * should never trigger callbacks from arbitrary stacks, but always + * restrict them to the dispatcher.
+ */ + r = n_acd_schedule(acd, 0, 0); + if (r < 0) + return r; + + acd->state = N_ACD_STATE_ANNOUNCING; + acd->defend = defend; + acd->n_iteration = 0; + return 0; +} diff --git a/src/n-acd.h b/src/n-acd.h new file mode 100644 index 0000000000..eb12a53eec --- /dev/null +++ b/src/n-acd.h @@ -0,0 +1,94 @@ +#pragma once + +/* + * IPv4 Address Conflict Detection + * + * This is the public header of the n-acd library, implementing IPv4 Address + * Conflict Detection as described in RFC-5227. This header defines the public + * API and all entry points of n-acd. + */ + +#ifdef __cplusplus +extern "C" { +#endif + +#include +#include + +enum { + _N_ACD_E_SUCCESS, + + N_ACD_E_DONE, + N_ACD_E_STOPPED, + N_ACD_E_PREEMPTED, + + N_ACD_E_INVALID_ARGUMENT, + N_ACD_E_BUSY, +}; + +typedef struct NAcd NAcd; + +typedef struct NAcdConfig { + int ifindex; + unsigned int transport; + const uint8_t *mac; + size_t n_mac; + struct in_addr ip; + uint64_t timeout_msec; +} NAcdConfig; + +typedef struct NAcdEvent { + unsigned int event; + union { + struct { + } ready, down; + struct { + uint16_t operation; + uint8_t *sender; + size_t n_sender; + struct in_addr target; + } used, defended, conflict; + }; +} NAcdEvent; + +enum { + N_ACD_TRANSPORT_ETHERNET, + _N_ACD_TRANSPORT_N, +}; + +enum { + N_ACD_EVENT_READY, + N_ACD_EVENT_USED, + N_ACD_EVENT_DEFENDED, + N_ACD_EVENT_CONFLICT, + N_ACD_EVENT_DOWN, + _N_ACD_EVENT_N, +}; + +enum { + N_ACD_DEFEND_NEVER, + N_ACD_DEFEND_ONCE, + N_ACD_DEFEND_ALWAYS, + _N_ACD_DEFEND_N, +}; + +int n_acd_new(NAcd **acdp); +NAcd *n_acd_free(NAcd *acd); + +void n_acd_get_fd(NAcd *acd, int *fdp); + +int n_acd_dispatch(NAcd *acd); +int n_acd_pop_event(NAcd *acd, NAcdEvent **eventp); +int n_acd_announce(NAcd *acd, unsigned int defend); + +int n_acd_start(NAcd *acd, NAcdConfig *config); +int n_acd_stop(NAcd *acd); + +static inline void n_acd_freep(NAcd **acd) { + if (*acd) + n_acd_free(*acd); +} + +#ifdef __cplusplus +} +#endif diff --git a/src/test-api.c b/src/test-api.c new 
file mode 100644 index 0000000000..697181abaa --- /dev/null +++ b/src/test-api.c @@ -0,0 +1,84 @@ +/* + * Tests for n-acd API + * This verifies the visibility and availability of the public API of the + * n-acd library. + */ + +#include +#include "test.h" + +static void test_api_constants(void) { + assert(N_ACD_DEFEND_NEVER != _N_ACD_DEFEND_N); + assert(N_ACD_DEFEND_ONCE != _N_ACD_DEFEND_N); + assert(N_ACD_DEFEND_ALWAYS != _N_ACD_DEFEND_N); + + assert(N_ACD_EVENT_READY != _N_ACD_EVENT_N); + assert(N_ACD_EVENT_USED != _N_ACD_EVENT_N); + assert(N_ACD_EVENT_DEFENDED != _N_ACD_EVENT_N); + assert(N_ACD_EVENT_CONFLICT != _N_ACD_EVENT_N); + assert(N_ACD_EVENT_DOWN != _N_ACD_EVENT_N); +} + +static void test_api_management(void) { + NAcd *acd = NULL; + int r; + + /* new/free/freep */ + + n_acd_freep(&acd); + + r = n_acd_new(&acd); + assert(!r); + + n_acd_free(acd); +} + +static void test_api_runtime(void) { + NAcdConfig config = { + .ifindex = 1, + .transport = N_ACD_TRANSPORT_ETHERNET, + .mac = (uint8_t[]){ 0xfe, 0xdc, 0xba, 0x98, 0x76, 0x54 }, + .n_mac = ETH_ALEN, + .ip = { htobe32((127 << 24) | (1 << 0)) }, + .timeout_msec = 100, + }; + NAcd *acd; + int r; + + /* get_fd/dispatch/pop_event/start/stop/announce */ + + r = n_acd_new(&acd); + assert(!r); + + n_acd_get_fd(acd, &r); + assert(r >= 0); + r = n_acd_dispatch(acd); + assert(!r); + r = n_acd_pop_event(acd, NULL); + assert(r == N_ACD_E_STOPPED); + r = n_acd_start(acd, &config); + assert(!r); + r = n_acd_start(acd, &config); + assert(r == N_ACD_E_BUSY); + r = n_acd_pop_event(acd, NULL); + assert(r == N_ACD_E_DONE); + r = n_acd_stop(acd); + assert(!r); + r = n_acd_announce(acd, N_ACD_DEFEND_NEVER); + assert(r == N_ACD_E_BUSY); + + n_acd_free(acd); +} + +int main(int argc, char **argv) { + int r; + + r = test_setup(); + if (r) + return r; + + test_api_constants(); + test_api_management(); + test_api_runtime(); + return 0; +} diff --git a/src/test-basic.c b/src/test-basic.c new file mode 100644 index 
0000000000..fa85cb0549 --- /dev/null +++ b/src/test-basic.c @@ -0,0 +1,13 @@ +/* + * Basic Tests + */ + +#include +#include +#include +#include +#include "n-acd.h" + +int main(int argc, char **argv) { + return 0; +} diff --git a/src/test-loopback.c b/src/test-loopback.c new file mode 100644 index 0000000000..98195c93a5 --- /dev/null +++ b/src/test-loopback.c @@ -0,0 +1,66 @@ +/* + * Test on loopback device + * This runs the ACD engine on the loopback device, effectively testing the BPF + * filter of ACD to discard its own packets. This might happen on + * non-spanning-tree networks, or on networks that echo packets. + */ + +#include +#include "test.h" + +static void test_loopback(int ifindex, uint8_t *mac, size_t n_mac) { + NAcdConfig config = { + .ifindex = ifindex, + .transport = N_ACD_TRANSPORT_ETHERNET, + .mac = mac, + .n_mac = n_mac, + .ip = { htobe32((192 << 24) | (168 << 16) | (1 << 0)) }, + .timeout_msec = 100, + }; + struct pollfd pfds; + NAcd *acd; + int r, fd; + + r = n_acd_new(&acd); + assert(!r); + + n_acd_get_fd(acd, &fd); + r = n_acd_start(acd, &config); + assert(!r); + + for (;;) { + NAcdEvent *event; + pfds = (struct pollfd){ .fd = fd, .events = POLLIN }; + r = poll(&pfds, 1, -1); + assert(r >= 0); + + r = n_acd_dispatch(acd); + assert(!r); + + r = n_acd_pop_event(acd, &event); + if (!r) { + assert(event->event == N_ACD_EVENT_READY); + break; + } else { + assert(r == N_ACD_E_DONE); + } + } + + n_acd_free(acd); +} + +int main(int argc, char **argv) { + struct ether_addr mac; + int r, ifindex; + + r = test_setup(); + if (r) + return r; + + r = system("ip link set lo up"); + assert(r == 0); + test_if_query("lo", &ifindex, &mac); + test_loopback(ifindex, mac.ether_addr_octet, sizeof(mac.ether_addr_octet)); + + return 0; +} diff --git a/src/test-twice.c b/src/test-twice.c new file mode 100644 index 0000000000..157e8a2b96 --- /dev/null +++ b/src/test-twice.c @@ -0,0 +1,97 @@ +/* + * Test with unused address twice in parallel + * This runs the ACD engine 
with an unused address on a veth pair, but it runs + * it on both ends. We expect the PROBE to fail on at least one of the devices. + */ + +#include +#include "test.h" + +static void test_unused(int ifindex1, uint8_t *mac1, size_t n_mac1, int ifindex2, uint8_t *mac2, size_t n_mac2) { + NAcdConfig config1 = { + .ifindex = ifindex1, + .transport = N_ACD_TRANSPORT_ETHERNET, + .mac = mac1, + .n_mac = n_mac1, + .ip = { htobe32((192 << 24) | (168 << 16) | (1 << 0)) }, + .timeout_msec = 100, + }; + NAcdConfig config2 = { + .ifindex = ifindex2, + .transport = N_ACD_TRANSPORT_ETHERNET, + .mac = mac2, + .n_mac = n_mac2, + .ip = { htobe32((192 << 24) | (168 << 16) | (1 << 0)) }, + .timeout_msec = 100, + }; + struct pollfd pfds[2]; + NAcd *acd1, *acd2; + int r, fd1, fd2, state1, state2; + + r = n_acd_new(&acd1); + assert(!r); + r = n_acd_new(&acd2); + assert(!r); + + n_acd_get_fd(acd1, &fd1); + n_acd_get_fd(acd2, &fd2); + + r = n_acd_start(acd1, &config1); + assert(!r); + r = n_acd_start(acd2, &config2); + assert(!r); + + for (state1 = state2 = -1; state1 == -1 || state2 == -1; ) { + NAcdEvent *event; + pfds[0] = (struct pollfd){ .fd = fd1, .events = (state1 == -1) ? POLLIN : 0 }; + pfds[1] = (struct pollfd){ .fd = fd2, .events = (state2 == -1) ? 
POLLIN : 0 }; + + r = poll(pfds, sizeof(pfds) / sizeof(*pfds), -1); + assert(r >= 0); + + if (state1 == -1) { + r = n_acd_dispatch(acd1); + assert(!r); + + r = n_acd_pop_event(acd1, &event); + if (!r) { + assert(event->event == N_ACD_EVENT_READY || event->event == N_ACD_EVENT_USED); + state1 = !!(event->event == N_ACD_EVENT_READY); + } else { + assert(r == N_ACD_E_DONE); + } + } + + if (state2 == -1) { + r = n_acd_dispatch(acd2); + assert(!r); + + r = n_acd_pop_event(acd2, &event); + if (!r) { + assert(event->event == N_ACD_EVENT_READY || event->event == N_ACD_EVENT_USED); + state2 = !!(event->event == N_ACD_EVENT_READY); + } else { + assert(r == N_ACD_E_DONE); + } + } + } + + n_acd_free(acd1); + n_acd_free(acd2); + + assert(!state1 || !state2); +} + +int main(int argc, char **argv) { + struct ether_addr mac1, mac2; + int r, ifindex1, ifindex2; + + r = test_setup(); + if (r) + return r; + + test_veth_new(&ifindex1, &mac1, &ifindex2, &mac2); + test_unused(ifindex1, mac1.ether_addr_octet, sizeof(mac2.ether_addr_octet), ifindex2, mac2.ether_addr_octet, sizeof(mac2.ether_addr_octet)); + + return 0; +} diff --git a/src/test-unplug.c b/src/test-unplug.c new file mode 100644 index 0000000000..dd457417ab --- /dev/null +++ b/src/test-unplug.c @@ -0,0 +1,84 @@ +/* + * Unplug device during test run + * Run the ACD engine with an address that is not used by anyone else on the + * link, but DOWN or UNPLUG the device while running. 
+ */
+
+#include
+#include "test.h"
+
+/*
+ * Run one probe for 192.168.0.1 on @ifindex and set the link "down" at a
+ * selectable point of the sequence; the probe must end with N_ACD_EVENT_DOWN.
+ *
+ * @ifindex: index of the interface to probe on
+ * @mac:     hardware address of the interface
+ * @n_mac:   length of @mac in bytes
+ * @run:     checkpoint at which the link is taken down:
+ *           0 = before n_acd_new(), 1 = after n_acd_new(),
+ *           2 = after n_acd_start(), 3 and above = after the
+ *           (@run - 2)-th return from poll()
+ *
+ * Each "if (!run--)" fires when the counter is 0 and then decrements it. As
+ * the counter is unsigned it wraps around after firing, so the injected
+ * "down" effectively happens once.
+ */
+static void test_unplug_down(int ifindex, uint8_t *mac, size_t n_mac, unsigned int run) {
+        NAcdConfig config = {
+                .ifindex = ifindex,
+                .transport = N_ACD_TRANSPORT_ETHERNET,
+                .mac = mac,
+                .n_mac = n_mac,
+                /* unsigned literals: 192 << 24 does not fit into a signed int */
+                .ip = { htobe32((192U << 24) | (168U << 16) | (1U << 0)) },
+                .timeout_msec = 100,
+        };
+        struct pollfd pfds;
+        NAcd *acd;
+        int r, fd;
+
+        if (!run--)
+                test_veth_cmd(ifindex, "down");
+
+        r = n_acd_new(&acd);
+        assert(!r);
+
+        if (!run--)
+                test_veth_cmd(ifindex, "down");
+
+        n_acd_get_fd(acd, &fd);
+        r = n_acd_start(acd, &config);
+        assert(!r);
+
+        if (!run--)
+                test_veth_cmd(ifindex, "down");
+
+        for (;;) {
+                NAcdEvent *event;
+                pfds = (struct pollfd){ .fd = fd, .events = POLLIN };
+                r = poll(&pfds, 1, -1);
+                assert(r >= 0);
+
+                if (!run--)
+                        test_veth_cmd(ifindex, "down");
+
+                r = n_acd_dispatch(acd);
+                assert(!r);
+
+                r = n_acd_pop_event(acd, &event);
+                if (!r) {
+                        if (event->event == N_ACD_EVENT_DOWN) {
+                                break;
+                        } else {
+                                /*
+                                 * The probe finished before the link went
+                                 * down; take it down now and keep waiting
+                                 * for the DOWN event.
+                                 */
+                                assert(event->event == N_ACD_EVENT_READY);
+                                test_veth_cmd(ifindex, "down");
+                        }
+                } else {
+                        /* the only accepted failure; presumably "no event queued" */
+                        assert(r == N_ACD_E_DONE);
+                }
+        }
+
+        n_acd_free(acd);
+}
+
+/*
+ * Entry point: set up the test environment, create a veth pair and run the
+ * unplug test once per checkpoint (0..4), setting the link "up" again after
+ * every run. Returns the non-zero code of test_setup() if the environment
+ * could not be set up, 0 otherwise.
+ */
+int main(int argc, char **argv) {
+        struct ether_addr mac;
+        unsigned int i;
+        int r, ifindex;
+
+        r = test_setup();
+        if (r)
+                return r;
+
+        test_veth_new(&ifindex, &mac, NULL, NULL);
+
+        for (i = 0; i < 5; ++i) {
+                test_unplug_down(ifindex, mac.ether_addr_octet, sizeof(mac.ether_addr_octet), i);
+                test_veth_cmd(ifindex, "up");
+        }
+
+        return 0;
+}
diff --git a/src/test-unused.c b/src/test-unused.c
new file mode 100644
index 0000000000..af1da59c63
--- /dev/null
+++ b/src/test-unused.c
@@ -0,0 +1,63 @@
+/*
+ * Test with unused address
+ * Run the ACD engine with an address that is not used by anyone else on the
+ * link. This should just pass through, with a short, random timeout.
+ */
+
+#include
+#include "test.h"
+
+/*
+ * Probe 192.168.0.1 on @ifindex and require that the first event reported by
+ * the engine is N_ACD_EVENT_READY.
+ *
+ * @ifindex: index of the interface to probe on
+ * @mac:     hardware address of the interface
+ * @n_mac:   length of @mac in bytes
+ *
+ * Any other event, and any unexpected return code of the n-acd calls, aborts
+ * through assert().
+ */
+static void test_unused(int ifindex, const uint8_t *mac, size_t n_mac) {
+        NAcdConfig config = {
+                .ifindex = ifindex,
+                .transport = N_ACD_TRANSPORT_ETHERNET,
+                .mac = mac,
+                .n_mac = n_mac,
+                /* unsigned literals: 192 << 24 does not fit into a signed int */
+                .ip = { htobe32((192U << 24) | (168U << 16) | (1U << 0)) },
+                .timeout_msec = 100,
+        };
+        struct pollfd pfds;
+        NAcd *acd;
+        int r, fd;
+
+        r = n_acd_new(&acd);
+        assert(!r);
+
+        n_acd_get_fd(acd, &fd);
+        r = n_acd_start(acd, &config);
+        assert(!r);
+
+        /* dispatch until the engine queues its first event */
+        for (;;) {
+                NAcdEvent *event;
+                pfds = (struct pollfd){ .fd = fd, .events = POLLIN };
+                r = poll(&pfds, 1, -1);
+                assert(r >= 0);
+
+                r = n_acd_dispatch(acd);
+                assert(!r);
+
+                r = n_acd_pop_event(acd, &event);
+                if (!r) {
+                        assert(event->event == N_ACD_EVENT_READY);
+                        break;
+                } else {
+                        /* the only accepted failure; presumably "no event queued" */
+                        assert(r == N_ACD_E_DONE);
+                }
+        }
+
+        n_acd_free(acd);
+}
+
+/*
+ * Entry point: set up the test environment, create a veth pair and probe on
+ * its first device. Returns the non-zero code of test_setup() if the
+ * environment could not be set up, 0 otherwise.
+ */
+int main(int argc, char **argv) {
+        struct ether_addr mac;
+        int r, ifindex;
+
+        r = test_setup();
+        if (r)
+                return r;
+
+        test_veth_new(&ifindex, &mac, NULL, NULL);
+        test_unused(ifindex, mac.ether_addr_octet, sizeof(mac.ether_addr_octet));
+
+        return 0;
+}
diff --git a/src/test.h b/src/test.h
new file mode 100644
index 0000000000..92315858ba
--- /dev/null
+++ b/src/test.h
@@ -0,0 +1,97 @@
+#pragma once
+
+/*
+ * Test Helpers
+ * Bunch of helpers to setup the environment for networking tests. This
+ * includes net-namespace setups, veth setups, and more.
+ */
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include "n-acd.h"
+
+/*
+ * Look up the interface index and/or the hardware address of the interface
+ * called @name.
+ *
+ * @name:   interface name; must be shorter than IF_NAMESIZE bytes
+ * @indexp: if non-NULL, receives the interface index
+ * @macp:   if non-NULL, receives the hardware address (ETH_ALEN bytes)
+ *
+ * Every failure aborts through assert().
+ */
+static inline void test_if_query(const char *name, int *indexp, struct ether_addr *macp) {
+        struct ifreq ifr = {};
+        size_t l;
+        int r, s;
+
+        l = strlen(name);
+        /* IF_NAMESIZE counts the terminating NUL, so the name must be shorter */
+        assert(l < IF_NAMESIZE);
+
+        if (indexp) {
+                *indexp = if_nametoindex(name);
+                assert(*indexp > 0);
+        }
+
+        if (macp) {
+                s = socket(AF_INET, SOCK_DGRAM, 0);
+                assert(s >= 0);
+
+                /* @ifr is zero-initialized, so the copied name stays NUL-terminated */
+                memcpy(ifr.ifr_name, name, l);
+                r = ioctl(s, SIOCGIFHWADDR, &ifr);
+                assert(r >= 0);
+
+                memcpy(macp->ether_addr_octet, ifr.ifr_hwaddr.sa_data, ETH_ALEN);
+
+                close(s);
+        }
+}
+
+/*
+ * Run "ip link set <name> <cmd>" for the interface with index @ifindex, e.g.
+ * with @cmd "up" or "down". The command is executed through system() and must
+ * return 0; every failure aborts through assert().
+ */
+static inline void test_veth_cmd(int ifindex, const char *cmd) {
+        char *p, name[IF_NAMESIZE + 1] = {};
+        int r;
+
+        p = if_indextoname(ifindex, name);
+        assert(p);
+
+        /* @p is reused here to hold the allocated command line */
+        r = asprintf(&p, "ip link set %s %s", name, cmd);
+        assert(r >= 0);
+
+        /* Again: Ewwww... */
+        r = system(p);
+        assert(r == 0);
+
+        free(p);
+}
+
+/*
+ * Create a veth pair through the "ip" tool, set both ends up and report
+ * their interface indices and hardware addresses.
+ *
+ * @parent_indexp, @parent_macp: optional outputs for "veth0" (may be NULL)
+ * @child_indexp, @child_macp:   optional outputs for "veth1" (may be NULL)
+ *
+ * NOTE: the names "veth0" and "veth1" are hard-coded, so this relies on the
+ * new pair getting exactly these names.
+ */
+static inline void test_veth_new(int *parent_indexp,
+                                 struct ether_addr *parent_macp,
+                                 int *child_indexp,
+                                 struct ether_addr *child_macp) {
+        int r;
+
+        /* Eww... but it works. */
+        r = system("ip link add type veth");
+        assert(r == 0);
+        r = system("ip link set veth0 up");
+        assert(r == 0);
+        r = system("ip link set veth1 up");
+        assert(r == 0);
+
+        test_if_query("veth0", parent_indexp, parent_macp);
+        test_if_query("veth1", child_indexp, child_macp);
+}
+
+/*
+ * Move the calling process into a new network namespace.
+ *
+ * Returns 0 on success. If unshare() fails, the error must be EPERM and 77 is
+ * returned, which the callers use as their exit code (presumably the "test
+ * skipped" code of the test harness; confirm against the build setup).
+ */
+static inline int test_setup(void) {
+        int r;
+
+        r = unshare(CLONE_NEWNET);
+        if (r < 0) {
+                assert(errno == EPERM);
+                return 77;
+        }
+
+        return 0;
+}
diff --git a/subprojects/c-list b/subprojects/c-list
new file mode 160000
index 0000000000..72c59181d6
--- /dev/null
+++ b/subprojects/c-list
@@ -0,0 +1 @@
+Subproject commit 72c59181d677a3f50b201d51f190b1bff02d4279
diff --git a/subprojects/c-siphash b/subprojects/c-siphash
new file mode 160000
index 0000000000..e01ab640dc
--- /dev/null
+++ b/subprojects/c-siphash
@@ -0,0 +1 @@
+Subproject commit e01ab640dcf72dfa6928c94a261bf78cd943d9c3