Squashed 'src/n-acd/' content from commit a600afc87087

git-subtree-dir: src/n-acd
git-subtree-split: a600afc870872bbdfc8081ca68d5665334cb9e6e
This commit is contained in:
Thomas Haller 2021-10-01 15:27:52 +02:00
commit 9412d8fa46
31 changed files with 4348 additions and 0 deletions

11
.editorconfig Normal file
View file

@ -0,0 +1,11 @@
root = true
[*]
end_of_line = lf
insert_final_newline = true
trim_trailing_whitespace = true
charset = utf-8
[*.{c,h}]
indent_style = space
indent_size = 8

122
.github/workflows/ci.yml vendored Normal file
View file

@ -0,0 +1,122 @@
name: Continuous Integration
on:
push:
pull_request:
schedule:
- cron: '0 0 * * *'
jobs:
ci:
name: CI with Default Configuration
runs-on: ubuntu-latest
steps:
#
# Prepare CI
#
# We cannot use the github-action of the `ci-c-util` project, because we
# need privileges in the container. Therefore, fetch the CI sources and
# build the container manually.
#
- name: Fetch CI
uses: actions/checkout@v2
with:
repository: c-util/automation
ref: v1
path: automation
- name: Build CI
working-directory: automation/src/ci-c-util
run: docker build --tag ci-c-util:v1 .
#
# Run CI
#
# Take the CI image we built and run the CI with the default project
# configuration. We do not use valgrind, since it falls-over with bpf(2)
# syscalls.
#
- name: Fetch Sources
uses: actions/checkout@v2
with:
path: source
- name: Run through C-Util CI
run: |
docker run \
--privileged \
-v "$(pwd)/source:/github/workspace" \
"ci-c-util:v1" \
"--m32=1" \
"--source=/github/workspace"
ci-no-ebpf:
name: CI without eBPF
runs-on: ubuntu-latest
steps:
# See above in 'ci' job.
- name: Fetch CI
uses: actions/checkout@v2
with:
repository: c-util/automation
ref: v1
path: automation
- name: Build CI
working-directory: automation/src/ci-c-util
run: docker build --tag ci-c-util:v1 .
#
# Run CI
#
# This again runs the CI, but this time disables eBPF. We do support the
# legacy BPF fallback, so let's make sure we test for it.
#
- name: Fetch Sources
uses: actions/checkout@v2
with:
path: source
- name: Run through C-Util CI
run: |
docker run \
--privileged \
-v "$(pwd)/source:/github/workspace" \
"ci-c-util:v1" \
"--m32=1" \
"--mesonargs=-Debpf=false" \
"--source=/github/workspace"
ci-valgrind:
name: CI through Valgrind
runs-on: ubuntu-latest
steps:
# See above in 'ci' job.
- name: Fetch CI
uses: actions/checkout@v2
with:
repository: c-util/automation
ref: v1
path: automation
- name: Build CI
working-directory: automation/src/ci-c-util
run: docker build --tag ci-c-util:v1 .
#
# Run CI
#
# This again runs the CI, but this time through valgrind. Since some
# syscalls are not implemented on x86-64 32bit compat (e.g., bpf(2)), we
# disable the m32 mode.
#
- name: Fetch Sources
uses: actions/checkout@v2
with:
path: source
- name: Run through C-Util CI
run: |
docker run \
--privileged \
-v "$(pwd)/source:/github/workspace" \
"ci-c-util:v1" \
"--source=/github/workspace" \
"--valgrind=1"

12
.gitmodules vendored Normal file
View file

@ -0,0 +1,12 @@
[submodule "subprojects/c-list"]
path = subprojects/c-list
url = https://github.com/c-util/c-list.git
[submodule "subprojects/c-siphash"]
path = subprojects/c-siphash
url = https://github.com/c-util/c-siphash.git
[submodule "subprojects/c-rbtree"]
path = subprojects/c-rbtree
url = https://github.com/c-util/c-rbtree.git
[submodule "subprojects/c-stdaux"]
path = subprojects/c-stdaux
url = https://github.com/c-util/c-stdaux.git

39
AUTHORS Normal file
View file

@ -0,0 +1,39 @@
LICENSE:
This project is dual-licensed under both the Apache License, Version
2.0, and the GNU Lesser General Public License, Version 2.1+.
AUTHORS-ASL:
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
AUTHORS-LGPL:
This program is free software; you can redistribute it and/or modify it
under the terms of the GNU Lesser General Public License as published
by the Free Software Foundation; either version 2.1 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public License
along with this program; If not, see <http://www.gnu.org/licenses/>.
COPYRIGHT: (ordered alphabetically)
Copyright (C) 2015-2019 Red Hat, Inc.
AUTHORS: (ordered alphabetically)
Beniamino Galvani <bgalvani@redhat.com>
David Rheinsberg <david.rheinsberg@gmail.com>
Thomas Haller <thaller@redhat.com>
Tom Gundersen <teg@jklm.no>

46
NEWS.md Normal file
View file

@ -0,0 +1,46 @@
# n-acd - IPv4 Address Conflict Detection
## CHANGES WITH 2:
* All public destructors now include a variant that returns `void`.
This was requested for easier integration with `glib` and friends.
Similar to the `cleanup` variants, these variants are denoted by a
single-character function-name suffix. E.g., `n_acd_freev()`
* A fallback to `CLOCK_MONOTONIC` is now provided in case
`CLOCK_BOOTTIME` is not supported by the kernel. Note that this is in
no way signalled through the API, so if timers should follow the
`BOOTTIME` rather than monotonic clock, a kernel with this clock is
required.
* The `c-sundry` dependency is no longer needed.
* The `transport` configuration property is now mandatory for
`n_acd_new()`. It defaulted to `ETHERNET` before, by mistake.
* In-source documentation for the public API is now provided.
Contributions from: Beniamino Galvani, David Herrmann, David
Rheinsberg, Thomas Haller, Tom Gundersen
- Tübingen, 2019-03-20
## CHANGES WITH 1:
* Initial release of n-acd. This project implements the IPv4 Address
Conflict Detection standard as defined in RFC-5227. The state machine
is implemented in a shared library and provides a stable ISO-C11 API.
The implementation is linux-only and relies heavily on the API
behavior of recent linux kernel releases.
* Compared to the pre-releases, this release supports many parallel
probes on a single n-acd context. This reduces the number of
allocated network resources to O(1), based on the number of running
parallel probes.
* The n-acd project is now dual-licensed: ASL-2.0 and LGPL-2.1+
Contributions from: Beniamino Galvani, David Herrmann, Thomas Haller,
Tom Gundersen
- Tübingen, 2018-08-08

60
README.md Normal file
View file

@ -0,0 +1,60 @@
n-acd
=====
IPv4 Address Conflict Detection
The n-acd project implements the IPv4 Address Conflict Detection standard as
defined in RFC-5227. The state machine is implemented in a shared library and
provides a stable ISO-C11 API. The implementation is linux-only and relies
heavily on the API behavior of recent linux kernel releases.
### Project
* **Website**: <https://nettools.github.io/n-acd>
* **Bug Tracker**: <https://github.com/nettools/n-acd/issues>
* **Mailing-List**: <https://groups.google.com/forum/#!forum/nettools-devel>
### Requirements
The requirements for this project are:
* `Linux kernel >= 3.19`
* `libc` (e.g., `glibc >= 2.16`)
At build-time, the following software is required:
* `meson >= 0.41`
* `pkg-config >= 0.29`
### Build
The meson build-system is used for this project. Consult upstream
documentation for detailed help. In most situations the following
commands are sufficient to build and install from source:
```sh
mkdir build
cd build
meson setup ..
ninja
meson test
ninja install
```
The following configuration options are available:
* `ebpf`: This boolean controls whether `ebpf` features are used to improve
the packet filtering performance. If disabled, classic bpf will be
used. This feature requires a rather recent kernel (>=3.19).
Default is: true
### Repository:
- **web**: <https://github.com/nettools/n-acd>
- **https**: `https://github.com/nettools/n-acd.git`
- **ssh**: `git@github.com:nettools/n-acd.git`
### License:
- **Apache-2.0** OR **LGPL-2.1-or-later**
- See AUTHORS file for details.

27
meson.build Normal file
View file

@ -0,0 +1,27 @@
# Top-level build definition of the n-acd project: declares the project,
# resolves the bundled c-util subprojects and descends into `src/`.
# NOTE(review): the license string below only names 'Apache', while README.md
# and AUTHORS describe the project as dual-licensed (Apache-2.0 OR
# LGPL-2.1-or-later) - confirm whether this is intentional.
project(
'n-acd',
'c',
version: '2',
license: 'Apache',
default_options: [
'c_std=c11',
],
)
# Used as the description of the generated pkg-config file (see src/meson.build).
project_description = 'IPv4 Address Conflict Detection'
# All C sources are compiled with _GNU_SOURCE defined.
add_project_arguments('-D_GNU_SOURCE', language: 'c')
mod_pkgconfig = import('pkgconfig')
# Bundled dependencies; these match the git submodules under `subprojects/`.
sub_clist = subproject('c-list')
sub_crbtree = subproject('c-rbtree')
sub_csiphash = subproject('c-siphash')
sub_cstdaux = subproject('c-stdaux')
# Dependency objects exported by each subproject.
dep_clist = sub_clist.get_variable('libclist_dep')
dep_crbtree = sub_crbtree.get_variable('libcrbtree_dep')
dep_csiphash = sub_csiphash.get_variable('libcsiphash_dep')
dep_cstdaux = sub_cstdaux.get_variable('libcstdaux_dep')
# Boolean 'ebpf' option from meson_options.txt; consumed by src/meson.build to
# select between the eBPF implementation and the no-op fallback.
use_ebpf = get_option('ebpf')
subdir('src')

1
meson_options.txt Normal file
View file

@ -0,0 +1 @@
option('ebpf', type: 'boolean', value: true, description: 'Enable eBPF packet filtering')

28
src/libnacd.sym Normal file
View file

@ -0,0 +1,28 @@
LIBNACD_2 {
global:
n_acd_config_new;
n_acd_config_free;
n_acd_config_set_ifindex;
n_acd_config_set_transport;
n_acd_config_set_mac;
n_acd_probe_config_new;
n_acd_probe_config_free;
n_acd_probe_config_set_ip;
n_acd_probe_config_set_timeout;
n_acd_new;
n_acd_ref;
n_acd_unref;
n_acd_get_fd;
n_acd_dispatch;
n_acd_pop_event;
n_acd_probe;
n_acd_probe_free;
n_acd_probe_set_userdata;
n_acd_probe_get_userdata;
n_acd_probe_announce;
local:
*;
};

95
src/meson.build Normal file
View file

@ -0,0 +1,95 @@
#
# target: libnacd.so
#
# The library is first built as a private static library with hidden symbol
# visibility. The shared library is then assembled from the very same objects
# and restricted to the symbols listed in the version script `libnacd.sym`.
#
libnacd_symfile = join_paths(meson.current_source_dir(), 'libnacd.sym')
libnacd_deps = [
dep_clist,
dep_crbtree,
dep_csiphash,
dep_cstdaux,
]
libnacd_sources = [
'n-acd.c',
'n-acd-probe.c',
'util/timer.c',
]
# Exactly one BPF backend is compiled in: the real eBPF implementation, or the
# no-op fallback when the `ebpf` option is disabled.
if use_ebpf
libnacd_sources += [
'n-acd-bpf.c',
]
else
libnacd_sources += [
'n-acd-bpf-fallback.c',
]
endif
libnacd_private = static_library(
'nacd-private',
libnacd_sources,
c_args: [
'-fvisibility=hidden',
'-fno-common'
],
dependencies: libnacd_deps,
pic: true,
)
# The shared library reuses the objects of the static library rather than
# recompiling the sources. It is only installed when built stand-alone.
libnacd_shared = shared_library(
'nacd',
objects: libnacd_private.extract_all_objects(),
dependencies: libnacd_deps,
install: not meson.is_subproject(),
soversion: 0,
link_depends: libnacd_symfile,
link_args: [
'-Wl,--no-undefined',
'-Wl,--version-script=@0@'.format(libnacd_symfile)
],
)
# Dependency object for in-tree consumers and parent projects. Note that it
# links against the private static library, not the shared one.
libnacd_dep = declare_dependency(
include_directories: include_directories('.'),
link_with: libnacd_private,
dependencies: libnacd_deps,
version: meson.project_version(),
)
# Public header and pkg-config file are only installed for stand-alone builds.
if not meson.is_subproject()
install_headers('n-acd.h')
mod_pkgconfig.generate(
libraries: libnacd_shared,
version: meson.project_version(),
name: 'libnacd',
filebase: 'libnacd',
description: project_description,
)
endif
#
# target: test-*
#
# test-api is the only test linked against the shared library; all other tests
# use `libnacd_dep` and thus the static library.
#
test_api = executable('test-api', ['test-api.c'], link_with: libnacd_shared)
test('API Symbol Visibility', test_api)
# The eBPF filter test only exists when the eBPF backend is compiled in.
if use_ebpf
test_bpf = executable('test-bpf', ['test-bpf.c'], dependencies: libnacd_dep)
test('eBPF socket filtering', test_bpf)
endif
test_loopback = executable('test-loopback', ['test-loopback.c'], dependencies: libnacd_dep)
test('Echo Suppression via Loopback', test_loopback)
test_timer = executable('test-timer', ['util/test-timer.c'], dependencies: libnacd_dep)
test('Timer helper', test_timer)
# NOTE(review): test-unplug is disabled; the reason is not recorded here.
#test_unplug = executable('test-unplug', ['test-unplug.c'], dependencies: libnacd_dep)
#test('Async Interface Hotplug', test_unplug)
test_veth = executable('test-veth', ['test-veth.c'], dependencies: libnacd_dep)
test('Parallel ACD instances', test_veth)

30
src/n-acd-bpf-fallback.c Normal file
View file

@ -0,0 +1,30 @@
/*
* A noop implementation of eBPF filter for IPv4 Address Conflict Detection
*
* This is a collection of dummy functions that have no effect, but allow
* n-acd to compile without eBPF support.
*
* See n-acd-bpf.c for documentation.
*/
#include <c-stdaux.h>
#include <stddef.h>
#include "n-acd-private.h"
/*
 * No-op replacement for the eBPF map constructor. No map is created; the
 * caller is handed -1 as map file-descriptor and @max_entries is ignored.
 *
 * Return: always 0.
 */
int n_acd_bpf_map_create(int *mapfdp, size_t max_entries) {
        (void)max_entries;

        *mapfdp = -1;

        return 0;
}
/*
 * No-op replacement for adding an address to the eBPF map. Both arguments
 * are ignored, since no map exists in the fallback implementation.
 *
 * Return: always 0.
 */
int n_acd_bpf_map_add(int mapfd, struct in_addr *addrp) {
        (void)mapfd;
        (void)addrp;

        return 0;
}
/*
 * No-op replacement for removing an address from the eBPF map. Both
 * arguments are ignored, since no map exists in the fallback implementation.
 *
 * Return: always 0.
 */
int n_acd_bpf_map_remove(int mapfd, struct in_addr *addrp) {
        (void)mapfd;
        (void)addrp;

        return 0;
}
/*
 * No-op replacement for the eBPF program compiler. No program is loaded; the
 * caller is handed -1 as program file-descriptor. @mapfd and @macp are
 * ignored.
 *
 * Return: always 0.
 */
int n_acd_bpf_compile(int *progfdp, int mapfd, struct ether_addr *macp) {
        (void)mapfd;
        (void)macp;

        *progfdp = -1;

        return 0;
}

317
src/n-acd-bpf.c Normal file
View file

@ -0,0 +1,317 @@
/*
* eBPF filter for IPv4 Address Conflict Detection
*
* An eBPF map and an eBPF program are provided. The map contains all the
* addresses address conflict detection is performed on, and the program
* filters out all packets except exactly the packets relevant to the ACD
* protocol on the addresses currently in the map.
*
* Note that userspace still has to filter the incoming packets, as filters
* are applied when packets are queued on the socket, not when userspace
* receives them. It is therefore possible to receive packets about addresses
* that have already been removed.
*/
#include <c-stdaux.h>
#include <errno.h>
#include <inttypes.h>
#include <linux/bpf.h>
#include <netinet/if_ether.h>
#include <netinet/in.h>
#include <stdlib.h>
#include <string.h>
#include <sys/resource.h>
#include <sys/syscall.h>
#include <unistd.h>
#include "n-acd-private.h"
/*
 * Helpers to spell out single eBPF instructions as `struct bpf_insn` compound
 * literals. They are used to assemble the filter program in
 * n_acd_bpf_compile(). Register arguments are plain register numbers.
 */
/* Packet load at absolute offset IMM, SIZE wide; the program below uses the result in r0 and keeps the skb in r6. */
#define BPF_LD_ABS(SIZE, IMM) \
((struct bpf_insn) { \
.code = BPF_LD | BPF_SIZE(SIZE) | BPF_ABS, \
.dst_reg = 0, \
.src_reg = 0, \
.off = 0, \
.imm = IMM, \
})
/* Memory load: DST = *(SIZE *)(SRC + OFF). */
#define BPF_LDX_MEM(SIZE, DST, SRC, OFF) \
((struct bpf_insn) { \
.code = BPF_LDX | BPF_SIZE(SIZE) | BPF_MEM, \
.dst_reg = DST, \
.src_reg = SRC, \
.off = OFF, \
.imm = 0, \
})
/*
 * Load a map file-descriptor into DST. This is a double-word immediate load
 * and therefore expands to TWO comma-separated instructions: the first
 * carries the low 32 bits of MAP_FD, the second the upper 32 bits.
 */
#define BPF_LD_MAP_FD(DST, MAP_FD) \
((struct bpf_insn) { \
.code = BPF_LD | BPF_DW | BPF_IMM, \
.dst_reg = DST, \
.src_reg = BPF_PSEUDO_MAP_FD, \
.off = 0, \
.imm = (__u32) (MAP_FD), \
}), \
((struct bpf_insn) { \
.code = 0, /* zero is reserved opcode */ \
.dst_reg = 0, \
.src_reg = 0, \
.off = 0, \
.imm = ((__u64) (MAP_FD)) >> 32, \
})
/* 64-bit ALU operation with register operand: DST = DST <OP> SRC. */
#define BPF_ALU_REG(OP, DST, SRC) \
((struct bpf_insn) { \
.code = BPF_ALU64 | BPF_OP(OP) | BPF_X, \
.dst_reg = DST, \
.src_reg = SRC, \
.off = 0, \
.imm = 0, \
})
/* 64-bit ALU operation with immediate operand: DST = DST <OP> IMM. */
#define BPF_ALU_IMM(OP, DST, IMM) \
((struct bpf_insn) { \
.code = BPF_ALU64 | BPF_OP(OP) | BPF_K, \
.dst_reg = DST, \
.src_reg = 0, \
.off = 0, \
.imm = IMM, \
})
/* Register move: DST = SRC. */
#define BPF_MOV_REG(DST, SRC) \
((struct bpf_insn) { \
.code = BPF_ALU64 | BPF_MOV | BPF_X, \
.dst_reg = DST, \
.src_reg = SRC, \
.off = 0, \
.imm = 0, \
})
/* Immediate move: DST = IMM. */
#define BPF_MOV_IMM(DST, IMM) \
((struct bpf_insn) { \
.code = BPF_ALU64 | BPF_MOV | BPF_K, \
.dst_reg = DST, \
.src_reg = 0, \
.off = 0, \
.imm = IMM, \
})
/* Memory store: *(SIZE *)(DST + OFF) = SRC. */
#define BPF_STX_MEM(SIZE, DST, SRC, OFF) \
((struct bpf_insn) { \
.code = BPF_STX | BPF_SIZE(SIZE) | BPF_MEM, \
.dst_reg = DST, \
.src_reg = SRC, \
.off = OFF, \
.imm = 0, \
})
/* Conditional jump on register: if (DST <OP> SRC) skip OFF instructions. */
#define BPF_JMP_REG(OP, DST, SRC, OFF) \
((struct bpf_insn) { \
.code = BPF_JMP | BPF_OP(OP) | BPF_X, \
.dst_reg = DST, \
.src_reg = SRC, \
.off = OFF, \
.imm = 0, \
})
/* Conditional jump on immediate: if (DST <OP> IMM) skip OFF instructions. */
#define BPF_JMP_IMM(OP, DST, IMM, OFF) \
((struct bpf_insn) { \
.code = BPF_JMP | BPF_OP(OP) | BPF_K, \
.dst_reg = DST, \
.src_reg = 0, \
.off = OFF, \
.imm = IMM, \
})
/* Call the eBPF helper function with id FUNC. */
#define BPF_EMIT_CALL(FUNC) \
((struct bpf_insn) { \
.code = BPF_JMP | BPF_CALL, \
.dst_reg = 0, \
.src_reg = 0, \
.off = 0, \
.imm = FUNC, \
})
/* Terminate the program; the program below always sets r0 right before. */
#define BPF_EXIT_INSN() \
((struct bpf_insn) { \
.code = BPF_JMP | BPF_EXIT, \
.dst_reg = 0, \
.src_reg = 0, \
.off = 0, \
.imm = 0, \
})
/*
 * Thin wrapper around the raw bpf(2) system call, issued via syscall(2) with
 * __NR_bpf. The arguments are forwarded unmodified and the result of
 * syscall() is narrowed to `int` and returned as-is.
 */
static int n_acd_syscall_bpf(int cmd, union bpf_attr *attr, unsigned int size) {
        long r;

        r = syscall(__NR_bpf, cmd, attr, size);

        return (int)r;
}
/*
 * n_acd_bpf_map_create() - create the eBPF hash map of monitored addresses
 * @mapfdp:             output argument for the file-descriptor of the new map
 * @max_entries:        maximum number of entries the map can hold
 *
 * The map is keyed by a 32-bit value (the IPv4 address). Values are a single
 * dummy byte, since only the presence of a key is ever checked.
 *
 * Return: 0 on success with the map fd stored in @mapfdp, negative errno on
 *         failure (in which case @mapfdp is left untouched).
 */
int n_acd_bpf_map_create(int *mapfdp, size_t max_entries) {
        union bpf_attr attr;
        int mapfd;

        /*
         * All bytes of the attribute union that are not used by a command
         * must be zero. Hence, clear the entire union first and then assign
         * the members one by one. Assigning a compound literal to the whole
         * union instead would not guarantee that the bytes outside of the
         * initialized member stay zero.
         */
        memset(&attr, 0, sizeof(attr));
        attr.map_type = BPF_MAP_TYPE_HASH;
        attr.key_size = sizeof(uint32_t);
        attr.value_size = sizeof(uint8_t); /* values are never used, but must be set */
        attr.max_entries = max_entries;

        mapfd = n_acd_syscall_bpf(BPF_MAP_CREATE, &attr, sizeof(attr));
        if (mapfd < 0)
                return -errno;

        *mapfdp = mapfd;
        return 0;
}
/*
 * n_acd_bpf_map_add() - add an address to the eBPF map
 * @mapfd:              file-descriptor of the map
 * @addrp:              address to add
 *
 * The address is converted with be32toh() before it is used as key. The entry
 * is added with BPF_NOEXIST, so the kernel refuses to overwrite an existing
 * key.
 *
 * Return: 0 on success, negative errno on failure.
 */
int n_acd_bpf_map_add(int mapfd, struct in_addr *addrp) {
        union bpf_attr attr;
        uint32_t addr = be32toh(addrp->s_addr);
        uint8_t _dummy = 0;
        int r;

        /*
         * Clear the entire union, then assign members individually, so all
         * bytes not used by this command are guaranteed to be zero.
         */
        memset(&attr, 0, sizeof(attr));
        attr.map_fd = mapfd;
        attr.key = (uint64_t)(unsigned long)&addr;
        attr.value = (uint64_t)(unsigned long)&_dummy;
        attr.flags = BPF_NOEXIST;

        r = n_acd_syscall_bpf(BPF_MAP_UPDATE_ELEM, &attr, sizeof(attr));
        if (r < 0)
                return -errno;

        return 0;
}
/*
 * n_acd_bpf_map_remove() - remove an address from the eBPF map
 * @mapfd:              file-descriptor of the map
 * @addrp:              address to remove
 *
 * The address is converted with be32toh() before it is used as key, matching
 * n_acd_bpf_map_add().
 *
 * Return: 0 on success, negative errno on failure.
 */
int n_acd_bpf_map_remove(int mapfd, struct in_addr *addrp) {
        uint32_t addr = be32toh(addrp->s_addr);
        union bpf_attr attr;
        int r;

        /*
         * Clear the entire union, then assign members individually, so all
         * bytes not used by this command are guaranteed to be zero.
         */
        memset(&attr, 0, sizeof(attr));
        attr.map_fd = mapfd;
        attr.key = (uint64_t)(unsigned long)&addr;

        r = n_acd_syscall_bpf(BPF_MAP_DELETE_ELEM, &attr, sizeof(attr));
        if (r < 0)
                return -errno;

        return 0;
}
/*
 * n_acd_bpf_compile() - assemble and load the ACD socket filter
 * @progfdp:            output argument for the file-descriptor of the program
 * @mapfd:              file-descriptor of the map of monitored addresses
 * @macp:               own hardware address; packets sent from it are dropped
 *
 * The program is loaded as BPF_PROG_TYPE_SOCKET_FILTER. It returns 0 to drop
 * a packet, and `sizeof(struct ether_arp)` to accept it. A packet is accepted
 * only if it is a well-formed ethernet/IPv4 ARP packet that does not
 * originate from @macp and whose relevant address (see below) is found in the
 * map behind @mapfd.
 *
 * All jump offsets in the program are relative instruction counts. Be careful
 * to adjust them if instructions are added or removed. Note that
 * BPF_LD_MAP_FD() occupies two instruction slots.
 *
 * Return: 0 on success with the program fd stored in @progfdp, negative errno
 *         on failure (in which case @progfdp is left untouched).
 */
int n_acd_bpf_compile(int *progfdp, int mapfd, struct ether_addr *macp) {
        /* view of the MAC address as one 32-bit and one 16-bit chunk for the comparisons below */
        const union {
                uint8_t u8[6];
                uint16_t u16[3];
                uint32_t u32[1];
        } mac = {
                .u8 = {
                        macp->ether_addr_octet[0],
                        macp->ether_addr_octet[1],
                        macp->ether_addr_octet[2],
                        macp->ether_addr_octet[3],
                        macp->ether_addr_octet[4],
                        macp->ether_addr_octet[5],
                },
        };
        struct bpf_insn prog[] = {
                /* for using BPF_LD_ABS r6 must point to the skb, currently in r1 */
                BPF_MOV_REG(6, 1),                                                      /* r6 = r1 */

                /* drop the packet if it is too short */
                BPF_LDX_MEM(BPF_W, 0, 6, offsetof(struct __sk_buff, len)),              /* r0 = skb->len */
                BPF_JMP_IMM(BPF_JGE, 0, sizeof(struct ether_arp), 2),                   /* if (r0 >= sizeof(ether_arp)) skip 2 */
                BPF_MOV_IMM(0, 0),                                                      /* r0 = 0 */
                BPF_EXIT_INSN(),                                                        /* return */

                /* drop the packet if the header is not as expected */
                BPF_LD_ABS(BPF_H, offsetof(struct ether_arp, arp_hrd)),                 /* r0 = header type */
                BPF_JMP_IMM(BPF_JEQ, 0, ARPHRD_ETHER, 2),                               /* if (r0 == ethernet) skip 2 */
                BPF_MOV_IMM(0, 0),                                                      /* r0 = 0 */
                BPF_EXIT_INSN(),                                                        /* return */

                BPF_LD_ABS(BPF_H, offsetof(struct ether_arp, arp_pro)),                 /* r0 = protocol */
                BPF_JMP_IMM(BPF_JEQ, 0, ETHERTYPE_IP, 2),                               /* if (r0 == IP) skip 2 */
                BPF_MOV_IMM(0, 0),                                                      /* r0 = 0 */
                BPF_EXIT_INSN(),                                                        /* return */

                BPF_LD_ABS(BPF_B, offsetof(struct ether_arp, arp_hln)),                 /* r0 = hw addr length */
                BPF_JMP_IMM(BPF_JEQ, 0, sizeof(struct ether_addr), 2),                  /* if (r0 == sizeof(ether_addr)) skip 2 */
                BPF_MOV_IMM(0, 0),                                                      /* r0 = 0 */
                BPF_EXIT_INSN(),                                                        /* return */

                BPF_LD_ABS(BPF_B, offsetof(struct ether_arp, arp_pln)),                 /* r0 = protocol addr length */
                BPF_JMP_IMM(BPF_JEQ, 0, sizeof(struct in_addr), 2),                     /* if (r0 == sizeof(in_addr)) skip 2 */
                BPF_MOV_IMM(0, 0),                                                      /* r0 = 0 */
                BPF_EXIT_INSN(),                                                        /* return */

                /* drop packets from our own mac address */
                BPF_LD_ABS(BPF_W, offsetof(struct ether_arp, arp_sha)),                 /* r0 = first four bytes of packet mac address */
                BPF_JMP_IMM(BPF_JNE, 0, be32toh(mac.u32[0]), 4),                        /* if (r0 != first four bytes of our mac address) skip 4 */
                BPF_LD_ABS(BPF_H, offsetof(struct ether_arp, arp_sha) + 4),             /* r0 = last two bytes of packet mac address */
                BPF_JMP_IMM(BPF_JNE, 0, be16toh(mac.u16[2]), 2),                        /* if (r0 != last two bytes of our mac address) skip 2 */
                BPF_MOV_IMM(0, 0),                                                      /* r0 = 0 */
                BPF_EXIT_INSN(),                                                        /* return */

                /*
                 * We listen for two kinds of packets:
                 *  Conflicts)
                 *    These are requests or replies with the sender address not set to INADDR_ANY. The
                 *    conflicted address is the sender address, remember this in r7.
                 *  Probes)
                 *    These are requests with the sender address set to INADDR_ANY. The probed address
                 *    is the target address, remember this in r7.
                 *  Any other packets are dropped.
                 */
                BPF_LD_ABS(BPF_W, offsetof(struct ether_arp, arp_spa)),                 /* r0 = sender ip address */
                BPF_JMP_IMM(BPF_JEQ, 0, 0, 7),                                          /* if (r0 == 0) skip 7 */
                BPF_MOV_REG(7, 0),                                                      /* r7 = r0 */
                BPF_LD_ABS(BPF_H, offsetof(struct ether_arp, arp_op)),                  /* r0 = operation */
                BPF_JMP_IMM(BPF_JEQ, 0, ARPOP_REQUEST, 3),                              /* if (r0 == request) skip 3 */
                BPF_JMP_IMM(BPF_JEQ, 0, ARPOP_REPLY, 2),                                /* if (r0 == reply) skip 2 */
                BPF_MOV_IMM(0, 0),                                                      /* r0 = 0 */
                BPF_EXIT_INSN(),                                                        /* return */
                BPF_JMP_IMM(BPF_JA, 0, 0, 6),                                           /* skip 6 */
                BPF_LD_ABS(BPF_W, offsetof(struct ether_arp, arp_tpa)),                 /* r0 = target ip address */
                BPF_MOV_REG(7, 0),                                                      /* r7 = r0 */
                BPF_LD_ABS(BPF_H, offsetof(struct ether_arp, arp_op)),                  /* r0 = operation */
                BPF_JMP_IMM(BPF_JEQ, 0, ARPOP_REQUEST, 2),                              /* if (r0 == request) skip 2 */
                BPF_MOV_IMM(0, 0),                                                      /* r0 = 0 */
                BPF_EXIT_INSN(),                                                        /* return */

                /* check if the probe or conflict is for an address we are monitoring */
                BPF_STX_MEM(BPF_W, 10, 7, -4),                                          /* *(uint32_t*)(fp - 4) = r7 */
                BPF_MOV_REG(2, 10),                                                     /* r2 = fp */
                BPF_ALU_IMM(BPF_ADD, 2, -4),                                            /* r2 -= 4 */
                BPF_LD_MAP_FD(1, mapfd),                                                /* r1 = mapfd */
                BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),                                /* r0 = map_lookup_elem(r1, r2) */
                BPF_JMP_IMM(BPF_JNE, 0, 0, 2),                                          /* if (r0 != NULL) skip 2 */
                BPF_MOV_IMM(0, 0),                                                      /* r0 = 0 */
                BPF_EXIT_INSN(),                                                        /* return */

                /* return exactly the packet length */
                BPF_MOV_IMM(0, sizeof(struct ether_arp)),                               /* r0 = sizeof(struct ether_arp) */
                BPF_EXIT_INSN(),                                                        /* return */
        };
        union bpf_attr attr;
        int progfd;

        /*
         * All bytes of the attribute union that are not used by a command
         * must be zero. Hence, clear the entire union first and then assign
         * the members one by one. Assigning a compound literal to the whole
         * union instead would not guarantee that the bytes outside of the
         * initialized member stay zero.
         */
        memset(&attr, 0, sizeof(attr));
        attr.prog_type = BPF_PROG_TYPE_SOCKET_FILTER;
        attr.insns = (uint64_t)(unsigned long)prog;
        attr.insn_cnt = sizeof(prog) / sizeof(*prog);
        attr.license = (uint64_t)(unsigned long)"ASL";

        progfd = n_acd_syscall_bpf(BPF_PROG_LOAD, &attr, sizeof(attr));
        if (progfd < 0)
                return -errno;

        *progfdp = progfd;
        return 0;
}

154
src/n-acd-private.h Normal file
View file

@ -0,0 +1,154 @@
#pragma once
#include <c-list.h>
#include <c-rbtree.h>
#include <c-stdaux.h>
#include <errno.h>
#include <inttypes.h>
#include <netinet/if_ether.h>
#include <netinet/in.h>
#include <stdbool.h>
#include <stdlib.h>
#include "util/timer.h"
#include "n-acd.h"
typedef struct NAcdEventNode NAcdEventNode;
/*
 * This augments the error-codes with internal ones that are never exposed.
 * Numbering starts at `_N_ACD_E_N` (declared outside this header; presumably
 * the end-marker of the public error codes in n-acd.h - TODO confirm), so the
 * first usable internal code is `_N_ACD_E_N + 1`.
 */
enum {
_N_ACD_INTERNAL = _N_ACD_E_N,
N_ACD_E_DROPPED,
};
/*
 * States of the probe state-machine, stored in `NAcdProbe.state`. A fresh
 * probe starts in PROBING (see N_ACD_PROBE_NULL()). The transitions are
 * implemented in n-acd-probe.c.
 */
enum {
N_ACD_PROBE_STATE_PROBING,
N_ACD_PROBE_STATE_CONFIGURING,
N_ACD_PROBE_STATE_ANNOUNCING,
N_ACD_PROBE_STATE_FAILED,
};
/*
 * Context configuration, collecting the parameters for a new NAcd context.
 */
struct NAcdConfig {
/* interface index */
int ifindex;
/* transport type; initialized to `_N_ACD_TRANSPORT_N` by N_ACD_CONFIG_NULL() */
unsigned int transport;
/* hardware address buffer, and (presumably) the number of valid bytes in it */
uint8_t mac[ETH_ALEN];
size_t n_mac;
};
/*
 * Static initializer for NAcdConfig. All members but `transport` are
 * zero-initialized. The argument @_x is unused.
 */
#define N_ACD_CONFIG_NULL(_x) { \
.transport = _N_ACD_TRANSPORT_N, \
}
/*
 * Probe configuration, collecting the parameters for a new probe. See
 * n_acd_probe_config_set_ip() and n_acd_probe_config_set_timeout().
 */
struct NAcdProbeConfig {
/* address to probe for */
struct in_addr ip;
/* probe timeout in milliseconds */
uint64_t timeout_msecs;
};
/*
 * Static initializer for NAcdProbeConfig. The timeout defaults to
 * `N_ACD_TIMEOUT_RFC5227`, everything else is zero-initialized. The argument
 * @_x is unused.
 */
#define N_ACD_PROBE_CONFIG_NULL(_x) { \
.timeout_msecs = N_ACD_TIMEOUT_RFC5227, \
}
/*
 * A queued event. Each node carries two list links, named after the context
 * and the probe (presumably so it can be queued on both `NAcd.event_list`
 * and `NAcdProbe.event_list` - TODO confirm against n-acd.c).
 */
struct NAcdEventNode {
CList acd_link;
CList probe_link;
/* the event payload */
NAcdEvent event;
/* hardware address buffer associated with the event */
uint8_t sender[ETH_ALEN];
/* NOTE(review): presumably set once the event was handed out to the API user - confirm */
bool is_public : 1;
};
/*
 * Static initializer for NAcdEventNode. Both list links are initialized as
 * unlinked; @_x must be the object being initialized.
 */
#define N_ACD_EVENT_NODE_NULL(_x) { \
.acd_link = C_LIST_INIT((_x).acd_link), \
.probe_link = C_LIST_INIT((_x).probe_link), \
}
/*
 * The ACD context object. It is reference counted and owns the resources
 * that are shared by all probes running on it.
 */
struct NAcd {
/* reference counter; starts at 1 */
unsigned long n_refs;
/* state for rand_r(3p), used to calculate timeout jitter */
unsigned int seed;
/* file-descriptors; -1 if unset */
int fd_epoll;
int fd_socket;
/* all linked probes, indexed by their IP address (duplicates allowed) */
CRBTree ip_tree;
CList event_list;
/* timer that all probe timeouts are scheduled on */
Timer timer;
/* BPF map */
/* map file-descriptor; -1 if unset */
int fd_bpf_map;
/* number of addresses currently in the map */
size_t n_bpf_map;
/* presumably the capacity the map was created with - TODO confirm */
size_t max_bpf_map;
/* configuration */
int ifindex;
uint8_t mac[ETH_ALEN];
/* flags */
bool preempted : 1;
};
/*
 * Static initializer for NAcd. @_x must be the object being initialized,
 * since the list and timer initializers are self-referential.
 */
#define N_ACD_NULL(_x) { \
.n_refs = 1, \
.fd_epoll = -1, \
.fd_socket = -1, \
.ip_tree = C_RBTREE_INIT, \
.event_list = C_LIST_INIT((_x).event_list), \
.timer = TIMER_NULL((_x).timer), \
.fd_bpf_map = -1, \
}
/*
 * The probe object, representing the state-machine of a single ACD run on a
 * context.
 */
struct NAcdProbe {
/* owning context; the probe holds a reference to it */
NAcd *acd;
/* link in `acd->ip_tree`, indexed by `ip` */
CRBNode ip_node;
CList event_list;
/* timeout scheduled on `acd->timer` */
Timeout timeout;
/* configuration */
/* address this probe checks for */
struct in_addr ip;
/* user-provided timeout in milliseconds, used as multiplier for the N_ACD_RFC_*_NSEC constants */
uint64_t timeout_multiplier;
void *userdata;
/* state */
/* one of N_ACD_PROBE_STATE_* */
unsigned int state;
unsigned int n_iteration;
/* defense policy; initialized to N_ACD_DEFEND_NEVER */
unsigned int defend;
uint64_t last_defend;
};
/*
 * Static initializer for NAcdProbe. @_x must be the object being initialized,
 * since the node, list and timeout initializers are self-referential.
 */
#define N_ACD_PROBE_NULL(_x) { \
.ip_node = C_RBNODE_INIT((_x).ip_node), \
.event_list = C_LIST_INIT((_x).event_list), \
.timeout = TIMEOUT_INIT((_x).timeout), \
.state = N_ACD_PROBE_STATE_PROBING, \
.defend = N_ACD_DEFEND_NEVER, \
}
/*
 * Internal entry points shared between the translation units of the library.
 * None of these are listed in libnacd.sym, so they are not exported from the
 * shared library.
 */
/* events */
int n_acd_event_node_new(NAcdEventNode **nodep);
NAcdEventNode *n_acd_event_node_free(NAcdEventNode *node);
/* contexts */
void n_acd_remember(NAcd *acd, uint64_t now, bool success);
int n_acd_raise(NAcd *acd, NAcdEventNode **nodep, unsigned int event);
int n_acd_send(NAcd *acd, const struct in_addr *tpa, const struct in_addr *spa);
/* makes sure the kernel BPF map has space for at least one more entry */
int n_acd_ensure_bpf_map_space(NAcd *acd);
/* probes */
int n_acd_probe_new(NAcdProbe **probep, NAcd *acd, NAcdProbeConfig *config);
int n_acd_probe_raise(NAcdProbe *probe, NAcdEventNode **nodep, unsigned int event);
int n_acd_probe_handle_timeout(NAcdProbe *probe);
int n_acd_probe_handle_packet(NAcdProbe *probe, struct ether_arp *packet, bool hard_conflict);
/* eBPF */
/* implemented by n-acd-bpf.c, or by the no-op n-acd-bpf-fallback.c if eBPF is disabled */
int n_acd_bpf_map_create(int *mapfdp, size_t max_elements);
int n_acd_bpf_map_add(int mapfd, struct in_addr *addr);
int n_acd_bpf_map_remove(int mapfd, struct in_addr *addr);
int n_acd_bpf_compile(int *progfdp, int mapfd, struct ether_addr *mac);
/* inline helpers */
/*
 * Cleanup helper taking a pointer to an event-node pointer. If the pointed-to
 * pointer is non-NULL, the node is released via n_acd_event_node_free(). The
 * caller's variable itself is not modified.
 */
static inline void n_acd_event_node_freep(NAcdEventNode **node) {
        NAcdEventNode *victim = *node;

        if (!victim)
                return;

        n_acd_event_node_free(victim);
}

712
src/n-acd-probe.c Normal file
View file

@ -0,0 +1,712 @@
/*
* IPv4 Address Conflict Detection
*
* This file implements the probe object. A probe is basically the
* state-machine of a single ACD run. It takes an address to probe for, checks
* for conflicts and then defends it once configured.
*/
#include <assert.h>
#include <c-rbtree.h>
#include <c-stdaux.h>
#include <endian.h>
#include <errno.h>
#include <inttypes.h>
#include <limits.h>
#include <netinet/if_ether.h>
#include <netinet/in.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/types.h>
#include <unistd.h>
#include "n-acd.h"
#include "n-acd-private.h"
/*
* These parameters and timing intervals are specified in RFC-5227. The
* original values are:
*
* PROBE_NUM 3
* PROBE_WAIT 1s
* PROBE_MIN 1s
* PROBE_MAX 3s
* ANNOUNCE_NUM 3
* ANNOUNCE_WAIT 2s
* ANNOUNCE_INTERVAL 2s
* MAX_CONFLICTS 10
* RATE_LIMIT_INTERVAL 60s
* DEFEND_INTERVAL 10s
*
* If we assume a best-case and worst-case scenario for non-conflicted runs, we
* end up with a runtime between 4s and 9s to finish the probe. Then it still
* takes a fixed 4s to finish the announcements.
*
* RFC 5227 section 1.1:
* [...] (Note that the values listed here are fixed constants; they are
* not intended to be modifiable by implementers, operators, or end users.
* These constants are given symbolic names here to facilitate the writing
* of future standards that may want to reference this document with
* different values for these named constants; however, at the present time
* no such future standards exist.) [...]
*
* Unfortunately, no-one ever stepped up to write a "future standard" to revise
* the timings. A 9s timeout for successful link setups is not acceptable today.
* Hence, we will just go forward and ignore the proposed values. On both
* wired and wireless local links round-trip latencies of below 3ms are common.
* We require the caller to set a timeout multiplier, where 1 corresponds to a
* total probe time between 0.5 ms and 1.0 ms. On modern networks a multiplier
* of about 100 should be a reasonable default. To comply with the RFC select a
* multiplier of 9000.
*/
/*
 * The probe and announce intervals below are the RFC-5227 values divided by
 * 9000 and expressed in nanoseconds. They are meant to be multiplied by the
 * user-provided timeout in milliseconds, so a multiplier of 9000 yields the
 * original RFC timings. The conflict rate-limit and defend intervals are
 * given unscaled, as absolute nanosecond values.
 */
#define N_ACD_RFC_PROBE_NUM (3)
#define N_ACD_RFC_PROBE_WAIT_NSEC (UINT64_C(111111)) /* 1/9 ms */
#define N_ACD_RFC_PROBE_MIN_NSEC (UINT64_C(111111)) /* 1/9 ms */
#define N_ACD_RFC_PROBE_MAX_NSEC (UINT64_C(333333)) /* 3/9 ms */
#define N_ACD_RFC_ANNOUNCE_NUM (3)
#define N_ACD_RFC_ANNOUNCE_WAIT_NSEC (UINT64_C(222222)) /* 2/9 ms */
#define N_ACD_RFC_ANNOUNCE_INTERVAL_NSEC (UINT64_C(222222)) /* 2/9 ms */
#define N_ACD_RFC_MAX_CONFLICTS (10)
#define N_ACD_RFC_RATE_LIMIT_INTERVAL_NSEC (UINT64_C(60000000000)) /* 60s */
#define N_ACD_RFC_DEFEND_INTERVAL_NSEC (UINT64_C(10000000000)) /* 10s */
/**
 * n_acd_probe_config_new() - create probe configuration
 * @configp:            output argument for new probe configuration
 *
 * Allocate a fresh probe configuration, initialized to its defaults, and
 * return it in @configp. On success, the caller becomes the sole owner of the
 * returned object.
 *
 * A probe configuration merely collects parameters for probes. It does not
 * validate them; that is left to whoever consumes the configuration.
 *
 * Return: 0 on success, negative error code on failure.
 */
_c_public_ int n_acd_probe_config_new(NAcdProbeConfig **configp) {
        _c_cleanup_(n_acd_probe_config_freep) NAcdProbeConfig *fresh = NULL;

        fresh = malloc(sizeof(*fresh));
        if (fresh == NULL)
                return -ENOMEM;

        *fresh = (NAcdProbeConfig)N_ACD_PROBE_CONFIG_NULL(*fresh);

        /* hand over ownership and disarm the cleanup handler */
        *configp = fresh;
        fresh = NULL;

        return 0;
}
/**
 * n_acd_probe_config_free() - destroy probe configuration
 * @config:             configuration to operate on, or NULL
 *
 * Release the probe configuration and everything it owns. Passing NULL is
 * explicitly allowed and does nothing.
 *
 * Return: NULL is returned.
 */
_c_public_ NAcdProbeConfig *n_acd_probe_config_free(NAcdProbeConfig *config) {
        /* free(3) on NULL is a no-op, so no explicit guard is needed */
        free(config);

        return NULL;
}
/**
 * n_acd_probe_config_set_ip() - set ip property
 * @config:             configuration to operate on
 * @ip:                 ip to set
 *
 * Store a copy of @ip as the IP property of @config. The value is taken
 * as-is, without any validation.
 *
 * The IP property is the address a probe checks for. The caller is
 * responsible for making sure the address is valid and can be used.
 */
_c_public_ void n_acd_probe_config_set_ip(NAcdProbeConfig *config, struct in_addr ip) {
        config->ip = ip;
}
/**
 * n_acd_probe_config_set_timeout() - set timeout property
 * @config:             configuration to operate on
 * @msecs:              timeout to set, in milliseconds
 *
 * Select the timeout of a conflict detection probe. The value mandated by
 * the specification is available as `N_ACD_TIMEOUT_RFC5227` and corresponds
 * to 9 seconds.
 *
 * A timeout of 0 skips conflict detection entirely; the address is then
 * advertised and defended immediately.
 *
 * API users should pick a timeout that suits the transport in use. `ACD`
 * operates on the link layer only, so timeouts in the range of hundreds of
 * milliseconds should be more than enough for any modern network. Keep in
 * mind that a larger value directly delays connecting to a network, since an
 * address should not be used before conflict detection has finished.
 *
 * Using the specification default is **discouraged**. It is way too slow and
 * not appropriate for modern networks.
 *
 * Default value is `N_ACD_TIMEOUT_RFC5227`.
 */
_c_public_ void n_acd_probe_config_set_timeout(NAcdProbeConfig *config, uint64_t msecs) {
        config->timeout_msecs = msecs;
}
/*
 * Arm the timeout of @probe on the timer of its context. The deadline is the
 * current time of that timer plus @n_timeout, plus a pseudo-random jitter in
 * the range [0, @n_jitter) if @n_jitter is non-zero.
 */
static void n_acd_probe_schedule(NAcdProbe *probe, uint64_t n_timeout, unsigned int n_jitter) {
        NAcd *acd = probe->acd;
        uint64_t deadline;

        timer_now(&acd->timer, &deadline);
        deadline += n_timeout;

        /*
         * ACD specifies jitter values to reduce packet storms on the local
         * link. @n_jitter is the maximum relative jitter in nanoseconds. The
         * actual jitter is drawn via rand_r(3p), seeded from the context, and
         * added on top of the real timeout.
         */
        if (n_jitter != 0) {
                uint64_t noise;

                noise = ((uint64_t)rand_r(&acd->seed) << 32) | (uint64_t)rand_r(&acd->seed);
                deadline += noise % n_jitter;
        }

        timeout_schedule(&probe->timeout, &acd->timer, deadline);
}
/*
 * Disarm the timeout of @probe. Counterpart to n_acd_probe_schedule().
 */
static void n_acd_probe_unschedule(NAcdProbe *probe) {
        Timeout *pending = &probe->timeout;

        timeout_unschedule(pending);
}
/*
 * Check whether @probe is linked into the IP tree of its context and no
 * neighbouring probe in that tree carries the same IP address. Since the
 * tree is ordered by IP, probes with equal addresses are adjacent, so only
 * the direct successor and predecessor are inspected.
 *
 * Return: true if @probe is linked and the only probe for its address,
 *         false otherwise (including when @probe is not linked at all).
 */
static bool n_acd_probe_is_unique(NAcdProbe *probe) {
        NAcdProbe *neighbour;

        if (!c_rbnode_is_linked(&probe->ip_node))
                return false;

        neighbour = c_rbnode_entry(c_rbnode_next(&probe->ip_node), NAcdProbe, ip_node);
        if (neighbour && neighbour->ip.s_addr == probe->ip.s_addr)
                return false;

        neighbour = c_rbnode_entry(c_rbnode_prev(&probe->ip_node), NAcdProbe, ip_node);

        return !(neighbour && neighbour->ip.s_addr == probe->ip.s_addr);
}
/*
 * Link @probe into the IP tree of its context and, if it is the first probe
 * for its address, add the address to the kernel BPF map.
 *
 * Return: 0 on success, otherwise the error returned by
 *         n_acd_ensure_bpf_map_space() or n_acd_bpf_map_add(). On failure the
 *         probe is left unlinked.
 */
static int n_acd_probe_link(NAcdProbe *probe) {
        NAcd *acd = probe->acd;
        CRBNode **slot, *parent;
        int r;

        /* the kernel bpf map must have room for at least one more entry */
        r = n_acd_ensure_bpf_map_space(acd);
        if (r)
                return r;

        /*
         * Insert the probe into the context, indexed by its IP. Duplicates
         * are perfectly fine here. Whoever does not want them has to avoid
         * them on their own. Duplicates on the same context never conflict
         * with each other, though.
         */
        parent = NULL;
        for (slot = &acd->ip_tree.root; *slot; ) {
                NAcdProbe *other = c_rbnode_entry(*slot, NAcdProbe, ip_node);

                parent = *slot;
                slot = (probe->ip.s_addr < other->ip.s_addr) ? &parent->left : &parent->right;
        }
        c_rbtree_add(&acd->ip_tree, parent, slot, &probe->ip_node);

        /* if another probe already covers this address, the map has it already */
        if (!n_acd_probe_is_unique(probe))
                return 0;

        r = n_acd_bpf_map_add(acd->fd_bpf_map, &probe->ip);
        if (r) {
                /*
                 * Keep the invariant that the IP address is linked in
                 * userspace iff it is linked in the kernel.
                 */
                c_rbnode_unlink(&probe->ip_node);
                return r;
        }

        ++acd->n_bpf_map;
        return 0;
}
/*
 * Unlink @probe from its context.
 *
 * If @probe is the last linked probe for its address, the address is removed
 * from the kernel BPF map first and the map counter is decremented. The
 * uniqueness check must run before the probe leaves the tree.
 */
static void n_acd_probe_unlink(NAcdProbe *probe) {
        if (n_acd_probe_is_unique(probe)) {
                int r = n_acd_bpf_map_remove(probe->acd->fd_bpf_map, &probe->ip);

                c_assert(r >= 0);
                --probe->acd->n_bpf_map;
        }

        c_rbnode_unlink(&probe->ip_node);
}
/*
 * Allocate a new probe for the address in @config on @acd, link it and arm
 * its first timeout.
 *
 * Return: 0 on success with the new probe stored in @probep,
 *         N_ACD_E_INVALID_ARGUMENT if the configured address is 0, -ENOMEM on
 *         allocation failure, or the error returned by n_acd_probe_link().
 */
int n_acd_probe_new(NAcdProbe **probep, NAcd *acd, NAcdProbeConfig *config) {
        _c_cleanup_(n_acd_probe_freep) NAcdProbe *probe = NULL;
        int r;

        if (!config->ip.s_addr)
                return N_ACD_E_INVALID_ARGUMENT;

        probe = malloc(sizeof(*probe));
        if (!probe)
                return -ENOMEM;

        *probe = (NAcdProbe)N_ACD_PROBE_NULL(*probe);
        probe->acd = n_acd_ref(acd);
        probe->ip = config->ip;

        /*
         * The user-provided timeout is the maximum length of one entire
         * probe interval up to the first announcement. With the parameters
         * of the spec this is:
         *
         *     PROBE_WAIT + PROBE_MAX + PROBE_MAX + ANNOUNCE_WAIT
         *   = 1s         + 3s        + 3s        + 2s
         *   = 9s
         *
         * So a timeout of 9000ms reproduces the spec-provided values.
         *
         * Semantically the timeout is divided by 1ns/1000000, which turns it
         * into nanoseconds and then into a unit-less multiplier. Rather than
         * dividing here, all our timeouts are multiplied by 1000000
         * statically at compile time, so the user-provided value can be used
         * as multiplier without any conversion.
         */
        probe->timeout_multiplier = config->timeout_msecs;

        r = n_acd_probe_link(probe);
        if (r)
                return r;

        /*
         * The state machine must not be advanced outside of n_acd_dispatch(),
         * so the first step is always triggered through the timer.
         */
        if (!probe->timeout_multiplier) {
                /*
                 * No multiplier: pretend all probes were already sent
                 * successfully and arm a fake timer so we proceed to the
                 * announcements.
                 */
                probe->n_iteration = N_ACD_RFC_PROBE_NUM;
                n_acd_probe_schedule(probe, 0, 0);
        } else {
                /* The first probe goes out after ~PROBE_WAIT. */
                probe->n_iteration = 0;
                n_acd_probe_schedule(probe,
                                     0,
                                     probe->timeout_multiplier * N_ACD_RFC_PROBE_WAIT_NSEC);
        }

        /* Hand ownership to the caller and disarm the cleanup handler. */
        *probep = probe;
        probe = NULL;
        return 0;
}
/**
 * n_acd_probe_free() - destroy a probe
 * @probe:      probe to operate on, or NULL
 *
 * Destroy the probe given as @probe. Its pending timeout is cancelled, it is
 * unlinked from its context, its reference to the context is dropped and its
 * memory is released.
 *
 * All events still queued for @probe are freed as well, so no further events
 * are returned for this probe.
 *
 * Passing NULL is a no-op.
 *
 * Return: NULL is returned.
 */
_c_public_ NAcdProbe *n_acd_probe_free(NAcdProbe *probe) {
        NAcdEventNode *node, *t_node;

        if (probe) {
                /* Flush every event that still refers to this probe. */
                c_list_for_each_entry_safe(node, t_node, &probe->event_list, probe_link)
                        n_acd_event_node_free(node);

                n_acd_probe_unschedule(probe);
                n_acd_probe_unlink(probe);
                probe->acd = n_acd_unref(probe->acd);
                free(probe);
        }

        return NULL;
}
/*
 * Raise @event on the context of @probe and attach the resulting event node
 * to the probe.
 *
 * Only READY, USED, DEFENDED and CONFLICT are accepted; the probe pointer of
 * the matching event payload is set to @probe. If @nodep is non-NULL, the new
 * node is returned through it so the caller can fill in further payload.
 *
 * Return: 0 on success, the error of n_acd_raise() if raising fails, or
 *         -ENOTRECOVERABLE for any other event type.
 */
int n_acd_probe_raise(NAcdProbe *probe, NAcdEventNode **nodep, unsigned int event) {
        _c_cleanup_(n_acd_event_node_freep) NAcdEventNode *node = NULL;
        int r;

        r = n_acd_raise(probe->acd, &node, event);
        if (r)
                return r;

        if (event == N_ACD_EVENT_READY) {
                node->event.ready.probe = probe;
        } else if (event == N_ACD_EVENT_USED) {
                node->event.used.probe = probe;
        } else if (event == N_ACD_EVENT_DEFENDED) {
                node->event.defended.probe = probe;
        } else if (event == N_ACD_EVENT_CONFLICT) {
                node->event.conflict.probe = probe;
        } else {
                /* The cleanup handler releases the node again. */
                c_assert(0);
                return -ENOTRECOVERABLE;
        }

        c_list_link_tail(&probe->event_list, &node->probe_link);

        if (nodep)
                *nodep = node;

        /* The node is owned by the event lists now. */
        node = NULL;
        return 0;
}
/*
 * Advance the state machine of @probe after its timeout fired.
 *
 * PROBING: send the next probe and re-arm the timer; once
 * N_ACD_RFC_PROBE_NUM probes were sent and the final wait elapsed, raise
 * N_ACD_EVENT_READY and switch to CONFIGURING.
 * ANNOUNCING: send the next announcement and re-arm the timer until
 * N_ACD_RFC_ANNOUNCE_NUM announcements were sent.
 * Any other state: no timeout is expected, so assert and fail.
 *
 * Return: 0 on success, -ENOTRECOVERABLE on an unexpected state, otherwise
 *         the error returned by n_acd_send() (except N_ACD_E_DROPPED, which
 *         is handled here) or by n_acd_probe_raise().
 */
int n_acd_probe_handle_timeout(NAcdProbe *probe) {
int r;
switch (probe->state) {
case N_ACD_PROBE_STATE_PROBING:
/*
* We are still PROBING. We send 3 probes with a random timeout
* scheduled between each. If, after a fixed timeout, we did
* not receive any conflict we consider the probing successful.
*/
if (probe->n_iteration < N_ACD_RFC_PROBE_NUM) {
/*
* We have not sent all 3 probes, yet. A timer fired,
* so we are ready to send the next probe. If this is
* the third probe, schedule a timer for ANNOUNCE_WAIT
* to give other peers a chance to answer. If this is
* not the third probe, wait between PROBE_MIN and
* PROBE_MAX for the next probe.
*/
r = n_acd_send(probe->acd, &probe->ip, NULL);
if (r) {
if (r != N_ACD_E_DROPPED)
return r;
/*
* Packet was dropped, and we know about it. It
* never reached the network. Reasons are
* manifold, and n_acd_send() raises events if
* necessary.
* From a probe-perspective, we simply pretend
* we never sent the probe and schedule a
* timeout for the next probe, effectively
* doubling a single probe-interval.
*/
} else {
/* Successfully sent, so advance counter. */
++probe->n_iteration;
}
/* Re-check the counter: it only advanced if the send succeeded. */
if (probe->n_iteration < N_ACD_RFC_PROBE_NUM)
n_acd_probe_schedule(probe,
probe->timeout_multiplier * N_ACD_RFC_PROBE_MIN_NSEC,
probe->timeout_multiplier * (N_ACD_RFC_PROBE_MAX_NSEC - N_ACD_RFC_PROBE_MIN_NSEC));
else
n_acd_probe_schedule(probe,
probe->timeout_multiplier * N_ACD_RFC_ANNOUNCE_WAIT_NSEC,
0);
} else {
/*
* All 3 probes succeeded and we waited enough to
* consider this address usable by now. Do not announce
* the address, yet. We must first give the caller a
* chance to configure the address (so they can answer
* ARP requests), before announcing it.
*/
r = n_acd_probe_raise(probe, NULL, N_ACD_EVENT_READY);
if (r)
return r;
probe->state = N_ACD_PROBE_STATE_CONFIGURING;
}
break;
case N_ACD_PROBE_STATE_ANNOUNCING:
/*
* We are ANNOUNCING, meaning the caller configured the address
* on the interface and is actively using it. We send 3
* announcements out, in a short interval, and then just
* perform passive conflict detection.
* Note that once all 3 announcements are sent, we no longer
* schedule a timer, so this part should not trigger, anymore.
*/
r = n_acd_send(probe->acd, &probe->ip, &probe->ip);
if (r) {
if (r != N_ACD_E_DROPPED)
return r;
/*
* See above in STATE_PROBING for details. We know the
* packet was never sent, so we simply try again after
* extending the timer.
*/
} else {
/* Successfully sent, so advance counter. */
++probe->n_iteration;
}
if (probe->n_iteration < N_ACD_RFC_ANNOUNCE_NUM) {
/*
* Announcements are always scheduled according to the
* time-intervals specified in the spec. We always use
* the RFC5227-mandated multiplier.
* If you reconsider this, note that timeout_multiplier
* might be 0 here.
*/
n_acd_probe_schedule(probe,
N_ACD_TIMEOUT_RFC5227 * N_ACD_RFC_ANNOUNCE_INTERVAL_NSEC,
0);
}
break;
case N_ACD_PROBE_STATE_CONFIGURING:
case N_ACD_PROBE_STATE_FAILED:
default:
/*
* There are no timeouts in these states. If we trigger one,
* something is fishy.
*/
c_assert(0);
return -ENOTRECOVERABLE;
}
return 0;
}
/*
 * Handle an ARP packet that was matched to @probe.
 *
 * @probe:              probe the packet was matched to
 * @packet:             received ARP packet; only its sender hardware address
 *                      (arp_sha) is read here and copied into raised events
 * @hard_conflict:      whether the caller classified the packet as a hard
 *                      conflict; only consulted in the ANNOUNCING state
 *
 * PROBING: raise N_ACD_EVENT_USED, stop the timer, unlink the probe and mark
 * it FAILED.
 * CONFIGURING: the packet is ignored.
 * ANNOUNCING: soft conflicts are ignored; hard conflicts are either defended
 * (N_ACD_EVENT_DEFENDED) or reported (N_ACD_EVENT_CONFLICT, probe becomes
 * FAILED), depending on the defend policy and the defend rate limit.
 * Any other state: no packet is expected, so assert and fail.
 *
 * Return: 0 on success, -ENOTRECOVERABLE on an unexpected state, otherwise
 *         the error returned by n_acd_probe_raise() or by n_acd_send()
 *         (except N_ACD_E_DROPPED, which is handled here).
 */
int n_acd_probe_handle_packet(NAcdProbe *probe, struct ether_arp *packet, bool hard_conflict) {
NAcdEventNode *node;
uint64_t now;
int r;
timer_now(&probe->acd->timer, &now);
switch (probe->state) {
case N_ACD_PROBE_STATE_PROBING:
/*
* Regardless whether this is a hard or soft conflict, we must
* treat this as a probe failure. That is, notify the caller of
* the conflict and wait for further instructions. We do not
* react to this, until the caller tells us what to do, but we
* do stop sending further probes.
*/
r = n_acd_probe_raise(probe, &node, N_ACD_EVENT_USED);
if (r)
return r;
/* The event exposes the sender MAC through the node's own buffer. */
node->event.used.sender = node->sender;
node->event.used.n_sender = ETH_ALEN;
memcpy(node->sender, packet->arp_sha, ETH_ALEN);
n_acd_probe_unschedule(probe);
n_acd_probe_unlink(probe);
probe->state = N_ACD_PROBE_STATE_FAILED;
break;
case N_ACD_PROBE_STATE_CONFIGURING:
/*
* We are waiting for the caller to configure the interface and
* start ANNOUNCING. In this state, we cannot defend the
* address as that would indicate that it is ready to be used,
* and we cannot signal CONFLICT or USED as the caller may
* already have started to use the address (and may have
* configured the engine to always defend it, which means they
* should be able to rely on never losing it after READY).
* Simply drop the event, and rely on the anticipated ANNOUNCE
* to trigger it again.
*/
break;
case N_ACD_PROBE_STATE_ANNOUNCING: {
/*
* We were already instructed to announce the address, which
* means the address is configured and in use. Hence, the
* caller is responsible to serve regular ARP queries. Meaning,
* we can ignore any soft conflicts (other peers doing ACD).
*
* But if we see a hard-conflict, we either defend the address
* according to the caller's instructions, or we report the
* conflict and bail out.
*/
bool conflict = false, rate_limited = false;
if (!hard_conflict)
break;
/* True if the last defense was sent less than DEFEND_INTERVAL ago. */
rate_limited = now < probe->last_defend + N_ACD_RFC_DEFEND_INTERVAL_NSEC;
switch (probe->defend) {
case N_ACD_DEFEND_NEVER:
conflict = true;
break;
case N_ACD_DEFEND_ONCE:
if (rate_limited) {
conflict = true;
break;
}
/* fallthrough */
case N_ACD_DEFEND_ALWAYS:
if (!rate_limited) {
r = n_acd_send(probe->acd, &probe->ip, &probe->ip);
if (r) {
if (r != N_ACD_E_DROPPED)
return r;
/* The defense was dropped; with DEFEND_ONCE this counts as a conflict. */
if (probe->defend == N_ACD_DEFEND_ONCE) {
conflict = true;
break;
}
}
/* Only a defense that was actually sent restarts the rate limit. */
if (r != N_ACD_E_DROPPED)
probe->last_defend = now;
}
r = n_acd_probe_raise(probe, &node, N_ACD_EVENT_DEFENDED);
if (r)
return r;
node->event.defended.sender = node->sender;
node->event.defended.n_sender = ETH_ALEN;
memcpy(node->sender, packet->arp_sha, ETH_ALEN);
break;
}
if (conflict) {
r = n_acd_probe_raise(probe, &node, N_ACD_EVENT_CONFLICT);
if (r)
return r;
node->event.conflict.sender = node->sender;
node->event.conflict.n_sender = ETH_ALEN;
memcpy(node->sender, packet->arp_sha, ETH_ALEN);
n_acd_probe_unschedule(probe);
n_acd_probe_unlink(probe);
probe->state = N_ACD_PROBE_STATE_FAILED;
}
break;
}
case N_ACD_PROBE_STATE_FAILED:
default:
/*
* We are not listening for packets in these states. If we receive one,
* something is fishy.
*/
c_assert(0);
return -ENOTRECOVERABLE;
}
return 0;
}
/**
 * n_acd_probe_set_userdata() - set userdata
 * @probe:      probe to operate on
 * @userdata:   userdata pointer to store
 *
 * Store a caller-controlled pointer on @probe. `n-acd` never inspects or
 * dereferences the value; it is entirely owned by the caller.
 *
 * The default value is NULL.
 */
_c_public_ void n_acd_probe_set_userdata(NAcdProbe *probe, void *userdata) {
        probe->userdata = userdata;
}
/**
 * n_acd_probe_get_userdata() - get userdata
 * @probe:      probe to operate on
 * @userdatap:  output argument for the stored userdata pointer
 *
 * Query the userdata pointer that was previously stored through
 * n_acd_probe_set_userdata() and write it to @userdatap.
 *
 * The default value is NULL.
 */
_c_public_ void n_acd_probe_get_userdata(NAcdProbe *probe, void **userdatap) {
        *userdatap = probe->userdata;
}
/**
 * n_acd_probe_announce() - announce the configured IP address
 * @probe:      probe to operate on
 * @defend:     defense policy
 *
 * Announce the IP address on the local link and start defending it according
 * to the given policy, which must be one of N_ACD_DEFEND_NEVER,
 * N_ACD_DEFEND_ONCE, or N_ACD_DEFEND_ALWAYS.
 *
 * This must be called in response to an N_ACD_EVENT_READY event, and only
 * after the given address has been configured on the given network interface.
 *
 * The first announcement is not sent from here. A zero-length timeout is
 * scheduled instead, because the state machine must only be advanced from
 * within n_acd_dispatch().
 *
 * Return: 0 on success, N_ACD_E_INVALID_ARGUMENT in case the defense policy
 *         is invalid.
 */
_c_public_ int n_acd_probe_announce(NAcdProbe *probe, unsigned int defend) {
        if (defend >= _N_ACD_DEFEND_N)
                return N_ACD_E_INVALID_ARGUMENT;

        probe->defend = defend;
        probe->n_iteration = 0;
        probe->state = N_ACD_PROBE_STATE_ANNOUNCING;

        /* Fake timeout; the actual work happens in n_acd_dispatch(). */
        n_acd_probe_schedule(probe, 0, 0);

        return 0;
}

1027
src/n-acd.c Normal file

File diff suppressed because it is too large Load diff

150
src/n-acd.h Normal file
View file

@ -0,0 +1,150 @@
#pragma once
/*
* IPv4 Address Conflict Detection
*
* This is the public header of the n-acd library, implementing IPv4 Address
* Conflict Detection as described in RFC-5227. This header defines the public
* API and all entry points of n-acd.
*/
#ifdef __cplusplus
extern "C" {
#endif
#include <netinet/in.h>
#include <inttypes.h>
#include <stdbool.h>
#include <stdlib.h>
typedef struct NAcd NAcd;
typedef struct NAcdConfig NAcdConfig;
typedef struct NAcdEvent NAcdEvent;
typedef struct NAcdProbe NAcdProbe;
typedef struct NAcdProbeConfig NAcdProbeConfig;
/*
 * Probe timeout, in milliseconds, that makes the probe timings match the
 * values given in RFC 5227 (9 seconds for a full probe interval).
 */
#define N_ACD_TIMEOUT_RFC5227 (UINT64_C(9000))
/*
 * Positive, library-specific result codes. Negative return values are
 * negated errno codes instead.
 */
enum {
_N_ACD_E_SUCCESS,
/* NOTE(review): test-veth accepts this from n_acd_dispatch(); exact meaning is not visible here. */
N_ACD_E_PREEMPTED,
/* An argument was rejected, e.g. a zero probe address or an unknown defend policy. */
N_ACD_E_INVALID_ARGUMENT,
_N_ACD_E_N,
};
/* Supported transports. */
enum {
N_ACD_TRANSPORT_ETHERNET,
_N_ACD_TRANSPORT_N,
};
/* Event types reported through NAcdEvent.event. */
enum {
/* Probing finished without a conflict; the address may be configured. */
N_ACD_EVENT_READY,
/* A conflicting packet was seen while probing; the probe failed. */
N_ACD_EVENT_USED,
/* A hard conflict was seen while announcing and the address was defended. */
N_ACD_EVENT_DEFENDED,
/* A hard conflict was seen while announcing and the address was not defended; the probe failed. */
N_ACD_EVENT_CONFLICT,
/* NOTE(review): presumably reported when the interface goes down (see test-unplug) - confirm. */
N_ACD_EVENT_DOWN,
_N_ACD_EVENT_N,
};
/* Defense policies accepted by n_acd_probe_announce(). */
enum {
/* Every hard conflict is reported as N_ACD_EVENT_CONFLICT. */
N_ACD_DEFEND_NEVER,
/* Defend, unless a defense was already sent within the defend interval. */
N_ACD_DEFEND_ONCE,
/* Always report N_ACD_EVENT_DEFENDED; defense packets are rate-limited. */
N_ACD_DEFEND_ALWAYS,
_N_ACD_DEFEND_N,
};
/*
 * An event as handed out by n_acd_pop_event(). @event is one of the
 * N_ACD_EVENT_* constants and selects which member of the anonymous union
 * is valid.
 */
struct NAcdEvent {
unsigned int event;
union {
/* N_ACD_EVENT_READY: the probe that finished probing. */
struct {
NAcdProbe *probe;
} ready;
/* N_ACD_EVENT_DOWN: carries no payload. */
struct {
} down;
/*
 * N_ACD_EVENT_USED / DEFENDED / CONFLICT: the affected probe and the
 * hardware address of the conflicting sender (@n_sender bytes at @sender).
 */
struct {
NAcdProbe *probe;
uint8_t *sender;
size_t n_sender;
} used, defended, conflict;
};
};
/* configs */
int n_acd_config_new(NAcdConfig **configp);
NAcdConfig *n_acd_config_free(NAcdConfig *config);
void n_acd_config_set_ifindex(NAcdConfig *config, int ifindex);
void n_acd_config_set_transport(NAcdConfig *config, unsigned int transport);
void n_acd_config_set_mac(NAcdConfig *config, const uint8_t *mac, size_t n_mac);
int n_acd_probe_config_new(NAcdProbeConfig **configp);
NAcdProbeConfig *n_acd_probe_config_free(NAcdProbeConfig *config);
void n_acd_probe_config_set_ip(NAcdProbeConfig *config, struct in_addr ip);
void n_acd_probe_config_set_timeout(NAcdProbeConfig *config, uint64_t msecs);
/* contexts */
int n_acd_new(NAcd **acdp, NAcdConfig *config);
NAcd *n_acd_ref(NAcd *acd);
NAcd *n_acd_unref(NAcd *acd);
void n_acd_get_fd(NAcd *acd, int *fdp);
int n_acd_dispatch(NAcd *acd);
int n_acd_pop_event(NAcd *acd, NAcdEvent **eventp);
int n_acd_probe(NAcd *acd, NAcdProbe **probep, NAcdProbeConfig *config);
/* probes */
NAcdProbe *n_acd_probe_free(NAcdProbe *probe);
void n_acd_probe_set_userdata(NAcdProbe *probe, void *userdata);
void n_acd_probe_get_userdata(NAcdProbe *probe, void **userdatap);
int n_acd_probe_announce(NAcdProbe *probe, unsigned int defend);
/* inline helpers */
/*
 * Cleanup helpers. The *p variants take a pointer to the object pointer and
 * release the object if it is non-NULL (suitable for cleanup attributes, as
 * done with n_acd_probe_freep in n-acd-probe.c). The *v variants take the
 * object pointer by value and forward it unconditionally.
 */
static inline void n_acd_config_freep(NAcdConfig **config) {
if (*config)
n_acd_config_free(*config);
}
static inline void n_acd_config_freev(NAcdConfig *config) {
n_acd_config_free(config);
}
static inline void n_acd_probe_config_freep(NAcdProbeConfig **config) {
if (*config)
n_acd_probe_config_free(*config);
}
static inline void n_acd_probe_config_freev(NAcdProbeConfig *config) {
n_acd_probe_config_free(config);
}
/* Drops one reference; does not necessarily destroy the context. */
static inline void n_acd_unrefp(NAcd **acd) {
if (*acd)
n_acd_unref(*acd);
}
static inline void n_acd_unrefv(NAcd *acd) {
n_acd_unref(acd);
}
static inline void n_acd_probe_freep(NAcdProbe **probe) {
if (*probe)
n_acd_probe_free(*probe);
}
static inline void n_acd_probe_freev(NAcdProbe *probe) {
n_acd_probe_free(probe);
}
#ifdef __cplusplus
}
#endif

88
src/test-api.c Normal file
View file

@ -0,0 +1,88 @@
/*
* Tests for n-acd API
* This verifies the visibility and availability of the public API.
*/
#undef NDEBUG
#include <assert.h>
#include <stdlib.h>
#include "n-acd.h"
/*
 * Verify that every public constant is visible. The value is irrelevant:
 * "1 + X" is non-zero for every constant listed (all are non-negative), so
 * each assertion only fails to build if the symbol is missing.
 */
static void test_api_constants(void) {
assert(1 + N_ACD_TIMEOUT_RFC5227);
assert(1 + _N_ACD_E_SUCCESS);
assert(1 + N_ACD_E_PREEMPTED);
assert(1 + N_ACD_E_INVALID_ARGUMENT);
assert(1 + _N_ACD_E_N);
assert(1 + N_ACD_TRANSPORT_ETHERNET);
assert(1 + _N_ACD_TRANSPORT_N);
assert(1 + N_ACD_EVENT_READY);
assert(1 + N_ACD_EVENT_USED);
assert(1 + N_ACD_EVENT_DEFENDED);
assert(1 + N_ACD_EVENT_CONFLICT);
assert(1 + N_ACD_EVENT_DOWN);
assert(1 + _N_ACD_EVENT_N);
assert(1 + N_ACD_DEFEND_NEVER);
assert(1 + N_ACD_DEFEND_ONCE);
assert(1 + N_ACD_DEFEND_ALWAYS);
assert(1 + _N_ACD_DEFEND_N);
}
/*
 * Verify that every public type name is declared. Only pointers are used, so
 * this also works for the types that are opaque in the public header.
 */
static void test_api_types(void) {
assert(sizeof(NAcdEvent*));
assert(sizeof(NAcdConfig*));
assert(sizeof(NAcdProbeConfig*));
assert(sizeof(NAcd*));
assert(sizeof(NAcdProbe*));
}
/*
 * Verify that every public function, including the inline helpers, is
 * declared and can be linked: take the address of each one and check that it
 * is non-NULL.
 */
static void test_api_functions(void) {
void *fns[] = {
/* configs */
(void *)n_acd_config_new,
(void *)n_acd_config_free,
(void *)n_acd_config_set_ifindex,
(void *)n_acd_config_set_transport,
(void *)n_acd_config_set_mac,
(void *)n_acd_probe_config_new,
(void *)n_acd_probe_config_free,
(void *)n_acd_probe_config_set_ip,
(void *)n_acd_probe_config_set_timeout,
/* contexts */
(void *)n_acd_new,
(void *)n_acd_ref,
(void *)n_acd_unref,
(void *)n_acd_get_fd,
(void *)n_acd_dispatch,
(void *)n_acd_pop_event,
(void *)n_acd_probe,
/* probes */
(void *)n_acd_probe_free,
(void *)n_acd_probe_set_userdata,
(void *)n_acd_probe_get_userdata,
(void *)n_acd_probe_announce,
/* inline helpers */
(void *)n_acd_config_freep,
(void *)n_acd_config_freev,
(void *)n_acd_probe_config_freep,
(void *)n_acd_probe_config_freev,
(void *)n_acd_unrefp,
(void *)n_acd_unrefv,
(void *)n_acd_probe_freep,
(void *)n_acd_probe_freev,
};
size_t i;
for (i = 0; i < sizeof(fns) / sizeof(*fns); ++i)
assert(!!fns[i]);
}
/* Run the constant, type and function visibility checks; arguments are unused. */
int main(int argc, char **argv) {
test_api_constants();
test_api_types();
test_api_functions();
return 0;
}

226
src/test-bpf.c Normal file
View file

@ -0,0 +1,226 @@
/*
* eBPF socket filter tests
*/
#undef NDEBUG
#include <assert.h>
#include <c-stdaux.h>
#include <errno.h>
#include <inttypes.h>
#include <netinet/if_ether.h>
#include <netinet/in.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/types.h>
#include <sys/socket.h>
#include <unistd.h>
#include "n-acd.h"
#include "n-acd-private.h"
#include "test.h"
/*
 * Initializer for a struct ether_arp describing an Ethernet/IPv4 ARP packet.
 *
 * @_op:  ARP operation in host byte order (e.g. ARPOP_REQUEST)
 * @_mac: sender hardware address (struct ether_addr *)
 * @_sip: sender protocol address (struct in_addr *, network byte order)
 * @_tip: target protocol address (struct in_addr *, network byte order)
 *
 * The target hardware address is not listed and thus zero-initialized.
 * The arguments are evaluated more than once.
 */
#define ETHER_ARP_PACKET_INIT(_op, _mac, _sip, _tip) { \
.ea_hdr = { \
.ar_hrd = htobe16(ARPHRD_ETHER), \
.ar_pro = htobe16(ETHERTYPE_IP), \
.ar_hln = 6, \
.ar_pln = 4, \
.ar_op = htobe16(_op), \
}, \
.arp_sha[0] = (_mac)->ether_addr_octet[0], \
.arp_sha[1] = (_mac)->ether_addr_octet[1], \
.arp_sha[2] = (_mac)->ether_addr_octet[2], \
.arp_sha[3] = (_mac)->ether_addr_octet[3], \
.arp_sha[4] = (_mac)->ether_addr_octet[4], \
.arp_sha[5] = (_mac)->ether_addr_octet[5], \
.arp_spa[0] = (be32toh((_sip)->s_addr) >> 24) & 0xff, \
.arp_spa[1] = (be32toh((_sip)->s_addr) >> 16) & 0xff, \
.arp_spa[2] = (be32toh((_sip)->s_addr) >> 8) & 0xff, \
.arp_spa[3] = be32toh((_sip)->s_addr) & 0xff, \
.arp_tpa[0] = (be32toh((_tip)->s_addr) >> 24) & 0xff, \
.arp_tpa[1] = (be32toh((_tip)->s_addr) >> 16) & 0xff, \
.arp_tpa[2] = (be32toh((_tip)->s_addr) >> 8) & 0xff, \
.arp_tpa[3] = be32toh((_tip)->s_addr) & 0xff, \
}
/*
 * Exercise add/remove on a freshly created BPF map: removing an absent
 * address must yield -ENOENT and adding a present address must yield -EEXIST.
 */
static void test_map(void) {
        struct in_addr addr = { .s_addr = 1 };
        int mapfd = -1;
        int r;

        r = n_acd_bpf_map_create(&mapfd, 8);
        c_assert(r >= 0);
        c_assert(mapfd >= 0);

        /* The map starts out empty. */
        r = n_acd_bpf_map_remove(mapfd, &addr);
        c_assert(r == -ENOENT);

        /* The first add succeeds, the second one collides. */
        r = n_acd_bpf_map_add(mapfd, &addr);
        c_assert(r >= 0);
        r = n_acd_bpf_map_add(mapfd, &addr);
        c_assert(r == -EEXIST);

        /* The first remove succeeds, the second one finds nothing. */
        r = n_acd_bpf_map_remove(mapfd, &addr);
        c_assert(r >= 0);
        r = n_acd_bpf_map_remove(mapfd, &addr);
        c_assert(r == -ENOENT);

        close(mapfd);
}
/*
 * Send @packet on @out_fd and assert that a full ARP packet can be received
 * on @in_fd, i.e. that it was not dropped on the way.
 */
static void verify_success(struct ether_arp *packet, int out_fd, int in_fd) {
        uint8_t buf[sizeof(*packet)];
        int r;

        r = send(out_fd, packet, sizeof(*packet), 0);
        c_assert(r == sizeof(struct ether_arp));

        r = recv(in_fd, buf, sizeof(buf), 0);
        c_assert(r == sizeof(struct ether_arp));
}
/*
 * Send @packet on @out_fd and assert that nothing arrives on @in_fd: the
 * receive must fail with EAGAIN (callers pass a non-blocking socket).
 */
static void verify_failure(struct ether_arp *packet, int out_fd, int in_fd) {
        uint8_t buf[sizeof(*packet)];
        int r;

        r = send(out_fd, packet, sizeof(*packet), 0);
        c_assert(r == sizeof(struct ether_arp));

        r = recv(in_fd, buf, sizeof(buf), 0);
        c_assert(r < 0);
        c_assert(errno == EAGAIN);
}
/*
 * Attach the compiled BPF filter to one end of a socket pair and verify which
 * ARP packets pass it.
 *
 * The filter is compiled for our own MAC @mac1, and the map contains @ip1 as
 * the only address of interest. Packets are sent on pair[0] and received on
 * the filtered, non-blocking pair[1]. Each negative case starts from a valid
 * packet and breaks exactly one property, so that a single check of the
 * filter is exercised at a time.
 */
static void test_filter(void) {
        /* One spare byte, needed for the over-long packet case below. */
        uint8_t buf[sizeof(struct ether_arp) + 1] = { 0 };
        struct ether_addr mac1 = { { 0x01, 0x02, 0x03, 0x04, 0x05, 0x06 } };
        struct ether_addr mac2 = { { 0x01, 0x02, 0x03, 0x04, 0x05, 0x07 } };
        struct in_addr ip0 = { 0 };
        struct in_addr ip1 = { 1 };
        struct in_addr ip2 = { 2 };
        struct ether_arp *packet = (struct ether_arp *)buf;
        int r, mapfd = -1, progfd = -1, pair[2];

        r = n_acd_bpf_map_create(&mapfd, 1);
        c_assert(r >= 0);

        r = n_acd_bpf_compile(&progfd, mapfd, &mac1);
        c_assert(r >= 0);
        c_assert(progfd >= 0);

        r = socketpair(AF_UNIX, SOCK_SEQPACKET | SOCK_CLOEXEC | SOCK_NONBLOCK, 0, pair);
        c_assert(r >= 0);

        r = setsockopt(pair[1], SOL_SOCKET, SO_ATTACH_BPF, &progfd,
                       sizeof(progfd));
        c_assert(r >= 0);

        r = n_acd_bpf_map_add(mapfd, &ip1);
        c_assert(r >= 0);

        /* valid */
        *packet = (struct ether_arp)ETHER_ARP_PACKET_INIT(ARPOP_REQUEST, &mac2, &ip1, &ip2);
        verify_success(packet, pair[0], pair[1]);

        /* valid: reply instead of request */
        *packet = (struct ether_arp)ETHER_ARP_PACKET_INIT(ARPOP_REPLY, &mac2, &ip1, &ip2);
        verify_success(packet, pair[0], pair[1]);

        /* valid: to us instead of from us */
        *packet = (struct ether_arp)ETHER_ARP_PACKET_INIT(ARPOP_REQUEST, &mac2, &ip0, &ip1);
        verify_success(packet, pair[0], pair[1]);

        /* invalid header type */
        *packet = (struct ether_arp)ETHER_ARP_PACKET_INIT(ARPOP_REQUEST, &mac2, &ip1, &ip2);
        packet->arp_hrd += 1;
        verify_failure(packet, pair[0], pair[1]);

        /* invalid protocol */
        *packet = (struct ether_arp)ETHER_ARP_PACKET_INIT(ARPOP_REQUEST, &mac2, &ip1, &ip2);
        packet->arp_pro += 1;
        verify_failure(packet, pair[0], pair[1]);

        /* invalid hw addr length */
        *packet = (struct ether_arp)ETHER_ARP_PACKET_INIT(ARPOP_REQUEST, &mac2, &ip1, &ip2);
        packet->arp_hln += 1;
        verify_failure(packet, pair[0], pair[1]);

        /* invalid protocol addr length */
        *packet = (struct ether_arp)ETHER_ARP_PACKET_INIT(ARPOP_REQUEST, &mac2, &ip1, &ip2);
        packet->arp_pln += 1;
        verify_failure(packet, pair[0], pair[1]);

        /*
         * invalid operation
         *
         * The packet is otherwise valid, so only the operation check of the
         * filter can be responsible for dropping it.
         */
        *packet = (struct ether_arp)ETHER_ARP_PACKET_INIT(ARPOP_NAK, &mac2, &ip1, &ip2);
        verify_failure(packet, pair[0], pair[1]);

        /* own mac */
        *packet = (struct ether_arp)ETHER_ARP_PACKET_INIT(ARPOP_REQUEST, &mac1, &ip1, &ip2);
        verify_failure(packet, pair[0], pair[1]);

        /* not to, nor from us, with source */
        *packet = (struct ether_arp)ETHER_ARP_PACKET_INIT(ARPOP_REQUEST, &mac2, &ip2, &ip2);
        verify_failure(packet, pair[0], pair[1]);

        /* not to, nor from us, without source */
        *packet = (struct ether_arp)ETHER_ARP_PACKET_INIT(ARPOP_REQUEST, &mac2, &ip0, &ip2);
        verify_failure(packet, pair[0], pair[1]);

        /* to us instead of from us, but reply */
        *packet = (struct ether_arp)ETHER_ARP_PACKET_INIT(ARPOP_REPLY, &mac2, &ip0, &ip1);
        verify_failure(packet, pair[0], pair[1]);

        /* long: the packet passes, but is cut to the size of an ARP packet */
        *packet = (struct ether_arp)ETHER_ARP_PACKET_INIT(ARPOP_REQUEST, &mac2, &ip1, &ip2);
        r = send(pair[0], buf, sizeof(struct ether_arp) + 1, 0);
        c_assert(r == sizeof(struct ether_arp) + 1);

        r = recv(pair[1], buf, sizeof(buf), 0);
        c_assert(r == sizeof(struct ether_arp));

        /* short: the packet is dropped */
        *packet = (struct ether_arp)ETHER_ARP_PACKET_INIT(ARPOP_REQUEST, &mac2, &ip1, &ip2);
        r = send(pair[0], buf, sizeof(struct ether_arp) - 1, 0);
        c_assert(r == sizeof(struct ether_arp) - 1);

        r = recv(pair[1], buf, sizeof(buf), 0);
        c_assert(r < 0);
        c_assert(errno == EAGAIN);

        /*
         * Send one packet before and one packet after modifying the map,
         * verify that the modification applies at the time of send(), not recv().
         */
        *packet = (struct ether_arp)ETHER_ARP_PACKET_INIT(ARPOP_REQUEST, &mac2, &ip1, &ip2);
        r = send(pair[0], buf, sizeof(struct ether_arp), 0);
        c_assert(r == sizeof(struct ether_arp));

        r = n_acd_bpf_map_remove(mapfd, &ip1);
        c_assert(r >= 0);

        r = send(pair[0], buf, sizeof(struct ether_arp), 0);
        c_assert(r == sizeof(struct ether_arp));

        /* Only the packet sent while ip1 was still in the map arrives. */
        r = recv(pair[1], buf, sizeof(buf), 0);
        c_assert(r == sizeof(struct ether_arp));

        r = recv(pair[1], buf, sizeof(buf), 0);
        c_assert(r < 0);
        c_assert(errno == EAGAIN);

        close(pair[0]);
        close(pair[1]);
        close(progfd);
        close(mapfd);
}
/* Set up the test environment, then run the map and the filter tests. */
int main(int argc, char **argv) {
test_setup();
test_map();
test_filter();
return 0;
}

82
src/test-loopback.c Normal file
View file

@ -0,0 +1,82 @@
/*
* Test on loopback device
* This runs the ACD engine on the loopback device, effectively testing the BPF
* filter of ACD to discard its own packets. This might happen on
* non-spanning-tree networks, or on networks that echo packets.
*/
#undef NDEBUG
#include <c-stdaux.h>
#include <stdlib.h>
#include "test.h"
/*
 * Probe for 192.168.0.1 on the interface @ifindex, using @mac (@n_mac bytes)
 * as our own hardware address, and wait until the probe reports
 * N_ACD_EVENT_READY. Any other event or any dispatch error fails the test.
 */
static void test_loopback(int ifindex, uint8_t *mac, size_t n_mac) {
        NAcdConfig *config;
        NAcd *acd;
        struct pollfd pfds;
        int r, fd;

        r = n_acd_config_new(&config);
        c_assert(!r);

        n_acd_config_set_ifindex(config, ifindex);
        n_acd_config_set_transport(config, N_ACD_TRANSPORT_ETHERNET);
        n_acd_config_set_mac(config, mac, n_mac);

        r = n_acd_new(&acd, config);
        c_assert(!r);

        n_acd_config_free(config);

        {
                NAcdProbeConfig *probe_config;
                NAcdProbe *probe;
                /*
                 * Build the address from unsigned literals: 192 << 24 does
                 * not fit into a signed int, and overflowing a signed shift
                 * is undefined behaviour.
                 */
                struct in_addr ip = { htobe32((192U << 24) | (168U << 16) | (1U << 0)) };

                r = n_acd_probe_config_new(&probe_config);
                c_assert(!r);

                n_acd_probe_config_set_ip(probe_config, ip);
                n_acd_probe_config_set_timeout(probe_config, 100);

                r = n_acd_probe(acd, &probe, probe_config);
                c_assert(!r);

                n_acd_probe_config_free(probe_config);

                n_acd_get_fd(acd, &fd);

                /* Dispatch until the first event shows up; it must be READY. */
                for (;;) {
                        NAcdEvent *event;

                        pfds = (struct pollfd){ .fd = fd, .events = POLLIN };
                        r = poll(&pfds, 1, -1);
                        c_assert(r >= 0);

                        r = n_acd_dispatch(acd);
                        c_assert(!r);

                        r = n_acd_pop_event(acd, &event);
                        c_assert(!r);
                        if (event) {
                                c_assert(event->event == N_ACD_EVENT_READY);
                                break;
                        }
                }

                n_acd_probe_free(probe);
        }

        n_acd_unref(acd);
}
/*
 * Set up the test environment, bring up the loopback device and run the
 * probe on it, using the hardware address reported by test_loopback_up().
 */
int main(int argc, char **argv) {
struct ether_addr mac;
int ifindex;
test_setup();
test_loopback_up(&ifindex, &mac);
test_loopback(ifindex, mac.ether_addr_octet, sizeof(mac.ether_addr_octet));
return 0;
}

97
src/test-twice.c Normal file
View file

@ -0,0 +1,97 @@
/*
* Test with unused address twice in parallel
* This runs the ACD engine with an unused address on a veth pair, but it runs
* it on both ends. We expect the PROBE to fail on at least one of the devices.
*/
#undef NDEBUG
#include <c-stdaux.h>
#include <stdlib.h>
#include "test.h"
/*
 * Run two ACD engines, one on each of the given interfaces, probing for the
 * same address 192.168.0.1. Each side is dispatched until it reports either
 * READY or USED; at most one side may end up READY.
 *
 * NOTE(review): this uses a by-value NAcdConfig, n_acd_start(), n_acd_free()
 * and N_ACD_E_DONE, none of which are declared by the n-acd.h shipped in this
 * tree - confirm whether this test is still meant to be built.
 */
static void test_unused(int ifindex1, uint8_t *mac1, size_t n_mac1, int ifindex2, uint8_t *mac2, size_t n_mac2) {
        /*
         * The address is built from unsigned literals: 192 << 24 does not fit
         * into a signed int, and overflowing a signed shift is undefined
         * behaviour.
         */
        NAcdConfig config1 = {
                .ifindex = ifindex1,
                .transport = N_ACD_TRANSPORT_ETHERNET,
                .mac = mac1,
                .n_mac = n_mac1,
                .ip = { htobe32((192U << 24) | (168U << 16) | (1U << 0)) },
                .timeout_msec = 100,
        };
        NAcdConfig config2 = {
                .ifindex = ifindex2,
                .transport = N_ACD_TRANSPORT_ETHERNET,
                .mac = mac2,
                .n_mac = n_mac2,
                .ip = { htobe32((192U << 24) | (168U << 16) | (1U << 0)) },
                .timeout_msec = 100,
        };
        struct pollfd pfds[2];
        NAcd *acd1, *acd2;
        int r, fd1, fd2, state1, state2;

        r = n_acd_new(&acd1);
        c_assert(!r);
        r = n_acd_new(&acd2);
        c_assert(!r);

        n_acd_get_fd(acd1, &fd1);
        n_acd_get_fd(acd2, &fd2);

        r = n_acd_start(acd1, &config1);
        c_assert(!r);
        r = n_acd_start(acd2, &config2);
        c_assert(!r);

        /* state: -1 = still probing, 0 = USED, 1 = READY */
        for (state1 = state2 = -1; state1 == -1 || state2 == -1; ) {
                NAcdEvent *event;

                /* Only wait on the sides that have not finished yet. */
                pfds[0] = (struct pollfd){ .fd = fd1, .events = (state1 == -1) ? POLLIN : 0 };
                pfds[1] = (struct pollfd){ .fd = fd2, .events = (state2 == -1) ? POLLIN : 0 };

                r = poll(pfds, sizeof(pfds) / sizeof(*pfds), -1);
                c_assert(r >= 0);

                if (state1 == -1) {
                        r = n_acd_dispatch(acd1);
                        c_assert(!r);

                        r = n_acd_pop_event(acd1, &event);
                        if (!r) {
                                c_assert(event->event == N_ACD_EVENT_READY || event->event == N_ACD_EVENT_USED);
                                state1 = !!(event->event == N_ACD_EVENT_READY);
                        } else {
                                c_assert(r == N_ACD_E_DONE);
                        }
                }

                if (state2 == -1) {
                        r = n_acd_dispatch(acd2);
                        c_assert(!r);

                        r = n_acd_pop_event(acd2, &event);
                        if (!r) {
                                c_assert(event->event == N_ACD_EVENT_READY || event->event == N_ACD_EVENT_USED);
                                state2 = !!(event->event == N_ACD_EVENT_READY);
                        } else {
                                c_assert(r == N_ACD_E_DONE);
                        }
                }
        }

        n_acd_free(acd1);
        n_acd_free(acd2);

        /* Both sides probed for the same address; they must not both win. */
        c_assert(!state1 || !state2);
}
/*
 * Set up the test environment, create a veth pair and run the engine on both
 * ends with the hardware address of the respective end.
 */
int main(int argc, char **argv) {
        struct ether_addr mac1, mac2;
        int ifindex1, ifindex2;

        test_setup();

        test_veth_new(&ifindex1, &mac1, &ifindex2, &mac2);

        /* Each address is passed together with its own length. */
        test_unused(ifindex1, mac1.ether_addr_octet, sizeof(mac1.ether_addr_octet),
                    ifindex2, mac2.ether_addr_octet, sizeof(mac2.ether_addr_octet));

        return 0;
}

84
src/test-unplug.c Normal file
View file

@ -0,0 +1,84 @@
/*
* Unplug device during test run
* Run the ACD engine with an address that is not used by anyone else on the
* link, but DOWN or UNPLUG the device while running.
*/
#undef NDEBUG
#include <c-stdaux.h>
#include <stdlib.h>
#include "test.h"
/*
 * Probe for 192.168.0.1 on @ifindex and take the device down while doing so.
 *
 * @run selects the checkpoint at which the device is taken down: it is
 * decremented at every checkpoint, and the device goes down at the one where
 * it is found to be 0 (before creating the engine, after creating it, after
 * starting it, and then after each poll). The test ends once
 * N_ACD_EVENT_DOWN is seen; if READY is seen first, the device is taken down
 * at that point.
 *
 * NOTE(review): this uses a by-value NAcdConfig, n_acd_start(), n_acd_free()
 * and N_ACD_E_DONE, none of which are declared by the n-acd.h shipped in this
 * tree - confirm whether this test is still meant to be built.
 */
static void test_unplug_down(int ifindex, uint8_t *mac, size_t n_mac, unsigned int run) {
        /*
         * The address is built from unsigned literals: 192 << 24 does not fit
         * into a signed int, and overflowing a signed shift is undefined
         * behaviour.
         */
        NAcdConfig config = {
                .ifindex = ifindex,
                .transport = N_ACD_TRANSPORT_ETHERNET,
                .mac = mac,
                .n_mac = n_mac,
                .ip = { htobe32((192U << 24) | (168U << 16) | (1U << 0)) },
                .timeout_msec = 100,
        };
        struct pollfd pfds;
        NAcd *acd;
        int r, fd;

        /* @run is unsigned; wrapping past 0 is well-defined and intended. */
        if (!run--)
                test_veth_cmd(ifindex, "down");

        r = n_acd_new(&acd);
        c_assert(!r);

        if (!run--)
                test_veth_cmd(ifindex, "down");

        n_acd_get_fd(acd, &fd);
        r = n_acd_start(acd, &config);
        c_assert(!r);

        if (!run--)
                test_veth_cmd(ifindex, "down");

        for (;;) {
                NAcdEvent *event;

                pfds = (struct pollfd){ .fd = fd, .events = POLLIN };
                r = poll(&pfds, 1, -1);
                c_assert(r >= 0);

                if (!run--)
                        test_veth_cmd(ifindex, "down");

                r = n_acd_dispatch(acd);
                c_assert(!r);

                r = n_acd_pop_event(acd, &event);
                if (!r) {
                        if (event->event == N_ACD_EVENT_DOWN) {
                                break;
                        } else {
                                c_assert(event->event == N_ACD_EVENT_READY);
                                test_veth_cmd(ifindex, "down");
                        }
                } else {
                        c_assert(r == N_ACD_E_DONE);
                }
        }

        n_acd_free(acd);
}
/*
 * Set up the test environment and a veth device, then run the unplug test
 * five times with run = 0..4, i.e. once per checkpoint at which the device
 * can be taken down. The device is brought up again after each run.
 */
int main(int argc, char **argv) {
struct ether_addr mac;
unsigned int i;
int ifindex;
test_setup();
test_veth_new(&ifindex, &mac, NULL, NULL);
for (i = 0; i < 5; ++i) {
test_unplug_down(ifindex, mac.ether_addr_octet, sizeof(mac.ether_addr_octet), i);
test_veth_cmd(ifindex, "up");
}
return 0;
}

63
src/test-unused.c Normal file
View file

@ -0,0 +1,63 @@
/*
* Test with unused address
* Run the ACD engine with an address that is not used by anyone else on the
* link. This should just pass through, with a short, random timeout.
*/
#undef NDEBUG
#include <c-stdaux.h>
#include <stdlib.h>
#include "test.h"
/*
 * Probe for 192.168.0.1 on @ifindex, where nobody else uses that address,
 * and wait for N_ACD_EVENT_READY. Any other event fails the test.
 *
 * NOTE(review): this uses a by-value NAcdConfig, n_acd_start(), n_acd_free()
 * and N_ACD_E_DONE, none of which are declared by the n-acd.h shipped in this
 * tree - confirm whether this test is still meant to be built.
 */
static void test_unused(int ifindex, const uint8_t *mac, size_t n_mac) {
        /*
         * The address is built from unsigned literals: 192 << 24 does not fit
         * into a signed int, and overflowing a signed shift is undefined
         * behaviour.
         */
        NAcdConfig config = {
                .ifindex = ifindex,
                .transport = N_ACD_TRANSPORT_ETHERNET,
                .mac = mac,
                .n_mac = n_mac,
                .ip = { htobe32((192U << 24) | (168U << 16) | (1U << 0)) },
                .timeout_msec = 100,
        };
        struct pollfd pfds;
        NAcd *acd;
        int r, fd;

        r = n_acd_new(&acd);
        c_assert(!r);

        n_acd_get_fd(acd, &fd);
        r = n_acd_start(acd, &config);
        c_assert(!r);

        for (;;) {
                NAcdEvent *event;

                pfds = (struct pollfd){ .fd = fd, .events = POLLIN };
                r = poll(&pfds, 1, -1);
                c_assert(r >= 0);

                r = n_acd_dispatch(acd);
                c_assert(!r);

                r = n_acd_pop_event(acd, &event);
                if (!r) {
                        c_assert(event->event == N_ACD_EVENT_READY);
                        break;
                } else {
                        /* No event queued yet; keep dispatching. */
                        c_assert(r == N_ACD_E_DONE);
                }
        }

        n_acd_free(acd);
}
/*
 * Set up the test environment and a veth device (the peer end is not
 * requested), then run the probe on it.
 */
int main(int argc, char **argv) {
struct ether_addr mac;
int ifindex;
test_setup();
test_veth_new(&ifindex, &mac, NULL, NULL);
test_unused(ifindex, mac.ether_addr_octet, sizeof(mac.ether_addr_octet));
return 0;
}

240
src/test-veth.c Normal file
View file

@ -0,0 +1,240 @@
/*
* Test on a veth link
*
* This essentially mimics a real network with two peers.
*
* Run one ACD context on each end of the tunnel. On one end, probe for N
* addresses. On the other end, pre-configure N/3 of the same addresses and
* probe for another N/3 of the addresses.
*
* Verify that in the case of simultaneous probes of the same address at most
* one succeeds, in the case of probing for a configured address it always
* fails, and probing for a non-existent address always succeeds.
*
* Make sure to keep N fairly high as the protocol is probabilistic, and we also
* want to verify that resizing the internal maps works correctly.
*/
#undef NDEBUG
#include <c-stdaux.h>
#include <stdlib.h>
#include "test.h"
/* Number of addresses probed on the first end of the veth pair. */
#define TEST_ACD_N_PROBES (9)
/*
 * Outcome recorded per probe; the test stores it in the userdata pointer of
 * the probe. UNKNOWN is 0 and therefore matches the default NULL userdata.
 */
typedef enum {
TEST_ACD_STATE_UNKNOWN,
TEST_ACD_STATE_USED,
TEST_ACD_STATE_READY,
} TestAcdState;
/*
 * Read the test state stored in the userdata slot of @probe.
 *
 * The userdata is fetched into a real `void *` and converted afterwards, so
 * the library never stores a pointer through a casted `unsigned long *`.
 */
static unsigned long test_veth_get_state(NAcdProbe *probe) {
        void *userdata = NULL;

        n_acd_probe_get_userdata(probe, &userdata);
        return (unsigned long)userdata;
}

/*
 * Dispatch @acd once and drain its event queue.
 *
 * Each READY or USED event records the matching TEST_ACD_STATE_* value in the
 * userdata of the affected probe, after asserting that no result had been
 * recorded for that probe before. Any other event type fails the test.
 *
 * Returns the number of probes that completed during this call.
 */
static size_t test_veth_dispatch(NAcd *acd) {
        size_t n_completed = 0;
        int r;

        r = n_acd_dispatch(acd);
        c_assert(!r || r == N_ACD_E_PREEMPTED);

        for (;;) {
                NAcdEvent *event;
                NAcdProbe *probe;
                unsigned long state;

                r = n_acd_pop_event(acd, &event);
                c_assert(!r);
                if (!event)
                        break;

                switch (event->event) {
                case N_ACD_EVENT_READY:
                        probe = event->ready.probe;
                        state = TEST_ACD_STATE_READY;
                        break;
                case N_ACD_EVENT_USED:
                        probe = event->used.probe;
                        state = TEST_ACD_STATE_USED;
                        break;
                default:
                        c_assert(0);
                        abort();
                        break;
                }

                /* every probe must report exactly one result */
                c_assert(test_veth_get_state(probe) == TEST_ACD_STATE_UNKNOWN);
                n_acd_probe_set_userdata(probe, (void *)state);

                ++n_completed;
        }

        return n_completed;
}

/*
 * Run one round of the veth test.
 *
 * One ACD context is created on each end of the veth pair (@ifindex1/@mac1
 * and @ifindex2/@mac2). TEST_ACD_N_PROBES addresses 10.0.0.<i> are probed on
 * the first context, and depending on `i % 3`:
 *   0: the address is unused on the peer; the probe must become READY.
 *   1: the address is pre-configured on the peer; the probe must be USED.
 *   2: the peer context probes the same address; at least one of the two
 *      probes must be USED.
 *
 * Both contexts are polled until every started probe has reported a result,
 * then the results are verified and all objects are released.
 */
static void test_veth(int ifindex1, uint8_t *mac1, size_t n_mac1,
                      int ifindex2, uint8_t *mac2, size_t n_mac2) {
        NAcdConfig *config;
        NAcdProbeConfig *probe_config;
        NAcd *acd1, *acd2;
        NAcdProbe *probes1[TEST_ACD_N_PROBES];
        NAcdProbe *probes2[TEST_ACD_N_PROBES];
        unsigned long state1, state2;
        size_t n_running = 0;
        int r;

        /* one context per end of the link, sharing a single config object */
        r = n_acd_config_new(&config);
        c_assert(!r);

        n_acd_config_set_transport(config, N_ACD_TRANSPORT_ETHERNET);

        n_acd_config_set_ifindex(config, ifindex1);
        n_acd_config_set_mac(config, mac1, n_mac1);
        r = n_acd_new(&acd1, config);
        c_assert(!r);

        n_acd_config_set_ifindex(config, ifindex2);
        n_acd_config_set_mac(config, mac2, n_mac2);
        r = n_acd_new(&acd2, config);
        c_assert(!r);

        n_acd_config_free(config);

        /* start all probes */
        r = n_acd_probe_config_new(&probe_config);
        c_assert(!r);
        n_acd_probe_config_set_timeout(probe_config, 1024);

        c_assert(TEST_ACD_N_PROBES <= 10 << 24);

        for (size_t i = 0; i < TEST_ACD_N_PROBES; ++i) {
                struct in_addr ip = { htobe32((10 << 24) | i) };

                n_acd_probe_config_set_ip(probe_config, ip);

                switch (i % 3) {
                case 0:
                        /*
                         * Probe on one side, and leave the address
                         * unset on the other. The probe must succeed.
                         */
                        break;
                case 1:
                        /*
                         * Preconfigure the address on one side, and
                         * probe on the other. The probe must fail.
                         */
                        test_add_child_ip(&ip);
                        break;
                case 2:
                        /*
                         * Probe both sides for the same address, at
                         * most one may succeed.
                         */
                        r = n_acd_probe(acd2, &probes2[i], probe_config);
                        c_assert(!r);
                        ++n_running;
                        break;
                default:
                        c_assert(0);
                        abort();
                        break;
                }

                r = n_acd_probe(acd1, &probes1[i], probe_config);
                c_assert(!r);
                ++n_running;
        }

        n_acd_probe_config_free(probe_config);

        /* wait until every started probe has reported READY or USED */
        while (n_running > 0) {
                struct pollfd pfds[2] = {
                        { .events = POLLIN },
                        { .events = POLLIN },
                };

                n_acd_get_fd(acd1, &pfds[0].fd);
                n_acd_get_fd(acd2, &pfds[1].fd);

                r = poll(pfds, 2, -1);
                c_assert(r >= 0);

                if (pfds[0].revents & POLLIN)
                        n_running -= test_veth_dispatch(acd1);

                if (pfds[1].revents & POLLIN)
                        n_running -= test_veth_dispatch(acd2);
        }

        /* verify the recorded results and release the probes */
        for (size_t i = 0; i < TEST_ACD_N_PROBES; ++i) {
                struct in_addr ip = { htobe32((10 << 24) | i) };

                switch (i % 3) {
                case 0:
                        state1 = test_veth_get_state(probes1[i]);
                        c_assert(state1 == TEST_ACD_STATE_READY);
                        break;
                case 1:
                        test_del_child_ip(&ip);
                        state1 = test_veth_get_state(probes1[i]);
                        c_assert(state1 == TEST_ACD_STATE_USED);
                        break;
                case 2:
                        state1 = test_veth_get_state(probes1[i]);
                        state2 = test_veth_get_state(probes2[i]);
                        c_assert(state1 != TEST_ACD_STATE_UNKNOWN);
                        c_assert(state2 != TEST_ACD_STATE_UNKNOWN);
                        c_assert(state1 == TEST_ACD_STATE_USED || state2 == TEST_ACD_STATE_USED);
                        n_acd_probe_free(probes2[i]);
                        break;
                }

                n_acd_probe_free(probes1[i]);
        }

        n_acd_unref(acd2);
        n_acd_unref(acd1);
}
/*
 * Test entry point: set up the isolated environment, create a veth pair and
 * run the veth test eight times on the same pair of links.
 */
int main(int argc, char **argv) {
        struct ether_addr mac1, mac2;
        int ifindex1, ifindex2;
        unsigned int remaining = 8;

        test_setup();

        test_veth_new(&ifindex1, &mac1, &ifindex2, &mac2);

        while (remaining > 0) {
                test_veth(ifindex1, mac1.ether_addr_octet, sizeof(mac1.ether_addr_octet),
                          ifindex2, mac2.ether_addr_octet, sizeof(mac2.ether_addr_octet));
                --remaining;
        }

        return 0;
}

213
src/test.h Normal file
View file

@ -0,0 +1,213 @@
#pragma once
/*
* Test Helpers
* A collection of helpers to set up the environment for networking tests. This
* includes network-namespace setup, veth setup, and more.
*/
#undef NDEBUG
#include <assert.h>
#include <c-stdaux.h>
#include <endian.h>
#include <errno.h>
#include <fcntl.h>
#include <net/ethernet.h>
#include <net/if.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <arpa/inet.h>
#include <poll.h>
#include <sched.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/ioctl.h>
#include <sys/mount.h>
#include <sys/resource.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <unistd.h>
#include "n-acd.h"
/*
 * Add @addr with a /8 prefix to the "veth1" device by shelling out to `ip`.
 *
 * Only a negative return value of system() trips the assertion; the exit
 * status of the `ip` command itself is not inspected.
 */
static inline void test_add_child_ip(const struct in_addr *addr) {
        char *cmdline = NULL;
        int rc;

        rc = asprintf(&cmdline, "ip addr add dev veth1 %s/8", inet_ntoa(*addr));
        c_assert(rc >= 0);

        rc = system(cmdline);
        c_assert(rc >= 0);

        free(cmdline);
}
/*
 * Remove @addr (with a /8 prefix) from the "veth1" device by shelling out to
 * `ip`.
 *
 * Only a negative return value of system() trips the assertion; the exit
 * status of the `ip` command itself is not inspected.
 */
static inline void test_del_child_ip(const struct in_addr *addr) {
        char *cmdline = NULL;
        int rc;

        rc = asprintf(&cmdline, "ip addr del dev veth1 %s/8", inet_ntoa(*addr));
        c_assert(rc >= 0);

        rc = system(cmdline);
        c_assert(rc >= 0);

        free(cmdline);
}
/*
 * Look up the interface index and/or hardware address of the network
 * interface called @name.
 *
 * @name:   interface name; including its terminating NUL it must fit into
 *          `ifr_name`
 * @indexp: if non-NULL, receives the interface index (asserted to be > 0)
 * @macp:   if non-NULL, receives the ETH_ALEN-byte hardware address obtained
 *          via SIOCGIFHWADDR
 *
 * All failures are fatal (assertions).
 */
static inline void test_if_query(const char *name, int *indexp, struct ether_addr *macp) {
        struct ifreq ifr = {};
        size_t l;
        int r, s;

        /*
         * The name is copied including its terminating NUL, so its length
         * must be strictly smaller than the destination buffer.
         */
        l = strlen(name);
        c_assert(l < sizeof(ifr.ifr_name));

        if (indexp) {
                *indexp = if_nametoindex(name);
                c_assert(*indexp > 0);
        }

        if (macp) {
                /* any socket works as a handle for the interface ioctl */
                s = socket(AF_INET, SOCK_DGRAM, 0);
                c_assert(s >= 0);

                memcpy(ifr.ifr_name, name, l + 1);
                r = ioctl(s, SIOCGIFHWADDR, &ifr);
                c_assert(r >= 0);

                memcpy(macp->ether_addr_octet, ifr.ifr_hwaddr.sa_data, ETH_ALEN);

                close(s);
        }
}
/*
 * Run `ip link set <name> <cmd>` for the interface with index @ifindex.
 *
 * The index is translated to a name first; the `ip` invocation must exit
 * successfully (system() returning 0), otherwise the test aborts.
 */
static inline void test_veth_cmd(int ifindex, const char *cmd) {
        char ifname[IF_NAMESIZE + 1] = {};
        char *resolved, *cmdline;
        int rc;

        resolved = if_indextoname(ifindex, ifname);
        c_assert(resolved);

        rc = asprintf(&cmdline, "ip link set %s %s", ifname, cmd);
        c_assert(rc >= 0);

        /* Shelling out is ugly, but it keeps the helper short. */
        rc = system(cmdline);
        c_assert(rc == 0);

        free(cmdline);
}
/*
 * Create a veth pair, bring both ends up, and report their properties.
 *
 * The pair is created with `ip link add type veth` and is expected to show up
 * as "veth0" (parent) and "veth1" (child). Each out-pointer may be NULL, in
 * which case the corresponding value is not queried.
 */
static inline void test_veth_new(int *parent_indexp,
                                 struct ether_addr *parent_macp,
                                 int *child_indexp,
                                 struct ether_addr *child_macp) {
        /* Shelling out is ugly, but it works. */
        const char *const commands[] = {
                "ip link add type veth",
                "ip link set veth0 up",
                "ip link set veth1 up",
        };
        int rc;

        for (size_t i = 0; i < sizeof(commands) / sizeof(commands[0]); ++i) {
                rc = system(commands[i]);
                c_assert(rc == 0);
        }

        test_if_query("veth0", parent_indexp, parent_macp);
        test_if_query("veth1", child_indexp, child_macp);
}
/*
 * Bring the loopback device "lo" up and report its index and hardware
 * address through @indexp / @macp (either may be NULL).
 */
static inline void test_loopback_up(int *indexp, struct ether_addr *macp) {
        int rc = system("ip link set lo up");

        c_assert(rc == 0);

        test_if_query("lo", indexp, macp);
}
/*
 * Raise RLIMIT_MEMLOCK as far as possible.
 *
 * First try to set the soft limit to 64 MiB, growing the hard limit if it is
 * smaller than that. If this is refused with EPERM, fall back to raising the
 * soft limit to the current hard limit. Any other failure aborts the test.
 */
static inline void test_raise_memlock(void) {
        const size_t wanted = 64 * 1024 * 1024;
        struct rlimit current, desired;
        int rc;

        rc = getrlimit(RLIMIT_MEMLOCK, &current);
        c_assert(!rc);

        /* preferred: soft limit of @wanted, hard limit never lowered */
        desired.rlim_cur = wanted;
        if (wanted > current.rlim_max)
                desired.rlim_max = wanted;
        else
                desired.rlim_max = current.rlim_max;

        rc = setrlimit(RLIMIT_MEMLOCK, &desired);
        if (!rc)
                return;

        c_assert(errno == EPERM);

        /* not privileged to raise the hard limit: max out the soft limit */
        desired.rlim_cur = current.rlim_max;
        desired.rlim_max = current.rlim_max;

        rc = setrlimit(RLIMIT_MEMLOCK, &desired);
        c_assert(!rc);
}
/*
 * Enter a new user namespace in which the caller is mapped to root:root.
 *
 * The effective UID/GID are sampled before unshare(CLONE_NEWUSER) and then
 * mapped to ID 0 by writing /proc/self/uid_map and /proc/self/gid_map.
 * "deny" is written to /proc/self/setgroups before the GID map is written.
 *
 * The IDs are unsigned types, so they are formatted with %u; %d would emit a
 * negative number for IDs above INT_MAX.
 *
 * All failures are fatal (assertions).
 */
static inline void test_unshare_user_namespace(void) {
        uid_t euid;
        gid_t egid;
        int r, fd;

        /* must be read before unsharing: these are the outer-namespace IDs */
        euid = geteuid();
        egid = getegid();

        r = unshare(CLONE_NEWUSER);
        c_assert(r >= 0);

        /* map outer eUID to UID 0 */
        fd = open("/proc/self/uid_map", O_WRONLY);
        c_assert(fd >= 0);
        r = dprintf(fd, "0 %u 1\n", (unsigned int)euid);
        c_assert(r >= 0);
        close(fd);

        /* give up setgroups(2) before the GID map is written */
        fd = open("/proc/self/setgroups", O_WRONLY);
        c_assert(fd >= 0);
        r = dprintf(fd, "deny");
        c_assert(r >= 0);
        close(fd);

        /* map outer eGID to GID 0 */
        fd = open("/proc/self/gid_map", O_WRONLY);
        c_assert(fd >= 0);
        r = dprintf(fd, "0 %u 1\n", (unsigned int)egid);
        c_assert(r >= 0);
        close(fd);
}
/*
 * Prepare an isolated environment for a networking test.
 *
 * After raising the memlock limit and entering a new user namespace, the
 * process moves into new network and mount namespaces, makes the whole mount
 * tree private, mounts a fresh tmpfs on /run and creates /run/netns in it.
 * Network devices and namespaces created afterwards are thereby private to
 * the test process.
 */
static inline void test_setup(void) {
        int rc;

        test_raise_memlock();
        test_unshare_user_namespace();

        /* new network + mount namespace, owned by the new user namespace */
        rc = unshare(CLONE_NEWNET | CLONE_NEWNS);
        c_assert(rc >= 0);

        /* stop mount events from propagating out of this namespace */
        rc = mount(NULL, "/", "", MS_PRIVATE | MS_REC, NULL);
        c_assert(rc >= 0);

        /* private /run with an empty netns directory */
        rc = mount(NULL, "/run", "tmpfs", 0, NULL);
        c_assert(rc >= 0);

        rc = mkdir("/run/netns", 0755);
        c_assert(rc >= 0);
}

177
src/util/test-timer.c Normal file
View file

@ -0,0 +1,177 @@
/*
* Tests for timer utility library
*/
#undef NDEBUG
#include <c-stdaux.h>
#include <errno.h>
#include <poll.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/timerfd.h>
#include "timer.h"
#define N_TIMEOUTS (10000)
/*
 * Basic API walk-through: schedule two timeouts (at times 1 and 2), pop with
 * an upper bound of 10 and expect the earlier one, unschedule the other one,
 * and verify that a second pop yields nothing.
 */
static void test_api(void) {
        Timer timer = TIMER_NULL(timer);
        Timeout early = TIMEOUT_INIT(early);
        Timeout late = TIMEOUT_INIT(late);
        Timeout *popped;
        int r;

        r = timer_init(&timer);
        c_assert(!r);

        timeout_schedule(&early, &timer, 1);
        timeout_schedule(&late, &timer, 2);

        /* the timeout with the smallest time comes out first */
        r = timer_pop_timeout(&timer, 10, &popped);
        c_assert(!r);
        c_assert(popped == &early);

        /* with the remaining timeout removed, nothing is left to pop */
        timeout_unschedule(&late);

        r = timer_pop_timeout(&timer, 10, &popped);
        c_assert(!r);
        c_assert(!popped);

        timer_deinit(&timer);
}
/*
 * Schedule N_TIMEOUTS timeouts at random times in [1, 128] and pop them tick
 * by tick.
 *
 * For every tick 0..128, all timeouts popped with that tick as upper bound
 * must have been scheduled for exactly that tick. Whenever the previous tick
 * popped at least one timeout (and once at the very start), the timerfd is
 * expected to become readable and to report exactly one expiration. At the
 * end every timeout must have been popped exactly once.
 */
static void test_pop(void) {
        Timer timer = TIMER_NULL(timer);
        Timeout timeouts[N_TIMEOUTS] = {};
        uint64_t times[N_TIMEOUTS] = {};
        size_t n_popped = 0;
        bool expect_trigger = true;
        Timeout *t;
        int r;

        r = timer_init(&timer);
        c_assert(!r);

        for (size_t idx = 0; idx < N_TIMEOUTS; ++idx) {
                timeouts[idx] = (Timeout)TIMEOUT_INIT(timeouts[idx]);
                times[idx] = rand() % 128 + 1;
                timeout_schedule(&timeouts[idx], &timer, times[idx]);
        }

        for (size_t tick = 0; tick <= 128; ++tick) {
                if (expect_trigger) {
                        struct pollfd pfd = {
                                .fd = timer.fd,
                                .events = POLLIN,
                        };
                        uint64_t count;

                        /* the timerfd was (re)programmed: wait for it and consume it */
                        r = poll(&pfd, 1, -1);
                        c_assert(r == 1);

                        r = read(timer.fd, &count, sizeof(count));
                        c_assert(r == sizeof(count));
                        c_assert(count == 1);

                        expect_trigger = false;
                }

                /* pop everything that is due at this tick */
                do {
                        r = timer_pop_timeout(&timer, tick, &t);
                        c_assert(!r);

                        if (t) {
                                uint64_t scheduled = times[t - timeouts];

                                c_assert(scheduled == tick);
                                ++n_popped;
                                expect_trigger = true;
                        }
                } while (t);

                /* nothing more is due: program the timerfd for the next timeout */
                timer_rearm(&timer);
        }

        c_assert(n_popped == N_TIMEOUTS);

        /* the tree must be empty now, whatever the upper bound */
        r = timer_pop_timeout(&timer, (uint64_t)-1, &t);
        c_assert(!r);
        c_assert(!t);

        timer_deinit(&timer);
}
/*
 * Sanity-check raw timerfd behavior with two independent descriptors:
 * arming, querying, and disarming one timerfd must not affect the other.
 */
void test_arm(void) {
        struct itimerspec spec = {
                .it_value = {
                        .tv_sec = 1000,
                },
        };
        int fd1, fd2, r;

        fd1 = timerfd_create(CLOCK_MONOTONIC, TFD_CLOEXEC | TFD_NONBLOCK);
        c_assert(fd1 >= 0);

        /* check the descriptor that was just created, not fd1 again */
        fd2 = timerfd_create(CLOCK_MONOTONIC, TFD_CLOEXEC | TFD_NONBLOCK);
        c_assert(fd2 >= 0);

        /* arm both timers far in the future */
        r = timerfd_settime(fd1, 0, &spec, NULL);
        c_assert(r >= 0);

        r = timerfd_settime(fd2, 0, &spec, NULL);
        c_assert(r >= 0);

        /* both report a non-zero remaining time */
        r = timerfd_gettime(fd1, &spec);
        c_assert(r >= 0);
        c_assert(spec.it_value.tv_sec);

        r = timerfd_gettime(fd2, &spec);
        c_assert(r >= 0);
        c_assert(spec.it_value.tv_sec);

        /* disarm fd1 only: fd1 reads back as zero, fd2 stays armed */
        spec = (struct itimerspec){};

        r = timerfd_settime(fd1, 0, &spec, NULL);
        c_assert(r >= 0);

        r = timerfd_gettime(fd1, &spec);
        c_assert(r >= 0);
        c_assert(!spec.it_value.tv_sec);
        c_assert(!spec.it_value.tv_nsec);

        r = timerfd_gettime(fd2, &spec);
        c_assert(r >= 0);
        c_assert(spec.it_value.tv_sec);

        /* arm both with 1ns: each one must become readable */
        spec = (struct itimerspec){ .it_value = { .tv_nsec = 1, }, };

        r = timerfd_settime(fd1, 0, &spec, NULL);
        c_assert(r >= 0);

        r = poll(&(struct pollfd) { .fd = fd1, .events = POLLIN }, 1, -1);
        c_assert(r == 1);

        r = timerfd_settime(fd2, 0, &spec, NULL);
        c_assert(r >= 0);

        r = poll(&(struct pollfd) { .fd = fd2, .events = POLLIN }, 1, -1);
        c_assert(r == 1);

        /* disarming fd1 must leave the pending expiration on fd2 readable */
        spec = (struct itimerspec){};

        r = timerfd_settime(fd1, 0, &spec, NULL);
        c_assert(r >= 0);

        r = poll(&(struct pollfd) { .fd = fd2, .events = POLLIN }, 1, -1);
        c_assert(r == 1);

        close(fd2);
        close(fd1);
}
/*
 * Test entry point: run the raw timerfd checks first, then the timer API
 * tests. The command-line arguments are not used.
 */
int main(int argc, char **argv) {
        test_arm();

        test_api();
        test_pop();

        return 0;
}

189
src/util/timer.c Normal file
View file

@ -0,0 +1,189 @@
/*
* Timer Utility Library
*/
#include <assert.h>
#include <c-rbtree.h>
#include <c-stdaux.h>
#include <errno.h>
#include <stdlib.h>
#include <sys/timerfd.h>
#include <time.h>
#include "timer.h"
/*
 * Initialize @timer and create its timerfd.
 *
 * CLOCK_BOOTTIME is preferred; if the kernel rejects it with EINVAL, the
 * timerfd is created on CLOCK_MONOTONIC instead. The clock that was actually
 * used is remembered in @timer.
 *
 * Returns 0 on success, or a negative errno value if no timerfd could be
 * created (in which case @timer is left untouched).
 */
int timer_init(Timer *timer) {
        const int flags = TFD_CLOEXEC | TFD_NONBLOCK;
        clockid_t clock = CLOCK_BOOTTIME;
        int fd;

        fd = timerfd_create(clock, flags);
        if (fd < 0) {
                if (errno != EINVAL)
                        return -errno;

                /* boottime clock not supported, retry with monotonic */
                clock = CLOCK_MONOTONIC;
                fd = timerfd_create(clock, flags);
                if (fd < 0)
                        return -errno;
        }

        *timer = (Timer)TIMER_NULL(*timer);
        timer->fd = fd;
        timer->clock = clock;

        return 0;
}
/*
 * Release the resources of @timer.
 *
 * All timeouts must have been unscheduled or popped beforehand (asserted).
 * The timerfd is closed if one is open, and the fd is reset to -1 so that
 * calling this again is harmless.
 */
void timer_deinit(Timer *timer) {
        c_assert(c_rbtree_is_empty(&timer->tree));

        if (timer->fd < 0)
                return;

        close(timer->fd);
        timer->fd = -1;
}
/*
 * Store the current time of the clock used by @timer in @nowp, expressed in
 * nanoseconds. A failing clock_gettime() is treated as fatal.
 */
void timer_now(Timer *timer, uint64_t *nowp) {
        struct timespec now;
        int r;

        r = clock_gettime(timer->clock, &now);
        c_assert(r >= 0);

        *nowp = now.tv_sec * UINT64_C(1000000000) + now.tv_nsec;
}
/*
 * Program the timerfd of @timer for the earliest scheduled timeout.
 *
 * The expiry of the first entry in the timeout tree is used as an absolute
 * time in nanoseconds; with an empty tree the value 0 is used, which clears
 * the timerfd. Scheduled timeouts therefore must never have an expiry of 0
 * (asserted). The timerfd is only touched if the value differs from the one
 * programmed last.
 */
void timer_rearm(Timer *timer) {
        Timeout *first;
        uint64_t deadline = 0;
        int r;

        first = c_rbnode_entry(c_rbtree_first(&timer->tree), Timeout, node);
        if (first) {
                /* 0 is reserved for "disarmed" */
                c_assert(first->timeout);
                deadline = first->timeout;
        }

        if (deadline == timer->scheduled_timeout)
                return;

        struct itimerspec spec = {
                .it_value = {
                        .tv_sec = deadline / UINT64_C(1000000000),
                        .tv_nsec = deadline % UINT64_C(1000000000),
                },
        };

        r = timerfd_settime(timer->fd, TFD_TIMER_ABSTIME, &spec, NULL);
        c_assert(r >= 0);

        timer->scheduled_timeout = deadline;
}
/*
 * Consume a pending expiration notification from the timerfd of @timer.
 *
 * Returns TIMER_E_TRIGGERED if the timerfd reported at least one expiration,
 * 0 if nothing was pending (EAGAIN), -EIO if the read returned an unexpected
 * size or an expiration count of 0, and a negative errno value on any other
 * read failure.
 */
int timer_read(Timer *timer) {
        uint64_t expirations;
        int r;

        r = read(timer->fd, &expirations, sizeof(expirations));
        if (r < 0) {
                /*
                 * EAGAIN just means there are no more pending events.
                 * Anything else cannot be handled gracefully here: the timer
                 * runs on CLOCK_BOOTTIME/MONOTONIC, so ECANCELED cannot
                 * happen. Report the error and let the caller deal with it.
                 */
                return (errno == EAGAIN) ? 0 : -errno;
        }

        /*
         * The kernel guarantees 8-byte reads and only returns data if at
         * least one expiration happened; treat anything else as an I/O error.
         */
        if (r != sizeof(expirations) || expirations == 0)
                return -EIO;

        return TIMER_E_TRIGGERED;
}
/*
 * Pop the earliest timeout of @timer if it is due.
 *
 * If the first timeout in the tree is scheduled at or before @until, it is
 * unlinked from the tree, its expiry is reset to 0, and it is returned in
 * @timeoutp. Otherwise @timeoutp is set to NULL. The timerfd is not
 * reprogrammed here.
 *
 * Always returns 0.
 */
int timer_pop_timeout(Timer *timer, uint64_t until, Timeout **timeoutp) {
        Timeout *first;

        first = c_rbnode_entry(c_rbtree_first(&timer->tree), Timeout, node);

        if (!first || first->timeout > until) {
                *timeoutp = NULL;
                return 0;
        }

        c_rbnode_unlink(&first->node);
        first->timeout = 0;
        *timeoutp = first;

        return 0;
}
/*
 * Schedule (or reschedule) @timeout on @timer to expire at @time.
 *
 * @time must be non-zero (asserted). If @timeout is currently associated
 * with a timer it is unlinked first; if that timer is a different one, it is
 * rearmed. Afterwards @timeout is inserted into the tree of @timer and
 * @timer is rearmed.
 */
void timeout_schedule(Timeout *timeout, Timer *timer, uint64_t time) {
        CRBNode **link, *parent = NULL;

        c_assert(time);

        /* detach from wherever the timeout was scheduled before */
        if (timeout->timer) {
                c_rbnode_unlink(&timeout->node);
                if (timeout->timer != timer)
                        timer_rearm(timeout->timer);
        }

        timeout->timer = timer;
        timeout->timeout = time;

        /*
         * Find the insertion slot by hand: duplicates are allowed in the
         * tree, and equal expiries descend to the right.
         */
        for (link = &timer->tree.root; *link; ) {
                Timeout *entry = c_rbnode_entry(*link, Timeout, node);

                parent = *link;
                if (time < entry->timeout)
                        link = &parent->left;
                else
                        link = &parent->right;
        }

        c_rbtree_add(&timer->tree, parent, link, &timeout->node);

        /* the earliest timeout may have changed */
        timer_rearm(timer);
}
/*
 * Remove @timeout from the timer it is associated with, if any, and rearm
 * that timer. Does nothing if @timeout has no timer.
 */
void timeout_unschedule(Timeout *timeout) {
        Timer *owner = timeout->timer;

        if (owner) {
                c_rbnode_unlink(&timeout->node);
                timeout->timeout = 0;
                timeout->timer = NULL;

                timer_rearm(owner);
        }
}

54
src/util/timer.h Normal file
View file

@ -0,0 +1,54 @@
#pragma once
#include <c-rbtree.h>
#include <c-stdaux.h>
#include <inttypes.h>
#include <stdlib.h>
#include <time.h>
#include <unistd.h>
typedef struct Timer Timer;
typedef struct Timeout Timeout;
/*
 * Non-error return codes of the timer API. Failures are reported as negative
 * errno values instead.
 */
enum {
/* First enumerator, value 0. */
_TIMER_E_SUCCESS,
/* Returned by timer_read() when the timerfd reported an expiration. */
TIMER_E_TRIGGERED,
/* Last enumerator; presumably the number of codes (not used in this file). */
_TIMER_E_N,
};
/*
 * A timerfd together with the ordered set of timeouts it serves.
 */
struct Timer {
/* timerfd file descriptor, or -1 if none is open */
int fd;
/* clock the timerfd was created on; also queried by timer_now() */
clockid_t clock;
/* tree of scheduled Timeout objects, ordered by expiry time */
CRBTree tree;
/* expiry last programmed into the timerfd by timer_rearm(); 0 if cleared */
uint64_t scheduled_timeout;
};
/*
 * Static initializer for a Timer that has no timerfd yet: `fd` is -1 and the
 * tree is set up with C_RBTREE_INIT. Members not listed are zero-initialized.
 * The @_x argument is not used by the expansion.
 */
#define TIMER_NULL(_x) { \
.fd = -1, \
.tree = C_RBTREE_INIT, \
}
/*
 * A single timeout that can be scheduled on a Timer.
 */
struct Timeout {
/* timer this timeout was last scheduled on; NULL after timeout_unschedule() */
Timer *timer;
/* link into the owning timer's tree */
CRBNode node;
/* absolute expiry time in nanoseconds; 0 while not scheduled */
uint64_t timeout;
};
/*
 * Static initializer for the Timeout object @_x. The tree node is set up with
 * C_RBNODE_INIT, which is handed the node member of @_x itself; `timer` and
 * `timeout` are zero-initialized.
 */
#define TIMEOUT_INIT(_x) { \
.node = C_RBNODE_INIT((_x).node), \
}
/* Create the timerfd of @timer; returns 0 or a negative errno value. */
int timer_init(Timer *timer);
/* Close the timerfd of @timer; no timeout may still be scheduled. */
void timer_deinit(Timer *timer);
/* Store the current time of the timer's clock, in nanoseconds, in @nowp. */
void timer_now(Timer *timer, uint64_t *nowp);
/*
 * Unlink and return (via @timerp) the earliest timeout if it is scheduled at
 * or before @now; otherwise set @timerp to NULL. Always returns 0.
 */
int timer_pop_timeout(Timer *timer, uint64_t now, Timeout **timerp);
/* Program the timerfd for the earliest scheduled timeout, or clear it. */
void timer_rearm(Timer *timer);
/*
 * Consume a pending timerfd expiration: TIMER_E_TRIGGERED if one was read,
 * 0 if none was pending, a negative errno value on failure.
 */
int timer_read(Timer *timer);
/* Schedule @timeout on @timer at the non-zero absolute time @time. */
void timeout_schedule(Timeout *timeout, Timer *timer, uint64_t time);
/* Remove @timeout from its timer, if it has one. */
void timeout_unschedule(Timeout *timeout);

1
subprojects/c-list Submodule

@ -0,0 +1 @@
Subproject commit 6c53ef1c0066a3b0d82e9e181e90114eacb7c4aa

1
subprojects/c-rbtree Submodule

@ -0,0 +1 @@
Subproject commit c8cf175278452686cc5993e154d472d0a64d7fac

1
subprojects/c-siphash Submodule

@ -0,0 +1 @@
Subproject commit 2d159c7da1d542f2b1fcbbefea6931bce242b943

1
subprojects/c-stdaux Submodule

@ -0,0 +1 @@
Subproject commit 8b8f941c57a790c277f49b099e73ed9f8ea141af