clat: support layer3 interfaces

When running the CLAT over an interface that doesn't use the Ethernet
header, like an IP tunnel, the BPF program needs to behave
differently.

Pass a boolean configuration flag saying whether to expect the
Ethernet header. The flag is propagated to most functions inside the
program and it is used to compute the right offsets.
This commit is contained in:
Beniamino Galvani 2026-02-05 18:58:52 +01:00
parent 8b0568e911
commit eec0f093db
3 changed files with 107 additions and 51 deletions

View file

@ -78,12 +78,12 @@ struct ip6_frag {
__u32 identification;
} __attribute__((packed));
#define ETH_H_LEN (sizeof(struct ethhdr))
#define IP_H_LEN (sizeof(struct iphdr))
#define IP6_H_LEN (sizeof(struct ipv6hdr))
#define IP6_FRAG_H_LEN (sizeof(struct ip6_frag))
#define ICMP_H_LEN (sizeof(struct icmphdr))
#define ICMP6_H_LEN (sizeof(struct icmp6hdr))
/* Length of the L2 header that precedes the IP header: the size of an
 * Ethernet header when has_eth is true, zero otherwise. The argument is
 * parenthesized so that any expression can be passed safely. */
#define L2_H_LEN(has_eth) ((has_eth) ? sizeof(struct ethhdr) : 0)
#define IP_H_LEN (sizeof(struct iphdr))
#define IP6_H_LEN (sizeof(struct ipv6hdr))
#define IP6_FRAG_H_LEN (sizeof(struct ip6_frag))
#define ICMP_H_LEN (sizeof(struct icmphdr))
#define ICMP6_H_LEN (sizeof(struct icmp6hdr))
/* Wrapper around _ensure_header() that passes the size of the type pointed
 * to by *header, so callers do not have to spell it out. Every macro argument
 * is parenthesized so that arbitrary expressions can be passed safely. */
#define ensure_header(header, skb, data, data_end, offset) \
_ensure_header((void **) (header), (skb), (data), (data_end), sizeof(**(header)), (offset))
@ -121,6 +121,7 @@ static __always_inline void
update_l4_checksum(struct __sk_buff *skb,
struct ipv6hdr *ip6h,
struct iphdr *iph,
bool has_eth,
bool v4to6,
bool is_inner,
bool is_v6_fragment,
@ -136,14 +137,14 @@ update_l4_checksum(struct __sk_buff *skb,
void *to_ptr = &ip6h->saddr;
csum = bpf_csum_diff(from_ptr, 2 * sizeof(__u32), to_ptr, 2 * sizeof(struct in6_addr), 0);
offset = ETH_H_LEN + IP_H_LEN;
offset = L2_H_LEN(has_eth) + IP_H_LEN;
ip_type = ip6h->nexthdr;
} else {
void *from_ptr = &ip6h->saddr;
void *to_ptr = &iph->saddr;
csum = bpf_csum_diff(from_ptr, 2 * sizeof(struct in6_addr), to_ptr, 2 * sizeof(__u32), 0);
offset = ETH_H_LEN + IP6_H_LEN;
offset = L2_H_LEN(has_eth) + IP6_H_LEN;
ip_type = iph->protocol;
if (is_inner) {
@ -179,6 +180,7 @@ update_icmp_checksum(struct __sk_buff *skb,
const struct ipv6hdr *ip6h,
void *icmp_before,
void *icmp_after,
bool has_eth,
bool v4to6,
bool is_inner,
__u32 seed)
@ -210,9 +212,9 @@ update_icmp_checksum(struct __sk_buff *skb,
seed);
if (v4to6) {
offset = ETH_H_LEN + IP_H_LEN + 2;
offset = L2_H_LEN(has_eth) + IP_H_LEN + 2;
} else {
offset = ETH_H_LEN + IP6_H_LEN + 2;
offset = L2_H_LEN(has_eth) + IP6_H_LEN + 2;
if (is_inner)
offset += ICMP6_H_LEN + IP6_H_LEN;
}
@ -233,7 +235,7 @@ update_icmp_checksum(struct __sk_buff *skb,
}
static int
rewrite_icmp(struct __sk_buff *skb, const struct ipv6hdr *ip6h)
rewrite_icmp(struct __sk_buff *skb, const struct ipv6hdr *ip6h, bool has_eth)
{
void *data_end = SKB_DATA_END(skb);
void *data = SKB_DATA(skb);
@ -243,7 +245,7 @@ rewrite_icmp(struct __sk_buff *skb, const struct ipv6hdr *ip6h)
struct icmp6hdr *icmp6;
__u32 mtu;
if (!ensure_header(&icmp, skb, &data, &data_end, ETH_H_LEN + IP_H_LEN))
if (!ensure_header(&icmp, skb, &data, &data_end, L2_H_LEN(has_eth) + IP_H_LEN))
return -1;
icmp_buf = *icmp;
@ -346,7 +348,7 @@ rewrite_icmp(struct __sk_buff *skb, const struct ipv6hdr *ip6h)
}
*icmp6 = icmp6_buf;
update_icmp_checksum(skb, ip6h, &icmp_buf, icmp6, true, false, 0);
update_icmp_checksum(skb, ip6h, &icmp_buf, icmp6, has_eth, true, false, 0);
/* FIXME: also need to rewrite IP header embedded in ICMP error */
@ -530,12 +532,18 @@ clat_handle_v4(struct __sk_buff *skb)
};
struct iphdr *iph;
struct ethhdr *eth;
bool has_eth = config.has_eth_header;
if (!ensure_header(&iph, skb, &data, &data_end, ETH_H_LEN))
if (!ensure_header(&iph, skb, &data, &data_end, L2_H_LEN(has_eth)))
goto out;
eth = data;
if (eth->h_proto != bpf_htons(ETH_P_IP))
if (has_eth) {
eth = data;
if (eth->h_proto != bpf_htons(ETH_P_IP))
goto out;
}
if (iph->version != 4)
goto out;
if (iph->saddr != config.local_v4.s_addr)
@ -576,13 +584,13 @@ clat_handle_v4(struct __sk_buff *skb)
switch (dst_hdr.nexthdr) {
case IPPROTO_ICMP:
if (rewrite_icmp(skb, &dst_hdr))
if (rewrite_icmp(skb, &dst_hdr, has_eth))
goto out;
dst_hdr.nexthdr = IPPROTO_ICMPV6;
break;
case IPPROTO_TCP:
case IPPROTO_UDP:
update_l4_checksum(skb, &dst_hdr, iph, true, false, false, NULL);
update_l4_checksum(skb, &dst_hdr, iph, has_eth, true, false, false, NULL);
break;
default:
break;
@ -594,12 +602,15 @@ clat_handle_v4(struct __sk_buff *skb)
data = SKB_DATA(skb);
data_end = SKB_DATA_END(skb);
if (!ensure_header(&ip6h, skb, &data, &data_end, ETH_H_LEN))
if (!ensure_header(&ip6h, skb, &data, &data_end, L2_H_LEN(has_eth)))
goto out;
eth = data;
eth->h_proto = bpf_htons(ETH_P_IPV6);
*ip6h = dst_hdr;
if (has_eth) {
eth = data;
eth->h_proto = bpf_htons(ETH_P_IPV6);
}
*ip6h = dst_hdr;
ret = bpf_redirect(skb->ifindex, 0);
out:
@ -735,7 +746,7 @@ translate_icmpv6_header(const struct icmp6hdr *icmp6, struct icmphdr *icmp)
}
static int
rewrite_icmpv6_inner(struct __sk_buff *skb, __u32 *csum_diff)
rewrite_icmpv6_inner(struct __sk_buff *skb, __u32 *csum_diff, bool has_eth)
{
void *data_end = SKB_DATA_END(skb);
void *data = SKB_DATA(skb);
@ -751,7 +762,11 @@ rewrite_icmpv6_inner(struct __sk_buff *skb, __u32 *csum_diff)
* -------------------------------------------------------------------------
*/
if (!ensure_header(&icmp6, skb, &data, &data_end, ETH_H_LEN + 2 * IP6_H_LEN + ICMP6_H_LEN))
if (!ensure_header(&icmp6,
skb,
&data,
&data_end,
L2_H_LEN(has_eth) + 2 * IP6_H_LEN + ICMP6_H_LEN))
return -1;
icmp6_buf = *icmp6;
@ -763,9 +778,10 @@ rewrite_icmpv6_inner(struct __sk_buff *skb, __u32 *csum_diff)
*icmp = icmp_buf;
update_icmp_checksum(skb,
(struct ipv6hdr *) (data + ETH_H_LEN),
(struct ipv6hdr *) (data + L2_H_LEN(has_eth)),
&icmp6_buf,
icmp,
has_eth,
false,
true,
0);
@ -774,7 +790,11 @@ rewrite_icmpv6_inner(struct __sk_buff *skb, __u32 *csum_diff)
data_end = SKB_DATA_END(skb);
data = SKB_DATA(skb);
if (!ensure_header(&icmp, skb, &data, &data_end, ETH_H_LEN + 2 * IP6_H_LEN + ICMP6_H_LEN))
if (!ensure_header(&icmp,
skb,
&data,
&data_end,
L2_H_LEN(has_eth) + 2 * IP6_H_LEN + ICMP6_H_LEN))
return -1;
/* Compute the checksum difference between the old ICMPv6 header and the new ICMPv4 one */
@ -786,7 +806,7 @@ rewrite_icmpv6_inner(struct __sk_buff *skb, __u32 *csum_diff)
}
static int
rewrite_ipv6_inner(struct __sk_buff *skb, struct iphdr *dst_hdr, __u32 *csum_diff)
rewrite_ipv6_inner(struct __sk_buff *skb, struct iphdr *dst_hdr, __u32 *csum_diff, bool has_eth)
{
void *data_end = SKB_DATA_END(skb);
void *data = SKB_DATA(skb);
@ -801,7 +821,7 @@ rewrite_ipv6_inner(struct __sk_buff *skb, struct iphdr *dst_hdr, __u32 *csum_dif
* ----------------------------------------------------------------
*/
if (!ensure_header(&ip6h, skb, &data, &data_end, ETH_H_LEN + IP6_H_LEN + ICMP6_H_LEN))
if (!ensure_header(&ip6h, skb, &data, &data_end, L2_H_LEN(has_eth) + IP6_H_LEN + ICMP6_H_LEN))
return -1;
if (!v6addr_equal(&ip6h->saddr, &config.local_v6))
@ -822,12 +842,12 @@ rewrite_ipv6_inner(struct __sk_buff *skb, struct iphdr *dst_hdr, __u32 *csum_dif
switch (dst_hdr->protocol) {
case IPPROTO_ICMP:
if (rewrite_icmpv6_inner(skb, csum_diff))
if (rewrite_icmpv6_inner(skb, csum_diff, has_eth))
return -1;
break;
case IPPROTO_TCP:
case IPPROTO_UDP:
update_l4_checksum(skb, ip6h, dst_hdr, false, true, false, csum_diff);
update_l4_checksum(skb, ip6h, dst_hdr, has_eth, false, true, false, csum_diff);
break;
default:
break;
@ -837,7 +857,7 @@ rewrite_ipv6_inner(struct __sk_buff *skb, struct iphdr *dst_hdr, __u32 *csum_dif
}
static int
rewrite_icmpv6(struct __sk_buff *skb, int *out_length_diff)
rewrite_icmpv6(struct __sk_buff *skb, int *out_length_diff, bool has_eth)
{
void *data_end = SKB_DATA_END(skb);
void *data = SKB_DATA(skb);
@ -856,7 +876,7 @@ rewrite_icmpv6(struct __sk_buff *skb, int *out_length_diff)
* ---------------------------------------------
*/
if (!ensure_header(&icmp6, skb, &data, &data_end, ETH_H_LEN + IP6_H_LEN))
if (!ensure_header(&icmp6, skb, &data, &data_end, L2_H_LEN(has_eth) + IP6_H_LEN))
return -1;
icmp6_buf = *icmp6;
@ -870,9 +890,10 @@ rewrite_icmpv6(struct __sk_buff *skb, int *out_length_diff)
/* ICMPv6 non-error message: only translate the header */
*icmp = icmp_buf;
update_icmp_checksum(skb,
(struct ipv6hdr *) (data + ETH_H_LEN),
(struct ipv6hdr *) (data + L2_H_LEN(has_eth)),
&icmp6_buf,
icmp,
has_eth,
false,
false,
0);
@ -883,7 +904,7 @@ rewrite_icmpv6(struct __sk_buff *skb, int *out_length_diff)
* Track in csum_diff the incremental changes to the checksum for the ICMPv4
* header. */
if (rewrite_ipv6_inner(skb, &ip_in_buf, &csum_diff))
if (rewrite_ipv6_inner(skb, &ip_in_buf, &csum_diff, has_eth))
return -1;
/* The inner IP header shrinks from 40 (IPv6) to 20 (IPv4) bytes; we need to move
@ -901,10 +922,10 @@ rewrite_icmpv6(struct __sk_buff *skb, int *out_length_diff)
data_end = SKB_DATA_END(skb);
data = SKB_DATA(skb);
if (!ensure_header(&ip, skb, &data, &data_end, ETH_H_LEN + IP6_H_LEN + ICMP_H_LEN))
if (!ensure_header(&ip, skb, &data, &data_end, L2_H_LEN(has_eth) + IP6_H_LEN + ICMP_H_LEN))
return -1;
icmp = data + ETH_H_LEN + IP6_H_LEN;
icmp = data + L2_H_LEN(has_eth) + IP6_H_LEN;
/* Rewrite the ICMPv6 header with the translated ICMPv4 one */
*icmp = icmp_buf;
@ -913,9 +934,10 @@ rewrite_icmpv6(struct __sk_buff *skb, int *out_length_diff)
/* Update the ICMPv4 checksum according to all the changes in headers */
update_icmp_checksum(skb,
(struct ipv6hdr *) (data + ETH_H_LEN),
(struct ipv6hdr *) (data + L2_H_LEN(has_eth)),
&icmp6_buf,
icmp,
has_eth,
false,
false,
csum_diff);
@ -938,12 +960,18 @@ clat_handle_v6(struct __sk_buff *skb)
__be32 addr4;
int length_diff = 0;
bool fragmented = false;
bool has_eth = config.has_eth_header;
if (!ensure_header(&ip6h, skb, &data, &data_end, ETH_H_LEN))
if (!ensure_header(&ip6h, skb, &data, &data_end, L2_H_LEN(has_eth)))
goto out;
eth = data;
if (eth->h_proto != bpf_htons(ETH_P_IPV6))
if (has_eth) {
eth = data;
if (eth->h_proto != bpf_htons(ETH_P_IPV6))
goto out;
}
if (ip6h->version != 6)
goto out;
if (!v6addr_equal(&ip6h->daddr, &config.local_v6))
@ -965,10 +993,10 @@ clat_handle_v6(struct __sk_buff *skb)
if (ip6h->nexthdr != IPPROTO_ICMPV6)
goto out;
if (!ensure_header(&icmp6, skb, &data, &data_end, ETH_H_LEN + IP6_H_LEN))
if (!ensure_header(&icmp6, skb, &data, &data_end, L2_H_LEN(has_eth) + IP6_H_LEN))
goto out;
ip6h = data + ETH_H_LEN;
ip6h = data + L2_H_LEN(has_eth);
if (icmp6->icmp6_type != ICMPV6_DEST_UNREACH && icmp6->icmp6_type != ICMPV6_TIME_EXCEED
&& icmp6->icmp6_type != ICMPV6_PKT_TOOBIG)
@ -995,10 +1023,10 @@ clat_handle_v6(struct __sk_buff *skb)
int tot_len;
__u16 offset;
if (!ensure_header(&frag, skb, &data, &data_end, ETH_H_LEN + IP6_H_LEN))
if (!ensure_header(&frag, skb, &data, &data_end, L2_H_LEN(has_eth) + IP6_H_LEN))
goto out;
ip6h = data + ETH_H_LEN;
ip6h = data + L2_H_LEN(has_eth);
/* Translate into an IPv4 fragmented packet, RFC 6145 5.1.1 */
@ -1046,7 +1074,7 @@ clat_handle_v6(struct __sk_buff *skb)
if (fragmented)
goto out;
if (rewrite_icmpv6(skb, &length_diff))
if (rewrite_icmpv6(skb, &length_diff, has_eth))
goto out;
break;
case IPPROTO_TCP:
@ -1054,7 +1082,7 @@ clat_handle_v6(struct __sk_buff *skb)
/* Update the L4 headers only for non-fragmented packets or for the first
* fragment, which contains the L4 header. */
if (!fragmented || (bpf_ntohs(dst_hdr.frag_off) & 0x1FFF) == 0) {
update_l4_checksum(skb, ip6h, &dst_hdr, false, false, fragmented, NULL);
update_l4_checksum(skb, ip6h, &dst_hdr, has_eth, false, false, fragmented, NULL);
}
break;
default:
@ -1067,7 +1095,7 @@ clat_handle_v6(struct __sk_buff *skb)
data = SKB_DATA(skb);
data_end = SKB_DATA_END(skb);
if (!ensure_header(&ip6h, skb, &data, &data_end, ETH_H_LEN))
if (!ensure_header(&ip6h, skb, &data, &data_end, L2_H_LEN(has_eth)))
goto out;
dst_hdr.tot_len = bpf_htons(bpf_ntohs(ip6h->payload_len) + length_diff + IP_H_LEN);
@ -1089,12 +1117,15 @@ clat_handle_v6(struct __sk_buff *skb)
data = SKB_DATA(skb);
data_end = SKB_DATA_END(skb);
if (!ensure_header(&iph, skb, &data, &data_end, ETH_H_LEN))
if (!ensure_header(&iph, skb, &data, &data_end, L2_H_LEN(has_eth)))
goto out;
eth = data;
eth->h_proto = bpf_htons(ETH_P_IP);
*iph = dst_hdr;
if (has_eth) {
eth = data;
eth->h_proto = bpf_htons(ETH_P_IP);
}
*iph = dst_hdr;
ret = bpf_redirect(skb->ifindex, BPF_F_INGRESS);
out:

View file

@ -9,6 +9,7 @@ struct clat_config {
struct in6_addr pref64;
struct in_addr local_v4;
unsigned pref64_len;
bool has_eth_header;
};
#endif

View file

@ -7,6 +7,7 @@
#include "libnm-std-aux/nm-linux-compat.h"
#include <net/if.h>
#include <net/if_arp.h>
#include "nm-compat-headers/linux/if_addr.h"
#include <linux/if_ether.h>
#include <linux/rtnetlink.h>
@ -5691,6 +5692,8 @@ _l3_commit_pref64(NML3Cfg *self, NML3CfgCommitType commit_type)
char buf[100];
struct clat_config clat_config;
gboolean v6_changed;
const NMPlatformLink *pllink;
gboolean has_ethernet_header = FALSE;
if (l3cd && nm_l3_config_data_get_pref64(l3cd, &_l3cd_pref64_inner, &l3cd_pref64_plen)) {
l3cd_pref64 = &_l3cd_pref64_inner;
@ -5730,12 +5733,33 @@ _l3_commit_pref64(NML3Cfg *self, NML3CfgCommitType commit_type)
_LOGT("clat: program attached successfully");
}
pllink = nm_l3cfg_get_pllink(self, TRUE);
if (!pllink) {
has_ethernet_header = TRUE;
} else {
switch (pllink->arptype) {
case ARPHRD_ETHER:
has_ethernet_header = TRUE;
break;
case ARPHRD_NONE:
case ARPHRD_PPP:
case ARPHRD_RAWIP:
has_ethernet_header = FALSE;
break;
default:
_LOGD("clat: unknown ARP type %u, assuming the interface uses no L2 header",
pllink->arptype);
has_ethernet_header = FALSE;
}
}
/* Pass configuration to the BPF program */
memset(&clat_config, 0, sizeof(clat_config));
clat_config.local_v4.s_addr = self->priv.p->clat_address_4->addr;
clat_config.local_v6 = self->priv.p->clat_address_6.address;
clat_config.pref64 = *l3cd_pref64;
clat_config.pref64_len = l3cd_pref64_plen;
clat_config.has_eth_header = has_ethernet_header;
self->priv.p->clat_bpf->bss->config = clat_config;
if (self->priv.p->clat_socket < 0) {