bpf: fix bpf helpers to use skb->mac_header relative offsets
For the short-term solution, lets fix bpf helper functions to use
skb->mac_header relative offsets instead of skb->data in order to
get the same eBPF programs with cls_bpf and act_bpf work on ingress
and egress qdisc path. We need to ensure that mac_header is set
before calling into programs. This is effectively the first option
from below referenced discussion.
More long term solution for LD_ABS|LD_IND instructions will be more
intrusive but also more beneficial than this, and implemented later
as it's too risky at this point in time.
I.e., we plan to look into the option of moving skb_pull() out of
eth_type_trans() and into netif_receive_skb() as has been suggested
as second option. Meanwhile, this solution ensures ingress can be
used with eBPF, too, and that we won't run into ABI troubles later.
For dealing with negative offsets inside eBPF helper functions,
we've implemented bpf_skb_clone_unwritable() to test for unwriteable
headers.
Reference: http://thread.gmane.org/gmane.linux.network/359129/focus=359694
Fixes: 608cd71a9c7c ("tc: bpf: generalize pedit action")
Fixes: 91bc4822c3d6 ("tc: bpf: add checksum helpers")
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
diff --git a/samples/bpf/tcbpf1_kern.c b/samples/bpf/tcbpf1_kern.c
index 7cf3f42..7c27710 100644
--- a/samples/bpf/tcbpf1_kern.c
+++ b/samples/bpf/tcbpf1_kern.c
@@ -4,6 +4,8 @@
#include <uapi/linux/ip.h>
#include <uapi/linux/in.h>
#include <uapi/linux/tcp.h>
+#include <uapi/linux/filter.h>
+
#include "bpf_helpers.h"
/* compiler workaround */
@@ -14,18 +16,12 @@
bpf_skb_store_bytes(skb, 0, mac, ETH_ALEN, 1);
}
-/* use 1 below for ingress qdisc and 0 for egress */
-#if 0
-#undef ETH_HLEN
-#define ETH_HLEN 0
-#endif
-
#define IP_CSUM_OFF (ETH_HLEN + offsetof(struct iphdr, check))
#define TOS_OFF (ETH_HLEN + offsetof(struct iphdr, tos))
static inline void set_ip_tos(struct __sk_buff *skb, __u8 new_tos)
{
- __u8 old_tos = load_byte(skb, TOS_OFF);
+ __u8 old_tos = load_byte(skb, BPF_LL_OFF + TOS_OFF);
bpf_l3_csum_replace(skb, IP_CSUM_OFF, htons(old_tos), htons(new_tos), 2);
bpf_skb_store_bytes(skb, TOS_OFF, &new_tos, sizeof(new_tos), 0);
@@ -38,7 +34,7 @@
static inline void set_tcp_ip_src(struct __sk_buff *skb, __u32 new_ip)
{
- __u32 old_ip = _htonl(load_word(skb, IP_SRC_OFF));
+ __u32 old_ip = _htonl(load_word(skb, BPF_LL_OFF + IP_SRC_OFF));
bpf_l4_csum_replace(skb, TCP_CSUM_OFF, old_ip, new_ip, IS_PSEUDO | sizeof(new_ip));
bpf_l3_csum_replace(skb, IP_CSUM_OFF, old_ip, new_ip, sizeof(new_ip));
@@ -48,7 +44,7 @@
#define TCP_DPORT_OFF (ETH_HLEN + sizeof(struct iphdr) + offsetof(struct tcphdr, dest))
static inline void set_tcp_dest_port(struct __sk_buff *skb, __u16 new_port)
{
- __u16 old_port = htons(load_half(skb, TCP_DPORT_OFF));
+ __u16 old_port = htons(load_half(skb, BPF_LL_OFF + TCP_DPORT_OFF));
bpf_l4_csum_replace(skb, TCP_CSUM_OFF, old_port, new_port, sizeof(new_port));
bpf_skb_store_bytes(skb, TCP_DPORT_OFF, &new_port, sizeof(new_port), 0);
@@ -57,7 +53,7 @@
SEC("classifier")
int bpf_prog1(struct __sk_buff *skb)
{
- __u8 proto = load_byte(skb, ETH_HLEN + offsetof(struct iphdr, protocol));
+ __u8 proto = load_byte(skb, BPF_LL_OFF + ETH_HLEN + offsetof(struct iphdr, protocol));
long *value;
if (proto == IPPROTO_TCP) {