bpf: introduce bpf_skb_vlan_push/pop() helpers

Allow eBPF programs attached to TC qdiscs call skb_vlan_push/pop via
helper functions. These functions may change skb->data/hlen which are
cached by some JITs to improve performance of ld_abs/ld_ind instructions.
Therefore JITs need to recognize bpf_skb_vlan_push/pop() calls,
re-compute header len and re-cache skb->data/hlen back into cpu registers.
Note, skb->data/hlen are not directly accessible from the programs,
so any changes to skb->data done either by these helpers or by other
TC actions are safe.

eBPF JIT supported by three architectures:
- arm64 JIT is using bpf_load_pointer() without caching, so it's ok as-is.
- x64 JIT re-caches skb->data/hlen unconditionally after vlan_push/pop calls
  (experiments showed that conditional re-caching is slower).
- s390 JIT falls back to interpreter for now when bpf_skb_vlan_push() is present
  in the program (re-caching is tbd).

These helpers allow more scalable handling of vlan from the programs.
Instead of creating thousands of vlan netdevs on top of eth0 and attaching
TC+ingress+bpf to all of them, the program can be attached to eth0 directly
and manipulate vlans as necessary.

Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
index 579a8fd..6c335a8 100644
--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -315,6 +315,26 @@
 	*pprog = prog;
 }
 
+
+static void emit_load_skb_data_hlen(u8 **pprog)
+{
+	u8 *prog = *pprog;
+	int cnt = 0;
+
+	/* r9d = skb->len - skb->data_len (headlen)
+	 * r10 = skb->data
+	 */
+	/* mov %r9d, off32(%rdi) */
+	EMIT3_off32(0x44, 0x8b, 0x8f, offsetof(struct sk_buff, len));
+
+	/* sub %r9d, off32(%rdi) */
+	EMIT3_off32(0x44, 0x2b, 0x8f, offsetof(struct sk_buff, data_len));
+
+	/* mov %r10, off32(%rdi) */
+	EMIT3_off32(0x4c, 0x8b, 0x97, offsetof(struct sk_buff, data));
+	*pprog = prog;
+}
+
 static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image,
 		  int oldproglen, struct jit_context *ctx)
 {
@@ -329,36 +349,8 @@
 
 	emit_prologue(&prog);
 
-	if (seen_ld_abs) {
-		/* r9d : skb->len - skb->data_len (headlen)
-		 * r10 : skb->data
-		 */
-		if (is_imm8(offsetof(struct sk_buff, len)))
-			/* mov %r9d, off8(%rdi) */
-			EMIT4(0x44, 0x8b, 0x4f,
-			      offsetof(struct sk_buff, len));
-		else
-			/* mov %r9d, off32(%rdi) */
-			EMIT3_off32(0x44, 0x8b, 0x8f,
-				    offsetof(struct sk_buff, len));
-
-		if (is_imm8(offsetof(struct sk_buff, data_len)))
-			/* sub %r9d, off8(%rdi) */
-			EMIT4(0x44, 0x2b, 0x4f,
-			      offsetof(struct sk_buff, data_len));
-		else
-			EMIT3_off32(0x44, 0x2b, 0x8f,
-				    offsetof(struct sk_buff, data_len));
-
-		if (is_imm8(offsetof(struct sk_buff, data)))
-			/* mov %r10, off8(%rdi) */
-			EMIT4(0x4c, 0x8b, 0x57,
-			      offsetof(struct sk_buff, data));
-		else
-			/* mov %r10, off32(%rdi) */
-			EMIT3_off32(0x4c, 0x8b, 0x97,
-				    offsetof(struct sk_buff, data));
-	}
+	if (seen_ld_abs)
+		emit_load_skb_data_hlen(&prog);
 
 	for (i = 0; i < insn_cnt; i++, insn++) {
 		const s32 imm32 = insn->imm;
@@ -367,6 +359,7 @@
 		u8 b1 = 0, b2 = 0, b3 = 0;
 		s64 jmp_offset;
 		u8 jmp_cond;
+		bool reload_skb_data;
 		int ilen;
 		u8 *func;
 
@@ -818,12 +811,18 @@
 			func = (u8 *) __bpf_call_base + imm32;
 			jmp_offset = func - (image + addrs[i]);
 			if (seen_ld_abs) {
-				EMIT2(0x41, 0x52); /* push %r10 */
-				EMIT2(0x41, 0x51); /* push %r9 */
-				/* need to adjust jmp offset, since
-				 * pop %r9, pop %r10 take 4 bytes after call insn
-				 */
-				jmp_offset += 4;
+				reload_skb_data = bpf_helper_changes_skb_data(func);
+				if (reload_skb_data) {
+					EMIT1(0x57); /* push %rdi */
+					jmp_offset += 22; /* pop, mov, sub, mov */
+				} else {
+					EMIT2(0x41, 0x52); /* push %r10 */
+					EMIT2(0x41, 0x51); /* push %r9 */
+					/* need to adjust jmp offset, since
+					 * pop %r9, pop %r10 take 4 bytes after call insn
+					 */
+					jmp_offset += 4;
+				}
 			}
 			if (!imm32 || !is_simm32(jmp_offset)) {
 				pr_err("unsupported bpf func %d addr %p image %p\n",
@@ -832,8 +831,13 @@
 			}
 			EMIT1_off32(0xE8, jmp_offset);
 			if (seen_ld_abs) {
-				EMIT2(0x41, 0x59); /* pop %r9 */
-				EMIT2(0x41, 0x5A); /* pop %r10 */
+				if (reload_skb_data) {
+					EMIT1(0x5F); /* pop %rdi */
+					emit_load_skb_data_hlen(&prog);
+				} else {
+					EMIT2(0x41, 0x59); /* pop %r9 */
+					EMIT2(0x41, 0x5A); /* pop %r10 */
+				}
 			}
 			break;