bpf, arm64: fix jit branch offset related to ldimm64

When the instruction right before the branch destination is
a 64 bit load immediate, we currently calculate the wrong
jump offset in the ctx->offset[] array as we only account
one instruction slot for the 64 bit load immediate although
it uses two BPF instructions. Fix it up by setting the offset
into the right slot after we incremented the index.

Before (ldimm64 test 1):

  [...]
  00000020:  52800007  mov w7, #0x0 // #0
  00000024:  d2800060  mov x0, #0x3 // #3
  00000028:  d2800041  mov x1, #0x2 // #2
  0000002c:  eb01001f  cmp x0, x1
  00000030:  54ffff82  b.cs 0x00000020
  00000034:  d29fffe7  mov x7, #0xffff // #65535
  00000038:  f2bfffe7  movk x7, #0xffff, lsl #16
  0000003c:  f2dfffe7  movk x7, #0xffff, lsl #32
  00000040:  f2ffffe7  movk x7, #0xffff, lsl #48
  00000044:  d29dddc7  mov x7, #0xeeee // #61166
  00000048:  f2bdddc7  movk x7, #0xeeee, lsl #16
  0000004c:  f2ddddc7  movk x7, #0xeeee, lsl #32
  00000050:  f2fdddc7  movk x7, #0xeeee, lsl #48
  [...]

After (ldimm64 test 1):

  [...]
  00000020:  52800007  mov w7, #0x0 // #0
  00000024:  d2800060  mov x0, #0x3 // #3
  00000028:  d2800041  mov x1, #0x2 // #2
  0000002c:  eb01001f  cmp x0, x1
  00000030:  540000a2  b.cs 0x00000044
  00000034:  d29fffe7  mov x7, #0xffff // #65535
  00000038:  f2bfffe7  movk x7, #0xffff, lsl #16
  0000003c:  f2dfffe7  movk x7, #0xffff, lsl #32
  00000040:  f2ffffe7  movk x7, #0xffff, lsl #48
  00000044:  d29dddc7  mov x7, #0xeeee // #61166
  00000048:  f2bdddc7  movk x7, #0xeeee, lsl #16
  0000004c:  f2ddddc7  movk x7, #0xeeee, lsl #32
  00000050:  f2fdddc7  movk x7, #0xeeee, lsl #48
  [...]

Also, add a couple of test cases to make sure JITs pass
this test. Tested on Cavium ThunderX ARMv8. The added
test cases all pass after the fix.

Fixes: 8eee539ddea0 ("arm64: bpf: fix out-of-bounds read in bpf2a64_offset()")
Reported-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Alexei Starovoitov <ast@kernel.org>
Cc: Xi Wang <xi.wang@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
diff --git a/lib/test_bpf.c b/lib/test_bpf.c
index 3a7730c..a0f6628 100644
--- a/lib/test_bpf.c
+++ b/lib/test_bpf.c
@@ -4761,6 +4761,51 @@ static struct bpf_test tests[] = {
 		{ },
 		{ { 0, 1 } },
 	},
+	{
+		/* Mainly testing JIT + imm64 here. */
+		"JMP_JGE_X: ldimm64 test 1",
+		.u.insns_int = {
+			BPF_ALU32_IMM(BPF_MOV, R0, 0),
+			BPF_LD_IMM64(R1, 3),
+			BPF_LD_IMM64(R2, 2),
+			BPF_JMP_REG(BPF_JGE, R1, R2, 2),
+			BPF_LD_IMM64(R0, 0xffffffffffffffffUL),
+			BPF_LD_IMM64(R0, 0xeeeeeeeeeeeeeeeeUL),
+			BPF_EXIT_INSN(),
+		},
+		INTERNAL,
+		{ },
+		{ { 0, 0xeeeeeeeeU } },
+	},
+	{
+		"JMP_JGE_X: ldimm64 test 2",
+		.u.insns_int = {
+			BPF_ALU32_IMM(BPF_MOV, R0, 0),
+			BPF_LD_IMM64(R1, 3),
+			BPF_LD_IMM64(R2, 2),
+			BPF_JMP_REG(BPF_JGE, R1, R2, 0),
+			BPF_LD_IMM64(R0, 0xffffffffffffffffUL),
+			BPF_EXIT_INSN(),
+		},
+		INTERNAL,
+		{ },
+		{ { 0, 0xffffffffU } },
+	},
+	{
+		"JMP_JGE_X: ldimm64 test 3",
+		.u.insns_int = {
+			BPF_ALU32_IMM(BPF_MOV, R0, 1),
+			BPF_LD_IMM64(R1, 3),
+			BPF_LD_IMM64(R2, 2),
+			BPF_JMP_REG(BPF_JGE, R1, R2, 4),
+			BPF_LD_IMM64(R0, 0xffffffffffffffffUL),
+			BPF_LD_IMM64(R0, 0xeeeeeeeeeeeeeeeeUL),
+			BPF_EXIT_INSN(),
+		},
+		INTERNAL,
+		{ },
+		{ { 0, 1 } },
+	},
 	/* BPF_JMP | BPF_JNE | BPF_X */
 	{
 		"JMP_JNE_X: if (3 != 2) return 1",