bpf/tests: Add more tests for ALU and ATOMIC register clobbering

This patch expands the register-clobbering-during-function-call tests
to cover more all ALU32/64 MUL, DIV and MOD operations and all ATOMIC
operations. In short, if a JIT implements a complex operation with
a call to an external function, it must make sure to save and restore
all its caller-saved registers that may be clobbered by the call.

Signed-off-by: Johan Almbladh <johan.almbladh@anyfinetworks.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Link: https://lore.kernel.org/bpf/20211001130348.3670534-6-johan.almbladh@anyfinetworks.com
diff --git a/lib/test_bpf.c b/lib/test_bpf.c
index c7db901..201f340 100644
--- a/lib/test_bpf.c
+++ b/lib/test_bpf.c
@@ -3746,76 +3746,6 @@ static struct bpf_test tests[] = {
 	},
 	{
 		/*
-		 * Register (non-)clobbering test, in the case where a 32-bit
-		 * JIT implements complex ALU64 operations via function calls.
-		 * If so, the function call must be invisible in the eBPF
-		 * registers. The JIT must then save and restore relevant
-		 * registers during the call. The following tests check that
-		 * the eBPF registers retain their values after such a call.
-		 */
-		"INT: Register clobbering, R1 updated",
-		.u.insns_int = {
-			BPF_ALU32_IMM(BPF_MOV, R0, 0),
-			BPF_ALU32_IMM(BPF_MOV, R1, 123456789),
-			BPF_ALU32_IMM(BPF_MOV, R2, 2),
-			BPF_ALU32_IMM(BPF_MOV, R3, 3),
-			BPF_ALU32_IMM(BPF_MOV, R4, 4),
-			BPF_ALU32_IMM(BPF_MOV, R5, 5),
-			BPF_ALU32_IMM(BPF_MOV, R6, 6),
-			BPF_ALU32_IMM(BPF_MOV, R7, 7),
-			BPF_ALU32_IMM(BPF_MOV, R8, 8),
-			BPF_ALU32_IMM(BPF_MOV, R9, 9),
-			BPF_ALU64_IMM(BPF_DIV, R1, 123456789),
-			BPF_JMP_IMM(BPF_JNE, R0, 0, 10),
-			BPF_JMP_IMM(BPF_JNE, R1, 1, 9),
-			BPF_JMP_IMM(BPF_JNE, R2, 2, 8),
-			BPF_JMP_IMM(BPF_JNE, R3, 3, 7),
-			BPF_JMP_IMM(BPF_JNE, R4, 4, 6),
-			BPF_JMP_IMM(BPF_JNE, R5, 5, 5),
-			BPF_JMP_IMM(BPF_JNE, R6, 6, 4),
-			BPF_JMP_IMM(BPF_JNE, R7, 7, 3),
-			BPF_JMP_IMM(BPF_JNE, R8, 8, 2),
-			BPF_JMP_IMM(BPF_JNE, R9, 9, 1),
-			BPF_ALU32_IMM(BPF_MOV, R0, 1),
-			BPF_EXIT_INSN(),
-		},
-		INTERNAL,
-		{ },
-		{ { 0, 1 } }
-	},
-	{
-		"INT: Register clobbering, R2 updated",
-		.u.insns_int = {
-			BPF_ALU32_IMM(BPF_MOV, R0, 0),
-			BPF_ALU32_IMM(BPF_MOV, R1, 1),
-			BPF_ALU32_IMM(BPF_MOV, R2, 2 * 123456789),
-			BPF_ALU32_IMM(BPF_MOV, R3, 3),
-			BPF_ALU32_IMM(BPF_MOV, R4, 4),
-			BPF_ALU32_IMM(BPF_MOV, R5, 5),
-			BPF_ALU32_IMM(BPF_MOV, R6, 6),
-			BPF_ALU32_IMM(BPF_MOV, R7, 7),
-			BPF_ALU32_IMM(BPF_MOV, R8, 8),
-			BPF_ALU32_IMM(BPF_MOV, R9, 9),
-			BPF_ALU64_IMM(BPF_DIV, R2, 123456789),
-			BPF_JMP_IMM(BPF_JNE, R0, 0, 10),
-			BPF_JMP_IMM(BPF_JNE, R1, 1, 9),
-			BPF_JMP_IMM(BPF_JNE, R2, 2, 8),
-			BPF_JMP_IMM(BPF_JNE, R3, 3, 7),
-			BPF_JMP_IMM(BPF_JNE, R4, 4, 6),
-			BPF_JMP_IMM(BPF_JNE, R5, 5, 5),
-			BPF_JMP_IMM(BPF_JNE, R6, 6, 4),
-			BPF_JMP_IMM(BPF_JNE, R7, 7, 3),
-			BPF_JMP_IMM(BPF_JNE, R8, 8, 2),
-			BPF_JMP_IMM(BPF_JNE, R9, 9, 1),
-			BPF_ALU32_IMM(BPF_MOV, R0, 1),
-			BPF_EXIT_INSN(),
-		},
-		INTERNAL,
-		{ },
-		{ { 0, 1 } }
-	},
-	{
-		/*
 		 * Test 32-bit JITs that implement complex ALU64 operations as
 		 * function calls R0 = f(R1, R2), and must re-arrange operands.
 		 */
@@ -10586,6 +10516,203 @@ static struct bpf_test tests[] = {
 		{},
 		{ { 0, 2 } },
 	},
+	/*
+	 * Register (non-)clobbering tests for the case where a JIT implements
+	 * complex ALU or ATOMIC operations via function calls. If so, the
+	 * function call must be transparent to the eBPF registers. The JIT
+	 * must therefore save and restore relevant registers across the call.
+	 * The following tests check that the eBPF registers retain their
+	 * values after such an operation. Mainly intended for complex ALU
+	 * and atomic operation, but we run it for all. You never know...
+	 *
+	 * Note that each operations should be tested twice with different
+	 * destinations, to check preservation for all registers.
+	 */
+#define BPF_TEST_CLOBBER_ALU(alu, op, dst, src)			\
+	{							\
+		#alu "_" #op " to " #dst ": no clobbering",	\
+		.u.insns_int = {				\
+			BPF_ALU64_IMM(BPF_MOV, R0, R0),		\
+			BPF_ALU64_IMM(BPF_MOV, R1, R1),		\
+			BPF_ALU64_IMM(BPF_MOV, R2, R2),		\
+			BPF_ALU64_IMM(BPF_MOV, R3, R3),		\
+			BPF_ALU64_IMM(BPF_MOV, R4, R4),		\
+			BPF_ALU64_IMM(BPF_MOV, R5, R5),		\
+			BPF_ALU64_IMM(BPF_MOV, R6, R6),		\
+			BPF_ALU64_IMM(BPF_MOV, R7, R7),		\
+			BPF_ALU64_IMM(BPF_MOV, R8, R8),		\
+			BPF_ALU64_IMM(BPF_MOV, R9, R9),		\
+			BPF_##alu(BPF_ ##op, dst, src),		\
+			BPF_ALU32_IMM(BPF_MOV, dst, dst),	\
+			BPF_JMP_IMM(BPF_JNE, R0, R0, 10),	\
+			BPF_JMP_IMM(BPF_JNE, R1, R1, 9),	\
+			BPF_JMP_IMM(BPF_JNE, R2, R2, 8),	\
+			BPF_JMP_IMM(BPF_JNE, R3, R3, 7),	\
+			BPF_JMP_IMM(BPF_JNE, R4, R4, 6),	\
+			BPF_JMP_IMM(BPF_JNE, R5, R5, 5),	\
+			BPF_JMP_IMM(BPF_JNE, R6, R6, 4),	\
+			BPF_JMP_IMM(BPF_JNE, R7, R7, 3),	\
+			BPF_JMP_IMM(BPF_JNE, R8, R8, 2),	\
+			BPF_JMP_IMM(BPF_JNE, R9, R9, 1),	\
+			BPF_ALU64_IMM(BPF_MOV, R0, 1),		\
+			BPF_EXIT_INSN(),			\
+		},						\
+		INTERNAL,					\
+		{ },						\
+		{ { 0, 1 } }					\
+	}
+	/* ALU64 operations, register clobbering */
+	BPF_TEST_CLOBBER_ALU(ALU64_IMM, AND, R8, 123456789),
+	BPF_TEST_CLOBBER_ALU(ALU64_IMM, AND, R9, 123456789),
+	BPF_TEST_CLOBBER_ALU(ALU64_IMM, OR, R8, 123456789),
+	BPF_TEST_CLOBBER_ALU(ALU64_IMM, OR, R9, 123456789),
+	BPF_TEST_CLOBBER_ALU(ALU64_IMM, XOR, R8, 123456789),
+	BPF_TEST_CLOBBER_ALU(ALU64_IMM, XOR, R9, 123456789),
+	BPF_TEST_CLOBBER_ALU(ALU64_IMM, LSH, R8, 12),
+	BPF_TEST_CLOBBER_ALU(ALU64_IMM, LSH, R9, 12),
+	BPF_TEST_CLOBBER_ALU(ALU64_IMM, RSH, R8, 12),
+	BPF_TEST_CLOBBER_ALU(ALU64_IMM, RSH, R9, 12),
+	BPF_TEST_CLOBBER_ALU(ALU64_IMM, ARSH, R8, 12),
+	BPF_TEST_CLOBBER_ALU(ALU64_IMM, ARSH, R9, 12),
+	BPF_TEST_CLOBBER_ALU(ALU64_IMM, ADD, R8, 123456789),
+	BPF_TEST_CLOBBER_ALU(ALU64_IMM, ADD, R9, 123456789),
+	BPF_TEST_CLOBBER_ALU(ALU64_IMM, SUB, R8, 123456789),
+	BPF_TEST_CLOBBER_ALU(ALU64_IMM, SUB, R9, 123456789),
+	BPF_TEST_CLOBBER_ALU(ALU64_IMM, MUL, R8, 123456789),
+	BPF_TEST_CLOBBER_ALU(ALU64_IMM, MUL, R9, 123456789),
+	BPF_TEST_CLOBBER_ALU(ALU64_IMM, DIV, R8, 123456789),
+	BPF_TEST_CLOBBER_ALU(ALU64_IMM, DIV, R9, 123456789),
+	BPF_TEST_CLOBBER_ALU(ALU64_IMM, MOD, R8, 123456789),
+	BPF_TEST_CLOBBER_ALU(ALU64_IMM, MOD, R9, 123456789),
+	/* ALU32 immediate operations, register clobbering */
+	BPF_TEST_CLOBBER_ALU(ALU32_IMM, AND, R8, 123456789),
+	BPF_TEST_CLOBBER_ALU(ALU32_IMM, AND, R9, 123456789),
+	BPF_TEST_CLOBBER_ALU(ALU32_IMM, OR, R8, 123456789),
+	BPF_TEST_CLOBBER_ALU(ALU32_IMM, OR, R9, 123456789),
+	BPF_TEST_CLOBBER_ALU(ALU32_IMM, XOR, R8, 123456789),
+	BPF_TEST_CLOBBER_ALU(ALU32_IMM, XOR, R9, 123456789),
+	BPF_TEST_CLOBBER_ALU(ALU32_IMM, LSH, R8, 12),
+	BPF_TEST_CLOBBER_ALU(ALU32_IMM, LSH, R9, 12),
+	BPF_TEST_CLOBBER_ALU(ALU32_IMM, RSH, R8, 12),
+	BPF_TEST_CLOBBER_ALU(ALU32_IMM, RSH, R9, 12),
+	BPF_TEST_CLOBBER_ALU(ALU32_IMM, ARSH, R8, 12),
+	BPF_TEST_CLOBBER_ALU(ALU32_IMM, ARSH, R9, 12),
+	BPF_TEST_CLOBBER_ALU(ALU32_IMM, ADD, R8, 123456789),
+	BPF_TEST_CLOBBER_ALU(ALU32_IMM, ADD, R9, 123456789),
+	BPF_TEST_CLOBBER_ALU(ALU32_IMM, SUB, R8, 123456789),
+	BPF_TEST_CLOBBER_ALU(ALU32_IMM, SUB, R9, 123456789),
+	BPF_TEST_CLOBBER_ALU(ALU32_IMM, MUL, R8, 123456789),
+	BPF_TEST_CLOBBER_ALU(ALU32_IMM, MUL, R9, 123456789),
+	BPF_TEST_CLOBBER_ALU(ALU32_IMM, DIV, R8, 123456789),
+	BPF_TEST_CLOBBER_ALU(ALU32_IMM, DIV, R9, 123456789),
+	BPF_TEST_CLOBBER_ALU(ALU32_IMM, MOD, R8, 123456789),
+	BPF_TEST_CLOBBER_ALU(ALU32_IMM, MOD, R9, 123456789),
+	/* ALU64 register operations, register clobbering */
+	BPF_TEST_CLOBBER_ALU(ALU64_REG, AND, R8, R1),
+	BPF_TEST_CLOBBER_ALU(ALU64_REG, AND, R9, R1),
+	BPF_TEST_CLOBBER_ALU(ALU64_REG, OR, R8, R1),
+	BPF_TEST_CLOBBER_ALU(ALU64_REG, OR, R9, R1),
+	BPF_TEST_CLOBBER_ALU(ALU64_REG, XOR, R8, R1),
+	BPF_TEST_CLOBBER_ALU(ALU64_REG, XOR, R9, R1),
+	BPF_TEST_CLOBBER_ALU(ALU64_REG, LSH, R8, R1),
+	BPF_TEST_CLOBBER_ALU(ALU64_REG, LSH, R9, R1),
+	BPF_TEST_CLOBBER_ALU(ALU64_REG, RSH, R8, R1),
+	BPF_TEST_CLOBBER_ALU(ALU64_REG, RSH, R9, R1),
+	BPF_TEST_CLOBBER_ALU(ALU64_REG, ARSH, R8, R1),
+	BPF_TEST_CLOBBER_ALU(ALU64_REG, ARSH, R9, R1),
+	BPF_TEST_CLOBBER_ALU(ALU64_REG, ADD, R8, R1),
+	BPF_TEST_CLOBBER_ALU(ALU64_REG, ADD, R9, R1),
+	BPF_TEST_CLOBBER_ALU(ALU64_REG, SUB, R8, R1),
+	BPF_TEST_CLOBBER_ALU(ALU64_REG, SUB, R9, R1),
+	BPF_TEST_CLOBBER_ALU(ALU64_REG, MUL, R8, R1),
+	BPF_TEST_CLOBBER_ALU(ALU64_REG, MUL, R9, R1),
+	BPF_TEST_CLOBBER_ALU(ALU64_REG, DIV, R8, R1),
+	BPF_TEST_CLOBBER_ALU(ALU64_REG, DIV, R9, R1),
+	BPF_TEST_CLOBBER_ALU(ALU64_REG, MOD, R8, R1),
+	BPF_TEST_CLOBBER_ALU(ALU64_REG, MOD, R9, R1),
+	/* ALU32 register operations, register clobbering */
+	BPF_TEST_CLOBBER_ALU(ALU32_REG, AND, R8, R1),
+	BPF_TEST_CLOBBER_ALU(ALU32_REG, AND, R9, R1),
+	BPF_TEST_CLOBBER_ALU(ALU32_REG, OR, R8, R1),
+	BPF_TEST_CLOBBER_ALU(ALU32_REG, OR, R9, R1),
+	BPF_TEST_CLOBBER_ALU(ALU32_REG, XOR, R8, R1),
+	BPF_TEST_CLOBBER_ALU(ALU32_REG, XOR, R9, R1),
+	BPF_TEST_CLOBBER_ALU(ALU32_REG, LSH, R8, R1),
+	BPF_TEST_CLOBBER_ALU(ALU32_REG, LSH, R9, R1),
+	BPF_TEST_CLOBBER_ALU(ALU32_REG, RSH, R8, R1),
+	BPF_TEST_CLOBBER_ALU(ALU32_REG, RSH, R9, R1),
+	BPF_TEST_CLOBBER_ALU(ALU32_REG, ARSH, R8, R1),
+	BPF_TEST_CLOBBER_ALU(ALU32_REG, ARSH, R9, R1),
+	BPF_TEST_CLOBBER_ALU(ALU32_REG, ADD, R8, R1),
+	BPF_TEST_CLOBBER_ALU(ALU32_REG, ADD, R9, R1),
+	BPF_TEST_CLOBBER_ALU(ALU32_REG, SUB, R8, R1),
+	BPF_TEST_CLOBBER_ALU(ALU32_REG, SUB, R9, R1),
+	BPF_TEST_CLOBBER_ALU(ALU32_REG, MUL, R8, R1),
+	BPF_TEST_CLOBBER_ALU(ALU32_REG, MUL, R9, R1),
+	BPF_TEST_CLOBBER_ALU(ALU32_REG, DIV, R8, R1),
+	BPF_TEST_CLOBBER_ALU(ALU32_REG, DIV, R9, R1),
+	BPF_TEST_CLOBBER_ALU(ALU32_REG, MOD, R8, R1),
+	BPF_TEST_CLOBBER_ALU(ALU32_REG, MOD, R9, R1),
+#undef BPF_TEST_CLOBBER_ALU
+#define BPF_TEST_CLOBBER_ATOMIC(width, op)			\
+	{							\
+		"Atomic_" #width " " #op ": no clobbering",	\
+		.u.insns_int = {				\
+			BPF_ALU64_IMM(BPF_MOV, R0, 0),		\
+			BPF_ALU64_IMM(BPF_MOV, R1, 1),		\
+			BPF_ALU64_IMM(BPF_MOV, R2, 2),		\
+			BPF_ALU64_IMM(BPF_MOV, R3, 3),		\
+			BPF_ALU64_IMM(BPF_MOV, R4, 4),		\
+			BPF_ALU64_IMM(BPF_MOV, R5, 5),		\
+			BPF_ALU64_IMM(BPF_MOV, R6, 6),		\
+			BPF_ALU64_IMM(BPF_MOV, R7, 7),		\
+			BPF_ALU64_IMM(BPF_MOV, R8, 8),		\
+			BPF_ALU64_IMM(BPF_MOV, R9, 9),		\
+			BPF_ST_MEM(width, R10, -8,		\
+				   (op) == BPF_CMPXCHG ? 0 :	\
+				   (op) & BPF_FETCH ? 1 : 0),	\
+			BPF_ATOMIC_OP(width, op, R10, R1, -8),	\
+			BPF_JMP_IMM(BPF_JNE, R0, 0, 10),	\
+			BPF_JMP_IMM(BPF_JNE, R1, 1, 9),		\
+			BPF_JMP_IMM(BPF_JNE, R2, 2, 8),		\
+			BPF_JMP_IMM(BPF_JNE, R3, 3, 7),		\
+			BPF_JMP_IMM(BPF_JNE, R4, 4, 6),		\
+			BPF_JMP_IMM(BPF_JNE, R5, 5, 5),		\
+			BPF_JMP_IMM(BPF_JNE, R6, 6, 4),		\
+			BPF_JMP_IMM(BPF_JNE, R7, 7, 3),		\
+			BPF_JMP_IMM(BPF_JNE, R8, 8, 2),		\
+			BPF_JMP_IMM(BPF_JNE, R9, 9, 1),		\
+			BPF_ALU64_IMM(BPF_MOV, R0, 1),		\
+			BPF_EXIT_INSN(),			\
+		},						\
+		INTERNAL,					\
+		{ },						\
+		{ { 0, 1 } },					\
+		.stack_depth = 8,				\
+	}
+	/* 64-bit atomic operations, register clobbering */
+	BPF_TEST_CLOBBER_ATOMIC(BPF_DW, BPF_ADD),
+	BPF_TEST_CLOBBER_ATOMIC(BPF_DW, BPF_AND),
+	BPF_TEST_CLOBBER_ATOMIC(BPF_DW, BPF_OR),
+	BPF_TEST_CLOBBER_ATOMIC(BPF_DW, BPF_XOR),
+	BPF_TEST_CLOBBER_ATOMIC(BPF_DW, BPF_ADD | BPF_FETCH),
+	BPF_TEST_CLOBBER_ATOMIC(BPF_DW, BPF_AND | BPF_FETCH),
+	BPF_TEST_CLOBBER_ATOMIC(BPF_DW, BPF_OR | BPF_FETCH),
+	BPF_TEST_CLOBBER_ATOMIC(BPF_DW, BPF_XOR | BPF_FETCH),
+	BPF_TEST_CLOBBER_ATOMIC(BPF_DW, BPF_XCHG),
+	BPF_TEST_CLOBBER_ATOMIC(BPF_DW, BPF_CMPXCHG),
+	/* 32-bit atomic operations, register clobbering */
+	BPF_TEST_CLOBBER_ATOMIC(BPF_W, BPF_ADD),
+	BPF_TEST_CLOBBER_ATOMIC(BPF_W, BPF_AND),
+	BPF_TEST_CLOBBER_ATOMIC(BPF_W, BPF_OR),
+	BPF_TEST_CLOBBER_ATOMIC(BPF_W, BPF_XOR),
+	BPF_TEST_CLOBBER_ATOMIC(BPF_W, BPF_ADD | BPF_FETCH),
+	BPF_TEST_CLOBBER_ATOMIC(BPF_W, BPF_AND | BPF_FETCH),
+	BPF_TEST_CLOBBER_ATOMIC(BPF_W, BPF_OR | BPF_FETCH),
+	BPF_TEST_CLOBBER_ATOMIC(BPF_W, BPF_XOR | BPF_FETCH),
+	BPF_TEST_CLOBBER_ATOMIC(BPF_W, BPF_XCHG),
+	BPF_TEST_CLOBBER_ATOMIC(BPF_W, BPF_CMPXCHG),
+#undef BPF_TEST_CLOBBER_ATOMIC
 	/* Checking that ALU32 src is not zero extended in place */
 #define BPF_ALU32_SRC_ZEXT(op)					\
 	{							\