x86/speculation: Prepare entry code for Spectre v1 swapgs mitigations


Spectre v1 isn't only about array bounds checks.  It can affect any
conditional checks.  The kernel entry code interrupt, exception, and NMI
handlers all have conditional swapgs checks.  Those may be problematic in
the context of Spectre v1, as kernel code can speculatively run with a user
GS.

For example:

	if (coming from user space)
		swapgs
	mov %gs:<percpu_offset>, %reg
	mov (%reg), %reg1

When coming from user space, the CPU can speculatively skip the swapgs, and
then do a speculative percpu load using the user GS value.  So the user can
speculatively force a read of any kernel value.  If a gadget exists which
uses the percpu value as an address in another load/store, then the
contents of the kernel value may become visible via an L1 side channel
attack.

A similar attack exists when coming from kernel space.  The CPU can
speculatively do the swapgs, causing the user GS to get used for the rest
of the speculative window.

The mitigation is similar to a traditional Spectre v1 mitigation, except:

  a) index masking isn't possible; because the index (percpu offset)
     isn't user-controlled; and

  b) an lfence is needed in both the "from user" swapgs path and the
     "from kernel" non-swapgs path (because of the two attacks described
     above).

The user entry swapgs paths already have SWITCH_TO_KERNEL_CR3, which has a
CR3 write when PTI is enabled.  Since CR3 writes are serializing, the
lfences can be skipped in those cases.

On the other hand, the kernel entry swapgs paths don't depend on PTI.

To avoid unnecessary lfences for the user entry case, create two separate
features for alternative patching:

  X86_FEATURE_FENCE_SWAPGS_USER
  X86_FEATURE_FENCE_SWAPGS_KERNEL

Use these features in entry code to patch in lfences where needed.

The features aren't enabled yet, so there's no functional change.

Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Dave Hansen <dave.hansen@intel.com>

diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index a829dd3..57a0d96 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -519,7 +519,7 @@
 	testb	$3, CS-ORIG_RAX+8(%rsp)
 	jz	1f
 	SWAPGS
-
+	FENCE_SWAPGS_USER_ENTRY
 	/*
 	 * Switch to the thread stack. The IRET frame and orig_ax are
 	 * on the stack, as well as the return address. RDI..R12 are
@@ -549,8 +549,10 @@
 	UNWIND_HINT_FUNC
 
 	movq	(%rdi), %rdi
+	jmpq	2f
 1:
-
+	FENCE_SWAPGS_KERNEL_ENTRY
+2:
 	PUSH_AND_CLEAR_REGS save_ret=1
 	ENCODE_FRAME_POINTER 8
 
@@ -1221,6 +1223,13 @@
 	 */
 	SAVE_AND_SWITCH_TO_KERNEL_CR3 scratch_reg=%rax save_reg=%r14
 
+	/*
+	 * The above SAVE_AND_SWITCH_TO_KERNEL_CR3 macro doesn't do an
+	 * unconditional CR3 write, even in the PTI case.  So do an lfence
+	 * to prevent GS speculation, regardless of whether PTI is enabled.
+	 */
+	FENCE_SWAPGS_KERNEL_ENTRY
+
 	ret
 END(paranoid_entry)
 
@@ -1271,6 +1280,7 @@
 	 * from user mode due to an IRET fault.
 	 */
 	SWAPGS
+	FENCE_SWAPGS_USER_ENTRY
 	/* We have user CR3.  Change to kernel CR3. */
 	SWITCH_TO_KERNEL_CR3 scratch_reg=%rax
 
@@ -1292,6 +1302,8 @@
 	CALL_enter_from_user_mode
 	ret
 
+.Lerror_entry_done_lfence:
+	FENCE_SWAPGS_KERNEL_ENTRY
 .Lerror_entry_done:
 	TRACE_IRQS_OFF
 	ret
@@ -1310,7 +1322,7 @@
 	cmpq	%rax, RIP+8(%rsp)
 	je	.Lbstep_iret
 	cmpq	$.Lgs_change, RIP+8(%rsp)
-	jne	.Lerror_entry_done
+	jne	.Lerror_entry_done_lfence
 
 	/*
 	 * hack: .Lgs_change can fail with user gsbase.  If this happens, fix up
@@ -1318,6 +1330,7 @@
 	 * .Lgs_change's error handler with kernel gsbase.
 	 */
 	SWAPGS
+	FENCE_SWAPGS_USER_ENTRY
 	SWITCH_TO_KERNEL_CR3 scratch_reg=%rax
 	jmp .Lerror_entry_done
 
@@ -1332,6 +1345,7 @@
 	 * gsbase and CR3.  Switch to kernel gsbase and CR3:
 	 */
 	SWAPGS
+	FENCE_SWAPGS_USER_ENTRY
 	SWITCH_TO_KERNEL_CR3 scratch_reg=%rax
 
 	/*
@@ -1423,6 +1437,7 @@
 
 	swapgs
 	cld
+	FENCE_SWAPGS_USER_ENTRY
 	SWITCH_TO_KERNEL_CR3 scratch_reg=%rdx
 	movq	%rsp, %rdx
 	movq	PER_CPU_VAR(cpu_current_top_of_stack), %rsp