x86/entry/64: Simplify reg restore code in the standard IRET paths

The old code restored all the registers with movq instead of pop.

In theory, this was done because some CPUs have higher movq
throughput, but any gain there would be tiny and is almost certainly
outweighed by the higher text size.

This saves 96 bytes of text.

Signed-off-by: Andy Lutomirski <luto@kernel.org>
Cc: Borislav Petkov <bpetkov@suse.de>
Cc: Brian Gerst <brgerst@gmail.com>
Cc: Dave Hansen <dave.hansen@intel.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/ad82520a207ccd851b04ba613f4f752b33ac05f7.1509609304.git.luto@kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
diff --git a/arch/x86/entry/calling.h b/arch/x86/entry/calling.h
index 640aafe..0b9dd81 100644
--- a/arch/x86/entry/calling.h
+++ b/arch/x86/entry/calling.h
@@ -151,6 +151,27 @@ For 32-bit we have the following conventions - kernel is built with
 	UNWIND_HINT_REGS offset=\offset extra=0
 	.endm
 
+	.macro POP_EXTRA_REGS
+	popq %r15
+	popq %r14
+	popq %r13
+	popq %r12
+	popq %rbp
+	popq %rbx
+	.endm
+
+	.macro POP_C_REGS
+	popq %r11
+	popq %r10
+	popq %r9
+	popq %r8
+	popq %rax
+	popq %rcx
+	popq %rdx
+	popq %rsi
+	popq %rdi
+	.endm
+
 	.macro RESTORE_C_REGS_HELPER rstor_rax=1, rstor_rcx=1, rstor_r11=1, rstor_r8910=1, rstor_rdx=1
 	.if \rstor_r11
 	movq 6*8(%rsp), %r11
diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index d6ffdc9..925d562 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -618,9 +618,9 @@
 1:
 #endif
 	SWAPGS
-	RESTORE_EXTRA_REGS
-	RESTORE_C_REGS
-	REMOVE_PT_GPREGS_FROM_STACK 8
+	POP_EXTRA_REGS
+	POP_C_REGS
+	addq	$8, %rsp	/* skip regs->orig_ax */
 	INTERRUPT_RETURN
 
 
@@ -650,9 +650,9 @@
 	ud2
 1:
 #endif
-	RESTORE_EXTRA_REGS
-	RESTORE_C_REGS
-	REMOVE_PT_GPREGS_FROM_STACK 8
+	POP_EXTRA_REGS
+	POP_C_REGS
+	addq	$8, %rsp	/* skip regs->orig_ax */
 	INTERRUPT_RETURN
 
 ENTRY(native_iret)