/* SPDX-License-Identifier: GPL-2.0 */
/*
 * linux/arch/x86_64/entry.S
 *
 * Copyright (C) 1991, 1992 Linus Torvalds
 * Copyright (C) 2000, 2001, 2002 Andi Kleen SuSE Labs
 * Copyright (C) 2000 Pavel Machek <pavel@suse.cz>
 *
 * entry.S contains the system-call and fault low-level handling routines.
 *
 * Some of this is documented in Documentation/x86/entry_64.rst
 *
 * A note on terminology:
 * - iret frame:		Architecture defined interrupt frame from SS to RIP
 *				at the top of the kernel process stack.
 *
 * Some macro usage:
 * - SYM_FUNC_START/END:	Define functions in the symbol table.
 * - TRACE_IRQ_*:		Trace hardirq state for lock debugging.
 * - idtentry:			Define exception entry points.
 */
#include <linux/linkage.h>
#include <asm/segment.h>
#include <asm/cache.h>
#include <asm/errno.h>
#include <asm/asm-offsets.h>
#include <asm/msr.h>
#include <asm/unistd.h>
#include <asm/thread_info.h>
#include <asm/hw_irq.h>
#include <asm/page_types.h>
#include <asm/irqflags.h>
#include <asm/paravirt.h>
#include <asm/percpu.h>
#include <asm/asm.h>
#include <asm/smap.h>
#include <asm/pgtable_types.h>
#include <asm/export.h>
#include <asm/frame.h>
#include <asm/trapnr.h>
#include <asm/nospec-branch.h>
#include <linux/err.h>

#include "calling.h"

.code64
.section .entry.text, "ax"

#ifdef CONFIG_PARAVIRT
SYM_CODE_START(native_usergs_sysret64)
	UNWIND_HINT_EMPTY
	swapgs
	sysretq
SYM_CODE_END(native_usergs_sysret64)
#endif /* CONFIG_PARAVIRT */

.macro TRACE_IRQS_FLAGS flags:req
#ifdef CONFIG_TRACE_IRQFLAGS
	btl	$9, \flags		/* interrupts off? */
	jnc	1f
	TRACE_IRQS_ON
1:
#endif
.endm

.macro TRACE_IRQS_IRETQ
	TRACE_IRQS_FLAGS EFLAGS(%rsp)
.endm

/*
 * When dynamic function tracer is enabled it will add a breakpoint
 * to all locations that it is about to modify, sync CPUs, update
 * all the code, sync CPUs, then remove the breakpoints. In this time
 * if lockdep is enabled, it might jump back into the debug handler
 * outside the updating of the IST protection. (TRACE_IRQS_ON/OFF).
 *
 * We need to change the IDT table before calling TRACE_IRQS_ON/OFF to
 * make sure the stack pointer does not get reset back to the top
 * of the debug stack, and instead just reuses the current stack.
 */
#if defined(CONFIG_DYNAMIC_FTRACE) && defined(CONFIG_TRACE_IRQFLAGS)

.macro TRACE_IRQS_OFF_DEBUG
	call	debug_stack_set_zero
	TRACE_IRQS_OFF
	call	debug_stack_reset
.endm

.macro TRACE_IRQS_ON_DEBUG
	call	debug_stack_set_zero
	TRACE_IRQS_ON
	call	debug_stack_reset
.endm

.macro TRACE_IRQS_IRETQ_DEBUG
	btl	$9, EFLAGS(%rsp)	/* interrupts off? */
	jnc	1f
	TRACE_IRQS_ON_DEBUG
1:
.endm

#else
# define TRACE_IRQS_OFF_DEBUG		TRACE_IRQS_OFF
# define TRACE_IRQS_ON_DEBUG		TRACE_IRQS_ON
# define TRACE_IRQS_IRETQ_DEBUG		TRACE_IRQS_IRETQ
#endif

/*
 * 64-bit SYSCALL instruction entry. Up to 6 arguments in registers.
 *
 * This is the only entry point used for 64-bit system calls.  The
 * hardware interface is reasonably well designed and the register to
 * argument mapping Linux uses fits well with the registers that are
 * available when SYSCALL is used.
 *
 * SYSCALL instructions can be found inlined in libc implementations as
 * well as some other programs and libraries.  There are also a handful
 * of SYSCALL instructions in the vDSO used, for example, as a
 * clock_gettimeofday fallback.
 *
 * 64-bit SYSCALL saves rip to rcx, clears rflags.RF, then saves rflags to r11,
 * then loads new ss, cs, and rip from previously programmed MSRs.
 * rflags gets masked by a value from another MSR (so CLD and CLAC
 * are not needed). SYSCALL does not save anything on the stack
 * and does not change rsp.
 *
 * Registers on entry:
 * rax  system call number
 * rcx  return address
 * r11  saved rflags (note: r11 is callee-clobbered register in C ABI)
 * rdi  arg0
 * rsi  arg1
 * rdx  arg2
 * r10  arg3 (needs to be moved to rcx to conform to C ABI)
 * r8   arg4
 * r9   arg5
 * (note: r12-r15, rbp, rbx are callee-preserved in C ABI)
 *
 * Only called from user space.
 *
 * When user can change pt_regs->foo always force IRET. That is because
 * it deals with uncanonical addresses better. SYSRET has trouble
 * with them due to bugs in both AMD and Intel CPUs.
 */
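
/*
 * Illustration only (nothing below is assembled): a minimal user-space
 * sketch of the register convention described above, assuming the standard
 * Linux x86-64 syscall ABI; "msg" and the count 14 are placeholder values.
 *
 *	movq	$1, %rax		# __NR_write
 *	movq	$1, %rdi		# arg0: fd (stdout)
 *	leaq	msg(%rip), %rsi		# arg1: buf
 *	movq	$14, %rdx		# arg2: count
 *	syscall				# clobbers rcx (saved RIP) and r11 (saved RFLAGS)
 *
 * A 4th argument would go in r10 rather than rcx, precisely because SYSCALL
 * itself uses rcx/r11 -- the C ABI mismatch noted above.
 */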

SYM_CODE_START(entry_SYSCALL_64)
	UNWIND_HINT_EMPTY
	/*
	 * Interrupts are off on entry.
	 * We do not frame this tiny irq-off block with TRACE_IRQS_OFF/ON,
	 * it is too small to ever cause noticeable irq latency.
	 */

	swapgs
	/* tss.sp2 is scratch space. */
	movq	%rsp, PER_CPU_VAR(cpu_tss_rw + TSS_sp2)
	SWITCH_TO_KERNEL_CR3 scratch_reg=%rsp
	movq	PER_CPU_VAR(cpu_current_top_of_stack), %rsp

	/* Construct struct pt_regs on stack */
	pushq	$__USER_DS				/* pt_regs->ss */
	pushq	PER_CPU_VAR(cpu_tss_rw + TSS_sp2)	/* pt_regs->sp */
	pushq	%r11					/* pt_regs->flags */
	pushq	$__USER_CS				/* pt_regs->cs */
	pushq	%rcx					/* pt_regs->ip */
SYM_INNER_LABEL(entry_SYSCALL_64_after_hwframe, SYM_L_GLOBAL)
	pushq	%rax					/* pt_regs->orig_ax */

	PUSH_AND_CLEAR_REGS rax=$-ENOSYS

	/* IRQs are off. */
	movq	%rax, %rdi
	movq	%rsp, %rsi
	call	do_syscall_64		/* returns with IRQs disabled */

	/*
	 * Try to use SYSRET instead of IRET if we're returning to
	 * a completely clean 64-bit userspace context.  If we're not,
	 * go to the slow exit path.
	 */
	movq	RCX(%rsp), %rcx
	movq	RIP(%rsp), %r11

	cmpq	%rcx, %r11	/* SYSRET requires RCX == RIP */
	jne	swapgs_restore_regs_and_return_to_usermode

	/*
	 * On Intel CPUs, SYSRET with non-canonical RCX/RIP will #GP
	 * in kernel space.  This essentially lets the user take over
	 * the kernel, since userspace controls RSP.
	 *
	 * If width of "canonical tail" ever becomes variable, this will need
	 * to be updated to remain correct on both old and new CPUs.
	 *
	 * Change top bits to match most significant bit (47th or 56th bit
	 * depending on paging mode) in the address.
	 */
#ifdef CONFIG_X86_5LEVEL
	ALTERNATIVE "shl $(64 - 48), %rcx; sar $(64 - 48), %rcx", \
		"shl $(64 - 57), %rcx; sar $(64 - 57), %rcx", X86_FEATURE_LA57
#else
	shl	$(64 - (__VIRTUAL_MASK_SHIFT+1)), %rcx
	sar	$(64 - (__VIRTUAL_MASK_SHIFT+1)), %rcx
#endif
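
	/*
	 * Worked example of the sign-extension check above, assuming 4-level
	 * paging (__VIRTUAL_MASK_SHIFT == 47, i.e. a shift count of 16): a
	 * canonical user RIP such as 0x00007fffdeadbeef survives shl/sar
	 * unchanged, while a non-canonical value like 0x0000800000000000
	 * becomes 0xffff800000000000, so the cmpq/jne below sends it down
	 * the IRET path.
	 */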

	/* If this changed %rcx, it was not canonical */
	cmpq	%rcx, %r11
	jne	swapgs_restore_regs_and_return_to_usermode

	cmpq	$__USER_CS, CS(%rsp)		/* CS must match SYSRET */
	jne	swapgs_restore_regs_and_return_to_usermode

	movq	R11(%rsp), %r11
	cmpq	%r11, EFLAGS(%rsp)		/* R11 == RFLAGS */
	jne	swapgs_restore_regs_and_return_to_usermode

	/*
	 * SYSCALL clears RF when it saves RFLAGS in R11 and SYSRET cannot
	 * restore RF properly. If the slowpath sets it for whatever reason, we
	 * need to restore it correctly.
	 *
	 * SYSRET can restore TF, but unlike IRET, restoring TF results in a
	 * trap from userspace immediately after SYSRET.  This would cause an
	 * infinite loop whenever #DB happens with register state that satisfies
	 * the opportunistic SYSRET conditions.  For example, single-stepping
	 * this user code:
	 *
	 *           movq	$stuck_here, %rcx
	 *           pushfq
	 *           popq %r11
	 *   stuck_here:
	 *
	 * would never get past 'stuck_here'.
	 */
	testq	$(X86_EFLAGS_RF|X86_EFLAGS_TF), %r11
	jnz	swapgs_restore_regs_and_return_to_usermode

	/* nothing to check for RSP */

	cmpq	$__USER_DS, SS(%rsp)		/* SS must match SYSRET */
	jne	swapgs_restore_regs_and_return_to_usermode

	/*
	 * We win! This label is here just for ease of understanding
	 * perf profiles. Nothing jumps here.
	 */
syscall_return_via_sysret:
	/* rcx and r11 are already restored (see code above) */
	POP_REGS pop_rdi=0 skip_r11rcx=1

	/*
	 * Now all regs are restored except RSP and RDI.
	 * Save old stack pointer and switch to trampoline stack.
	 */
	movq	%rsp, %rdi
	movq	PER_CPU_VAR(cpu_tss_rw + TSS_sp0), %rsp
	UNWIND_HINT_EMPTY

	pushq	RSP-RDI(%rdi)	/* RSP */
	pushq	(%rdi)		/* RDI */

	/*
	 * We are on the trampoline stack.  All regs except RDI are live.
	 * We can do future final exit work right here.
	 */
	STACKLEAK_ERASE_NOCLOBBER

	SWITCH_TO_USER_CR3_STACK scratch_reg=%rdi

	popq	%rdi
	popq	%rsp
	USERGS_SYSRET64
SYM_CODE_END(entry_SYSCALL_64)

/*
 * %rdi: prev task
 * %rsi: next task
 */
.pushsection .text, "ax"
SYM_FUNC_START(__switch_to_asm)
	/*
	 * Save callee-saved registers
	 * This must match the order in inactive_task_frame
	 */
	pushq	%rbp
	pushq	%rbx
	pushq	%r12
	pushq	%r13
	pushq	%r14
	pushq	%r15

	/* switch stack */
	movq	%rsp, TASK_threadsp(%rdi)
	movq	TASK_threadsp(%rsi), %rsp

#ifdef CONFIG_STACKPROTECTOR
	movq	TASK_stack_canary(%rsi), %rbx
	movq	%rbx, PER_CPU_VAR(fixed_percpu_data) + stack_canary_offset
#endif

#ifdef CONFIG_RETPOLINE
	/*
	 * When switching from a shallower to a deeper call stack
	 * the RSB may either underflow or use entries populated
	 * with userspace addresses. On CPUs where those concerns
	 * exist, overwrite the RSB with entries which capture
	 * speculative execution to prevent attack.
	 */
	FILL_RETURN_BUFFER %r12, RSB_CLEAR_LOOPS, X86_FEATURE_RSB_CTXSW
#endif

	/* restore callee-saved registers */
	popq	%r15
	popq	%r14
	popq	%r13
	popq	%r12
	popq	%rbx
	popq	%rbp

	jmp	__switch_to
SYM_FUNC_END(__switch_to_asm)
.popsection

/*
 * A newly forked process directly context switches into this address.
 *
 * rax: prev task we switched from
 * rbx: kernel thread func (NULL for user thread)
 * r12: kernel thread arg
 */
.pushsection .text, "ax"
SYM_CODE_START(ret_from_fork)
	UNWIND_HINT_EMPTY
	movq	%rax, %rdi
	call	schedule_tail			/* rdi: 'prev' task parameter */

	testq	%rbx, %rbx			/* from kernel_thread? */
	jnz	1f				/* kernel threads are uncommon */

2:
	UNWIND_HINT_REGS
	movq	%rsp, %rdi
	call	syscall_return_slowpath		/* returns with IRQs disabled */
	jmp	swapgs_restore_regs_and_return_to_usermode

1:
	/* kernel thread */
	UNWIND_HINT_EMPTY
	movq	%r12, %rdi
	CALL_NOSPEC rbx
	/*
	 * A kernel thread is allowed to return here after successfully
	 * calling do_execve().  Exit to userspace to complete the execve()
	 * syscall.
	 */
	movq	$0, RAX(%rsp)
	jmp	2b
SYM_CODE_END(ret_from_fork)
.popsection

/*
 * Build the entry stubs with some assembler magic.
 * We pack 1 stub into every 8-byte block.
 */
	.align 8
SYM_CODE_START(irq_entries_start)
    vector=FIRST_EXTERNAL_VECTOR
    .rept (FIRST_SYSTEM_VECTOR - FIRST_EXTERNAL_VECTOR)
	UNWIND_HINT_IRET_REGS
	pushq	$(~vector+0x80)			/* Note: always in signed byte range */
	jmp	common_interrupt
	.align	8
	vector=vector+1
    .endr
SYM_CODE_END(irq_entries_start)
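
/*
 * Worked example of the stub encoding above: for external vector 32 (0x20),
 * ~vector + 0x80 = -33 + 128 = 95, which fits in a signed byte and so keeps
 * the pushq at its short immediate form, letting each push+jmp pair fit the
 * 8-byte block mentioned above.  common_interrupt later undoes the bias with
 * "addq $-0x80, (%rsp)", leaving ~vector (-33 here) in pt_regs->orig_ax.
 */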

	.align 8
SYM_CODE_START(spurious_entries_start)
    vector=FIRST_SYSTEM_VECTOR
    .rept (NR_VECTORS - FIRST_SYSTEM_VECTOR)
	UNWIND_HINT_IRET_REGS
	pushq	$(~vector+0x80)			/* Note: always in signed byte range */
	jmp	common_spurious
	.align	8
	vector=vector+1
    .endr
SYM_CODE_END(spurious_entries_start)

.macro DEBUG_ENTRY_ASSERT_IRQS_OFF
#ifdef CONFIG_DEBUG_ENTRY
	pushq	%rax
	SAVE_FLAGS(CLBR_RAX)
	testl	$X86_EFLAGS_IF, %eax
	jz	.Lokay_\@
	ud2
.Lokay_\@:
	popq	%rax
#endif
.endm

/*
 * Enters the IRQ stack if we're not already using it.  NMI-safe.  Clobbers
 * flags and puts old RSP into old_rsp, and leaves all other GPRs alone.
 * Requires kernel GSBASE.
 *
 * The invariant is that, if irq_count != -1, then the IRQ stack is in use.
 */
.macro ENTER_IRQ_STACK regs=1 old_rsp save_ret=0
	DEBUG_ENTRY_ASSERT_IRQS_OFF

	.if \save_ret
	/*
	 * If save_ret is set, the original stack contains one additional
	 * entry -- the return address. Therefore, move the address one
	 * entry below %rsp to \old_rsp.
	 */
	leaq	8(%rsp), \old_rsp
	.else
	movq	%rsp, \old_rsp
	.endif

	.if \regs
	UNWIND_HINT_REGS base=\old_rsp
	.endif

	incl	PER_CPU_VAR(irq_count)
	jnz	.Lirq_stack_push_old_rsp_\@

	/*
	 * Right now, if we just incremented irq_count to zero, we've
	 * claimed the IRQ stack but we haven't switched to it yet.
	 *
	 * If anything is added that can interrupt us here without using IST,
	 * it must be *extremely* careful to limit its stack usage.  This
	 * could include kprobes and a hypothetical future IST-less #DB
	 * handler.
	 *
	 * The OOPS unwinder relies on the word at the top of the IRQ
	 * stack linking back to the previous RSP for the entire time we're
	 * on the IRQ stack.  For this to work reliably, we need to write
	 * it before we actually move ourselves to the IRQ stack.
	 */

	movq	\old_rsp, PER_CPU_VAR(irq_stack_backing_store + IRQ_STACK_SIZE - 8)
	movq	PER_CPU_VAR(hardirq_stack_ptr), %rsp

#ifdef CONFIG_DEBUG_ENTRY
	/*
	 * If the first movq above becomes wrong due to IRQ stack layout
	 * changes, the only way we'll notice is if we try to unwind right
	 * here.  Assert that we set up the stack right to catch this type
	 * of bug quickly.
	 */
	cmpq	-8(%rsp), \old_rsp
	je	.Lirq_stack_okay\@
	ud2
	.Lirq_stack_okay\@:
#endif

.Lirq_stack_push_old_rsp_\@:
	pushq	\old_rsp

	.if \regs
	UNWIND_HINT_REGS indirect=1
	.endif

	.if \save_ret
	/*
	 * Push the return address to the stack. This return address can
	 * be found at the "real" original RSP, which was offset by 8 at
	 * the beginning of this macro.
	 */
	pushq	-8(\old_rsp)
	.endif
.endm

/*
 * Undoes ENTER_IRQ_STACK.
 */
.macro LEAVE_IRQ_STACK regs=1
	DEBUG_ENTRY_ASSERT_IRQS_OFF
	/* We need to be off the IRQ stack before decrementing irq_count. */
	popq	%rsp

	.if \regs
	UNWIND_HINT_REGS
	.endif

	/*
	 * As in ENTER_IRQ_STACK, irq_count == 0, we are still claiming
	 * the irq stack but we're not on it.
	 */

	decl	PER_CPU_VAR(irq_count)
.endm
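
/*
 * Usage note: the pairings in this file are interrupt_entry
 * (ENTER_IRQ_STACK old_rsp=%rdi save_ret=1, undone in ret_from_intr),
 * do_softirq_own_stack (regs=0 old_rsp=%r11) and
 * xen_do_hypervisor_callback (old_rsp=%r10), all further down.
 */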

/**
 * idtentry_body - Macro to emit code calling the C function
 * @vector:		Vector number
 * @cfunc:		C function to be called
 * @has_error_code:	Hardware pushed error code on stack
 * @sane:		Sane variant which handles irq tracing, context tracking in C
 */
.macro idtentry_body vector cfunc has_error_code:req sane=0

	call	error_entry
	UNWIND_HINT_REGS

	.if \vector == X86_TRAP_PF
	/*
	 * Store CR2 early so subsequent faults cannot clobber it. Use R12 as
	 * intermediate storage as RDX can be clobbered in enter_from_user_mode().
	 * GET_CR2_INTO can clobber RAX.
	 */
	GET_CR2_INTO(%r12);
	.endif

	.if \sane == 0
	TRACE_IRQS_OFF

#ifdef CONFIG_CONTEXT_TRACKING
	testb	$3, CS(%rsp)
	jz	.Lfrom_kernel_no_ctxt_tracking_\@
	CALL_enter_from_user_mode
.Lfrom_kernel_no_ctxt_tracking_\@:
#endif
	.endif

	movq	%rsp, %rdi			/* pt_regs pointer into 1st argument*/

	.if \has_error_code == 1
	movq	ORIG_RAX(%rsp), %rsi		/* get error code into 2nd argument*/
	movq	$-1, ORIG_RAX(%rsp)		/* no syscall to restart */
	.else
	xorl	%esi, %esi			/* Clear the error code */
	.endif

	.if \vector == X86_TRAP_PF
	movq	%r12, %rdx			/* Move CR2 into 3rd argument */
	.endif

	call	\cfunc

	.if \sane == 0
	jmp	error_exit
	.else
	jmp	error_return
	.endif
.endm

/**
 * idtentry - Macro to generate entry stubs for simple IDT entries
 * @vector:		Vector number
 * @asmsym:		ASM symbol for the entry point
 * @cfunc:		C function to be called
 * @has_error_code:	Hardware pushed error code on stack
 * @sane:		Sane variant which handles irq tracing, context tracking in C
 *
 * The macro emits code to set up the kernel context for straight forward
 * and simple IDT entries. No IST stack, no paranoid entry checks.
 */
.macro idtentry vector asmsym cfunc has_error_code:req sane=0
SYM_CODE_START(\asmsym)
	UNWIND_HINT_IRET_REGS offset=\has_error_code*8
	ASM_CLAC

	.if \has_error_code == 0
	pushq	$-1			/* ORIG_RAX: no syscall to restart */
	.endif

	.if \vector == X86_TRAP_BP
	/*
	 * If coming from kernel space, create a 6-word gap to allow the
	 * int3 handler to emulate a call instruction.
	 */
	testb	$3, CS-ORIG_RAX(%rsp)
	jnz	.Lfrom_usermode_no_gap_\@
	.rept	6
	pushq	5*8(%rsp)
	.endr
	UNWIND_HINT_IRET_REGS offset=8
.Lfrom_usermode_no_gap_\@:
	.endif

	idtentry_body \vector \cfunc \has_error_code \sane

_ASM_NOKPROBE(\asmsym)
SYM_CODE_END(\asmsym)
.endm
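
/*
 * Example instantiation (taken from the exception entry points below):
 *
 *	idtentry X86_TRAP_PF	page_fault	do_page_fault	has_error_code=1
 *
 * which emits the page_fault stub and routes it through idtentry_body to
 * do_page_fault with the hardware error code in the 2nd argument and, as
 * the vector is X86_TRAP_PF, CR2 in the 3rd.
 */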

/*
 * MCE and DB exceptions
 */
#define CPU_TSS_IST(x) PER_CPU_VAR(cpu_tss_rw) + (TSS_ist + (x) * 8)

/**
 * idtentry_mce_db - Macro to generate entry stubs for #MC and #DB
 * @vector:		Vector number
 * @asmsym:		ASM symbol for the entry point
 * @cfunc:		C function to be called
 *
 * The macro emits code to set up the kernel context for #MC and #DB
 *
 * If the entry comes from user space it uses the normal entry path
 * including the return to user space work and preemption checks on
 * exit.
 *
 * If it hits in kernel mode then it needs to go through the paranoid
 * entry as the exception can hit any random state. No preemption
 * check on exit to keep the paranoid path simple.
 *
 * If the trap is #DB then the interrupt stack entry in the IST is
 * moved to the second stack, so a potential recursion will have a
 * fresh IST.
 */
.macro idtentry_mce_db vector asmsym cfunc
SYM_CODE_START(\asmsym)
	UNWIND_HINT_IRET_REGS
	ASM_CLAC

	pushq	$-1			/* ORIG_RAX: no syscall to restart */

	/*
	 * If the entry is from userspace, switch stacks and treat it as
	 * a normal entry.
	 */
	testb	$3, CS-ORIG_RAX(%rsp)
	jnz	.Lfrom_usermode_switch_stack_\@

	/*
	 * paranoid_entry returns SWAPGS flag for paranoid_exit in EBX.
	 * EBX == 0 -> SWAPGS, EBX == 1 -> no SWAPGS
	 */
	call	paranoid_entry

	UNWIND_HINT_REGS

	.if \vector == X86_TRAP_DB
	TRACE_IRQS_OFF_DEBUG
	.else
	TRACE_IRQS_OFF
	.endif

	movq	%rsp, %rdi		/* pt_regs pointer */
	xorl	%esi, %esi		/* Clear the error code */

	.if \vector == X86_TRAP_DB
	subq	$DB_STACK_OFFSET, CPU_TSS_IST(IST_INDEX_DB)
	.endif

	call	\cfunc

	.if \vector == X86_TRAP_DB
	addq	$DB_STACK_OFFSET, CPU_TSS_IST(IST_INDEX_DB)
	.endif

	jmp	paranoid_exit

	/* Switch to the regular task stack and use the noist entry point */
.Lfrom_usermode_switch_stack_\@:
	idtentry_body vector \cfunc, has_error_code=0

_ASM_NOKPROBE(\asmsym)
SYM_CODE_END(\asmsym)
.endm

/*
 * Double fault entry. Straight paranoid. No checks from which context
 * this comes because for the espfix induced #DF this would do the wrong
 * thing.
 */
.macro idtentry_df vector asmsym cfunc
SYM_CODE_START(\asmsym)
	UNWIND_HINT_IRET_REGS offset=8
	ASM_CLAC

	/*
	 * paranoid_entry returns SWAPGS flag for paranoid_exit in EBX.
	 * EBX == 0 -> SWAPGS, EBX == 1 -> no SWAPGS
	 */
	call	paranoid_entry
	UNWIND_HINT_REGS

	/* Read CR2 early */
	GET_CR2_INTO(%r12);

	TRACE_IRQS_OFF

	movq	%rsp, %rdi		/* pt_regs pointer into first argument */
	movq	ORIG_RAX(%rsp), %rsi	/* get error code into 2nd argument*/
	movq	$-1, ORIG_RAX(%rsp)	/* no syscall to restart */
	movq	%r12, %rdx		/* Move CR2 into 3rd argument */
	call	\cfunc

	jmp	paranoid_exit

_ASM_NOKPROBE(\asmsym)
SYM_CODE_END(\asmsym)
.endm
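
/*
 * Example instantiations of the IST variants (see further down):
 *
 *	idtentry_mce_db	X86_TRAP_DB	debug		do_debug
 *	idtentry_df	X86_TRAP_DF	double_fault	do_double_fault
 *
 * Both run through paranoid_entry/paranoid_exit; only the #DB case shifts
 * the IST slot by DB_STACK_OFFSET around the call to its C handler.
 */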

/*
 * Include the defines which emit the idt entries which are shared
 * between 32 and 64 bit.
 */
#include <asm/idtentry.h>

/*
 * Interrupt entry helper function.
 *
 * Entry runs with interrupts off. Stack layout at entry:
 * +----------------------------------------------------+
 * | regs->ss						|
 * | regs->rsp						|
 * | regs->eflags					|
 * | regs->cs						|
 * | regs->ip						|
 * +----------------------------------------------------+
 * | regs->orig_ax = ~(interrupt number)		|
 * +----------------------------------------------------+
 * | return address					|
 * +----------------------------------------------------+
 */
SYM_CODE_START(interrupt_entry)
	UNWIND_HINT_IRET_REGS offset=16
	ASM_CLAC
	cld

	testb	$3, CS-ORIG_RAX+8(%rsp)
	jz	1f
	SWAPGS
	FENCE_SWAPGS_USER_ENTRY
	/*
	 * Switch to the thread stack. The IRET frame and orig_ax are
	 * on the stack, as well as the return address. RDI..R12 are
	 * not (yet) on the stack and space has not (yet) been
	 * allocated for them.
	 */
	pushq	%rdi

	/* Need to switch before accessing the thread stack. */
	SWITCH_TO_KERNEL_CR3 scratch_reg=%rdi
	movq	%rsp, %rdi
	movq	PER_CPU_VAR(cpu_current_top_of_stack), %rsp

	/*
	 * We have RDI, return address, and orig_ax on the stack on
	 * top of the IRET frame. That means offset=24
	 */
	UNWIND_HINT_IRET_REGS base=%rdi offset=24

	pushq	7*8(%rdi)		/* regs->ss */
	pushq	6*8(%rdi)		/* regs->rsp */
	pushq	5*8(%rdi)		/* regs->eflags */
	pushq	4*8(%rdi)		/* regs->cs */
	pushq	3*8(%rdi)		/* regs->ip */
	UNWIND_HINT_IRET_REGS
	pushq	2*8(%rdi)		/* regs->orig_ax */
	pushq	8(%rdi)			/* return address */

	movq	(%rdi), %rdi
	jmp	2f
1:
	FENCE_SWAPGS_KERNEL_ENTRY
2:
	PUSH_AND_CLEAR_REGS save_ret=1
	ENCODE_FRAME_POINTER 8

	testb	$3, CS+8(%rsp)
	jz	1f

	/*
	 * IRQ from user mode.
	 *
	 * We need to tell lockdep that IRQs are off.  We can't do this until
	 * we fix gsbase, and we should do it before enter_from_user_mode
	 * (which can take locks).  Since TRACE_IRQS_OFF is idempotent,
	 * the simplest way to handle it is to just call it twice if
	 * we enter from user mode. There's no reason to optimize this since
	 * TRACE_IRQS_OFF is a no-op if lockdep is off.
	 */
	TRACE_IRQS_OFF

	CALL_enter_from_user_mode

1:
	ENTER_IRQ_STACK old_rsp=%rdi save_ret=1
	/* We entered an interrupt context - irqs are off: */
	TRACE_IRQS_OFF

	ret
SYM_CODE_END(interrupt_entry)
_ASM_NOKPROBE(interrupt_entry)


/* Interrupt entry/exit. */

/*
 * The interrupt stubs push (~vector+0x80) onto the stack and
 * then jump to common_spurious/interrupt.
 */
SYM_CODE_START_LOCAL(common_spurious)
	addq	$-0x80, (%rsp)			/* Adjust vector to [-256, -1] range */
	call	interrupt_entry
	UNWIND_HINT_REGS indirect=1
	call	smp_spurious_interrupt		/* rdi points to pt_regs */
	jmp	ret_from_intr
SYM_CODE_END(common_spurious)
_ASM_NOKPROBE(common_spurious)

/* common_interrupt is a hotpath. Align it */
	.p2align CONFIG_X86_L1_CACHE_SHIFT
SYM_CODE_START_LOCAL(common_interrupt)
	addq	$-0x80, (%rsp)			/* Adjust vector to [-256, -1] range */
	call	interrupt_entry
	UNWIND_HINT_REGS indirect=1
	call	do_IRQ				/* rdi points to pt_regs */
	/* 0(%rsp): old RSP */
ret_from_intr:
	DISABLE_INTERRUPTS(CLBR_ANY)
	TRACE_IRQS_OFF

	LEAVE_IRQ_STACK

	testb	$3, CS(%rsp)
	jz	retint_kernel

	/* Interrupt came from user space */
.Lretint_user:
	mov	%rsp,%rdi
	call	prepare_exit_to_usermode

SYM_INNER_LABEL(swapgs_restore_regs_and_return_to_usermode, SYM_L_GLOBAL)
#ifdef CONFIG_DEBUG_ENTRY
	/* Assert that pt_regs indicates user mode. */
	testb	$3, CS(%rsp)
	jnz	1f
	ud2
1:
#endif
	POP_REGS pop_rdi=0

	/*
	 * The stack is now user RDI, orig_ax, RIP, CS, EFLAGS, RSP, SS.
	 * Save old stack pointer and switch to trampoline stack.
	 */
	movq	%rsp, %rdi
	movq	PER_CPU_VAR(cpu_tss_rw + TSS_sp0), %rsp
	UNWIND_HINT_EMPTY

	/* Copy the IRET frame to the trampoline stack. */
	pushq	6*8(%rdi)	/* SS */
	pushq	5*8(%rdi)	/* RSP */
	pushq	4*8(%rdi)	/* EFLAGS */
	pushq	3*8(%rdi)	/* CS */
	pushq	2*8(%rdi)	/* RIP */

	/* Push user RDI on the trampoline stack. */
	pushq	(%rdi)

	/*
	 * We are on the trampoline stack.  All regs except RDI are live.
	 * We can do future final exit work right here.
	 */
	STACKLEAK_ERASE_NOCLOBBER

	SWITCH_TO_USER_CR3_STACK scratch_reg=%rdi

	/* Restore RDI. */
	popq	%rdi
	SWAPGS
	INTERRUPT_RETURN


/* Returning to kernel space */
retint_kernel:
#ifdef CONFIG_PREEMPTION
	/* Interrupts are off */
	/* Check if we need preemption */
	btl	$9, EFLAGS(%rsp)		/* were interrupts off? */
	jnc	1f
	cmpl	$0, PER_CPU_VAR(__preempt_count)
	jnz	1f
	call	preempt_schedule_irq
1:
#endif
	/*
	 * The iretq could re-enable interrupts:
	 */
	TRACE_IRQS_IRETQ

SYM_INNER_LABEL(restore_regs_and_return_to_kernel, SYM_L_GLOBAL)
#ifdef CONFIG_DEBUG_ENTRY
	/* Assert that pt_regs indicates kernel mode. */
	testb	$3, CS(%rsp)
	jz	1f
	ud2
1:
#endif
	POP_REGS
	addq	$8, %rsp	/* skip regs->orig_ax */
	/*
	 * ARCH_HAS_MEMBARRIER_SYNC_CORE rely on IRET core serialization
	 * when returning from IPI handler.
	 */
	INTERRUPT_RETURN

SYM_INNER_LABEL_ALIGN(native_iret, SYM_L_GLOBAL)
	UNWIND_HINT_IRET_REGS
	/*
	 * Are we returning to a stack segment from the LDT?  Note: in
	 * 64-bit mode SS:RSP on the exception stack is always valid.
	 */
#ifdef CONFIG_X86_ESPFIX64
	testb	$4, (SS-RIP)(%rsp)
	jnz	native_irq_return_ldt
#endif

SYM_INNER_LABEL(native_irq_return_iret, SYM_L_GLOBAL)
	/*
	 * This may fault.  Non-paranoid faults on return to userspace are
	 * handled by fixup_bad_iret.  These include #SS, #GP, and #NP.
	 * Double-faults due to espfix64 are handled in do_double_fault.
	 * Other faults here are fatal.
	 */
	iretq

#ifdef CONFIG_X86_ESPFIX64
native_irq_return_ldt:
	/*
	 * We are running with user GSBASE.  All GPRs contain their user
	 * values.  We have a percpu ESPFIX stack that is eight slots
	 * long (see ESPFIX_STACK_SIZE).  espfix_waddr points to the bottom
	 * of the ESPFIX stack.
	 *
	 * We clobber RAX and RDI in this code.  We stash RDI on the
	 * normal stack and RAX on the ESPFIX stack.
	 *
	 * The ESPFIX stack layout we set up looks like this:
	 *
	 * --- top of ESPFIX stack ---
	 * SS
	 * RSP
	 * RFLAGS
	 * CS
	 * RIP  <-- RSP points here when we're done
	 * RAX  <-- espfix_waddr points here
	 * --- bottom of ESPFIX stack ---
	 */

	pushq	%rdi				/* Stash user RDI */
	SWAPGS					/* to kernel GS */
	SWITCH_TO_KERNEL_CR3 scratch_reg=%rdi	/* to kernel CR3 */

	movq	PER_CPU_VAR(espfix_waddr), %rdi
	movq	%rax, (0*8)(%rdi)		/* user RAX */
	movq	(1*8)(%rsp), %rax		/* user RIP */
	movq	%rax, (1*8)(%rdi)
	movq	(2*8)(%rsp), %rax		/* user CS */
	movq	%rax, (2*8)(%rdi)
	movq	(3*8)(%rsp), %rax		/* user RFLAGS */
	movq	%rax, (3*8)(%rdi)
	movq	(5*8)(%rsp), %rax		/* user SS */
	movq	%rax, (5*8)(%rdi)
	movq	(4*8)(%rsp), %rax		/* user RSP */
	movq	%rax, (4*8)(%rdi)
	/* Now RAX == RSP. */

	andl	$0xffff0000, %eax		/* RAX = (RSP & 0xffff0000) */

	/*
	 * espfix_stack[31:16] == 0.  The page tables are set up such that
	 * (espfix_stack | (X & 0xffff0000)) points to a read-only alias of
	 * espfix_waddr for any X.  That is, there are 65536 RO aliases of
	 * the same page.  Set up RSP so that RSP[31:16] contains the
	 * respective 16 bits of the /userspace/ RSP and RSP nonetheless
	 * still points to an RO alias of the ESPFIX stack.
	 */
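	/*
	 * Worked example of the aliasing above: if the userspace RSP was
	 * 0x00007ffc12345678, the andl left RAX == 0x12340000, and or'ing in
	 * espfix_stack (whose bits 31:16 are zero) yields an RSP whose bits
	 * 31:16 are the user's 0x1234 while still pointing into the RO alias
	 * of the ESPFIX page described above.
	 */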
Ingo Molnar4d732132015-06-08 20:43:07 +0200978 orq PER_CPU_VAR(espfix_stack), %rax
Dave Hansen8a093172017-12-04 15:07:35 +0100979
Peter Zijlstra6fd166a2017-12-04 15:07:59 +0100980 SWITCH_TO_USER_CR3_STACK scratch_reg=%rdi
Dave Hansen8a093172017-12-04 15:07:35 +0100981 SWAPGS /* to user GS */
982 popq %rdi /* Restore user RDI */
983
Ingo Molnar4d732132015-06-08 20:43:07 +0200984 movq %rax, %rsp
Josh Poimboeuf8c1f7552017-07-11 10:33:44 -0500985 UNWIND_HINT_IRET_REGS offset=8
Andy Lutomirski85063fa2016-09-12 15:05:51 -0700986
987 /*
988 * At this point, we cannot write to the stack any more, but we can
989 * still read.
990 */
991 popq %rax /* Restore user RAX */
992
993 /*
994 * RSP now points to an ordinary IRET frame, except that the page
995 * is read-only and RSP[31:16] are preloaded with the userspace
996 * values. We can now IRET back to userspace.
997 */
Ingo Molnar4d732132015-06-08 20:43:07 +0200998 jmp native_irq_return_iret
H. Peter Anvin34273f42014-05-04 10:36:22 -0700999#endif
Jiri Slabycc669362019-10-11 13:50:50 +02001000SYM_CODE_END(common_interrupt)
Andrea Righia50480c2018-12-06 10:56:48 +01001001_ASM_NOKPROBE(common_interrupt)
H. Peter Anvin3891a042014-04-29 16:46:09 -07001002
Masami Hiramatsu8222d712009-08-27 13:23:25 -04001003/*
Linus Torvalds1da177e2005-04-16 15:20:36 -07001004 * APIC interrupts.
Alexander van Heukelum0bd7b792008-11-16 15:29:00 +01001005 */
Seiji Aguchicf910e82013-06-20 11:46:53 -04001006.macro apicinterrupt3 num sym do_sym
Jiri Slabybc7b11c2019-10-11 13:51:03 +02001007SYM_CODE_START(\sym)
Josh Poimboeuf8c1f7552017-07-11 10:33:44 -05001008 UNWIND_HINT_IRET_REGS
Ingo Molnar4d732132015-06-08 20:43:07 +02001009 pushq $~(\num)
Dominik Brodowski3aa99fc2018-02-20 22:01:11 +01001010 call interrupt_entry
1011 UNWIND_HINT_REGS indirect=1
1012 call \do_sym /* rdi points to pt_regs */
Ingo Molnar4d732132015-06-08 20:43:07 +02001013 jmp ret_from_intr
Jiri Slabybc7b11c2019-10-11 13:51:03 +02001014SYM_CODE_END(\sym)
Andrea Righia50480c2018-12-06 10:56:48 +01001015_ASM_NOKPROBE(\sym)
Alexander van Heukelum322648d2008-11-23 10:08:28 +01001016.endm
Jacob Shin89b831e2005-11-05 17:25:53 +01001017
Alexander Potapenko469f0022016-07-15 11:42:43 +02001018/* Make sure APIC interrupt handlers end up in the irqentry section: */
Masami Hiramatsu229a7182017-08-03 11:38:21 +09001019#define PUSH_SECTION_IRQENTRY .pushsection .irqentry.text, "ax"
1020#define POP_SECTION_IRQENTRY .popsection
Alexander Potapenko469f0022016-07-15 11:42:43 +02001021
Seiji Aguchicf910e82013-06-20 11:46:53 -04001022.macro apicinterrupt num sym do_sym
Alexander Potapenko469f0022016-07-15 11:42:43 +02001023PUSH_SECTION_IRQENTRY
Seiji Aguchicf910e82013-06-20 11:46:53 -04001024apicinterrupt3 \num \sym \do_sym
Alexander Potapenko469f0022016-07-15 11:42:43 +02001025POP_SECTION_IRQENTRY
Seiji Aguchicf910e82013-06-20 11:46:53 -04001026.endm
1027
Alexander van Heukelum0bd7b792008-11-16 15:29:00 +01001028#ifdef CONFIG_SMP
Ingo Molnar4d732132015-06-08 20:43:07 +02001029apicinterrupt3 IRQ_MOVE_CLEANUP_VECTOR irq_move_cleanup_interrupt smp_irq_move_cleanup_interrupt
1030apicinterrupt3 REBOOT_VECTOR reboot_interrupt smp_reboot_interrupt
Linus Torvalds1da177e2005-04-16 15:20:36 -07001031#endif
1032
Nick Piggin03b48632009-01-20 04:36:04 +01001033#ifdef CONFIG_X86_UV
Ingo Molnar4d732132015-06-08 20:43:07 +02001034apicinterrupt3 UV_BAU_MESSAGE uv_bau_message_intr1 uv_bau_message_interrupt
Nick Piggin03b48632009-01-20 04:36:04 +01001035#endif
Ingo Molnar4d732132015-06-08 20:43:07 +02001036
1037apicinterrupt LOCAL_TIMER_VECTOR apic_timer_interrupt smp_apic_timer_interrupt
1038apicinterrupt X86_PLATFORM_IPI_VECTOR x86_platform_ipi smp_x86_platform_ipi
Linus Torvalds1da177e2005-04-16 15:20:36 -07001039
Yang Zhangd78f2662013-04-11 19:25:11 +08001040#ifdef CONFIG_HAVE_KVM
Ingo Molnar4d732132015-06-08 20:43:07 +02001041apicinterrupt3 POSTED_INTR_VECTOR kvm_posted_intr_ipi smp_kvm_posted_intr_ipi
1042apicinterrupt3 POSTED_INTR_WAKEUP_VECTOR kvm_posted_intr_wakeup_ipi smp_kvm_posted_intr_wakeup_ipi
Wincy Van210f84b2017-04-28 13:13:58 +08001043apicinterrupt3 POSTED_INTR_NESTED_VECTOR kvm_posted_intr_nested_ipi smp_kvm_posted_intr_nested_ipi
Yang Zhangd78f2662013-04-11 19:25:11 +08001044#endif
1045
Seiji Aguchi33e5ff62013-06-22 07:33:30 -04001046#ifdef CONFIG_X86_MCE_THRESHOLD
Ingo Molnar4d732132015-06-08 20:43:07 +02001047apicinterrupt THRESHOLD_APIC_VECTOR threshold_interrupt smp_threshold_interrupt
Seiji Aguchi33e5ff62013-06-22 07:33:30 -04001048#endif
1049
Aravind Gopalakrishnan24fd78a2015-05-06 06:58:56 -05001050#ifdef CONFIG_X86_MCE_AMD
Ingo Molnar4d732132015-06-08 20:43:07 +02001051apicinterrupt DEFERRED_ERROR_VECTOR deferred_error_interrupt smp_deferred_error_interrupt
Aravind Gopalakrishnan24fd78a2015-05-06 06:58:56 -05001052#endif
1053
Seiji Aguchi33e5ff62013-06-22 07:33:30 -04001054#ifdef CONFIG_X86_THERMAL_VECTOR
Ingo Molnar4d732132015-06-08 20:43:07 +02001055apicinterrupt THERMAL_APIC_VECTOR thermal_interrupt smp_thermal_interrupt
Seiji Aguchi33e5ff62013-06-22 07:33:30 -04001056#endif
Linus Torvalds1da177e2005-04-16 15:20:36 -07001057
Alexander van Heukelum322648d2008-11-23 10:08:28 +01001058#ifdef CONFIG_SMP
Ingo Molnar4d732132015-06-08 20:43:07 +02001059apicinterrupt CALL_FUNCTION_SINGLE_VECTOR call_function_single_interrupt smp_call_function_single_interrupt
1060apicinterrupt CALL_FUNCTION_VECTOR call_function_interrupt smp_call_function_interrupt
1061apicinterrupt RESCHEDULE_VECTOR reschedule_interrupt smp_reschedule_interrupt
Alexander van Heukelum322648d2008-11-23 10:08:28 +01001062#endif
1063
Ingo Molnar4d732132015-06-08 20:43:07 +02001064apicinterrupt ERROR_APIC_VECTOR error_interrupt smp_error_interrupt
1065apicinterrupt SPURIOUS_APIC_VECTOR spurious_interrupt smp_spurious_interrupt
Alexander van Heukelum0bd7b792008-11-16 15:29:00 +01001066
Peter Zijlstrae360adb2010-10-14 14:01:34 +08001067#ifdef CONFIG_IRQ_WORK
Ingo Molnar4d732132015-06-08 20:43:07 +02001068apicinterrupt IRQ_WORK_VECTOR irq_work_interrupt smp_irq_work_interrupt
Ingo Molnar241771e2008-12-03 10:39:53 +01001069#endif
1070
Linus Torvalds1da177e2005-04-16 15:20:36 -07001071/*
1072 * Exception entry points.
Alexander van Heukelum0bd7b792008-11-16 15:29:00 +01001073 */
Andy Lutomirski577ed452014-05-21 15:07:09 -07001074
Thomas Gleixnercfa82a02020-02-25 23:16:10 +01001075idtentry X86_TRAP_PF page_fault do_page_fault has_error_code=1
Thomas Gleixner67f13862020-02-25 23:16:09 +01001076
Thomas Gleixnercfa82a02020-02-25 23:16:10 +01001077idtentry_mce_db X86_TRAP_DB debug do_debug
1078idtentry_df X86_TRAP_DF double_fault do_double_fault
Thomas Gleixner67f13862020-02-25 23:16:09 +01001079
1080#ifdef CONFIG_XEN_PV
Thomas Gleixnercfa82a02020-02-25 23:16:10 +01001081idtentry 512 /* dummy */ hypervisor_callback xen_do_hypervisor_callback has_error_code=0
Thomas Gleixnercfa82a02020-02-25 23:16:10 +01001082idtentry X86_TRAP_DB xendebug do_debug has_error_code=0
Thomas Gleixner67f13862020-02-25 23:16:09 +01001083#endif
Ingo Molnar2601e642006-07-03 00:24:45 -07001084
Thomas Gleixnerb9f69762020-03-25 19:45:26 +01001085/*
1086 * Reload gs selector with exception handling
1087 * edi: new selector
1088 *
1089 * Is in entry.text as it shouldn't be instrumented.
1090 */
Thomas Gleixner410367e2020-03-04 23:32:15 +01001091SYM_FUNC_START(asm_load_gs_index)
Josh Poimboeuf8c1f7552017-07-11 10:33:44 -05001092 FRAME_BEGIN
Thomas Gleixnerc9317202020-05-12 14:54:14 +02001093 swapgs
Borislav Petkov42c748bb2016-04-07 17:31:50 -07001094.Lgs_change:
Ingo Molnar4d732132015-06-08 20:43:07 +02001095 movl %edi, %gs
Borislav Petkov96e5d282016-04-07 17:31:49 -070010962: ALTERNATIVE "", "mfence", X86_BUG_SWAPGS_FENCE
Thomas Gleixnerc9317202020-05-12 14:54:14 +02001097 swapgs
Josh Poimboeuf8c1f7552017-07-11 10:33:44 -05001098 FRAME_END
Cyrill Gorcunov9f1e87e2008-11-27 21:10:08 +03001099 ret
Thomas Gleixner410367e2020-03-04 23:32:15 +01001100SYM_FUNC_END(asm_load_gs_index)
1101EXPORT_SYMBOL(asm_load_gs_index)
Alexander van Heukelum0bd7b792008-11-16 15:29:00 +01001102
Jiri Slaby98ededb2019-09-06 09:55:50 +02001103 _ASM_EXTABLE(.Lgs_change, .Lbad_gs)
Ingo Molnar4d732132015-06-08 20:43:07 +02001104 .section .fixup, "ax"
Linus Torvalds1da177e2005-04-16 15:20:36 -07001105 /* running with kernelgs */
Jiri Slabyef77e682019-10-11 13:50:45 +02001106SYM_CODE_START_LOCAL_NOALIGN(.Lbad_gs)
Thomas Gleixnerc9317202020-05-12 14:54:14 +02001107 swapgs /* switch back to user gs */
Andy Lutomirskib038c842016-04-26 12:23:27 -07001108.macro ZAP_GS
1109 /* This can't be a string because the preprocessor needs to see it. */
1110 movl $__USER_DS, %eax
1111 movl %eax, %gs
1112.endm
1113 ALTERNATIVE "", "ZAP_GS", X86_BUG_NULL_SEG
Ingo Molnar4d732132015-06-08 20:43:07 +02001114 xorl %eax, %eax
1115 movl %eax, %gs
1116 jmp 2b
Jiri Slabyef77e682019-10-11 13:50:45 +02001117SYM_CODE_END(.Lbad_gs)
Cyrill Gorcunov9f1e87e2008-11-27 21:10:08 +03001118 .previous
Alexander van Heukelum0bd7b792008-11-16 15:29:00 +01001119
Andi Kleen26995002006-08-02 22:37:28 +02001120/* Call softirq on interrupt stack. Interrupts are off. */
Thomas Gleixnerb9f69762020-03-25 19:45:26 +01001121.pushsection .text, "ax"
Jiri Slaby6dcc5622019-10-11 13:51:04 +02001122SYM_FUNC_START(do_softirq_own_stack)
Ingo Molnar4d732132015-06-08 20:43:07 +02001123 pushq %rbp
1124 mov %rsp, %rbp
Josh Poimboeuf8c1f7552017-07-11 10:33:44 -05001125 ENTER_IRQ_STACK regs=0 old_rsp=%r11
Ingo Molnar4d732132015-06-08 20:43:07 +02001126 call __do_softirq
Josh Poimboeuf8c1f7552017-07-11 10:33:44 -05001127 LEAVE_IRQ_STACK regs=0
Andi Kleen26995002006-08-02 22:37:28 +02001128 leaveq
Andi Kleened6b6762005-07-28 21:15:49 -07001129 ret
Jiri Slaby6dcc5622019-10-11 13:51:04 +02001130SYM_FUNC_END(do_softirq_own_stack)
Thomas Gleixnerb9f69762020-03-25 19:45:26 +01001131.popsection
Andi Kleen75154f42007-06-23 02:29:25 +02001132
Juergen Gross28c11b02018-08-28 09:40:12 +02001133#ifdef CONFIG_XEN_PV
Jeremy Fitzhardinge3d75e1b2008-07-08 15:06:49 -07001134/*
Cyrill Gorcunov9f1e87e2008-11-27 21:10:08 +03001135 * A note on the "critical region" in our callback handler.
1136 * We want to avoid stacking callback handlers due to events occurring
1137 * during handling of the last event. To do this, we keep events disabled
1138 * until we've done all processing. HOWEVER, we must enable events before
1139 * popping the stack frame (can't be done atomically) and so it would still
1140 * be possible to get enough handler activations to overflow the stack.
1141 * Although unlikely, bugs of that kind are hard to track down, so we'd
1142 * like to avoid the possibility.
1143 * So, on entry to the handler we detect whether we interrupted an
1144 * existing activation in its critical region -- if so, we pop the current
1145 * activation and restart the handler using the previous one.
1146 */
Jiri Slabyef1e0312019-10-11 13:51:00 +02001147/* xen_do_hypervisor_callback(struct pt_regs *) */
1148SYM_CODE_START_LOCAL(xen_do_hypervisor_callback)
Ingo Molnar4d732132015-06-08 20:43:07 +02001149
Cyrill Gorcunov9f1e87e2008-11-27 21:10:08 +03001150/*
1151 * Since we don't modify %rdi, xen_evtchn_do_upcall(struct pt_regs *) will
1152 * see the correct pointer to the pt_regs
1153 */
Josh Poimboeuf8c1f7552017-07-11 10:33:44 -05001154 UNWIND_HINT_FUNC
Ingo Molnar4d732132015-06-08 20:43:07 +02001155 movq %rdi, %rsp /* we don't return, adjust the stack frame */
Josh Poimboeuf8c1f7552017-07-11 10:33:44 -05001156 UNWIND_HINT_REGS
Andy Lutomirski1d3e53e2017-07-11 10:33:38 -05001157
1158 ENTER_IRQ_STACK old_rsp=%r10
Ingo Molnar4d732132015-06-08 20:43:07 +02001159 call xen_evtchn_do_upcall
Andy Lutomirski1d3e53e2017-07-11 10:33:38 -05001160 LEAVE_IRQ_STACK
1161
Thomas Gleixner48593972019-07-26 23:19:42 +02001162#ifndef CONFIG_PREEMPTION
Ingo Molnar4d732132015-06-08 20:43:07 +02001163 call xen_maybe_preempt_hcall
David Vrabelfdfd8112015-02-19 15:23:17 +00001164#endif
Ingo Molnar4d732132015-06-08 20:43:07 +02001165 jmp error_exit
Jiri Slabyef1e0312019-10-11 13:51:00 +02001166SYM_CODE_END(xen_do_hypervisor_callback)
Jeremy Fitzhardinge3d75e1b2008-07-08 15:06:49 -07001167
1168/*
Cyrill Gorcunov9f1e87e2008-11-27 21:10:08 +03001169 * Hypervisor uses this for application faults while it executes.
1170 * We get here for two reasons:
1171 * 1. Fault while reloading DS, ES, FS or GS
1172 * 2. Fault while executing IRET
1173 * Category 1 we do not need to fix up as Xen has already reloaded all segment
1174 * registers that could be reloaded and zeroed the others.
1175 * Category 2 we fix up by killing the current process. We cannot use the
1176 * normal Linux return path in this case because if we use the IRET hypercall
1177 * to pop the stack frame we end up in an infinite loop of failsafe callbacks.
1178 * We distinguish between categories by comparing each saved segment register
1179 * with its current contents: any discrepancy means we are in category 1.
1180 */
Jiri Slabybc7b11c2019-10-11 13:51:03 +02001181SYM_CODE_START(xen_failsafe_callback)
Josh Poimboeuf8c1f7552017-07-11 10:33:44 -05001182 UNWIND_HINT_EMPTY
Ingo Molnar4d732132015-06-08 20:43:07 +02001183 movl %ds, %ecx
1184 cmpw %cx, 0x10(%rsp)
1185 jne 1f
1186 movl %es, %ecx
1187 cmpw %cx, 0x18(%rsp)
1188 jne 1f
1189 movl %fs, %ecx
1190 cmpw %cx, 0x20(%rsp)
1191 jne 1f
1192 movl %gs, %ecx
1193 cmpw %cx, 0x28(%rsp)
1194 jne 1f
Jeremy Fitzhardinge3d75e1b2008-07-08 15:06:49 -07001195 /* All segments match their saved values => Category 2 (Bad IRET). */
Ingo Molnar4d732132015-06-08 20:43:07 +02001196 movq (%rsp), %rcx
1197 movq 8(%rsp), %r11
1198 addq $0x30, %rsp
1199 pushq $0 /* RIP */
Josh Poimboeuf8c1f7552017-07-11 10:33:44 -05001200 UNWIND_HINT_IRET_REGS offset=8
Thomas Gleixnerbe4c11a2020-02-25 23:16:25 +01001201 jmp asm_exc_general_protection
Jeremy Fitzhardinge3d75e1b2008-07-08 15:06:49 -070012021: /* Segment mismatch => Category 1 (Bad segment). Retry the IRET. */
Ingo Molnar4d732132015-06-08 20:43:07 +02001203 movq (%rsp), %rcx
1204 movq 8(%rsp), %r11
1205 addq $0x30, %rsp
Josh Poimboeuf8c1f7552017-07-11 10:33:44 -05001206 UNWIND_HINT_IRET_REGS
Ingo Molnar4d732132015-06-08 20:43:07 +02001207 pushq $-1 /* orig_ax = -1 => not a system call */
Dominik Brodowski3f01dae2018-02-11 11:49:45 +01001208 PUSH_AND_CLEAR_REGS
Josh Poimboeuf946c1912016-10-20 11:34:40 -05001209 ENCODE_FRAME_POINTER
Ingo Molnar4d732132015-06-08 20:43:07 +02001210 jmp error_exit
Jiri Slabybc7b11c2019-10-11 13:51:03 +02001211SYM_CODE_END(xen_failsafe_callback)
Juergen Gross28c11b02018-08-28 09:40:12 +02001212#endif /* CONFIG_XEN_PV */
Jeremy Fitzhardinge3d75e1b2008-07-08 15:06:49 -07001213
Juergen Gross28c11b02018-08-28 09:40:12 +02001214#ifdef CONFIG_XEN_PVHVM
Seiji Aguchicf910e82013-06-20 11:46:53 -04001215apicinterrupt3 HYPERVISOR_CALLBACK_VECTOR \
Sheng Yang38e20b02010-05-14 12:40:51 +01001216 xen_hvm_callback_vector xen_evtchn_do_upcall
Juergen Gross28c11b02018-08-28 09:40:12 +02001217#endif
Sheng Yang38e20b02010-05-14 12:40:51 +01001218
Alexander van Heukelumddeb8f22008-11-24 13:24:28 +01001219
K. Y. Srinivasanbc2b0332013-02-03 17:22:39 -08001220#if IS_ENABLED(CONFIG_HYPERV)
Seiji Aguchicf910e82013-06-20 11:46:53 -04001221apicinterrupt3 HYPERVISOR_CALLBACK_VECTOR \
K. Y. Srinivasanbc2b0332013-02-03 17:22:39 -08001222 hyperv_callback_vector hyperv_vector_handler
Vitaly Kuznetsov93286262018-01-24 14:23:33 +01001223
1224apicinterrupt3 HYPERV_REENLIGHTENMENT_VECTOR \
1225 hyperv_reenlightenment_vector hyperv_reenlightenment_intr
Michael Kelley248e7422018-03-04 22:17:18 -07001226
1227apicinterrupt3 HYPERV_STIMER0_VECTOR \
1228 hv_stimer0_callback_vector hv_stimer0_vector_handler
K. Y. Srinivasanbc2b0332013-02-03 17:22:39 -08001229#endif /* CONFIG_HYPERV */
1230
Zhao Yakui498ad392019-04-30 11:45:25 +08001231#if IS_ENABLED(CONFIG_ACRN_GUEST)
1232apicinterrupt3 HYPERVISOR_CALLBACK_VECTOR \
1233 acrn_hv_callback_vector acrn_hv_vector_handler
1234#endif
1235
Denys Vlasenkoebfc4532015-02-26 14:40:34 -08001236/*
Dominik Brodowski9e809d12018-02-14 18:59:23 +01001237 * Save all registers in pt_regs, and switch gs if needed.
Denys Vlasenkoebfc4532015-02-26 14:40:34 -08001238 * Use slow, but surefire "are we in kernel?" check.
1239 * Return: ebx=0: need swapgs on exit, ebx=1: otherwise
1240 */
Jiri Slabyef1e0312019-10-11 13:51:00 +02001241SYM_CODE_START_LOCAL(paranoid_entry)
Josh Poimboeuf8c1f7552017-07-11 10:33:44 -05001242 UNWIND_HINT_FUNC
Denys Vlasenko1eeb2072015-02-26 14:40:33 -08001243 cld
Dominik Brodowski9e809d12018-02-14 18:59:23 +01001244 PUSH_AND_CLEAR_REGS save_ret=1
1245 ENCODE_FRAME_POINTER 8
Ingo Molnar4d732132015-06-08 20:43:07 +02001246 movl $1, %ebx
1247 movl $MSR_GS_BASE, %ecx
Denys Vlasenko1eeb2072015-02-26 14:40:33 -08001248 rdmsr
Ingo Molnar4d732132015-06-08 20:43:07 +02001249 testl %edx, %edx
1250 js 1f /* negative -> in kernel */
Denys Vlasenko1eeb2072015-02-26 14:40:33 -08001251 SWAPGS
Ingo Molnar4d732132015-06-08 20:43:07 +02001252 xorl %ebx, %ebx
Dave Hansen8a093172017-12-04 15:07:35 +01001253
12541:
Dave Hansen16561f22018-10-12 16:21:18 -07001255 /*
1256 * Always stash CR3 in %r14. This value will be restored,
Andy Lutomirskiae852492018-10-14 11:38:18 -07001257 * verbatim, at exit. Needed if paranoid_entry interrupted
1258 * another entry that already switched to the user CR3 value
1259 * but has not yet returned to userspace.
Dave Hansen16561f22018-10-12 16:21:18 -07001260 *
1261 * This is also why CS (stashed in the "iret frame" by the
1262 * hardware at entry) can not be used: this may be a return
Andy Lutomirskiae852492018-10-14 11:38:18 -07001263 * to kernel code, but with a user CR3 value.
Dave Hansen16561f22018-10-12 16:21:18 -07001264 */
Dave Hansen8a093172017-12-04 15:07:35 +01001265 SAVE_AND_SWITCH_TO_KERNEL_CR3 scratch_reg=%rax save_reg=%r14
1266
Josh Poimboeuf18ec54f2019-07-08 11:52:25 -05001267 /*
1268 * The above SAVE_AND_SWITCH_TO_KERNEL_CR3 macro doesn't do an
1269 * unconditional CR3 write, even in the PTI case. So do an lfence
1270 * to prevent GS speculation, regardless of whether PTI is enabled.
1271 */
1272 FENCE_SWAPGS_KERNEL_ENTRY
1273
Dave Hansen8a093172017-12-04 15:07:35 +01001274 ret
Jiri Slabyef1e0312019-10-11 13:51:00 +02001275SYM_CODE_END(paranoid_entry)
Denys Vlasenko1eeb2072015-02-26 14:40:33 -08001276
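/*
 * Typical pairing (a sketch, not a definition): the paranoid idtentry
 * stubs wrap their C handler roughly as
 *
 *	call	paranoid_entry
 *	UNWIND_HINT_REGS
 *	movq	%rsp, %rdi		(pt_regs pointer)
 *	call	<the exc_* handler>
 *	jmp	paranoid_exit
 *
 * so the %ebx swapgs flag and the %r14 CR3 stash set up here are still
 * live when paranoid_exit runs.
 */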
Denys Vlasenkoebfc4532015-02-26 14:40:34 -08001277/*
1278 * "Paranoid" exit path from exception stack. This is invoked
1279 * only on return from non-NMI IST interrupts that came
1280 * from kernel space.
1281 *
1282 * We may be returning to very strange contexts (e.g. very early
1283 * in syscall entry), so checking for preemption here would
1284 * be complicated. Fortunately, there's no good reason
1285 * to try to handle preemption here.
Ingo Molnar4d732132015-06-08 20:43:07 +02001286 *
1287 * On entry, ebx is "no swapgs" flag (1: don't need swapgs, 0: need it)
Denys Vlasenkoebfc4532015-02-26 14:40:34 -08001288 */
Jiri Slabyef1e0312019-10-11 13:51:00 +02001289SYM_CODE_START_LOCAL(paranoid_exit)
Josh Poimboeuf8c1f7552017-07-11 10:33:44 -05001290 UNWIND_HINT_REGS
Jan Beulich2140a992017-02-03 02:03:25 -07001291 DISABLE_INTERRUPTS(CLBR_ANY)
Steven Rostedt5963e312012-05-30 11:54:53 -04001292 TRACE_IRQS_OFF_DEBUG
Ingo Molnar4d732132015-06-08 20:43:07 +02001293 testl %ebx, %ebx /* swapgs needed? */
Andy Lutomirskie53178322017-11-02 00:59:02 -07001294 jnz .Lparanoid_exit_no_swapgs
Denys Vlasenkof2db9382015-02-26 14:40:30 -08001295 TRACE_IRQS_IRETQ
Dave Hansen16561f22018-10-12 16:21:18 -07001296 /* Always restore stashed CR3 value (see paranoid_entry) */
Peter Zijlstra21e94452017-12-04 15:08:00 +01001297 RESTORE_CR3 scratch_reg=%rbx save_reg=%r14
Alexander van Heukelumddeb8f22008-11-24 13:24:28 +01001298 SWAPGS_UNSAFE_STACK
Thomas Gleixner45c08382019-10-23 14:27:07 +02001299 jmp restore_regs_and_return_to_kernel
Andy Lutomirskie53178322017-11-02 00:59:02 -07001300.Lparanoid_exit_no_swapgs:
Denys Vlasenkof2db9382015-02-26 14:40:30 -08001301 TRACE_IRQS_IRETQ_DEBUG
Dave Hansen16561f22018-10-12 16:21:18 -07001302 /* Always restore stashed CR3 value (see paranoid_entry) */
Ingo Molnare4865752018-02-14 08:39:11 +01001303 RESTORE_CR3 scratch_reg=%rbx save_reg=%r14
Andy Lutomirskie53178322017-11-02 00:59:02 -07001304 jmp restore_regs_and_return_to_kernel
Jiri Slabyef1e0312019-10-11 13:51:00 +02001305SYM_CODE_END(paranoid_exit)
Alexander van Heukelumddeb8f22008-11-24 13:24:28 +01001306
1307/*
Dominik Brodowski9e809d12018-02-14 18:59:23 +01001308 * Save all registers in pt_regs, and switch GS if needed.
Alexander van Heukelumddeb8f22008-11-24 13:24:28 +01001309 */
Jiri Slabyef1e0312019-10-11 13:51:00 +02001310SYM_CODE_START_LOCAL(error_entry)
Dominik Brodowski9e809d12018-02-14 18:59:23 +01001311 UNWIND_HINT_FUNC
Alexander van Heukelumddeb8f22008-11-24 13:24:28 +01001312 cld
Dominik Brodowski9e809d12018-02-14 18:59:23 +01001313 PUSH_AND_CLEAR_REGS save_ret=1
1314 ENCODE_FRAME_POINTER 8
Denys Vlasenko03335e92015-04-27 15:21:52 +02001315 testb $3, CS+8(%rsp)
Andy Lutomirskicb6f64e2015-07-03 12:44:27 -07001316 jz .Lerror_kernelspace
Andy Lutomirski539f5112015-06-09 12:36:01 -07001317
Andy Lutomirskicb6f64e2015-07-03 12:44:27 -07001318 /*
1319 * We entered from user mode or we're pretending to have entered
1320 * from user mode due to an IRET fault.
1321 */
Alexander van Heukelumddeb8f22008-11-24 13:24:28 +01001322 SWAPGS
Josh Poimboeuf18ec54f2019-07-08 11:52:25 -05001323 FENCE_SWAPGS_USER_ENTRY
Dave Hansen8a093172017-12-04 15:07:35 +01001324 /* We have user CR3. Change to kernel CR3. */
1325 SWITCH_TO_KERNEL_CR3 scratch_reg=%rax
Andy Lutomirski539f5112015-06-09 12:36:01 -07001326
Andy Lutomirskicb6f64e2015-07-03 12:44:27 -07001327.Lerror_entry_from_usermode_after_swapgs:
Andy Lutomirski7f2590a2017-12-04 15:07:23 +01001328 /* Put us onto the real thread stack. */
1329 popq %r12 /* save return addr in %r12 */
1330 movq %rsp, %rdi /* arg0 = pt_regs pointer */
1331 call sync_regs
1332 movq %rax, %rsp /* switch stack */
1333 ENCODE_FRAME_POINTER
1334 pushq %r12
Andy Lutomirskif1075052015-11-12 12:59:00 -08001335 ret
Andy Lutomirski02bc7762015-07-03 12:44:31 -07001336
Josh Poimboeuf18ec54f2019-07-08 11:52:25 -05001337.Lerror_entry_done_lfence:
1338 FENCE_SWAPGS_KERNEL_ENTRY
Andy Lutomirskicb6f64e2015-07-03 12:44:27 -07001339.Lerror_entry_done:
Alexander van Heukelumddeb8f22008-11-24 13:24:28 +01001340 ret
Alexander van Heukelumddeb8f22008-11-24 13:24:28 +01001341
Denys Vlasenkoebfc4532015-02-26 14:40:34 -08001342 /*
1343 * There are two places in the kernel that can potentially fault with
1344 * usergs. Handle them here. B stepping K8s sometimes report a
1345 * truncated RIP for IRET exceptions returning to compat mode. Check
1346 * for these here too.
1347 */
Andy Lutomirskicb6f64e2015-07-03 12:44:27 -07001348.Lerror_kernelspace:
Ingo Molnar4d732132015-06-08 20:43:07 +02001349 leaq native_irq_return_iret(%rip), %rcx
1350 cmpq %rcx, RIP+8(%rsp)
Andy Lutomirskicb6f64e2015-07-03 12:44:27 -07001351 je .Lerror_bad_iret
Ingo Molnar4d732132015-06-08 20:43:07 +02001352 movl %ecx, %eax /* zero extend */
1353 cmpq %rax, RIP+8(%rsp)
Andy Lutomirskicb6f64e2015-07-03 12:44:27 -07001354 je .Lbstep_iret
Borislav Petkov42c748bb2016-04-07 17:31:50 -07001355 cmpq $.Lgs_change, RIP+8(%rsp)
Josh Poimboeuf18ec54f2019-07-08 11:52:25 -05001356 jne .Lerror_entry_done_lfence
Andy Lutomirski539f5112015-06-09 12:36:01 -07001357
1358 /*
Borislav Petkov42c748bb2016-04-07 17:31:50 -07001359 * hack: .Lgs_change can fail with user gsbase. If this happens, fix up
Andy Lutomirski539f5112015-06-09 12:36:01 -07001360 * gsbase and proceed. We'll fix up the exception and land in
Borislav Petkov42c748bb2016-04-07 17:31:50 -07001361 * .Lgs_change's error handler with kernel gsbase.
Andy Lutomirski539f5112015-06-09 12:36:01 -07001362 */
Wanpeng Li2fa5f042016-09-30 09:01:06 +08001363 SWAPGS
Josh Poimboeuf18ec54f2019-07-08 11:52:25 -05001364 FENCE_SWAPGS_USER_ENTRY
Wanpeng Li2fa5f042016-09-30 09:01:06 +08001365 jmp .Lerror_entry_done
Brian Gerstae24ffe2009-10-12 10:18:23 -04001366
Andy Lutomirskicb6f64e2015-07-03 12:44:27 -07001367.Lbstep_iret:
Brian Gerstae24ffe2009-10-12 10:18:23 -04001368 /* Fix truncated RIP */
Ingo Molnar4d732132015-06-08 20:43:07 +02001369 movq %rcx, RIP+8(%rsp)
Andy Lutomirskib645af22014-11-22 18:00:33 -08001370 /* fall through */
1371
Andy Lutomirskicb6f64e2015-07-03 12:44:27 -07001372.Lerror_bad_iret:
Andy Lutomirski539f5112015-06-09 12:36:01 -07001373 /*
Dave Hansen8a093172017-12-04 15:07:35 +01001374 * We came from an IRET to user mode, so we have user
1375 * gsbase and CR3. Switch to kernel gsbase and CR3:
Andy Lutomirski539f5112015-06-09 12:36:01 -07001376 */
Andy Lutomirskib645af22014-11-22 18:00:33 -08001377 SWAPGS
Josh Poimboeuf18ec54f2019-07-08 11:52:25 -05001378 FENCE_SWAPGS_USER_ENTRY
Dave Hansen8a093172017-12-04 15:07:35 +01001379 SWITCH_TO_KERNEL_CR3 scratch_reg=%rax
Andy Lutomirski539f5112015-06-09 12:36:01 -07001380
1381 /*
1382 * Pretend that the exception came from user mode: set up pt_regs
Andy Lutomirskib3681dd2018-07-22 11:05:09 -07001383 * as if we faulted immediately after IRET.
Andy Lutomirski539f5112015-06-09 12:36:01 -07001384 */
Ingo Molnar4d732132015-06-08 20:43:07 +02001385 mov %rsp, %rdi
1386 call fixup_bad_iret
1387 mov %rax, %rsp
Andy Lutomirskicb6f64e2015-07-03 12:44:27 -07001388 jmp .Lerror_entry_from_usermode_after_swapgs
Jiri Slabyef1e0312019-10-11 13:51:00 +02001389SYM_CODE_END(error_entry)
Alexander van Heukelumddeb8f22008-11-24 13:24:28 +01001390
Jiri Slabyef1e0312019-10-11 13:51:00 +02001391SYM_CODE_START_LOCAL(error_exit)
Josh Poimboeuf8c1f7552017-07-11 10:33:44 -05001392 UNWIND_HINT_REGS
Jan Beulich2140a992017-02-03 02:03:25 -07001393 DISABLE_INTERRUPTS(CLBR_ANY)
Alexander van Heukelumddeb8f22008-11-24 13:24:28 +01001394 TRACE_IRQS_OFF
Andy Lutomirskib3681dd2018-07-22 11:05:09 -07001395 testb $3, CS(%rsp)
1396 jz retint_kernel
Jiri Slaby30a24412019-10-11 11:22:13 +02001397 jmp .Lretint_user
Jiri Slabyef1e0312019-10-11 13:51:00 +02001398SYM_CODE_END(error_exit)
Alexander van Heukelumddeb8f22008-11-24 13:24:28 +01001399
Thomas Gleixner424c7d02020-03-26 16:56:20 +01001400SYM_CODE_START_LOCAL(error_return)
1401 UNWIND_HINT_REGS
1402 DEBUG_ENTRY_ASSERT_IRQS_OFF
1403 testb $3, CS(%rsp)
1404 jz restore_regs_and_return_to_kernel
1405 jmp swapgs_restore_regs_and_return_to_usermode
1406SYM_CODE_END(error_return)
1407
Andy Lutomirski929bace2017-11-02 00:59:08 -07001408/*
1409 * Runs on exception stack. Xen PV does not go through this path at all,
1410 * so we can use real assembly here.
Dave Hansen8a093172017-12-04 15:07:35 +01001411 *
1412 * Registers:
1413 * %r14: Used to save/restore the CR3 of the interrupted context
1414 * when PAGE_TABLE_ISOLATION is in use. Do not clobber.
Andy Lutomirski929bace2017-11-02 00:59:08 -07001415 */
Thomas Gleixner6271fef2020-02-25 23:33:25 +01001416SYM_CODE_START(asm_exc_nmi)
Josh Poimboeuf8c1f7552017-07-11 10:33:44 -05001417 UNWIND_HINT_IRET_REGS
Andy Lutomirski929bace2017-11-02 00:59:08 -07001418
Andy Lutomirskifc57a7c2015-09-20 16:32:04 -07001419 /*
Steven Rostedt3f3c8b82011-12-08 12:36:23 -05001420 * We allow breakpoints in NMIs. If a breakpoint occurs, then
1421 * the iretq it performs will take us out of NMI context.
1422 * This means that we can have nested NMIs where the next
1423 * NMI is using the top of the stack of the previous NMI. We
1424 * can't let it execute because the nested NMI will corrupt the
1425 * stack of the previous NMI. NMI handlers are not re-entrant
1426 * anyway.
1427 *
1428 * To handle this case we do the following:
1429 * Check a special location on the stack that contains
1430 * a variable that is set when NMIs are executing.
1431 * The interrupted task's stack is also checked to see if it
1432 * is an NMI stack.
1433 * If the variable is not set and the stack is not the NMI
1434 * stack then:
1435 * o Set the special variable on the stack
Andy Lutomirski0b229302015-07-15 10:29:36 -07001436 * o Copy the interrupt frame into an "outermost" location on the
1437 * stack
1438 * o Copy the interrupt frame into an "iret" location on the stack
Steven Rostedt3f3c8b82011-12-08 12:36:23 -05001439 * o Continue processing the NMI
1440 * If the variable is set or the previous stack is the NMI stack:
Andy Lutomirski0b229302015-07-15 10:29:36 -07001441 * o Modify the "iret" location to jump to the repeat_nmi
Steven Rostedt3f3c8b82011-12-08 12:36:23 -05001442 * o return back to the first NMI
1443 *
1444 * Now on exit of the first NMI, we first clear the stack variable
1445 * The NMI stack will tell any nested NMIs at that point that it is
1446 * nested. Then we pop the stack normally with iret, and if there was
1447 * a nested NMI that updated the copy interrupt stack frame, a
1448 * jump will be made to the repeat_nmi code that will handle the second
1449 * NMI.
Andy Lutomirski9b6e6a82015-07-15 10:29:35 -07001450 *
1451 * However, espfix prevents us from directly returning to userspace
1452 * with a single IRET instruction. Similarly, IRET to user mode
1453 * can fault. We therefore handle NMIs from user space like
1454 * other IST entries.
Steven Rostedt3f3c8b82011-12-08 12:36:23 -05001455 */
1456
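	/*
	 * Condensed view of the checks below (a sketch of the logic the
	 * assembly implements, not additional behavior):
	 *
	 *	if (CS & 3)                      -> user-mode NMI, thread stack path
	 *	else if (repeat_nmi <= RIP < end_repeat_nmi)
	 *		-> resume the outer NMI, leave the "iret" frame alone
	 *	else if ("NMI executing" set || (RSP inside the NMI stack && DF set))
	 *		-> nested: point the "iret" frame at repeat_nmi
	 *	else
	 *		-> first_nmi: set up the frames and handle the NMI
	 */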
Andy Lutomirskie93c1732017-08-07 19:43:13 -07001457 ASM_CLAC
1458
Denys Vlasenko146b2b02015-03-25 18:18:13 +01001459 /* Use %rdx as our temp variable throughout */
Ingo Molnar4d732132015-06-08 20:43:07 +02001460 pushq %rdx
Steven Rostedt3f3c8b82011-12-08 12:36:23 -05001461
Andy Lutomirski9b6e6a82015-07-15 10:29:35 -07001462 testb $3, CS-RIP+8(%rsp)
1463 jz .Lnmi_from_kernel
Steven Rostedt45d5a162012-02-19 16:43:37 -05001464
1465 /*
Andy Lutomirski9b6e6a82015-07-15 10:29:35 -07001466 * NMI from user mode. We need to run on the thread stack, but we
1467 * can't go through the normal entry paths: NMIs are masked, and
1468 * we don't want to enable interrupts, because then we'll end
1469 * up in an awkward situation in which IRQs are on but NMIs
1470 * are off.
Andy Lutomirski83c133c2015-09-20 16:32:05 -07001471 *
1472 * We also must not push anything to the stack before switching
1473 * stacks lest we corrupt the "NMI executing" variable.
Andy Lutomirski9b6e6a82015-07-15 10:29:35 -07001474 */
1475
Andy Lutomirski929bace2017-11-02 00:59:08 -07001476 swapgs
Andy Lutomirski9b6e6a82015-07-15 10:29:35 -07001477 cld
Josh Poimboeuf18ec54f2019-07-08 11:52:25 -05001478 FENCE_SWAPGS_USER_ENTRY
Dave Hansen8a093172017-12-04 15:07:35 +01001479 SWITCH_TO_KERNEL_CR3 scratch_reg=%rdx
Andy Lutomirski9b6e6a82015-07-15 10:29:35 -07001480 movq %rsp, %rdx
1481 movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp
Josh Poimboeuf8c1f7552017-07-11 10:33:44 -05001482 UNWIND_HINT_IRET_REGS base=%rdx offset=8
Andy Lutomirski9b6e6a82015-07-15 10:29:35 -07001483 pushq 5*8(%rdx) /* pt_regs->ss */
1484 pushq 4*8(%rdx) /* pt_regs->rsp */
1485 pushq 3*8(%rdx) /* pt_regs->flags */
1486 pushq 2*8(%rdx) /* pt_regs->cs */
1487 pushq 1*8(%rdx) /* pt_regs->rip */
Josh Poimboeuf8c1f7552017-07-11 10:33:44 -05001488 UNWIND_HINT_IRET_REGS
Andy Lutomirski9b6e6a82015-07-15 10:29:35 -07001489 pushq $-1 /* pt_regs->orig_ax */
Dominik Brodowski30907fd2018-02-11 11:49:46 +01001490 PUSH_AND_CLEAR_REGS rdx=(%rdx)
Josh Poimboeuf946c1912016-10-20 11:34:40 -05001491 ENCODE_FRAME_POINTER
Andy Lutomirski9b6e6a82015-07-15 10:29:35 -07001492
1493 /*
1494 * At this point we no longer need to worry about stack damage
1495 * due to nesting -- we're on the normal thread stack and we're
1496 * done with the NMI stack.
1497 */
1498
1499 movq %rsp, %rdi
1500 movq $-1, %rsi
Thomas Gleixner6271fef2020-02-25 23:33:25 +01001501 call exc_nmi
Andy Lutomirski9b6e6a82015-07-15 10:29:35 -07001502
1503 /*
1504 * Return back to user mode. We must *not* do the normal exit
Josh Poimboeuf946c1912016-10-20 11:34:40 -05001505 * work, because we don't want to enable interrupts.
Andy Lutomirski9b6e6a82015-07-15 10:29:35 -07001506 */
Andy Lutomirski8a055d72017-11-02 00:59:00 -07001507 jmp swapgs_restore_regs_and_return_to_usermode
Andy Lutomirski9b6e6a82015-07-15 10:29:35 -07001508
1509.Lnmi_from_kernel:
1510 /*
Andy Lutomirski0b229302015-07-15 10:29:36 -07001511 * Here's what our stack frame will look like:
1512 * +---------------------------------------------------------+
1513 * | original SS |
1514 * | original Return RSP |
1515 * | original RFLAGS |
1516 * | original CS |
1517 * | original RIP |
1518 * +---------------------------------------------------------+
1519 * | temp storage for rdx |
1520 * +---------------------------------------------------------+
1521 * | "NMI executing" variable |
1522 * +---------------------------------------------------------+
1523 * | iret SS } Copied from "outermost" frame |
1524 * | iret Return RSP } on each loop iteration; overwritten |
1525 * | iret RFLAGS } by a nested NMI to force another |
1526 * | iret CS } iteration if needed. |
1527 * | iret RIP } |
1528 * +---------------------------------------------------------+
1529 * | outermost SS } initialized in first_nmi; |
1530 * | outermost Return RSP } will not be changed before |
1531 * | outermost RFLAGS } NMI processing is done. |
1532 * | outermost CS } Copied to "iret" frame on each |
1533 * | outermost RIP } iteration. |
1534 * +---------------------------------------------------------+
1535 * | pt_regs |
1536 * +---------------------------------------------------------+
1537 *
1538 * The "original" frame is used by hardware. Before re-enabling
1539 * NMIs, we need to be done with it, and we need to leave enough
1540 * space for the asm code here.
1541 *
1542 * We return by executing IRET while RSP points to the "iret" frame.
1543 * That will either return for real or it will loop back into NMI
1544 * processing.
1545 *
1546 * The "outermost" frame is copied to the "iret" frame on each
1547 * iteration of the loop, so each iteration starts with the "iret"
1548 * frame pointing to the final return target.
1549 */
1550
1551 /*
1552 * Determine whether we're a nested NMI.
1553 *
Andy Lutomirskia27507c2015-07-15 10:29:37 -07001554 * If we interrupted kernel code between repeat_nmi and
1555 * end_repeat_nmi, then we are a nested NMI. We must not
1556 * modify the "iret" frame because it's being written by
1557 * the outer NMI. That's okay; the outer NMI handler is
Thomas Gleixner6271fef2020-02-25 23:33:25 +01001558 * about to call exc_nmi() anyway, so we can just
Andy Lutomirskia27507c2015-07-15 10:29:37 -07001559 * resume the outer NMI.
1560 */
1561
1562 movq $repeat_nmi, %rdx
1563 cmpq 8(%rsp), %rdx
1564 ja 1f
1565 movq $end_repeat_nmi, %rdx
1566 cmpq 8(%rsp), %rdx
1567 ja nested_nmi_out
15681:
1569
1570 /*
1571 * Now check "NMI executing". If it's set, then we're nested.
Andy Lutomirski0b229302015-07-15 10:29:36 -07001572 * This will not detect if we interrupted an outer NMI just
1573 * before IRET.
Steven Rostedt3f3c8b82011-12-08 12:36:23 -05001574 */
Ingo Molnar4d732132015-06-08 20:43:07 +02001575 cmpl $1, -8(%rsp)
1576 je nested_nmi
Steven Rostedt3f3c8b82011-12-08 12:36:23 -05001577
1578 /*
Andy Lutomirski0b229302015-07-15 10:29:36 -07001579 * Now test if the previous stack was an NMI stack. This covers
1580 * the case where we interrupt an outer NMI after it clears
Andy Lutomirski810bc072015-07-15 10:29:38 -07001581 * "NMI executing" but before IRET. We need to be careful, though:
1582 * there is one case in which RSP could point to the NMI stack
1583 * despite there being no NMI active: naughty userspace controls
1584 * RSP at the very beginning of the SYSCALL targets. We can
1585 * pull a fast one on naughty userspace, though: we program
1586 * SYSCALL to mask DF, so userspace cannot cause DF to be set
1587 * if it controls the kernel's RSP. We set DF before we clear
1588 * "NMI executing".
Steven Rostedt3f3c8b82011-12-08 12:36:23 -05001589 */
Denys Vlasenko0784b362015-04-01 16:50:57 +02001590 lea 6*8(%rsp), %rdx
1591 /* Compare the NMI stack (rdx) with the stack we came from (4*8(%rsp)) */
1592 cmpq %rdx, 4*8(%rsp)
1593 /* If the stack pointer is above the NMI stack, this is a normal NMI */
1594 ja first_nmi
Ingo Molnar4d732132015-06-08 20:43:07 +02001595
Denys Vlasenko0784b362015-04-01 16:50:57 +02001596 subq $EXCEPTION_STKSZ, %rdx
1597 cmpq %rdx, 4*8(%rsp)
1598 /* If it is below the NMI stack, it is a normal NMI */
1599 jb first_nmi
Andy Lutomirski810bc072015-07-15 10:29:38 -07001600
1601 /* Ah, it is within the NMI stack. */
1602
1603 testb $(X86_EFLAGS_DF >> 8), (3*8 + 1)(%rsp)
1604 jz first_nmi /* RSP was user controlled. */
1605
1606 /* This is a nested NMI. */
Denys Vlasenko0784b362015-04-01 16:50:57 +02001607
Steven Rostedt3f3c8b82011-12-08 12:36:23 -05001608nested_nmi:
1609 /*
Andy Lutomirski0b229302015-07-15 10:29:36 -07001610 * Modify the "iret" frame to point to repeat_nmi, forcing another
1611 * iteration of NMI handling.
Steven Rostedt3f3c8b82011-12-08 12:36:23 -05001612 */
Andy Lutomirski23a781e2015-07-15 10:29:39 -07001613 subq $8, %rsp
Ingo Molnar4d732132015-06-08 20:43:07 +02001614 leaq -10*8(%rsp), %rdx
1615 pushq $__KERNEL_DS
1616 pushq %rdx
Ingo Molnar131484c2015-05-28 12:21:47 +02001617 pushfq
Ingo Molnar4d732132015-06-08 20:43:07 +02001618 pushq $__KERNEL_CS
1619 pushq $repeat_nmi
Steven Rostedt3f3c8b82011-12-08 12:36:23 -05001620
1621 /* Put stack back */
Ingo Molnar4d732132015-06-08 20:43:07 +02001622 addq $(6*8), %rsp
Steven Rostedt3f3c8b82011-12-08 12:36:23 -05001623
1624nested_nmi_out:
Ingo Molnar4d732132015-06-08 20:43:07 +02001625 popq %rdx
Steven Rostedt3f3c8b82011-12-08 12:36:23 -05001626
Andy Lutomirski0b229302015-07-15 10:29:36 -07001627 /* We are returning to kernel mode, so this cannot result in a fault. */
Andy Lutomirski929bace2017-11-02 00:59:08 -07001628 iretq
Steven Rostedt3f3c8b82011-12-08 12:36:23 -05001629
1630first_nmi:
Andy Lutomirski0b229302015-07-15 10:29:36 -07001631 /* Restore rdx. */
Ingo Molnar4d732132015-06-08 20:43:07 +02001632 movq (%rsp), %rdx
Jan Beulich62610912012-02-24 14:54:37 +00001633
Andy Lutomirski36f1a772015-07-15 10:29:40 -07001634 /* Make room for "NMI executing". */
1635 pushq $0
Steven Rostedt3f3c8b82011-12-08 12:36:23 -05001636
Andy Lutomirski0b229302015-07-15 10:29:36 -07001637 /* Leave room for the "iret" frame */
Ingo Molnar4d732132015-06-08 20:43:07 +02001638 subq $(5*8), %rsp
Salman Qazi28696f42012-10-01 17:29:25 -07001639
Andy Lutomirski0b229302015-07-15 10:29:36 -07001640 /* Copy the "original" frame to the "outermost" frame */
Steven Rostedt3f3c8b82011-12-08 12:36:23 -05001641 .rept 5
Ingo Molnar4d732132015-06-08 20:43:07 +02001642 pushq 11*8(%rsp)
Steven Rostedt3f3c8b82011-12-08 12:36:23 -05001643 .endr
Josh Poimboeuf8c1f7552017-07-11 10:33:44 -05001644 UNWIND_HINT_IRET_REGS
Jan Beulich62610912012-02-24 14:54:37 +00001645
Steven Rostedt79fb4ad2012-02-24 15:55:13 -05001646 /* Everything up to here is safe from nested NMIs */
1647
Andy Lutomirskia97439a2015-07-15 10:29:41 -07001648#ifdef CONFIG_DEBUG_ENTRY
1649 /*
1650 * For ease of testing, unmask NMIs right away. Disabled by
1651 * default because IRET is very expensive.
1652 */
1653 pushq $0 /* SS */
1654 pushq %rsp /* RSP (minus 8 because of the previous push) */
1655 addq $8, (%rsp) /* Fix up RSP */
1656 pushfq /* RFLAGS */
1657 pushq $__KERNEL_CS /* CS */
1658 pushq $1f /* RIP */
Andy Lutomirski929bace2017-11-02 00:59:08 -07001659 iretq /* continues at repeat_nmi below */
Josh Poimboeuf8c1f7552017-07-11 10:33:44 -05001660 UNWIND_HINT_IRET_REGS
Andy Lutomirskia97439a2015-07-15 10:29:41 -070016611:
1662#endif
1663
Andy Lutomirski0b229302015-07-15 10:29:36 -07001664repeat_nmi:
Jan Beulich62610912012-02-24 14:54:37 +00001665 /*
1666 * If there was a nested NMI, the first NMI's iret will return
1667 * here. But NMIs are still enabled and we can take another
1668 * nested NMI. The nested NMI checks the interrupted RIP to see
1669 * if it is between repeat_nmi and end_repeat_nmi, and if so
1670 * it will just return, as we are about to repeat an NMI anyway.
1671 * This makes it safe to copy to the stack frame that a nested
1672 * NMI will update.
Andy Lutomirski0b229302015-07-15 10:29:36 -07001673 *
1674 * RSP is pointing to "outermost RIP". gsbase is unknown, but, if
1675 * we're repeating an NMI, gsbase has the same value that it had on
1676 * the first iteration. paranoid_entry will load the kernel
Thomas Gleixner6271fef2020-02-25 23:33:25 +01001677 * gsbase if needed before we call exc_nmi(). "NMI executing"
Andy Lutomirski36f1a772015-07-15 10:29:40 -07001678 * is zero.
Jan Beulich62610912012-02-24 14:54:37 +00001679 */
Andy Lutomirski36f1a772015-07-15 10:29:40 -07001680 movq $1, 10*8(%rsp) /* Set "NMI executing". */
Steven Rostedt3f3c8b82011-12-08 12:36:23 -05001681
Andy Lutomirski0b229302015-07-15 10:29:36 -07001682 /*
1683 * Copy the "outermost" frame to the "iret" frame. NMIs that nest
1684 * here must not modify the "iret" frame while we're writing to
1685 * it or it will end up containing garbage.
1686 */
Ingo Molnar4d732132015-06-08 20:43:07 +02001687 addq $(10*8), %rsp
Steven Rostedt3f3c8b82011-12-08 12:36:23 -05001688 .rept 5
Ingo Molnar4d732132015-06-08 20:43:07 +02001689 pushq -6*8(%rsp)
Steven Rostedt3f3c8b82011-12-08 12:36:23 -05001690 .endr
Ingo Molnar4d732132015-06-08 20:43:07 +02001691 subq $(5*8), %rsp
Jan Beulich62610912012-02-24 14:54:37 +00001692end_repeat_nmi:
Steven Rostedt3f3c8b82011-12-08 12:36:23 -05001693
1694 /*
Andy Lutomirski0b229302015-07-15 10:29:36 -07001695 * Everything below this point can be preempted by a nested NMI.
1696 * If this happens, then the inner NMI will change the "iret"
1697 * frame to point back to repeat_nmi.
Steven Rostedt3f3c8b82011-12-08 12:36:23 -05001698 */
Ingo Molnar4d732132015-06-08 20:43:07 +02001699 pushq $-1 /* ORIG_RAX: no syscall to restart */
Denys Vlasenko76f5df42015-02-26 14:40:27 -08001700
Steven Rostedt1fd466e2011-12-08 12:32:27 -05001701 /*
Denys Vlasenkoebfc4532015-02-26 14:40:34 -08001702 * Use paranoid_entry to handle SWAPGS, but no need to use paranoid_exit
Steven Rostedt1fd466e2011-12-08 12:32:27 -05001703 * as we should not be calling schedule in NMI context.
1704 * Even with normal interrupts enabled. An NMI should not be
1705 * setting NEED_RESCHED or anything that normal interrupts and
1706 * exceptions might do.
1707 */
Ingo Molnar4d732132015-06-08 20:43:07 +02001708 call paranoid_entry
Josh Poimboeuf8c1f7552017-07-11 10:33:44 -05001709 UNWIND_HINT_REGS
Steven Rostedt7fbb98c2012-06-07 10:21:21 -04001710
Thomas Gleixner6271fef2020-02-25 23:33:25 +01001711 /* paranoidentry exc_nmi(), 0; without TRACE_IRQS_OFF */
Ingo Molnar4d732132015-06-08 20:43:07 +02001712 movq %rsp, %rdi
1713 movq $-1, %rsi
Thomas Gleixner6271fef2020-02-25 23:33:25 +01001714 call exc_nmi
Steven Rostedt7fbb98c2012-06-07 10:21:21 -04001715
Dave Hansen16561f22018-10-12 16:21:18 -07001716 /* Always restore stashed CR3 value (see paranoid_entry) */
Peter Zijlstra21e94452017-12-04 15:08:00 +01001717 RESTORE_CR3 scratch_reg=%r15 save_reg=%r14
Dave Hansen8a093172017-12-04 15:07:35 +01001718
Ingo Molnar4d732132015-06-08 20:43:07 +02001719 testl %ebx, %ebx /* swapgs needed? */
1720 jnz nmi_restore
Alexander van Heukelumddeb8f22008-11-24 13:24:28 +01001721nmi_swapgs:
1722 SWAPGS_UNSAFE_STACK
1723nmi_restore:
Dominik Brodowski502af0d2018-02-11 11:49:43 +01001724 POP_REGS
Andy Lutomirski0b229302015-07-15 10:29:36 -07001725
Andy Lutomirski471ee482017-11-02 00:59:05 -07001726 /*
1727 * Skip orig_ax and the "outermost" frame to point RSP at the "iret"
1728 * frame.
1729 */
1730 addq $6*8, %rsp
Salman Qazi28696f42012-10-01 17:29:25 -07001731
Andy Lutomirski810bc072015-07-15 10:29:38 -07001732 /*
1733 * Clear "NMI executing". Set DF first so that we can easily
1734 * distinguish the remaining code between here and IRET from
Andy Lutomirski929bace2017-11-02 00:59:08 -07001735 * the SYSCALL entry and exit paths.
1736 *
1737 * We arguably should just inspect RIP instead, but I (Andy) wrote
1738 * this code when I had the misapprehension that Xen PV supported
1739 * NMIs, and Xen PV would break that approach.
Andy Lutomirski810bc072015-07-15 10:29:38 -07001740 */
1741 std
1742 movq $0, 5*8(%rsp) /* clear "NMI executing" */
Andy Lutomirski0b229302015-07-15 10:29:36 -07001743
1744 /*
Andy Lutomirski929bace2017-11-02 00:59:08 -07001745 * iretq reads the "iret" frame and exits the NMI stack in a
1746 * single instruction. We are returning to kernel mode, so this
1747 * cannot result in a fault. Similarly, we don't need to worry
1748 * about espfix64 on the way back to kernel mode.
Andy Lutomirski0b229302015-07-15 10:29:36 -07001749 */
Andy Lutomirski929bace2017-11-02 00:59:08 -07001750 iretq
Thomas Gleixner6271fef2020-02-25 23:33:25 +01001751SYM_CODE_END(asm_exc_nmi)
Alexander van Heukelumddeb8f22008-11-24 13:24:28 +01001752
Andy Lutomirskidffb3f92019-07-01 20:43:20 -07001753#ifndef CONFIG_IA32_EMULATION
1754/*
1755 * This handles SYSCALL from 32-bit code. There is no way to program
1756 * MSRs to fully disable 32-bit SYSCALL.
1757 */
Jiri Slabybc7b11c2019-10-11 13:51:03 +02001758SYM_CODE_START(ignore_sysret)
Josh Poimboeuf8c1f7552017-07-11 10:33:44 -05001759 UNWIND_HINT_EMPTY
Ingo Molnar4d732132015-06-08 20:43:07 +02001760 mov $-ENOSYS, %eax
Jan Beulichb2b1d942019-12-16 11:40:03 +01001761 sysretl
Jiri Slabybc7b11c2019-10-11 13:51:03 +02001762SYM_CODE_END(ignore_sysret)
Andy Lutomirskidffb3f92019-07-01 20:43:20 -07001763#endif
Andy Lutomirski2deb4be2016-07-14 13:22:55 -07001764
Thomas Gleixnerb9f69762020-03-25 19:45:26 +01001765.pushsection .text, "ax"
Jiri Slabybc7b11c2019-10-11 13:51:03 +02001766SYM_CODE_START(rewind_stack_do_exit)
Josh Poimboeuf8c1f7552017-07-11 10:33:44 -05001767 UNWIND_HINT_FUNC
Andy Lutomirski2deb4be2016-07-14 13:22:55 -07001768 /* Prevent any naive code from trying to unwind to our caller. */
1769 xorl %ebp, %ebp
1770
1771 movq PER_CPU_VAR(cpu_current_top_of_stack), %rax
Josh Poimboeuf8c1f7552017-07-11 10:33:44 -05001772 leaq -PTREGS_SIZE(%rax), %rsp
Jann Hornf977df72020-04-25 05:03:04 -05001773 UNWIND_HINT_REGS
Andy Lutomirski2deb4be2016-07-14 13:22:55 -07001774
1775 call do_exit
Jiri Slabybc7b11c2019-10-11 13:51:03 +02001776SYM_CODE_END(rewind_stack_do_exit)
Thomas Gleixnerb9f69762020-03-25 19:45:26 +01001777.popsection