blob: 9060ba6497e2714c6bbb73aab34b4879bc9e630c [file] [log] [blame]
Linus Torvalds1da177e2005-04-16 15:20:36 -07001/*
2 * linux/arch/x86_64/entry.S
3 *
4 * Copyright (C) 1991, 1992 Linus Torvalds
5 * Copyright (C) 2000, 2001, 2002 Andi Kleen SuSE Labs
6 * Copyright (C) 2000 Pavel Machek <pavel@suse.cz>
Linus Torvalds1da177e2005-04-16 15:20:36 -07007 */
8
9/*
10 * entry.S contains the system-call and fault low-level handling routines.
11 *
12 * NOTE: This code handles signal-recognition, which happens every time
13 * after an interrupt and after each system call.
14 *
15 * Normal syscalls and interrupts don't save a full stack frame, this is
16 * only done for syscall tracing, signals or fork/exec et al.
17 *
18 * A note on terminology:
19 * - top of stack: Architecture defined interrupt frame from SS to RIP
20 * at the top of the kernel process stack.
21 * - partial stack frame: partially saved registers up to R11.
22 * - full stack frame: Like partial stack frame, but all registers saved.
Andi Kleen2e91a172006-09-26 10:52:29 +020023 *
24 * Some macro usage:
25 * - CFI macros are used to generate dwarf2 unwind information for better
26 * backtraces. They don't change any code.
27 * - SAVE_ALL/RESTORE_ALL - Save/restore all registers
28 * - SAVE_ARGS/RESTORE_ARGS - Save/restore registers that C functions modify.
29 * There are unfortunately lots of special cases where some registers
30 * are not touched. The macro is a big mess that should be cleaned up.
31 * - SAVE_REST/RESTORE_REST - Handle the registers not saved by SAVE_ARGS.
32 * Gives a full stack frame.
33 * - ENTRY/END Define functions in the symbol table.
34 * - FIXUP_TOP_OF_STACK/RESTORE_TOP_OF_STACK - Fix up the hardware stack
35 * frame that is otherwise undefined after a SYSCALL
36 * - TRACE_IRQ_* - Trace hard interrupt state for lock debugging.
37 * - errorentry/paranoidentry/zeroentry - Define exception entry points.
Linus Torvalds1da177e2005-04-16 15:20:36 -070038 */
39
Linus Torvalds1da177e2005-04-16 15:20:36 -070040#include <linux/linkage.h>
41#include <asm/segment.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070042#include <asm/cache.h>
43#include <asm/errno.h>
44#include <asm/dwarf2.h>
45#include <asm/calling.h>
Sam Ravnborge2d5df92005-09-09 21:28:48 +020046#include <asm/asm-offsets.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070047#include <asm/msr.h>
48#include <asm/unistd.h>
49#include <asm/thread_info.h>
50#include <asm/hw_irq.h>
Andi Kleen5f8efbb2006-01-16 01:56:39 +010051#include <asm/page.h>
Ingo Molnar2601e642006-07-03 00:24:45 -070052#include <asm/irqflags.h>
Glauber de Oliveira Costa72fe4852008-01-30 13:32:08 +010053#include <asm/paravirt.h>
Abhishek Sagar395a59d2008-06-21 23:47:27 +053054#include <asm/ftrace.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070055
Roland McGrath86a1c342008-06-23 15:37:04 -070056/* Avoid __ASSEMBLER__'ifying <linux/audit.h> just for this. */
57#include <linux/elf-em.h>
58#define AUDIT_ARCH_X86_64 (EM_X86_64|__AUDIT_ARCH_64BIT|__AUDIT_ARCH_LE)
59#define __AUDIT_ARCH_64BIT 0x80000000
60#define __AUDIT_ARCH_LE 0x40000000
61
Linus Torvalds1da177e2005-04-16 15:20:36 -070062 .code64
63
Steven Rostedt606576c2008-10-06 19:06:12 -040064#ifdef CONFIG_FUNCTION_TRACER
Steven Rostedtd61f82d2008-05-12 21:20:43 +020065#ifdef CONFIG_DYNAMIC_FTRACE
/*
 * mcount, CONFIG_DYNAMIC_FTRACE variant: the body is a bare return, so
 * reaching this symbol does nothing.
 */
66ENTRY(mcount)
Steven Rostedtd61f82d2008-05-12 21:20:43 +020067 retq
68END(mcount)
69
/*
 * ftrace_caller: tracing trampoline (CONFIG_DYNAMIC_FTRACE build).
 *
 * Returns immediately (via ftrace_stub) when function_trace_stop is
 * non-zero. Otherwise it saves %rax, %rcx, %rdx, %rsi, %rdi, %r8 and %r9
 * in a 0x38-byte stack area, sets up two arguments, executes the call at
 * the global label ftrace_call, restores the saved registers and returns.
 *
 * Also exports ftrace_graph_call (only with CONFIG_FUNCTION_GRAPH_TRACER)
 * and ftrace_stub, which is a plain retq.
 */
70ENTRY(ftrace_caller)
Steven Rostedt60a7ecf2008-11-05 16:05:44 -050071 cmpl $0, function_trace_stop
72 jne ftrace_stub
Steven Rostedtd61f82d2008-05-12 21:20:43 +020073
74 /* taken from glibc */
75 subq $0x38, %rsp
76 movq %rax, (%rsp)
77 movq %rcx, 8(%rsp)
78 movq %rdx, 16(%rsp)
79 movq %rsi, 24(%rsp)
80 movq %rdi, 32(%rsp)
81 movq %r8, 40(%rsp)
82 movq %r9, 48(%rsp)
83
/*
 * arg1 (%rdi): the quadword that was on top of the stack at entry (now at
 * 0x38(%rsp)), minus MCOUNT_INSN_SIZE.
 * arg2 (%rsi): the quadword at 8(%rbp).
 * NOTE(review): presumably these are the address of the mcount call site
 * in the traced function and that function's own return address (which
 * would require frame pointers) - confirm.
 */
84 movq 0x38(%rsp), %rdi
85 movq 8(%rbp), %rsi
Abhishek Sagar395a59d2008-06-21 23:47:27 +053086 subq $MCOUNT_INSN_SIZE, %rdi
Steven Rostedtd61f82d2008-05-12 21:20:43 +020087
/*
 * As assembled, this call targets ftrace_stub (a no-op).
 * NOTE(review): the global label suggests the call is patched at run time
 * to reach the active tracer - confirm against the ftrace patching code.
 */
88.globl ftrace_call
89ftrace_call:
90 call ftrace_stub
91
/* Restore in reverse order of the saves above and release the area. */
92 movq 48(%rsp), %r9
93 movq 40(%rsp), %r8
94 movq 32(%rsp), %rdi
95 movq 24(%rsp), %rsi
96 movq 16(%rsp), %rdx
97 movq 8(%rsp), %rcx
98 movq (%rsp), %rax
99 addq $0x38, %rsp
100
Frederic Weisbecker48d68b22008-12-02 00:20:39 +0100101#ifdef CONFIG_FUNCTION_GRAPH_TRACER
/* As assembled this simply jumps to the return below. */
102.globl ftrace_graph_call
103ftrace_graph_call:
104 jmp ftrace_stub
105#endif
106
Steven Rostedtd61f82d2008-05-12 21:20:43 +0200107.globl ftrace_stub
108ftrace_stub:
109 retq
110END(ftrace_caller)
111
112#else /* ! CONFIG_DYNAMIC_FTRACE */
/*
 * mcount, non-dynamic variant.
 *
 * Fast exits (all through ftrace_stub, a plain retq):
 *   - function_trace_stop is non-zero, or
 *   - ftrace_trace_function still equals ftrace_stub and (with
 *     CONFIG_FUNCTION_GRAPH_TRACER) ftrace_graph_return does too.
 * Otherwise control goes to "trace" (function tracer installed) or to
 * ftrace_graph_caller (graph return hook installed).
 */
Arnaldo Carvalho de Melo16444a82008-05-12 21:20:42 +0200113ENTRY(mcount)
Steven Rostedt60a7ecf2008-11-05 16:05:44 -0500114 cmpl $0, function_trace_stop
115 jne ftrace_stub
116
Arnaldo Carvalho de Melo16444a82008-05-12 21:20:42 +0200117 cmpq $ftrace_stub, ftrace_trace_function
118 jnz trace
Frederic Weisbecker48d68b22008-12-02 00:20:39 +0100119
120#ifdef CONFIG_FUNCTION_GRAPH_TRACER
121 cmpq $ftrace_stub, ftrace_graph_return
122 jnz ftrace_graph_caller
123#endif
124
Arnaldo Carvalho de Melo16444a82008-05-12 21:20:42 +0200125.globl ftrace_stub
126ftrace_stub:
127 retq
128
/*
 * Slow path: save %rax, %rcx, %rdx, %rsi, %rdi, %r8, %r9 in a 0x38-byte
 * area, call through the ftrace_trace_function pointer, restore, return.
 */
129trace:
130 /* taken from glibc */
131 subq $0x38, %rsp
132 movq %rax, (%rsp)
133 movq %rcx, 8(%rsp)
134 movq %rdx, 16(%rsp)
135 movq %rsi, 24(%rsp)
136 movq %rdi, 32(%rsp)
137 movq %r8, 40(%rsp)
138 movq %r9, 48(%rsp)
139
/*
 * arg1 (%rdi): quadword that was on top of the stack at entry, minus
 * MCOUNT_INSN_SIZE; arg2 (%rsi): quadword at 8(%rbp).
 * NOTE(review): presumably the mcount call-site address and the traced
 * function's return address (frame pointers assumed) - confirm.
 */
140 movq 0x38(%rsp), %rdi
141 movq 8(%rbp), %rsi
Abhishek Sagar395a59d2008-06-21 23:47:27 +0530142 subq $MCOUNT_INSN_SIZE, %rdi
Arnaldo Carvalho de Melo16444a82008-05-12 21:20:42 +0200143
144 call *ftrace_trace_function
145
146 movq 48(%rsp), %r9
147 movq 40(%rsp), %r8
148 movq 32(%rsp), %rdi
149 movq 24(%rsp), %rsi
150 movq 16(%rsp), %rdx
151 movq 8(%rsp), %rcx
152 movq (%rsp), %rax
153 addq $0x38, %rsp
154
155 jmp ftrace_stub
156END(mcount)
Steven Rostedtd61f82d2008-05-12 21:20:43 +0200157#endif /* CONFIG_DYNAMIC_FTRACE */
Steven Rostedt606576c2008-10-06 19:06:12 -0400158#endif /* CONFIG_FUNCTION_TRACER */
Arnaldo Carvalho de Melo16444a82008-05-12 21:20:42 +0200159
Frederic Weisbecker48d68b22008-12-02 00:20:39 +0100160#ifdef CONFIG_FUNCTION_GRAPH_TRACER
/*
 * ftrace_graph_caller: entry hook for the function-graph tracer.
 *
 * Returns immediately (via ftrace_stub) when function_trace_stop is
 * non-zero. Otherwise saves %rax, %rcx, %rdx, %rsi, %rdi, %r8, %r9, calls
 * prepare_ftrace_return(arg1, arg2), restores the registers and returns.
 */
161ENTRY(ftrace_graph_caller)
162 cmpl $0, function_trace_stop
163 jne ftrace_stub
164
165 subq $0x38, %rsp
166 movq %rax, (%rsp)
167 movq %rcx, 8(%rsp)
168 movq %rdx, 16(%rsp)
169 movq %rsi, 24(%rsp)
170 movq %rdi, 32(%rsp)
171 movq %r8, 40(%rsp)
172 movq %r9, 48(%rsp)
173
/*
 * arg1 (%rdi): the ADDRESS 8(%rbp) (leaq, not a load), i.e. a pointer to
 * that stack slot rather than its contents.
 * arg2 (%rsi): quadword that was on top of the stack at entry, minus
 * MCOUNT_INSN_SIZE.
 * NOTE(review): presumably the slot holds the traced function's return
 * address so the callee can rewrite it - confirm in prepare_ftrace_return.
 */
174 leaq 8(%rbp), %rdi
175 movq 0x38(%rsp), %rsi
Steven Rostedtbb4304c2008-12-02 15:34:09 -0500176 subq $MCOUNT_INSN_SIZE, %rsi
Frederic Weisbecker48d68b22008-12-02 00:20:39 +0100177
178 call prepare_ftrace_return
179
180 movq 48(%rsp), %r9
181 movq 40(%rsp), %r8
182 movq 32(%rsp), %rdi
183 movq 24(%rsp), %rsi
184 movq 16(%rsp), %rdx
185 movq 8(%rsp), %rcx
186 movq (%rsp), %rax
187 addq $0x38, %rsp
188 retq
189END(ftrace_graph_caller)
190
191
/*
 * return_to_handler: saves %rax, %rcx, %rdx, %rsi, %rdi, %r8-%r11, calls
 * ftrace_return_to_handler, then continues execution at the address that
 * call returned.
 *
 * Stack layout after "subq $80": nine 8-byte register slots at offsets
 * 0..64 plus one extra slot at offset 72. The value returned in %rax by
 * ftrace_return_to_handler is stored in the slot at 72. Only 72 bytes are
 * released afterwards, so the final retq pops that slot and jumps to it.
 * The 80/72 asymmetry is therefore deliberate.
 */
192.globl return_to_handler
193return_to_handler:
194 subq $80, %rsp
195
196 movq %rax, (%rsp)
197 movq %rcx, 8(%rsp)
198 movq %rdx, 16(%rsp)
199 movq %rsi, 24(%rsp)
200 movq %rdi, 32(%rsp)
201 movq %r8, 40(%rsp)
202 movq %r9, 48(%rsp)
203 movq %r10, 56(%rsp)
204 movq %r11, 64(%rsp)
205
206 call ftrace_return_to_handler
207
/* Plant the returned address where retq will find it. */
208 movq %rax, 72(%rsp)
209 movq 64(%rsp), %r11
210 movq 56(%rsp), %r10
211 movq 48(%rsp), %r9
212 movq 40(%rsp), %r8
213 movq 32(%rsp), %rdi
214 movq 24(%rsp), %rsi
215 movq 16(%rsp), %rdx
216 movq 8(%rsp), %rcx
/* Original %rax (saved at offset 0) is restored last. */
217 movq (%rsp), %rax
218 addq $72, %rsp
219 retq
220#endif
221
222
/*
 * Without CONFIG_PREEMPT, retint_kernel is just another name for
 * retint_restore_args.
 */
Andi Kleendc37db42005-04-16 15:25:05 -0700223#ifndef CONFIG_PREEMPT
Linus Torvalds1da177e2005-04-16 15:20:36 -0700224#define retint_kernel retint_restore_args
225#endif
Ingo Molnar2601e642006-07-03 00:24:45 -0700226
Glauber de Oliveira Costa72fe4852008-01-30 13:32:08 +0100227#ifdef CONFIG_PARAVIRT
/*
 * native_usergs_sysret64 (CONFIG_PARAVIRT only): swapgs immediately
 * followed by sysretq. Control never falls through, and no END()
 * annotation is emitted for this symbol.
 */
Jeremy Fitzhardinge2be29982008-06-25 00:19:28 -0400228ENTRY(native_usergs_sysret64)
Glauber de Oliveira Costa72fe4852008-01-30 13:32:08 +0100229 swapgs
230 sysretq
231#endif /* CONFIG_PARAVIRT */
232
Ingo Molnar2601e642006-07-03 00:24:45 -0700233
/*
 * TRACE_IRQS_IRETQ offset=ARGOFFSET
 *
 * Tests bit 9 of the saved EFLAGS quadword at EFLAGS-\offset(%rsp) and
 * invokes TRACE_IRQS_ON only when that bit is set. Expands to nothing
 * unless CONFIG_TRACE_IRQFLAGS is defined. Modifies the flags register
 * (bt); uses numeric local label 1.
 */
234.macro TRACE_IRQS_IRETQ offset=ARGOFFSET
235#ifdef CONFIG_TRACE_IRQFLAGS
236 bt $9,EFLAGS-\offset(%rsp) /* interrupts off? */
237 jnc 1f
238 TRACE_IRQS_ON
2391:
240#endif
241.endm
242
Linus Torvalds1da177e2005-04-16 15:20:36 -0700243/*
244 * C code is not supposed to know about undefined top of stack. Every time
245 * a C function with a pt_regs argument is called from the SYSCALL based
246 * fast path FIXUP_TOP_OF_STACK is needed.
247 * RESTORE_TOP_OF_STACK syncs the syscall state after any possible ptregs
248 * manipulation.
249 */
250
251 /* %rsp:at FRAMEEND */
/*
 * FIXUP_TOP_OF_STACK tmp
 *
 * Fills in the top-of-stack slots of the frame at %rsp:
 *   RSP    <- %gs:pda_oldrsp
 *   SS     <- __USER_DS
 *   CS     <- __USER_CS
 *   RCX    <- -1
 *   EFLAGS <- copy of the frame's R11 slot
 * \tmp is a scratch register and is clobbered.
 */
252 .macro FIXUP_TOP_OF_STACK tmp
253 movq %gs:pda_oldrsp,\tmp
254 movq \tmp,RSP(%rsp)
255 movq $__USER_DS,SS(%rsp)
256 movq $__USER_CS,CS(%rsp)
257 movq $-1,RCX(%rsp)
258 movq R11(%rsp),\tmp /* get eflags */
259 movq \tmp,EFLAGS(%rsp)
260 .endm
261
/*
 * RESTORE_TOP_OF_STACK tmp,offset=0
 *
 * Inverse of the RSP/EFLAGS part of FIXUP_TOP_OF_STACK: copies the frame's
 * RSP slot back to %gs:pda_oldrsp and the frame's EFLAGS slot back to its
 * R11 slot. \offset is subtracted from every slot offset, for callers
 * whose %rsp does not point at the frame base. \tmp is clobbered.
 */
262 .macro RESTORE_TOP_OF_STACK tmp,offset=0
263 movq RSP-\offset(%rsp),\tmp
264 movq \tmp,%gs:pda_oldrsp
265 movq EFLAGS-\offset(%rsp),\tmp
266 movq \tmp,R11-\offset(%rsp)
267 .endm
268
/*
 * FAKE_STACK_FRAME child_rip
 *
 * Pushes six quadwords: ss=__KERNEL_DS, rsp=0, eflags=1<<9, cs=__KERNEL_CS,
 * rip=\child_rip, orig rax=0. Clobbers %rax (zeroed by the xorl) and the
 * flags. CFI is adjusted after every push. Undo with UNFAKE_STACK_FRAME.
 */
269 .macro FAKE_STACK_FRAME child_rip
270 /* push in order ss, rsp, eflags, cs, rip */
Andi Kleen3829ee62005-07-28 21:15:48 -0700271 xorl %eax, %eax
Jeremy Fitzhardingee04e0a62008-06-25 00:19:25 -0400272 pushq $__KERNEL_DS /* ss */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700273 CFI_ADJUST_CFA_OFFSET 8
Jan Beulich7effaa82005-09-12 18:49:24 +0200274 /*CFI_REL_OFFSET ss,0*/
Linus Torvalds1da177e2005-04-16 15:20:36 -0700275 pushq %rax /* rsp */
276 CFI_ADJUST_CFA_OFFSET 8
Jan Beulich7effaa82005-09-12 18:49:24 +0200277 CFI_REL_OFFSET rsp,0
Linus Torvalds1da177e2005-04-16 15:20:36 -0700278 pushq $(1<<9) /* eflags - interrupts on */
279 CFI_ADJUST_CFA_OFFSET 8
Jan Beulich7effaa82005-09-12 18:49:24 +0200280 /*CFI_REL_OFFSET rflags,0*/
Linus Torvalds1da177e2005-04-16 15:20:36 -0700281 pushq $__KERNEL_CS /* cs */
282 CFI_ADJUST_CFA_OFFSET 8
Jan Beulich7effaa82005-09-12 18:49:24 +0200283 /*CFI_REL_OFFSET cs,0*/
Linus Torvalds1da177e2005-04-16 15:20:36 -0700284 pushq \child_rip /* rip */
285 CFI_ADJUST_CFA_OFFSET 8
Jan Beulich7effaa82005-09-12 18:49:24 +0200286 CFI_REL_OFFSET rip,0
Linus Torvalds1da177e2005-04-16 15:20:36 -0700287 pushq %rax /* orig rax */
288 CFI_ADJUST_CFA_OFFSET 8
289 .endm
290
/*
 * UNFAKE_STACK_FRAME: drops the six quadwords pushed by FAKE_STACK_FRAME
 * and adjusts the CFA offset to match. Modifies the flags (addq).
 */
291 .macro UNFAKE_STACK_FRAME
292 addq $8*6, %rsp
293 CFI_ADJUST_CFA_OFFSET -(6*8)
294 .endm
295
/*
 * CFI_DEFAULT_STACK start=1
 *
 * Emits unwind annotations only (no instructions) describing a full
 * register frame located at %rsp.
 *   start != 0: opens a new "simple" signal-frame CFI procedure and
 *               defines the CFA as %rsp + SS+8.
 *   start == 0: only resets the CFA offset to SS+8 inside the current
 *               procedure.
 * In both cases it then records the save slot of each general register,
 * plus rip and rsp. The cs/rflags/ss entries are intentionally left
 * commented out.
 */
Jan Beulich7effaa82005-09-12 18:49:24 +0200296 .macro CFI_DEFAULT_STACK start=1
297 .if \start
298 CFI_STARTPROC simple
Jan Beulichadf14232006-09-26 10:52:41 +0200299 CFI_SIGNAL_FRAME
Jan Beulich7effaa82005-09-12 18:49:24 +0200300 CFI_DEF_CFA rsp,SS+8
301 .else
302 CFI_DEF_CFA_OFFSET SS+8
303 .endif
304 CFI_REL_OFFSET r15,R15
305 CFI_REL_OFFSET r14,R14
306 CFI_REL_OFFSET r13,R13
307 CFI_REL_OFFSET r12,R12
308 CFI_REL_OFFSET rbp,RBP
309 CFI_REL_OFFSET rbx,RBX
310 CFI_REL_OFFSET r11,R11
311 CFI_REL_OFFSET r10,R10
312 CFI_REL_OFFSET r9,R9
313 CFI_REL_OFFSET r8,R8
314 CFI_REL_OFFSET rax,RAX
315 CFI_REL_OFFSET rcx,RCX
316 CFI_REL_OFFSET rdx,RDX
317 CFI_REL_OFFSET rsi,RSI
318 CFI_REL_OFFSET rdi,RDI
319 CFI_REL_OFFSET rip,RIP
320 /*CFI_REL_OFFSET cs,CS*/
321 /*CFI_REL_OFFSET rflags,EFLAGS*/
322 CFI_REL_OFFSET rsp,RSP
323 /*CFI_REL_OFFSET ss,SS*/
Linus Torvalds1da177e2005-04-16 15:20:36 -0700324 .endm
325/*
326 * A newly forked process directly context switches into this.
327 */
328/* rdi: prev */
/*
 * ret_from_fork: resets EFLAGS from kernel_eflags, calls schedule_tail,
 * then picks a return path:
 *   - syscall trace/audit flag set -> call syscall_trace_leave first
 *   - saved CS has low two bits clear -> int_ret_from_sys_call (IRET path)
 *   - _TIF_IA32 set                   -> int_ret_from_sys_call (IRET path)
 *   - otherwise                       -> ret_from_sys_call (SYSRET path),
 *     after syncing pda_oldrsp and the R11 slot from the frame
 * Expects a full register frame at %rsp (see CFI_DEFAULT_STACK).
 */
329ENTRY(ret_from_fork)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700330 CFI_DEFAULT_STACK
Andi Kleen658fdbe2006-09-26 10:52:41 +0200331 push kernel_eflags(%rip)
Alexander van Heukelume0a5a5d2008-07-22 18:14:16 +0200332 CFI_ADJUST_CFA_OFFSET 8
Andi Kleen658fdbe2006-09-26 10:52:41 +0200333 popf # reset kernel eflags
Alexander van Heukelume0a5a5d2008-07-22 18:14:16 +0200334 CFI_ADJUST_CFA_OFFSET -8
Linus Torvalds1da177e2005-04-16 15:20:36 -0700335 call schedule_tail
336 GET_THREAD_INFO(%rcx)
Glauber Costa26ccb8a2008-06-24 11:19:35 -0300337 testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT),TI_flags(%rcx)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700338 jnz rff_trace
339rff_action:
/* From here on %rsp addresses the partial frame (hence -ARGOFFSET). */
340 RESTORE_REST
341 testl $3,CS-ARGOFFSET(%rsp) # from kernel_thread?
342 je int_ret_from_sys_call
Glauber Costa26ccb8a2008-06-24 11:19:35 -0300343 testl $_TIF_IA32,TI_flags(%rcx)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700344 jnz int_ret_from_sys_call
345 RESTORE_TOP_OF_STACK %rdi,ARGOFFSET
346 jmp ret_from_sys_call
347rff_trace:
/* arg1 = pointer to the full frame; reload %rcx since the call clobbers it. */
348 movq %rsp,%rdi
349 call syscall_trace_leave
350 GET_THREAD_INFO(%rcx)
351 jmp rff_action
352 CFI_ENDPROC
Jan Beulich4b787e02006-06-26 13:56:55 +0200353END(ret_from_fork)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700354
355/*
356 * System call entry. Up to 6 arguments in registers are supported.
357 *
358 * SYSCALL does not save anything on the stack and does not change the
359 * stack pointer.
360 */
361
362/*
363 * Register setup:
364 * rax system call number
365 * rdi arg0
366 * rcx return address for syscall/sysret, C arg3
367 * rsi arg1
368 * rdx arg2
369 * r10 arg3 (--> moved to rcx for C)
370 * r8 arg4
371 * r9 arg5
372 * r11 eflags for syscall/sysret, temporary for C
373 * r12-r15,rbp,rbx saved by C code, not touched.
374 *
375 * Interrupts are off on entry.
376 * Only called from user space.
377 *
378 * XXX if we had a free scratch register we could save the RSP into the stack frame
379 * and report it properly in ps. Unfortunately we haven't.
Andi Kleen7bf36bb2006-04-07 19:50:00 +0200380 *
381 * When user can change the frames always force IRET. That is because
382 * it deals with uncanonical addresses better. SYSRET has trouble
383 * with them due to bugs in both AMD and Intel CPUs.
Linus Torvalds1da177e2005-04-16 15:20:36 -0700384 */
385
/*
 * system_call: SYSCALL entry point and fast path.
 *
 * Sequence: swap GS, stash the user %rsp in %gs:pda_oldrsp, switch to
 * %gs:pda_kernelstack, enable interrupts, build the partial frame
 * (SAVE_ARGS), record the syscall number in ORIG_RAX and the user return
 * address (%rcx) in RIP. If any _TIF_WORK_SYSCALL_ENTRY flag is set,
 * divert to tracesys; otherwise dispatch through sys_call_table and store
 * the result in the frame's RAX slot.
 */
386ENTRY(system_call)
Jan Beulich7effaa82005-09-12 18:49:24 +0200387 CFI_STARTPROC simple
Jan Beulichadf14232006-09-26 10:52:41 +0200388 CFI_SIGNAL_FRAME
Jan Beulichdffead42006-06-26 13:57:38 +0200389 CFI_DEF_CFA rsp,PDA_STACKOFFSET
Jan Beulich7effaa82005-09-12 18:49:24 +0200390 CFI_REGISTER rip,rcx
391 /*CFI_REGISTER rflags,r11*/
Glauber de Oliveira Costa72fe4852008-01-30 13:32:08 +0100392 SWAPGS_UNSAFE_STACK
393 /*
394 * A hypervisor implementation might want to use a label
395 * after the swapgs, so that it can do the swapgs
396 * for the guest and jump here on syscall.
397 */
398ENTRY(system_call_after_swapgs)
399
Linus Torvalds1da177e2005-04-16 15:20:36 -0700400 movq %rsp,%gs:pda_oldrsp
401 movq %gs:pda_kernelstack,%rsp
Ingo Molnar2601e642006-07-03 00:24:45 -0700402 /*
403 * No need to follow this irqs off/on section - it's straight
404 * and short:
405 */
Glauber de Oliveira Costa72fe4852008-01-30 13:32:08 +0100406 ENABLE_INTERRUPTS(CLBR_NONE)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700407 SAVE_ARGS 8,1
408 movq %rax,ORIG_RAX-ARGOFFSET(%rsp)
Jan Beulich7effaa82005-09-12 18:49:24 +0200409 movq %rcx,RIP-ARGOFFSET(%rsp)
410 CFI_REL_OFFSET rip,RIP-ARGOFFSET
/* %rcx is free now that the return address is saved; reuse it. */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700411 GET_THREAD_INFO(%rcx)
Roland McGrathd4d67152008-07-09 02:38:07 -0700412 testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%rcx)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700413 jnz tracesys
Roland McGrath86a1c342008-06-23 15:37:04 -0700414system_call_fastpath:
/* Unsigned compare: any number above __NR_syscall_max is rejected. */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700415 cmpq $__NR_syscall_max,%rax
416 ja badsys
/* The syscall ABI passes arg3 in %r10; C expects its 4th argument in %rcx. */
417 movq %r10,%rcx
418 call *sys_call_table(,%rax,8) # XXX: rip relative
419 movq %rax,RAX-ARGOFFSET(%rsp)
420/*
421 * Syscall return path ending with SYSRET (fast path)
422 * Has incomplete stack frame and undefined top of stack.
423 */
/*
 * ret_from_sys_call: SYSRET exit path.
 *
 * %edi holds the mask of thread-info flags that still require work.
 * sysret_check re-reads TI_flags with interrupts disabled; if no masked
 * flag is set it restores the argument registers, reloads the user %rsp
 * from %gs:pda_oldrsp and leaves through USERGS_SYSRET64. Otherwise:
 *   sysret_careful - reschedule if TIF_NEED_RESCHED, then re-check
 *   sysret_signal  - audit exit (CONFIG_AUDITSYSCALL) or do_notify_resume,
 *                    then continue on the IRET path (int_with_check)
 *   badsys         - store -ENOSYS as the result and take the normal exit
 *
 * Note: sysret_check can be re-entered after "call schedule", so %rax is
 * not guaranteed to still hold the syscall result on these paths; the
 * authoritative copy is the frame's RAX slot.
 */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700424ret_from_sys_call:
Andi Kleen11b854b2005-04-16 15:25:02 -0700425 movl $_TIF_ALLWORK_MASK,%edi
Linus Torvalds1da177e2005-04-16 15:20:36 -0700426 /* edi: flagmask */
427sysret_check:
Peter Zijlstra10cd7062007-10-11 22:11:12 +0200428 LOCKDEP_SYS_EXIT
Linus Torvalds1da177e2005-04-16 15:20:36 -0700429 GET_THREAD_INFO(%rcx)
Glauber de Oliveira Costa72fe4852008-01-30 13:32:08 +0100430 DISABLE_INTERRUPTS(CLBR_NONE)
Ingo Molnar2601e642006-07-03 00:24:45 -0700431 TRACE_IRQS_OFF
Glauber Costa26ccb8a2008-06-24 11:19:35 -0300432 movl TI_flags(%rcx),%edx
Linus Torvalds1da177e2005-04-16 15:20:36 -0700433 andl %edi,%edx
434 jnz sysret_careful
Jan Beulichbcddc012006-12-07 02:14:02 +0100435 CFI_REMEMBER_STATE
Ingo Molnar2601e642006-07-03 00:24:45 -0700436 /*
437 * sysretq will re-enable interrupts:
438 */
439 TRACE_IRQS_ON
/* User return address goes back into %rcx for the sysret. */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700440 movq RIP-ARGOFFSET(%rsp),%rcx
Jan Beulich7effaa82005-09-12 18:49:24 +0200441 CFI_REGISTER rip,rcx
Linus Torvalds1da177e2005-04-16 15:20:36 -0700442 RESTORE_ARGS 0,-ARG_SKIP,1
Jan Beulich7effaa82005-09-12 18:49:24 +0200443 /*CFI_REGISTER rflags,r11*/
Jeremy Fitzhardingec7245da2008-06-25 00:19:27 -0400444 movq %gs:pda_oldrsp, %rsp
Jeremy Fitzhardinge2be29982008-06-25 00:19:28 -0400445 USERGS_SYSRET64
Linus Torvalds1da177e2005-04-16 15:20:36 -0700446
Jan Beulichbcddc012006-12-07 02:14:02 +0100447 CFI_RESTORE_STATE
Linus Torvalds1da177e2005-04-16 15:20:36 -0700448 /* Handle reschedules */
449 /* edx: work, edi: workmask */
450sysret_careful:
451 bt $TIF_NEED_RESCHED,%edx
452 jnc sysret_signal
Ingo Molnar2601e642006-07-03 00:24:45 -0700453 TRACE_IRQS_ON
Glauber de Oliveira Costa72fe4852008-01-30 13:32:08 +0100454 ENABLE_INTERRUPTS(CLBR_NONE)
/* Preserve the work mask in %rdi across the call. */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700455 pushq %rdi
Jan Beulich7effaa82005-09-12 18:49:24 +0200456 CFI_ADJUST_CFA_OFFSET 8
Linus Torvalds1da177e2005-04-16 15:20:36 -0700457 call schedule
458 popq %rdi
Jan Beulich7effaa82005-09-12 18:49:24 +0200459 CFI_ADJUST_CFA_OFFSET -8
Linus Torvalds1da177e2005-04-16 15:20:36 -0700460 jmp sysret_check
461
462 /* Handle a signal */
463sysret_signal:
Ingo Molnar2601e642006-07-03 00:24:45 -0700464 TRACE_IRQS_ON
Glauber de Oliveira Costa72fe4852008-01-30 13:32:08 +0100465 ENABLE_INTERRUPTS(CLBR_NONE)
Roland McGrath86a1c342008-06-23 15:37:04 -0700466#ifdef CONFIG_AUDITSYSCALL
467 bt $TIF_SYSCALL_AUDIT,%edx
468 jc sysret_audit
469#endif
Andi Kleen10ffdbb2005-05-16 21:53:19 -0700470 /* edx: work flags (arg3) */
/* ptregscall_common calls the function whose address is in %rax. */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700471 leaq do_notify_resume(%rip),%rax
472 leaq -ARGOFFSET(%rsp),%rdi # &pt_regs -> arg1
473 xorl %esi,%esi # oldset -> arg2
474 call ptregscall_common
Roland McGrath15e8f342008-06-23 20:41:12 -0700475 movl $_TIF_WORK_MASK,%edi
Andi Kleen7bf36bb2006-04-07 19:50:00 +0200476 /* Use IRET because user could have changed frame. This
477 works because ptregscall_common has called FIXUP_TOP_OF_STACK. */
Glauber de Oliveira Costa72fe4852008-01-30 13:32:08 +0100478 DISABLE_INTERRUPTS(CLBR_NONE)
Ingo Molnar2601e642006-07-03 00:24:45 -0700479 TRACE_IRQS_OFF
Andi Kleen7bf36bb2006-04-07 19:50:00 +0200480 jmp int_with_check
Linus Torvalds1da177e2005-04-16 15:20:36 -0700481
Jan Beulich7effaa82005-09-12 18:49:24 +0200482badsys:
483 movq $-ENOSYS,RAX-ARGOFFSET(%rsp)
484 jmp ret_from_sys_call
485
Roland McGrath86a1c342008-06-23 15:37:04 -0700486#ifdef CONFIG_AUDITSYSCALL
487 /*
488 * Fast path for syscall audit without full syscall trace.
489 * We just call audit_syscall_entry() directly, and then
490 * jump back to the normal fast path.
491 */
492auditsys:
/*
 * Shuffle the syscall registers into C argument order for
 * audit_syscall_entry(arch, nr, a0, a1, a2, a3). The moves run from the
 * last argument to the first so that no source register is overwritten
 * before it has been copied.
 */
493 movq %r10,%r9 /* 6th arg: 4th syscall arg */
494 movq %rdx,%r8 /* 5th arg: 3rd syscall arg */
495 movq %rsi,%rcx /* 4th arg: 2nd syscall arg */
496 movq %rdi,%rdx /* 3rd arg: 1st syscall arg */
497 movq %rax,%rsi /* 2nd arg: syscall number */
498 movl $AUDIT_ARCH_X86_64,%edi /* 1st arg: audit arch */
499 call audit_syscall_entry
500 LOAD_ARGS 0 /* reload call-clobbered registers */
501 jmp system_call_fastpath
502
503 /*
504 * Return fast path for syscall audit. Call audit_syscall_exit()
505 * directly and then jump back to the fast path with TIF_SYSCALL_AUDIT
506 * masked off.
507 */
508sysret_audit:
/*
 * Calls audit_syscall_exit(status, return_value) and re-enters
 * sysret_check with TIF_SYSCALL_AUDIT removed from the work mask, so
 * this path runs at most once per syscall exit.
 *
 * The return value is read from the frame's RAX slot, not from live
 * %rax. This label is reached via sysret_check, which can itself be
 * re-entered after "call schedule" (sysret_careful) and is also the
 * target of ret_from_fork; on those paths %rax no longer holds the
 * syscall result. The frame slot is written right after the syscall
 * returns (and by badsys), so it is always authoritative.
 *
 * Clobbers: %rax (low byte used as scratch), %rsi, %rdi, flags, plus
 * whatever the C callee clobbers.
 */
509 movq RAX-ARGOFFSET(%rsp),%rsi /* second arg, syscall return value */
510 cmpq $0,%rsi /* is it < 0? */
511 setl %al /* 1 if so, 0 if not */
512 movzbl %al,%edi /* zero-extend that into %edi */
513 inc %edi /* first arg, 0->1(AUDITSC_SUCCESS), 1->2(AUDITSC_FAILURE) */
514 call audit_syscall_exit
515 movl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT),%edi
516 jmp sysret_check
517#endif /* CONFIG_AUDITSYSCALL */
518
Linus Torvalds1da177e2005-04-16 15:20:36 -0700519 /* Do syscall tracing */
520tracesys:
/*
 * Slow syscall entry, reached when a _TIF_WORK_SYSCALL_ENTRY flag is set
 * (%rcx = thread info). If the only such flag is the audit one, take the
 * cheaper auditsys path. Otherwise build a full frame, preset the result
 * to -ENOSYS, call syscall_trace_enter(&frame) and dispatch using the
 * syscall number it returns in %rax. Falls through into the IRET exit
 * path that follows.
 */
Roland McGrath86a1c342008-06-23 15:37:04 -0700521#ifdef CONFIG_AUDITSYSCALL
522 testl $(_TIF_WORK_SYSCALL_ENTRY & ~_TIF_SYSCALL_AUDIT),TI_flags(%rcx)
523 jz auditsys
524#endif
Linus Torvalds1da177e2005-04-16 15:20:36 -0700525 SAVE_REST
Roland McGratha31f8dd2008-03-16 21:59:11 -0700526 movq $-ENOSYS,RAX(%rsp) /* ptrace can change this for a bad syscall */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700527 FIXUP_TOP_OF_STACK %rdi
528 movq %rsp,%rdi
529 call syscall_trace_enter
Roland McGrathd4d67152008-07-09 02:38:07 -0700530 /*
531 * Reload arg registers from stack in case ptrace changed them.
532 * We don't reload %rax because syscall_trace_enter() returned
533 * the value it wants us to use in the table lookup.
534 */
535 LOAD_ARGS ARGOFFSET, 1
Linus Torvalds1da177e2005-04-16 15:20:36 -0700536 RESTORE_REST
537 cmpq $__NR_syscall_max,%rax
Roland McGratha31f8dd2008-03-16 21:59:11 -0700538 ja int_ret_from_sys_call /* RAX(%rsp) set to -ENOSYS above */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700539 movq %r10,%rcx /* fixup for C */
540 call *sys_call_table(,%rax,8)
Roland McGratha31f8dd2008-03-16 21:59:11 -0700541 movq %rax,RAX-ARGOFFSET(%rsp)
Andi Kleen7bf36bb2006-04-07 19:50:00 +0200542 /* Use IRET because user could have changed frame */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700543
Linus Torvalds1da177e2005-04-16 15:20:36 -0700544/*
545 * Syscall return path ending with IRET.
546 * Has correct top of stack, but partial stack frame.
Jan Beulichbcddc012006-12-07 02:14:02 +0100547 */
/*
 * int_ret_from_sys_call / int_with_check: IRET-based syscall exit.
 *
 * With interrupts disabled: if the saved CS has its low two bits clear,
 * go straight to retint_restore_args. Otherwise loop on int_with_check
 * until none of the flags selected by the mask in %edi remain set in
 * TI_flags, then clear TS_COMPAT in TI_status and leave via retint_swapgs.
 *   int_careful      - reschedule if TIF_NEED_RESCHED
 *   int_very_careful - build a full frame, then either run
 *                      syscall_trace_leave (and drop those flags from the
 *                      mask) or go to int_signal
 *   int_signal       - call do_notify_resume if a _TIF_DO_NOTIFY_MASK flag
 *                      is set, then narrow the mask to _TIF_WORK_MASK
 */
548 .globl int_ret_from_sys_call
Roland McGrath5cbf1562008-06-24 01:13:31 -0700549 .globl int_with_check
Jan Beulichbcddc012006-12-07 02:14:02 +0100550int_ret_from_sys_call:
Glauber de Oliveira Costa72fe4852008-01-30 13:32:08 +0100551 DISABLE_INTERRUPTS(CLBR_NONE)
Ingo Molnar2601e642006-07-03 00:24:45 -0700552 TRACE_IRQS_OFF
Linus Torvalds1da177e2005-04-16 15:20:36 -0700553 testl $3,CS-ARGOFFSET(%rsp)
554 je retint_restore_args
555 movl $_TIF_ALLWORK_MASK,%edi
556 /* edi: mask to check */
557int_with_check:
Peter Zijlstra10cd7062007-10-11 22:11:12 +0200558 LOCKDEP_SYS_EXIT_IRQ
Linus Torvalds1da177e2005-04-16 15:20:36 -0700559 GET_THREAD_INFO(%rcx)
Glauber Costa26ccb8a2008-06-24 11:19:35 -0300560 movl TI_flags(%rcx),%edx
Linus Torvalds1da177e2005-04-16 15:20:36 -0700561 andl %edi,%edx
562 jnz int_careful
Glauber Costa26ccb8a2008-06-24 11:19:35 -0300563 andl $~TS_COMPAT,TI_status(%rcx)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700564 jmp retint_swapgs
565
566 /* Either reschedule or signal or syscall exit tracking needed. */
567 /* First do a reschedule test. */
568 /* edx: work, edi: workmask */
569int_careful:
570 bt $TIF_NEED_RESCHED,%edx
571 jnc int_very_careful
Ingo Molnar2601e642006-07-03 00:24:45 -0700572 TRACE_IRQS_ON
Glauber de Oliveira Costa72fe4852008-01-30 13:32:08 +0100573 ENABLE_INTERRUPTS(CLBR_NONE)
/* Preserve the work mask in %rdi across the call. */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700574 pushq %rdi
Jan Beulich7effaa82005-09-12 18:49:24 +0200575 CFI_ADJUST_CFA_OFFSET 8
Linus Torvalds1da177e2005-04-16 15:20:36 -0700576 call schedule
577 popq %rdi
Jan Beulich7effaa82005-09-12 18:49:24 +0200578 CFI_ADJUST_CFA_OFFSET -8
Glauber de Oliveira Costa72fe4852008-01-30 13:32:08 +0100579 DISABLE_INTERRUPTS(CLBR_NONE)
Ingo Molnar2601e642006-07-03 00:24:45 -0700580 TRACE_IRQS_OFF
Linus Torvalds1da177e2005-04-16 15:20:36 -0700581 jmp int_with_check
582
583 /* handle signals and tracing -- both require a full stack frame */
584int_very_careful:
Ingo Molnar2601e642006-07-03 00:24:45 -0700585 TRACE_IRQS_ON
Glauber de Oliveira Costa72fe4852008-01-30 13:32:08 +0100586 ENABLE_INTERRUPTS(CLBR_NONE)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700587 SAVE_REST
588 /* Check for syscall exit trace */
Roland McGrathd4d67152008-07-09 02:38:07 -0700589 testl $_TIF_WORK_SYSCALL_EXIT,%edx
Linus Torvalds1da177e2005-04-16 15:20:36 -0700590 jz int_signal
591 pushq %rdi
Jan Beulich7effaa82005-09-12 18:49:24 +0200592 CFI_ADJUST_CFA_OFFSET 8
/* +8 skips the work mask just pushed. */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700593 leaq 8(%rsp),%rdi # &ptregs -> arg1
594 call syscall_trace_leave
595 popq %rdi
Jan Beulich7effaa82005-09-12 18:49:24 +0200596 CFI_ADJUST_CFA_OFFSET -8
/* Exit tracing is done: stop checking those flags on the next pass. */
Roland McGrathd4d67152008-07-09 02:38:07 -0700597 andl $~(_TIF_WORK_SYSCALL_EXIT|_TIF_SYSCALL_EMU),%edi
Linus Torvalds1da177e2005-04-16 15:20:36 -0700598 jmp int_restore_rest
599
600int_signal:
Peter Zijlstra8f4d37e2008-01-25 21:08:29 +0100601 testl $_TIF_DO_NOTIFY_MASK,%edx
Linus Torvalds1da177e2005-04-16 15:20:36 -0700602 jz 1f
603 movq %rsp,%rdi # &ptregs -> arg1
604 xorl %esi,%esi # oldset -> arg2
605 call do_notify_resume
Roland McGratheca91e72008-07-10 14:50:39 -07006061: movl $_TIF_WORK_MASK,%edi
Linus Torvalds1da177e2005-04-16 15:20:36 -0700607int_restore_rest:
608 RESTORE_REST
Glauber de Oliveira Costa72fe4852008-01-30 13:32:08 +0100609 DISABLE_INTERRUPTS(CLBR_NONE)
Ingo Molnar2601e642006-07-03 00:24:45 -0700610 TRACE_IRQS_OFF
Linus Torvalds1da177e2005-04-16 15:20:36 -0700611 jmp int_with_check
612 CFI_ENDPROC
Jan Beulichbcddc012006-12-07 02:14:02 +0100613END(system_call)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700614
615/*
616 * Certain special system calls that need to save a complete full stack frame.
617 */
618
/*
 * PTREGSCALL label,func,arg
 *
 * Defines a global stub \label that loads the address of \func into %rax
 * and the frame pointer expression -ARGOFFSET+8(%rsp) into register \arg
 * (the +8 accounts for the return address on the stack), then jumps to
 * ptregscall_common, which performs the actual call through %rax.
 */
619 .macro PTREGSCALL label,func,arg
620 .globl \label
621\label:
622 leaq \func(%rip),%rax
623 leaq -ARGOFFSET+8(%rsp),\arg /* 8 for return address */
624 jmp ptregscall_common
Jan Beulich4b787e02006-06-26 13:56:55 +0200625END(\label)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700626 .endm
627
/*
 * Instantiate the pt_regs stubs. The third macro argument names the
 * register that receives the frame pointer for each handler. The CFI
 * procedure opened here is closed by the CFI_ENDPROC at the end of
 * ptregscall_common.
 */
Jan Beulich7effaa82005-09-12 18:49:24 +0200628 CFI_STARTPROC
629
Linus Torvalds1da177e2005-04-16 15:20:36 -0700630 PTREGSCALL stub_clone, sys_clone, %r8
631 PTREGSCALL stub_fork, sys_fork, %rdi
632 PTREGSCALL stub_vfork, sys_vfork, %rdi
Linus Torvalds1da177e2005-04-16 15:20:36 -0700633 PTREGSCALL stub_sigaltstack, sys_sigaltstack, %rdx
634 PTREGSCALL stub_iopl, sys_iopl, %rsi
635
/*
 * ptregscall_common: calls the function whose address is in %rax with a
 * complete register frame on the stack.
 *
 * The caller's return address is popped so the frame can be completed in
 * place, parked in %r15 across the call, and pushed back before the ret.
 * FIXUP_TOP_OF_STACK / RESTORE_TOP_OF_STACK bracket the call.
 * Clobbers %r11. %r15 is overwritten only after SAVE_REST and before
 * RESTORE_REST.
 * NOTE(review): presumably SAVE_REST/RESTORE_REST preserve %r15, which
 * makes it safe to use as scratch here - confirm in <asm/calling.h>.
 */
636ENTRY(ptregscall_common)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700637 popq %r11
Jan Beulich7effaa82005-09-12 18:49:24 +0200638 CFI_ADJUST_CFA_OFFSET -8
639 CFI_REGISTER rip, r11
Linus Torvalds1da177e2005-04-16 15:20:36 -0700640 SAVE_REST
641 movq %r11, %r15
Jan Beulich7effaa82005-09-12 18:49:24 +0200642 CFI_REGISTER rip, r15
Linus Torvalds1da177e2005-04-16 15:20:36 -0700643 FIXUP_TOP_OF_STACK %r11
644 call *%rax
645 RESTORE_TOP_OF_STACK %r11
646 movq %r15, %r11
Jan Beulich7effaa82005-09-12 18:49:24 +0200647 CFI_REGISTER rip, r11
Linus Torvalds1da177e2005-04-16 15:20:36 -0700648 RESTORE_REST
649 pushq %r11
Jan Beulich7effaa82005-09-12 18:49:24 +0200650 CFI_ADJUST_CFA_OFFSET 8
651 CFI_REL_OFFSET rip, 0
Linus Torvalds1da177e2005-04-16 15:20:36 -0700652 ret
653 CFI_ENDPROC
Jan Beulich4b787e02006-06-26 13:56:55 +0200654END(ptregscall_common)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700655
/*
 * stub_execve: wrapper around sys_execve that runs with a full frame.
 *
 * The return address is popped into %r11 and never pushed back: the stub
 * does not return to its caller but exits through int_ret_from_sys_call
 * (IRET path) after storing the result in the frame's RAX slot.
 */
656ENTRY(stub_execve)
657 CFI_STARTPROC
658 popq %r11
Jan Beulich7effaa82005-09-12 18:49:24 +0200659 CFI_ADJUST_CFA_OFFSET -8
660 CFI_REGISTER rip, r11
Linus Torvalds1da177e2005-04-16 15:20:36 -0700661 SAVE_REST
Linus Torvalds1da177e2005-04-16 15:20:36 -0700662 FIXUP_TOP_OF_STACK %r11
/* Frame pointer goes in %rcx, the 4th C argument register. */
Ingo Molnar5d119b22008-02-26 12:55:57 +0100663 movq %rsp, %rcx
Linus Torvalds1da177e2005-04-16 15:20:36 -0700664 call sys_execve
Linus Torvalds1da177e2005-04-16 15:20:36 -0700665 RESTORE_TOP_OF_STACK %r11
Linus Torvalds1da177e2005-04-16 15:20:36 -0700666 movq %rax,RAX(%rsp)
667 RESTORE_REST
668 jmp int_ret_from_sys_call
669 CFI_ENDPROC
Jan Beulich4b787e02006-06-26 13:56:55 +0200670END(stub_execve)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700671
672/*
673 * sigreturn is special because it needs to restore all registers on return.
674 * This cannot be done with SYSRET, so use the IRET return path instead.
675 */
676ENTRY(stub_rt_sigreturn)
677 CFI_STARTPROC
/* Discard the return address: this stub exits via the IRET path instead. */
Jan Beulich7effaa82005-09-12 18:49:24 +0200678 addq $8, %rsp
679 CFI_ADJUST_CFA_OFFSET -8
Linus Torvalds1da177e2005-04-16 15:20:36 -0700680 SAVE_REST
/* arg1 = pointer to the full frame. */
681 movq %rsp,%rdi
682 FIXUP_TOP_OF_STACK %r11
683 call sys_rt_sigreturn
684 movq %rax,RAX(%rsp) # fixme, this could be done at the higher layer
685 RESTORE_REST
686 jmp int_ret_from_sys_call
687 CFI_ENDPROC
Jan Beulich4b787e02006-06-26 13:56:55 +0200688END(stub_rt_sigreturn)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700689
Jan Beulich7effaa82005-09-12 18:49:24 +0200690/*
691 * initial frame state for interrupts and exceptions
692 */
/*
 * _frame ref
 *
 * Emits unwind annotations only (no instructions): opens a "simple"
 * signal-frame CFI procedure whose CFA is %rsp + SS+8-\ref, and records
 * the rsp and rip save slots relative to \ref. \ref is the frame offset
 * that %rsp points at on entry (see INTR_FRAME / XCPT_FRAME). The
 * ss/rflags/cs entries are intentionally left commented out.
 */
693 .macro _frame ref
694 CFI_STARTPROC simple
Jan Beulichadf14232006-09-26 10:52:41 +0200695 CFI_SIGNAL_FRAME
Jan Beulich7effaa82005-09-12 18:49:24 +0200696 CFI_DEF_CFA rsp,SS+8-\ref
697 /*CFI_REL_OFFSET ss,SS-\ref*/
698 CFI_REL_OFFSET rsp,RSP-\ref
699 /*CFI_REL_OFFSET rflags,EFLAGS-\ref*/
700 /*CFI_REL_OFFSET cs,CS-\ref*/
701 CFI_REL_OFFSET rip,RIP-\ref
702 .endm
703
704/* initial frame state for interrupts (and exceptions without error code) */
705#define INTR_FRAME _frame RIP
706/* initial frame state for exceptions with error code (and interrupts with
707 vector already pushed) */
708#define XCPT_FRAME _frame ORIG_RAX
709
Linus Torvalds1da177e2005-04-16 15:20:36 -0700710/*
711 * Interrupt entry/exit.
712 *
713 * Interrupt entry points save only callee clobbered registers in fast path.
714 *
715 * Entry runs with interrupts off.
716 */
717
718/* 0(%rsp): interrupt number */
/*
 * interrupt func
 *
 * Common interrupt prologue followed by "call \func".
 *   - clears DF, saves the argument registers (SAVE_ARGS)
 *   - %rdi = %rsp - ARGOFFSET, passed to the handler as arg1
 *   - sets up an %rbp frame (the matching leaveq is in the caller's exit
 *     path)
 *   - executes SWAPGS when the saved CS has a non-zero low two bits
 *   - increments %gs:pda_irqcount and, only if the result is zero,
 *     switches %rsp to %gs:pda_irqstackptr
 * Uses numeric local label 1.
 */
719 .macro interrupt func
Linus Torvalds1da177e2005-04-16 15:20:36 -0700720 cld
Linus Torvalds1da177e2005-04-16 15:20:36 -0700721 SAVE_ARGS
722 leaq -ARGOFFSET(%rsp),%rdi # arg1 for handler
Jan Beulich1de9c3f2006-06-26 13:57:35 +0200723 pushq %rbp
Glauber Costa097a0782008-08-14 17:33:12 -0300724 /*
725 * Save rbp twice: One is for marking the stack frame, as usual, and the
726 * other, to fill pt_regs properly. This is because bx comes right
727 * before the last saved register in that structure, and not bp. If the
728 * base pointer were in the place bx is today, this would not be needed.
729 */
730 movq %rbp, -8(%rsp)
Jan Beulich1de9c3f2006-06-26 13:57:35 +0200731 CFI_ADJUST_CFA_OFFSET 8
732 CFI_REL_OFFSET rbp, 0
733 movq %rsp,%rbp
734 CFI_DEF_CFA_REGISTER rbp
Linus Torvalds1da177e2005-04-16 15:20:36 -0700735 testl $3,CS(%rdi)
736 je 1f
Glauber de Oliveira Costa72fe4852008-01-30 13:32:08 +0100737 SWAPGS
Andi Kleen96e54042006-09-26 10:52:39 +0200738 /* irqcount is used to check if a CPU is already on an interrupt
739 stack or not. While this is essentially redundant with preempt_count
740 it is a little cheaper to use a separate counter in the PDA
741 (short of moving irq_enter into assembly, which would be too
742 much work) */
7431: incl %gs:pda_irqcount
/* cmoveq consumes ZF from the incl above; keep the two adjacent. */
Jan Beulich1de9c3f2006-06-26 13:57:35 +0200744 cmoveq %gs:pda_irqstackptr,%rsp
Andi Kleen26995002006-08-02 22:37:28 +0200745 push %rbp # backlink for old unwinder
Ingo Molnar2601e642006-07-03 00:24:45 -0700746 /*
747 * We entered an interrupt context - irqs are off:
748 */
749 TRACE_IRQS_OFF
Linus Torvalds1da177e2005-04-16 15:20:36 -0700750 call \func
751 .endm
752
/*
 * common_interrupt: runs do_IRQ through the "interrupt" macro and then
 * falls into the shared interrupt-return path below.
 *
 * Several labels in this block are jump targets for other code in this
 * file (the APIC stubs enter at ret_from_intr; the exception exit path
 * uses retint_kernel, retint_careful and retint_swapgs; the paranoid
 * exit path ends at irq_return).
 */
ENTRY(common_interrupt)
	XCPT_FRAME
	interrupt do_IRQ
	/* 0(%rsp): oldrsp-ARGOFFSET */
ret_from_intr:
	DISABLE_INTERRUPTS(CLBR_NONE)
	TRACE_IRQS_OFF
	decl	%gs:pda_irqcount
	/* Drop the frame that the interrupt macro built around %rbp. */
	leaveq
	CFI_DEF_CFA_REGISTER	rsp
	CFI_ADJUST_CFA_OFFSET	-8
exit_intr:
	GET_THREAD_INFO(%rcx)
	/* Low two bits of the saved CS are zero: we interrupted the kernel. */
	testl	$3,CS-ARGOFFSET(%rsp)
	jz	retint_kernel

	/*
	 * Interrupt came from user space.
	 *
	 * The top of stack is correct, but only a partial stack frame
	 * has been saved.
	 * %rcx: thread info. Interrupts are off.
	 */
retint_with_reschedule:
	movl	$_TIF_WORK_MASK,%edi
retint_check:
	LOCKDEP_SYS_EXIT_IRQ
	movl	TI_flags(%rcx),%edx
	andl	%edi,%edx		/* edx = pending work bits */
	CFI_REMEMBER_STATE
	jnz	retint_careful

retint_swapgs:				/* return to user-space */
	/* The iretq could re-enable interrupts, so tell the tracer. */
	DISABLE_INTERRUPTS(CLBR_ANY)
	TRACE_IRQS_IRETQ
	SWAPGS
	jmp	restore_args

retint_restore_args:			/* return to kernel space */
	DISABLE_INTERRUPTS(CLBR_ANY)
	/* The iretq could re-enable interrupts, so tell the tracer. */
	TRACE_IRQS_IRETQ
restore_args:
	RESTORE_ARGS 0,8,0

irq_return:
	INTERRUPT_RETURN

	/* A fault on the return instruction is redirected to bad_iret. */
	.section __ex_table, "a"
	.quad irq_return, bad_iret
	.previous

#ifdef CONFIG_PARAVIRT
ENTRY(native_iret)
	iretq

	.section __ex_table,"a"
	.quad native_iret, bad_iret
	.previous
#endif

	.section .fixup,"ax"
bad_iret:
	/*
	 * The iret faults when the %cs or %ss it is restoring is bogus.
	 * The original trap vector and error code are lost by now.
	 * #GPF is the most likely fault for an invalid selector, so act
	 * as if the iret had completed and the #GPF was taken in user mode.
	 *
	 * After exception recovery we are running with the kernel GS,
	 * but error_entry expects the user GS to go with the user %cs,
	 * so swap back first.
	 */
	pushq	$0

	SWAPGS
	jmp	general_protection

	.previous

	/* edi: workmask, edx: work */
retint_careful:
	CFI_RESTORE_STATE
	bt	$TIF_NEED_RESCHED,%edx
	jnc	retint_signal
	TRACE_IRQS_ON
	ENABLE_INTERRUPTS(CLBR_NONE)
	pushq	%rdi			/* keep the work mask across schedule */
	CFI_ADJUST_CFA_OFFSET	8
	call	schedule
	popq	%rdi
	CFI_ADJUST_CFA_OFFSET	-8
	GET_THREAD_INFO(%rcx)
	DISABLE_INTERRUPTS(CLBR_NONE)
	TRACE_IRQS_OFF
	jmp	retint_check

retint_signal:
	testl	$_TIF_DO_NOTIFY_MASK,%edx
	jz	retint_swapgs
	TRACE_IRQS_ON
	ENABLE_INTERRUPTS(CLBR_NONE)
	SAVE_REST
	movq	$-1,ORIG_RAX(%rsp)
	xorl	%esi,%esi		/* oldset */
	movq	%rsp,%rdi		/* &pt_regs */
	call	do_notify_resume
	RESTORE_REST
	DISABLE_INTERRUPTS(CLBR_NONE)
	TRACE_IRQS_OFF
	GET_THREAD_INFO(%rcx)
	jmp	retint_with_reschedule

#ifdef CONFIG_PREEMPT
	/*
	 * Returning to kernel space: check whether preemption is needed.
	 * rcx: threadinfo. Interrupts are off.
	 */
ENTRY(retint_kernel)
	cmpl	$0,TI_preempt_count(%rcx)
	jne	retint_restore_args
	bt	$TIF_NEED_RESCHED,TI_flags(%rcx)
	jnc	retint_restore_args
	bt	$9,EFLAGS-ARGOFFSET(%rsp)	/* interrupts off? */
	jnc	retint_restore_args
	call	preempt_schedule_irq
	jmp	exit_intr
#endif

	CFI_ENDPROC
END(common_interrupt)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700885
/*
 * APIC interrupts.
 *
 * apicinterrupt num,func
 *   num  - vector number; its bitwise complement is pushed on the stack
 *   func - handler passed on to the "interrupt" macro
 * After the handler returns, control joins the common path at
 * ret_from_intr.
 */
	.macro apicinterrupt num,func
	INTR_FRAME
	pushq	$~(\num)
	CFI_ADJUST_CFA_OFFSET	8
	interrupt \func
	jmp	ret_from_intr
	CFI_ENDPROC
	.endm
897
/*
 * Entry stubs for the individual APIC vectors. Each one expands
 * apicinterrupt with its vector number and its handler function.
 */
ENTRY(thermal_interrupt)
	apicinterrupt THERMAL_APIC_VECTOR,smp_thermal_interrupt
END(thermal_interrupt)

ENTRY(threshold_interrupt)
	apicinterrupt THRESHOLD_APIC_VECTOR,mce_threshold_interrupt
END(threshold_interrupt)

#ifdef CONFIG_SMP
ENTRY(reschedule_interrupt)
	apicinterrupt RESCHEDULE_VECTOR,smp_reschedule_interrupt
END(reschedule_interrupt)

	/* One TLB-invalidate stub per vector, INVALIDATE_TLB_VECTOR_START + num. */
	.macro INVALIDATE_ENTRY num
ENTRY(invalidate_interrupt\num)
	apicinterrupt INVALIDATE_TLB_VECTOR_START+\num,smp_invalidate_interrupt
END(invalidate_interrupt\num)
	.endm

	INVALIDATE_ENTRY 0
	INVALIDATE_ENTRY 1
	INVALIDATE_ENTRY 2
	INVALIDATE_ENTRY 3
	INVALIDATE_ENTRY 4
	INVALIDATE_ENTRY 5
	INVALIDATE_ENTRY 6
	INVALIDATE_ENTRY 7

ENTRY(call_function_interrupt)
	apicinterrupt CALL_FUNCTION_VECTOR,smp_call_function_interrupt
END(call_function_interrupt)

ENTRY(call_function_single_interrupt)
	apicinterrupt CALL_FUNCTION_SINGLE_VECTOR,smp_call_function_single_interrupt
END(call_function_single_interrupt)

ENTRY(irq_move_cleanup_interrupt)
	apicinterrupt IRQ_MOVE_CLEANUP_VECTOR,smp_irq_move_cleanup_interrupt
END(irq_move_cleanup_interrupt)
#endif

ENTRY(apic_timer_interrupt)
	apicinterrupt LOCAL_TIMER_VECTOR,smp_apic_timer_interrupt
END(apic_timer_interrupt)

	/*
	 * NOTE(review): this is the only stub that passes a literal vector
	 * number; consider a named constant if one exists for vector 220.
	 */
ENTRY(uv_bau_message_intr1)
	apicinterrupt 220,uv_bau_message_interrupt
END(uv_bau_message_intr1)

ENTRY(error_interrupt)
	apicinterrupt ERROR_APIC_VECTOR,smp_error_interrupt
END(error_interrupt)

ENTRY(spurious_interrupt)
	apicinterrupt SPURIOUS_APIC_VECTOR,smp_spurious_interrupt
END(spurious_interrupt)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700952
/*
 * Exception entry points.
 *
 * zeroentry sym: entry for exceptions where this stub supplies the
 * error code itself. It pushes a zero error code, saves %rax on the
 * stack, loads the address of the handler "sym" into %rax and jumps to
 * error_entry.
 */
	.macro zeroentry sym
	INTR_FRAME
	PARAVIRT_ADJUST_EXCEPTION_FRAME
	pushq	$0			/* error code / oldrax slot */
	CFI_ADJUST_CFA_OFFSET	8
	pushq	%rax			/* real oldrax goes into the rdi slot */
	CFI_ADJUST_CFA_OFFSET	8
	CFI_REL_OFFSET	rax,0
	leaq	\sym(%rip),%rax		/* handler address for error_entry */
	jmp	error_entry
	CFI_ENDPROC
	.endm
968
/*
 * errorentry sym: like zeroentry, but pushes no error code of its own.
 * It saves %rax on the stack, loads the address of the handler "sym"
 * into %rax and jumps to error_entry.
 */
	.macro errorentry sym
	XCPT_FRAME
	PARAVIRT_ADJUST_EXCEPTION_FRAME
	pushq	%rax
	CFI_ADJUST_CFA_OFFSET	8
	CFI_REL_OFFSET	rax,0
	leaq	\sym(%rip),%rax		/* handler address for error_entry */
	jmp	error_entry
	CFI_ENDPROC
	.endm
979
980 /* error code is on the stack already */
981 /* handle NMI like exceptions that can happen everywhere */
Ingo Molnar2601e642006-07-03 00:24:45 -0700982 .macro paranoidentry sym, ist=0, irqtrace=1
Linus Torvalds1da177e2005-04-16 15:20:36 -0700983 SAVE_ALL
984 cld
985 movl $1,%ebx
986 movl $MSR_GS_BASE,%ecx
987 rdmsr
988 testl %edx,%edx
989 js 1f
Glauber de Oliveira Costa72fe4852008-01-30 13:32:08 +0100990 SWAPGS
Linus Torvalds1da177e2005-04-16 15:20:36 -0700991 xorl %ebx,%ebx
Jan Beulichb556b352006-01-11 22:43:00 +01009921:
993 .if \ist
994 movq %gs:pda_data_offset, %rbp
995 .endif
Alexander van Heukelum7e61a792008-09-26 14:03:03 +0200996 .if \irqtrace
997 TRACE_IRQS_OFF
998 .endif
Jan Beulichb556b352006-01-11 22:43:00 +0100999 movq %rsp,%rdi
Linus Torvalds1da177e2005-04-16 15:20:36 -07001000 movq ORIG_RAX(%rsp),%rsi
1001 movq $-1,ORIG_RAX(%rsp)
Jan Beulichb556b352006-01-11 22:43:00 +01001002 .if \ist
Andi Kleen5f8efbb2006-01-16 01:56:39 +01001003 subq $EXCEPTION_STKSZ, per_cpu__init_tss + TSS_ist + (\ist - 1) * 8(%rbp)
Jan Beulichb556b352006-01-11 22:43:00 +01001004 .endif
Linus Torvalds1da177e2005-04-16 15:20:36 -07001005 call \sym
Jan Beulichb556b352006-01-11 22:43:00 +01001006 .if \ist
Andi Kleen5f8efbb2006-01-16 01:56:39 +01001007 addq $EXCEPTION_STKSZ, per_cpu__init_tss + TSS_ist + (\ist - 1) * 8(%rbp)
Jan Beulichb556b352006-01-11 22:43:00 +01001008 .endif
Glauber de Oliveira Costa72fe4852008-01-30 13:32:08 +01001009 DISABLE_INTERRUPTS(CLBR_NONE)
Ingo Molnar2601e642006-07-03 00:24:45 -07001010 .if \irqtrace
1011 TRACE_IRQS_OFF
1012 .endif
Linus Torvalds1da177e2005-04-16 15:20:36 -07001013 .endm
Ingo Molnar2601e642006-07-03 00:24:45 -07001014
1015 /*
1016 * "Paranoid" exit path from exception stack.
1017 * Paranoid because this is used by NMIs and cannot take
1018 * any kernel state for granted.
1019 * We don't do kernel preemption checks here, because only
1020 * NMI should be common and it does not enable IRQs and
1021 * cannot get reschedule ticks.
1022 *
1023 * "trace" is 0 for the NMI handler only, because irq-tracing
1024 * is fundamentally NMI-unsafe. (we cannot change the soft and
1025 * hard flags at once, atomically)
1026 */
1027 .macro paranoidexit trace=1
1028 /* ebx: no swapgs flag */
1029paranoid_exit\trace:
1030 testl %ebx,%ebx /* swapgs needed? */
1031 jnz paranoid_restore\trace
1032 testl $3,CS(%rsp)
1033 jnz paranoid_userspace\trace
1034paranoid_swapgs\trace:
Andi Kleen7a0a2df2006-09-26 10:52:37 +02001035 .if \trace
Ingo Molnar2601e642006-07-03 00:24:45 -07001036 TRACE_IRQS_IRETQ 0
Andi Kleen7a0a2df2006-09-26 10:52:37 +02001037 .endif
Glauber de Oliveira Costa72fe4852008-01-30 13:32:08 +01001038 SWAPGS_UNSAFE_STACK
Ingo Molnar2601e642006-07-03 00:24:45 -07001039paranoid_restore\trace:
1040 RESTORE_ALL 8
Ingo Molnar3701d8632008-02-09 23:24:08 +01001041 jmp irq_return
Ingo Molnar2601e642006-07-03 00:24:45 -07001042paranoid_userspace\trace:
1043 GET_THREAD_INFO(%rcx)
Glauber Costa26ccb8a2008-06-24 11:19:35 -03001044 movl TI_flags(%rcx),%ebx
Ingo Molnar2601e642006-07-03 00:24:45 -07001045 andl $_TIF_WORK_MASK,%ebx
1046 jz paranoid_swapgs\trace
1047 movq %rsp,%rdi /* &pt_regs */
1048 call sync_regs
1049 movq %rax,%rsp /* switch stack for scheduling */
1050 testl $_TIF_NEED_RESCHED,%ebx
1051 jnz paranoid_schedule\trace
1052 movl %ebx,%edx /* arg3: thread flags */
1053 .if \trace
1054 TRACE_IRQS_ON
1055 .endif
Glauber de Oliveira Costa72fe4852008-01-30 13:32:08 +01001056 ENABLE_INTERRUPTS(CLBR_NONE)
Ingo Molnar2601e642006-07-03 00:24:45 -07001057 xorl %esi,%esi /* arg2: oldset */
1058 movq %rsp,%rdi /* arg1: &pt_regs */
1059 call do_notify_resume
Glauber de Oliveira Costa72fe4852008-01-30 13:32:08 +01001060 DISABLE_INTERRUPTS(CLBR_NONE)
Ingo Molnar2601e642006-07-03 00:24:45 -07001061 .if \trace
1062 TRACE_IRQS_OFF
1063 .endif
1064 jmp paranoid_userspace\trace
1065paranoid_schedule\trace:
1066 .if \trace
1067 TRACE_IRQS_ON
1068 .endif
Glauber de Oliveira Costa72fe4852008-01-30 13:32:08 +01001069 ENABLE_INTERRUPTS(CLBR_ANY)
Ingo Molnar2601e642006-07-03 00:24:45 -07001070 call schedule
Glauber de Oliveira Costa72fe4852008-01-30 13:32:08 +01001071 DISABLE_INTERRUPTS(CLBR_ANY)
Ingo Molnar2601e642006-07-03 00:24:45 -07001072 .if \trace
1073 TRACE_IRQS_OFF
1074 .endif
1075 jmp paranoid_userspace\trace
1076 CFI_ENDPROC
1077 .endm
1078
/*
 * error_entry: common exception entry point.
 *
 * On entry the stack holds an error code / orig_rax and, in the rdi
 * slot, the interrupted value of %rax; %rax itself holds the address
 * of the exception handler. The handler is called with
 * %rdi = &pt_regs and %rsi = error code.
 *
 * %ebx carries the "no swapgs" flag to error_exit:
 * 1 = do not swapgs on the way out, 0 = swapgs is needed.
 */
KPROBE_ENTRY(error_entry)
	_frame RDI
	CFI_REL_OFFSET	rax,0
	/* rdi slot contains rax, oldrax contains error code */
	cld
	subq	$14*8,%rsp
	CFI_ADJUST_CFA_OFFSET	(14*8)
	movq	%rsi,13*8(%rsp)
	CFI_REL_OFFSET	rsi,RSI
	movq	14*8(%rsp),%rsi		/* fetch the saved rax from the rdi slot */
	CFI_REGISTER	rax,rsi
	movq	%rdx,12*8(%rsp)
	CFI_REL_OFFSET	rdx,RDX
	movq	%rcx,11*8(%rsp)
	CFI_REL_OFFSET	rcx,RCX
	movq	%rsi,10*8(%rsp)		/* ... and store it in the rax slot */
	CFI_REL_OFFSET	rax,RAX
	movq	%r8, 9*8(%rsp)
	CFI_REL_OFFSET	r8,R8
	movq	%r9, 8*8(%rsp)
	CFI_REL_OFFSET	r9,R9
	movq	%r10,7*8(%rsp)
	CFI_REL_OFFSET	r10,R10
	movq	%r11,6*8(%rsp)
	CFI_REL_OFFSET	r11,R11
	movq	%rbx,5*8(%rsp)
	CFI_REL_OFFSET	rbx,RBX
	movq	%rbp,4*8(%rsp)
	CFI_REL_OFFSET	rbp,RBP
	movq	%r12,3*8(%rsp)
	CFI_REL_OFFSET	r12,R12
	movq	%r13,2*8(%rsp)
	CFI_REL_OFFSET	r13,R13
	movq	%r14,1*8(%rsp)
	CFI_REL_OFFSET	r14,R14
	movq	%r15,(%rsp)
	CFI_REL_OFFSET	r15,R15
	xorl	%ebx,%ebx		/* default: swapgs needed on exit */
	testl	$3,CS(%rsp)
	jz	error_kernelspace
error_swapgs:
	SWAPGS
error_sti:
	TRACE_IRQS_OFF
	movq	%rdi,RDI(%rsp)
	CFI_REL_OFFSET	rdi,RDI
	movq	%rsp,%rdi		/* arg1: &pt_regs */
	movq	ORIG_RAX(%rsp),%rsi	/* arg2: error code */
	movq	$-1,ORIG_RAX(%rsp)
	call	*%rax
	/* ebx: no swapgs flag (1: don't need swapgs, 0: need it) */
error_exit:
	movl	%ebx,%eax
	RESTORE_REST
	DISABLE_INTERRUPTS(CLBR_NONE)
	TRACE_IRQS_OFF
	GET_THREAD_INFO(%rcx)
	testl	%eax,%eax
	jnz	retint_kernel
	LOCKDEP_SYS_EXIT_IRQ
	movl	TI_flags(%rcx),%edx
	movl	$_TIF_WORK_MASK,%edi
	andl	%edi,%edx
	jnz	retint_careful
	jmp	retint_swapgs
	CFI_ENDPROC

error_kernelspace:
	incl	%ebx			/* ebx = 1: no swapgs on exit */
	/*
	 * Two places in the kernel can potentially fault while still
	 * running with the user GS; handle them here. The exception
	 * handlers after iret run with the kernel GS again, so the
	 * user-space flag is not set for them.
	 * B stepping K8s sometimes report a truncated RIP for IRET
	 * exceptions returning to compat mode, so the zero-extended
	 * low 32 bits of the address are checked as well.
	 */
	leaq	irq_return(%rip),%rcx
	cmpq	%rcx,RIP(%rsp)
	je	error_swapgs
	movl	%ecx,%ecx		/* zero extend */
	cmpq	%rcx,RIP(%rsp)
	je	error_swapgs
	cmpq	$gs_change,RIP(%rsp)
	je	error_swapgs
	jmp	error_sti
KPROBE_END(error_entry)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001167
1168 /* Reload gs selector with exception handling */
1169 /* edi: new selector */
Jeremy Fitzhardinge9f9d4892008-06-25 00:19:32 -04001170ENTRY(native_load_gs_index)
Jan Beulich7effaa82005-09-12 18:49:24 +02001171 CFI_STARTPROC
Linus Torvalds1da177e2005-04-16 15:20:36 -07001172 pushf
Jan Beulich7effaa82005-09-12 18:49:24 +02001173 CFI_ADJUST_CFA_OFFSET 8
Glauber de Oliveira Costa72fe4852008-01-30 13:32:08 +01001174 DISABLE_INTERRUPTS(CLBR_ANY | ~(CLBR_RDI))
1175 SWAPGS
Linus Torvalds1da177e2005-04-16 15:20:36 -07001176gs_change:
1177 movl %edi,%gs
11782: mfence /* workaround */
Glauber de Oliveira Costa72fe4852008-01-30 13:32:08 +01001179 SWAPGS
Linus Torvalds1da177e2005-04-16 15:20:36 -07001180 popf
Jan Beulich7effaa82005-09-12 18:49:24 +02001181 CFI_ADJUST_CFA_OFFSET -8
Linus Torvalds1da177e2005-04-16 15:20:36 -07001182 ret
Jan Beulich7effaa82005-09-12 18:49:24 +02001183 CFI_ENDPROC
Jeremy Fitzhardinge9f9d4892008-06-25 00:19:32 -04001184ENDPROC(native_load_gs_index)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001185
1186 .section __ex_table,"a"
1187 .align 8
1188 .quad gs_change,bad_gs
1189 .previous
1190 .section .fixup,"ax"
1191 /* running with kernelgs */
1192bad_gs:
Glauber de Oliveira Costa72fe4852008-01-30 13:32:08 +01001193 SWAPGS /* switch back to user gs */
Linus Torvalds1da177e2005-04-16 15:20:36 -07001194 xorl %eax,%eax
1195 movl %eax,%gs
1196 jmp 2b
1197 .previous
1198
/*
 * Create a kernel thread.
 *
 * C interface:
 *	extern long kernel_thread(int (*fn)(void *), void * arg, unsigned long flags)
 *
 * Register arguments on entry:
 *	rdi: fn, rsi: arg, rdx: flags
 *
 * The result of do_fork is returned through the RAX slot of the
 * saved frame.
 */
ENTRY(kernel_thread)
	CFI_STARTPROC
	FAKE_STACK_FRAME $child_rip
	SAVE_ALL

	/* Arguments for do_fork: rdi = flags, rsi = usp, rdx = &pt_regs */
	movq	%rdx,%rdi
	orq	kernel_thread_flags(%rip),%rdi
	movq	$-1, %rsi
	movq	%rsp, %rdx

	xorl	%r8d,%r8d
	xorl	%r9d,%r9d

	/* clone now */
	call	do_fork
	movq	%rax,RAX(%rsp)
	xorl	%edi,%edi

	/*
	 * Checking for a reschedule here is not worth it, so inside the
	 * x86_64 port kernel_thread() can be relied on not to reschedule
	 * the child before returning. That avoids the need for hacks,
	 * for example when forking off the per-CPU idle tasks.
	 * [Hopefully no generic code relies on the reschedule -AK]
	 */
	RESTORE_ALL
	UNFAKE_STACK_FRAME
	ret
	CFI_ENDPROC
ENDPROC(kernel_thread)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001239
/*
 * child_rip: where a thread created by kernel_thread() starts running.
 * The registers hold what they held when kernel_thread() was called in
 * the parent: rdi = fn, rsi = arg. Calls fn(arg) and passes its return
 * value to do_exit.
 */
child_rip:
	pushq	$0			/* fake return address */
	CFI_STARTPROC
	movq	%rdi, %rax		/* rax = fn */
	movq	%rsi, %rdi		/* arg1 = arg */
	call	*%rax
	/* exit with the value fn returned */
	movl	%eax, %edi
	call	do_exit
	CFI_ENDPROC
ENDPROC(child_rip)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001255
/*
 * execve(). This function needs to use IRET, not SYSRET, to set up all
 * state properly.
 *
 * C interface:
 *	extern long execve(char *name, char **argv, char **envp)
 *
 * Register arguments on entry:
 *	rdi: name, rsi: argv, rdx: envp
 *
 * The work is handed to:
 *	extern long sys_execve(char *name, char **argv,char **envp, struct pt_regs *regs)
 *
 * Register arguments for sys_execve:
 *	rdi: name, rsi: argv, rdx: envp, rcx: fake frame on the stack
 *
 * A zero result leaves through int_ret_from_sys_call; any other result
 * is returned to the caller.
 */
ENTRY(kernel_execve)
	CFI_STARTPROC
	FAKE_STACK_FRAME $0
	SAVE_ALL
	movq	%rsp,%rcx		/* arg4: the fake frame as pt_regs */
	call	sys_execve
	movq	%rax, RAX(%rsp)
	RESTORE_REST
	testq	%rax,%rax
	jz	int_ret_from_sys_call
	RESTORE_ARGS
	UNFAKE_STACK_FRAME
	ret
	CFI_ENDPROC
ENDPROC(kernel_execve)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001286
/*
 * Exception stubs built from the entry macros: errorentry pushes no
 * error code of its own, zeroentry pushes a zero one. Each stub names
 * the handler that error_entry will call.
 */
KPROBE_ENTRY(page_fault)
	errorentry do_page_fault
KPROBE_END(page_fault)

ENTRY(coprocessor_error)
	zeroentry do_coprocessor_error
END(coprocessor_error)

ENTRY(simd_coprocessor_error)
	zeroentry do_simd_coprocessor_error
END(simd_coprocessor_error)

ENTRY(device_not_available)
	zeroentry do_device_not_available
END(device_not_available)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001302
1303 /* runs on exception stack */
Prasanna S Panchamukhi0f2fbdc2005-09-06 15:19:28 -07001304KPROBE_ENTRY(debug)
Jan Beulich7effaa82005-09-12 18:49:24 +02001305 INTR_FRAME
Jeremy Fitzhardinge09402942008-07-12 02:22:12 -07001306 PARAVIRT_ADJUST_EXCEPTION_FRAME
Linus Torvalds1da177e2005-04-16 15:20:36 -07001307 pushq $0
1308 CFI_ADJUST_CFA_OFFSET 8
Andi Kleen5f8efbb2006-01-16 01:56:39 +01001309 paranoidentry do_debug, DEBUG_STACK
Ingo Molnar2601e642006-07-03 00:24:45 -07001310 paranoidexit
Prasanna S.Pd28c4392006-09-26 10:52:34 +02001311KPROBE_END(debug)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001312
1313 /* runs on exception stack */
Andi Kleeneddb6fb2006-02-03 21:50:41 +01001314KPROBE_ENTRY(nmi)
Jan Beulich7effaa82005-09-12 18:49:24 +02001315 INTR_FRAME
Jeremy Fitzhardinge09402942008-07-12 02:22:12 -07001316 PARAVIRT_ADJUST_EXCEPTION_FRAME
Linus Torvalds1da177e2005-04-16 15:20:36 -07001317 pushq $-1
Jan Beulich7effaa82005-09-12 18:49:24 +02001318 CFI_ADJUST_CFA_OFFSET 8
Ingo Molnar2601e642006-07-03 00:24:45 -07001319 paranoidentry do_nmi, 0, 0
1320#ifdef CONFIG_TRACE_IRQFLAGS
1321 paranoidexit 0
1322#else
1323 jmp paranoid_exit1
1324 CFI_ENDPROC
1325#endif
Prasanna S.Pd28c4392006-09-26 10:52:34 +02001326KPROBE_END(nmi)
Andi Kleen6fefb0d2005-04-16 15:25:03 -07001327
Prasanna S Panchamukhi0f2fbdc2005-09-06 15:19:28 -07001328KPROBE_ENTRY(int3)
Jan Beulichb556b352006-01-11 22:43:00 +01001329 INTR_FRAME
Jeremy Fitzhardinge09402942008-07-12 02:22:12 -07001330 PARAVIRT_ADJUST_EXCEPTION_FRAME
Jan Beulichb556b352006-01-11 22:43:00 +01001331 pushq $0
1332 CFI_ADJUST_CFA_OFFSET 8
Andi Kleen5f8efbb2006-01-16 01:56:39 +01001333 paranoidentry do_int3, DEBUG_STACK
Ingo Molnar2601e642006-07-03 00:24:45 -07001334 jmp paranoid_exit1
Jan Beulichb556b352006-01-11 22:43:00 +01001335 CFI_ENDPROC
Prasanna S.Pd28c4392006-09-26 10:52:34 +02001336KPROBE_END(int3)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001337
/*
 * More exception stubs. The zeroentry/errorentry ones go through
 * error_entry; double_fault and stack_segment take the paranoid path
 * and leave through paranoid_exit1.
 */
ENTRY(overflow)
	zeroentry do_overflow
END(overflow)

ENTRY(bounds)
	zeroentry do_bounds
END(bounds)

ENTRY(invalid_op)
	zeroentry do_invalid_op
END(invalid_op)

ENTRY(coprocessor_segment_overrun)
	zeroentry do_coprocessor_segment_overrun
END(coprocessor_segment_overrun)

	/* runs on exception stack */
ENTRY(double_fault)
	XCPT_FRAME
	PARAVIRT_ADJUST_EXCEPTION_FRAME
	paranoidentry do_double_fault
	jmp	paranoid_exit1
	CFI_ENDPROC
END(double_fault)

ENTRY(invalid_TSS)
	errorentry do_invalid_TSS
END(invalid_TSS)

ENTRY(segment_not_present)
	errorentry do_segment_not_present
END(segment_not_present)

	/* runs on exception stack */
ENTRY(stack_segment)
	XCPT_FRAME
	PARAVIRT_ADJUST_EXCEPTION_FRAME
	paranoidentry do_stack_segment
	jmp	paranoid_exit1
	CFI_ENDPROC
END(stack_segment)

KPROBE_ENTRY(general_protection)
	errorentry do_general_protection
KPROBE_END(general_protection)

ENTRY(alignment_check)
	errorentry do_alignment_check
END(alignment_check)

ENTRY(divide_error)
	zeroentry do_divide_error
END(divide_error)

ENTRY(spurious_interrupt_bug)
	zeroentry do_spurious_interrupt_bug
END(spurious_interrupt_bug)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001395
#ifdef CONFIG_X86_MCE
	/*
	 * machine_check: runs on exception stack.
	 * Pushes a zero error code, calls do_machine_check through the
	 * paranoid entry path and leaves through paranoid_exit1.
	 */
ENTRY(machine_check)
	INTR_FRAME
	PARAVIRT_ADJUST_EXCEPTION_FRAME
	pushq	$0
	CFI_ADJUST_CFA_OFFSET	8
	paranoidentry do_machine_check
	jmp	paranoid_exit1
	CFI_ENDPROC
END(machine_check)
#endif
1408
/*
 * call_softirq: run __do_softirq on the interrupt stack.
 * Interrupts are off on entry.
 *
 * The stack switch is conditional on the flags left by the increment
 * of pda_irqcount: %rsp is replaced with pda_irqstackptr only when the
 * counter became zero.
 */
ENTRY(call_softirq)
	CFI_STARTPROC
	pushq	%rbp
	CFI_ADJUST_CFA_OFFSET	8
	CFI_REL_OFFSET	rbp,0
	movq	%rsp,%rbp
	CFI_DEF_CFA_REGISTER	rbp
	incl	%gs:pda_irqcount
	cmoveq	%gs:pda_irqstackptr,%rsp
	pushq	%rbp			/* backlink for old unwinder */
	call	__do_softirq
	leaveq				/* back to the caller's stack and %rbp */
	CFI_DEF_CFA_REGISTER	rsp
	CFI_ADJUST_CFA_OFFSET	-8
	decl	%gs:pda_irqcount
	ret
	CFI_ENDPROC
ENDPROC(call_softirq)
Andi Kleen75154f42007-06-23 02:29:25 +02001428
/*
 * ignore_sysret: loads -ENOSYS into %eax and returns with sysret.
 *
 * The block is opened with KPROBE_ENTRY, so it is closed with
 * KPROBE_END like every other KPROBE_ENTRY in this file. The previous
 * ENDPROC left the open/close pair unbalanced.
 * NOTE(review): this assumes KPROBE_ENTRY/KPROBE_END bracket a section
 * switch, as their paired use elsewhere suggests - confirm against the
 * macro definitions.
 */
KPROBE_ENTRY(ignore_sysret)
	CFI_STARTPROC
	mov	$-ENOSYS,%eax
	sysret
	CFI_ENDPROC
KPROBE_END(ignore_sysret)
Jeremy Fitzhardinge3d75e1b2008-07-08 15:06:49 -07001435
1436#ifdef CONFIG_XEN
/*
 * xen_hypervisor_callback: enters through zeroentry (zero error code)
 * with xen_do_hypervisor_callback as the handler.
 */
ENTRY(xen_hypervisor_callback)
	zeroentry xen_do_hypervisor_callback
END(xen_hypervisor_callback)
1440
1441/*
1442# A note on the "critical region" in our callback handler.
1443# We want to avoid stacking callback handlers due to events occurring
1444# during handling of the last event. To do this, we keep events disabled
1445# until we've done all processing. HOWEVER, we must enable events before
1446# popping the stack frame (can't be done atomically) and so it would still
1447# be possible to get enough handler activations to overflow the stack.
1448# Although unlikely, bugs of that kind are hard to track down, so we'd
1449# like to avoid the possibility.
1450# So, on entry to the handler we detect whether we interrupted an
1451# existing activation in its critical region -- if so, we pop the current
1452# activation and restart the handler using the previous one.
1453*/
/*
 * xen_do_hypervisor_callback(struct pt_regs *regs)
 *
 * In:  rdi = pointer to the saved pt_regs
 *
 * Moves the stack pointer onto the pt_regs frame, switches to the
 * interrupt stack when the incremented pda_irqcount became zero, calls
 * xen_evtchn_do_upcall and leaves through error_exit. It never returns
 * to its caller.
 */
ENTRY(xen_do_hypervisor_callback)
	CFI_STARTPROC
	/*
	 * %rdi is not modified here, so xen_evtchn_do_upcall sees the
	 * correct pointer to the pt_regs.
	 */
	movq	%rdi, %rsp		/* we don't return, adjust the stack frame */
	CFI_ENDPROC
	CFI_DEFAULT_STACK
11:	incl	%gs:pda_irqcount
	movq	%rsp,%rbp		/* mov leaves the flags from incl intact */
	CFI_DEF_CFA_REGISTER	rbp
	cmovzq	%gs:pda_irqstackptr,%rsp
	pushq	%rbp			/* backlink for old unwinder */
	call	xen_evtchn_do_upcall
	popq	%rsp			/* back to the pt_regs frame */
	CFI_DEF_CFA_REGISTER	rsp
	decl	%gs:pda_irqcount
	jmp	error_exit
	CFI_ENDPROC
	/* END must name the symbol that ENTRY opened. */
END(xen_do_hypervisor_callback)
1473
1474/*
1475# Hypervisor uses this for application faults while it executes.
1476# We get here for two reasons:
1477# 1. Fault while reloading DS, ES, FS or GS
1478# 2. Fault while executing IRET
1479# Category 1 we do not need to fix up as Xen has already reloaded all segment
1480# registers that could be reloaded and zeroed the others.
1481# Category 2 we fix up by killing the current process. We cannot use the
1482# normal Linux return path in this case because if we use the IRET hypercall
1483# to pop the stack frame we end up in an infinite loop of failsafe callbacks.
1484# We distinguish between categories by comparing each saved segment register
# with its current contents: any discrepancy means we are in category 1.
1486*/
/*
 * xen_failsafe_callback: tell the two failure categories apart and
 * dispatch accordingly.
 *
 * Frame at %rsp on entry, as used by the code below:
 *   0x00 saved %rcx      0x08 saved %r11
 *   0x10 saved %ds       0x18 saved %es
 *   0x20 saved %fs       0x28 saved %gs
 * These 0x30 bytes are dropped on both paths before continuing.
 * %cx is used as scratch for the comparisons; %rcx and %r11 are reloaded
 * from the frame afterwards.
 */
ENTRY(xen_failsafe_callback)
	framesz = (RIP-0x30)	/* workaround buggy gas */
	_frame framesz
	CFI_REL_OFFSET rcx, 0
	CFI_REL_OFFSET r11, 8
	/* Compare each live segment selector with the value saved in the frame. */
	movw %ds,%cx
	cmpw %cx,0x10(%rsp)
	CFI_REMEMBER_STATE	/* unwind state at this point is restored for label 1 */
	jne 1f
	movw %es,%cx
	cmpw %cx,0x18(%rsp)
	jne 1f
	movw %fs,%cx
	cmpw %cx,0x20(%rsp)
	jne 1f
	movw %gs,%cx
	cmpw %cx,0x28(%rsp)
	jne 1f
	/* All segments match their saved values => Category 2 (Bad IRET). */
	movq (%rsp),%rcx
	CFI_RESTORE rcx
	movq 8(%rsp),%r11
	CFI_RESTORE r11
	addq $0x30,%rsp		/* drop the rcx/r11/segment slots */
	CFI_ADJUST_CFA_OFFSET -0x30
	/*
	 * Push 0, %r11 and %rcx and continue in general_protection.
	 * NOTE(review): presumably this rebuilds the frame layout the
	 * general_protection entry expects from Xen -- confirm.
	 */
	pushq $0
	CFI_ADJUST_CFA_OFFSET 8
	pushq %r11
	CFI_ADJUST_CFA_OFFSET 8
	pushq %rcx
	CFI_ADJUST_CFA_OFFSET 8
	jmp general_protection
	CFI_RESTORE_STATE
1:	/* Segment mismatch => Category 1 (Bad segment). Retry the IRET. */
	movq (%rsp),%rcx
	CFI_RESTORE rcx
	movq 8(%rsp),%r11
	CFI_RESTORE r11
	addq $0x30,%rsp		/* drop the rcx/r11/segment slots */
	CFI_ADJUST_CFA_OFFSET -0x30
	/*
	 * NOTE(review): the pushed 0 presumably fills the error-code /
	 * orig_rax slot that SAVE_ALL and error_exit expect -- confirm.
	 */
	pushq $0
	CFI_ADJUST_CFA_OFFSET 8
	SAVE_ALL
	jmp error_exit		/* leave through the common exit path */
	CFI_ENDPROC
END(xen_failsafe_callback)
1533
1534#endif /* CONFIG_XEN */