/* SPDX-License-Identifier: GPL-2.0 */
/*
 * Compatibility mode system call entry point for x86-64.
 *
 * Copyright 2000-2002 Andi Kleen, SuSE Labs.
 */
#include "calling.h"
#include <asm/asm-offsets.h>
#include <asm/current.h>
#include <asm/errno.h>
#include <asm/ia32_unistd.h>
#include <asm/thread_info.h>
#include <asm/segment.h>
#include <asm/irqflags.h>
#include <asm/asm.h>
#include <asm/smap.h>
#include <linux/linkage.h>
#include <linux/err.h>

	.section .entry.text, "ax"

/*
 * 32-bit SYSENTER entry.
 *
 * 32-bit system calls through the vDSO's __kernel_vsyscall enter here
 * on 64-bit kernels running on Intel CPUs.
 *
 * The SYSENTER instruction, in principle, should *only* occur in the
 * vDSO.  In practice, a small number of Android devices were shipped
 * with a copy of Bionic that inlined a SYSENTER instruction.  This
 * never happened in any of Google's Bionic versions -- it only happened
 * in a narrow range of Intel-provided versions.
 *
 * SYSENTER loads SS, RSP, CS, and RIP from previously programmed MSRs.
 * IF and VM in RFLAGS are cleared (IOW: interrupts are off).
 * SYSENTER does not save anything on the stack,
 * and does not save old RIP (!!!), RSP, or RFLAGS.
 *
 * Arguments:
 * eax  system call number
 * ebx  arg1
 * ecx  arg2
 * edx  arg3
 * esi  arg4
 * edi  arg5
 * ebp  user stack
 * 0(%ebp) arg6
 */
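/*
 * For reference, the vDSO's __kernel_vsyscall reaches this entry with a
 * sequence roughly like the sketch below (abridged from the 32-bit vDSO,
 * arch/x86/entry/vdso/vdso32/system_call.S; the exact sequence may differ
 * by kernel version).  The "movl %esp, %ebp" is what makes the
 * "ebp = user stack" convention above hold:
 *
 *	__kernel_vsyscall:
 *		pushl	%ecx
 *		pushl	%edx
 *		pushl	%ebp
 *		movl	%esp, %ebp
 *		sysenter
 */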
ENTRY(entry_SYSENTER_compat)
	/* Interrupts are off on entry. */
	SWAPGS

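	/*
	 * Note on the macro below: SWITCH_TO_KERNEL_CR3 (from calling.h)
	 * moves us off the user page tables and onto the kernel ones;
	 * when page-table isolation (PTI) is not in use it is patched
	 * into a no-op.
	 */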
	/* We are about to clobber %rsp anyway, so clobbering it here is OK */
	SWITCH_TO_KERNEL_CR3 scratch_reg=%rsp

	movq	PER_CPU_VAR(cpu_current_top_of_stack), %rsp

	/*
	 * User tracing code (ptrace or signal handlers) might assume that
	 * the saved RAX contains a 32-bit number when we're invoking a 32-bit
	 * syscall.  Just in case the high bits are nonzero, zero-extend
	 * the syscall number.  (This could almost certainly be deleted
	 * with no ill effects.)
	 */
	movl	%eax, %eax

	/* Construct struct pt_regs on stack */
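	/*
	 * For orientation, the relevant part of struct pt_regs (see
	 * arch/x86/include/asm/ptrace.h; abridged here) -- the pushes
	 * below build it from the highest address (ss) down to the
	 * lowest (r15):
	 *
	 *	struct pt_regs {
	 *		unsigned long r15, r14, r13, r12;
	 *		unsigned long bp, bx;
	 *		unsigned long r11, r10, r9, r8;
	 *		unsigned long ax, cx, dx, si, di;
	 *		unsigned long orig_ax;
	 *		unsigned long ip, cs, flags, sp, ss;
	 *	};
	 */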
	pushq	$__USER32_DS		/* pt_regs->ss */
	pushq	%rbp			/* pt_regs->sp (stashed in bp) */

	/*
	 * Push flags.  This is nasty.  First, interrupts are currently
	 * off, but we need pt_regs->flags to have IF set.  Second, even
	 * if TF was set when SYSENTER started, it's clear by now.  We fix
	 * that later using TIF_SINGLESTEP.
	 */
	pushfq				/* pt_regs->flags (except IF = 0) */
	orl	$X86_EFLAGS_IF, (%rsp)	/* Fix saved flags */
	pushq	$__USER32_CS		/* pt_regs->cs */
	pushq	$0			/* pt_regs->ip = 0 (placeholder) */
	pushq	%rax			/* pt_regs->orig_ax */
	pushq	%rdi			/* pt_regs->di */
	pushq	%rsi			/* pt_regs->si */
	pushq	%rdx			/* pt_regs->dx */
	pushq	%rcx			/* pt_regs->cx */
	pushq	$-ENOSYS		/* pt_regs->ax */
	pushq	$0			/* pt_regs->r8  = 0 */
	xorl	%r8d, %r8d		/* nospec   r8 */
	pushq	$0			/* pt_regs->r9  = 0 */
	xorl	%r9d, %r9d		/* nospec   r9 */
	pushq	$0			/* pt_regs->r10 = 0 */
	xorl	%r10d, %r10d		/* nospec   r10 */
	pushq	$0			/* pt_regs->r11 = 0 */
	xorl	%r11d, %r11d		/* nospec   r11 */
	pushq	%rbx			/* pt_regs->rbx */
	xorl	%ebx, %ebx		/* nospec   rbx */
	pushq	%rbp			/* pt_regs->rbp (will be overwritten) */
	xorl	%ebp, %ebp		/* nospec   rbp */
	pushq	$0			/* pt_regs->r12 = 0 */
	xorl	%r12d, %r12d		/* nospec   r12 */
	pushq	$0			/* pt_regs->r13 = 0 */
	xorl	%r13d, %r13d		/* nospec   r13 */
	pushq	$0			/* pt_regs->r14 = 0 */
	xorl	%r14d, %r14d		/* nospec   r14 */
	pushq	$0			/* pt_regs->r15 = 0 */
	xorl	%r15d, %r15d		/* nospec   r15 */
	cld

	/*
	 * SYSENTER doesn't filter flags, so we need to clear NT and AC
	 * ourselves.  To save a few cycles, we can check whether
	 * either was set instead of doing an unconditional popfq.
	 * This needs to happen before enabling interrupts so that
	 * we don't get preempted with NT set.
	 *
	 * If TF is set, we will single-step all the way to here -- do_debug
	 * will ignore all the traps.  (Yes, this is slow, but so is
	 * single-stepping in general.  This allows us to avoid having
	 * more complicated code to handle the case where a user program
	 * forces us to single-step through the SYSENTER entry code.)
	 *
	 * NB: .Lsysenter_fix_flags is a label with the code under it moved
	 * out-of-line as an optimization: NT is unlikely to be set in the
	 * majority of cases, so instead of polluting the I$ unnecessarily,
	 * we keep that code behind a branch which will predict as
	 * not-taken and therefore its instructions won't be fetched.
	 */
	testl	$X86_EFLAGS_NT|X86_EFLAGS_AC|X86_EFLAGS_TF, EFLAGS(%rsp)
	jnz	.Lsysenter_fix_flags
.Lsysenter_flags_fixed:

	/*
	 * User mode is traced as though IRQs are on, and SYSENTER
	 * turned them off.
	 */
	TRACE_IRQS_OFF

	movq	%rsp, %rdi
	call	do_fast_syscall_32
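	/*
	 * do_fast_syscall_32() returns 0 when the slow (IRET) return
	 * path must be used and nonzero when an opportunistic SYSRET
	 * is safe, hence the test below.
	 */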
	/* XEN PV guests always use IRET path */
	ALTERNATIVE "testl %eax, %eax; jz .Lsyscall_32_done", \
		    "jmp .Lsyscall_32_done", X86_FEATURE_XENPV
	jmp	sysret32_from_system_call

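/*
 * X86_EFLAGS_FIXED is just the always-set reserved bit 1 (0x2), so the
 * push+popfq below reloads RFLAGS with NT, AC and TF (among others)
 * cleared.
 */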
.Lsysenter_fix_flags:
	pushq	$X86_EFLAGS_FIXED
	popfq
	jmp	.Lsysenter_flags_fixed
GLOBAL(__end_entry_SYSENTER_compat)
ENDPROC(entry_SYSENTER_compat)

/*
 * 32-bit SYSCALL entry.
 *
 * 32-bit system calls through the vDSO's __kernel_vsyscall enter here
 * on 64-bit kernels running on AMD CPUs.
 *
 * The SYSCALL instruction, in principle, should *only* occur in the
 * vDSO.  In practice, it appears that this really is the case.
 * As evidence:
 *
 * - The calling convention for SYSCALL has changed several times without
 *   anyone noticing.
 *
 * - Prior to the in-kernel X86_BUG_SYSRET_SS_ATTRS fixup, any user
 *   task that did SYSCALL without immediately reloading SS would
 *   randomly crash.
 *
 * - Most programmers do not directly target AMD CPUs, and the 32-bit
 *   SYSCALL instruction does not exist on Intel CPUs.  Even on AMD
 *   CPUs, Linux disables the SYSCALL instruction on 32-bit kernels
 *   because the SYSCALL instruction in legacy/native 32-bit mode (as
 *   opposed to compat mode) is sufficiently poorly designed as to be
 *   essentially unusable.
 *
 * 32-bit SYSCALL saves RIP to RCX, clears RFLAGS.RF, then saves
 * RFLAGS to R11, then loads new SS, CS, and RIP from previously
 * programmed MSRs.  RFLAGS gets masked by a value from another MSR
 * (so CLD and CLAC are not needed).  SYSCALL does not save anything on
 * the stack and does not change RSP.
 *
 * Note: RFLAGS saving+masking-with-MSR happens only in Long mode
 * (in legacy 32-bit mode, IF, RF and VM bits are cleared and that's it).
 * Don't get confused: RFLAGS saving+masking depends on the Long Mode
 * Active bit (EFER.LMA=1), NOT on the bitness of the userspace where
 * SYSCALL executes or on the target CS descriptor's L bit (SYSCALL
 * does not read segment descriptors).
 *
 * Arguments:
 * eax  system call number
 * ecx  return address
 * ebx  arg1
 * ebp  arg2	(note: not saved in the stack frame, should not be touched)
 * edx  arg3
 * esi  arg4
 * edi  arg5
 * esp  user stack
 * 0(%esp) arg6
 */
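/*
 * For reference, the vDSO's __kernel_vsyscall reaches this entry with a
 * sequence roughly like the sketch below (abridged from the 32-bit vDSO;
 * the exact sequence may differ by kernel version).  The
 * "movl %ecx, %ebp" is why "ebp = arg2" above: SYSCALL itself clobbers
 * RCX with the return RIP, so the vDSO stashes arg2 in EBP first:
 *
 *	__kernel_vsyscall:
 *		pushl	%ecx
 *		pushl	%edx
 *		pushl	%ebp
 *		movl	%ecx, %ebp
 *		syscall
 */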
ENTRY(entry_SYSCALL_compat)
	/* Interrupts are off on entry. */
	swapgs

	/* Stash user ESP */
	movl	%esp, %r8d

	/* Use %rsp as scratch reg.  User ESP is stashed in r8 */
	SWITCH_TO_KERNEL_CR3 scratch_reg=%rsp

	/* Switch to the kernel stack */
	movq	PER_CPU_VAR(cpu_current_top_of_stack), %rsp

	/* Construct struct pt_regs on stack */
	pushq	$__USER32_DS		/* pt_regs->ss */
	pushq	%r8			/* pt_regs->sp */
	pushq	%r11			/* pt_regs->flags */
	pushq	$__USER32_CS		/* pt_regs->cs */
	pushq	%rcx			/* pt_regs->ip */
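	/*
	 * Xen PV guests jump in at the label below from their own entry
	 * asm, with the five-word hardware frame above already pushed;
	 * they skip the SWAPGS and the CR3 switch.
	 */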
GLOBAL(entry_SYSCALL_compat_after_hwframe)
	movl	%eax, %eax		/* discard orig_ax high bits */
	pushq	%rax			/* pt_regs->orig_ax */
	pushq	%rdi			/* pt_regs->di */
	pushq	%rsi			/* pt_regs->si */
	xorl	%esi, %esi		/* nospec   si */
	pushq	%rdx			/* pt_regs->dx */
	xorl	%edx, %edx		/* nospec   dx */
	pushq	%rbp			/* pt_regs->cx (stashed in bp) */
	xorl	%ecx, %ecx		/* nospec   cx */
	pushq	$-ENOSYS		/* pt_regs->ax */
	pushq	$0			/* pt_regs->r8  = 0 */
	xorl	%r8d, %r8d		/* nospec   r8 */
	pushq	$0			/* pt_regs->r9  = 0 */
	xorl	%r9d, %r9d		/* nospec   r9 */
	pushq	$0			/* pt_regs->r10 = 0 */
	xorl	%r10d, %r10d		/* nospec   r10 */
	pushq	$0			/* pt_regs->r11 = 0 */
	xorl	%r11d, %r11d		/* nospec   r11 */
	pushq	%rbx			/* pt_regs->rbx */
	xorl	%ebx, %ebx		/* nospec   rbx */
	pushq	%rbp			/* pt_regs->rbp (will be overwritten) */
	xorl	%ebp, %ebp		/* nospec   rbp */
	pushq	$0			/* pt_regs->r12 = 0 */
	xorl	%r12d, %r12d		/* nospec   r12 */
	pushq	$0			/* pt_regs->r13 = 0 */
	xorl	%r13d, %r13d		/* nospec   r13 */
	pushq	$0			/* pt_regs->r14 = 0 */
	xorl	%r14d, %r14d		/* nospec   r14 */
	pushq	$0			/* pt_regs->r15 = 0 */
	xorl	%r15d, %r15d		/* nospec   r15 */

	/*
	 * User mode is traced as though IRQs are on, and SYSCALL
	 * turned them off.
	 */
	TRACE_IRQS_OFF

	movq	%rsp, %rdi
	call	do_fast_syscall_32
	/* XEN PV guests always use IRET path */
	ALTERNATIVE "testl %eax, %eax; jz .Lsyscall_32_done", \
		    "jmp .Lsyscall_32_done", X86_FEATURE_XENPV

	/* Opportunistic SYSRET */
sysret32_from_system_call:
	/*
	 * We are not going to return to userspace from the trampoline
	 * stack.  So let's erase the thread stack right now.
	 */
	STACKLEAK_ERASE
	TRACE_IRQS_ON			/* User mode traces as IRQs on. */
	movq	RBX(%rsp), %rbx		/* pt_regs->rbx */
	movq	RBP(%rsp), %rbp		/* pt_regs->rbp */
	movq	EFLAGS(%rsp), %r11	/* pt_regs->flags (in r11) */
	movq	RIP(%rsp), %rcx		/* pt_regs->ip (in rcx) */
	addq	$RAX, %rsp		/* Skip r8-r15 */
	popq	%rax			/* pt_regs->rax */
	popq	%rdx			/* Skip pt_regs->cx */
	popq	%rdx			/* pt_regs->dx */
	popq	%rsi			/* pt_regs->si */
	popq	%rdi			/* pt_regs->di */

	/*
	 * USERGS_SYSRET32 does:
	 *  GSBASE = user's GS base
	 *  EIP = ECX
	 *  RFLAGS = R11
	 *  CS = __USER32_CS
	 *  SS = __USER_DS
	 *
	 * ECX will not match pt_regs->cx, but we're returning to a vDSO
	 * trampoline that will fix up RCX, so this is okay.
	 *
	 * R12-R15 are callee-saved, so they contain whatever was in them
	 * when the system call started, which is already known to user
	 * code.  We zero R8-R10 to avoid info leaks.
	 */
	movq	RSP-ORIG_RAX(%rsp), %rsp

	/*
	 * The original userspace %rsp (RSP-ORIG_RAX(%rsp)) is stored
	 * on the process stack which is not mapped to userspace and
	 * not readable after we SWITCH_TO_USER_CR3.  Delay the CR3
	 * switch until after the last reference to the process stack.
	 *
	 * %r8/%r9 are zeroed before the sysret, thus safe to clobber.
	 */
	SWITCH_TO_USER_CR3_NOSTACK scratch_reg=%r8 scratch_reg2=%r9

	xorl	%r8d, %r8d
	xorl	%r9d, %r9d
	xorl	%r10d, %r10d
	swapgs
	sysretl
END(entry_SYSCALL_compat)

/*
 * 32-bit legacy system call entry.
 *
 * 32-bit x86 Linux system calls traditionally used the INT $0x80
 * instruction.  INT $0x80 lands here.
 *
 * This entry point can be used by 32-bit and 64-bit programs to perform
 * 32-bit system calls.  Instances of INT $0x80 can be found inline in
 * various programs and libraries.  It is also used by the vDSO's
 * __kernel_vsyscall fallback for hardware that doesn't support a faster
 * entry method.  Restarted 32-bit system calls also fall back to INT
 * $0x80 regardless of what instruction was originally used to do the
 * system call.
 *
 * This is considered a slow path.  It is not used by most libc
 * implementations on modern hardware except during process startup.
 *
 * Arguments:
 * eax  system call number
 * ebx  arg1
 * ecx  arg2
 * edx  arg3
 * esi  arg4
 * edi  arg5
 * ebp  arg6
 */
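/*
 * For illustration, a userspace sketch of this convention (hypothetical
 * example, not part of the kernel) -- a 32-bit build issuing write(2),
 * which is system call 4 in the 32-bit table:
 *
 *	long ret;
 *	asm volatile ("int $0x80"
 *		      : "=a" (ret)
 *		      : "a" (4), "b" (1), "c" (buf), "d" (len)
 *		      : "memory");
 */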
ENTRY(entry_INT80_compat)
	/*
	 * Interrupts are off on entry.
	 */
	ASM_CLAC			/* Do this early to minimize exposure */
	SWAPGS

	/*
	 * User tracing code (ptrace or signal handlers) might assume that
	 * the saved RAX contains a 32-bit number when we're invoking a 32-bit
	 * syscall.  Just in case the high bits are nonzero, zero-extend
	 * the syscall number.  (This could almost certainly be deleted
	 * with no ill effects.)
	 */
	movl	%eax, %eax

	/* The thread-stack switch below expects orig_ax and rdi to be pushed already */
	pushq	%rax			/* pt_regs->orig_ax */
	pushq	%rdi			/* pt_regs->di */

	/* Need to switch before accessing the thread stack. */
	SWITCH_TO_KERNEL_CR3 scratch_reg=%rdi
	/* In the Xen PV case we already run on the thread stack. */
	ALTERNATIVE "movq %rsp, %rdi", "jmp .Lint80_keep_stack", X86_FEATURE_XENPV
	movq	PER_CPU_VAR(cpu_current_top_of_stack), %rsp

	pushq	6*8(%rdi)		/* regs->ss */
	pushq	5*8(%rdi)		/* regs->rsp */
	pushq	4*8(%rdi)		/* regs->eflags */
	pushq	3*8(%rdi)		/* regs->cs */
	pushq	2*8(%rdi)		/* regs->ip */
	pushq	1*8(%rdi)		/* regs->orig_ax */
	pushq	(%rdi)			/* pt_regs->di */
.Lint80_keep_stack:

	pushq	%rsi			/* pt_regs->si */
	xorl	%esi, %esi		/* nospec   si */
	pushq	%rdx			/* pt_regs->dx */
	xorl	%edx, %edx		/* nospec   dx */
	pushq	%rcx			/* pt_regs->cx */
	xorl	%ecx, %ecx		/* nospec   cx */
	pushq	$-ENOSYS		/* pt_regs->ax */
	pushq	%r8			/* pt_regs->r8 */
	xorl	%r8d, %r8d		/* nospec   r8 */
	pushq	%r9			/* pt_regs->r9 */
	xorl	%r9d, %r9d		/* nospec   r9 */
	pushq	%r10			/* pt_regs->r10 */
	xorl	%r10d, %r10d		/* nospec   r10 */
	pushq	%r11			/* pt_regs->r11 */
	xorl	%r11d, %r11d		/* nospec   r11 */
	pushq	%rbx			/* pt_regs->rbx */
	xorl	%ebx, %ebx		/* nospec   rbx */
	pushq	%rbp			/* pt_regs->rbp */
	xorl	%ebp, %ebp		/* nospec   rbp */
	pushq	%r12			/* pt_regs->r12 */
	xorl	%r12d, %r12d		/* nospec   r12 */
	pushq	%r13			/* pt_regs->r13 */
	xorl	%r13d, %r13d		/* nospec   r13 */
	pushq	%r14			/* pt_regs->r14 */
	xorl	%r14d, %r14d		/* nospec   r14 */
	pushq	%r15			/* pt_regs->r15 */
	xorl	%r15d, %r15d		/* nospec   r15 */
	cld

	/*
	 * User mode is traced as though IRQs are on, and the interrupt
	 * gate turned them off.
	 */
	TRACE_IRQS_OFF

	movq	%rsp, %rdi
	call	do_int80_syscall_32
.Lsyscall_32_done:

	/* Go back to user mode. */
	TRACE_IRQS_ON
	jmp	swapgs_restore_regs_and_return_to_usermode
END(entry_INT80_compat)