Greg Kroah-Hartman | b244131 | 2017-11-01 15:07:57 +0100 | [diff] [blame] | 1 | /* SPDX-License-Identifier: GPL-2.0 */ |
Jeremy Fitzhardinge | 6487673 | 2007-07-17 18:37:07 -0700 | [diff] [blame] | 2 | /* |
Juergen Gross | edcb5cf | 2017-08-16 19:31:56 +0200 | [diff] [blame] | 3 | * Asm versions of Xen pv-ops, suitable for direct use. |
Tejun Heo | 130ace1 | 2009-02-06 00:57:48 +0900 | [diff] [blame] | 4 | * |
| 5 | * We only bother with direct forms (ie, vcpu in pda) of the |
Juergen Gross | edcb5cf | 2017-08-16 19:31:56 +0200 | [diff] [blame] | 6 | * operations here; the indirect forms are better handled in C. |
Jeremy Fitzhardinge | 6487673 | 2007-07-17 18:37:07 -0700 | [diff] [blame] | 7 | */ |
| 8 | |
Jeremy Fitzhardinge | 6487673 | 2007-07-17 18:37:07 -0700 | [diff] [blame] | 9 | #include <asm/thread_info.h> |
Jeremy Fitzhardinge | 6487673 | 2007-07-17 18:37:07 -0700 | [diff] [blame] | 10 | #include <asm/processor-flags.h> |
Jeremy Fitzhardinge | 9ec2b80 | 2007-07-17 18:37:07 -0700 | [diff] [blame] | 11 | #include <asm/segment.h> |
H. Peter Anvin | 8f6380b | 2012-04-20 12:19:52 -0700 | [diff] [blame] | 12 | #include <asm/asm.h> |
Jeremy Fitzhardinge | 9ec2b80 | 2007-07-17 18:37:07 -0700 | [diff] [blame] | 13 | |
| 14 | #include <xen/interface/xen.h> |
Jeremy Fitzhardinge | 6487673 | 2007-07-17 18:37:07 -0700 | [diff] [blame] | 15 | |
Juergen Gross | edcb5cf | 2017-08-16 19:31:56 +0200 | [diff] [blame] | 16 | #include <linux/linkage.h> |
Jeremy Fitzhardinge | 9ec2b80 | 2007-07-17 18:37:07 -0700 | [diff] [blame] | 17 | |
Juergen Gross | edcb5cf | 2017-08-16 19:31:56 +0200 | [diff] [blame] | 18 | /* Pseudo-flag used for virtual NMI, which we don't implement yet */ |
/*
 * The value is bit 31 of the saved EFLAGS image.  xen_iret tests it together
 * with X86_EFLAGS_VM and, if either bit is set, leaves through hyper_iret.
 */
| 19 | #define XEN_EFLAGS_NMI 0x80000000 |
Jeremy Fitzhardinge | 6487673 | 2007-07-17 18:37:07 -0700 | [diff] [blame] | 20 | |
Jeremy Fitzhardinge | 9ec2b80 | 2007-07-17 18:37:07 -0700 | [diff] [blame] | 21 | /* |
Tejun Heo | 130ace1 | 2009-02-06 00:57:48 +0900 | [diff] [blame] | 22 | * This is run where a normal iret would be run, with the same stack setup: |
| 23 | * 8: eflags |
| 24 | * 4: cs |
| 25 | * esp-> 0: eip |
| 26 | * |
| 27 | * This attempts to make sure that any pending events are dealt with |
| 28 | * on return to usermode, but there is a small window in which an |
| 29 | * event can happen just before entering usermode. If the nested |
| 30 | * interrupt ends up setting one of the TIF_WORK_MASK pending work |
| 31 | * flags, they will not be tested again before returning to |
| 32 | * usermode. This means that a process can end up with pending work, |
| 33 | * which will be unprocessed until the process enters and leaves the |
| 34 | * kernel again, which could be an unbounded amount of time. This |
| 35 | * means that a pending signal or reschedule event could be |
| 36 | * indefinitely delayed. |
| 37 | * |
| 38 | * The fix is to notice a nested interrupt in the critical window, and |
| 39 | * if one occurs, then fold the nested interrupt into the current |
| 40 | * interrupt stack frame, and re-process it iteratively rather than |
| 41 | * recursively. This means that it will exit via the normal path, and |
| 42 | * all pending work will be dealt with appropriately. |
| 43 | * |
| 44 | * Because the nested interrupt handler needs to deal with the current |
| 45 | * stack state in whatever form it's in, we keep things simple by only |
| 46 | * using a single register which is pushed/popped on the stack. |
Jeremy Fitzhardinge | 9ec2b80 | 2007-07-17 18:37:07 -0700 | [diff] [blame] | 47 | */ |
Boris Ostrovsky | 4461bbc | 2014-04-10 12:17:09 -0400 | [diff] [blame] | 48 | |
/*
 * POP_FS: reload %fs from the 16-bit value on top of the stack.
 *
 * The pop at label 1 is paired, through _ASM_EXTABLE(1b,2b), with the code
 * at label 2 that is emitted into the .fixup section.  Assuming the usual
 * _ASM_EXTABLE(fault, fixup) meaning from <asm/asm.h>, a fault while loading
 * the saved selector runs that fixup: it overwrites the saved word on the
 * stack with 0 and jumps back to label 1, so the retried pop loads a zero
 * selector into %fs.
 *
 * Each expansion defines the numeric labels 1 and 2.  Code following an
 * expansion that uses "1b" or "2b" will therefore bind to these labels, not
 * to any label defined before the macro was invoked.
 */
| 49 | .macro POP_FS |
| 50 | 1: |
| 51 | popw %fs |
| 52 | .pushsection .fixup, "ax" |
/* fault fixup: replace the saved selector with 0, then retry the pop */
| 53 | 2: movw $0, (%esp) |
| 54 | jmp 1b |
| 55 | .popsection |
| 56 | _ASM_EXTABLE(1b,2b) |
| 57 | .endm |
| 58 | |
/*
 * xen_iret: return through an iret frame (eip, cs, eflags at 0/4/8(%esp))
 * while setting the vcpu's event mask from the IF bit of the EFLAGS image
 * that is about to be restored.
 *
 * %eax is the only scratch register: it is pushed on entry and popped again
 * just before iret_restore_end.
 *
 * The labels xen_iret_start_crit, iret_restore_end and xen_iret_end_crit are
 * compared against interrupted EIP values (iret_restore_end by
 * xen_iret_crit_fixup; the start/end pair by xen_hypervisor_callback,
 * according to the comment on xen_iret_crit_fixup), so the position of each
 * label relative to the surrounding instructions is significant.
 */
Jiri Slaby | 78762b0 | 2019-10-11 13:51:05 +0200 | [diff] [blame] | 59 | SYM_CODE_START(xen_iret) |
Jeremy Fitzhardinge | 9ec2b80 | 2007-07-17 18:37:07 -0700 | [diff] [blame] | 60 | /* test eflags for special cases */ |
| 61 | testl $(X86_EFLAGS_VM | XEN_EFLAGS_NMI), 8(%esp) |
| 62 | jnz hyper_iret |
| 63 | |
| 64 | push %eax |
| 65 | ESP_OFFSET=4 # bytes pushed onto stack |
/*
 * ESP_OFFSET is an assembler symbol, not an instruction: it lets the stack
 * offset used further down be written relative to the frame as it was on
 * entry, before %eax was pushed.
 */
| 66 | |
Boris Ostrovsky | 4461bbc | 2014-04-10 12:17:09 -0400 | [diff] [blame] | 67 | /* Store vcpu_info pointer for easy access */ |
Jeremy Fitzhardinge | 9ec2b80 | 2007-07-17 18:37:07 -0700 | [diff] [blame] | 68 | #ifdef CONFIG_SMP |
/*
 * Borrow %fs: save the current selector (16-bit push), load
 * __KERNEL_PERCPU, read xen_vcpu through it, then restore the saved
 * selector with POP_FS.  The 2 bytes pushed here are popped again before
 * the next stack-relative access, so ESP_OFFSET stays valid.
 */
Boris Ostrovsky | 4461bbc | 2014-04-10 12:17:09 -0400 | [diff] [blame] | 69 | pushw %fs |
| 70 | movl $(__KERNEL_PERCPU), %eax |
| 71 | movl %eax, %fs |
| 72 | movl %fs:xen_vcpu, %eax |
| 73 | POP_FS |
Jeremy Fitzhardinge | 9ec2b80 | 2007-07-17 18:37:07 -0700 | [diff] [blame] | 74 | #else |
/* non-SMP build: xen_vcpu is read directly through %ss */
Jan Beulich | 13d2b4d | 2013-01-24 13:11:10 +0000 | [diff] [blame] | 75 | movl %ss:xen_vcpu, %eax |
Jeremy Fitzhardinge | 9ec2b80 | 2007-07-17 18:37:07 -0700 | [diff] [blame] | 76 | #endif |
| 77 | |
| 78 | /* check IF state we're restoring */ |
| 79 | testb $X86_EFLAGS_IF>>8, 8+1+ESP_OFFSET(%esp) |
/*
 * The test is done on the second byte of the saved EFLAGS (hence the >>8 on
 * the mask and the +1 on the offset).  ZF is set when IF is clear.
 */
| 80 | |
Tejun Heo | 130ace1 | 2009-02-06 00:57:48 +0900 | [diff] [blame] | 81 | /* |
| 82 | * Maybe enable events. Once this happens we could get a |
| 83 | * recursive event, so the critical region starts immediately |
| 84 | * afterwards. However, if that happens we don't end up |
| 85 | * resuming the code, so we don't have to be worried about |
| 86 | * being preempted to another CPU. |
| 87 | */ |
/* setz stores ZF from the testb above: mask = 1 if IF is clear, else 0 */
Jan Beulich | 13d2b4d | 2013-01-24 13:11:10 +0000 | [diff] [blame] | 88 | setz %ss:XEN_vcpu_info_mask(%eax) |
Jeremy Fitzhardinge | 9ec2b80 | 2007-07-17 18:37:07 -0700 | [diff] [blame] | 89 | xen_iret_start_crit: |
| 90 | |
| 91 | /* check for unmasked and pending */ |
Jan Beulich | 13d2b4d | 2013-01-24 13:11:10 +0000 | [diff] [blame] | 92 | cmpw $0x0001, %ss:XEN_vcpu_info_pending(%eax) |
/*
 * 16-bit compare: equal only when the byte at XEN_vcpu_info_pending is 1
 * and the byte after it is 0.  Presumably that second byte is the mask
 * byte written above - confirm against the XEN_vcpu_info_* offsets.
 */
Jeremy Fitzhardinge | 9ec2b80 | 2007-07-17 18:37:07 -0700 | [diff] [blame] | 93 | |
Tejun Heo | 130ace1 | 2009-02-06 00:57:48 +0900 | [diff] [blame] | 94 | /* |
| 95 | * If there's something pending, mask events again so we can |
Igor Mammedov | d198d49 | 2011-09-01 13:46:55 +0200 | [diff] [blame] | 96 | * jump back into xen_hypervisor_callback. Otherwise do not |
| 97 | * touch XEN_vcpu_info_mask. |
Tejun Heo | 130ace1 | 2009-02-06 00:57:48 +0900 | [diff] [blame] | 98 | */ |
Igor Mammedov | d198d49 | 2011-09-01 13:46:55 +0200 | [diff] [blame] | 99 | jne 1f |
Jan Beulich | 13d2b4d | 2013-01-24 13:11:10 +0000 | [diff] [blame] | 100 | movb $1, %ss:XEN_vcpu_info_mask(%eax) |
Jeremy Fitzhardinge | 9ec2b80 | 2007-07-17 18:37:07 -0700 | [diff] [blame] | 101 | |
Igor Mammedov | d198d49 | 2011-09-01 13:46:55 +0200 | [diff] [blame] | 102 | 1: popl %eax |
/*
 * Neither the movb nor the popl modifies EFLAGS, so the ZF produced by the
 * cmpw above is still the condition tested by the je below.
 */
Jeremy Fitzhardinge | 9ec2b80 | 2007-07-17 18:37:07 -0700 | [diff] [blame] | 103 | |
Tejun Heo | 130ace1 | 2009-02-06 00:57:48 +0900 | [diff] [blame] | 104 | /* |
| 105 | * From this point on the registers are restored and the stack |
| 106 | * updated, so we don't need to worry about it if we're |
| 107 | * preempted |
| 108 | */ |
Jeremy Fitzhardinge | 9ec2b80 | 2007-07-17 18:37:07 -0700 | [diff] [blame] | 109 | iret_restore_end: |
| 110 | |
Tejun Heo | 130ace1 | 2009-02-06 00:57:48 +0900 | [diff] [blame] | 111 | /* |
| 112 | * Jump to hypervisor_callback after fixing up the stack. |
| 113 | * Events are masked, so jumping out of the critical region is |
| 114 | * OK. |
| 115 | */ |
Jeremy Fitzhardinge | 9ec2b80 | 2007-07-17 18:37:07 -0700 | [diff] [blame] | 116 | je xen_hypervisor_callback |
| 117 | |
Jeremy Fitzhardinge | 90e9f53 | 2008-03-17 16:37:12 -0700 | [diff] [blame] | 118 | 1: iret |
Jeremy Fitzhardinge | 9ec2b80 | 2007-07-17 18:37:07 -0700 | [diff] [blame] | 119 | xen_iret_end_crit: |
H. Peter Anvin | 8f6380b | 2012-04-20 12:19:52 -0700 | [diff] [blame] | 120 | _ASM_EXTABLE(1b, iret_exc) |
/*
 * Exception-table entry for the iret at the second "1:" label above; the
 * target iret_exc is not defined in this part of the file.
 */
Jeremy Fitzhardinge | 9ec2b80 | 2007-07-17 18:37:07 -0700 | [diff] [blame] | 121 | |
| 122 | hyper_iret: |
| 123 | /* put this out of line since it's very rarely used */ |
/*
 * Reached before %eax is pushed, so the stack is exactly as on entry.
 * Jumps to the __HYPERVISOR_iret entry of hypercall_page; the "* 32"
 * implies that entries in that page are 32 bytes apart.
 */
| 124 | jmp hypercall_page + __HYPERVISOR_iret * 32 |
Jiri Slaby | 78762b0 | 2019-10-11 13:51:05 +0200 | [diff] [blame] | 125 | SYM_CODE_END(xen_iret) |
Jeremy Fitzhardinge | 9ec2b80 | 2007-07-17 18:37:07 -0700 | [diff] [blame] | 126 | |
/* make the critical-range boundary labels visible outside this file */
| 127 | .globl xen_iret_start_crit, xen_iret_end_crit |
| 128 | |
| 129 | /* |
Jan Beulich | 29b810f | 2019-11-11 15:32:12 +0100 | [diff] [blame] | 130 | * This is called by xen_hypervisor_callback in entry_32.S when it sees |
Tejun Heo | 130ace1 | 2009-02-06 00:57:48 +0900 | [diff] [blame] | 131 | * that the EIP at the time of interrupt was between |
Jan Beulich | 29b810f | 2019-11-11 15:32:12 +0100 | [diff] [blame] | 132 | * xen_iret_start_crit and xen_iret_end_crit. |
Tejun Heo | 130ace1 | 2009-02-06 00:57:48 +0900 | [diff] [blame] | 133 | * |
| 134 | * The stack format at this point is: |
| 135 | * ---------------- |
| 136 | * ss : (ss/esp may be present if we came from usermode) |
| 137 | * esp : |
| 138 | * eflags } outer exception info |
| 139 | * cs } |
| 140 | * eip } |
Jan Beulich | 29b810f | 2019-11-11 15:32:12 +0100 | [diff] [blame] | 141 | * ---------------- |
Tejun Heo | 130ace1 | 2009-02-06 00:57:48 +0900 | [diff] [blame] | 142 | * eax : outer eax if it hasn't been restored |
| 143 | * ---------------- |
Jan Beulich | 29b810f | 2019-11-11 15:32:12 +0100 | [diff] [blame] | 144 | * eflags } |
| 145 | * cs } nested exception info |
| 146 | * eip } |
| 147 | * return address : (into xen_hypervisor_callback) |
Tejun Heo | 130ace1 | 2009-02-06 00:57:48 +0900 | [diff] [blame] | 148 | * |
Jan Beulich | 29b810f | 2019-11-11 15:32:12 +0100 | [diff] [blame] | 149 | * In order to deliver the nested exception properly, we need to discard the |
| 150 | * nested exception frame such that when we handle the exception, we do it |
| 151 | * in the context of the outer exception rather than starting a new one. |
Tejun Heo | 130ace1 | 2009-02-06 00:57:48 +0900 | [diff] [blame] | 152 | * |
Jan Beulich | 29b810f | 2019-11-11 15:32:12 +0100 | [diff] [blame] | 153 | * The only caveat is that if the outer eax hasn't been restored yet (i.e. |
| 154 | * it's still on stack), we need to restore its value here. |
Jeremy Fitzhardinge | 9ec2b80 | 2007-07-17 18:37:07 -0700 | [diff] [blame] | 155 | */ |
/*
 * Offsets used below, relative to %esp on entry:
 *     0: return address
 *   1*4: nested EIP
 *   2*4: nested CS
 *   3*4: nested EFLAGS
 *   4*4: outer EAX (only while xen_iret has not popped it yet)
 *
 * "ret $N" pops the return address and then releases N further bytes, which
 * gives the three exits:
 *   ret        leave the stack as it is
 *   ret $3*4   drop the nested EIP/CS/EFLAGS
 *   ret $4*4   reload %eax first, then drop the nested frame and the saved
 *              EAX slot
 */
Jiri Slaby | 78762b0 | 2019-10-11 13:51:05 +0200 | [diff] [blame] | 156 | SYM_CODE_START(xen_iret_crit_fixup) |
Jeremy Fitzhardinge | 9ec2b80 | 2007-07-17 18:37:07 -0700 | [diff] [blame] | 157 | /* |
Tejun Heo | 130ace1 | 2009-02-06 00:57:48 +0900 | [diff] [blame] | 158 | * Paranoia: Make sure we're really coming from kernel space. |
| 159 | * One could imagine a case where userspace jumps into the |
| 160 | * critical range address, but just before the CPU delivers a |
Jan Beulich | 922eea2 | 2019-11-11 15:32:59 +0100 | [diff] [blame] | 161 | * PF, it decides to deliver an interrupt instead. Unlikely? |
| 162 | * Definitely. Easy to avoid? Yes. |
Jeremy Fitzhardinge | 9ec2b80 | 2007-07-17 18:37:07 -0700 | [diff] [blame] | 163 | */ |
Jan Beulich | 922eea2 | 2019-11-11 15:32:59 +0100 | [diff] [blame] | 164 | testb $2, 2*4(%esp) /* nested CS */ |
| 165 | jnz 2f |
/*
 * Bit 1 of the low byte of the nested CS is part of the selector's
 * privilege-level field.  If it is set the frame is treated as not coming
 * from kernel space and label 2 returns without touching the stack.
 * Presumably the kernel's own CS has this bit clear - confirm.
 */
Jeremy Fitzhardinge | 9ec2b80 | 2007-07-17 18:37:07 -0700 | [diff] [blame] | 166 | |
Tejun Heo | 130ace1 | 2009-02-06 00:57:48 +0900 | [diff] [blame] | 167 | /* |
| 168 | * If eip is before iret_restore_end then stack |
| 169 | * hasn't been restored yet. |
| 170 | */ |
Jan Beulich | 29b810f | 2019-11-11 15:32:12 +0100 | [diff] [blame] | 171 | cmpl $iret_restore_end, 1*4(%esp) |
Jeremy Fitzhardinge | 9ec2b80 | 2007-07-17 18:37:07 -0700 | [diff] [blame] | 172 | jae 1f |
/*
 * Unsigned compare: nested EIP >= iret_restore_end means xen_iret has
 * already popped %eax, so only the three-word nested frame is dropped.
 */
| 173 | |
Jan Beulich | 29b810f | 2019-11-11 15:32:12 +0100 | [diff] [blame] | 174 | movl 4*4(%esp), %eax /* load outer EAX */ |
| 175 | ret $4*4 /* discard nested EIP, CS, and EFLAGS as |
| 176 | * well as the just restored EAX */ |
Jeremy Fitzhardinge | 9ec2b80 | 2007-07-17 18:37:07 -0700 | [diff] [blame] | 177 | |
Jan Beulich | 29b810f | 2019-11-11 15:32:12 +0100 | [diff] [blame] | 178 | 1: |
| 179 | ret $3*4 /* discard nested EIP, CS, and EFLAGS */ |
Jeremy Fitzhardinge | 9ec2b80 | 2007-07-17 18:37:07 -0700 | [diff] [blame] | 180 | |
Jan Beulich | 29b810f | 2019-11-11 15:32:12 +0100 | [diff] [blame] | 181 | 2: |
| 182 | ret |
Jiri Slaby | 78762b0 | 2019-10-11 13:51:05 +0200 | [diff] [blame] | 183 | SYM_CODE_END(xen_iret_crit_fixup) |