// SPDX-License-Identifier: GPL-2.0-only
/*
 * Kernel-based Virtual Machine driver for Linux
 *
 * AMD SVM support
 *
 * Copyright (C) 2006 Qumranet, Inc.
 * Copyright 2010 Red Hat, Inc. and/or its affiliates.
 *
 * Authors:
 *   Yaniv Kamay <yaniv@qumranet.com>
 *   Avi Kivity <avi@qumranet.com>
 */

#define pr_fmt(fmt) "SVM: " fmt

#include <linux/kvm_types.h>
#include <linux/kvm_host.h>
#include <linux/kernel.h>

#include <asm/msr-index.h>
#include <asm/debugreg.h>

#include "kvm_emulate.h"
#include "trace.h"
#include "mmu.h"
#include "x86.h"
#include "cpuid.h"
#include "lapic.h"
#include "svm.h"

#define CC KVM_NESTED_VMENTER_CONSISTENCY_CHECK

static void nested_svm_inject_npf_exit(struct kvm_vcpu *vcpu,
				       struct x86_exception *fault)
{
	struct vcpu_svm *svm = to_svm(vcpu);

	if (svm->vmcb->control.exit_code != SVM_EXIT_NPF) {
		/*
		 * TODO: track the cause of the nested page fault, and
		 * correctly fill in the high bits of exit_info_1.
		 */
		svm->vmcb->control.exit_code = SVM_EXIT_NPF;
		svm->vmcb->control.exit_code_hi = 0;
		svm->vmcb->control.exit_info_1 = (1ULL << 32);
		svm->vmcb->control.exit_info_2 = fault->address;
	}

	svm->vmcb->control.exit_info_1 &= ~0xffffffffULL;
	svm->vmcb->control.exit_info_1 |= fault->error_code;

	nested_svm_vmexit(svm);
}

static void svm_inject_page_fault_nested(struct kvm_vcpu *vcpu, struct x86_exception *fault)
{
	struct vcpu_svm *svm = to_svm(vcpu);
	WARN_ON(!is_guest_mode(vcpu));

	if (vmcb_is_intercept(&svm->nested.ctl, INTERCEPT_EXCEPTION_OFFSET + PF_VECTOR) &&
	    !svm->nested.nested_run_pending) {
		svm->vmcb->control.exit_code = SVM_EXIT_EXCP_BASE + PF_VECTOR;
		svm->vmcb->control.exit_code_hi = 0;
		svm->vmcb->control.exit_info_1 = fault->error_code;
		svm->vmcb->control.exit_info_2 = fault->address;
		nested_svm_vmexit(svm);
	} else {
		kvm_inject_page_fault(vcpu, fault);
	}
}

static u64 nested_svm_get_tdp_pdptr(struct kvm_vcpu *vcpu, int index)
{
	struct vcpu_svm *svm = to_svm(vcpu);
	u64 cr3 = svm->nested.ctl.nested_cr3;
	u64 pdpte;
	int ret;

	ret = kvm_vcpu_read_guest_page(vcpu, gpa_to_gfn(cr3), &pdpte,
				       offset_in_page(cr3) + index * 8, 8);
	if (ret)
		return 0;
	return pdpte;
}

static unsigned long nested_svm_get_tdp_cr3(struct kvm_vcpu *vcpu)
{
	struct vcpu_svm *svm = to_svm(vcpu);

	return svm->nested.ctl.nested_cr3;
}

static void nested_svm_init_mmu_context(struct kvm_vcpu *vcpu)
{
	struct vcpu_svm *svm = to_svm(vcpu);

	WARN_ON(mmu_is_nested(vcpu));

	vcpu->arch.mmu = &vcpu->arch.guest_mmu;

	/*
	 * The NPT format depends on L1's CR4 and EFER, which are in vmcb01. Note,
	 * when called via KVM_SET_NESTED_STATE, that state may _not_ match current
	 * vCPU state. CR0.WP is explicitly ignored, while CR0.PG is required.
	 */
	kvm_init_shadow_npt_mmu(vcpu, X86_CR0_PG, svm->vmcb01.ptr->save.cr4,
				svm->vmcb01.ptr->save.efer,
				svm->nested.ctl.nested_cr3);
	vcpu->arch.mmu->get_guest_pgd = nested_svm_get_tdp_cr3;
	vcpu->arch.mmu->get_pdptr = nested_svm_get_tdp_pdptr;
	vcpu->arch.mmu->inject_page_fault = nested_svm_inject_npf_exit;
	reset_shadow_zero_bits_mask(vcpu, vcpu->arch.mmu);
	vcpu->arch.walk_mmu = &vcpu->arch.nested_mmu;
}

static void nested_svm_uninit_mmu_context(struct kvm_vcpu *vcpu)
{
	vcpu->arch.mmu = &vcpu->arch.root_mmu;
	vcpu->arch.walk_mmu = &vcpu->arch.root_mmu;
}

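/*
 * Rebuild the intercept vectors of the currently active VMCB for a nested
 * guest: start from L1's intercepts (vmcb01), drop the bits that are only
 * relevant while L1 itself runs, then OR in the intercepts L1 requested for
 * L2 (the cached vmcb12 controls in svm->nested.ctl).
 */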
void recalc_intercepts(struct vcpu_svm *svm)
{
	struct vmcb_control_area *c, *h, *g;
	unsigned int i;

	vmcb_mark_dirty(svm->vmcb, VMCB_INTERCEPTS);

	if (!is_guest_mode(&svm->vcpu))
		return;

	c = &svm->vmcb->control;
	h = &svm->vmcb01.ptr->control;
	g = &svm->nested.ctl;

	for (i = 0; i < MAX_INTERCEPT; i++)
		c->intercepts[i] = h->intercepts[i];

	if (g->int_ctl & V_INTR_MASKING_MASK) {
		/* We only want the cr8 intercept bits of L1 */
		vmcb_clr_intercept(c, INTERCEPT_CR8_READ);
		vmcb_clr_intercept(c, INTERCEPT_CR8_WRITE);

		/*
		 * Once running L2 with HF_VINTR_MASK, EFLAGS.IF does not
		 * affect any interrupt we may want to inject; therefore,
		 * interrupt window vmexits are irrelevant to L0.
		 */
		vmcb_clr_intercept(c, INTERCEPT_VINTR);
	}

	/* We don't want to see VMMCALLs from a nested guest */
	vmcb_clr_intercept(c, INTERCEPT_VMMCALL);

	for (i = 0; i < MAX_INTERCEPT; i++)
		c->intercepts[i] |= g->intercepts[i];
}

static void copy_vmcb_control_area(struct vmcb_control_area *dst,
				   struct vmcb_control_area *from)
{
	unsigned int i;

	for (i = 0; i < MAX_INTERCEPT; i++)
		dst->intercepts[i] = from->intercepts[i];

	dst->iopm_base_pa = from->iopm_base_pa;
	dst->msrpm_base_pa = from->msrpm_base_pa;
	dst->tsc_offset = from->tsc_offset;
	/* asid not copied, it is handled manually for svm->vmcb. */
	dst->tlb_ctl = from->tlb_ctl;
	dst->int_ctl = from->int_ctl;
	dst->int_vector = from->int_vector;
	dst->int_state = from->int_state;
	dst->exit_code = from->exit_code;
	dst->exit_code_hi = from->exit_code_hi;
	dst->exit_info_1 = from->exit_info_1;
	dst->exit_info_2 = from->exit_info_2;
	dst->exit_int_info = from->exit_int_info;
	dst->exit_int_info_err = from->exit_int_info_err;
	dst->nested_ctl = from->nested_ctl;
	dst->event_inj = from->event_inj;
	dst->event_inj_err = from->event_inj_err;
	dst->nested_cr3 = from->nested_cr3;
	dst->virt_ext = from->virt_ext;
	dst->pause_filter_count = from->pause_filter_count;
	dst->pause_filter_thresh = from->pause_filter_thresh;
}

static bool nested_svm_vmrun_msrpm(struct vcpu_svm *svm)
{
	/*
	 * This function merges the msr permission bitmaps of kvm and the
	 * nested vmcb. It is optimized in that it only merges the parts where
	 * the kvm msr permission bitmap may contain zero bits
	 */
	int i;

	if (!(vmcb_is_intercept(&svm->nested.ctl, INTERCEPT_MSR_PROT)))
		return true;

	for (i = 0; i < MSRPM_OFFSETS; i++) {
		u32 value, p;
		u64 offset;

		if (msrpm_offsets[i] == 0xffffffff)
			break;

		p = msrpm_offsets[i];
		offset = svm->nested.ctl.msrpm_base_pa + (p * 4);

		if (kvm_vcpu_read_guest(&svm->vcpu, offset, &value, 4))
			return false;

		svm->nested.msrpm[p] = svm->msrpm[p] | value;
	}

	svm->vmcb->control.msrpm_base_pa = __sme_set(__pa(svm->nested.msrpm));

	return true;
}

/*
 * Bits 11:0 of bitmap address are ignored by hardware
 */
static bool nested_svm_check_bitmap_pa(struct kvm_vcpu *vcpu, u64 pa, u32 size)
{
	u64 addr = PAGE_ALIGN(pa);

	return kvm_vcpu_is_legal_gpa(vcpu, addr) &&
	    kvm_vcpu_is_legal_gpa(vcpu, addr + size - 1);
}

static bool nested_vmcb_check_controls(struct kvm_vcpu *vcpu,
				       struct vmcb_control_area *control)
{
	if (CC(!vmcb_is_intercept(control, INTERCEPT_VMRUN)))
		return false;

	if (CC(control->asid == 0))
		return false;

	if (CC((control->nested_ctl & SVM_NESTED_CTL_NP_ENABLE) && !npt_enabled))
		return false;

	if (CC(!nested_svm_check_bitmap_pa(vcpu, control->msrpm_base_pa,
					   MSRPM_SIZE)))
		return false;
	if (CC(!nested_svm_check_bitmap_pa(vcpu, control->iopm_base_pa,
					   IOPM_SIZE)))
		return false;

	return true;
}

static bool nested_vmcb_check_cr3_cr4(struct kvm_vcpu *vcpu,
				      struct vmcb_save_area *save)
{
	/*
	 * These checks are also performed by KVM_SET_SREGS,
	 * except that EFER.LMA is not checked by SVM against
	 * CR0.PG && EFER.LME.
	 */
	if ((save->efer & EFER_LME) && (save->cr0 & X86_CR0_PG)) {
		if (CC(!(save->cr4 & X86_CR4_PAE)) ||
		    CC(!(save->cr0 & X86_CR0_PE)) ||
		    CC(kvm_vcpu_is_illegal_gpa(vcpu, save->cr3)))
			return false;
	}

	if (CC(!kvm_is_valid_cr4(vcpu, save->cr4)))
		return false;

	return true;
}

/* Common checks that apply to both L1 and L2 state. */
static bool nested_vmcb_valid_sregs(struct kvm_vcpu *vcpu,
				    struct vmcb_save_area *save)
{
	/*
	 * FIXME: these should be done after copying the fields,
	 * to avoid TOC/TOU races. For these save area checks
	 * the possible damage is limited since kvm_set_cr0 and
	 * kvm_set_cr4 handle failure; EFER_SVME is an exception
	 * so it is force-set later in nested_prepare_vmcb_save.
	 */
	if (CC(!(save->efer & EFER_SVME)))
		return false;

	if (CC((save->cr0 & X86_CR0_CD) == 0 && (save->cr0 & X86_CR0_NW)) ||
	    CC(save->cr0 & ~0xffffffffULL))
		return false;

	if (CC(!kvm_dr6_valid(save->dr6)) || CC(!kvm_dr7_valid(save->dr7)))
		return false;

	if (!nested_vmcb_check_cr3_cr4(vcpu, save))
		return false;

	if (CC(!kvm_valid_efer(vcpu, save->efer)))
		return false;

	return true;
}

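/*
 * Cache the vmcb12 control area in svm->nested.ctl so that later checks and
 * the eventual vmexit do not have to re-read (potentially changing) guest
 * memory.
 */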
static void nested_load_control_from_vmcb12(struct vcpu_svm *svm,
					    struct vmcb_control_area *control)
{
	copy_vmcb_control_area(&svm->nested.ctl, control);

	/* Copy it here because nested_svm_check_controls will check it. */
	svm->nested.ctl.asid = control->asid;
	svm->nested.ctl.msrpm_base_pa &= ~0x0fffULL;
	svm->nested.ctl.iopm_base_pa &= ~0x0fffULL;
}

/*
 * Synchronize fields that are written by the processor, so that
 * they can be copied back into the vmcb12.
 */
void nested_sync_control_from_vmcb02(struct vcpu_svm *svm)
{
	u32 mask;
	svm->nested.ctl.event_inj = svm->vmcb->control.event_inj;
	svm->nested.ctl.event_inj_err = svm->vmcb->control.event_inj_err;

	/* Only a few fields of int_ctl are written by the processor. */
	mask = V_IRQ_MASK | V_TPR_MASK;
	if (!(svm->nested.ctl.int_ctl & V_INTR_MASKING_MASK) &&
	    svm_is_intercept(svm, INTERCEPT_VINTR)) {
		/*
		 * In order to request an interrupt window, L0 is usurping
		 * svm->vmcb->control.int_ctl and possibly setting V_IRQ
		 * even if it was clear in L1's VMCB. Restoring it would be
		 * wrong. However, in this case V_IRQ will remain true until
		 * interrupt_window_interception calls svm_clear_vintr and
		 * restores int_ctl. We can just leave it aside.
		 */
		mask &= ~V_IRQ_MASK;
	}
	svm->nested.ctl.int_ctl &= ~mask;
	svm->nested.ctl.int_ctl |= svm->vmcb->control.int_ctl & mask;
}

/*
 * Transfer any event that L0 or L1 wanted to inject into L2 to
 * EXIT_INT_INFO.
 */
static void nested_save_pending_event_to_vmcb12(struct vcpu_svm *svm,
						struct vmcb *vmcb12)
{
	struct kvm_vcpu *vcpu = &svm->vcpu;
	u32 exit_int_info = 0;
	unsigned int nr;

	if (vcpu->arch.exception.injected) {
		nr = vcpu->arch.exception.nr;
		exit_int_info = nr | SVM_EVTINJ_VALID | SVM_EVTINJ_TYPE_EXEPT;

		if (vcpu->arch.exception.has_error_code) {
			exit_int_info |= SVM_EVTINJ_VALID_ERR;
			vmcb12->control.exit_int_info_err =
				vcpu->arch.exception.error_code;
		}

	} else if (vcpu->arch.nmi_injected) {
		exit_int_info = SVM_EVTINJ_VALID | SVM_EVTINJ_TYPE_NMI;

	} else if (vcpu->arch.interrupt.injected) {
		nr = vcpu->arch.interrupt.nr;
		exit_int_info = nr | SVM_EVTINJ_VALID;

		if (vcpu->arch.interrupt.soft)
			exit_int_info |= SVM_EVTINJ_TYPE_SOFT;
		else
			exit_int_info |= SVM_EVTINJ_TYPE_INTR;
	}

	vmcb12->control.exit_int_info = exit_int_info;
}

static inline bool nested_npt_enabled(struct vcpu_svm *svm)
{
	return svm->nested.ctl.nested_ctl & SVM_NESTED_CTL_NP_ENABLE;
}

static void nested_svm_transition_tlb_flush(struct kvm_vcpu *vcpu)
{
	/*
	 * TODO: optimize unconditional TLB flush/MMU sync. A partial list of
	 * things to fix before this can be conditional:
	 *
	 *  - Flush TLBs for both L1 and L2 remote TLB flush
	 *  - Honor L1's request to flush an ASID on nested VMRUN
	 *  - Sync nested NPT MMU on VMRUN that flushes L2's ASID[*]
	 *  - Don't crush a pending TLB flush in vmcb02 on nested VMRUN
	 *  - Flush L1's ASID on KVM_REQ_TLB_FLUSH_GUEST
	 *
	 * [*] Unlike nested EPT, SVM's ASID management can invalidate nested
	 *     NPT guest-physical mappings on VMRUN.
	 */
	kvm_make_request(KVM_REQ_MMU_SYNC, vcpu);
	kvm_make_request(KVM_REQ_TLB_FLUSH_CURRENT, vcpu);
}

/*
 * Load guest's/host's cr3 on nested vmentry or vmexit. @nested_npt is true
 * if we are emulating VM-Entry into a guest with NPT enabled.
 */
static int nested_svm_load_cr3(struct kvm_vcpu *vcpu, unsigned long cr3,
			       bool nested_npt, bool reload_pdptrs)
{
	if (CC(kvm_vcpu_is_illegal_gpa(vcpu, cr3)))
		return -EINVAL;

	if (reload_pdptrs && !nested_npt && is_pae_paging(vcpu) &&
	    CC(!load_pdptrs(vcpu, vcpu->arch.walk_mmu, cr3)))
		return -EINVAL;

	if (!nested_npt)
		kvm_mmu_new_pgd(vcpu, cr3);

	vcpu->arch.cr3 = cr3;
	kvm_register_mark_available(vcpu, VCPU_EXREG_CR3);

	kvm_init_mmu(vcpu);

	return 0;
}

void nested_vmcb02_compute_g_pat(struct vcpu_svm *svm)
{
	if (!svm->nested.vmcb02.ptr)
		return;

	/* FIXME: merge g_pat from vmcb01 and vmcb12. */
	svm->nested.vmcb02.ptr->save.g_pat = svm->vmcb01.ptr->save.g_pat;
}

static void nested_vmcb02_prepare_save(struct vcpu_svm *svm, struct vmcb *vmcb12)
{
	bool new_vmcb12 = false;

	nested_vmcb02_compute_g_pat(svm);

	/* Load the nested guest state */
	if (svm->nested.vmcb12_gpa != svm->nested.last_vmcb12_gpa) {
		new_vmcb12 = true;
		svm->nested.last_vmcb12_gpa = svm->nested.vmcb12_gpa;
	}

	if (unlikely(new_vmcb12 || vmcb_is_dirty(vmcb12, VMCB_SEG))) {
		svm->vmcb->save.es = vmcb12->save.es;
		svm->vmcb->save.cs = vmcb12->save.cs;
		svm->vmcb->save.ss = vmcb12->save.ss;
		svm->vmcb->save.ds = vmcb12->save.ds;
		svm->vmcb->save.cpl = vmcb12->save.cpl;
		vmcb_mark_dirty(svm->vmcb, VMCB_SEG);
	}

	if (unlikely(new_vmcb12 || vmcb_is_dirty(vmcb12, VMCB_DT))) {
		svm->vmcb->save.gdtr = vmcb12->save.gdtr;
		svm->vmcb->save.idtr = vmcb12->save.idtr;
		vmcb_mark_dirty(svm->vmcb, VMCB_DT);
	}

	kvm_set_rflags(&svm->vcpu, vmcb12->save.rflags | X86_EFLAGS_FIXED);

	/*
	 * Force-set EFER_SVME even though it is checked earlier on the
	 * VMCB12, because the guest can flip the bit between the check
	 * and now. Clearing EFER_SVME would call svm_free_nested.
	 */
	svm_set_efer(&svm->vcpu, vmcb12->save.efer | EFER_SVME);

	svm_set_cr0(&svm->vcpu, vmcb12->save.cr0);
	svm_set_cr4(&svm->vcpu, vmcb12->save.cr4);

	svm->vcpu.arch.cr2 = vmcb12->save.cr2;

	kvm_rax_write(&svm->vcpu, vmcb12->save.rax);
	kvm_rsp_write(&svm->vcpu, vmcb12->save.rsp);
	kvm_rip_write(&svm->vcpu, vmcb12->save.rip);

	/* In case we don't even reach vcpu_run, the fields are not updated */
	svm->vmcb->save.rax = vmcb12->save.rax;
	svm->vmcb->save.rsp = vmcb12->save.rsp;
	svm->vmcb->save.rip = vmcb12->save.rip;

	/* These bits will be set properly on the first execution when new_vmcb12 is true */
	if (unlikely(new_vmcb12 || vmcb_is_dirty(vmcb12, VMCB_DR))) {
		svm->vmcb->save.dr7 = vmcb12->save.dr7 | DR7_FIXED_1;
		svm->vcpu.arch.dr6 = vmcb12->save.dr6 | DR6_ACTIVE_LOW;
		vmcb_mark_dirty(svm->vmcb, VMCB_DR);
	}
}

static void nested_vmcb02_prepare_control(struct vcpu_svm *svm)
{
	const u32 mask = V_INTR_MASKING_MASK | V_GIF_ENABLE_MASK | V_GIF_MASK;
	struct kvm_vcpu *vcpu = &svm->vcpu;

	/*
	 * Filled at exit: exit_code, exit_code_hi, exit_info_1, exit_info_2,
	 * exit_int_info, exit_int_info_err, next_rip, insn_len, insn_bytes.
	 */

	/*
	 * Also covers avic_vapic_bar, avic_backing_page, avic_logical_id,
	 * avic_physical_id.
	 */
	WARN_ON(svm->vmcb01.ptr->control.int_ctl & AVIC_ENABLE_MASK);

	/* Copied from vmcb01. msrpm_base can be overwritten later. */
	svm->vmcb->control.nested_ctl = svm->vmcb01.ptr->control.nested_ctl;
	svm->vmcb->control.iopm_base_pa = svm->vmcb01.ptr->control.iopm_base_pa;
	svm->vmcb->control.msrpm_base_pa = svm->vmcb01.ptr->control.msrpm_base_pa;

	/* Done at vmrun: asid. */

	/* Also overwritten later if necessary. */
	svm->vmcb->control.tlb_ctl = TLB_CONTROL_DO_NOTHING;

	/* nested_cr3. */
	if (nested_npt_enabled(svm))
		nested_svm_init_mmu_context(vcpu);

	svm->vmcb->control.tsc_offset = vcpu->arch.tsc_offset =
		vcpu->arch.l1_tsc_offset + svm->nested.ctl.tsc_offset;

	svm->vmcb->control.int_ctl =
		(svm->nested.ctl.int_ctl & ~mask) |
		(svm->vmcb01.ptr->control.int_ctl & mask);

	svm->vmcb->control.virt_ext = svm->nested.ctl.virt_ext;
	svm->vmcb->control.int_vector = svm->nested.ctl.int_vector;
	svm->vmcb->control.int_state = svm->nested.ctl.int_state;
	svm->vmcb->control.event_inj = svm->nested.ctl.event_inj;
	svm->vmcb->control.event_inj_err = svm->nested.ctl.event_inj_err;

	svm->vmcb->control.pause_filter_count = svm->nested.ctl.pause_filter_count;
	svm->vmcb->control.pause_filter_thresh = svm->nested.ctl.pause_filter_thresh;

	nested_svm_transition_tlb_flush(vcpu);

	/* Enter Guest-Mode */
	enter_guest_mode(vcpu);

	/*
	 * Merge guest and host intercepts - must be called with vcpu in
	 * guest-mode to take effect.
	 */
	recalc_intercepts(svm);
}

static void nested_svm_copy_common_state(struct vmcb *from_vmcb, struct vmcb *to_vmcb)
{
	/*
	 * Some VMCB state is shared between L1 and L2 and thus has to be
	 * moved at the time of nested vmrun and vmexit.
	 *
	 * VMLOAD/VMSAVE state would also belong in this category, but KVM
	 * always performs VMLOAD and VMSAVE from the VMCB01.
	 */
	to_vmcb->save.spec_ctrl = from_vmcb->save.spec_ctrl;
}

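/*
 * World switch from L1 to L2: switch the active VMCB to vmcb02, load the
 * vmcb12 control and save state into it, and switch to L2's page tables.
 */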
int enter_svm_guest_mode(struct kvm_vcpu *vcpu, u64 vmcb12_gpa,
			 struct vmcb *vmcb12)
{
	struct vcpu_svm *svm = to_svm(vcpu);
	int ret;

	trace_kvm_nested_vmrun(svm->vmcb->save.rip, vmcb12_gpa,
			       vmcb12->save.rip,
			       vmcb12->control.int_ctl,
			       vmcb12->control.event_inj,
			       vmcb12->control.nested_ctl);

	trace_kvm_nested_intercepts(vmcb12->control.intercepts[INTERCEPT_CR] & 0xffff,
				    vmcb12->control.intercepts[INTERCEPT_CR] >> 16,
				    vmcb12->control.intercepts[INTERCEPT_EXCEPTION],
				    vmcb12->control.intercepts[INTERCEPT_WORD3],
				    vmcb12->control.intercepts[INTERCEPT_WORD4],
				    vmcb12->control.intercepts[INTERCEPT_WORD5]);


	svm->nested.vmcb12_gpa = vmcb12_gpa;

	WARN_ON(svm->vmcb == svm->nested.vmcb02.ptr);

	nested_svm_copy_common_state(svm->vmcb01.ptr, svm->nested.vmcb02.ptr);

	svm_switch_vmcb(svm, &svm->nested.vmcb02);
	nested_vmcb02_prepare_control(svm);
	nested_vmcb02_prepare_save(svm, vmcb12);

	ret = nested_svm_load_cr3(&svm->vcpu, vmcb12->save.cr3,
				  nested_npt_enabled(svm), true);
	if (ret)
		return ret;

	if (!npt_enabled)
		vcpu->arch.mmu->inject_page_fault = svm_inject_page_fault_nested;

	svm_set_gif(svm, true);

	return 0;
}

int nested_svm_vmrun(struct kvm_vcpu *vcpu)
{
	struct vcpu_svm *svm = to_svm(vcpu);
	int ret;
	struct vmcb *vmcb12;
	struct kvm_host_map map;
	u64 vmcb12_gpa;

	if (is_smm(vcpu)) {
		kvm_queue_exception(vcpu, UD_VECTOR);
		return 1;
	}

	vmcb12_gpa = svm->vmcb->save.rax;
	ret = kvm_vcpu_map(vcpu, gpa_to_gfn(vmcb12_gpa), &map);
	if (ret == -EINVAL) {
		kvm_inject_gp(vcpu, 0);
		return 1;
	} else if (ret) {
		return kvm_skip_emulated_instruction(vcpu);
	}

	ret = kvm_skip_emulated_instruction(vcpu);

	vmcb12 = map.hva;

	if (WARN_ON_ONCE(!svm->nested.initialized))
		return -EINVAL;

	nested_load_control_from_vmcb12(svm, &vmcb12->control);

	if (!nested_vmcb_valid_sregs(vcpu, &vmcb12->save) ||
	    !nested_vmcb_check_controls(vcpu, &svm->nested.ctl)) {
		vmcb12->control.exit_code = SVM_EXIT_ERR;
		vmcb12->control.exit_code_hi = 0;
		vmcb12->control.exit_info_1 = 0;
		vmcb12->control.exit_info_2 = 0;
		goto out;
	}


	/* Clear internal status */
	kvm_clear_exception_queue(vcpu);
	kvm_clear_interrupt_queue(vcpu);

	/*
	 * Since vmcb01 is not in use, we can use it to store some of the L1
	 * state.
	 */
	svm->vmcb01.ptr->save.efer = vcpu->arch.efer;
	svm->vmcb01.ptr->save.cr0 = kvm_read_cr0(vcpu);
	svm->vmcb01.ptr->save.cr4 = vcpu->arch.cr4;
	svm->vmcb01.ptr->save.rflags = kvm_get_rflags(vcpu);
	svm->vmcb01.ptr->save.rip = kvm_rip_read(vcpu);

	if (!npt_enabled)
		svm->vmcb01.ptr->save.cr3 = kvm_read_cr3(vcpu);

	svm->nested.nested_run_pending = 1;

	if (enter_svm_guest_mode(vcpu, vmcb12_gpa, vmcb12))
		goto out_exit_err;

	if (nested_svm_vmrun_msrpm(svm))
		goto out;

out_exit_err:
	svm->nested.nested_run_pending = 0;

	svm->vmcb->control.exit_code = SVM_EXIT_ERR;
	svm->vmcb->control.exit_code_hi = 0;
	svm->vmcb->control.exit_info_1 = 0;
	svm->vmcb->control.exit_info_2 = 0;

	nested_svm_vmexit(svm);

out:
	kvm_vcpu_unmap(vcpu, &map, true);

	return ret;
}

void nested_svm_vmloadsave(struct vmcb *from_vmcb, struct vmcb *to_vmcb)
{
	to_vmcb->save.fs = from_vmcb->save.fs;
	to_vmcb->save.gs = from_vmcb->save.gs;
	to_vmcb->save.tr = from_vmcb->save.tr;
	to_vmcb->save.ldtr = from_vmcb->save.ldtr;
	to_vmcb->save.kernel_gs_base = from_vmcb->save.kernel_gs_base;
	to_vmcb->save.star = from_vmcb->save.star;
	to_vmcb->save.lstar = from_vmcb->save.lstar;
	to_vmcb->save.cstar = from_vmcb->save.cstar;
	to_vmcb->save.sfmask = from_vmcb->save.sfmask;
	to_vmcb->save.sysenter_cs = from_vmcb->save.sysenter_cs;
	to_vmcb->save.sysenter_esp = from_vmcb->save.sysenter_esp;
	to_vmcb->save.sysenter_eip = from_vmcb->save.sysenter_eip;
}

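/*
 * Emulate #VMEXIT from L2 to L1: copy the L2 exit state into the guest's
 * vmcb12, restore L1 state from vmcb01, and resume L1 at the instruction
 * following its VMRUN.
 */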
int nested_svm_vmexit(struct vcpu_svm *svm)
{
	struct kvm_vcpu *vcpu = &svm->vcpu;
	struct vmcb *vmcb12;
	struct vmcb *vmcb = svm->vmcb;
	struct kvm_host_map map;
	int rc;

	/* Triple faults in L2 should never escape. */
	WARN_ON_ONCE(kvm_check_request(KVM_REQ_TRIPLE_FAULT, vcpu));

	rc = kvm_vcpu_map(vcpu, gpa_to_gfn(svm->nested.vmcb12_gpa), &map);
	if (rc) {
		if (rc == -EINVAL)
			kvm_inject_gp(vcpu, 0);
		return 1;
	}

	vmcb12 = map.hva;

	/* Exit Guest-Mode */
	leave_guest_mode(vcpu);
	svm->nested.vmcb12_gpa = 0;
	WARN_ON_ONCE(svm->nested.nested_run_pending);

	kvm_clear_request(KVM_REQ_GET_NESTED_STATE_PAGES, vcpu);

	/* in case we halted in L2 */
	svm->vcpu.arch.mp_state = KVM_MP_STATE_RUNNABLE;

	/* Give the current vmcb to the guest */

	vmcb12->save.es = vmcb->save.es;
	vmcb12->save.cs = vmcb->save.cs;
	vmcb12->save.ss = vmcb->save.ss;
	vmcb12->save.ds = vmcb->save.ds;
	vmcb12->save.gdtr = vmcb->save.gdtr;
	vmcb12->save.idtr = vmcb->save.idtr;
	vmcb12->save.efer = svm->vcpu.arch.efer;
	vmcb12->save.cr0 = kvm_read_cr0(vcpu);
	vmcb12->save.cr3 = kvm_read_cr3(vcpu);
	vmcb12->save.cr2 = vmcb->save.cr2;
	vmcb12->save.cr4 = svm->vcpu.arch.cr4;
	vmcb12->save.rflags = kvm_get_rflags(vcpu);
	vmcb12->save.rip = kvm_rip_read(vcpu);
	vmcb12->save.rsp = kvm_rsp_read(vcpu);
	vmcb12->save.rax = kvm_rax_read(vcpu);
	vmcb12->save.dr7 = vmcb->save.dr7;
	vmcb12->save.dr6 = svm->vcpu.arch.dr6;
	vmcb12->save.cpl = vmcb->save.cpl;

	vmcb12->control.int_state = vmcb->control.int_state;
	vmcb12->control.exit_code = vmcb->control.exit_code;
	vmcb12->control.exit_code_hi = vmcb->control.exit_code_hi;
	vmcb12->control.exit_info_1 = vmcb->control.exit_info_1;
	vmcb12->control.exit_info_2 = vmcb->control.exit_info_2;

	if (vmcb12->control.exit_code != SVM_EXIT_ERR)
		nested_save_pending_event_to_vmcb12(svm, vmcb12);

	if (svm->nrips_enabled)
		vmcb12->control.next_rip = vmcb->control.next_rip;

	vmcb12->control.int_ctl = svm->nested.ctl.int_ctl;
	vmcb12->control.tlb_ctl = svm->nested.ctl.tlb_ctl;
	vmcb12->control.event_inj = svm->nested.ctl.event_inj;
	vmcb12->control.event_inj_err = svm->nested.ctl.event_inj_err;

	vmcb12->control.pause_filter_count =
		svm->vmcb->control.pause_filter_count;
	vmcb12->control.pause_filter_thresh =
		svm->vmcb->control.pause_filter_thresh;

	nested_svm_copy_common_state(svm->nested.vmcb02.ptr, svm->vmcb01.ptr);

	svm_switch_vmcb(svm, &svm->vmcb01);

	/*
	 * On vmexit the GIF is set to false and
	 * no event can be injected in L1.
	 */
	svm_set_gif(svm, false);
	svm->vmcb->control.exit_int_info = 0;

	svm->vcpu.arch.tsc_offset = svm->vcpu.arch.l1_tsc_offset;
	if (svm->vmcb->control.tsc_offset != svm->vcpu.arch.tsc_offset) {
		svm->vmcb->control.tsc_offset = svm->vcpu.arch.tsc_offset;
		vmcb_mark_dirty(svm->vmcb, VMCB_INTERCEPTS);
	}

	svm->nested.ctl.nested_cr3 = 0;

	/*
	 * Restore processor state that had been saved in vmcb01
	 */
	kvm_set_rflags(vcpu, svm->vmcb->save.rflags);
	svm_set_efer(vcpu, svm->vmcb->save.efer);
	svm_set_cr0(vcpu, svm->vmcb->save.cr0 | X86_CR0_PE);
	svm_set_cr4(vcpu, svm->vmcb->save.cr4);
	kvm_rax_write(vcpu, svm->vmcb->save.rax);
	kvm_rsp_write(vcpu, svm->vmcb->save.rsp);
	kvm_rip_write(vcpu, svm->vmcb->save.rip);

	svm->vcpu.arch.dr7 = DR7_FIXED_1;
	kvm_update_dr7(&svm->vcpu);

	trace_kvm_nested_vmexit_inject(vmcb12->control.exit_code,
				       vmcb12->control.exit_info_1,
				       vmcb12->control.exit_info_2,
				       vmcb12->control.exit_int_info,
				       vmcb12->control.exit_int_info_err,
				       KVM_ISA_SVM);

	kvm_vcpu_unmap(vcpu, &map, true);

	nested_svm_transition_tlb_flush(vcpu);

	nested_svm_uninit_mmu_context(vcpu);

	rc = nested_svm_load_cr3(vcpu, svm->vmcb->save.cr3, false, true);
	if (rc)
		return 1;

	/*
	 * Drop what we picked up for L2 via svm_complete_interrupts() so it
	 * doesn't end up in L1.
	 */
	svm->vcpu.arch.nmi_injected = false;
	kvm_clear_exception_queue(vcpu);
	kvm_clear_interrupt_queue(vcpu);

	/*
	 * If we are here following the completion of a VMRUN that
	 * is being single-stepped, queue the pending #DB intercept
	 * right now so that it can be accounted for before we execute
	 * L1's next instruction.
	 */
	if (unlikely(svm->vmcb->save.rflags & X86_EFLAGS_TF))
		kvm_queue_exception(&(svm->vcpu), DB_VECTOR);

	return 0;
}

static void nested_svm_triple_fault(struct kvm_vcpu *vcpu)
{
	nested_svm_simple_vmexit(to_svm(vcpu), SVM_EXIT_SHUTDOWN);
}

int svm_allocate_nested(struct vcpu_svm *svm)
{
	struct page *vmcb02_page;

	if (svm->nested.initialized)
		return 0;

	vmcb02_page = alloc_page(GFP_KERNEL_ACCOUNT | __GFP_ZERO);
	if (!vmcb02_page)
		return -ENOMEM;
	svm->nested.vmcb02.ptr = page_address(vmcb02_page);
	svm->nested.vmcb02.pa = __sme_set(page_to_pfn(vmcb02_page) << PAGE_SHIFT);

	svm->nested.msrpm = svm_vcpu_alloc_msrpm();
	if (!svm->nested.msrpm)
		goto err_free_vmcb02;
	svm_vcpu_init_msrpm(&svm->vcpu, svm->nested.msrpm);

	svm->nested.initialized = true;
	return 0;

err_free_vmcb02:
	__free_page(vmcb02_page);
	return -ENOMEM;
}

void svm_free_nested(struct vcpu_svm *svm)
{
	if (!svm->nested.initialized)
		return;

	svm_vcpu_free_msrpm(svm->nested.msrpm);
	svm->nested.msrpm = NULL;

	__free_page(virt_to_page(svm->nested.vmcb02.ptr));
	svm->nested.vmcb02.ptr = NULL;

	/*
	 * When last_vmcb12_gpa matches the current vmcb12 gpa,
	 * some vmcb12 fields are not loaded if they are marked clean
	 * in the vmcb12, since in this case they are up to date already.
	 *
	 * When the vmcb02 is freed, this optimization becomes invalid.
	 */
	svm->nested.last_vmcb12_gpa = INVALID_GPA;

	svm->nested.initialized = false;
}

/*
 * Forcibly leave nested mode in order to be able to reset the VCPU later on.
 */
void svm_leave_nested(struct vcpu_svm *svm)
{
	struct kvm_vcpu *vcpu = &svm->vcpu;

	if (is_guest_mode(vcpu)) {
		svm->nested.nested_run_pending = 0;
		svm->nested.vmcb12_gpa = INVALID_GPA;

		leave_guest_mode(vcpu);

		svm_switch_vmcb(svm, &svm->vmcb01);

		nested_svm_uninit_mmu_context(vcpu);
		vmcb_mark_all_dirty(svm->vmcb);
	}

	kvm_clear_request(KVM_REQ_GET_NESTED_STATE_PAGES, vcpu);
}

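/*
 * Consult L1's MSR permission bitmap to decide whether an intercepted
 * RDMSR/WRMSR in L2 should be reflected to L1 or handled by L0.
 */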
static int nested_svm_exit_handled_msr(struct vcpu_svm *svm)
{
	u32 offset, msr, value;
	int write, mask;

	if (!(vmcb_is_intercept(&svm->nested.ctl, INTERCEPT_MSR_PROT)))
		return NESTED_EXIT_HOST;

	msr = svm->vcpu.arch.regs[VCPU_REGS_RCX];
	offset = svm_msrpm_offset(msr);
	write = svm->vmcb->control.exit_info_1 & 1;
	mask = 1 << ((2 * (msr & 0xf)) + write);

	if (offset == MSR_INVALID)
		return NESTED_EXIT_DONE;

	/* Offset is in 32 bit units but need in 8 bit units */
	offset *= 4;

	if (kvm_vcpu_read_guest(&svm->vcpu, svm->nested.ctl.msrpm_base_pa + offset, &value, 4))
		return NESTED_EXIT_DONE;

	return (value & mask) ? NESTED_EXIT_DONE : NESTED_EXIT_HOST;
}

static int nested_svm_intercept_ioio(struct vcpu_svm *svm)
{
	unsigned port, size, iopm_len;
	u16 val, mask;
	u8 start_bit;
	u64 gpa;

	if (!(vmcb_is_intercept(&svm->nested.ctl, INTERCEPT_IOIO_PROT)))
		return NESTED_EXIT_HOST;

	port = svm->vmcb->control.exit_info_1 >> 16;
	size = (svm->vmcb->control.exit_info_1 & SVM_IOIO_SIZE_MASK) >>
		SVM_IOIO_SIZE_SHIFT;
	gpa = svm->nested.ctl.iopm_base_pa + (port / 8);
	start_bit = port % 8;
	iopm_len = (start_bit + size > 8) ? 2 : 1;
	mask = (0xf >> (4 - size)) << start_bit;
	val = 0;

	if (kvm_vcpu_read_guest(&svm->vcpu, gpa, &val, iopm_len))
		return NESTED_EXIT_DONE;

	return (val & mask) ? NESTED_EXIT_DONE : NESTED_EXIT_HOST;
}

static int nested_svm_intercept(struct vcpu_svm *svm)
{
	u32 exit_code = svm->vmcb->control.exit_code;
	int vmexit = NESTED_EXIT_HOST;

	switch (exit_code) {
	case SVM_EXIT_MSR:
		vmexit = nested_svm_exit_handled_msr(svm);
		break;
	case SVM_EXIT_IOIO:
		vmexit = nested_svm_intercept_ioio(svm);
		break;
	case SVM_EXIT_READ_CR0 ... SVM_EXIT_WRITE_CR8: {
		if (vmcb_is_intercept(&svm->nested.ctl, exit_code))
			vmexit = NESTED_EXIT_DONE;
		break;
	}
	case SVM_EXIT_READ_DR0 ... SVM_EXIT_WRITE_DR7: {
		if (vmcb_is_intercept(&svm->nested.ctl, exit_code))
			vmexit = NESTED_EXIT_DONE;
		break;
	}
	case SVM_EXIT_EXCP_BASE ... SVM_EXIT_EXCP_BASE + 0x1f: {
		/*
		 * Host-intercepted exceptions have been checked already in
		 * nested_svm_exit_special. There is nothing to do here,
		 * the vmexit is injected by svm_check_nested_events.
		 */
		vmexit = NESTED_EXIT_DONE;
		break;
	}
	case SVM_EXIT_ERR: {
		vmexit = NESTED_EXIT_DONE;
		break;
	}
	default: {
		if (vmcb_is_intercept(&svm->nested.ctl, exit_code))
			vmexit = NESTED_EXIT_DONE;
	}
	}

	return vmexit;
}

int nested_svm_exit_handled(struct vcpu_svm *svm)
{
	int vmexit;

	vmexit = nested_svm_intercept(svm);

	if (vmexit == NESTED_EXIT_DONE)
		nested_svm_vmexit(svm);

	return vmexit;
}

int nested_svm_check_permissions(struct kvm_vcpu *vcpu)
{
	if (!(vcpu->arch.efer & EFER_SVME) || !is_paging(vcpu)) {
		kvm_queue_exception(vcpu, UD_VECTOR);
		return 1;
	}

	if (to_svm(vcpu)->vmcb->save.cpl) {
		kvm_inject_gp(vcpu, 0);
		return 1;
	}

	return 0;
}

static bool nested_exit_on_exception(struct vcpu_svm *svm)
{
	unsigned int nr = svm->vcpu.arch.exception.nr;

	return (svm->nested.ctl.intercepts[INTERCEPT_EXCEPTION] & BIT(nr));
}

static void nested_svm_inject_exception_vmexit(struct vcpu_svm *svm)
{
	unsigned int nr = svm->vcpu.arch.exception.nr;

	svm->vmcb->control.exit_code = SVM_EXIT_EXCP_BASE + nr;
	svm->vmcb->control.exit_code_hi = 0;

	if (svm->vcpu.arch.exception.has_error_code)
		svm->vmcb->control.exit_info_1 = svm->vcpu.arch.exception.error_code;

	/*
	 * EXITINFO2 is undefined for all exception intercepts other
	 * than #PF.
	 */
	if (nr == PF_VECTOR) {
		if (svm->vcpu.arch.exception.nested_apf)
			svm->vmcb->control.exit_info_2 = svm->vcpu.arch.apf.nested_apf_token;
		else if (svm->vcpu.arch.exception.has_payload)
			svm->vmcb->control.exit_info_2 = svm->vcpu.arch.exception.payload;
		else
			svm->vmcb->control.exit_info_2 = svm->vcpu.arch.cr2;
	} else if (nr == DB_VECTOR) {
		/* See inject_pending_event. */
		kvm_deliver_exception_payload(&svm->vcpu);
		if (svm->vcpu.arch.dr7 & DR7_GD) {
			svm->vcpu.arch.dr7 &= ~DR7_GD;
			kvm_update_dr7(&svm->vcpu);
		}
	} else
		WARN_ON(svm->vcpu.arch.exception.has_payload);

	nested_svm_vmexit(svm);
}

static inline bool nested_exit_on_init(struct vcpu_svm *svm)
{
	return vmcb_is_intercept(&svm->nested.ctl, INTERCEPT_INIT);
}

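/*
 * Decide whether a pending INIT, exception, SMI, NMI or interrupt should be
 * turned into a synthesized vmexit to L1 instead of being injected into L2,
 * and defer it (-EBUSY) while an event injection or nested VMRUN is pending.
 */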
Paolo Bonzini33b22172020-04-17 10:24:18 -04001097static int svm_check_nested_events(struct kvm_vcpu *vcpu)
Joerg Roedel883b0a92020-03-24 10:41:52 +01001098{
1099 struct vcpu_svm *svm = to_svm(vcpu);
1100 bool block_nested_events =
Paolo Bonzinibd279622020-05-16 08:46:00 -04001101 kvm_event_needs_reinjection(vcpu) || svm->nested.nested_run_pending;
Paolo Bonzini5b6724082020-05-16 08:50:35 -04001102 struct kvm_lapic *apic = vcpu->arch.apic;
1103
1104 if (lapic_in_kernel(vcpu) &&
1105 test_bit(KVM_APIC_INIT, &apic->pending_events)) {
1106 if (block_nested_events)
1107 return -EBUSY;
1108 if (!nested_exit_on_init(svm))
1109 return 0;
Sean Christopherson3a87c7e2021-03-02 09:45:15 -08001110 nested_svm_simple_vmexit(svm, SVM_EXIT_INIT);
Paolo Bonzini5b6724082020-05-16 08:50:35 -04001111 return 0;
1112 }
Joerg Roedel883b0a92020-03-24 10:41:52 +01001113
Paolo Bonzini7c866632020-05-16 08:42:28 -04001114 if (vcpu->arch.exception.pending) {
Maxim Levitsky4020da32021-04-01 17:38:14 +03001115 /*
1116 * Only a pending nested run can block a pending exception.
1117 * Otherwise an injected NMI/interrupt should either be
1118 * lost or delivered to the nested hypervisor in the EXITINTINFO
1119 * vmcb field, while delivering the pending exception.
1120 */
1121 if (svm->nested.nested_run_pending)
Paolo Bonzini7c866632020-05-16 08:42:28 -04001122 return -EBUSY;
1123 if (!nested_exit_on_exception(svm))
1124 return 0;
1125 nested_svm_inject_exception_vmexit(svm);
1126 return 0;
1127 }
1128
Paolo Bonzini221e7612020-04-23 08:13:10 -04001129 if (vcpu->arch.smi_pending && !svm_smi_blocked(vcpu)) {
Paolo Bonzini55714cd2020-04-23 08:17:28 -04001130 if (block_nested_events)
1131 return -EBUSY;
Paolo Bonzini221e7612020-04-23 08:13:10 -04001132 if (!nested_exit_on_smi(svm))
1133 return 0;
Sean Christopherson3a87c7e2021-03-02 09:45:15 -08001134 nested_svm_simple_vmexit(svm, SVM_EXIT_SMI);
Paolo Bonzini55714cd2020-04-23 08:17:28 -04001135 return 0;
1136 }
1137
Paolo Bonzini221e7612020-04-23 08:13:10 -04001138 if (vcpu->arch.nmi_pending && !svm_nmi_blocked(vcpu)) {
Cathy Avery9c3d3702020-04-14 16:11:06 -04001139 if (block_nested_events)
1140 return -EBUSY;
Paolo Bonzini221e7612020-04-23 08:13:10 -04001141 if (!nested_exit_on_nmi(svm))
1142 return 0;
Sean Christopherson3a87c7e2021-03-02 09:45:15 -08001143 nested_svm_simple_vmexit(svm, SVM_EXIT_NMI);
Cathy Avery9c3d3702020-04-14 16:11:06 -04001144 return 0;
1145 }
1146
Paolo Bonzini221e7612020-04-23 08:13:10 -04001147 if (kvm_cpu_has_interrupt(vcpu) && !svm_interrupt_blocked(vcpu)) {
Joerg Roedel883b0a92020-03-24 10:41:52 +01001148 if (block_nested_events)
1149 return -EBUSY;
Paolo Bonzini221e7612020-04-23 08:13:10 -04001150 if (!nested_exit_on_intr(svm))
1151 return 0;
Sean Christopherson3a87c7e2021-03-02 09:45:15 -08001152 trace_kvm_nested_intr_vmexit(svm->vmcb->save.rip);
1153 nested_svm_simple_vmexit(svm, SVM_EXIT_INTR);
Joerg Roedel883b0a92020-03-24 10:41:52 +01001154 return 0;
1155 }
1156
1157 return 0;
1158}
1159
1160int nested_svm_exit_special(struct vcpu_svm *svm)
1161{
1162 u32 exit_code = svm->vmcb->control.exit_code;
1163
1164 switch (exit_code) {
1165 case SVM_EXIT_INTR:
1166 case SVM_EXIT_NMI:
Joerg Roedel883b0a92020-03-24 10:41:52 +01001167 case SVM_EXIT_NPF:
Paolo Bonzini7c866632020-05-16 08:42:28 -04001168 return NESTED_EXIT_HOST;
1169 case SVM_EXIT_EXCP_BASE ... SVM_EXIT_EXCP_BASE + 0x1f: {
1170 u32 excp_bits = 1 << (exit_code - SVM_EXIT_EXCP_BASE);
1171
Cathy Avery4995a362021-01-13 07:07:52 -05001172 if (svm->vmcb01.ptr->control.intercepts[INTERCEPT_EXCEPTION] &
1173 excp_bits)
Paolo Bonzini7c866632020-05-16 08:42:28 -04001174 return NESTED_EXIT_HOST;
1175 else if (exit_code == SVM_EXIT_EXCP_BASE + PF_VECTOR &&
Vitaly Kuznetsov68fd66f2020-05-25 16:41:17 +02001176 svm->vcpu.arch.apf.host_apf_flags)
Paolo Bonzini7c866632020-05-16 08:42:28 -04001177 /* Trap async PF even if not shadowing */
Joerg Roedel883b0a92020-03-24 10:41:52 +01001178 return NESTED_EXIT_HOST;
1179 break;
Paolo Bonzini7c866632020-05-16 08:42:28 -04001180 }
Joerg Roedel883b0a92020-03-24 10:41:52 +01001181 default:
1182 break;
1183 }
1184
1185 return NESTED_EXIT_CONTINUE;
1186}
Paolo Bonzini33b22172020-04-17 10:24:18 -04001187
Paolo Bonzinicc440cd2020-05-13 13:36:32 -04001188static int svm_get_nested_state(struct kvm_vcpu *vcpu,
1189 struct kvm_nested_state __user *user_kvm_nested_state,
1190 u32 user_data_size)
1191{
1192 struct vcpu_svm *svm;
1193 struct kvm_nested_state kvm_state = {
1194 .flags = 0,
1195 .format = KVM_STATE_NESTED_FORMAT_SVM,
1196 .size = sizeof(kvm_state),
1197 };
1198 struct vmcb __user *user_vmcb = (struct vmcb __user *)
1199 &user_kvm_nested_state->data.svm[0];
1200
1201 if (!vcpu)
1202 return kvm_state.size + KVM_STATE_NESTED_SVM_VMCB_SIZE;
1203
1204 svm = to_svm(vcpu);
1205
1206 if (user_data_size < kvm_state.size)
1207 goto out;
1208
1209 /* First fill in the header and copy it out. */
1210 if (is_guest_mode(vcpu)) {
Maxim Levitsky0dd16b52020-08-27 20:11:39 +03001211 kvm_state.hdr.svm.vmcb_pa = svm->nested.vmcb12_gpa;
Paolo Bonzinicc440cd2020-05-13 13:36:32 -04001212 kvm_state.size += KVM_STATE_NESTED_SVM_VMCB_SIZE;
1213 kvm_state.flags |= KVM_STATE_NESTED_GUEST_MODE;
1214
1215 if (svm->nested.nested_run_pending)
1216 kvm_state.flags |= KVM_STATE_NESTED_RUN_PENDING;
1217 }
1218
1219 if (gif_set(svm))
1220 kvm_state.flags |= KVM_STATE_NESTED_GIF_SET;
1221
1222 if (copy_to_user(user_kvm_nested_state, &kvm_state, sizeof(kvm_state)))
1223 return -EFAULT;
1224
1225 if (!is_guest_mode(vcpu))
1226 goto out;
1227
1228 /*
1229 * Copy over the full size of the VMCB rather than just the size
1230 * of the structs.
1231 */
1232 if (clear_user(user_vmcb, KVM_STATE_NESTED_SVM_VMCB_SIZE))
1233 return -EFAULT;
1234 if (copy_to_user(&user_vmcb->control, &svm->nested.ctl,
1235 sizeof(user_vmcb->control)))
1236 return -EFAULT;
Cathy Avery4995a362021-01-13 07:07:52 -05001237 if (copy_to_user(&user_vmcb->save, &svm->vmcb01.ptr->save,
Paolo Bonzinicc440cd2020-05-13 13:36:32 -04001238 sizeof(user_vmcb->save)))
1239 return -EFAULT;
Paolo Bonzinicc440cd2020-05-13 13:36:32 -04001240out:
1241 return kvm_state.size;
1242}
1243
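/*
 * KVM_SET_NESTED_STATE: validate the flags and the control/save areas
 * supplied by userspace, restore L1's pre-VMRUN state into vmcb01 and,
 * if KVM_STATE_NESTED_GUEST_MODE is set, re-enter guest mode on vmcb02,
 * deferring anything that needs access to guest memory to
 * KVM_REQ_GET_NESTED_STATE_PAGES.
 */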
1244static int svm_set_nested_state(struct kvm_vcpu *vcpu,
1245 struct kvm_nested_state __user *user_kvm_nested_state,
1246 struct kvm_nested_state *kvm_state)
1247{
1248 struct vcpu_svm *svm = to_svm(vcpu);
Paolo Bonzinicc440cd2020-05-13 13:36:32 -04001249 struct vmcb __user *user_vmcb = (struct vmcb __user *)
1250 &user_kvm_nested_state->data.svm[0];
Joerg Roedel6ccbd292020-09-07 15:15:02 +02001251 struct vmcb_control_area *ctl;
1252 struct vmcb_save_area *save;
Sean Christophersondbc47392021-06-22 10:56:59 -07001253 unsigned long cr0;
Joerg Roedel6ccbd292020-09-07 15:15:02 +02001254 int ret;
Paolo Bonzinicc440cd2020-05-13 13:36:32 -04001255
Joerg Roedel6ccbd292020-09-07 15:15:02 +02001256 BUILD_BUG_ON(sizeof(struct vmcb_control_area) + sizeof(struct vmcb_save_area) >
1257 KVM_STATE_NESTED_SVM_VMCB_SIZE);
1258
Paolo Bonzinicc440cd2020-05-13 13:36:32 -04001259 if (kvm_state->format != KVM_STATE_NESTED_FORMAT_SVM)
1260 return -EINVAL;
1261
1262 if (kvm_state->flags & ~(KVM_STATE_NESTED_GUEST_MODE |
1263 KVM_STATE_NESTED_RUN_PENDING |
1264 KVM_STATE_NESTED_GIF_SET))
1265 return -EINVAL;
1266
1267 /*
1268 * If in guest mode, vcpu->arch.efer actually refers to the L2 guest's
1269 * EFER.SVME, but EFER.SVME still has to be 1 for VMRUN to succeed.
1270 */
1271 if (!(vcpu->arch.efer & EFER_SVME)) {
1272 /* GIF=1 and no guest mode are required if SVME=0. */
1273 if (kvm_state->flags != KVM_STATE_NESTED_GIF_SET)
1274 return -EINVAL;
1275 }
1276
1277 /* SMM temporarily disables SVM, so we cannot be in guest mode. */
1278 if (is_smm(vcpu) && (kvm_state->flags & KVM_STATE_NESTED_GUEST_MODE))
1279 return -EINVAL;
1280
1281 if (!(kvm_state->flags & KVM_STATE_NESTED_GUEST_MODE)) {
1282 svm_leave_nested(svm);
Vitaly Kuznetsovd5cd6f32020-09-14 15:37:25 +02001283 svm_set_gif(svm, !!(kvm_state->flags & KVM_STATE_NESTED_GIF_SET));
1284 return 0;
Paolo Bonzinicc440cd2020-05-13 13:36:32 -04001285 }
1286
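	/* Entering guest mode: vmcb12 address and supplied data size must be sane. */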
1287 if (!page_address_valid(vcpu, kvm_state->hdr.svm.vmcb_pa))
1288 return -EINVAL;
1289 if (kvm_state->size < sizeof(*kvm_state) + KVM_STATE_NESTED_SVM_VMCB_SIZE)
1290 return -EINVAL;
Paolo Bonzinicc440cd2020-05-13 13:36:32 -04001291
Joerg Roedel6ccbd292020-09-07 15:15:02 +02001292 ret = -ENOMEM;
Sean Christophersoneba04b22021-03-30 19:30:25 -07001293 ctl = kzalloc(sizeof(*ctl), GFP_KERNEL_ACCOUNT);
1294 save = kzalloc(sizeof(*save), GFP_KERNEL_ACCOUNT);
Joerg Roedel6ccbd292020-09-07 15:15:02 +02001295 if (!ctl || !save)
1296 goto out_free;
1297
1298 ret = -EFAULT;
1299 if (copy_from_user(ctl, &user_vmcb->control, sizeof(*ctl)))
1300 goto out_free;
1301 if (copy_from_user(save, &user_vmcb->save, sizeof(*save)))
1302 goto out_free;
1303
1304 ret = -EINVAL;
Krish Sadhukhanee695f22021-04-12 17:56:08 -04001305 if (!nested_vmcb_check_controls(vcpu, ctl))
Joerg Roedel6ccbd292020-09-07 15:15:02 +02001306 goto out_free;
Paolo Bonzinicc440cd2020-05-13 13:36:32 -04001307
1308 /*
1309 * Processor state contains L2 state. Check that it is
Paolo Bonzinicb9b6a12021-03-31 07:35:52 -04001310 * valid for guest mode (see nested_vmcb_check_save).
Paolo Bonzinicc440cd2020-05-13 13:36:32 -04001311 */
1312 cr0 = kvm_read_cr0(vcpu);
1313 if (((cr0 & X86_CR0_CD) == 0) && (cr0 & X86_CR0_NW))
Joerg Roedel6ccbd292020-09-07 15:15:02 +02001314 goto out_free;
Paolo Bonzinicc440cd2020-05-13 13:36:32 -04001315
1316 /*
1317 * Validate host state saved from before VMRUN (see
1318 * nested_svm_check_permissions).
Paolo Bonzinicc440cd2020-05-13 13:36:32 -04001319 */
Krish Sadhukhan6906e062020-10-06 19:06:52 +00001320 if (!(save->cr0 & X86_CR0_PG) ||
1321 !(save->cr0 & X86_CR0_PE) ||
1322 (save->rflags & X86_EFLAGS_VM) ||
Paolo Bonzini63129752021-03-02 14:40:39 -05001323 !nested_vmcb_valid_sregs(vcpu, save))
Joerg Roedel6ccbd292020-09-07 15:15:02 +02001324 goto out_free;
Paolo Bonzinicc440cd2020-05-13 13:36:32 -04001325
1326 /*
Maxim Levitskyb222b0b2021-06-07 12:01:59 +03001327	 * While the nested guest CR3 is already checked and set by
1328	 * KVM_SET_SREGS, that happened before the nested state was loaded,
1329	 * so the MMU might not be initialized correctly.
1330	 * Set it again to fix this.
1331 */
1332
1333 ret = nested_svm_load_cr3(&svm->vcpu, vcpu->arch.cr3,
1334 nested_npt_enabled(svm), false);
1335 if (WARN_ON_ONCE(ret))
1336 goto out_free;
1337
1338
1339 /*
Cathy Avery4995a362021-01-13 07:07:52 -05001340	 * All checks done, we can enter guest mode. Userspace provides
1341	 * vmcb12.control, which will be combined with L1's control state and
1342	 * stored into vmcb02, and the L1 save state, which we store in vmcb01.
1343	 * If needed, L2 registers are copied from the current VMCB to vmcb02.
Paolo Bonzinicc440cd2020-05-13 13:36:32 -04001344 */
Maxim Levitsky81f76ad2021-01-07 11:38:52 +02001345
Maxim Levitsky9d290e12021-05-03 15:54:44 +03001346 if (is_guest_mode(vcpu))
1347 svm_leave_nested(svm);
1348 else
1349 svm->nested.vmcb02.ptr->save = svm->vmcb01.ptr->save;
1350
Maxim Levitsky063ab162021-05-04 17:39:35 +03001351 svm_set_gif(svm, !!(kvm_state->flags & KVM_STATE_NESTED_GIF_SET));
1352
Maxim Levitsky81f76ad2021-01-07 11:38:52 +02001353 svm->nested.nested_run_pending =
1354 !!(kvm_state->flags & KVM_STATE_NESTED_RUN_PENDING);
1355
Maxim Levitsky0dd16b52020-08-27 20:11:39 +03001356 svm->nested.vmcb12_gpa = kvm_state->hdr.svm.vmcb_pa;
Paolo Bonzinic08f3902020-11-17 02:51:35 -05001357
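	/*
	 * The save area supplied by userspace holds L1's state from before
	 * VMRUN; restore it into vmcb01.  CPL is forced to 0 because VMRUN
	 * is only legal at CPL 0.
	 */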
1358 svm->vmcb01.ptr->save.es = save->es;
1359 svm->vmcb01.ptr->save.cs = save->cs;
1360 svm->vmcb01.ptr->save.ss = save->ss;
1361 svm->vmcb01.ptr->save.ds = save->ds;
1362 svm->vmcb01.ptr->save.gdtr = save->gdtr;
1363 svm->vmcb01.ptr->save.idtr = save->idtr;
1364 svm->vmcb01.ptr->save.rflags = save->rflags | X86_EFLAGS_FIXED;
1365 svm->vmcb01.ptr->save.efer = save->efer;
1366 svm->vmcb01.ptr->save.cr0 = save->cr0;
1367 svm->vmcb01.ptr->save.cr3 = save->cr3;
1368 svm->vmcb01.ptr->save.cr4 = save->cr4;
1369 svm->vmcb01.ptr->save.rax = save->rax;
1370 svm->vmcb01.ptr->save.rsp = save->rsp;
1371 svm->vmcb01.ptr->save.rip = save->rip;
1372 svm->vmcb01.ptr->save.cpl = 0;
1373
Paolo Bonzini9e8f0fb2020-11-17 05:15:41 -05001374 nested_load_control_from_vmcb12(svm, ctl);
Cathy Avery4995a362021-01-13 07:07:52 -05001375
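	/* Run the vCPU on vmcb02 from here on. */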
1376 svm_switch_vmcb(svm, &svm->nested.vmcb02);
1377
Paolo Bonzini9e8f0fb2020-11-17 05:15:41 -05001378 nested_vmcb02_prepare_control(svm);
Paolo Bonzinicc440cd2020-05-13 13:36:32 -04001379
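	/*
	 * Merging the MSR permission bitmap (and, without NPT, reloading the
	 * PDPTRs) requires reading guest memory, which may not be accessible
	 * yet; defer that work to svm_get_nested_state_pages().
	 */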
Paolo Bonzinia7d5c7c2020-09-22 07:43:14 -04001380 kvm_make_request(KVM_REQ_GET_NESTED_STATE_PAGES, vcpu);
Joerg Roedel6ccbd292020-09-07 15:15:02 +02001381 ret = 0;
1382out_free:
1383 kfree(save);
1384 kfree(ctl);
1385
1386 return ret;
Paolo Bonzinicc440cd2020-05-13 13:36:32 -04001387}
1388
Maxim Levitsky232f75d2021-04-01 17:18:10 +03001389static bool svm_get_nested_state_pages(struct kvm_vcpu *vcpu)
1390{
1391 struct vcpu_svm *svm = to_svm(vcpu);
1392
1393 if (WARN_ON(!is_guest_mode(vcpu)))
1394 return true;
1395
Maxim Levitsky158a48e2021-06-07 12:02:03 +03001396 if (!vcpu->arch.pdptrs_from_userspace &&
1397 !nested_npt_enabled(svm) && is_pae_paging(vcpu))
Maxim Levitskyb222b0b2021-06-07 12:01:59 +03001398 /*
1399 * Reload the guest's PDPTRs since after a migration
1400		 * the guest CR3 might be restored before the nested state is
1401		 * set, which can cause the wrong PDPTRs to be loaded.
1402 */
1403 if (CC(!load_pdptrs(vcpu, vcpu->arch.walk_mmu, vcpu->arch.cr3)))
1404 return false;
Maxim Levitsky232f75d2021-04-01 17:18:10 +03001405
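	/*
	 * Rebuild the MSR permission bitmap that is used while running L2;
	 * failing to read vmcb12's MSRPM is reported to userspace as an
	 * internal emulation error.
	 */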
1406 if (!nested_svm_vmrun_msrpm(svm)) {
1407 vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
1408 vcpu->run->internal.suberror =
1409 KVM_INTERNAL_ERROR_EMULATION;
1410 vcpu->run->internal.ndata = 0;
1411 return false;
1412 }
1413
1414 return true;
1415}
1416
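/*
 * Nested-SVM callbacks invoked from the common x86 code via
 * kvm_x86_ops.nested_ops.
 */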
Paolo Bonzini33b22172020-04-17 10:24:18 -04001417struct kvm_x86_nested_ops svm_nested_ops = {
1418 .check_events = svm_check_nested_events,
Sean Christophersoncb6a32c2021-03-02 09:45:14 -08001419 .triple_fault = nested_svm_triple_fault,
Paolo Bonzinia7d5c7c2020-09-22 07:43:14 -04001420 .get_nested_state_pages = svm_get_nested_state_pages,
Paolo Bonzinicc440cd2020-05-13 13:36:32 -04001421 .get_state = svm_get_nested_state,
1422 .set_state = svm_set_nested_state,
Paolo Bonzini33b22172020-04-17 10:24:18 -04001423};