// SPDX-License-Identifier: GPL-2.0-only
/*
 * Kernel-based Virtual Machine driver for Linux
 *
 * AMD SVM support
 *
 * Copyright (C) 2006 Qumranet, Inc.
 * Copyright 2010 Red Hat, Inc. and/or its affiliates.
 *
 * Authors:
 *   Yaniv Kamay  <yaniv@qumranet.com>
 *   Avi Kivity   <avi@qumranet.com>
 */

#define pr_fmt(fmt) "SVM: " fmt

#include <linux/kvm_types.h>
#include <linux/kvm_host.h>
#include <linux/kernel.h>

#include <asm/msr-index.h>
#include <asm/debugreg.h>

#include "kvm_emulate.h"
#include "trace.h"
#include "mmu.h"
#include "x86.h"
#include "svm.h"

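/*
 * Nested page-fault injection callback for the guest_mmu: reflect a fault
 * on L1's nested page tables back to L1 as an SVM_EXIT_NPF #VMEXIT.
 */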
static void nested_svm_inject_npf_exit(struct kvm_vcpu *vcpu,
				       struct x86_exception *fault)
{
	struct vcpu_svm *svm = to_svm(vcpu);

	if (svm->vmcb->control.exit_code != SVM_EXIT_NPF) {
		/*
		 * TODO: track the cause of the nested page fault, and
		 * correctly fill in the high bits of exit_info_1.
		 */
		svm->vmcb->control.exit_code = SVM_EXIT_NPF;
		svm->vmcb->control.exit_code_hi = 0;
		svm->vmcb->control.exit_info_1 = (1ULL << 32);
		svm->vmcb->control.exit_info_2 = fault->address;
	}

	svm->vmcb->control.exit_info_1 &= ~0xffffffffULL;
	svm->vmcb->control.exit_info_1 |= fault->error_code;

	/*
	 * The present bit is always zero for page structure faults on real
	 * hardware.
	 */
	if (svm->vmcb->control.exit_info_1 & (2ULL << 32))
		svm->vmcb->control.exit_info_1 &= ~1;

	nested_svm_vmexit(svm);
}

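/*
 * Read the PDPTE at 'index' from L1's nested page-table root (nested_cr3)
 * in guest memory.  Returns 0 if the read fails.
 */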
static u64 nested_svm_get_tdp_pdptr(struct kvm_vcpu *vcpu, int index)
{
	struct vcpu_svm *svm = to_svm(vcpu);
	u64 cr3 = svm->nested.nested_cr3;
	u64 pdpte;
	int ret;

	ret = kvm_vcpu_read_guest_page(vcpu, gpa_to_gfn(__sme_clr(cr3)), &pdpte,
				       offset_in_page(cr3) + index * 8, 8);
	if (ret)
		return 0;
	return pdpte;
}

static unsigned long nested_svm_get_tdp_cr3(struct kvm_vcpu *vcpu)
{
	struct vcpu_svm *svm = to_svm(vcpu);

	return svm->nested.nested_cr3;
}

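/*
 * Switch the vcpu to the guest_mmu, which shadows L1's nested page tables
 * (nCR3) while L2 runs; the walk_mmu is pointed at the nested_mmu.
 */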
static void nested_svm_init_mmu_context(struct kvm_vcpu *vcpu)
{
	WARN_ON(mmu_is_nested(vcpu));

	vcpu->arch.mmu = &vcpu->arch.guest_mmu;
	kvm_init_shadow_mmu(vcpu);
	vcpu->arch.mmu->get_guest_pgd = nested_svm_get_tdp_cr3;
	vcpu->arch.mmu->get_pdptr = nested_svm_get_tdp_pdptr;
	vcpu->arch.mmu->inject_page_fault = nested_svm_inject_npf_exit;
	vcpu->arch.mmu->shadow_root_level = kvm_x86_ops.get_tdp_level(vcpu);
	reset_shadow_zero_bits_mask(vcpu, vcpu->arch.mmu);
	vcpu->arch.walk_mmu = &vcpu->arch.nested_mmu;
}

static void nested_svm_uninit_mmu_context(struct kvm_vcpu *vcpu)
{
	vcpu->arch.mmu = &vcpu->arch.root_mmu;
	vcpu->arch.walk_mmu = &vcpu->arch.root_mmu;
}

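/*
 * Recompute the active intercept bits while L2 is running: start from L1's
 * host intercepts (saved in hsave), drop the bits L0 does not need, then
 * OR in the intercepts requested by the nested VMCB.
 */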
void recalc_intercepts(struct vcpu_svm *svm)
{
	struct vmcb_control_area *c, *h;
	struct nested_state *g;

	mark_dirty(svm->vmcb, VMCB_INTERCEPTS);

	if (!is_guest_mode(&svm->vcpu))
		return;

	c = &svm->vmcb->control;
	h = &svm->nested.hsave->control;
	g = &svm->nested;

	c->intercept_cr = h->intercept_cr;
	c->intercept_dr = h->intercept_dr;
	c->intercept_exceptions = h->intercept_exceptions;
	c->intercept = h->intercept;

	if (svm->vcpu.arch.hflags & HF_VINTR_MASK) {
		/* We only want the cr8 intercept bits of L1 */
		c->intercept_cr &= ~(1U << INTERCEPT_CR8_READ);
		c->intercept_cr &= ~(1U << INTERCEPT_CR8_WRITE);

		/*
		 * Once running L2 with HF_VINTR_MASK, EFLAGS.IF does not
		 * affect any interrupt we may want to inject; therefore,
		 * interrupt window vmexits are irrelevant to L0.
		 */
		c->intercept &= ~(1ULL << INTERCEPT_VINTR);
	}

	/* We don't want to see VMMCALLs from a nested guest */
	c->intercept &= ~(1ULL << INTERCEPT_VMMCALL);

	c->intercept_cr |= g->intercept_cr;
	c->intercept_dr |= g->intercept_dr;
	c->intercept_exceptions |= g->intercept_exceptions;
	c->intercept |= g->intercept;
}

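/* Copy the VMCB control fields that KVM tracks from one VMCB to another. */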
static void copy_vmcb_control_area(struct vmcb *dst_vmcb, struct vmcb *from_vmcb)
{
	struct vmcb_control_area *dst = &dst_vmcb->control;
	struct vmcb_control_area *from = &from_vmcb->control;

	dst->intercept_cr = from->intercept_cr;
	dst->intercept_dr = from->intercept_dr;
	dst->intercept_exceptions = from->intercept_exceptions;
	dst->intercept = from->intercept;
	dst->iopm_base_pa = from->iopm_base_pa;
	dst->msrpm_base_pa = from->msrpm_base_pa;
	dst->tsc_offset = from->tsc_offset;
	dst->asid = from->asid;
	dst->tlb_ctl = from->tlb_ctl;
	dst->int_ctl = from->int_ctl;
	dst->int_vector = from->int_vector;
	dst->int_state = from->int_state;
	dst->exit_code = from->exit_code;
	dst->exit_code_hi = from->exit_code_hi;
	dst->exit_info_1 = from->exit_info_1;
	dst->exit_info_2 = from->exit_info_2;
	dst->exit_int_info = from->exit_int_info;
	dst->exit_int_info_err = from->exit_int_info_err;
	dst->nested_ctl = from->nested_ctl;
	dst->event_inj = from->event_inj;
	dst->event_inj_err = from->event_inj_err;
	dst->nested_cr3 = from->nested_cr3;
	dst->virt_ext = from->virt_ext;
	dst->pause_filter_count = from->pause_filter_count;
	dst->pause_filter_thresh = from->pause_filter_thresh;
}

static bool nested_svm_vmrun_msrpm(struct vcpu_svm *svm)
{
	/*
	 * This function merges the MSR permission bitmaps of KVM and the
	 * nested VMCB.  It is optimized in that it only merges the parts
	 * where the KVM MSR permission bitmap may contain zero bits.
	 */
	int i;

	if (!(svm->nested.intercept & (1ULL << INTERCEPT_MSR_PROT)))
		return true;

	for (i = 0; i < MSRPM_OFFSETS; i++) {
		u32 value, p;
		u64 offset;

		if (msrpm_offsets[i] == 0xffffffff)
			break;

		p = msrpm_offsets[i];
		offset = svm->nested.vmcb_msrpm + (p * 4);

		if (kvm_vcpu_read_guest(&svm->vcpu, offset, &value, 4))
			return false;

		svm->nested.msrpm[p] = svm->msrpm[p] | value;
	}

	svm->vmcb->control.msrpm_base_pa = __sme_set(__pa(svm->nested.msrpm));

	return true;
}

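/*
 * Sanity-check the VMCB provided by L1 before VMRUN: EFER.SVME must be set,
 * the CR0.CD=0/NW=1 combination is illegal, the VMRUN intercept must be set,
 * the ASID must be non-zero, and nested paging may only be requested when
 * NPT is enabled in L0.
 */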
static bool nested_vmcb_checks(struct vmcb *vmcb)
{
	if ((vmcb->save.efer & EFER_SVME) == 0)
		return false;

	if (((vmcb->save.cr0 & X86_CR0_CD) == 0) &&
	    (vmcb->save.cr0 & X86_CR0_NW))
		return false;

	if ((vmcb->control.intercept & (1ULL << INTERCEPT_VMRUN)) == 0)
		return false;

	if (vmcb->control.asid == 0)
		return false;

	if ((vmcb->control.nested_ctl & SVM_NESTED_CTL_NP_ENABLE) &&
	    !npt_enabled)
		return false;

	return true;
}

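/*
 * Load L2 state from the nested VMCB into the active VMCB, switch the vcpu
 * into guest mode and re-merge the intercepts.  The mapping of the nested
 * VMCB is dropped before guest mode is entered.
 */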
void enter_svm_guest_mode(struct vcpu_svm *svm, u64 vmcb_gpa,
			  struct vmcb *nested_vmcb, struct kvm_host_map *map)
{
	bool evaluate_pending_interrupts =
		is_intercept(svm, INTERCEPT_VINTR) ||
		is_intercept(svm, INTERCEPT_IRET);

	if (kvm_get_rflags(&svm->vcpu) & X86_EFLAGS_IF)
		svm->vcpu.arch.hflags |= HF_HIF_MASK;
	else
		svm->vcpu.arch.hflags &= ~HF_HIF_MASK;

	if (nested_vmcb->control.nested_ctl & SVM_NESTED_CTL_NP_ENABLE) {
		svm->nested.nested_cr3 = nested_vmcb->control.nested_cr3;
		nested_svm_init_mmu_context(&svm->vcpu);
	}

	/* Load the nested guest state */
	svm->vmcb->save.es = nested_vmcb->save.es;
	svm->vmcb->save.cs = nested_vmcb->save.cs;
	svm->vmcb->save.ss = nested_vmcb->save.ss;
	svm->vmcb->save.ds = nested_vmcb->save.ds;
	svm->vmcb->save.gdtr = nested_vmcb->save.gdtr;
	svm->vmcb->save.idtr = nested_vmcb->save.idtr;
	kvm_set_rflags(&svm->vcpu, nested_vmcb->save.rflags);
	svm_set_efer(&svm->vcpu, nested_vmcb->save.efer);
	svm_set_cr0(&svm->vcpu, nested_vmcb->save.cr0);
	svm_set_cr4(&svm->vcpu, nested_vmcb->save.cr4);
	if (npt_enabled) {
		svm->vmcb->save.cr3 = nested_vmcb->save.cr3;
		svm->vcpu.arch.cr3 = nested_vmcb->save.cr3;
	} else
		(void)kvm_set_cr3(&svm->vcpu, nested_vmcb->save.cr3);

	/* Guest paging mode is active - reset mmu */
	kvm_mmu_reset_context(&svm->vcpu);

	svm->vmcb->save.cr2 = svm->vcpu.arch.cr2 = nested_vmcb->save.cr2;
	kvm_rax_write(&svm->vcpu, nested_vmcb->save.rax);
	kvm_rsp_write(&svm->vcpu, nested_vmcb->save.rsp);
	kvm_rip_write(&svm->vcpu, nested_vmcb->save.rip);

	/* In case we don't even reach vcpu_run, the fields are not updated */
	svm->vmcb->save.rax = nested_vmcb->save.rax;
	svm->vmcb->save.rsp = nested_vmcb->save.rsp;
	svm->vmcb->save.rip = nested_vmcb->save.rip;
	svm->vmcb->save.dr7 = nested_vmcb->save.dr7;
	svm->vcpu.arch.dr6 = nested_vmcb->save.dr6;
	svm->vmcb->save.cpl = nested_vmcb->save.cpl;

	svm->nested.vmcb_msrpm = nested_vmcb->control.msrpm_base_pa & ~0x0fffULL;
	svm->nested.vmcb_iopm = nested_vmcb->control.iopm_base_pa & ~0x0fffULL;

	/* cache intercepts */
	svm->nested.intercept_cr = nested_vmcb->control.intercept_cr;
	svm->nested.intercept_dr = nested_vmcb->control.intercept_dr;
	svm->nested.intercept_exceptions = nested_vmcb->control.intercept_exceptions;
	svm->nested.intercept = nested_vmcb->control.intercept;

	svm_flush_tlb(&svm->vcpu);
	svm->vmcb->control.int_ctl = nested_vmcb->control.int_ctl | V_INTR_MASKING_MASK;
	if (nested_vmcb->control.int_ctl & V_INTR_MASKING_MASK)
		svm->vcpu.arch.hflags |= HF_VINTR_MASK;
	else
		svm->vcpu.arch.hflags &= ~HF_VINTR_MASK;

	svm->vcpu.arch.tsc_offset += nested_vmcb->control.tsc_offset;
	svm->vmcb->control.tsc_offset = svm->vcpu.arch.tsc_offset;

	svm->vmcb->control.virt_ext = nested_vmcb->control.virt_ext;
	svm->vmcb->control.int_vector = nested_vmcb->control.int_vector;
	svm->vmcb->control.int_state = nested_vmcb->control.int_state;
	svm->vmcb->control.event_inj = nested_vmcb->control.event_inj;
	svm->vmcb->control.event_inj_err = nested_vmcb->control.event_inj_err;

	svm->vmcb->control.pause_filter_count =
		nested_vmcb->control.pause_filter_count;
	svm->vmcb->control.pause_filter_thresh =
		nested_vmcb->control.pause_filter_thresh;

	kvm_vcpu_unmap(&svm->vcpu, map, true);

	/* Enter Guest-Mode */
	enter_guest_mode(&svm->vcpu);

	/*
	 * Merge guest and host intercepts - must be called with vcpu in
	 * guest-mode to take effect here.
	 */
	recalc_intercepts(svm);

	svm->nested.vmcb = vmcb_gpa;

	/*
	 * If L1 had a pending IRQ/NMI before executing VMRUN,
	 * which wasn't delivered because it was disallowed (e.g.
	 * interrupts disabled), L0 needs to evaluate if this pending
	 * event should cause an exit from L2 to L1 or be delivered
	 * directly to L2.
	 *
	 * Usually this would be handled by the processor noticing an
	 * IRQ/NMI window request.  However, VMRUN can unblock interrupts
	 * by implicitly setting GIF, so force L0 to perform pending event
	 * evaluation by requesting a KVM_REQ_EVENT.
	 */
	enable_gif(svm);
	if (unlikely(evaluate_pending_interrupts))
		kvm_make_request(KVM_REQ_EVENT, &svm->vcpu);

	mark_all_dirty(svm->vmcb);
}

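/*
 * Emulate the VMRUN instruction: map and validate the VMCB addressed by
 * L1's RAX, save L1's state into the host-save area (hsave), and switch
 * the vcpu into guest mode.
 */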
int nested_svm_vmrun(struct vcpu_svm *svm)
{
	int ret;
	struct vmcb *nested_vmcb;
	struct vmcb *hsave = svm->nested.hsave;
	struct vmcb *vmcb = svm->vmcb;
	struct kvm_host_map map;
	u64 vmcb_gpa;

	if (is_smm(&svm->vcpu)) {
		kvm_queue_exception(&svm->vcpu, UD_VECTOR);
		return 1;
	}

	vmcb_gpa = svm->vmcb->save.rax;
	ret = kvm_vcpu_map(&svm->vcpu, gpa_to_gfn(vmcb_gpa), &map);
	if (ret == -EINVAL) {
		kvm_inject_gp(&svm->vcpu, 0);
		return 1;
	} else if (ret) {
		return kvm_skip_emulated_instruction(&svm->vcpu);
	}

	ret = kvm_skip_emulated_instruction(&svm->vcpu);

	nested_vmcb = map.hva;

	if (!nested_vmcb_checks(nested_vmcb)) {
		nested_vmcb->control.exit_code = SVM_EXIT_ERR;
		nested_vmcb->control.exit_code_hi = 0;
		nested_vmcb->control.exit_info_1 = 0;
		nested_vmcb->control.exit_info_2 = 0;

		kvm_vcpu_unmap(&svm->vcpu, &map, true);

		return ret;
	}

	trace_kvm_nested_vmrun(svm->vmcb->save.rip, vmcb_gpa,
			       nested_vmcb->save.rip,
			       nested_vmcb->control.int_ctl,
			       nested_vmcb->control.event_inj,
			       nested_vmcb->control.nested_ctl);

	trace_kvm_nested_intercepts(nested_vmcb->control.intercept_cr & 0xffff,
				    nested_vmcb->control.intercept_cr >> 16,
				    nested_vmcb->control.intercept_exceptions,
				    nested_vmcb->control.intercept);

	/* Clear internal status */
	kvm_clear_exception_queue(&svm->vcpu);
	kvm_clear_interrupt_queue(&svm->vcpu);

	/*
	 * Save the old vmcb, so we don't need to pick what we save, but can
	 * restore everything when a VMEXIT occurs
	 */
	hsave->save.es = vmcb->save.es;
	hsave->save.cs = vmcb->save.cs;
	hsave->save.ss = vmcb->save.ss;
	hsave->save.ds = vmcb->save.ds;
	hsave->save.gdtr = vmcb->save.gdtr;
	hsave->save.idtr = vmcb->save.idtr;
	hsave->save.efer = svm->vcpu.arch.efer;
	hsave->save.cr0 = kvm_read_cr0(&svm->vcpu);
	hsave->save.cr4 = svm->vcpu.arch.cr4;
	hsave->save.rflags = kvm_get_rflags(&svm->vcpu);
	hsave->save.rip = kvm_rip_read(&svm->vcpu);
	hsave->save.rsp = vmcb->save.rsp;
	hsave->save.rax = vmcb->save.rax;
	if (npt_enabled)
		hsave->save.cr3 = vmcb->save.cr3;
	else
		hsave->save.cr3 = kvm_read_cr3(&svm->vcpu);

	copy_vmcb_control_area(hsave, vmcb);

	svm->nested.nested_run_pending = 1;
	enter_svm_guest_mode(svm, vmcb_gpa, nested_vmcb, &map);

	if (!nested_svm_vmrun_msrpm(svm)) {
		svm->vmcb->control.exit_code = SVM_EXIT_ERR;
		svm->vmcb->control.exit_code_hi = 0;
		svm->vmcb->control.exit_info_1 = 0;
		svm->vmcb->control.exit_info_2 = 0;

		nested_svm_vmexit(svm);
	}

	return ret;
}

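/* Copy the state handled by the VMLOAD/VMSAVE instructions between VMCBs. */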
void nested_svm_vmloadsave(struct vmcb *from_vmcb, struct vmcb *to_vmcb)
{
	to_vmcb->save.fs = from_vmcb->save.fs;
	to_vmcb->save.gs = from_vmcb->save.gs;
	to_vmcb->save.tr = from_vmcb->save.tr;
	to_vmcb->save.ldtr = from_vmcb->save.ldtr;
	to_vmcb->save.kernel_gs_base = from_vmcb->save.kernel_gs_base;
	to_vmcb->save.star = from_vmcb->save.star;
	to_vmcb->save.lstar = from_vmcb->save.lstar;
	to_vmcb->save.cstar = from_vmcb->save.cstar;
	to_vmcb->save.sfmask = from_vmcb->save.sfmask;
	to_vmcb->save.sysenter_cs = from_vmcb->save.sysenter_cs;
	to_vmcb->save.sysenter_esp = from_vmcb->save.sysenter_esp;
	to_vmcb->save.sysenter_eip = from_vmcb->save.sysenter_eip;
}

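/*
 * Emulate #VMEXIT from L2 to L1: copy the current (L2) state back into the
 * nested VMCB, restore L1's state from the host-save area, and leave guest
 * mode.
 */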
int nested_svm_vmexit(struct vcpu_svm *svm)
{
	int rc;
	struct vmcb *nested_vmcb;
	struct vmcb *hsave = svm->nested.hsave;
	struct vmcb *vmcb = svm->vmcb;
	struct kvm_host_map map;

	trace_kvm_nested_vmexit_inject(vmcb->control.exit_code,
				       vmcb->control.exit_info_1,
				       vmcb->control.exit_info_2,
				       vmcb->control.exit_int_info,
				       vmcb->control.exit_int_info_err,
				       KVM_ISA_SVM);

	rc = kvm_vcpu_map(&svm->vcpu, gpa_to_gfn(svm->nested.vmcb), &map);
	if (rc) {
		if (rc == -EINVAL)
			kvm_inject_gp(&svm->vcpu, 0);
		return 1;
	}

	nested_vmcb = map.hva;

	/* Exit Guest-Mode */
	leave_guest_mode(&svm->vcpu);
	svm->nested.vmcb = 0;

	/* in case we halted in L2 */
	svm->vcpu.arch.mp_state = KVM_MP_STATE_RUNNABLE;

	/* Give the current vmcb to the guest */
	disable_gif(svm);

	nested_vmcb->save.es = vmcb->save.es;
	nested_vmcb->save.cs = vmcb->save.cs;
	nested_vmcb->save.ss = vmcb->save.ss;
	nested_vmcb->save.ds = vmcb->save.ds;
	nested_vmcb->save.gdtr = vmcb->save.gdtr;
	nested_vmcb->save.idtr = vmcb->save.idtr;
	nested_vmcb->save.efer = svm->vcpu.arch.efer;
	nested_vmcb->save.cr0 = kvm_read_cr0(&svm->vcpu);
	nested_vmcb->save.cr3 = kvm_read_cr3(&svm->vcpu);
	nested_vmcb->save.cr2 = vmcb->save.cr2;
	nested_vmcb->save.cr4 = svm->vcpu.arch.cr4;
	nested_vmcb->save.rflags = kvm_get_rflags(&svm->vcpu);
	nested_vmcb->save.rip = vmcb->save.rip;
	nested_vmcb->save.rsp = vmcb->save.rsp;
	nested_vmcb->save.rax = vmcb->save.rax;
	nested_vmcb->save.dr7 = vmcb->save.dr7;
	nested_vmcb->save.dr6 = svm->vcpu.arch.dr6;
	nested_vmcb->save.cpl = vmcb->save.cpl;

	nested_vmcb->control.int_ctl = vmcb->control.int_ctl;
	nested_vmcb->control.int_vector = vmcb->control.int_vector;
	nested_vmcb->control.int_state = vmcb->control.int_state;
	nested_vmcb->control.exit_code = vmcb->control.exit_code;
	nested_vmcb->control.exit_code_hi = vmcb->control.exit_code_hi;
	nested_vmcb->control.exit_info_1 = vmcb->control.exit_info_1;
	nested_vmcb->control.exit_info_2 = vmcb->control.exit_info_2;
	nested_vmcb->control.exit_int_info = vmcb->control.exit_int_info;
	nested_vmcb->control.exit_int_info_err = vmcb->control.exit_int_info_err;

	if (svm->nrips_enabled)
		nested_vmcb->control.next_rip = vmcb->control.next_rip;

	/*
	 * If we emulate a VMRUN/#VMEXIT in the same host #vmexit cycle we have
	 * to make sure that we do not lose injected events.  So check event_inj
	 * here and copy it to exit_int_info if it is valid.
	 * Exit_int_info and event_inj can't both be valid because the case
	 * below only happens on a VMRUN instruction intercept which has
	 * no valid exit_int_info set.
	 */
	if (vmcb->control.event_inj & SVM_EVTINJ_VALID) {
		struct vmcb_control_area *nc = &nested_vmcb->control;

		nc->exit_int_info = vmcb->control.event_inj;
		nc->exit_int_info_err = vmcb->control.event_inj_err;
	}

	nested_vmcb->control.tlb_ctl = 0;
	nested_vmcb->control.event_inj = 0;
	nested_vmcb->control.event_inj_err = 0;

	nested_vmcb->control.pause_filter_count =
		svm->vmcb->control.pause_filter_count;
	nested_vmcb->control.pause_filter_thresh =
		svm->vmcb->control.pause_filter_thresh;

	/* We always set V_INTR_MASKING and remember the old value in hflags */
	if (!(svm->vcpu.arch.hflags & HF_VINTR_MASK))
		nested_vmcb->control.int_ctl &= ~V_INTR_MASKING_MASK;

	/* Restore the original control entries */
	copy_vmcb_control_area(vmcb, hsave);

	svm->vcpu.arch.tsc_offset = svm->vmcb->control.tsc_offset;
	kvm_clear_exception_queue(&svm->vcpu);
	kvm_clear_interrupt_queue(&svm->vcpu);

	svm->nested.nested_cr3 = 0;

	/* Restore selected save entries */
	svm->vmcb->save.es = hsave->save.es;
	svm->vmcb->save.cs = hsave->save.cs;
	svm->vmcb->save.ss = hsave->save.ss;
	svm->vmcb->save.ds = hsave->save.ds;
	svm->vmcb->save.gdtr = hsave->save.gdtr;
	svm->vmcb->save.idtr = hsave->save.idtr;
	kvm_set_rflags(&svm->vcpu, hsave->save.rflags);
	svm_set_efer(&svm->vcpu, hsave->save.efer);
	svm_set_cr0(&svm->vcpu, hsave->save.cr0 | X86_CR0_PE);
	svm_set_cr4(&svm->vcpu, hsave->save.cr4);
	if (npt_enabled) {
		svm->vmcb->save.cr3 = hsave->save.cr3;
		svm->vcpu.arch.cr3 = hsave->save.cr3;
	} else {
		(void)kvm_set_cr3(&svm->vcpu, hsave->save.cr3);
	}
	kvm_rax_write(&svm->vcpu, hsave->save.rax);
	kvm_rsp_write(&svm->vcpu, hsave->save.rsp);
	kvm_rip_write(&svm->vcpu, hsave->save.rip);
	svm->vmcb->save.dr7 = 0;
	svm->vmcb->save.cpl = 0;
	svm->vmcb->control.exit_int_info = 0;

	mark_all_dirty(svm->vmcb);

	kvm_vcpu_unmap(&svm->vcpu, &map, true);

	nested_svm_uninit_mmu_context(&svm->vcpu);
	kvm_mmu_reset_context(&svm->vcpu);
	kvm_mmu_load(&svm->vcpu);

	/*
	 * Drop what we picked up for L2 via svm_complete_interrupts() so it
	 * doesn't end up in L1.
	 */
	svm->vcpu.arch.nmi_injected = false;
	kvm_clear_exception_queue(&svm->vcpu);
	kvm_clear_interrupt_queue(&svm->vcpu);

	return 0;
}

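/*
 * Consult L1's MSR permission bitmap to decide whether an intercepted MSR
 * access must be forwarded to L1 (NESTED_EXIT_DONE) or can be handled by
 * L0 (NESTED_EXIT_HOST).
 */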
static int nested_svm_exit_handled_msr(struct vcpu_svm *svm)
{
	u32 offset, msr, value;
	int write, mask;

	if (!(svm->nested.intercept & (1ULL << INTERCEPT_MSR_PROT)))
		return NESTED_EXIT_HOST;

	msr = svm->vcpu.arch.regs[VCPU_REGS_RCX];
	offset = svm_msrpm_offset(msr);
	write = svm->vmcb->control.exit_info_1 & 1;
	mask = 1 << ((2 * (msr & 0xf)) + write);

	if (offset == MSR_INVALID)
		return NESTED_EXIT_DONE;

	/* The offset is in 32 bit units, but we need it in 8 bit units */
	offset *= 4;

	if (kvm_vcpu_read_guest(&svm->vcpu, svm->nested.vmcb_msrpm + offset, &value, 4))
		return NESTED_EXIT_DONE;

	return (value & mask) ? NESTED_EXIT_DONE : NESTED_EXIT_HOST;
}

/* DB exceptions for our internal use must not cause vmexit */
static int nested_svm_intercept_db(struct vcpu_svm *svm)
{
	unsigned long dr6 = svm->vmcb->save.dr6;

	/* Always catch it and pass it to userspace if debugging.  */
	if (svm->vcpu.guest_debug &
	    (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP))
		return NESTED_EXIT_HOST;

	/* if we're not singlestepping, it's not ours */
	if (!svm->nmi_singlestep)
		goto reflected_db;

	/* if it's not a singlestep exception, it's not ours */
	if (!(dr6 & DR6_BS))
		goto reflected_db;

	/* if the guest is singlestepping, it should get the vmexit */
	if (svm->nmi_singlestep_guest_rflags & X86_EFLAGS_TF) {
		disable_nmi_singlestep(svm);
		goto reflected_db;
	}

	/* it's ours, the nested hypervisor must not see this one */
	return NESTED_EXIT_HOST;

reflected_db:
	/*
	 * Synchronize guest DR6 here just like in kvm_deliver_exception_payload;
	 * it will be moved into the nested VMCB by nested_svm_vmexit.  Once
	 * exceptions are moved to svm_check_nested_events, all this stuff
	 * will just go away and we can return NESTED_EXIT_HOST
	 * unconditionally.  db_interception will queue the exception, which
	 * will be processed by svm_check_nested_events if a nested vmexit is
	 * required, and we will just use kvm_deliver_exception_payload to copy
	 * the payload to DR6 before vmexit.
	 */
	WARN_ON(svm->vcpu.arch.switch_db_regs & KVM_DEBUGREG_WONT_EXIT);
	svm->vcpu.arch.dr6 &= ~(DR_TRAP_BITS | DR6_RTM);
	svm->vcpu.arch.dr6 |= dr6 & ~DR6_FIXED_1;
	return NESTED_EXIT_DONE;
}

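/*
 * Consult L1's I/O permission bitmap to decide whether an intercepted I/O
 * port access must be forwarded to L1.
 */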
static int nested_svm_intercept_ioio(struct vcpu_svm *svm)
{
	unsigned port, size, iopm_len;
	u16 val, mask;
	u8 start_bit;
	u64 gpa;

	if (!(svm->nested.intercept & (1ULL << INTERCEPT_IOIO_PROT)))
		return NESTED_EXIT_HOST;

	port = svm->vmcb->control.exit_info_1 >> 16;
	size = (svm->vmcb->control.exit_info_1 & SVM_IOIO_SIZE_MASK) >>
		SVM_IOIO_SIZE_SHIFT;
	gpa = svm->nested.vmcb_iopm + (port / 8);
	start_bit = port % 8;
	iopm_len = (start_bit + size > 8) ? 2 : 1;
	mask = (0xf >> (4 - size)) << start_bit;
	val = 0;

	if (kvm_vcpu_read_guest(&svm->vcpu, gpa, &val, iopm_len))
		return NESTED_EXIT_DONE;

	return (val & mask) ? NESTED_EXIT_DONE : NESTED_EXIT_HOST;
}

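/*
 * Decide whether a given #VMEXIT should be handled by L0 or reflected to
 * L1, based on the intercepts L1 requested in its nested VMCB.
 */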
static int nested_svm_intercept(struct vcpu_svm *svm)
{
	u32 exit_code = svm->vmcb->control.exit_code;
	int vmexit = NESTED_EXIT_HOST;

	switch (exit_code) {
	case SVM_EXIT_MSR:
		vmexit = nested_svm_exit_handled_msr(svm);
		break;
	case SVM_EXIT_IOIO:
		vmexit = nested_svm_intercept_ioio(svm);
		break;
	case SVM_EXIT_READ_CR0 ... SVM_EXIT_WRITE_CR8: {
		u32 bit = 1U << (exit_code - SVM_EXIT_READ_CR0);
		if (svm->nested.intercept_cr & bit)
			vmexit = NESTED_EXIT_DONE;
		break;
	}
	case SVM_EXIT_READ_DR0 ... SVM_EXIT_WRITE_DR7: {
		u32 bit = 1U << (exit_code - SVM_EXIT_READ_DR0);
		if (svm->nested.intercept_dr & bit)
			vmexit = NESTED_EXIT_DONE;
		break;
	}
	case SVM_EXIT_EXCP_BASE ... SVM_EXIT_EXCP_BASE + 0x1f: {
		u32 excp_bits = 1 << (exit_code - SVM_EXIT_EXCP_BASE);
		if (svm->nested.intercept_exceptions & excp_bits) {
			if (exit_code == SVM_EXIT_EXCP_BASE + DB_VECTOR)
				vmexit = nested_svm_intercept_db(svm);
			else if (exit_code == SVM_EXIT_EXCP_BASE + BP_VECTOR &&
				 svm->vcpu.guest_debug & KVM_GUESTDBG_USE_SW_BP)
				vmexit = NESTED_EXIT_HOST;
			else
				vmexit = NESTED_EXIT_DONE;
		}
		/* async page fault always causes a vmexit */
		else if ((exit_code == SVM_EXIT_EXCP_BASE + PF_VECTOR) &&
			 svm->vcpu.arch.exception.nested_apf != 0)
			vmexit = NESTED_EXIT_DONE;
		break;
	}
	case SVM_EXIT_ERR: {
		vmexit = NESTED_EXIT_DONE;
		break;
	}
	default: {
		u64 exit_bits = 1ULL << (exit_code - SVM_EXIT_INTR);
		if (svm->nested.intercept & exit_bits)
			vmexit = NESTED_EXIT_DONE;
	}
	}

	return vmexit;
}

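/*
 * Reflect the current exit to L1 if L1 intercepts it; returns the result of
 * the intercept check so the caller knows whether L0 still has to handle
 * the exit.
 */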
int nested_svm_exit_handled(struct vcpu_svm *svm)
{
	int vmexit;

	vmexit = nested_svm_intercept(svm);

	if (vmexit == NESTED_EXIT_DONE)
		nested_svm_vmexit(svm);

	return vmexit;
}

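/*
 * SVM instructions require EFER.SVME, paging and CPL 0; otherwise inject
 * #UD (no SVME or no paging) or #GP (wrong CPL).
 */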
int nested_svm_check_permissions(struct vcpu_svm *svm)
{
	if (!(svm->vcpu.arch.efer & EFER_SVME) ||
	    !is_paging(&svm->vcpu)) {
		kvm_queue_exception(&svm->vcpu, UD_VECTOR);
		return 1;
	}

	if (svm->vmcb->save.cpl) {
		kvm_inject_gp(&svm->vcpu, 0);
		return 1;
	}

	return 0;
}

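/*
 * If L1 intercepts exception 'nr', prepare the nested #VMEXIT (exit code,
 * error code and EXITINFO2) and flag it as required.  Returns non-zero if
 * the exception will be reflected to L1.
 */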
int nested_svm_check_exception(struct vcpu_svm *svm, unsigned nr,
			       bool has_error_code, u32 error_code)
{
	int vmexit;

	if (!is_guest_mode(&svm->vcpu))
		return 0;

	vmexit = nested_svm_intercept(svm);
	if (vmexit != NESTED_EXIT_DONE)
		return 0;

	svm->vmcb->control.exit_code = SVM_EXIT_EXCP_BASE + nr;
	svm->vmcb->control.exit_code_hi = 0;
	svm->vmcb->control.exit_info_1 = error_code;

	/*
	 * EXITINFO2 is undefined for all exception intercepts other
	 * than #PF.
	 */
	if (svm->vcpu.arch.exception.nested_apf)
		svm->vmcb->control.exit_info_2 = svm->vcpu.arch.apf.nested_apf_token;
	else if (svm->vcpu.arch.exception.has_payload)
		svm->vmcb->control.exit_info_2 = svm->vcpu.arch.exception.payload;
	else
		svm->vmcb->control.exit_info_2 = svm->vcpu.arch.cr2;

	svm->nested.exit_required = true;
	return vmexit;
}

static void nested_svm_intr(struct vcpu_svm *svm)
{
	svm->vmcb->control.exit_code = SVM_EXIT_INTR;
	svm->vmcb->control.exit_info_1 = 0;
	svm->vmcb->control.exit_info_2 = 0;

	/* nested_svm_vmexit() gets called afterwards from handle_exit() */
	svm->nested.exit_required = true;
	trace_kvm_nested_intr_vmexit(svm->vmcb->save.rip);
}

static bool nested_exit_on_intr(struct vcpu_svm *svm)
{
	return (svm->nested.intercept & 1ULL);
}

static int svm_check_nested_events(struct kvm_vcpu *vcpu)
{
	struct vcpu_svm *svm = to_svm(vcpu);
	bool block_nested_events =
		kvm_event_needs_reinjection(vcpu) || svm->nested.exit_required ||
		svm->nested.nested_run_pending;

	if (kvm_cpu_has_interrupt(vcpu) && nested_exit_on_intr(svm)) {
		if (block_nested_events)
			return -EBUSY;
		nested_svm_intr(svm);
		return 0;
	}

	return 0;
}

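/*
 * Check for exits that L0 must handle itself regardless of L1's intercepts,
 * such as physical interrupts, NMIs, machine checks and the page faults
 * needed to maintain the shadow/nested page tables.
 */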
int nested_svm_exit_special(struct vcpu_svm *svm)
{
	u32 exit_code = svm->vmcb->control.exit_code;

	switch (exit_code) {
	case SVM_EXIT_INTR:
	case SVM_EXIT_NMI:
	case SVM_EXIT_EXCP_BASE + MC_VECTOR:
		return NESTED_EXIT_HOST;
	case SVM_EXIT_NPF:
		/* For now we are always handling NPFs when using them */
		if (npt_enabled)
			return NESTED_EXIT_HOST;
		break;
	case SVM_EXIT_EXCP_BASE + PF_VECTOR:
		/* When we're shadowing, trap PFs, but not async PF */
		if (!npt_enabled && svm->vcpu.arch.apf.host_apf_reason == 0)
			return NESTED_EXIT_HOST;
		break;
	default:
		break;
	}

	return NESTED_EXIT_CONTINUE;
}

struct kvm_x86_nested_ops svm_nested_ops = {
	.check_events = svm_check_nested_events,
};