// SPDX-License-Identifier: GPL-2.0

#include <linux/objtool.h>
#include <linux/percpu.h>

#include <asm/debugreg.h>
#include <asm/mmu_context.h>

#include "cpuid.h"
#include "hyperv.h"
#include "mmu.h"
#include "nested.h"
#include "pmu.h"
#include "sgx.h"
#include "trace.h"
#include "vmx.h"
#include "x86.h"

static bool __read_mostly enable_shadow_vmcs = 1;
module_param_named(enable_shadow_vmcs, enable_shadow_vmcs, bool, S_IRUGO);

static bool __read_mostly nested_early_check = 0;
module_param(nested_early_check, bool, S_IRUGO);

#define CC KVM_NESTED_VMENTER_CONSISTENCY_CHECK

/*
 * Hyper-V requires all of these, so mark them as supported even though
 * they are just treated the same as all-context.
 */
#define VMX_VPID_EXTENT_SUPPORTED_MASK		\
	(VMX_VPID_EXTENT_INDIVIDUAL_ADDR_BIT |	\
	VMX_VPID_EXTENT_SINGLE_CONTEXT_BIT |	\
	VMX_VPID_EXTENT_GLOBAL_CONTEXT_BIT |	\
	VMX_VPID_EXTENT_SINGLE_NON_GLOBAL_BIT)

#define VMX_MISC_EMULATED_PREEMPTION_TIMER_RATE 5

enum {
	VMX_VMREAD_BITMAP,
	VMX_VMWRITE_BITMAP,
	VMX_BITMAP_NR
};
static unsigned long *vmx_bitmap[VMX_BITMAP_NR];

#define vmx_vmread_bitmap	(vmx_bitmap[VMX_VMREAD_BITMAP])
#define vmx_vmwrite_bitmap	(vmx_bitmap[VMX_VMWRITE_BITMAP])

struct shadow_vmcs_field {
	u16	encoding;
	u16	offset;
};
static struct shadow_vmcs_field shadow_read_only_fields[] = {
#define SHADOW_FIELD_RO(x, y) { x, offsetof(struct vmcs12, y) },
#include "vmcs_shadow_fields.h"
};
static int max_shadow_read_only_fields =
	ARRAY_SIZE(shadow_read_only_fields);

static struct shadow_vmcs_field shadow_read_write_fields[] = {
#define SHADOW_FIELD_RW(x, y) { x, offsetof(struct vmcs12, y) },
#include "vmcs_shadow_fields.h"
};
static int max_shadow_read_write_fields =
	ARRAY_SIZE(shadow_read_write_fields);

static void init_vmcs_shadow_fields(void)
{
	int i, j;

	memset(vmx_vmread_bitmap, 0xff, PAGE_SIZE);
	memset(vmx_vmwrite_bitmap, 0xff, PAGE_SIZE);

	for (i = j = 0; i < max_shadow_read_only_fields; i++) {
		struct shadow_vmcs_field entry = shadow_read_only_fields[i];
		u16 field = entry.encoding;

		if (vmcs_field_width(field) == VMCS_FIELD_WIDTH_U64 &&
		    (i + 1 == max_shadow_read_only_fields ||
		     shadow_read_only_fields[i + 1].encoding != field + 1))
			pr_err("Missing field from shadow_read_only_field %x\n",
			       field + 1);

		clear_bit(field, vmx_vmread_bitmap);
		if (field & 1)
#ifdef CONFIG_X86_64
			continue;
#else
			entry.offset += sizeof(u32);
#endif
		shadow_read_only_fields[j++] = entry;
	}
	max_shadow_read_only_fields = j;

	for (i = j = 0; i < max_shadow_read_write_fields; i++) {
		struct shadow_vmcs_field entry = shadow_read_write_fields[i];
		u16 field = entry.encoding;

		if (vmcs_field_width(field) == VMCS_FIELD_WIDTH_U64 &&
		    (i + 1 == max_shadow_read_write_fields ||
		     shadow_read_write_fields[i + 1].encoding != field + 1))
			pr_err("Missing field from shadow_read_write_field %x\n",
			       field + 1);

		WARN_ONCE(field >= GUEST_ES_AR_BYTES &&
			  field <= GUEST_TR_AR_BYTES,
			  "Update vmcs12_write_any() to drop reserved bits from AR_BYTES");

		/*
		 * PML and the preemption timer can be emulated, but the
		 * processor cannot vmwrite to fields that don't exist
		 * on bare metal.
		 */
		switch (field) {
		case GUEST_PML_INDEX:
			if (!cpu_has_vmx_pml())
				continue;
			break;
		case VMX_PREEMPTION_TIMER_VALUE:
			if (!cpu_has_vmx_preemption_timer())
				continue;
			break;
		case GUEST_INTR_STATUS:
			if (!cpu_has_vmx_apicv())
				continue;
			break;
		default:
			break;
		}

		clear_bit(field, vmx_vmwrite_bitmap);
		clear_bit(field, vmx_vmread_bitmap);
		if (field & 1)
#ifdef CONFIG_X86_64
			continue;
#else
			entry.offset += sizeof(u32);
#endif
		shadow_read_write_fields[j++] = entry;
	}
	max_shadow_read_write_fields = j;
}
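
/*
 * Editorial note (summary of the pairing logic above, not upstream text):
 * clearing a field's bit in vmx_vmread_bitmap/vmx_vmwrite_bitmap lets the CPU
 * satisfy VMREAD/VMWRITE for that encoding from the shadow VMCS without a
 * VM-exit.  A 64-bit field has a companion "high" encoding at field + 1 that
 * covers bits 63:32; on 64-bit builds that odd encoding is dropped from the
 * table (the even encoding copies the whole value), while on 32-bit builds it
 * is kept with its vmcs12 offset bumped by sizeof(u32).
 */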

/*
 * The following 3 functions, nested_vmx_succeed()/failValid()/failInvalid(),
 * set the success or error code of an emulated VMX instruction (as specified
 * by Vol 2B, VMX Instruction Reference, "Conventions"), and skip the emulated
 * instruction.
 */
static int nested_vmx_succeed(struct kvm_vcpu *vcpu)
{
	vmx_set_rflags(vcpu, vmx_get_rflags(vcpu)
			& ~(X86_EFLAGS_CF | X86_EFLAGS_PF | X86_EFLAGS_AF |
			    X86_EFLAGS_ZF | X86_EFLAGS_SF | X86_EFLAGS_OF));
	return kvm_skip_emulated_instruction(vcpu);
}

static int nested_vmx_failInvalid(struct kvm_vcpu *vcpu)
{
	vmx_set_rflags(vcpu, (vmx_get_rflags(vcpu)
			& ~(X86_EFLAGS_PF | X86_EFLAGS_AF | X86_EFLAGS_ZF |
			    X86_EFLAGS_SF | X86_EFLAGS_OF))
			| X86_EFLAGS_CF);
	return kvm_skip_emulated_instruction(vcpu);
}

static int nested_vmx_failValid(struct kvm_vcpu *vcpu,
				u32 vm_instruction_error)
{
	vmx_set_rflags(vcpu, (vmx_get_rflags(vcpu)
			& ~(X86_EFLAGS_CF | X86_EFLAGS_PF | X86_EFLAGS_AF |
			    X86_EFLAGS_SF | X86_EFLAGS_OF))
			| X86_EFLAGS_ZF);
	get_vmcs12(vcpu)->vm_instruction_error = vm_instruction_error;
	/*
	 * We don't need to force sync to shadow VMCS because
	 * VM_INSTRUCTION_ERROR is not shadowed. Enlightened VMCS 'shadows' all
	 * fields and thus must be synced.
	 */
	if (to_vmx(vcpu)->nested.hv_evmcs_vmptr != EVMPTR_INVALID)
		to_vmx(vcpu)->nested.need_vmcs12_to_shadow_sync = true;

	return kvm_skip_emulated_instruction(vcpu);
}

static int nested_vmx_fail(struct kvm_vcpu *vcpu, u32 vm_instruction_error)
{
	struct vcpu_vmx *vmx = to_vmx(vcpu);

	/*
	 * failValid writes the error number to the current VMCS, which
	 * can't be done if there isn't a current VMCS.
	 */
	if (vmx->nested.current_vmptr == INVALID_GPA &&
	    !evmptr_is_valid(vmx->nested.hv_evmcs_vmptr))
		return nested_vmx_failInvalid(vcpu);

	return nested_vmx_failValid(vcpu, vm_instruction_error);
}
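
/*
 * Quick reference for the SDM "Conventions" implemented above (editorial
 * summary, not upstream text):
 *   VMsucceed:     CF = PF = AF = ZF = SF = OF = 0
 *   VMfailInvalid: CF = 1, the other arithmetic flags cleared (used when
 *                  there is no current VMCS to hold an error number)
 *   VMfailValid:   ZF = 1, the other arithmetic flags cleared, and the error
 *                  number stored in the current VMCS's VM_INSTRUCTION_ERROR
 */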

static void nested_vmx_abort(struct kvm_vcpu *vcpu, u32 indicator)
{
	/* TODO: don't simply reset the guest here. */
	kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu);
	pr_debug_ratelimited("kvm: nested vmx abort, indicator %d\n", indicator);
}

static inline bool vmx_control_verify(u32 control, u32 low, u32 high)
{
	return fixed_bits_valid(control, low, high);
}

static inline u64 vmx_control_msr(u32 low, u32 high)
{
	return low | ((u64)high << 32);
}

static void vmx_disable_shadow_vmcs(struct vcpu_vmx *vmx)
{
	secondary_exec_controls_clearbit(vmx, SECONDARY_EXEC_SHADOW_VMCS);
	vmcs_write64(VMCS_LINK_POINTER, INVALID_GPA);
	vmx->nested.need_vmcs12_to_shadow_sync = false;
}

static inline void nested_release_evmcs(struct kvm_vcpu *vcpu)
{
	struct vcpu_vmx *vmx = to_vmx(vcpu);

	if (evmptr_is_valid(vmx->nested.hv_evmcs_vmptr)) {
		kvm_vcpu_unmap(vcpu, &vmx->nested.hv_evmcs_map, true);
		vmx->nested.hv_evmcs = NULL;
	}

	vmx->nested.hv_evmcs_vmptr = EVMPTR_INVALID;
}

static void vmx_sync_vmcs_host_state(struct vcpu_vmx *vmx,
				     struct loaded_vmcs *prev)
{
	struct vmcs_host_state *dest, *src;

	if (unlikely(!vmx->guest_state_loaded))
		return;

	src = &prev->host_state;
	dest = &vmx->loaded_vmcs->host_state;

	vmx_set_vmcs_host_state(dest, src->cr3, src->fs_sel, src->gs_sel,
				src->fs_base, src->gs_base);
	dest->ldt_sel = src->ldt_sel;
#ifdef CONFIG_X86_64
	dest->ds_sel = src->ds_sel;
	dest->es_sel = src->es_sel;
#endif
}

static void vmx_switch_vmcs(struct kvm_vcpu *vcpu, struct loaded_vmcs *vmcs)
{
	struct vcpu_vmx *vmx = to_vmx(vcpu);
	struct loaded_vmcs *prev;
	int cpu;

	if (WARN_ON_ONCE(vmx->loaded_vmcs == vmcs))
		return;

	cpu = get_cpu();
	prev = vmx->loaded_vmcs;
	vmx->loaded_vmcs = vmcs;
	vmx_vcpu_load_vmcs(vcpu, cpu, prev);
	vmx_sync_vmcs_host_state(vmx, prev);
	put_cpu();

	vcpu->arch.regs_avail = ~VMX_REGS_LAZY_LOAD_SET;

	/*
	 * All lazily updated registers will be reloaded from VMCS12 on both
	 * vmentry and vmexit.
	 */
	vcpu->arch.regs_dirty = 0;
}

/*
 * Free whatever needs to be freed from vmx->nested when L1 goes down, or
 * just stops using VMX.
 */
static void free_nested(struct kvm_vcpu *vcpu)
{
	struct vcpu_vmx *vmx = to_vmx(vcpu);

	if (WARN_ON_ONCE(vmx->loaded_vmcs != &vmx->vmcs01))
		vmx_switch_vmcs(vcpu, &vmx->vmcs01);

	if (!vmx->nested.vmxon && !vmx->nested.smm.vmxon)
		return;

	kvm_clear_request(KVM_REQ_GET_NESTED_STATE_PAGES, vcpu);

	vmx->nested.vmxon = false;
	vmx->nested.smm.vmxon = false;
	vmx->nested.vmxon_ptr = INVALID_GPA;
	free_vpid(vmx->nested.vpid02);
	vmx->nested.posted_intr_nv = -1;
	vmx->nested.current_vmptr = INVALID_GPA;
	if (enable_shadow_vmcs) {
		vmx_disable_shadow_vmcs(vmx);
		vmcs_clear(vmx->vmcs01.shadow_vmcs);
		free_vmcs(vmx->vmcs01.shadow_vmcs);
		vmx->vmcs01.shadow_vmcs = NULL;
	}
	kfree(vmx->nested.cached_vmcs12);
	vmx->nested.cached_vmcs12 = NULL;
	kfree(vmx->nested.cached_shadow_vmcs12);
	vmx->nested.cached_shadow_vmcs12 = NULL;
	/* Unpin physical memory we referred to in the vmcs02 */
	if (vmx->nested.apic_access_page) {
		kvm_release_page_clean(vmx->nested.apic_access_page);
		vmx->nested.apic_access_page = NULL;
	}
	kvm_vcpu_unmap(vcpu, &vmx->nested.virtual_apic_map, true);
	kvm_vcpu_unmap(vcpu, &vmx->nested.pi_desc_map, true);
	vmx->nested.pi_desc = NULL;

	kvm_mmu_free_roots(vcpu, &vcpu->arch.guest_mmu, KVM_MMU_ROOTS_ALL);

	nested_release_evmcs(vcpu);

	free_loaded_vmcs(&vmx->nested.vmcs02);
}

/*
 * Ensure that the current vmcs of the logical processor is the
 * vmcs01 of the vcpu before calling free_nested().
 */
void nested_vmx_free_vcpu(struct kvm_vcpu *vcpu)
{
	vcpu_load(vcpu);
	vmx_leave_nested(vcpu);
	vcpu_put(vcpu);
}

#define EPTP_PA_MASK	GENMASK_ULL(51, 12)

static bool nested_ept_root_matches(hpa_t root_hpa, u64 root_eptp, u64 eptp)
{
	return VALID_PAGE(root_hpa) &&
	       ((root_eptp & EPTP_PA_MASK) == (eptp & EPTP_PA_MASK));
}

static void nested_ept_invalidate_addr(struct kvm_vcpu *vcpu, gpa_t eptp,
				       gpa_t addr)
{
	uint i;
	struct kvm_mmu_root_info *cached_root;

	WARN_ON_ONCE(!mmu_is_nested(vcpu));

	for (i = 0; i < KVM_MMU_NUM_PREV_ROOTS; i++) {
		cached_root = &vcpu->arch.mmu->prev_roots[i];

		if (nested_ept_root_matches(cached_root->hpa, cached_root->pgd,
					    eptp))
			vcpu->arch.mmu->invlpg(vcpu, addr, cached_root->hpa);
	}
}

static void nested_ept_inject_page_fault(struct kvm_vcpu *vcpu,
					 struct x86_exception *fault)
{
	struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
	struct vcpu_vmx *vmx = to_vmx(vcpu);
	u32 vm_exit_reason;
	unsigned long exit_qualification = vcpu->arch.exit_qualification;

	if (vmx->nested.pml_full) {
		vm_exit_reason = EXIT_REASON_PML_FULL;
		vmx->nested.pml_full = false;
		exit_qualification &= INTR_INFO_UNBLOCK_NMI;
	} else {
		if (fault->error_code & PFERR_RSVD_MASK)
			vm_exit_reason = EXIT_REASON_EPT_MISCONFIG;
		else
			vm_exit_reason = EXIT_REASON_EPT_VIOLATION;

		/*
		 * Although the caller (kvm_inject_emulated_page_fault) would
		 * have already synced the faulting address in the shadow EPT
		 * tables for the current EPTP12, we also need to sync it for
		 * any other cached EPTP02s based on the same EP4TA, since the
		 * TLB associates mappings to the EP4TA rather than the full EPTP.
		 */
		nested_ept_invalidate_addr(vcpu, vmcs12->ept_pointer,
					   fault->address);
	}

	nested_vmx_vmexit(vcpu, vm_exit_reason, 0, exit_qualification);
	vmcs12->guest_physical_address = fault->address;
}

static void nested_ept_new_eptp(struct kvm_vcpu *vcpu)
{
	struct vcpu_vmx *vmx = to_vmx(vcpu);
	bool execonly = vmx->nested.msrs.ept_caps & VMX_EPT_EXECUTE_ONLY_BIT;
	int ept_lpage_level = ept_caps_to_lpage_level(vmx->nested.msrs.ept_caps);

	kvm_init_shadow_ept_mmu(vcpu, execonly, ept_lpage_level,
				nested_ept_ad_enabled(vcpu),
				nested_ept_get_eptp(vcpu));
}

static void nested_ept_init_mmu_context(struct kvm_vcpu *vcpu)
{
	WARN_ON(mmu_is_nested(vcpu));

	vcpu->arch.mmu = &vcpu->arch.guest_mmu;
	nested_ept_new_eptp(vcpu);
	vcpu->arch.mmu->get_guest_pgd     = nested_ept_get_eptp;
	vcpu->arch.mmu->inject_page_fault = nested_ept_inject_page_fault;
	vcpu->arch.mmu->get_pdptr         = kvm_pdptr_read;

	vcpu->arch.walk_mmu = &vcpu->arch.nested_mmu;
}

static void nested_ept_uninit_mmu_context(struct kvm_vcpu *vcpu)
{
	vcpu->arch.mmu = &vcpu->arch.root_mmu;
	vcpu->arch.walk_mmu = &vcpu->arch.root_mmu;
}

static bool nested_vmx_is_page_fault_vmexit(struct vmcs12 *vmcs12,
					    u16 error_code)
{
	bool inequality, bit;

	bit = (vmcs12->exception_bitmap & (1u << PF_VECTOR)) != 0;
	inequality =
		(error_code & vmcs12->page_fault_error_code_mask) !=
		 vmcs12->page_fault_error_code_match;
	return inequality ^ bit;
}


/*
 * KVM wants to inject page-faults which it got to the guest. This function
 * checks whether in a nested guest, we need to inject them to L1 or L2.
 */
static int nested_vmx_check_exception(struct kvm_vcpu *vcpu, unsigned long *exit_qual)
{
	struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
	unsigned int nr = vcpu->arch.exception.nr;
	bool has_payload = vcpu->arch.exception.has_payload;
	unsigned long payload = vcpu->arch.exception.payload;

	if (nr == PF_VECTOR) {
		if (vcpu->arch.exception.nested_apf) {
			*exit_qual = vcpu->arch.apf.nested_apf_token;
			return 1;
		}
		if (nested_vmx_is_page_fault_vmexit(vmcs12,
						    vcpu->arch.exception.error_code)) {
			*exit_qual = has_payload ? payload : vcpu->arch.cr2;
			return 1;
		}
	} else if (vmcs12->exception_bitmap & (1u << nr)) {
		if (nr == DB_VECTOR) {
			if (!has_payload) {
				payload = vcpu->arch.dr6;
				payload &= ~DR6_BT;
				payload ^= DR6_ACTIVE_LOW;
			}
			*exit_qual = payload;
		} else
			*exit_qual = 0;
		return 1;
	}

	return 0;
}
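
/*
 * Editorial summary of the routing above: a return value of 1 means the
 * pending exception must be reflected to L1 as a VM-exit, with *exit_qual
 * holding the exit qualification to report (the faulting address for #PF,
 * the cooked DR6 payload for #DB, 0 otherwise); a return value of 0 means
 * the exception is delivered directly to L2.
 */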


static void vmx_inject_page_fault_nested(struct kvm_vcpu *vcpu,
					 struct x86_exception *fault)
{
	struct vmcs12 *vmcs12 = get_vmcs12(vcpu);

	WARN_ON(!is_guest_mode(vcpu));

	if (nested_vmx_is_page_fault_vmexit(vmcs12, fault->error_code) &&
	    !to_vmx(vcpu)->nested.nested_run_pending) {
		vmcs12->vm_exit_intr_error_code = fault->error_code;
		nested_vmx_vmexit(vcpu, EXIT_REASON_EXCEPTION_NMI,
				  PF_VECTOR | INTR_TYPE_HARD_EXCEPTION |
				  INTR_INFO_DELIVER_CODE_MASK | INTR_INFO_VALID_MASK,
				  fault->address);
	} else {
		kvm_inject_page_fault(vcpu, fault);
	}
}

static int nested_vmx_check_io_bitmap_controls(struct kvm_vcpu *vcpu,
					       struct vmcs12 *vmcs12)
{
	if (!nested_cpu_has(vmcs12, CPU_BASED_USE_IO_BITMAPS))
		return 0;

	if (CC(!page_address_valid(vcpu, vmcs12->io_bitmap_a)) ||
	    CC(!page_address_valid(vcpu, vmcs12->io_bitmap_b)))
		return -EINVAL;

	return 0;
}

static int nested_vmx_check_msr_bitmap_controls(struct kvm_vcpu *vcpu,
						struct vmcs12 *vmcs12)
{
	if (!nested_cpu_has(vmcs12, CPU_BASED_USE_MSR_BITMAPS))
		return 0;

	if (CC(!page_address_valid(vcpu, vmcs12->msr_bitmap)))
		return -EINVAL;

	return 0;
}

static int nested_vmx_check_tpr_shadow_controls(struct kvm_vcpu *vcpu,
						struct vmcs12 *vmcs12)
{
	if (!nested_cpu_has(vmcs12, CPU_BASED_TPR_SHADOW))
		return 0;

	if (CC(!page_address_valid(vcpu, vmcs12->virtual_apic_page_addr)))
		return -EINVAL;

	return 0;
}

/*
 * For x2APIC MSRs, ignore the vmcs01 bitmap.  L1 can enable x2APIC without L1
 * itself utilizing x2APIC.  All MSRs were previously set to be intercepted,
 * only the "disable intercept" case needs to be handled.
 */
static void nested_vmx_disable_intercept_for_x2apic_msr(unsigned long *msr_bitmap_l1,
							unsigned long *msr_bitmap_l0,
							u32 msr, int type)
{
	if (type & MSR_TYPE_R && !vmx_test_msr_bitmap_read(msr_bitmap_l1, msr))
		vmx_clear_msr_bitmap_read(msr_bitmap_l0, msr);

	if (type & MSR_TYPE_W && !vmx_test_msr_bitmap_write(msr_bitmap_l1, msr))
		vmx_clear_msr_bitmap_write(msr_bitmap_l0, msr);
}

static inline void enable_x2apic_msr_intercepts(unsigned long *msr_bitmap)
{
	int msr;

	for (msr = 0x800; msr <= 0x8ff; msr += BITS_PER_LONG) {
		unsigned word = msr / BITS_PER_LONG;

		msr_bitmap[word] = ~0;
		msr_bitmap[word + (0x800 / sizeof(long))] = ~0;
	}
}

#define BUILD_NVMX_MSR_INTERCEPT_HELPER(rw)					\
static inline									\
void nested_vmx_set_msr_##rw##_intercept(struct vcpu_vmx *vmx,			\
					 unsigned long *msr_bitmap_l1,		\
					 unsigned long *msr_bitmap_l0, u32 msr)	\
{										\
	if (vmx_test_msr_bitmap_##rw(vmx->vmcs01.msr_bitmap, msr) ||		\
	    vmx_test_msr_bitmap_##rw(msr_bitmap_l1, msr))			\
		vmx_set_msr_bitmap_##rw(msr_bitmap_l0, msr);			\
	else									\
		vmx_clear_msr_bitmap_##rw(msr_bitmap_l0, msr);			\
}
BUILD_NVMX_MSR_INTERCEPT_HELPER(read)
BUILD_NVMX_MSR_INTERCEPT_HELPER(write)

static inline void nested_vmx_set_intercept_for_msr(struct vcpu_vmx *vmx,
						    unsigned long *msr_bitmap_l1,
						    unsigned long *msr_bitmap_l0,
						    u32 msr, int types)
{
	if (types & MSR_TYPE_R)
		nested_vmx_set_msr_read_intercept(vmx, msr_bitmap_l1,
						  msr_bitmap_l0, msr);
	if (types & MSR_TYPE_W)
		nested_vmx_set_msr_write_intercept(vmx, msr_bitmap_l1,
						   msr_bitmap_l0, msr);
}

/*
 * Merge L0's and L1's MSR bitmap, return false to indicate that
 * we do not use the hardware.
 */
static inline bool nested_vmx_prepare_msr_bitmap(struct kvm_vcpu *vcpu,
						 struct vmcs12 *vmcs12)
{
	struct vcpu_vmx *vmx = to_vmx(vcpu);
	int msr;
	unsigned long *msr_bitmap_l1;
	unsigned long *msr_bitmap_l0 = vmx->nested.vmcs02.msr_bitmap;
	struct hv_enlightened_vmcs *evmcs = vmx->nested.hv_evmcs;
	struct kvm_host_map *map = &vmx->nested.msr_bitmap_map;

	/* Nothing to do if the MSR bitmap is not in use. */
	if (!cpu_has_vmx_msr_bitmap() ||
	    !nested_cpu_has(vmcs12, CPU_BASED_USE_MSR_BITMAPS))
		return false;

	/*
	 * MSR bitmap update can be skipped when:
	 * - MSR bitmap for L1 hasn't changed.
	 * - Nested hypervisor (L1) is attempting to launch the same L2 as
	 *   before.
	 * - Nested hypervisor (L1) has enabled 'Enlightened MSR Bitmap' feature
	 *   and tells KVM (L0) there were no changes in MSR bitmap for L2.
	 */
	if (!vmx->nested.force_msr_bitmap_recalc && evmcs &&
	    evmcs->hv_enlightenments_control.msr_bitmap &&
	    evmcs->hv_clean_fields & HV_VMX_ENLIGHTENED_CLEAN_FIELD_MSR_BITMAP)
		return true;

	if (kvm_vcpu_map(vcpu, gpa_to_gfn(vmcs12->msr_bitmap), map))
		return false;

	msr_bitmap_l1 = (unsigned long *)map->hva;

	/*
	 * To keep the control flow simple, pay eight 8-byte writes (sixteen
	 * 4-byte writes on 32-bit systems) up front to enable intercepts for
	 * the x2APIC MSR range and selectively toggle those relevant to L2.
	 */
	enable_x2apic_msr_intercepts(msr_bitmap_l0);

	if (nested_cpu_has_virt_x2apic_mode(vmcs12)) {
		if (nested_cpu_has_apic_reg_virt(vmcs12)) {
			/*
			 * L0 need not intercept reads for MSRs between 0x800
			 * and 0x8ff, it just lets the processor take the value
			 * from the virtual-APIC page; take those 256 bits
			 * directly from the L1 bitmap.
			 */
			for (msr = 0x800; msr <= 0x8ff; msr += BITS_PER_LONG) {
				unsigned word = msr / BITS_PER_LONG;

				msr_bitmap_l0[word] = msr_bitmap_l1[word];
			}
		}

		nested_vmx_disable_intercept_for_x2apic_msr(
			msr_bitmap_l1, msr_bitmap_l0,
			X2APIC_MSR(APIC_TASKPRI),
			MSR_TYPE_R | MSR_TYPE_W);

		if (nested_cpu_has_vid(vmcs12)) {
			nested_vmx_disable_intercept_for_x2apic_msr(
				msr_bitmap_l1, msr_bitmap_l0,
				X2APIC_MSR(APIC_EOI),
				MSR_TYPE_W);
			nested_vmx_disable_intercept_for_x2apic_msr(
				msr_bitmap_l1, msr_bitmap_l0,
				X2APIC_MSR(APIC_SELF_IPI),
				MSR_TYPE_W);
		}
	}

	/*
	 * Always check vmcs01's bitmap to honor userspace MSR filters and any
	 * other runtime changes to vmcs01's bitmap, e.g. dynamic pass-through.
	 */
#ifdef CONFIG_X86_64
	nested_vmx_set_intercept_for_msr(vmx, msr_bitmap_l1, msr_bitmap_l0,
					 MSR_FS_BASE, MSR_TYPE_RW);

	nested_vmx_set_intercept_for_msr(vmx, msr_bitmap_l1, msr_bitmap_l0,
					 MSR_GS_BASE, MSR_TYPE_RW);

	nested_vmx_set_intercept_for_msr(vmx, msr_bitmap_l1, msr_bitmap_l0,
					 MSR_KERNEL_GS_BASE, MSR_TYPE_RW);
#endif
	nested_vmx_set_intercept_for_msr(vmx, msr_bitmap_l1, msr_bitmap_l0,
					 MSR_IA32_SPEC_CTRL, MSR_TYPE_RW);

	nested_vmx_set_intercept_for_msr(vmx, msr_bitmap_l1, msr_bitmap_l0,
					 MSR_IA32_PRED_CMD, MSR_TYPE_W);

	kvm_vcpu_unmap(vcpu, &vmx->nested.msr_bitmap_map, false);

	vmx->nested.force_msr_bitmap_recalc = false;

	return true;
}

static void nested_cache_shadow_vmcs12(struct kvm_vcpu *vcpu,
				       struct vmcs12 *vmcs12)
{
	struct vcpu_vmx *vmx = to_vmx(vcpu);
	struct gfn_to_hva_cache *ghc = &vmx->nested.shadow_vmcs12_cache;

	if (!nested_cpu_has_shadow_vmcs(vmcs12) ||
	    vmcs12->vmcs_link_pointer == INVALID_GPA)
		return;

	if (ghc->gpa != vmcs12->vmcs_link_pointer &&
	    kvm_gfn_to_hva_cache_init(vcpu->kvm, ghc,
				      vmcs12->vmcs_link_pointer, VMCS12_SIZE))
		return;

	kvm_read_guest_cached(vmx->vcpu.kvm, ghc, get_shadow_vmcs12(vcpu),
			      VMCS12_SIZE);
}

static void nested_flush_cached_shadow_vmcs12(struct kvm_vcpu *vcpu,
					      struct vmcs12 *vmcs12)
{
	struct vcpu_vmx *vmx = to_vmx(vcpu);
	struct gfn_to_hva_cache *ghc = &vmx->nested.shadow_vmcs12_cache;

	if (!nested_cpu_has_shadow_vmcs(vmcs12) ||
	    vmcs12->vmcs_link_pointer == INVALID_GPA)
		return;

	if (ghc->gpa != vmcs12->vmcs_link_pointer &&
	    kvm_gfn_to_hva_cache_init(vcpu->kvm, ghc,
				      vmcs12->vmcs_link_pointer, VMCS12_SIZE))
		return;

	kvm_write_guest_cached(vmx->vcpu.kvm, ghc, get_shadow_vmcs12(vcpu),
			       VMCS12_SIZE);
}

/*
 * In nested virtualization, check if L1 has set
 * VM_EXIT_ACK_INTR_ON_EXIT
 */
static bool nested_exit_intr_ack_set(struct kvm_vcpu *vcpu)
{
	return get_vmcs12(vcpu)->vm_exit_controls &
		VM_EXIT_ACK_INTR_ON_EXIT;
}

static int nested_vmx_check_apic_access_controls(struct kvm_vcpu *vcpu,
						 struct vmcs12 *vmcs12)
{
	if (nested_cpu_has2(vmcs12, SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES) &&
	    CC(!page_address_valid(vcpu, vmcs12->apic_access_addr)))
		return -EINVAL;
	else
		return 0;
}

static int nested_vmx_check_apicv_controls(struct kvm_vcpu *vcpu,
					   struct vmcs12 *vmcs12)
{
	if (!nested_cpu_has_virt_x2apic_mode(vmcs12) &&
	    !nested_cpu_has_apic_reg_virt(vmcs12) &&
	    !nested_cpu_has_vid(vmcs12) &&
	    !nested_cpu_has_posted_intr(vmcs12))
		return 0;

	/*
	 * If virtualize x2apic mode is enabled,
	 * virtualize apic access must be disabled.
	 */
	if (CC(nested_cpu_has_virt_x2apic_mode(vmcs12) &&
	       nested_cpu_has2(vmcs12, SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES)))
		return -EINVAL;

	/*
	 * If virtual interrupt delivery is enabled,
	 * we must exit on external interrupts.
	 */
	if (CC(nested_cpu_has_vid(vmcs12) && !nested_exit_on_intr(vcpu)))
		return -EINVAL;

	/*
	 * bits 15:8 should be zero in posted_intr_nv,
	 * the descriptor address has already been checked
	 * in nested_get_vmcs12_pages.
	 *
	 * bits 5:0 of posted_intr_desc_addr should be zero.
	 */
	if (nested_cpu_has_posted_intr(vmcs12) &&
	    (CC(!nested_cpu_has_vid(vmcs12)) ||
	     CC(!nested_exit_intr_ack_set(vcpu)) ||
	     CC((vmcs12->posted_intr_nv & 0xff00)) ||
	     CC(!kvm_vcpu_is_legal_aligned_gpa(vcpu, vmcs12->posted_intr_desc_addr, 64))))
		return -EINVAL;

	/* tpr shadow is needed by all apicv features. */
	if (CC(!nested_cpu_has(vmcs12, CPU_BASED_TPR_SHADOW)))
		return -EINVAL;

	return 0;
}

static int nested_vmx_check_msr_switch(struct kvm_vcpu *vcpu,
				       u32 count, u64 addr)
{
	if (count == 0)
		return 0;

	if (!kvm_vcpu_is_legal_aligned_gpa(vcpu, addr, 16) ||
	    !kvm_vcpu_is_legal_gpa(vcpu, (addr + count * sizeof(struct vmx_msr_entry) - 1)))
		return -EINVAL;

	return 0;
}

static int nested_vmx_check_exit_msr_switch_controls(struct kvm_vcpu *vcpu,
						     struct vmcs12 *vmcs12)
{
	if (CC(nested_vmx_check_msr_switch(vcpu,
					   vmcs12->vm_exit_msr_load_count,
					   vmcs12->vm_exit_msr_load_addr)) ||
	    CC(nested_vmx_check_msr_switch(vcpu,
					   vmcs12->vm_exit_msr_store_count,
					   vmcs12->vm_exit_msr_store_addr)))
		return -EINVAL;

	return 0;
}

static int nested_vmx_check_entry_msr_switch_controls(struct kvm_vcpu *vcpu,
						      struct vmcs12 *vmcs12)
{
	if (CC(nested_vmx_check_msr_switch(vcpu,
					   vmcs12->vm_entry_msr_load_count,
					   vmcs12->vm_entry_msr_load_addr)))
		return -EINVAL;

	return 0;
}

static int nested_vmx_check_pml_controls(struct kvm_vcpu *vcpu,
					 struct vmcs12 *vmcs12)
{
	if (!nested_cpu_has_pml(vmcs12))
		return 0;

	if (CC(!nested_cpu_has_ept(vmcs12)) ||
	    CC(!page_address_valid(vcpu, vmcs12->pml_address)))
		return -EINVAL;

	return 0;
}

static int nested_vmx_check_unrestricted_guest_controls(struct kvm_vcpu *vcpu,
							struct vmcs12 *vmcs12)
{
	if (CC(nested_cpu_has2(vmcs12, SECONDARY_EXEC_UNRESTRICTED_GUEST) &&
	       !nested_cpu_has_ept(vmcs12)))
		return -EINVAL;
	return 0;
}

static int nested_vmx_check_mode_based_ept_exec_controls(struct kvm_vcpu *vcpu,
							  struct vmcs12 *vmcs12)
{
	if (CC(nested_cpu_has2(vmcs12, SECONDARY_EXEC_MODE_BASED_EPT_EXEC) &&
	       !nested_cpu_has_ept(vmcs12)))
		return -EINVAL;
	return 0;
}

static int nested_vmx_check_shadow_vmcs_controls(struct kvm_vcpu *vcpu,
						 struct vmcs12 *vmcs12)
{
	if (!nested_cpu_has_shadow_vmcs(vmcs12))
		return 0;

	if (CC(!page_address_valid(vcpu, vmcs12->vmread_bitmap)) ||
	    CC(!page_address_valid(vcpu, vmcs12->vmwrite_bitmap)))
		return -EINVAL;

	return 0;
}

static int nested_vmx_msr_check_common(struct kvm_vcpu *vcpu,
				       struct vmx_msr_entry *e)
{
	/* x2APIC MSR accesses are not allowed */
	if (CC(vcpu->arch.apic_base & X2APIC_ENABLE && e->index >> 8 == 0x8))
		return -EINVAL;
	if (CC(e->index == MSR_IA32_UCODE_WRITE) || /* SDM Table 35-2 */
	    CC(e->index == MSR_IA32_UCODE_REV))
		return -EINVAL;
	if (CC(e->reserved != 0))
		return -EINVAL;
	return 0;
}

static int nested_vmx_load_msr_check(struct kvm_vcpu *vcpu,
				     struct vmx_msr_entry *e)
{
	if (CC(e->index == MSR_FS_BASE) ||
	    CC(e->index == MSR_GS_BASE) ||
	    CC(e->index == MSR_IA32_SMM_MONITOR_CTL) || /* SMM is not supported */
	    nested_vmx_msr_check_common(vcpu, e))
		return -EINVAL;
	return 0;
}

static int nested_vmx_store_msr_check(struct kvm_vcpu *vcpu,
				      struct vmx_msr_entry *e)
{
	if (CC(e->index == MSR_IA32_SMBASE) || /* SMM is not supported */
	    nested_vmx_msr_check_common(vcpu, e))
		return -EINVAL;
	return 0;
}

static u32 nested_vmx_max_atomic_switch_msrs(struct kvm_vcpu *vcpu)
{
	struct vcpu_vmx *vmx = to_vmx(vcpu);
	u64 vmx_misc = vmx_control_msr(vmx->nested.msrs.misc_low,
				       vmx->nested.msrs.misc_high);

	return (vmx_misc_max_msr(vmx_misc) + 1) * VMX_MISC_MSR_LIST_MULTIPLIER;
}
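
/*
 * Editorial note: vmx_misc_max_msr() extracts the SDM's "recommended maximum"
 * encoding from IA32_VMX_MISC (bits 27:25), so the capacity works out to
 * (N + 1) * 512 entries, i.e. at most 4096, which is the bound that the
 * wrap-around comment in nested_vmx_load_msr() below relies on.
 */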

/*
 * Load guest's/host's msr at nested entry/exit.
 * return 0 for success, entry index for failure.
 *
 * One of the failure modes for MSR load/store is when a list exceeds the
 * virtual hardware's capacity. To maintain compatibility with hardware as
 * much as possible, process all valid entries before failing rather than
 * precheck for a capacity violation.
 */
static u32 nested_vmx_load_msr(struct kvm_vcpu *vcpu, u64 gpa, u32 count)
{
	u32 i;
	struct vmx_msr_entry e;
	u32 max_msr_list_size = nested_vmx_max_atomic_switch_msrs(vcpu);

	for (i = 0; i < count; i++) {
		if (unlikely(i >= max_msr_list_size))
			goto fail;

		if (kvm_vcpu_read_guest(vcpu, gpa + i * sizeof(e),
					&e, sizeof(e))) {
			pr_debug_ratelimited(
				"%s cannot read MSR entry (%u, 0x%08llx)\n",
				__func__, i, gpa + i * sizeof(e));
			goto fail;
		}
		if (nested_vmx_load_msr_check(vcpu, &e)) {
			pr_debug_ratelimited(
				"%s check failed (%u, 0x%x, 0x%x)\n",
				__func__, i, e.index, e.reserved);
			goto fail;
		}
		if (kvm_set_msr(vcpu, e.index, e.value)) {
			pr_debug_ratelimited(
				"%s cannot write MSR (%u, 0x%x, 0x%llx)\n",
				__func__, i, e.index, e.value);
			goto fail;
		}
	}
	return 0;
fail:
	/* Note, max_msr_list_size is at most 4096, i.e. this can't wrap. */
	return i + 1;
}
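
/*
 * Editorial note on the return convention above: 0 means every entry was
 * loaded; a non-zero value is the 1-based index of the entry that failed,
 * which is what L1 expects to see as the exit qualification for a VM-entry
 * failure due to MSR loading.
 */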

static bool nested_vmx_get_vmexit_msr_value(struct kvm_vcpu *vcpu,
					    u32 msr_index,
					    u64 *data)
{
	struct vcpu_vmx *vmx = to_vmx(vcpu);

	/*
	 * If the L0 hypervisor stored a more accurate value for the TSC that
	 * does not include the time taken for emulation of the L2->L1
	 * VM-exit in L0, use the more accurate value.
	 */
	if (msr_index == MSR_IA32_TSC) {
		int i = vmx_find_loadstore_msr_slot(&vmx->msr_autostore.guest,
						    MSR_IA32_TSC);

		if (i >= 0) {
			u64 val = vmx->msr_autostore.guest.val[i].value;

			*data = kvm_read_l1_tsc(vcpu, val);
			return true;
		}
	}

	if (kvm_get_msr(vcpu, msr_index, data)) {
		pr_debug_ratelimited("%s cannot read MSR (0x%x)\n", __func__,
				     msr_index);
		return false;
	}
	return true;
}

static bool read_and_check_msr_entry(struct kvm_vcpu *vcpu, u64 gpa, int i,
				     struct vmx_msr_entry *e)
{
	if (kvm_vcpu_read_guest(vcpu,
				gpa + i * sizeof(*e),
				e, 2 * sizeof(u32))) {
		pr_debug_ratelimited(
			"%s cannot read MSR entry (%u, 0x%08llx)\n",
			__func__, i, gpa + i * sizeof(*e));
		return false;
	}
	if (nested_vmx_store_msr_check(vcpu, e)) {
		pr_debug_ratelimited(
			"%s check failed (%u, 0x%x, 0x%x)\n",
			__func__, i, e->index, e->reserved);
		return false;
	}
	return true;
}

static int nested_vmx_store_msr(struct kvm_vcpu *vcpu, u64 gpa, u32 count)
{
	u64 data;
	u32 i;
	struct vmx_msr_entry e;
	u32 max_msr_list_size = nested_vmx_max_atomic_switch_msrs(vcpu);

	for (i = 0; i < count; i++) {
		if (unlikely(i >= max_msr_list_size))
			return -EINVAL;

		if (!read_and_check_msr_entry(vcpu, gpa, i, &e))
			return -EINVAL;

		if (!nested_vmx_get_vmexit_msr_value(vcpu, e.index, &data))
			return -EINVAL;

		if (kvm_vcpu_write_guest(vcpu,
					 gpa + i * sizeof(e) +
					     offsetof(struct vmx_msr_entry, value),
					 &data, sizeof(data))) {
			pr_debug_ratelimited(
				"%s cannot write MSR (%u, 0x%x, 0x%llx)\n",
				__func__, i, e.index, data);
			return -EINVAL;
		}
	}
	return 0;
}

static bool nested_msr_store_list_has_msr(struct kvm_vcpu *vcpu, u32 msr_index)
{
	struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
	u32 count = vmcs12->vm_exit_msr_store_count;
	u64 gpa = vmcs12->vm_exit_msr_store_addr;
	struct vmx_msr_entry e;
	u32 i;

	for (i = 0; i < count; i++) {
		if (!read_and_check_msr_entry(vcpu, gpa, i, &e))
			return false;

		if (e.index == msr_index)
			return true;
	}
	return false;
}

static void prepare_vmx_msr_autostore_list(struct kvm_vcpu *vcpu,
					   u32 msr_index)
{
	struct vcpu_vmx *vmx = to_vmx(vcpu);
	struct vmx_msrs *autostore = &vmx->msr_autostore.guest;
	bool in_vmcs12_store_list;
	int msr_autostore_slot;
	bool in_autostore_list;
	int last;

	msr_autostore_slot = vmx_find_loadstore_msr_slot(autostore, msr_index);
	in_autostore_list = msr_autostore_slot >= 0;
	in_vmcs12_store_list = nested_msr_store_list_has_msr(vcpu, msr_index);

	if (in_vmcs12_store_list && !in_autostore_list) {
		if (autostore->nr == MAX_NR_LOADSTORE_MSRS) {
			/*
			 * Emulated VMEntry does not fail here.  Instead a less
			 * accurate value will be returned by
			 * nested_vmx_get_vmexit_msr_value() using kvm_get_msr()
			 * instead of reading the value from the vmcs02 VMExit
			 * MSR-store area.
			 */
			pr_warn_ratelimited(
				"Not enough msr entries in msr_autostore.  Can't add msr %x\n",
				msr_index);
			return;
		}
		last = autostore->nr++;
		autostore->val[last].index = msr_index;
	} else if (!in_vmcs12_store_list && in_autostore_list) {
		last = --autostore->nr;
		autostore->val[msr_autostore_slot] = autostore->val[last];
	}
}
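
/*
 * Editorial summary: prepare_vmx_msr_autostore_list() keeps the vmcs02
 * VM-exit MSR-store area (msr_autostore.guest) in sync with whether vmcs12
 * wants the MSR stored on exit.  Additions append to the array; removals
 * swap the last element into the vacated slot, so the list stays dense but
 * unordered.
 */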

/*
 * Load guest's/host's cr3 at nested entry/exit.  @nested_ept is true if we are
 * emulating VM-Entry into a guest with EPT enabled.  On failure, the expected
 * Exit Qualification (for a VM-Entry consistency check VM-Exit) is assigned to
 * @entry_failure_code.
 */
static int nested_vmx_load_cr3(struct kvm_vcpu *vcpu, unsigned long cr3,
			       bool nested_ept, bool reload_pdptrs,
			       enum vm_entry_failure_code *entry_failure_code)
{
	if (CC(kvm_vcpu_is_illegal_gpa(vcpu, cr3))) {
		*entry_failure_code = ENTRY_FAIL_DEFAULT;
		return -EINVAL;
	}

	/*
	 * If PAE paging and EPT are both on, CR3 is not used by the CPU and
	 * must not be dereferenced.
	 */
	if (reload_pdptrs && !nested_ept && is_pae_paging(vcpu) &&
	    CC(!load_pdptrs(vcpu, cr3))) {
		*entry_failure_code = ENTRY_FAIL_PDPTE;
		return -EINVAL;
	}

	if (!nested_ept)
		kvm_mmu_new_pgd(vcpu, cr3);

	vcpu->arch.cr3 = cr3;
	kvm_register_mark_dirty(vcpu, VCPU_EXREG_CR3);

	/* Re-initialize the MMU, e.g. to pick up CR4 MMU role changes. */
	kvm_init_mmu(vcpu);

	return 0;
}

/*
 * Returns true if KVM is able to configure the CPU to tag TLB entries
 * populated by L2 differently than TLB entries populated
 * by L1.
 *
 * If L0 uses EPT, L1 and L2 run with different EPTP because
 * guest_mode is part of kvm_mmu_page_role. Thus, TLB entries
 * are tagged with different EPTP.
 *
 * If L1 uses VPID and we allocated a vpid02, TLB entries are tagged
 * with different VPID (L1 entries are tagged with vmx->vpid
 * while L2 entries are tagged with vmx->nested.vpid02).
 */
static bool nested_has_guest_tlb_tag(struct kvm_vcpu *vcpu)
{
	struct vmcs12 *vmcs12 = get_vmcs12(vcpu);

	return enable_ept ||
	       (nested_cpu_has_vpid(vmcs12) && to_vmx(vcpu)->nested.vpid02);
}

static void nested_vmx_transition_tlb_flush(struct kvm_vcpu *vcpu,
					    struct vmcs12 *vmcs12,
					    bool is_vmenter)
{
	struct vcpu_vmx *vmx = to_vmx(vcpu);

	/*
	 * If vmcs12 doesn't use VPID, L1 expects linear and combined mappings
	 * for *all* contexts to be flushed on VM-Enter/VM-Exit, i.e. it's a
	 * full TLB flush from the guest's perspective.  This is required even
	 * if VPID is disabled in the host as KVM may need to synchronize the
	 * MMU in response to the guest TLB flush.
	 *
	 * Note, using TLB_FLUSH_GUEST is correct even if nested EPT is in use.
	 * EPT is a special snowflake, as guest-physical mappings aren't
	 * flushed on VPID invalidations, including VM-Enter or VM-Exit with
	 * VPID disabled.  As a result, KVM _never_ needs to sync nEPT
	 * entries on VM-Enter because L1 can't rely on VM-Enter to flush
	 * those mappings.
	 */
	if (!nested_cpu_has_vpid(vmcs12)) {
		kvm_make_request(KVM_REQ_TLB_FLUSH_GUEST, vcpu);
		return;
	}

	/* L2 should never have a VPID if VPID is disabled. */
	WARN_ON(!enable_vpid);

	/*
	 * VPID is enabled and in use by vmcs12.  If vpid12 is changing, then
	 * emulate a guest TLB flush as KVM does not track vpid12 history nor
	 * is the VPID incorporated into the MMU context.  I.e. KVM must assume
	 * that the new vpid12 has never been used and thus represents a new
	 * guest ASID that cannot have entries in the TLB.
	 */
	if (is_vmenter && vmcs12->virtual_processor_id != vmx->nested.last_vpid) {
		vmx->nested.last_vpid = vmcs12->virtual_processor_id;
		kvm_make_request(KVM_REQ_TLB_FLUSH_GUEST, vcpu);
		return;
	}

	/*
	 * If VPID is enabled, used by vmcs12, and vpid12 is not changing but
	 * does not have a unique TLB tag (ASID), i.e. EPT is disabled and
	 * KVM was unable to allocate a VPID for L2, flush the current context
	 * as the effective ASID is common to both L1 and L2.
	 */
	if (!nested_has_guest_tlb_tag(vcpu))
		kvm_make_request(KVM_REQ_TLB_FLUSH_CURRENT, vcpu);
}
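
/*
 * Editorial summary of the decision above: no vpid12 -> TLB_FLUSH_GUEST
 * (architecturally the transition is a full flush from L1's perspective);
 * vpid12 changed on VM-Enter -> TLB_FLUSH_GUEST (treat the new vpid12 as a
 * never-used ASID); otherwise, if L1 and L2 share the hardware TLB tag
 * (no EPT and no vpid02) -> TLB_FLUSH_CURRENT; else nothing is flushed.
 */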

static bool is_bitwise_subset(u64 superset, u64 subset, u64 mask)
{
	superset &= mask;
	subset &= mask;

	return (superset | subset) == superset;
}
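
/*
 * Editorial example: is_bitwise_subset(0b1010, 0b0010, -1ULL) is true because
 * every bit set in the subset is also set in the superset, whereas
 * is_bitwise_subset(0b1010, 0b0100, -1ULL) is false.  The restore helpers
 * below use this to reject userspace values that set a feature bit, or clear
 * a must-be-1 bit, that the original capability MSR value does not allow.
 */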

static int vmx_restore_vmx_basic(struct vcpu_vmx *vmx, u64 data)
{
	const u64 feature_and_reserved =
		/* feature (except bit 48; see below) */
		BIT_ULL(49) | BIT_ULL(54) | BIT_ULL(55) |
		/* reserved */
		BIT_ULL(31) | GENMASK_ULL(47, 45) | GENMASK_ULL(63, 56);
	u64 vmx_basic = vmx->nested.msrs.basic;

	if (!is_bitwise_subset(vmx_basic, data, feature_and_reserved))
		return -EINVAL;

	/*
	 * KVM does not emulate a version of VMX that constrains physical
	 * addresses of VMX structures (e.g. VMCS) to 32-bits.
	 */
	if (data & BIT_ULL(48))
		return -EINVAL;

	if (vmx_basic_vmcs_revision_id(vmx_basic) !=
	    vmx_basic_vmcs_revision_id(data))
		return -EINVAL;

	if (vmx_basic_vmcs_size(vmx_basic) > vmx_basic_vmcs_size(data))
		return -EINVAL;

	vmx->nested.msrs.basic = data;
	return 0;
}
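
/*
 * Editorial note, per the SDM's IA32_VMX_BASIC layout (summarized here for
 * context): bits 30:0 hold the VMCS revision identifier, bits 44:32 the VMCS
 * region size, bit 48 the 32-bit physical-address limitation rejected above,
 * bit 49 dual-monitor SMM support, bit 54 INS/OUTS instruction-information
 * reporting, and bit 55 the availability of the "true" control MSRs.
 */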
1249
1250static int
1251vmx_restore_control_msr(struct vcpu_vmx *vmx, u32 msr_index, u64 data)
1252{
1253 u64 supported;
1254 u32 *lowp, *highp;
1255
1256 switch (msr_index) {
1257 case MSR_IA32_VMX_TRUE_PINBASED_CTLS:
1258 lowp = &vmx->nested.msrs.pinbased_ctls_low;
1259 highp = &vmx->nested.msrs.pinbased_ctls_high;
1260 break;
1261 case MSR_IA32_VMX_TRUE_PROCBASED_CTLS:
1262 lowp = &vmx->nested.msrs.procbased_ctls_low;
1263 highp = &vmx->nested.msrs.procbased_ctls_high;
1264 break;
1265 case MSR_IA32_VMX_TRUE_EXIT_CTLS:
1266 lowp = &vmx->nested.msrs.exit_ctls_low;
1267 highp = &vmx->nested.msrs.exit_ctls_high;
1268 break;
1269 case MSR_IA32_VMX_TRUE_ENTRY_CTLS:
1270 lowp = &vmx->nested.msrs.entry_ctls_low;
1271 highp = &vmx->nested.msrs.entry_ctls_high;
1272 break;
1273 case MSR_IA32_VMX_PROCBASED_CTLS2:
1274 lowp = &vmx->nested.msrs.secondary_ctls_low;
1275 highp = &vmx->nested.msrs.secondary_ctls_high;
1276 break;
1277 default:
1278 BUG();
1279 }
1280
1281 supported = vmx_control_msr(*lowp, *highp);
1282
1283 /* Check must-be-1 bits are still 1. */
1284 if (!is_bitwise_subset(data, supported, GENMASK_ULL(31, 0)))
1285 return -EINVAL;
1286
1287 /* Check must-be-0 bits are still 0. */
1288 if (!is_bitwise_subset(supported, data, GENMASK_ULL(63, 32)))
1289 return -EINVAL;
1290
1291 *lowp = data;
1292 *highp = data >> 32;
1293 return 0;
1294}
1295
1296static int vmx_restore_vmx_misc(struct vcpu_vmx *vmx, u64 data)
1297{
1298 const u64 feature_and_reserved_bits =
1299 /* feature */
1300 BIT_ULL(5) | GENMASK_ULL(8, 6) | BIT_ULL(14) | BIT_ULL(15) |
1301 BIT_ULL(28) | BIT_ULL(29) | BIT_ULL(30) |
1302 /* reserved */
1303 GENMASK_ULL(13, 9) | BIT_ULL(31);
1304 u64 vmx_misc;
1305
1306 vmx_misc = vmx_control_msr(vmx->nested.msrs.misc_low,
1307 vmx->nested.msrs.misc_high);
1308
1309 if (!is_bitwise_subset(vmx_misc, data, feature_and_reserved_bits))
1310 return -EINVAL;
1311
1312 if ((vmx->nested.msrs.pinbased_ctls_high &
1313 PIN_BASED_VMX_PREEMPTION_TIMER) &&
1314 vmx_misc_preemption_timer_rate(data) !=
1315 vmx_misc_preemption_timer_rate(vmx_misc))
1316 return -EINVAL;
1317
1318 if (vmx_misc_cr3_count(data) > vmx_misc_cr3_count(vmx_misc))
1319 return -EINVAL;
1320
1321 if (vmx_misc_max_msr(data) > vmx_misc_max_msr(vmx_misc))
1322 return -EINVAL;
1323
1324 if (vmx_misc_mseg_revid(data) != vmx_misc_mseg_revid(vmx_misc))
1325 return -EINVAL;
1326
1327 vmx->nested.msrs.misc_low = data;
1328 vmx->nested.msrs.misc_high = data >> 32;
1329
Sean Christopherson55d23752018-12-03 13:53:18 -08001330 return 0;
1331}
1332
1333static int vmx_restore_vmx_ept_vpid_cap(struct vcpu_vmx *vmx, u64 data)
1334{
1335 u64 vmx_ept_vpid_cap;
1336
1337 vmx_ept_vpid_cap = vmx_control_msr(vmx->nested.msrs.ept_caps,
1338 vmx->nested.msrs.vpid_caps);
1339
1340 /* Every bit is either reserved or a feature bit. */
1341 if (!is_bitwise_subset(vmx_ept_vpid_cap, data, -1ULL))
1342 return -EINVAL;
1343
1344 vmx->nested.msrs.ept_caps = data;
1345 vmx->nested.msrs.vpid_caps = data >> 32;
1346 return 0;
1347}
1348
1349static int vmx_restore_fixed0_msr(struct vcpu_vmx *vmx, u32 msr_index, u64 data)
1350{
1351 u64 *msr;
1352
1353 switch (msr_index) {
1354 case MSR_IA32_VMX_CR0_FIXED0:
1355 msr = &vmx->nested.msrs.cr0_fixed0;
1356 break;
1357 case MSR_IA32_VMX_CR4_FIXED0:
1358 msr = &vmx->nested.msrs.cr4_fixed0;
1359 break;
1360 default:
1361 BUG();
1362 }
1363
1364 /*
1365	 * Bits that are 1 (i.e. bits that "must-be-1" during VMX operation)
1366	 * must also be 1 in the restored value.
1367 */
1368 if (!is_bitwise_subset(data, *msr, -1ULL))
1369 return -EINVAL;
1370
1371 *msr = data;
1372 return 0;
1373}
1374
1375/*
1376 * Called when userspace is restoring VMX MSRs.
1377 *
1378 * Returns 0 on success, non-0 otherwise.
1379 */
1380int vmx_set_vmx_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data)
1381{
1382 struct vcpu_vmx *vmx = to_vmx(vcpu);
1383
1384 /*
1385 * Don't allow changes to the VMX capability MSRs while the vCPU
1386 * is in VMX operation.
1387 */
1388 if (vmx->nested.vmxon)
1389 return -EBUSY;
1390
1391 switch (msr_index) {
1392 case MSR_IA32_VMX_BASIC:
1393 return vmx_restore_vmx_basic(vmx, data);
1394 case MSR_IA32_VMX_PINBASED_CTLS:
1395 case MSR_IA32_VMX_PROCBASED_CTLS:
1396 case MSR_IA32_VMX_EXIT_CTLS:
1397 case MSR_IA32_VMX_ENTRY_CTLS:
1398 /*
1399 * The "non-true" VMX capability MSRs are generated from the
1400 * "true" MSRs, so we do not support restoring them directly.
1401 *
1402 * If userspace wants to emulate VMX_BASIC[55]=0, userspace
1403 * should restore the "true" MSRs with the must-be-1 bits
1404	 * set according to SDM Vol. 3, Appendix A.2, "RESERVED CONTROLS
1405	 * AND DEFAULT SETTINGS".
1406 */
1407 return -EINVAL;
1408 case MSR_IA32_VMX_TRUE_PINBASED_CTLS:
1409 case MSR_IA32_VMX_TRUE_PROCBASED_CTLS:
1410 case MSR_IA32_VMX_TRUE_EXIT_CTLS:
1411 case MSR_IA32_VMX_TRUE_ENTRY_CTLS:
1412 case MSR_IA32_VMX_PROCBASED_CTLS2:
1413 return vmx_restore_control_msr(vmx, msr_index, data);
1414 case MSR_IA32_VMX_MISC:
1415 return vmx_restore_vmx_misc(vmx, data);
1416 case MSR_IA32_VMX_CR0_FIXED0:
1417 case MSR_IA32_VMX_CR4_FIXED0:
1418 return vmx_restore_fixed0_msr(vmx, msr_index, data);
1419 case MSR_IA32_VMX_CR0_FIXED1:
1420 case MSR_IA32_VMX_CR4_FIXED1:
1421 /*
1422 * These MSRs are generated based on the vCPU's CPUID, so we
1423 * do not support restoring them directly.
1424 */
1425 return -EINVAL;
1426 case MSR_IA32_VMX_EPT_VPID_CAP:
1427 return vmx_restore_vmx_ept_vpid_cap(vmx, data);
1428 case MSR_IA32_VMX_VMCS_ENUM:
1429 vmx->nested.msrs.vmcs_enum = data;
1430 return 0;
1431	case MSR_IA32_VMX_VMFUNC:
1432 if (data & ~vmx->nested.msrs.vmfunc_controls)
1433 return -EINVAL;
1434 vmx->nested.msrs.vmfunc_controls = data;
1435 return 0;
1436	default:
1437 /*
1438 * The rest of the VMX capability MSRs do not support restore.
1439 */
1440 return -EINVAL;
1441 }
1442}
1443
1444/* Returns 0 on success, non-0 otherwise. */
1445int vmx_get_vmx_msr(struct nested_vmx_msrs *msrs, u32 msr_index, u64 *pdata)
1446{
1447 switch (msr_index) {
1448 case MSR_IA32_VMX_BASIC:
1449 *pdata = msrs->basic;
1450 break;
1451 case MSR_IA32_VMX_TRUE_PINBASED_CTLS:
1452 case MSR_IA32_VMX_PINBASED_CTLS:
1453 *pdata = vmx_control_msr(
1454 msrs->pinbased_ctls_low,
1455 msrs->pinbased_ctls_high);
1456 if (msr_index == MSR_IA32_VMX_PINBASED_CTLS)
1457 *pdata |= PIN_BASED_ALWAYSON_WITHOUT_TRUE_MSR;
1458 break;
1459 case MSR_IA32_VMX_TRUE_PROCBASED_CTLS:
1460 case MSR_IA32_VMX_PROCBASED_CTLS:
1461 *pdata = vmx_control_msr(
1462 msrs->procbased_ctls_low,
1463 msrs->procbased_ctls_high);
1464 if (msr_index == MSR_IA32_VMX_PROCBASED_CTLS)
1465 *pdata |= CPU_BASED_ALWAYSON_WITHOUT_TRUE_MSR;
1466 break;
1467 case MSR_IA32_VMX_TRUE_EXIT_CTLS:
1468 case MSR_IA32_VMX_EXIT_CTLS:
1469 *pdata = vmx_control_msr(
1470 msrs->exit_ctls_low,
1471 msrs->exit_ctls_high);
1472 if (msr_index == MSR_IA32_VMX_EXIT_CTLS)
1473 *pdata |= VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR;
1474 break;
1475 case MSR_IA32_VMX_TRUE_ENTRY_CTLS:
1476 case MSR_IA32_VMX_ENTRY_CTLS:
1477 *pdata = vmx_control_msr(
1478 msrs->entry_ctls_low,
1479 msrs->entry_ctls_high);
1480 if (msr_index == MSR_IA32_VMX_ENTRY_CTLS)
1481 *pdata |= VM_ENTRY_ALWAYSON_WITHOUT_TRUE_MSR;
1482 break;
1483 case MSR_IA32_VMX_MISC:
1484 *pdata = vmx_control_msr(
1485 msrs->misc_low,
1486 msrs->misc_high);
1487 break;
1488 case MSR_IA32_VMX_CR0_FIXED0:
1489 *pdata = msrs->cr0_fixed0;
1490 break;
1491 case MSR_IA32_VMX_CR0_FIXED1:
1492 *pdata = msrs->cr0_fixed1;
1493 break;
1494 case MSR_IA32_VMX_CR4_FIXED0:
1495 *pdata = msrs->cr4_fixed0;
1496 break;
1497 case MSR_IA32_VMX_CR4_FIXED1:
1498 *pdata = msrs->cr4_fixed1;
1499 break;
1500 case MSR_IA32_VMX_VMCS_ENUM:
1501 *pdata = msrs->vmcs_enum;
1502 break;
1503 case MSR_IA32_VMX_PROCBASED_CTLS2:
1504 *pdata = vmx_control_msr(
1505 msrs->secondary_ctls_low,
1506 msrs->secondary_ctls_high);
1507 break;
1508 case MSR_IA32_VMX_EPT_VPID_CAP:
1509 *pdata = msrs->ept_caps |
1510 ((u64)msrs->vpid_caps << 32);
1511 break;
1512 case MSR_IA32_VMX_VMFUNC:
1513 *pdata = msrs->vmfunc_controls;
1514 break;
1515 default:
1516 return 1;
1517 }
1518
1519 return 0;
1520}
1521
1522/*
1523 * Copy the writable VMCS shadow fields back to the VMCS12, in case they have
1524 * been modified by the L1 guest. Note, "writable" in this context means
1525 * "writable by the guest", i.e. tagged SHADOW_FIELD_RW; the set of
1526 * fields tagged SHADOW_FIELD_RO may or may not align with the "read-only"
1527 * VM-exit information fields (which are actually writable if the vCPU is
1528 * configured to support "VMWRITE to any supported field in the VMCS").
1529 */
1530static void copy_shadow_to_vmcs12(struct vcpu_vmx *vmx)
1531{
1532	struct vmcs *shadow_vmcs = vmx->vmcs01.shadow_vmcs;
1533	struct vmcs12 *vmcs12 = get_vmcs12(&vmx->vcpu);
1534	struct shadow_vmcs_field field;
1535	unsigned long val;
1536	int i;
1537
1538	if (WARN_ON(!shadow_vmcs))
1539		return;
1540
1541	preempt_disable();
1542
1543	vmcs_load(shadow_vmcs);
1544
1545	for (i = 0; i < max_shadow_read_write_fields; i++) {
1546		field = shadow_read_write_fields[i];
1547		val = __vmcs_readl(field.encoding);
1548		vmcs12_write_any(vmcs12, field.encoding, field.offset, val);
1549	}
1550
1551 vmcs_clear(shadow_vmcs);
1552 vmcs_load(vmx->loaded_vmcs->vmcs);
1553
1554 preempt_enable();
1555}
1556
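/*
 * Propagate the current vmcs12 values of all shadowed fields (both the
 * read/write and read-only sets) into the shadow VMCS, so that L1's VMREADs
 * of those fields are satisfied by hardware without a VM-exit.
 */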
1557static void copy_vmcs12_to_shadow(struct vcpu_vmx *vmx)
1558{
1559	const struct shadow_vmcs_field *fields[] = {
1560		shadow_read_write_fields,
1561 shadow_read_only_fields
1562 };
1563 const int max_fields[] = {
1564 max_shadow_read_write_fields,
1565 max_shadow_read_only_fields
1566 };
1567	struct vmcs *shadow_vmcs = vmx->vmcs01.shadow_vmcs;
1568	struct vmcs12 *vmcs12 = get_vmcs12(&vmx->vcpu);
1569 struct shadow_vmcs_field field;
1570 unsigned long val;
1571 int i, q;
1572
1573	if (WARN_ON(!shadow_vmcs))
1574		return;
1575
1576	vmcs_load(shadow_vmcs);
1577
1578 for (q = 0; q < ARRAY_SIZE(fields); q++) {
1579 for (i = 0; i < max_fields[q]; i++) {
1580 field = fields[q][i];
1581			val = vmcs12_read_any(vmcs12, field.encoding,
1582					      field.offset);
1583			__vmcs_writel(field.encoding, val);
1584		}
1585 }
1586
1587 vmcs_clear(shadow_vmcs);
1588 vmcs_load(vmx->loaded_vmcs->vmcs);
1589}
1590
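/*
 * Copy guest state and control fields from the enlightened VMCS into the
 * cached vmcs12, skipping any field group the guest has marked clean via
 * hv_clean_fields.
 */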
1591static void copy_enlightened_to_vmcs12(struct vcpu_vmx *vmx, u32 hv_clean_fields)
1592{
1593 struct vmcs12 *vmcs12 = vmx->nested.cached_vmcs12;
1594 struct hv_enlightened_vmcs *evmcs = vmx->nested.hv_evmcs;
1595
1596 /* HV_VMX_ENLIGHTENED_CLEAN_FIELD_NONE */
1597 vmcs12->tpr_threshold = evmcs->tpr_threshold;
1598 vmcs12->guest_rip = evmcs->guest_rip;
1599
Vitaly Kuznetsovd6bf71a2021-05-26 15:20:22 +02001600 if (unlikely(!(hv_clean_fields &
Sean Christopherson55d23752018-12-03 13:53:18 -08001601 HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_BASIC))) {
1602 vmcs12->guest_rsp = evmcs->guest_rsp;
1603 vmcs12->guest_rflags = evmcs->guest_rflags;
1604 vmcs12->guest_interruptibility_info =
1605 evmcs->guest_interruptibility_info;
1606 }
1607
Vitaly Kuznetsovd6bf71a2021-05-26 15:20:22 +02001608 if (unlikely(!(hv_clean_fields &
Sean Christopherson55d23752018-12-03 13:53:18 -08001609 HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_PROC))) {
1610 vmcs12->cpu_based_vm_exec_control =
1611 evmcs->cpu_based_vm_exec_control;
1612 }
1613
Vitaly Kuznetsovd6bf71a2021-05-26 15:20:22 +02001614 if (unlikely(!(hv_clean_fields &
Vitaly Kuznetsovf9bc5222019-06-13 13:35:02 +02001615 HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_EXCPN))) {
Sean Christopherson55d23752018-12-03 13:53:18 -08001616 vmcs12->exception_bitmap = evmcs->exception_bitmap;
1617 }
1618
Vitaly Kuznetsovd6bf71a2021-05-26 15:20:22 +02001619 if (unlikely(!(hv_clean_fields &
Sean Christopherson55d23752018-12-03 13:53:18 -08001620 HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_ENTRY))) {
1621 vmcs12->vm_entry_controls = evmcs->vm_entry_controls;
1622 }
1623
Vitaly Kuznetsovd6bf71a2021-05-26 15:20:22 +02001624 if (unlikely(!(hv_clean_fields &
Sean Christopherson55d23752018-12-03 13:53:18 -08001625 HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_EVENT))) {
1626 vmcs12->vm_entry_intr_info_field =
1627 evmcs->vm_entry_intr_info_field;
1628 vmcs12->vm_entry_exception_error_code =
1629 evmcs->vm_entry_exception_error_code;
1630 vmcs12->vm_entry_instruction_len =
1631 evmcs->vm_entry_instruction_len;
1632 }
1633
Vitaly Kuznetsovd6bf71a2021-05-26 15:20:22 +02001634 if (unlikely(!(hv_clean_fields &
Sean Christopherson55d23752018-12-03 13:53:18 -08001635 HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1))) {
1636 vmcs12->host_ia32_pat = evmcs->host_ia32_pat;
1637 vmcs12->host_ia32_efer = evmcs->host_ia32_efer;
1638 vmcs12->host_cr0 = evmcs->host_cr0;
1639 vmcs12->host_cr3 = evmcs->host_cr3;
1640 vmcs12->host_cr4 = evmcs->host_cr4;
1641 vmcs12->host_ia32_sysenter_esp = evmcs->host_ia32_sysenter_esp;
1642 vmcs12->host_ia32_sysenter_eip = evmcs->host_ia32_sysenter_eip;
1643 vmcs12->host_rip = evmcs->host_rip;
1644 vmcs12->host_ia32_sysenter_cs = evmcs->host_ia32_sysenter_cs;
1645 vmcs12->host_es_selector = evmcs->host_es_selector;
1646 vmcs12->host_cs_selector = evmcs->host_cs_selector;
1647 vmcs12->host_ss_selector = evmcs->host_ss_selector;
1648 vmcs12->host_ds_selector = evmcs->host_ds_selector;
1649 vmcs12->host_fs_selector = evmcs->host_fs_selector;
1650 vmcs12->host_gs_selector = evmcs->host_gs_selector;
1651 vmcs12->host_tr_selector = evmcs->host_tr_selector;
1652 }
1653
Vitaly Kuznetsovd6bf71a2021-05-26 15:20:22 +02001654 if (unlikely(!(hv_clean_fields &
Vitaly Kuznetsovf9bc5222019-06-13 13:35:02 +02001655 HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_GRP1))) {
Sean Christopherson55d23752018-12-03 13:53:18 -08001656 vmcs12->pin_based_vm_exec_control =
1657 evmcs->pin_based_vm_exec_control;
1658 vmcs12->vm_exit_controls = evmcs->vm_exit_controls;
1659 vmcs12->secondary_vm_exec_control =
1660 evmcs->secondary_vm_exec_control;
1661 }
1662
Vitaly Kuznetsovd6bf71a2021-05-26 15:20:22 +02001663 if (unlikely(!(hv_clean_fields &
Sean Christopherson55d23752018-12-03 13:53:18 -08001664 HV_VMX_ENLIGHTENED_CLEAN_FIELD_IO_BITMAP))) {
1665 vmcs12->io_bitmap_a = evmcs->io_bitmap_a;
1666 vmcs12->io_bitmap_b = evmcs->io_bitmap_b;
1667 }
1668
Vitaly Kuznetsovd6bf71a2021-05-26 15:20:22 +02001669 if (unlikely(!(hv_clean_fields &
Sean Christopherson55d23752018-12-03 13:53:18 -08001670 HV_VMX_ENLIGHTENED_CLEAN_FIELD_MSR_BITMAP))) {
1671 vmcs12->msr_bitmap = evmcs->msr_bitmap;
1672 }
1673
Vitaly Kuznetsovd6bf71a2021-05-26 15:20:22 +02001674 if (unlikely(!(hv_clean_fields &
Sean Christopherson55d23752018-12-03 13:53:18 -08001675 HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2))) {
1676 vmcs12->guest_es_base = evmcs->guest_es_base;
1677 vmcs12->guest_cs_base = evmcs->guest_cs_base;
1678 vmcs12->guest_ss_base = evmcs->guest_ss_base;
1679 vmcs12->guest_ds_base = evmcs->guest_ds_base;
1680 vmcs12->guest_fs_base = evmcs->guest_fs_base;
1681 vmcs12->guest_gs_base = evmcs->guest_gs_base;
1682 vmcs12->guest_ldtr_base = evmcs->guest_ldtr_base;
1683 vmcs12->guest_tr_base = evmcs->guest_tr_base;
1684 vmcs12->guest_gdtr_base = evmcs->guest_gdtr_base;
1685 vmcs12->guest_idtr_base = evmcs->guest_idtr_base;
1686 vmcs12->guest_es_limit = evmcs->guest_es_limit;
1687 vmcs12->guest_cs_limit = evmcs->guest_cs_limit;
1688 vmcs12->guest_ss_limit = evmcs->guest_ss_limit;
1689 vmcs12->guest_ds_limit = evmcs->guest_ds_limit;
1690 vmcs12->guest_fs_limit = evmcs->guest_fs_limit;
1691 vmcs12->guest_gs_limit = evmcs->guest_gs_limit;
1692 vmcs12->guest_ldtr_limit = evmcs->guest_ldtr_limit;
1693 vmcs12->guest_tr_limit = evmcs->guest_tr_limit;
1694 vmcs12->guest_gdtr_limit = evmcs->guest_gdtr_limit;
1695 vmcs12->guest_idtr_limit = evmcs->guest_idtr_limit;
1696 vmcs12->guest_es_ar_bytes = evmcs->guest_es_ar_bytes;
1697 vmcs12->guest_cs_ar_bytes = evmcs->guest_cs_ar_bytes;
1698 vmcs12->guest_ss_ar_bytes = evmcs->guest_ss_ar_bytes;
1699 vmcs12->guest_ds_ar_bytes = evmcs->guest_ds_ar_bytes;
1700 vmcs12->guest_fs_ar_bytes = evmcs->guest_fs_ar_bytes;
1701 vmcs12->guest_gs_ar_bytes = evmcs->guest_gs_ar_bytes;
1702 vmcs12->guest_ldtr_ar_bytes = evmcs->guest_ldtr_ar_bytes;
1703 vmcs12->guest_tr_ar_bytes = evmcs->guest_tr_ar_bytes;
1704 vmcs12->guest_es_selector = evmcs->guest_es_selector;
1705 vmcs12->guest_cs_selector = evmcs->guest_cs_selector;
1706 vmcs12->guest_ss_selector = evmcs->guest_ss_selector;
1707 vmcs12->guest_ds_selector = evmcs->guest_ds_selector;
1708 vmcs12->guest_fs_selector = evmcs->guest_fs_selector;
1709 vmcs12->guest_gs_selector = evmcs->guest_gs_selector;
1710 vmcs12->guest_ldtr_selector = evmcs->guest_ldtr_selector;
1711 vmcs12->guest_tr_selector = evmcs->guest_tr_selector;
1712 }
1713
Vitaly Kuznetsovd6bf71a2021-05-26 15:20:22 +02001714 if (unlikely(!(hv_clean_fields &
Sean Christopherson55d23752018-12-03 13:53:18 -08001715 HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_GRP2))) {
1716 vmcs12->tsc_offset = evmcs->tsc_offset;
1717 vmcs12->virtual_apic_page_addr = evmcs->virtual_apic_page_addr;
1718 vmcs12->xss_exit_bitmap = evmcs->xss_exit_bitmap;
1719 }
1720
Vitaly Kuznetsovd6bf71a2021-05-26 15:20:22 +02001721 if (unlikely(!(hv_clean_fields &
Sean Christopherson55d23752018-12-03 13:53:18 -08001722 HV_VMX_ENLIGHTENED_CLEAN_FIELD_CRDR))) {
1723 vmcs12->cr0_guest_host_mask = evmcs->cr0_guest_host_mask;
1724 vmcs12->cr4_guest_host_mask = evmcs->cr4_guest_host_mask;
1725 vmcs12->cr0_read_shadow = evmcs->cr0_read_shadow;
1726 vmcs12->cr4_read_shadow = evmcs->cr4_read_shadow;
1727 vmcs12->guest_cr0 = evmcs->guest_cr0;
1728 vmcs12->guest_cr3 = evmcs->guest_cr3;
1729 vmcs12->guest_cr4 = evmcs->guest_cr4;
1730 vmcs12->guest_dr7 = evmcs->guest_dr7;
1731 }
1732
Vitaly Kuznetsovd6bf71a2021-05-26 15:20:22 +02001733 if (unlikely(!(hv_clean_fields &
Sean Christopherson55d23752018-12-03 13:53:18 -08001734 HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_POINTER))) {
1735 vmcs12->host_fs_base = evmcs->host_fs_base;
1736 vmcs12->host_gs_base = evmcs->host_gs_base;
1737 vmcs12->host_tr_base = evmcs->host_tr_base;
1738 vmcs12->host_gdtr_base = evmcs->host_gdtr_base;
1739 vmcs12->host_idtr_base = evmcs->host_idtr_base;
1740 vmcs12->host_rsp = evmcs->host_rsp;
1741 }
1742
Vitaly Kuznetsovd6bf71a2021-05-26 15:20:22 +02001743 if (unlikely(!(hv_clean_fields &
Sean Christopherson55d23752018-12-03 13:53:18 -08001744 HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_XLAT))) {
1745 vmcs12->ept_pointer = evmcs->ept_pointer;
1746 vmcs12->virtual_processor_id = evmcs->virtual_processor_id;
1747 }
1748
Vitaly Kuznetsovd6bf71a2021-05-26 15:20:22 +02001749 if (unlikely(!(hv_clean_fields &
Sean Christopherson55d23752018-12-03 13:53:18 -08001750 HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1))) {
1751 vmcs12->vmcs_link_pointer = evmcs->vmcs_link_pointer;
1752 vmcs12->guest_ia32_debugctl = evmcs->guest_ia32_debugctl;
1753 vmcs12->guest_ia32_pat = evmcs->guest_ia32_pat;
1754 vmcs12->guest_ia32_efer = evmcs->guest_ia32_efer;
1755 vmcs12->guest_pdptr0 = evmcs->guest_pdptr0;
1756 vmcs12->guest_pdptr1 = evmcs->guest_pdptr1;
1757 vmcs12->guest_pdptr2 = evmcs->guest_pdptr2;
1758 vmcs12->guest_pdptr3 = evmcs->guest_pdptr3;
1759 vmcs12->guest_pending_dbg_exceptions =
1760 evmcs->guest_pending_dbg_exceptions;
1761 vmcs12->guest_sysenter_esp = evmcs->guest_sysenter_esp;
1762 vmcs12->guest_sysenter_eip = evmcs->guest_sysenter_eip;
1763 vmcs12->guest_bndcfgs = evmcs->guest_bndcfgs;
1764 vmcs12->guest_activity_state = evmcs->guest_activity_state;
1765 vmcs12->guest_sysenter_cs = evmcs->guest_sysenter_cs;
1766 }
1767
1768 /*
1769 * Not used?
1770 * vmcs12->vm_exit_msr_store_addr = evmcs->vm_exit_msr_store_addr;
1771 * vmcs12->vm_exit_msr_load_addr = evmcs->vm_exit_msr_load_addr;
1772 * vmcs12->vm_entry_msr_load_addr = evmcs->vm_entry_msr_load_addr;
1773	 * vmcs12->page_fault_error_code_mask =
1774 * evmcs->page_fault_error_code_mask;
1775 * vmcs12->page_fault_error_code_match =
1776 * evmcs->page_fault_error_code_match;
1777 * vmcs12->cr3_target_count = evmcs->cr3_target_count;
1778 * vmcs12->vm_exit_msr_store_count = evmcs->vm_exit_msr_store_count;
1779 * vmcs12->vm_exit_msr_load_count = evmcs->vm_exit_msr_load_count;
1780 * vmcs12->vm_entry_msr_load_count = evmcs->vm_entry_msr_load_count;
1781 */
1782
1783 /*
1784 * Read only fields:
1785 * vmcs12->guest_physical_address = evmcs->guest_physical_address;
1786 * vmcs12->vm_instruction_error = evmcs->vm_instruction_error;
1787 * vmcs12->vm_exit_reason = evmcs->vm_exit_reason;
1788 * vmcs12->vm_exit_intr_info = evmcs->vm_exit_intr_info;
1789 * vmcs12->vm_exit_intr_error_code = evmcs->vm_exit_intr_error_code;
1790 * vmcs12->idt_vectoring_info_field = evmcs->idt_vectoring_info_field;
1791 * vmcs12->idt_vectoring_error_code = evmcs->idt_vectoring_error_code;
1792 * vmcs12->vm_exit_instruction_len = evmcs->vm_exit_instruction_len;
1793 * vmcs12->vmx_instruction_info = evmcs->vmx_instruction_info;
1794 * vmcs12->exit_qualification = evmcs->exit_qualification;
1795 * vmcs12->guest_linear_address = evmcs->guest_linear_address;
1796 *
1797 * Not present in struct vmcs12:
1798 * vmcs12->exit_io_instruction_ecx = evmcs->exit_io_instruction_ecx;
1799 * vmcs12->exit_io_instruction_esi = evmcs->exit_io_instruction_esi;
1800 * vmcs12->exit_io_instruction_edi = evmcs->exit_io_instruction_edi;
1801 * vmcs12->exit_io_instruction_eip = evmcs->exit_io_instruction_eip;
1802 */
1803
1804	return;
1805}
1806
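/*
 * Copy the cached vmcs12 back into the enlightened VMCS so that L1 observes
 * up-to-date guest state and VM-exit information.
 */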
1807static void copy_vmcs12_to_enlightened(struct vcpu_vmx *vmx)
1808{
1809 struct vmcs12 *vmcs12 = vmx->nested.cached_vmcs12;
1810 struct hv_enlightened_vmcs *evmcs = vmx->nested.hv_evmcs;
1811
1812 /*
1813 * Should not be changed by KVM:
1814 *
1815 * evmcs->host_es_selector = vmcs12->host_es_selector;
1816 * evmcs->host_cs_selector = vmcs12->host_cs_selector;
1817 * evmcs->host_ss_selector = vmcs12->host_ss_selector;
1818 * evmcs->host_ds_selector = vmcs12->host_ds_selector;
1819 * evmcs->host_fs_selector = vmcs12->host_fs_selector;
1820 * evmcs->host_gs_selector = vmcs12->host_gs_selector;
1821 * evmcs->host_tr_selector = vmcs12->host_tr_selector;
1822 * evmcs->host_ia32_pat = vmcs12->host_ia32_pat;
1823 * evmcs->host_ia32_efer = vmcs12->host_ia32_efer;
1824 * evmcs->host_cr0 = vmcs12->host_cr0;
1825 * evmcs->host_cr3 = vmcs12->host_cr3;
1826 * evmcs->host_cr4 = vmcs12->host_cr4;
1827 * evmcs->host_ia32_sysenter_esp = vmcs12->host_ia32_sysenter_esp;
1828 * evmcs->host_ia32_sysenter_eip = vmcs12->host_ia32_sysenter_eip;
1829 * evmcs->host_rip = vmcs12->host_rip;
1830 * evmcs->host_ia32_sysenter_cs = vmcs12->host_ia32_sysenter_cs;
1831 * evmcs->host_fs_base = vmcs12->host_fs_base;
1832 * evmcs->host_gs_base = vmcs12->host_gs_base;
1833 * evmcs->host_tr_base = vmcs12->host_tr_base;
1834 * evmcs->host_gdtr_base = vmcs12->host_gdtr_base;
1835 * evmcs->host_idtr_base = vmcs12->host_idtr_base;
1836 * evmcs->host_rsp = vmcs12->host_rsp;
1837	 * sync_vmcs02_to_vmcs12() doesn't read these:
1838	 * evmcs->io_bitmap_a = vmcs12->io_bitmap_a;
1839 * evmcs->io_bitmap_b = vmcs12->io_bitmap_b;
1840 * evmcs->msr_bitmap = vmcs12->msr_bitmap;
1841 * evmcs->ept_pointer = vmcs12->ept_pointer;
1842 * evmcs->xss_exit_bitmap = vmcs12->xss_exit_bitmap;
1843 * evmcs->vm_exit_msr_store_addr = vmcs12->vm_exit_msr_store_addr;
1844 * evmcs->vm_exit_msr_load_addr = vmcs12->vm_exit_msr_load_addr;
1845 * evmcs->vm_entry_msr_load_addr = vmcs12->vm_entry_msr_load_addr;
1846	 * evmcs->tpr_threshold = vmcs12->tpr_threshold;
1847 * evmcs->virtual_processor_id = vmcs12->virtual_processor_id;
1848 * evmcs->exception_bitmap = vmcs12->exception_bitmap;
1849 * evmcs->vmcs_link_pointer = vmcs12->vmcs_link_pointer;
1850 * evmcs->pin_based_vm_exec_control = vmcs12->pin_based_vm_exec_control;
1851 * evmcs->vm_exit_controls = vmcs12->vm_exit_controls;
1852 * evmcs->secondary_vm_exec_control = vmcs12->secondary_vm_exec_control;
1853 * evmcs->page_fault_error_code_mask =
1854 * vmcs12->page_fault_error_code_mask;
1855 * evmcs->page_fault_error_code_match =
1856 * vmcs12->page_fault_error_code_match;
1857 * evmcs->cr3_target_count = vmcs12->cr3_target_count;
1858 * evmcs->virtual_apic_page_addr = vmcs12->virtual_apic_page_addr;
1859 * evmcs->tsc_offset = vmcs12->tsc_offset;
1860 * evmcs->guest_ia32_debugctl = vmcs12->guest_ia32_debugctl;
1861 * evmcs->cr0_guest_host_mask = vmcs12->cr0_guest_host_mask;
1862 * evmcs->cr4_guest_host_mask = vmcs12->cr4_guest_host_mask;
1863 * evmcs->cr0_read_shadow = vmcs12->cr0_read_shadow;
1864 * evmcs->cr4_read_shadow = vmcs12->cr4_read_shadow;
1865 * evmcs->vm_exit_msr_store_count = vmcs12->vm_exit_msr_store_count;
1866 * evmcs->vm_exit_msr_load_count = vmcs12->vm_exit_msr_load_count;
1867 * evmcs->vm_entry_msr_load_count = vmcs12->vm_entry_msr_load_count;
1868 *
1869 * Not present in struct vmcs12:
1870 * evmcs->exit_io_instruction_ecx = vmcs12->exit_io_instruction_ecx;
1871 * evmcs->exit_io_instruction_esi = vmcs12->exit_io_instruction_esi;
1872 * evmcs->exit_io_instruction_edi = vmcs12->exit_io_instruction_edi;
1873 * evmcs->exit_io_instruction_eip = vmcs12->exit_io_instruction_eip;
1874 */
1875
1876 evmcs->guest_es_selector = vmcs12->guest_es_selector;
1877 evmcs->guest_cs_selector = vmcs12->guest_cs_selector;
1878 evmcs->guest_ss_selector = vmcs12->guest_ss_selector;
1879 evmcs->guest_ds_selector = vmcs12->guest_ds_selector;
1880 evmcs->guest_fs_selector = vmcs12->guest_fs_selector;
1881 evmcs->guest_gs_selector = vmcs12->guest_gs_selector;
1882 evmcs->guest_ldtr_selector = vmcs12->guest_ldtr_selector;
1883 evmcs->guest_tr_selector = vmcs12->guest_tr_selector;
1884
1885 evmcs->guest_es_limit = vmcs12->guest_es_limit;
1886 evmcs->guest_cs_limit = vmcs12->guest_cs_limit;
1887 evmcs->guest_ss_limit = vmcs12->guest_ss_limit;
1888 evmcs->guest_ds_limit = vmcs12->guest_ds_limit;
1889 evmcs->guest_fs_limit = vmcs12->guest_fs_limit;
1890 evmcs->guest_gs_limit = vmcs12->guest_gs_limit;
1891 evmcs->guest_ldtr_limit = vmcs12->guest_ldtr_limit;
1892 evmcs->guest_tr_limit = vmcs12->guest_tr_limit;
1893 evmcs->guest_gdtr_limit = vmcs12->guest_gdtr_limit;
1894 evmcs->guest_idtr_limit = vmcs12->guest_idtr_limit;
1895
1896 evmcs->guest_es_ar_bytes = vmcs12->guest_es_ar_bytes;
1897 evmcs->guest_cs_ar_bytes = vmcs12->guest_cs_ar_bytes;
1898 evmcs->guest_ss_ar_bytes = vmcs12->guest_ss_ar_bytes;
1899 evmcs->guest_ds_ar_bytes = vmcs12->guest_ds_ar_bytes;
1900 evmcs->guest_fs_ar_bytes = vmcs12->guest_fs_ar_bytes;
1901 evmcs->guest_gs_ar_bytes = vmcs12->guest_gs_ar_bytes;
1902 evmcs->guest_ldtr_ar_bytes = vmcs12->guest_ldtr_ar_bytes;
1903 evmcs->guest_tr_ar_bytes = vmcs12->guest_tr_ar_bytes;
1904
1905 evmcs->guest_es_base = vmcs12->guest_es_base;
1906 evmcs->guest_cs_base = vmcs12->guest_cs_base;
1907 evmcs->guest_ss_base = vmcs12->guest_ss_base;
1908 evmcs->guest_ds_base = vmcs12->guest_ds_base;
1909 evmcs->guest_fs_base = vmcs12->guest_fs_base;
1910 evmcs->guest_gs_base = vmcs12->guest_gs_base;
1911 evmcs->guest_ldtr_base = vmcs12->guest_ldtr_base;
1912 evmcs->guest_tr_base = vmcs12->guest_tr_base;
1913 evmcs->guest_gdtr_base = vmcs12->guest_gdtr_base;
1914 evmcs->guest_idtr_base = vmcs12->guest_idtr_base;
1915
1916 evmcs->guest_ia32_pat = vmcs12->guest_ia32_pat;
1917 evmcs->guest_ia32_efer = vmcs12->guest_ia32_efer;
1918
1919 evmcs->guest_pdptr0 = vmcs12->guest_pdptr0;
1920 evmcs->guest_pdptr1 = vmcs12->guest_pdptr1;
1921 evmcs->guest_pdptr2 = vmcs12->guest_pdptr2;
1922 evmcs->guest_pdptr3 = vmcs12->guest_pdptr3;
1923
1924 evmcs->guest_pending_dbg_exceptions =
1925 vmcs12->guest_pending_dbg_exceptions;
1926 evmcs->guest_sysenter_esp = vmcs12->guest_sysenter_esp;
1927 evmcs->guest_sysenter_eip = vmcs12->guest_sysenter_eip;
1928
1929 evmcs->guest_activity_state = vmcs12->guest_activity_state;
1930 evmcs->guest_sysenter_cs = vmcs12->guest_sysenter_cs;
1931
1932 evmcs->guest_cr0 = vmcs12->guest_cr0;
1933 evmcs->guest_cr3 = vmcs12->guest_cr3;
1934 evmcs->guest_cr4 = vmcs12->guest_cr4;
1935 evmcs->guest_dr7 = vmcs12->guest_dr7;
1936
1937 evmcs->guest_physical_address = vmcs12->guest_physical_address;
1938
1939 evmcs->vm_instruction_error = vmcs12->vm_instruction_error;
1940 evmcs->vm_exit_reason = vmcs12->vm_exit_reason;
1941 evmcs->vm_exit_intr_info = vmcs12->vm_exit_intr_info;
1942 evmcs->vm_exit_intr_error_code = vmcs12->vm_exit_intr_error_code;
1943 evmcs->idt_vectoring_info_field = vmcs12->idt_vectoring_info_field;
1944 evmcs->idt_vectoring_error_code = vmcs12->idt_vectoring_error_code;
1945 evmcs->vm_exit_instruction_len = vmcs12->vm_exit_instruction_len;
1946 evmcs->vmx_instruction_info = vmcs12->vmx_instruction_info;
1947
1948 evmcs->exit_qualification = vmcs12->exit_qualification;
1949
1950 evmcs->guest_linear_address = vmcs12->guest_linear_address;
1951 evmcs->guest_rsp = vmcs12->guest_rsp;
1952 evmcs->guest_rflags = vmcs12->guest_rflags;
1953
1954 evmcs->guest_interruptibility_info =
1955 vmcs12->guest_interruptibility_info;
1956 evmcs->cpu_based_vm_exec_control = vmcs12->cpu_based_vm_exec_control;
1957 evmcs->vm_entry_controls = vmcs12->vm_entry_controls;
1958 evmcs->vm_entry_intr_info_field = vmcs12->vm_entry_intr_info_field;
1959 evmcs->vm_entry_exception_error_code =
1960 vmcs12->vm_entry_exception_error_code;
1961 evmcs->vm_entry_instruction_len = vmcs12->vm_entry_instruction_len;
1962
1963 evmcs->guest_rip = vmcs12->guest_rip;
1964
1965 evmcs->guest_bndcfgs = vmcs12->guest_bndcfgs;
1966
1967	return;
1968}
1969
1970/*
1971 * This is an equivalent of the nested hypervisor executing the vmptrld
1972 * instruction.
1973 */
1974static enum nested_evmptrld_status nested_vmx_handle_enlightened_vmptrld(
1975	struct kvm_vcpu *vcpu, bool from_launch)
1976{
1977	struct vcpu_vmx *vmx = to_vmx(vcpu);
1978	bool evmcs_gpa_changed = false;
1979	u64 evmcs_gpa;
1980
1981	if (likely(!vmx->nested.enlightened_vmcs_enabled))
1982		return EVMPTRLD_DISABLED;
1983
1984	if (!nested_enlightened_vmentry(vcpu, &evmcs_gpa)) {
1985		nested_release_evmcs(vcpu);
1986		return EVMPTRLD_DISABLED;
1987	}
1988
1989	if (unlikely(evmcs_gpa != vmx->nested.hv_evmcs_vmptr)) {
1990		vmx->nested.current_vmptr = INVALID_GPA;
1991
1992		nested_release_evmcs(vcpu);
1993
1994		if (kvm_vcpu_map(vcpu, gpa_to_gfn(evmcs_gpa),
1995				 &vmx->nested.hv_evmcs_map))
1996			return EVMPTRLD_ERROR;
1997
1998		vmx->nested.hv_evmcs = vmx->nested.hv_evmcs_map.hva;
1999
2000 /*
2001		 * Currently, KVM only supports eVMCS version 1
2002		 * (== KVM_EVMCS_VERSION) and thus expects the guest to set the
2003		 * first u32 field of the eVMCS, which specifies the eVMCS
2004		 * VersionNumber, to that value.
2005		 *
2006		 * The guest discovers the eVMCS versions supported by the host
2007		 * by examining CPUID.0x4000000A.EAX[0:15].  The host userspace
2008		 * VMM is expected to set this CPUID leaf according to the value
2009		 * returned in vmcs_version from nested_enable_evmcs().
2010		 *
2011		 * However, it turns out that Microsoft Hyper-V fails to comply
2012		 * with its own invented interface: when Hyper-V uses eVMCS, it
2013		 * just sets the first u32 field of the eVMCS to the revision_id
2014		 * specified in MSR_IA32_VMX_BASIC, instead of to one of the
2015		 * supported eVMCS version numbers specified in
2016		 * CPUID.0x4000000A.EAX[0:15].
2017		 *
2018		 * To work around this Hyper-V bug, accept either a supported
2019		 * eVMCS version or the VMCS12 revision_id as valid values for
2020		 * the first u32 field of the eVMCS.
2021		 */
2022 if ((vmx->nested.hv_evmcs->revision_id != KVM_EVMCS_VERSION) &&
2023 (vmx->nested.hv_evmcs->revision_id != VMCS12_REVISION)) {
2024 nested_release_evmcs(vcpu);
2025			return EVMPTRLD_VMFAIL;
2026		}
2027
2028		vmx->nested.hv_evmcs_vmptr = evmcs_gpa;
2029
2030		evmcs_gpa_changed = true;
2031		/*
2032 * Unlike normal vmcs12, enlightened vmcs12 is not fully
2033 * reloaded from guest's memory (read only fields, fields not
2034 * present in struct hv_enlightened_vmcs, ...). Make sure there
2035 * are no leftovers.
2036 */
2037 if (from_launch) {
2038 struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
2039 memset(vmcs12, 0, sizeof(*vmcs12));
2040 vmcs12->hdr.revision_id = VMCS12_REVISION;
2041 }
2042
2043 }
2044
2045	/*
2046	 * Clean-fields data can't be used on VMLAUNCH or when switching
2047	 * between different L2 guests, as KVM keeps a single VMCS12 per L1.
2048	 */
2049	if (from_launch || evmcs_gpa_changed) {
2050		vmx->nested.hv_evmcs->hv_clean_fields &=
2051			~HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL;
2052
2053		vmx->nested.force_msr_bitmap_recalc = true;
2054	}
2055
2056	return EVMPTRLD_SUCCEEDED;
2057}
2058
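/*
 * Flush the cached vmcs12 back to the structure L1 actually reads: the
 * enlightened VMCS when one is in use, the shadow VMCS otherwise.
 */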
2059void nested_sync_vmcs12_to_shadow(struct kvm_vcpu *vcpu)
2060{
2061	struct vcpu_vmx *vmx = to_vmx(vcpu);
2062
2063	if (evmptr_is_valid(vmx->nested.hv_evmcs_vmptr))
2064		copy_vmcs12_to_enlightened(vmx);
2065	else
2066		copy_vmcs12_to_shadow(vmx);
2067
2068	vmx->nested.need_vmcs12_to_shadow_sync = false;
2069}
2070
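/*
 * hrtimer callback for the emulated VMX-preemption timer: record that the
 * timer expired and kick the vCPU so the pending event is processed.
 */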
2071static enum hrtimer_restart vmx_preemption_timer_fn(struct hrtimer *timer)
2072{
2073 struct vcpu_vmx *vmx =
2074 container_of(timer, struct vcpu_vmx, nested.preemption_timer);
2075
2076 vmx->nested.preemption_timer_expired = true;
2077 kvm_make_request(KVM_REQ_EVENT, &vmx->vcpu);
2078 kvm_vcpu_kick(&vmx->vcpu);
2079
2080 return HRTIMER_NORESTART;
2081}
2082
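/*
 * Compute the value to program into the vmcs02 preemption timer.  The
 * deadline is latched in L1 "scaled TSC" units on the first call for a given
 * VM-entry; subsequent calls return the time remaining until that same
 * deadline.
 */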
2083static u64 vmx_calc_preemption_timer_value(struct kvm_vcpu *vcpu)
2084{
2085	struct vcpu_vmx *vmx = to_vmx(vcpu);
2086	struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
2087
2088	u64 l1_scaled_tsc = kvm_read_l1_tsc(vcpu, rdtsc()) >>
2089			VMX_MISC_EMULATED_PREEMPTION_TIMER_RATE;
2090
2091	if (!vmx->nested.has_preemption_timer_deadline) {
2092		vmx->nested.preemption_timer_deadline =
2093			vmcs12->vmx_preemption_timer_value + l1_scaled_tsc;
2094		vmx->nested.has_preemption_timer_deadline = true;
2095	}
2096	return vmx->nested.preemption_timer_deadline - l1_scaled_tsc;
2097}
2098
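/*
 * Arm the hrtimer that emulates the VMX-preemption timer for L2.  The timer
 * value is converted from (scaled) TSC ticks to nanoseconds; a value of zero
 * fires immediately.
 */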
2099static void vmx_start_preemption_timer(struct kvm_vcpu *vcpu,
2100 u64 preemption_timeout)
2101{
2102	struct vcpu_vmx *vmx = to_vmx(vcpu);
2103
2104 /*
2105 * A timer value of zero is architecturally guaranteed to cause
2106 * a VMExit prior to executing any instructions in the guest.
2107 */
2108 if (preemption_timeout == 0) {
2109 vmx_preemption_timer_fn(&vmx->nested.preemption_timer);
2110 return;
2111 }
2112
2113 if (vcpu->arch.virtual_tsc_khz == 0)
2114 return;
2115
2116 preemption_timeout <<= VMX_MISC_EMULATED_PREEMPTION_TIMER_RATE;
2117 preemption_timeout *= 1000000;
2118 do_div(preemption_timeout, vcpu->arch.virtual_tsc_khz);
2119 hrtimer_start(&vmx->nested.preemption_timer,
2120		      ktime_add_ns(ktime_get(), preemption_timeout),
2121		      HRTIMER_MODE_ABS_PINNED);
2122}
2123
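/*
 * Compute the EFER value L2 will run with: taken from vmcs12 if the entry
 * controls load EFER, otherwise derived from L1's EFER with LMA/LME forced
 * to match the VM_ENTRY_IA32E_MODE setting.
 */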
2124static u64 nested_vmx_calc_efer(struct vcpu_vmx *vmx, struct vmcs12 *vmcs12)
2125{
2126 if (vmx->nested.nested_run_pending &&
2127 (vmcs12->vm_entry_controls & VM_ENTRY_LOAD_IA32_EFER))
2128 return vmcs12->guest_ia32_efer;
2129 else if (vmcs12->vm_entry_controls & VM_ENTRY_IA32E_MODE)
2130 return vmx->vcpu.arch.efer | (EFER_LMA | EFER_LME);
2131 else
2132 return vmx->vcpu.arch.efer & ~(EFER_LMA | EFER_LME);
2133}
2134
2135static void prepare_vmcs02_constant_state(struct vcpu_vmx *vmx)
2136{
2137 /*
2138 * If vmcs02 hasn't been initialized, set the constant vmcs02 state
2139 * according to L0's settings (vmcs12 is irrelevant here). Host
2140 * fields that come from L0 and are not constant, e.g. HOST_CR3,
2141 * will be set as needed prior to VMLAUNCH/VMRESUME.
2142 */
2143 if (vmx->nested.vmcs02_initialized)
2144 return;
2145 vmx->nested.vmcs02_initialized = true;
2146
2147 /*
2148	 * We don't care what the EPTP value is; we just need to guarantee
2149 * it's valid so we don't get a false positive when doing early
2150 * consistency checks.
2151 */
2152 if (enable_ept && nested_early_check)
2153		vmcs_write64(EPT_POINTER,
2154			     construct_eptp(&vmx->vcpu, 0, PT64_ROOT_4LEVEL));
2155
2156 /* All VMFUNCs are currently emulated through L0 vmexits. */
2157 if (cpu_has_vmx_vmfunc())
2158 vmcs_write64(VM_FUNCTION_CONTROL, 0);
2159
2160 if (cpu_has_vmx_posted_intr())
2161 vmcs_write16(POSTED_INTR_NV, POSTED_INTR_NESTED_VECTOR);
2162
2163 if (cpu_has_vmx_msr_bitmap())
2164 vmcs_write64(MSR_BITMAP, __pa(vmx->nested.vmcs02.msr_bitmap));
2165
2166	/*
2167	 * PML is emulated for L2, but never enabled in hardware as the MMU
2168	 * handles A/D emulation. Disabling PML for L2 also avoids having to
2169	 * deal with filtering out L2 GPAs from the buffer.
2170	 */
2171	if (enable_pml) {
2172		vmcs_write64(PML_ADDRESS, 0);
2173		vmcs_write16(GUEST_PML_INDEX, -1);
2174	}
2175
2176	if (cpu_has_vmx_encls_vmexit())
2177		vmcs_write64(ENCLS_EXITING_BITMAP, INVALID_GPA);
2178
2179 /*
2180 * Set the MSR load/store lists to match L0's settings. Only the
2181 * addresses are constant (for vmcs02), the counts can change based
2182 * on L2's behavior, e.g. switching to/from long mode.
2183 */
2184	vmcs_write64(VM_EXIT_MSR_STORE_ADDR, __pa(vmx->msr_autostore.guest.val));
2185	vmcs_write64(VM_EXIT_MSR_LOAD_ADDR, __pa(vmx->msr_autoload.host.val));
2186 vmcs_write64(VM_ENTRY_MSR_LOAD_ADDR, __pa(vmx->msr_autoload.guest.val));
2187
2188 vmx_set_constant_host_state(vmx);
2189}
2190
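/*
 * vmcs02 state that is rarely written: the constant state, the VMCS link
 * pointer, and the VPID used while running L2.  Skipped entirely when vmcs12
 * is unchanged and no enlightened VMCS is in use.
 */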
2191static void prepare_vmcs02_early_rare(struct vcpu_vmx *vmx,
2192				      struct vmcs12 *vmcs12)
2193{
2194 prepare_vmcs02_constant_state(vmx);
2195
2196	vmcs_write64(VMCS_LINK_POINTER, INVALID_GPA);
2197
2198 if (enable_vpid) {
2199 if (nested_cpu_has_vpid(vmcs12) && vmx->nested.vpid02)
2200 vmcs_write16(VIRTUAL_PROCESSOR_ID, vmx->nested.vpid02);
2201 else
2202 vmcs_write16(VIRTUAL_PROCESSOR_ID, vmx->vpid);
2203 }
2204}
2205
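/*
 * Set up the vmcs02 VM-execution, VM-entry and VM-exit controls, merging
 * L0's configuration (vmcs01) with L1's requests (vmcs12), and load the
 * event injection fields for a pending nested VM-entry.
 */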
2206static void prepare_vmcs02_early(struct vcpu_vmx *vmx, struct loaded_vmcs *vmcs01,
2207				 struct vmcs12 *vmcs12)
2208{
2209	u32 exec_control;
2210	u64 guest_efer = nested_vmx_calc_efer(vmx, vmcs12);
2211
Vitaly Kuznetsov1e9dfbd2021-05-26 15:20:16 +02002212 if (vmx->nested.dirty_vmcs12 || evmptr_is_valid(vmx->nested.hv_evmcs_vmptr))
Paolo Bonzinib1346ab2019-06-06 17:24:00 +02002213 prepare_vmcs02_early_rare(vmx, vmcs12);
Sean Christopherson55d23752018-12-03 13:53:18 -08002214
2215 /*
Sean Christopherson55d23752018-12-03 13:53:18 -08002216 * PIN CONTROLS
2217 */
Sean Christopherson389ab252021-08-10 10:19:50 -07002218 exec_control = __pin_controls_get(vmcs01);
Sean Christopherson804939e2019-05-07 12:18:05 -07002219 exec_control |= (vmcs12->pin_based_vm_exec_control &
2220 ~PIN_BASED_VMX_PREEMPTION_TIMER);
Sean Christopherson55d23752018-12-03 13:53:18 -08002221
2222 /* Posted interrupts setting is only taken from vmcs12. */
Sean Christophersonf7782bb82021-08-10 07:45:26 -07002223 vmx->nested.pi_pending = false;
2224 if (nested_cpu_has_posted_intr(vmcs12))
Sean Christopherson55d23752018-12-03 13:53:18 -08002225 vmx->nested.posted_intr_nv = vmcs12->posted_intr_nv;
Sean Christophersonf7782bb82021-08-10 07:45:26 -07002226 else
Sean Christopherson55d23752018-12-03 13:53:18 -08002227 exec_control &= ~PIN_BASED_POSTED_INTR;
Sean Christopherson3af80fe2019-05-07 12:18:00 -07002228 pin_controls_set(vmx, exec_control);
Sean Christopherson55d23752018-12-03 13:53:18 -08002229
2230 /*
2231 * EXEC CONTROLS
2232 */
Sean Christopherson389ab252021-08-10 10:19:50 -07002233 exec_control = __exec_controls_get(vmcs01); /* L0's desires */
Xiaoyao Li9dadc2f2019-12-06 16:45:24 +08002234 exec_control &= ~CPU_BASED_INTR_WINDOW_EXITING;
Xiaoyao Li4e2a0bc2019-12-06 16:45:25 +08002235 exec_control &= ~CPU_BASED_NMI_WINDOW_EXITING;
Sean Christopherson55d23752018-12-03 13:53:18 -08002236 exec_control &= ~CPU_BASED_TPR_SHADOW;
2237 exec_control |= vmcs12->cpu_based_vm_exec_control;
2238
Liran Alon02d496cf2019-11-11 14:30:55 +02002239 vmx->nested.l1_tpr_threshold = -1;
Sean Christophersonca2f5462019-05-07 09:06:33 -07002240 if (exec_control & CPU_BASED_TPR_SHADOW)
Sean Christopherson55d23752018-12-03 13:53:18 -08002241 vmcs_write32(TPR_THRESHOLD, vmcs12->tpr_threshold);
Sean Christopherson55d23752018-12-03 13:53:18 -08002242#ifdef CONFIG_X86_64
Sean Christophersonca2f5462019-05-07 09:06:33 -07002243 else
Sean Christopherson55d23752018-12-03 13:53:18 -08002244 exec_control |= CPU_BASED_CR8_LOAD_EXITING |
2245 CPU_BASED_CR8_STORE_EXITING;
2246#endif
Sean Christopherson55d23752018-12-03 13:53:18 -08002247
2248 /*
2249 * A vmexit (to either L1 hypervisor or L0 userspace) is always needed
2250 * for I/O port accesses.
2251 */
Sean Christopherson55d23752018-12-03 13:53:18 -08002252 exec_control |= CPU_BASED_UNCOND_IO_EXITING;
Sean Christophersonde0286b2019-05-07 12:18:01 -07002253 exec_control &= ~CPU_BASED_USE_IO_BITMAPS;
2254
2255 /*
2256 * This bit will be computed in nested_get_vmcs12_pages, because
2257 * we do not have access to L1's MSR bitmap yet. For now, keep
2258 * the same bit as before, hoping to avoid multiple VMWRITEs that
2259 * only set/clear this bit.
2260 */
2261 exec_control &= ~CPU_BASED_USE_MSR_BITMAPS;
2262 exec_control |= exec_controls_get(vmx) & CPU_BASED_USE_MSR_BITMAPS;
2263
Sean Christopherson3af80fe2019-05-07 12:18:00 -07002264 exec_controls_set(vmx, exec_control);
Sean Christopherson55d23752018-12-03 13:53:18 -08002265
2266 /*
2267 * SECONDARY EXEC CONTROLS
2268 */
2269 if (cpu_has_secondary_exec_ctrls()) {
Sean Christopherson389ab252021-08-10 10:19:50 -07002270 exec_control = __secondary_exec_controls_get(vmcs01);
Sean Christopherson55d23752018-12-03 13:53:18 -08002271
2272 /* Take the following fields only from vmcs12 */
2273 exec_control &= ~(SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES |
Sean Christopherson389ab252021-08-10 10:19:50 -07002274 SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE |
Sean Christopherson55d23752018-12-03 13:53:18 -08002275 SECONDARY_EXEC_ENABLE_INVPCID |
Sean Christopherson7f3603b2020-09-23 09:50:47 -07002276 SECONDARY_EXEC_ENABLE_RDTSCP |
Sean Christopherson55d23752018-12-03 13:53:18 -08002277 SECONDARY_EXEC_XSAVES |
Tao Xue69e72fa2019-07-16 14:55:49 +08002278 SECONDARY_EXEC_ENABLE_USR_WAIT_PAUSE |
Sean Christopherson55d23752018-12-03 13:53:18 -08002279 SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY |
2280 SECONDARY_EXEC_APIC_REGISTER_VIRT |
Ilias Stamatisd041b5e2021-05-26 19:44:17 +01002281 SECONDARY_EXEC_ENABLE_VMFUNC |
Sean Christopherson389ab252021-08-10 10:19:50 -07002282 SECONDARY_EXEC_TSC_SCALING |
2283 SECONDARY_EXEC_DESC);
2284
Sean Christopherson55d23752018-12-03 13:53:18 -08002285 if (nested_cpu_has(vmcs12,
Sean Christophersonc3bb9a22021-02-12 16:50:07 -08002286 CPU_BASED_ACTIVATE_SECONDARY_CONTROLS))
2287 exec_control |= vmcs12->secondary_vm_exec_control;
2288
2289 /* PML is emulated and never enabled in hardware for L2. */
2290 exec_control &= ~SECONDARY_EXEC_ENABLE_PML;
Sean Christopherson55d23752018-12-03 13:53:18 -08002291
2292 /* VMCS shadowing for L2 is emulated for now */
2293 exec_control &= ~SECONDARY_EXEC_SHADOW_VMCS;
2294
Sean Christopherson469debd2019-05-07 12:18:02 -07002295 /*
2296 * Preset *DT exiting when emulating UMIP, so that vmx_set_cr4()
2297 * will not have to rewrite the controls just for this bit.
2298 */
2299 if (!boot_cpu_has(X86_FEATURE_UMIP) && vmx_umip_emulated() &&
2300 (vmcs12->guest_cr4 & X86_CR4_UMIP))
2301 exec_control |= SECONDARY_EXEC_DESC;
2302
Sean Christopherson55d23752018-12-03 13:53:18 -08002303 if (exec_control & SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY)
2304 vmcs_write16(GUEST_INTR_STATUS,
2305 vmcs12->guest_intr_status);
2306
Krish Sadhukhanbddd82d2020-09-21 08:10:25 +00002307 if (!nested_cpu_has2(vmcs12, SECONDARY_EXEC_UNRESTRICTED_GUEST))
2308 exec_control &= ~SECONDARY_EXEC_UNRESTRICTED_GUEST;
2309
Sean Christopherson72add912021-04-12 16:21:42 +12002310 if (exec_control & SECONDARY_EXEC_ENCLS_EXITING)
2311 vmx_write_encls_bitmap(&vmx->vcpu, vmcs12);
2312
Sean Christopherson3af80fe2019-05-07 12:18:00 -07002313 secondary_exec_controls_set(vmx, exec_control);
Sean Christopherson55d23752018-12-03 13:53:18 -08002314 }
2315
2316 /*
2317 * ENTRY CONTROLS
2318 *
2319 * vmcs12's VM_{ENTRY,EXIT}_LOAD_IA32_EFER and VM_ENTRY_IA32E_MODE
2320 * are emulated by vmx_set_efer() in prepare_vmcs02(), but speculate
2321 * on the related bits (if supported by the CPU) in the hope that
2322 * we can avoid VMWrites during vmx_set_efer().
2323 */
Sean Christopherson389ab252021-08-10 10:19:50 -07002324 exec_control = __vm_entry_controls_get(vmcs01);
2325 exec_control |= vmcs12->vm_entry_controls;
2326 exec_control &= ~(VM_ENTRY_IA32E_MODE | VM_ENTRY_LOAD_IA32_EFER);
Sean Christopherson55d23752018-12-03 13:53:18 -08002327 if (cpu_has_load_ia32_efer()) {
2328 if (guest_efer & EFER_LMA)
2329 exec_control |= VM_ENTRY_IA32E_MODE;
2330 if (guest_efer != host_efer)
2331 exec_control |= VM_ENTRY_LOAD_IA32_EFER;
2332 }
Sean Christopherson3af80fe2019-05-07 12:18:00 -07002333 vm_entry_controls_set(vmx, exec_control);
Sean Christopherson55d23752018-12-03 13:53:18 -08002334
2335 /*
2336 * EXIT CONTROLS
2337 *
2338 * L2->L1 exit controls are emulated - the hardware exit is to L0 so
2339 * we should use its exit controls. Note that VM_EXIT_LOAD_IA32_EFER
2340 * bits may be modified by vmx_set_efer() in prepare_vmcs02().
2341 */
Sean Christopherson389ab252021-08-10 10:19:50 -07002342 exec_control = __vm_exit_controls_get(vmcs01);
Sean Christopherson55d23752018-12-03 13:53:18 -08002343 if (cpu_has_load_ia32_efer() && guest_efer != host_efer)
2344 exec_control |= VM_EXIT_LOAD_IA32_EFER;
Sean Christopherson389ab252021-08-10 10:19:50 -07002345 else
2346 exec_control &= ~VM_EXIT_LOAD_IA32_EFER;
Sean Christopherson3af80fe2019-05-07 12:18:00 -07002347 vm_exit_controls_set(vmx, exec_control);
Sean Christopherson55d23752018-12-03 13:53:18 -08002348
2349 /*
2350 * Interrupt/Exception Fields
2351 */
2352 if (vmx->nested.nested_run_pending) {
2353 vmcs_write32(VM_ENTRY_INTR_INFO_FIELD,
2354 vmcs12->vm_entry_intr_info_field);
2355 vmcs_write32(VM_ENTRY_EXCEPTION_ERROR_CODE,
2356 vmcs12->vm_entry_exception_error_code);
2357 vmcs_write32(VM_ENTRY_INSTRUCTION_LEN,
2358 vmcs12->vm_entry_instruction_len);
2359 vmcs_write32(GUEST_INTERRUPTIBILITY_INFO,
2360 vmcs12->guest_interruptibility_info);
2361 vmx->loaded_vmcs->nmi_known_unmasked =
2362 !(vmcs12->guest_interruptibility_info & GUEST_INTR_STATE_NMI);
2363 } else {
2364 vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, 0);
2365 }
2366}
2367
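/*
 * Write the rarely-updated parts of the vmcs12 guest state (segments,
 * descriptor tables, sysenter MSRs, PDPTRs, ...) into vmcs02, skipping
 * field groups that an in-use enlightened VMCS marks as clean.
 */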
2368static void prepare_vmcs02_rare(struct vcpu_vmx *vmx, struct vmcs12 *vmcs12)
2369{
2370 struct hv_enlightened_vmcs *hv_evmcs = vmx->nested.hv_evmcs;
2371
2372 if (!hv_evmcs || !(hv_evmcs->hv_clean_fields &
2373 HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2)) {
2374 vmcs_write16(GUEST_ES_SELECTOR, vmcs12->guest_es_selector);
2375 vmcs_write16(GUEST_CS_SELECTOR, vmcs12->guest_cs_selector);
2376 vmcs_write16(GUEST_SS_SELECTOR, vmcs12->guest_ss_selector);
2377 vmcs_write16(GUEST_DS_SELECTOR, vmcs12->guest_ds_selector);
2378 vmcs_write16(GUEST_FS_SELECTOR, vmcs12->guest_fs_selector);
2379 vmcs_write16(GUEST_GS_SELECTOR, vmcs12->guest_gs_selector);
2380 vmcs_write16(GUEST_LDTR_SELECTOR, vmcs12->guest_ldtr_selector);
2381 vmcs_write16(GUEST_TR_SELECTOR, vmcs12->guest_tr_selector);
2382 vmcs_write32(GUEST_ES_LIMIT, vmcs12->guest_es_limit);
2383 vmcs_write32(GUEST_CS_LIMIT, vmcs12->guest_cs_limit);
2384 vmcs_write32(GUEST_SS_LIMIT, vmcs12->guest_ss_limit);
2385 vmcs_write32(GUEST_DS_LIMIT, vmcs12->guest_ds_limit);
2386 vmcs_write32(GUEST_FS_LIMIT, vmcs12->guest_fs_limit);
2387 vmcs_write32(GUEST_GS_LIMIT, vmcs12->guest_gs_limit);
2388 vmcs_write32(GUEST_LDTR_LIMIT, vmcs12->guest_ldtr_limit);
2389 vmcs_write32(GUEST_TR_LIMIT, vmcs12->guest_tr_limit);
2390 vmcs_write32(GUEST_GDTR_LIMIT, vmcs12->guest_gdtr_limit);
2391 vmcs_write32(GUEST_IDTR_LIMIT, vmcs12->guest_idtr_limit);
Sean Christopherson1c6f0b42019-05-07 08:36:25 -07002392 vmcs_write32(GUEST_CS_AR_BYTES, vmcs12->guest_cs_ar_bytes);
2393 vmcs_write32(GUEST_SS_AR_BYTES, vmcs12->guest_ss_ar_bytes);
Sean Christopherson55d23752018-12-03 13:53:18 -08002394 vmcs_write32(GUEST_ES_AR_BYTES, vmcs12->guest_es_ar_bytes);
2395 vmcs_write32(GUEST_DS_AR_BYTES, vmcs12->guest_ds_ar_bytes);
2396 vmcs_write32(GUEST_FS_AR_BYTES, vmcs12->guest_fs_ar_bytes);
2397 vmcs_write32(GUEST_GS_AR_BYTES, vmcs12->guest_gs_ar_bytes);
2398 vmcs_write32(GUEST_LDTR_AR_BYTES, vmcs12->guest_ldtr_ar_bytes);
2399 vmcs_write32(GUEST_TR_AR_BYTES, vmcs12->guest_tr_ar_bytes);
2400 vmcs_writel(GUEST_ES_BASE, vmcs12->guest_es_base);
2401 vmcs_writel(GUEST_CS_BASE, vmcs12->guest_cs_base);
2402 vmcs_writel(GUEST_SS_BASE, vmcs12->guest_ss_base);
2403 vmcs_writel(GUEST_DS_BASE, vmcs12->guest_ds_base);
2404 vmcs_writel(GUEST_FS_BASE, vmcs12->guest_fs_base);
2405 vmcs_writel(GUEST_GS_BASE, vmcs12->guest_gs_base);
2406 vmcs_writel(GUEST_LDTR_BASE, vmcs12->guest_ldtr_base);
2407 vmcs_writel(GUEST_TR_BASE, vmcs12->guest_tr_base);
2408 vmcs_writel(GUEST_GDTR_BASE, vmcs12->guest_gdtr_base);
2409 vmcs_writel(GUEST_IDTR_BASE, vmcs12->guest_idtr_base);
Sean Christophersonfc387d82020-09-23 11:44:46 -07002410
2411 vmx->segment_cache.bitmask = 0;
Sean Christopherson55d23752018-12-03 13:53:18 -08002412 }
2413
2414 if (!hv_evmcs || !(hv_evmcs->hv_clean_fields &
2415 HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1)) {
2416 vmcs_write32(GUEST_SYSENTER_CS, vmcs12->guest_sysenter_cs);
2417 vmcs_writel(GUEST_PENDING_DBG_EXCEPTIONS,
2418 vmcs12->guest_pending_dbg_exceptions);
2419 vmcs_writel(GUEST_SYSENTER_ESP, vmcs12->guest_sysenter_esp);
2420 vmcs_writel(GUEST_SYSENTER_EIP, vmcs12->guest_sysenter_eip);
2421
2422 /*
2423		 * L1 may access L2's PDPTRs, so save them in order to
2424		 * construct vmcs12.
2425 */
2426 if (enable_ept) {
2427 vmcs_write64(GUEST_PDPTR0, vmcs12->guest_pdptr0);
2428 vmcs_write64(GUEST_PDPTR1, vmcs12->guest_pdptr1);
2429 vmcs_write64(GUEST_PDPTR2, vmcs12->guest_pdptr2);
2430 vmcs_write64(GUEST_PDPTR3, vmcs12->guest_pdptr3);
2431 }
Sean Christophersonc27e5b02019-05-07 09:06:39 -07002432
2433 if (kvm_mpx_supported() && vmx->nested.nested_run_pending &&
2434 (vmcs12->vm_entry_controls & VM_ENTRY_LOAD_BNDCFGS))
2435 vmcs_write64(GUEST_BNDCFGS, vmcs12->guest_bndcfgs);
Sean Christopherson55d23752018-12-03 13:53:18 -08002436 }
2437
2438 if (nested_cpu_has_xsaves(vmcs12))
2439 vmcs_write64(XSS_EXIT_BITMAP, vmcs12->xss_exit_bitmap);
2440
2441 /*
2442 * Whether page-faults are trapped is determined by a combination of
2443	 * 3 settings: PFEC_MASK, PFEC_MATCH and EXCEPTION_BITMAP.PF. If L0
2444	 * doesn't care about page faults then we should set all of these to
2445	 * L1's desires. However, if L0 does care about (some) page faults, it
2446	 * is not easy (if at all possible?) to merge L0 and L1's desires, so we
2447	 * simply ask to exit on each and every L2 page fault. This is done by
2448 * setting MASK=MATCH=0 and (see below) EB.PF=1.
Sean Christopherson55d23752018-12-03 13:53:18 -08002449 * Note that below we don't need special code to set EB.PF beyond the
2450 * "or"ing of the EB of vmcs01 and vmcs12, because when enable_ept,
2451 * vmcs01's EB.PF is 0 so the "or" will take vmcs12's value, and when
2452 * !enable_ept, EB.PF is 1, so the "or" will always be 1.
2453 */
Paolo Bonzinia0c13432020-07-10 17:48:08 +02002454 if (vmx_need_pf_intercept(&vmx->vcpu)) {
2455 /*
2456 * TODO: if both L0 and L1 need the same MASK and MATCH,
2457 * go ahead and use it?
2458 */
2459 vmcs_write32(PAGE_FAULT_ERROR_CODE_MASK, 0);
2460 vmcs_write32(PAGE_FAULT_ERROR_CODE_MATCH, 0);
2461 } else {
2462 vmcs_write32(PAGE_FAULT_ERROR_CODE_MASK, vmcs12->page_fault_error_code_mask);
2463 vmcs_write32(PAGE_FAULT_ERROR_CODE_MATCH, vmcs12->page_fault_error_code_match);
2464 }
Sean Christopherson55d23752018-12-03 13:53:18 -08002465
2466 if (cpu_has_vmx_apicv()) {
2467 vmcs_write64(EOI_EXIT_BITMAP0, vmcs12->eoi_exit_bitmap0);
2468 vmcs_write64(EOI_EXIT_BITMAP1, vmcs12->eoi_exit_bitmap1);
2469 vmcs_write64(EOI_EXIT_BITMAP2, vmcs12->eoi_exit_bitmap2);
2470 vmcs_write64(EOI_EXIT_BITMAP3, vmcs12->eoi_exit_bitmap3);
2471 }
2472
Aaron Lewis662f1d12019-11-07 21:14:39 -08002473 /*
2474 * Make sure the msr_autostore list is up to date before we set the
2475 * count in the vmcs02.
2476 */
2477 prepare_vmx_msr_autostore_list(&vmx->vcpu, MSR_IA32_TSC);
2478
2479 vmcs_write32(VM_EXIT_MSR_STORE_COUNT, vmx->msr_autostore.guest.nr);
Sean Christopherson55d23752018-12-03 13:53:18 -08002480 vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, vmx->msr_autoload.host.nr);
2481 vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, vmx->msr_autoload.guest.nr);
2482
2483 set_cr4_guest_host_mask(vmx);
Sean Christopherson55d23752018-12-03 13:53:18 -08002484}
2485
2486/*
2487 * prepare_vmcs02 is called when the L1 guest hypervisor runs its nested
2488 * L2 guest. L1 has a vmcs for L2 (vmcs12), and this function "merges" it
2489 * with L0's requirements for its guest (a.k.a. vmcs01), so we can run the L2
2490 * guest in a way that will both be appropriate to L1's requests, and our
2491 * needs. In addition to modifying the active vmcs (which is vmcs02), this
2492 * function also has additional necessary side-effects, like setting various
2493 * vcpu->arch fields.
2494 * Returns 0 on success, non-zero on failure. On failure, the VM-entry failure
2495 * code is assigned to *entry_failure_code.
2496 */
2497static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
2498			 bool from_vmentry,
2499			 enum vm_entry_failure_code *entry_failure_code)
2500{
2501	struct vcpu_vmx *vmx = to_vmx(vcpu);
2502	bool load_guest_pdptrs_vmcs12 = false;
2503
Vitaly Kuznetsov1e9dfbd2021-05-26 15:20:16 +02002504 if (vmx->nested.dirty_vmcs12 || evmptr_is_valid(vmx->nested.hv_evmcs_vmptr)) {
Paolo Bonzinib1346ab2019-06-06 17:24:00 +02002505 prepare_vmcs02_rare(vmx, vmcs12);
Sean Christopherson55d23752018-12-03 13:53:18 -08002506 vmx->nested.dirty_vmcs12 = false;
Sean Christopherson55d23752018-12-03 13:53:18 -08002507
Vitaly Kuznetsov1e9dfbd2021-05-26 15:20:16 +02002508 load_guest_pdptrs_vmcs12 = !evmptr_is_valid(vmx->nested.hv_evmcs_vmptr) ||
2509 !(vmx->nested.hv_evmcs->hv_clean_fields &
Sean Christophersonc7554efc2019-05-07 09:06:40 -07002510 HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1);
Sean Christopherson55d23752018-12-03 13:53:18 -08002511 }
2512
2513 if (vmx->nested.nested_run_pending &&
2514 (vmcs12->vm_entry_controls & VM_ENTRY_LOAD_DEBUG_CONTROLS)) {
2515 kvm_set_dr(vcpu, 7, vmcs12->guest_dr7);
2516 vmcs_write64(GUEST_IA32_DEBUGCTL, vmcs12->guest_ia32_debugctl);
2517 } else {
2518 kvm_set_dr(vcpu, 7, vcpu->arch.dr7);
2519 vmcs_write64(GUEST_IA32_DEBUGCTL, vmx->nested.vmcs01_debugctl);
2520 }
Sean Christopherson3b013a22019-05-07 09:06:28 -07002521 if (kvm_mpx_supported() && (!vmx->nested.nested_run_pending ||
2522 !(vmcs12->vm_entry_controls & VM_ENTRY_LOAD_BNDCFGS)))
2523 vmcs_write64(GUEST_BNDCFGS, vmx->nested.vmcs01_guest_bndcfgs);
Sean Christopherson55d23752018-12-03 13:53:18 -08002524 vmx_set_rflags(vcpu, vmcs12->guest_rflags);
2525
Sean Christopherson55d23752018-12-03 13:53:18 -08002526 /* EXCEPTION_BITMAP and CR0_GUEST_HOST_MASK should basically be the
2527 * bitwise-or of what L1 wants to trap for L2, and what we want to
2528 * trap. Note that CR0.TS also needs updating - we do this later.
2529 */
Jason Baronb6a7cc32021-01-14 22:27:54 -05002530 vmx_update_exception_bitmap(vcpu);
Sean Christopherson55d23752018-12-03 13:53:18 -08002531 vcpu->arch.cr0_guest_owned_bits &= ~vmcs12->cr0_guest_host_mask;
2532 vmcs_writel(CR0_GUEST_HOST_MASK, ~vcpu->arch.cr0_guest_owned_bits);
2533
2534 if (vmx->nested.nested_run_pending &&
2535 (vmcs12->vm_entry_controls & VM_ENTRY_LOAD_IA32_PAT)) {
2536 vmcs_write64(GUEST_IA32_PAT, vmcs12->guest_ia32_pat);
2537 vcpu->arch.pat = vmcs12->guest_ia32_pat;
2538 } else if (vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_IA32_PAT) {
2539 vmcs_write64(GUEST_IA32_PAT, vmx->vcpu.arch.pat);
2540 }
2541
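        /*
         * Nested TSC math (sketch, ignoring the fixed-point shifts):
         * l2_tsc = host_tsc * (l1_mult * l2_mult) + (l1_offset * l2_mult +
         * l2_offset), so the two levels are folded into a single offset and
         * multiplier before being written to vmcs02 below.
         */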
Ilias Stamatisd041b5e2021-05-26 19:44:17 +01002542 vcpu->arch.tsc_offset = kvm_calc_nested_tsc_offset(
2543 vcpu->arch.l1_tsc_offset,
2544 vmx_get_l2_tsc_offset(vcpu),
2545 vmx_get_l2_tsc_multiplier(vcpu));
2546
2547 vcpu->arch.tsc_scaling_ratio = kvm_calc_nested_tsc_multiplier(
2548 vcpu->arch.l1_tsc_scaling_ratio,
2549 vmx_get_l2_tsc_multiplier(vcpu));
2550
Sean Christopherson55d23752018-12-03 13:53:18 -08002551 vmcs_write64(TSC_OFFSET, vcpu->arch.tsc_offset);
Sean Christopherson55d23752018-12-03 13:53:18 -08002552 if (kvm_has_tsc_control)
Ilias Stamatis1ab92872021-06-07 11:54:38 +01002553 vmcs_write64(TSC_MULTIPLIER, vcpu->arch.tsc_scaling_ratio);
Sean Christopherson55d23752018-12-03 13:53:18 -08002554
Sean Christopherson50b265a2020-03-20 14:28:19 -07002555 nested_vmx_transition_tlb_flush(vcpu, vmcs12, true);
Sean Christopherson55d23752018-12-03 13:53:18 -08002556
2557 if (nested_cpu_has_ept(vmcs12))
2558 nested_ept_init_mmu_context(vcpu);
Sean Christopherson55d23752018-12-03 13:53:18 -08002559
2560 /*
2561 * This sets GUEST_CR0 to vmcs12->guest_cr0, possibly modifying those
 2562 * bits which we consider must be enabled.
 2563 * The CR0_READ_SHADOW is what L2 should have expected to read given
 2564 * the specification by L1; it's not enough to take
 2565 * vmcs12->cr0_read_shadow because our cr0_guest_host_mask may have
 2566 * more bits set than L1 expected.
2567 */
2568 vmx_set_cr0(vcpu, vmcs12->guest_cr0);
2569 vmcs_writel(CR0_READ_SHADOW, nested_read_cr0(vmcs12));
2570
2571 vmx_set_cr4(vcpu, vmcs12->guest_cr4);
2572 vmcs_writel(CR4_READ_SHADOW, nested_read_cr4(vmcs12));
2573
2574 vcpu->arch.efer = nested_vmx_calc_efer(vmx, vmcs12);
2575 /* Note: may modify VM_ENTRY/EXIT_CONTROLS and GUEST/HOST_IA32_EFER */
2576 vmx_set_efer(vcpu, vcpu->arch.efer);
2577
2578 /*
2579 * Guest state is invalid and unrestricted guest is disabled,
2580 * which means L1 attempted VMEntry to L2 with invalid state.
2581 * Fail the VMEntry.
Maxim Levitskyc8607e42021-09-13 17:09:53 +03002582 *
 2583 * However, when force loading the guest state (SMM exit or
 2584 * loading nested state after migration), it is possible to
 2585 * have invalid guest state now, which will be fixed later by
 2586 * restoring the L2 register state.
Sean Christopherson55d23752018-12-03 13:53:18 -08002587 */
Maxim Levitskyc8607e42021-09-13 17:09:53 +03002588 if (CC(from_vmentry && !vmx_guest_state_valid(vcpu))) {
Sean Christopherson55d23752018-12-03 13:53:18 -08002589 *entry_failure_code = ENTRY_FAIL_DEFAULT;
Sean Christophersonc80add02019-04-11 12:18:09 -07002590 return -EINVAL;
Sean Christopherson55d23752018-12-03 13:53:18 -08002591 }
2592
 2593 /* Load vmcs12's guest CR3, for either EPT or shadow page tables. */
2594 if (nested_vmx_load_cr3(vcpu, vmcs12->guest_cr3, nested_cpu_has_ept(vmcs12),
Maxim Levitsky0f857222021-06-07 12:02:00 +03002595 from_vmentry, entry_failure_code))
Sean Christophersonc80add02019-04-11 12:18:09 -07002596 return -EINVAL;
Sean Christopherson55d23752018-12-03 13:53:18 -08002597
Sean Christopherson04f11ef2019-09-27 14:45:16 -07002598 /*
2599 * Immediately write vmcs02.GUEST_CR3. It will be propagated to vmcs12
2600 * on nested VM-Exit, which can occur without actually running L2 and
Paolo Bonzini727a7e22020-03-05 03:52:50 -05002601 * thus without hitting vmx_load_mmu_pgd(), e.g. if L1 is entering L2 with
Sean Christopherson04f11ef2019-09-27 14:45:16 -07002602 * vmcs12.GUEST_ACTIVITY_STATE=HLT, in which case KVM will intercept the
2603 * transition to HLT instead of running L2.
2604 */
2605 if (enable_ept)
2606 vmcs_writel(GUEST_CR3, vmcs12->guest_cr3);
2607
Sean Christophersonc7554efc2019-05-07 09:06:40 -07002608 /* Late preparation of GUEST_PDPTRs now that EFER and CRs are set. */
2609 if (load_guest_pdptrs_vmcs12 && nested_cpu_has_ept(vmcs12) &&
2610 is_pae_paging(vcpu)) {
2611 vmcs_write64(GUEST_PDPTR0, vmcs12->guest_pdptr0);
2612 vmcs_write64(GUEST_PDPTR1, vmcs12->guest_pdptr1);
2613 vmcs_write64(GUEST_PDPTR2, vmcs12->guest_pdptr2);
2614 vmcs_write64(GUEST_PDPTR3, vmcs12->guest_pdptr3);
2615 }
2616
Sean Christopherson55d23752018-12-03 13:53:18 -08002617 if (!enable_ept)
2618 vcpu->arch.walk_mmu->inject_page_fault = vmx_inject_page_fault_nested;
2619
Oliver Upton71f73472019-11-13 16:17:19 -08002620 if ((vmcs12->vm_entry_controls & VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL) &&
Oliver Uptond1968422019-12-13 16:33:58 -08002621 WARN_ON_ONCE(kvm_set_msr(vcpu, MSR_CORE_PERF_GLOBAL_CTRL,
Dan Carpenterbfbb3072021-11-30 15:53:37 +03002622 vmcs12->guest_ia32_perf_global_ctrl))) {
2623 *entry_failure_code = ENTRY_FAIL_DEFAULT;
Oliver Upton71f73472019-11-13 16:17:19 -08002624 return -EINVAL;
Dan Carpenterbfbb3072021-11-30 15:53:37 +03002625 }
Oliver Upton71f73472019-11-13 16:17:19 -08002626
Paolo Bonzinie9c16c72019-04-30 22:07:26 +02002627 kvm_rsp_write(vcpu, vmcs12->guest_rsp);
2628 kvm_rip_write(vcpu, vmcs12->guest_rip);
Vitaly Kuznetsovdc313382021-05-26 15:20:24 +02002629
2630 /*
2631 * It was observed that genuine Hyper-V running in L1 doesn't reset
 2632 * 'hv_clean_fields' by itself; it only sets the corresponding dirty
 2633 * bits when it changes a field in the eVMCS. Mark all fields as clean
2634 * here.
2635 */
2636 if (evmptr_is_valid(vmx->nested.hv_evmcs_vmptr))
2637 vmx->nested.hv_evmcs->hv_clean_fields |=
2638 HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL;
2639
Sean Christopherson55d23752018-12-03 13:53:18 -08002640 return 0;
2641}
2642
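/*
 * Per the SDM's VM-execution control checks: "virtual NMIs" requires "NMI
 * exiting", and "NMI-window exiting" in turn requires "virtual NMIs".
 */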
2643static int nested_vmx_check_nmi_controls(struct vmcs12 *vmcs12)
2644{
Sean Christopherson5497b952019-07-11 08:58:29 -07002645 if (CC(!nested_cpu_has_nmi_exiting(vmcs12) &&
2646 nested_cpu_has_virtual_nmis(vmcs12)))
Sean Christopherson55d23752018-12-03 13:53:18 -08002647 return -EINVAL;
2648
Sean Christopherson5497b952019-07-11 08:58:29 -07002649 if (CC(!nested_cpu_has_virtual_nmis(vmcs12) &&
Xiaoyao Li4e2a0bc2019-12-06 16:45:25 +08002650 nested_cpu_has(vmcs12, CPU_BASED_NMI_WINDOW_EXITING)))
Sean Christopherson55d23752018-12-03 13:53:18 -08002651 return -EINVAL;
2652
2653 return 0;
2654}
2655
Sean Christophersonac6389a2020-03-02 18:02:38 -08002656static bool nested_vmx_check_eptp(struct kvm_vcpu *vcpu, u64 new_eptp)
Sean Christopherson55d23752018-12-03 13:53:18 -08002657{
2658 struct vcpu_vmx *vmx = to_vmx(vcpu);
Sean Christopherson55d23752018-12-03 13:53:18 -08002659
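        /*
         * EPTP layout, as validated below: bits 2:0 memory type, bits 5:3
         * page-walk length minus 1, bit 6 accessed/dirty enable, bits 11:7
         * treated as reserved here, and bits 63:12 the 4KiB-aligned root
         * table address (checked against the guest's physical-address width).
         */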
2660 /* Check for memory type validity */
Sean Christophersonac6389a2020-03-02 18:02:38 -08002661 switch (new_eptp & VMX_EPTP_MT_MASK) {
Sean Christopherson55d23752018-12-03 13:53:18 -08002662 case VMX_EPTP_MT_UC:
Sean Christopherson5497b952019-07-11 08:58:29 -07002663 if (CC(!(vmx->nested.msrs.ept_caps & VMX_EPTP_UC_BIT)))
Sean Christopherson55d23752018-12-03 13:53:18 -08002664 return false;
2665 break;
2666 case VMX_EPTP_MT_WB:
Sean Christopherson5497b952019-07-11 08:58:29 -07002667 if (CC(!(vmx->nested.msrs.ept_caps & VMX_EPTP_WB_BIT)))
Sean Christopherson55d23752018-12-03 13:53:18 -08002668 return false;
2669 break;
2670 default:
2671 return false;
2672 }
2673
Sean Christophersonbb1fcc72020-03-02 18:02:36 -08002674 /* Page-walk levels validity. */
Sean Christophersonac6389a2020-03-02 18:02:38 -08002675 switch (new_eptp & VMX_EPTP_PWL_MASK) {
Sean Christophersonbb1fcc72020-03-02 18:02:36 -08002676 case VMX_EPTP_PWL_5:
2677 if (CC(!(vmx->nested.msrs.ept_caps & VMX_EPT_PAGE_WALK_5_BIT)))
2678 return false;
2679 break;
2680 case VMX_EPTP_PWL_4:
2681 if (CC(!(vmx->nested.msrs.ept_caps & VMX_EPT_PAGE_WALK_4_BIT)))
2682 return false;
2683 break;
2684 default:
Sean Christopherson55d23752018-12-03 13:53:18 -08002685 return false;
Sean Christophersonbb1fcc72020-03-02 18:02:36 -08002686 }
Sean Christopherson55d23752018-12-03 13:53:18 -08002687
2688 /* Reserved bits should not be set */
Sean Christopherson636e8b72021-02-03 16:01:10 -08002689 if (CC(kvm_vcpu_is_illegal_gpa(vcpu, new_eptp) || ((new_eptp >> 7) & 0x1f)))
Sean Christopherson55d23752018-12-03 13:53:18 -08002690 return false;
2691
2692 /* AD, if set, should be supported */
Sean Christophersonac6389a2020-03-02 18:02:38 -08002693 if (new_eptp & VMX_EPTP_AD_ENABLE_BIT) {
Sean Christopherson5497b952019-07-11 08:58:29 -07002694 if (CC(!(vmx->nested.msrs.ept_caps & VMX_EPT_AD_BIT)))
Sean Christopherson55d23752018-12-03 13:53:18 -08002695 return false;
2696 }
2697
2698 return true;
2699}
2700
Krish Sadhukhan461b4ba2018-12-12 13:30:07 -05002701/*
2702 * Checks related to VM-Execution Control Fields
2703 */
2704static int nested_check_vm_execution_controls(struct kvm_vcpu *vcpu,
2705 struct vmcs12 *vmcs12)
2706{
2707 struct vcpu_vmx *vmx = to_vmx(vcpu);
2708
Sean Christopherson5497b952019-07-11 08:58:29 -07002709 if (CC(!vmx_control_verify(vmcs12->pin_based_vm_exec_control,
2710 vmx->nested.msrs.pinbased_ctls_low,
2711 vmx->nested.msrs.pinbased_ctls_high)) ||
2712 CC(!vmx_control_verify(vmcs12->cpu_based_vm_exec_control,
2713 vmx->nested.msrs.procbased_ctls_low,
2714 vmx->nested.msrs.procbased_ctls_high)))
Krish Sadhukhan461b4ba2018-12-12 13:30:07 -05002715 return -EINVAL;
2716
2717 if (nested_cpu_has(vmcs12, CPU_BASED_ACTIVATE_SECONDARY_CONTROLS) &&
Sean Christopherson5497b952019-07-11 08:58:29 -07002718 CC(!vmx_control_verify(vmcs12->secondary_vm_exec_control,
2719 vmx->nested.msrs.secondary_ctls_low,
2720 vmx->nested.msrs.secondary_ctls_high)))
Krish Sadhukhan461b4ba2018-12-12 13:30:07 -05002721 return -EINVAL;
2722
Sean Christopherson5497b952019-07-11 08:58:29 -07002723 if (CC(vmcs12->cr3_target_count > nested_cpu_vmx_misc_cr3_count(vcpu)) ||
Krish Sadhukhan461b4ba2018-12-12 13:30:07 -05002724 nested_vmx_check_io_bitmap_controls(vcpu, vmcs12) ||
2725 nested_vmx_check_msr_bitmap_controls(vcpu, vmcs12) ||
2726 nested_vmx_check_tpr_shadow_controls(vcpu, vmcs12) ||
2727 nested_vmx_check_apic_access_controls(vcpu, vmcs12) ||
2728 nested_vmx_check_apicv_controls(vcpu, vmcs12) ||
2729 nested_vmx_check_nmi_controls(vmcs12) ||
2730 nested_vmx_check_pml_controls(vcpu, vmcs12) ||
2731 nested_vmx_check_unrestricted_guest_controls(vcpu, vmcs12) ||
2732 nested_vmx_check_mode_based_ept_exec_controls(vcpu, vmcs12) ||
2733 nested_vmx_check_shadow_vmcs_controls(vcpu, vmcs12) ||
Sean Christopherson5497b952019-07-11 08:58:29 -07002734 CC(nested_cpu_has_vpid(vmcs12) && !vmcs12->virtual_processor_id))
Krish Sadhukhan461b4ba2018-12-12 13:30:07 -05002735 return -EINVAL;
2736
Sean Christophersonbc441212019-02-12 16:42:23 -08002737 if (!nested_cpu_has_preemption_timer(vmcs12) &&
2738 nested_cpu_has_save_preemption_timer(vmcs12))
2739 return -EINVAL;
2740
Krish Sadhukhan461b4ba2018-12-12 13:30:07 -05002741 if (nested_cpu_has_ept(vmcs12) &&
Sean Christophersonac6389a2020-03-02 18:02:38 -08002742 CC(!nested_vmx_check_eptp(vcpu, vmcs12->ept_pointer)))
Krish Sadhukhan461b4ba2018-12-12 13:30:07 -05002743 return -EINVAL;
2744
2745 if (nested_cpu_has_vmfunc(vmcs12)) {
Sean Christopherson5497b952019-07-11 08:58:29 -07002746 if (CC(vmcs12->vm_function_control &
2747 ~vmx->nested.msrs.vmfunc_controls))
Krish Sadhukhan461b4ba2018-12-12 13:30:07 -05002748 return -EINVAL;
2749
2750 if (nested_cpu_has_eptp_switching(vmcs12)) {
Sean Christopherson5497b952019-07-11 08:58:29 -07002751 if (CC(!nested_cpu_has_ept(vmcs12)) ||
2752 CC(!page_address_valid(vcpu, vmcs12->eptp_list_address)))
Krish Sadhukhan461b4ba2018-12-12 13:30:07 -05002753 return -EINVAL;
2754 }
2755 }
2756
2757 return 0;
2758}
2759
Krish Sadhukhan61446ba2018-12-12 13:30:09 -05002760/*
2761 * Checks related to VM-Exit Control Fields
2762 */
2763static int nested_check_vm_exit_controls(struct kvm_vcpu *vcpu,
2764 struct vmcs12 *vmcs12)
2765{
2766 struct vcpu_vmx *vmx = to_vmx(vcpu);
2767
Sean Christopherson5497b952019-07-11 08:58:29 -07002768 if (CC(!vmx_control_verify(vmcs12->vm_exit_controls,
2769 vmx->nested.msrs.exit_ctls_low,
2770 vmx->nested.msrs.exit_ctls_high)) ||
2771 CC(nested_vmx_check_exit_msr_switch_controls(vcpu, vmcs12)))
Krish Sadhukhan61446ba2018-12-12 13:30:09 -05002772 return -EINVAL;
2773
2774 return 0;
2775}
2776
Krish Sadhukhan5fbf9632018-12-12 13:30:10 -05002777/*
2778 * Checks related to VM-Entry Control Fields
2779 */
2780static int nested_check_vm_entry_controls(struct kvm_vcpu *vcpu,
2781 struct vmcs12 *vmcs12)
Sean Christopherson55d23752018-12-03 13:53:18 -08002782{
2783 struct vcpu_vmx *vmx = to_vmx(vcpu);
Sean Christopherson55d23752018-12-03 13:53:18 -08002784
Sean Christopherson5497b952019-07-11 08:58:29 -07002785 if (CC(!vmx_control_verify(vmcs12->vm_entry_controls,
2786 vmx->nested.msrs.entry_ctls_low,
2787 vmx->nested.msrs.entry_ctls_high)))
Krish Sadhukhan5fbf9632018-12-12 13:30:10 -05002788 return -EINVAL;
Sean Christopherson55d23752018-12-03 13:53:18 -08002789
2790 /*
2791 * From the Intel SDM, volume 3:
2792 * Fields relevant to VM-entry event injection must be set properly.
2793 * These fields are the VM-entry interruption-information field, the
2794 * VM-entry exception error code, and the VM-entry instruction length.
2795 */
2796 if (vmcs12->vm_entry_intr_info_field & INTR_INFO_VALID_MASK) {
2797 u32 intr_info = vmcs12->vm_entry_intr_info_field;
2798 u8 vector = intr_info & INTR_INFO_VECTOR_MASK;
2799 u32 intr_type = intr_info & INTR_INFO_INTR_TYPE_MASK;
2800 bool has_error_code = intr_info & INTR_INFO_DELIVER_CODE_MASK;
2801 bool should_have_error_code;
2802 bool urg = nested_cpu_has2(vmcs12,
2803 SECONDARY_EXEC_UNRESTRICTED_GUEST);
2804 bool prot_mode = !urg || vmcs12->guest_cr0 & X86_CR0_PE;
2805
2806 /* VM-entry interruption-info field: interruption type */
Sean Christopherson5497b952019-07-11 08:58:29 -07002807 if (CC(intr_type == INTR_TYPE_RESERVED) ||
2808 CC(intr_type == INTR_TYPE_OTHER_EVENT &&
2809 !nested_cpu_supports_monitor_trap_flag(vcpu)))
Krish Sadhukhan5fbf9632018-12-12 13:30:10 -05002810 return -EINVAL;
Sean Christopherson55d23752018-12-03 13:53:18 -08002811
2812 /* VM-entry interruption-info field: vector */
Sean Christopherson5497b952019-07-11 08:58:29 -07002813 if (CC(intr_type == INTR_TYPE_NMI_INTR && vector != NMI_VECTOR) ||
2814 CC(intr_type == INTR_TYPE_HARD_EXCEPTION && vector > 31) ||
2815 CC(intr_type == INTR_TYPE_OTHER_EVENT && vector != 0))
Krish Sadhukhan5fbf9632018-12-12 13:30:10 -05002816 return -EINVAL;
Sean Christopherson55d23752018-12-03 13:53:18 -08002817
2818 /* VM-entry interruption-info field: deliver error code */
2819 should_have_error_code =
2820 intr_type == INTR_TYPE_HARD_EXCEPTION && prot_mode &&
2821 x86_exception_has_error_code(vector);
Sean Christopherson5497b952019-07-11 08:58:29 -07002822 if (CC(has_error_code != should_have_error_code))
Krish Sadhukhan5fbf9632018-12-12 13:30:10 -05002823 return -EINVAL;
Sean Christopherson55d23752018-12-03 13:53:18 -08002824
2825 /* VM-entry exception error code */
Sean Christopherson5497b952019-07-11 08:58:29 -07002826 if (CC(has_error_code &&
Sean Christopherson567926c2019-10-01 09:21:23 -07002827 vmcs12->vm_entry_exception_error_code & GENMASK(31, 16)))
Krish Sadhukhan5fbf9632018-12-12 13:30:10 -05002828 return -EINVAL;
Sean Christopherson55d23752018-12-03 13:53:18 -08002829
2830 /* VM-entry interruption-info field: reserved bits */
Sean Christopherson5497b952019-07-11 08:58:29 -07002831 if (CC(intr_info & INTR_INFO_RESVD_BITS_MASK))
Krish Sadhukhan5fbf9632018-12-12 13:30:10 -05002832 return -EINVAL;
Sean Christopherson55d23752018-12-03 13:53:18 -08002833
2834 /* VM-entry instruction length */
2835 switch (intr_type) {
2836 case INTR_TYPE_SOFT_EXCEPTION:
2837 case INTR_TYPE_SOFT_INTR:
2838 case INTR_TYPE_PRIV_SW_EXCEPTION:
Sean Christopherson5497b952019-07-11 08:58:29 -07002839 if (CC(vmcs12->vm_entry_instruction_len > 15) ||
2840 CC(vmcs12->vm_entry_instruction_len == 0 &&
2841 CC(!nested_cpu_has_zero_length_injection(vcpu))))
Krish Sadhukhan5fbf9632018-12-12 13:30:10 -05002842 return -EINVAL;
Sean Christopherson55d23752018-12-03 13:53:18 -08002843 }
2844 }
2845
Krish Sadhukhan5fbf9632018-12-12 13:30:10 -05002846 if (nested_vmx_check_entry_msr_switch_controls(vcpu, vmcs12))
2847 return -EINVAL;
2848
2849 return 0;
2850}
2851
Sean Christopherson5478ba32019-04-11 12:18:06 -07002852static int nested_vmx_check_controls(struct kvm_vcpu *vcpu,
2853 struct vmcs12 *vmcs12)
2854{
2855 if (nested_check_vm_execution_controls(vcpu, vmcs12) ||
2856 nested_check_vm_exit_controls(vcpu, vmcs12) ||
2857 nested_check_vm_entry_controls(vcpu, vmcs12))
Paolo Bonzini98d9e852019-04-12 10:19:57 +02002858 return -EINVAL;
Sean Christopherson5478ba32019-04-11 12:18:06 -07002859
Vitaly Kuznetsova8350232020-02-05 13:30:34 +01002860 if (to_vmx(vcpu)->nested.enlightened_vmcs_enabled)
2861 return nested_evmcs_check_controls(vmcs12);
2862
Sean Christopherson5478ba32019-04-11 12:18:06 -07002863 return 0;
2864}
2865
Maxim Levitskyaf957ee2021-11-15 15:18:36 +02002866static int nested_vmx_check_address_space_size(struct kvm_vcpu *vcpu,
2867 struct vmcs12 *vmcs12)
2868{
2869#ifdef CONFIG_X86_64
2870 if (CC(!!(vmcs12->vm_exit_controls & VM_EXIT_HOST_ADDR_SPACE_SIZE) !=
2871 !!(vcpu->arch.efer & EFER_LMA)))
2872 return -EINVAL;
2873#endif
2874 return 0;
2875}
2876
Paolo Bonzini98d9e852019-04-12 10:19:57 +02002877static int nested_vmx_check_host_state(struct kvm_vcpu *vcpu,
2878 struct vmcs12 *vmcs12)
Krish Sadhukhan5fbf9632018-12-12 13:30:10 -05002879{
2880 bool ia32e;
2881
Sean Christopherson5497b952019-07-11 08:58:29 -07002882 if (CC(!nested_host_cr0_valid(vcpu, vmcs12->host_cr0)) ||
2883 CC(!nested_host_cr4_valid(vcpu, vmcs12->host_cr4)) ||
Sean Christopherson636e8b72021-02-03 16:01:10 -08002884 CC(kvm_vcpu_is_illegal_gpa(vcpu, vmcs12->host_cr3)))
Krish Sadhukhan254b2f32018-12-12 13:30:11 -05002885 return -EINVAL;
Krish Sadhukhan711eff32019-02-07 14:05:30 -05002886
Sean Christopherson5497b952019-07-11 08:58:29 -07002887 if (CC(is_noncanonical_address(vmcs12->host_ia32_sysenter_esp, vcpu)) ||
2888 CC(is_noncanonical_address(vmcs12->host_ia32_sysenter_eip, vcpu)))
Krish Sadhukhan711eff32019-02-07 14:05:30 -05002889 return -EINVAL;
2890
Krish Sadhukhanf6b0db1f2019-04-08 17:35:11 -04002891 if ((vmcs12->vm_exit_controls & VM_EXIT_LOAD_IA32_PAT) &&
Sean Christopherson5497b952019-07-11 08:58:29 -07002892 CC(!kvm_pat_valid(vmcs12->host_ia32_pat)))
Krish Sadhukhanf6b0db1f2019-04-08 17:35:11 -04002893 return -EINVAL;
2894
Oliver Uptonc547cb62019-11-13 16:17:17 -08002895 if ((vmcs12->vm_exit_controls & VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL) &&
2896 CC(!kvm_valid_perf_global_ctrl(vcpu_to_pmu(vcpu),
2897 vmcs12->host_ia32_perf_global_ctrl)))
2898 return -EINVAL;
2899
Paolo Bonzinifd3edd42019-09-25 18:33:53 +02002900#ifdef CONFIG_X86_64
Maxim Levitskyaf957ee2021-11-15 15:18:36 +02002901 ia32e = !!(vmcs12->vm_exit_controls & VM_EXIT_HOST_ADDR_SPACE_SIZE);
Paolo Bonzinifd3edd42019-09-25 18:33:53 +02002902#else
2903 ia32e = false;
2904#endif
2905
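        /*
         * The "host address-space size" exit control dictates the legal host
         * state: a 64-bit host requires CR4.PAE, while a 32-bit host must
         * have the "IA-32e mode guest" entry control clear, CR4.PCIDE clear
         * and a RIP that fits in 32 bits.
         */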
2906 if (ia32e) {
Maxim Levitskyaf957ee2021-11-15 15:18:36 +02002907 if (CC(!(vmcs12->host_cr4 & X86_CR4_PAE)))
Paolo Bonzinifd3edd42019-09-25 18:33:53 +02002908 return -EINVAL;
2909 } else {
Maxim Levitskyaf957ee2021-11-15 15:18:36 +02002910 if (CC(vmcs12->vm_entry_controls & VM_ENTRY_IA32E_MODE) ||
Paolo Bonzinifd3edd42019-09-25 18:33:53 +02002911 CC(vmcs12->host_cr4 & X86_CR4_PCIDE) ||
2912 CC((vmcs12->host_rip) >> 32))
2913 return -EINVAL;
2914 }
Krish Sadhukhan1ef23e12019-07-03 19:54:35 -04002915
Sean Christopherson5497b952019-07-11 08:58:29 -07002916 if (CC(vmcs12->host_cs_selector & (SEGMENT_RPL_MASK | SEGMENT_TI_MASK)) ||
2917 CC(vmcs12->host_ss_selector & (SEGMENT_RPL_MASK | SEGMENT_TI_MASK)) ||
2918 CC(vmcs12->host_ds_selector & (SEGMENT_RPL_MASK | SEGMENT_TI_MASK)) ||
2919 CC(vmcs12->host_es_selector & (SEGMENT_RPL_MASK | SEGMENT_TI_MASK)) ||
2920 CC(vmcs12->host_fs_selector & (SEGMENT_RPL_MASK | SEGMENT_TI_MASK)) ||
2921 CC(vmcs12->host_gs_selector & (SEGMENT_RPL_MASK | SEGMENT_TI_MASK)) ||
2922 CC(vmcs12->host_tr_selector & (SEGMENT_RPL_MASK | SEGMENT_TI_MASK)) ||
2923 CC(vmcs12->host_cs_selector == 0) ||
2924 CC(vmcs12->host_tr_selector == 0) ||
2925 CC(vmcs12->host_ss_selector == 0 && !ia32e))
Krish Sadhukhan1ef23e12019-07-03 19:54:35 -04002926 return -EINVAL;
2927
Sean Christopherson5497b952019-07-11 08:58:29 -07002928 if (CC(is_noncanonical_address(vmcs12->host_fs_base, vcpu)) ||
2929 CC(is_noncanonical_address(vmcs12->host_gs_base, vcpu)) ||
2930 CC(is_noncanonical_address(vmcs12->host_gdtr_base, vcpu)) ||
2931 CC(is_noncanonical_address(vmcs12->host_idtr_base, vcpu)) ||
Paolo Bonzinifd3edd42019-09-25 18:33:53 +02002932 CC(is_noncanonical_address(vmcs12->host_tr_base, vcpu)) ||
2933 CC(is_noncanonical_address(vmcs12->host_rip, vcpu)))
Krish Sadhukhan58450382019-08-09 12:26:19 -07002934 return -EINVAL;
Krish Sadhukhan1ef23e12019-07-03 19:54:35 -04002935
Krish Sadhukhan5fbf9632018-12-12 13:30:10 -05002936 /*
2937 * If the load IA32_EFER VM-exit control is 1, bits reserved in the
2938 * IA32_EFER MSR must be 0 in the field for that register. In addition,
2939 * the values of the LMA and LME bits in the field must each be that of
2940 * the host address-space size VM-exit control.
2941 */
2942 if (vmcs12->vm_exit_controls & VM_EXIT_LOAD_IA32_EFER) {
Sean Christopherson5497b952019-07-11 08:58:29 -07002943 if (CC(!kvm_valid_efer(vcpu, vmcs12->host_ia32_efer)) ||
2944 CC(ia32e != !!(vmcs12->host_ia32_efer & EFER_LMA)) ||
2945 CC(ia32e != !!(vmcs12->host_ia32_efer & EFER_LME)))
Krish Sadhukhan254b2f32018-12-12 13:30:11 -05002946 return -EINVAL;
Krish Sadhukhan5fbf9632018-12-12 13:30:10 -05002947 }
2948
Sean Christopherson55d23752018-12-03 13:53:18 -08002949 return 0;
2950}
2951
2952static int nested_vmx_check_vmcs_link_ptr(struct kvm_vcpu *vcpu,
2953 struct vmcs12 *vmcs12)
2954{
David Woodhouse7d0172b2021-11-15 16:50:25 +00002955 struct vcpu_vmx *vmx = to_vmx(vcpu);
2956 struct gfn_to_hva_cache *ghc = &vmx->nested.shadow_vmcs12_cache;
2957 struct vmcs_hdr hdr;
Sean Christopherson55d23752018-12-03 13:53:18 -08002958
Yu Zhang64c78502021-09-30 01:51:53 +08002959 if (vmcs12->vmcs_link_pointer == INVALID_GPA)
Sean Christopherson55d23752018-12-03 13:53:18 -08002960 return 0;
2961
Sean Christopherson5497b952019-07-11 08:58:29 -07002962 if (CC(!page_address_valid(vcpu, vmcs12->vmcs_link_pointer)))
Sean Christopherson55d23752018-12-03 13:53:18 -08002963 return -EINVAL;
2964
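        /*
         * Re-initialize the gfn->hva cache only when the link pointer has
         * changed; repeated VM-Enters with the same shadow VMCS reuse the
         * cached translation.
         */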
David Woodhouse7d0172b2021-11-15 16:50:25 +00002965 if (ghc->gpa != vmcs12->vmcs_link_pointer &&
2966 CC(kvm_gfn_to_hva_cache_init(vcpu->kvm, ghc,
2967 vmcs12->vmcs_link_pointer, VMCS12_SIZE)))
2968 return -EINVAL;
2969
2970 if (CC(kvm_read_guest_offset_cached(vcpu->kvm, ghc, &hdr,
2971 offsetof(struct vmcs12, hdr),
2972 sizeof(hdr))))
Sean Christopherson55d23752018-12-03 13:53:18 -08002973 return -EINVAL;
2974
David Woodhouse7d0172b2021-11-15 16:50:25 +00002975 if (CC(hdr.revision_id != VMCS12_REVISION) ||
2976 CC(hdr.shadow_vmcs != nested_cpu_has_shadow_vmcs(vmcs12)))
2977 return -EINVAL;
KarimAllah Ahmed88925302019-01-31 21:24:41 +01002978
David Woodhouse7d0172b2021-11-15 16:50:25 +00002979 return 0;
Sean Christopherson55d23752018-12-03 13:53:18 -08002980}
2981
Sean Christopherson55d23752018-12-03 13:53:18 -08002982/*
2983 * Checks related to Guest Non-register State
2984 */
2985static int nested_check_guest_non_reg_state(struct vmcs12 *vmcs12)
2986{
Sean Christopherson5497b952019-07-11 08:58:29 -07002987 if (CC(vmcs12->guest_activity_state != GUEST_ACTIVITY_ACTIVE &&
Yadong Qibf0cd882020-11-06 14:51:22 +08002988 vmcs12->guest_activity_state != GUEST_ACTIVITY_HLT &&
2989 vmcs12->guest_activity_state != GUEST_ACTIVITY_WAIT_SIPI))
Sean Christopherson55d23752018-12-03 13:53:18 -08002990 return -EINVAL;
2991
2992 return 0;
2993}
2994
Sean Christopherson5478ba32019-04-11 12:18:06 -07002995static int nested_vmx_check_guest_state(struct kvm_vcpu *vcpu,
2996 struct vmcs12 *vmcs12,
Sean Christopherson68cda402020-05-11 15:05:29 -07002997 enum vm_entry_failure_code *entry_failure_code)
Sean Christopherson55d23752018-12-03 13:53:18 -08002998{
2999 bool ia32e;
3000
Sean Christopherson68cda402020-05-11 15:05:29 -07003001 *entry_failure_code = ENTRY_FAIL_DEFAULT;
Sean Christopherson55d23752018-12-03 13:53:18 -08003002
Sean Christopherson5497b952019-07-11 08:58:29 -07003003 if (CC(!nested_guest_cr0_valid(vcpu, vmcs12->guest_cr0)) ||
3004 CC(!nested_guest_cr4_valid(vcpu, vmcs12->guest_cr4)))
Sean Christophersonc80add02019-04-11 12:18:09 -07003005 return -EINVAL;
Sean Christopherson55d23752018-12-03 13:53:18 -08003006
Krish Sadhukhanb91991b2020-01-15 19:54:32 -05003007 if ((vmcs12->vm_entry_controls & VM_ENTRY_LOAD_DEBUG_CONTROLS) &&
3008 CC(!kvm_dr7_valid(vmcs12->guest_dr7)))
3009 return -EINVAL;
3010
Krish Sadhukhande2bc2b2019-04-08 17:35:12 -04003011 if ((vmcs12->vm_entry_controls & VM_ENTRY_LOAD_IA32_PAT) &&
Sean Christopherson5497b952019-07-11 08:58:29 -07003012 CC(!kvm_pat_valid(vmcs12->guest_ia32_pat)))
Sean Christophersonc80add02019-04-11 12:18:09 -07003013 return -EINVAL;
Sean Christopherson55d23752018-12-03 13:53:18 -08003014
3015 if (nested_vmx_check_vmcs_link_ptr(vcpu, vmcs12)) {
Sean Christopherson68cda402020-05-11 15:05:29 -07003016 *entry_failure_code = ENTRY_FAIL_VMCS_LINK_PTR;
Sean Christophersonc80add02019-04-11 12:18:09 -07003017 return -EINVAL;
Sean Christopherson55d23752018-12-03 13:53:18 -08003018 }
3019
Oliver Uptonbfc6ad62019-11-13 16:17:16 -08003020 if ((vmcs12->vm_entry_controls & VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL) &&
3021 CC(!kvm_valid_perf_global_ctrl(vcpu_to_pmu(vcpu),
3022 vmcs12->guest_ia32_perf_global_ctrl)))
3023 return -EINVAL;
3024
Sean Christopherson55d23752018-12-03 13:53:18 -08003025 /*
3026 * If the load IA32_EFER VM-entry control is 1, the following checks
3027 * are performed on the field for the IA32_EFER MSR:
3028 * - Bits reserved in the IA32_EFER MSR must be 0.
3029 * - Bit 10 (corresponding to IA32_EFER.LMA) must equal the value of
3030 * the IA-32e mode guest VM-exit control. It must also be identical
3031 * to bit 8 (LME) if bit 31 in the CR0 field (corresponding to
3032 * CR0.PG) is 1.
3033 */
3034 if (to_vmx(vcpu)->nested.nested_run_pending &&
3035 (vmcs12->vm_entry_controls & VM_ENTRY_LOAD_IA32_EFER)) {
3036 ia32e = (vmcs12->vm_entry_controls & VM_ENTRY_IA32E_MODE) != 0;
Sean Christopherson5497b952019-07-11 08:58:29 -07003037 if (CC(!kvm_valid_efer(vcpu, vmcs12->guest_ia32_efer)) ||
3038 CC(ia32e != !!(vmcs12->guest_ia32_efer & EFER_LMA)) ||
3039 CC(((vmcs12->guest_cr0 & X86_CR0_PG) &&
3040 ia32e != !!(vmcs12->guest_ia32_efer & EFER_LME))))
Sean Christophersonc80add02019-04-11 12:18:09 -07003041 return -EINVAL;
Sean Christopherson55d23752018-12-03 13:53:18 -08003042 }
3043
3044 if ((vmcs12->vm_entry_controls & VM_ENTRY_LOAD_BNDCFGS) &&
Sean Christopherson5497b952019-07-11 08:58:29 -07003045 (CC(is_noncanonical_address(vmcs12->guest_bndcfgs & PAGE_MASK, vcpu)) ||
3046 CC((vmcs12->guest_bndcfgs & MSR_IA32_BNDCFGS_RSVD))))
Sean Christophersonc80add02019-04-11 12:18:09 -07003047 return -EINVAL;
Sean Christopherson55d23752018-12-03 13:53:18 -08003048
Sean Christopherson9c3e9222019-04-11 12:18:05 -07003049 if (nested_check_guest_non_reg_state(vmcs12))
Sean Christophersonc80add02019-04-11 12:18:09 -07003050 return -EINVAL;
Sean Christopherson55d23752018-12-03 13:53:18 -08003051
3052 return 0;
3053}
3054
Sean Christopherson453eafb2018-12-20 12:25:17 -08003055static int nested_vmx_check_vmentry_hw(struct kvm_vcpu *vcpu)
Sean Christopherson55d23752018-12-03 13:53:18 -08003056{
3057 struct vcpu_vmx *vmx = to_vmx(vcpu);
Lai Jiangshan15ad9762021-11-18 19:08:03 +08003058 unsigned long cr4;
Sean Christophersonf1727b42019-01-25 07:40:58 -08003059 bool vm_fail;
Sean Christopherson55d23752018-12-03 13:53:18 -08003060
3061 if (!nested_early_check)
3062 return 0;
3063
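        /*
         * Temporarily zero the MSR-load counts; the early check never
         * executes any L2 code, so there is no point in having hardware
         * process the lists.  The counts are restored after the check.
         */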
3064 if (vmx->msr_autoload.host.nr)
3065 vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, 0);
3066 if (vmx->msr_autoload.guest.nr)
3067 vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, 0);
3068
3069 preempt_disable();
3070
3071 vmx_prepare_switch_to_guest(vcpu);
3072
3073 /*
3074 * Induce a consistency check VMExit by clearing bit 1 in GUEST_RFLAGS,
3075 * which is reserved to '1' by hardware. GUEST_RFLAGS is guaranteed to
Miaohe Lin49f933d2020-02-27 11:20:54 +08003076 * be written (by prepare_vmcs02()) before the "real" VMEnter, i.e.
Sean Christopherson55d23752018-12-03 13:53:18 -08003077 * there is no need to preserve other bits or save/restore the field.
3078 */
3079 vmcs_writel(GUEST_RFLAGS, 0);
3080
Sean Christopherson55d23752018-12-03 13:53:18 -08003081 cr4 = cr4_read_shadow();
3082 if (unlikely(cr4 != vmx->loaded_vmcs->host_state.cr4)) {
3083 vmcs_writel(HOST_CR4, cr4);
3084 vmx->loaded_vmcs->host_state.cr4 = cr4;
3085 }
3086
Uros Bizjak150f17b2020-12-30 16:26:57 -08003087 vm_fail = __vmx_vcpu_run(vmx, (unsigned long *)&vcpu->arch.regs,
3088 vmx->loaded_vmcs->launched);
Sean Christopherson55d23752018-12-03 13:53:18 -08003089
Sean Christopherson55d23752018-12-03 13:53:18 -08003090 if (vmx->msr_autoload.host.nr)
3091 vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, vmx->msr_autoload.host.nr);
3092 if (vmx->msr_autoload.guest.nr)
3093 vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, vmx->msr_autoload.guest.nr);
3094
Sean Christophersonf1727b42019-01-25 07:40:58 -08003095 if (vm_fail) {
Sean Christopherson380e0052019-07-11 08:58:30 -07003096 u32 error = vmcs_read32(VM_INSTRUCTION_ERROR);
3097
Wanpeng Li541e8862019-05-17 16:49:50 +08003098 preempt_enable();
Sean Christopherson380e0052019-07-11 08:58:30 -07003099
3100 trace_kvm_nested_vmenter_failed(
3101 "early hardware check VM-instruction error: ", error);
3102 WARN_ON_ONCE(error != VMXERR_ENTRY_INVALID_CONTROL_FIELD);
Sean Christopherson55d23752018-12-03 13:53:18 -08003103 return 1;
3104 }
3105
3106 /*
3107 * VMExit clears RFLAGS.IF and DR7, even on a consistency check.
3108 */
Sean Christopherson55d23752018-12-03 13:53:18 -08003109 if (hw_breakpoint_active())
3110 set_debugreg(__this_cpu_read(cpu_dr7), 7);
Peter Zijlstra84b6a342020-05-29 23:27:36 +02003111 local_irq_enable();
Wanpeng Li541e8862019-05-17 16:49:50 +08003112 preempt_enable();
Sean Christopherson55d23752018-12-03 13:53:18 -08003113
3114 /*
3115 * A non-failing VMEntry means we somehow entered guest mode with
3116 * an illegal RIP, and that's just the tip of the iceberg. There
3117 * is no telling what memory has been modified or what state has
3118 * been exposed to unknown code. Hitting this all but guarantees
3119 * a (very critical) hardware issue.
3120 */
3121 WARN_ON(!(vmcs_read32(VM_EXIT_REASON) &
3122 VMX_EXIT_REASONS_FAILED_VMENTRY));
3123
3124 return 0;
3125}
Sean Christopherson55d23752018-12-03 13:53:18 -08003126
Paolo Bonzini9a78e152021-01-08 11:43:08 -05003127static bool nested_get_evmcs_page(struct kvm_vcpu *vcpu)
Sean Christopherson55d23752018-12-03 13:53:18 -08003128{
Sean Christopherson55d23752018-12-03 13:53:18 -08003129 struct vcpu_vmx *vmx = to_vmx(vcpu);
Sean Christopherson55d23752018-12-03 13:53:18 -08003130
Vitaly Kuznetsove942dbf2020-03-09 16:52:12 +01003131 /*
 3132 * hv_evmcs may end up unmapped after migration (when L2 was
 3133 * running); map it here to make sure vmcs12 changes are
3134 * properly reflected.
3135 */
Vitaly Kuznetsov1e9dfbd2021-05-26 15:20:16 +02003136 if (vmx->nested.enlightened_vmcs_enabled &&
Vitaly Kuznetsov27849962021-05-26 15:20:20 +02003137 vmx->nested.hv_evmcs_vmptr == EVMPTR_MAP_PENDING) {
Vitaly Kuznetsovb6a06532020-03-09 16:52:13 +01003138 enum nested_evmptrld_status evmptrld_status =
3139 nested_vmx_handle_enlightened_vmptrld(vcpu, false);
3140
3141 if (evmptrld_status == EVMPTRLD_VMFAIL ||
Vitaly Kuznetsovf5c7e842021-05-03 17:08:51 +02003142 evmptrld_status == EVMPTRLD_ERROR)
Vitaly Kuznetsovb6a06532020-03-09 16:52:13 +01003143 return false;
Vitaly Kuznetsov8629b622021-05-26 15:20:25 +02003144
3145 /*
 3146 * Post migration, VMCS12 always provides the most up-to-date
 3147 * information; copy it to the eVMCS upon entry.
3148 */
3149 vmx->nested.need_vmcs12_to_shadow_sync = true;
Vitaly Kuznetsovb6a06532020-03-09 16:52:13 +01003150 }
Vitaly Kuznetsove942dbf2020-03-09 16:52:12 +01003151
Paolo Bonzini9a78e152021-01-08 11:43:08 -05003152 return true;
3153}
3154
3155static bool nested_get_vmcs12_pages(struct kvm_vcpu *vcpu)
3156{
3157 struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
3158 struct vcpu_vmx *vmx = to_vmx(vcpu);
3159 struct kvm_host_map *map;
3160 struct page *page;
3161 u64 hpa;
3162
Maxim Levitsky158a48e2021-06-07 12:02:03 +03003163 if (!vcpu->arch.pdptrs_from_userspace &&
3164 !nested_cpu_has_ept(vmcs12) && is_pae_paging(vcpu)) {
Maxim Levitsky0f857222021-06-07 12:02:00 +03003165 /*
 3166 * Reload the guest's PDPTRs since, after a migration, the
 3167 * guest CR3 might be restored prior to setting the nested
 3168 * state, which can lead to loading the wrong PDPTRs.
3169 */
Lai Jiangshan2df4a5e2021-11-24 20:20:52 +08003170 if (CC(!load_pdptrs(vcpu, vcpu->arch.cr3)))
Maxim Levitsky0f857222021-06-07 12:02:00 +03003171 return false;
3172 }
3173
3174
Sean Christopherson55d23752018-12-03 13:53:18 -08003175 if (nested_cpu_has2(vmcs12, SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES)) {
3176 /*
3177 * Translate L1 physical address to host physical
3178 * address for vmcs02. Keep the page pinned, so this
3179 * physical address remains valid. We keep a reference
3180 * to it so we can release it later.
3181 */
3182 if (vmx->nested.apic_access_page) { /* shouldn't happen */
Liran Alonb11494b2019-11-21 00:31:47 +02003183 kvm_release_page_clean(vmx->nested.apic_access_page);
Sean Christopherson55d23752018-12-03 13:53:18 -08003184 vmx->nested.apic_access_page = NULL;
3185 }
3186 page = kvm_vcpu_gpa_to_page(vcpu, vmcs12->apic_access_addr);
Sean Christopherson55d23752018-12-03 13:53:18 -08003187 if (!is_error_page(page)) {
3188 vmx->nested.apic_access_page = page;
3189 hpa = page_to_phys(vmx->nested.apic_access_page);
3190 vmcs_write64(APIC_ACCESS_ADDR, hpa);
3191 } else {
Jim Mattson671ddc72019-10-15 10:44:05 -07003192 pr_debug_ratelimited("%s: no backing 'struct page' for APIC-access address in vmcs12\n",
3193 __func__);
3194 vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
3195 vcpu->run->internal.suberror =
3196 KVM_INTERNAL_ERROR_EMULATION;
3197 vcpu->run->internal.ndata = 0;
3198 return false;
Sean Christopherson55d23752018-12-03 13:53:18 -08003199 }
3200 }
3201
3202 if (nested_cpu_has(vmcs12, CPU_BASED_TPR_SHADOW)) {
KarimAllah Ahmed96c66e82019-01-31 21:24:37 +01003203 map = &vmx->nested.virtual_apic_map;
Sean Christopherson55d23752018-12-03 13:53:18 -08003204
KarimAllah Ahmed96c66e82019-01-31 21:24:37 +01003205 if (!kvm_vcpu_map(vcpu, gpa_to_gfn(vmcs12->virtual_apic_page_addr), map)) {
3206 vmcs_write64(VIRTUAL_APIC_PAGE_ADDR, pfn_to_hpa(map->pfn));
Paolo Bonzini69090812019-04-15 15:16:17 +02003207 } else if (nested_cpu_has(vmcs12, CPU_BASED_CR8_LOAD_EXITING) &&
3208 nested_cpu_has(vmcs12, CPU_BASED_CR8_STORE_EXITING) &&
3209 !nested_cpu_has2(vmcs12, SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES)) {
3210 /*
3211 * The processor will never use the TPR shadow, simply
3212 * clear the bit from the execution control. Such a
3213 * configuration is useless, but it happens in tests.
3214 * For any other configuration, failing the vm entry is
3215 * _not_ what the processor does but it's basically the
3216 * only possibility we have.
3217 */
Sean Christopherson2183f562019-05-07 12:17:56 -07003218 exec_controls_clearbit(vmx, CPU_BASED_TPR_SHADOW);
Paolo Bonzini69090812019-04-15 15:16:17 +02003219 } else {
Sean Christophersonca2f5462019-05-07 09:06:33 -07003220 /*
3221 * Write an illegal value to VIRTUAL_APIC_PAGE_ADDR to
3222 * force VM-Entry to fail.
3223 */
Yu Zhang64c78502021-09-30 01:51:53 +08003224 vmcs_write64(VIRTUAL_APIC_PAGE_ADDR, INVALID_GPA);
Sean Christopherson55d23752018-12-03 13:53:18 -08003225 }
3226 }
3227
3228 if (nested_cpu_has_posted_intr(vmcs12)) {
KarimAllah Ahmed3278e042019-01-31 21:24:38 +01003229 map = &vmx->nested.pi_desc_map;
3230
3231 if (!kvm_vcpu_map(vcpu, gpa_to_gfn(vmcs12->posted_intr_desc_addr), map)) {
3232 vmx->nested.pi_desc =
3233 (struct pi_desc *)(((void *)map->hva) +
3234 offset_in_page(vmcs12->posted_intr_desc_addr));
3235 vmcs_write64(POSTED_INTR_DESC_ADDR,
3236 pfn_to_hpa(map->pfn) + offset_in_page(vmcs12->posted_intr_desc_addr));
Jim Mattson966eefb2021-06-04 10:26:06 -07003237 } else {
3238 /*
3239 * Defer the KVM_INTERNAL_EXIT until KVM tries to
3240 * access the contents of the VMCS12 posted interrupt
3241 * descriptor. (Note that KVM may do this when it
3242 * should not, per the architectural specification.)
3243 */
3244 vmx->nested.pi_desc = NULL;
3245 pin_controls_clearbit(vmx, PIN_BASED_POSTED_INTR);
Sean Christopherson55d23752018-12-03 13:53:18 -08003246 }
Sean Christopherson55d23752018-12-03 13:53:18 -08003247 }
3248 if (nested_vmx_prepare_msr_bitmap(vcpu, vmcs12))
Sean Christopherson2183f562019-05-07 12:17:56 -07003249 exec_controls_setbit(vmx, CPU_BASED_USE_MSR_BITMAPS);
Sean Christopherson55d23752018-12-03 13:53:18 -08003250 else
Sean Christopherson2183f562019-05-07 12:17:56 -07003251 exec_controls_clearbit(vmx, CPU_BASED_USE_MSR_BITMAPS);
Paolo Bonzini9a78e152021-01-08 11:43:08 -05003252
3253 return true;
3254}
3255
3256static bool vmx_get_nested_state_pages(struct kvm_vcpu *vcpu)
3257{
Vitaly Kuznetsovf5c7e842021-05-03 17:08:51 +02003258 if (!nested_get_evmcs_page(vcpu)) {
3259 pr_debug_ratelimited("%s: enlightened vmptrld failed\n",
3260 __func__);
3261 vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
3262 vcpu->run->internal.suberror =
3263 KVM_INTERNAL_ERROR_EMULATION;
3264 vcpu->run->internal.ndata = 0;
3265
Paolo Bonzini9a78e152021-01-08 11:43:08 -05003266 return false;
Vitaly Kuznetsovf5c7e842021-05-03 17:08:51 +02003267 }
Paolo Bonzini9a78e152021-01-08 11:43:08 -05003268
3269 if (is_guest_mode(vcpu) && !nested_get_vmcs12_pages(vcpu))
3270 return false;
3271
Jim Mattson671ddc72019-10-15 10:44:05 -07003272 return true;
Sean Christopherson55d23752018-12-03 13:53:18 -08003273}
3274
Sean Christopherson02f5fb22020-06-22 14:58:32 -07003275static int nested_vmx_write_pml_buffer(struct kvm_vcpu *vcpu, gpa_t gpa)
3276{
3277 struct vmcs12 *vmcs12;
3278 struct vcpu_vmx *vmx = to_vmx(vcpu);
3279 gpa_t dst;
3280
3281 if (WARN_ON_ONCE(!is_guest_mode(vcpu)))
3282 return 0;
3283
3284 if (WARN_ON_ONCE(vmx->nested.pml_full))
3285 return 1;
3286
3287 /*
3288 * Check if PML is enabled for the nested guest. Whether eptp bit 6 is
3289 * set is already checked as part of A/D emulation.
3290 */
3291 vmcs12 = get_vmcs12(vcpu);
3292 if (!nested_cpu_has_pml(vmcs12))
3293 return 0;
3294
3295 if (vmcs12->guest_pml_index >= PML_ENTITY_NUM) {
3296 vmx->nested.pml_full = true;
3297 return 1;
3298 }
3299
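        /* The PML log records 4KiB-aligned GPAs, so strip the low 12 bits. */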
3300 gpa &= ~0xFFFull;
3301 dst = vmcs12->pml_address + sizeof(u64) * vmcs12->guest_pml_index;
3302
3303 if (kvm_write_guest_page(vcpu->kvm, gpa_to_gfn(dst), &gpa,
3304 offset_in_page(dst), sizeof(gpa)))
3305 return 0;
3306
3307 vmcs12->guest_pml_index--;
3308
3309 return 0;
3310}
3311
Sean Christopherson55d23752018-12-03 13:53:18 -08003312/*
3313 * Intel's VMX Instruction Reference specifies a common set of prerequisites
3314 * for running VMX instructions (except VMXON, whose prerequisites are
3315 * slightly different). It also specifies what exception to inject otherwise.
3316 * Note that many of these exceptions have priority over VM exits, so they
3317 * don't have to be checked again here.
3318 */
3319static int nested_vmx_check_permission(struct kvm_vcpu *vcpu)
3320{
3321 if (!to_vmx(vcpu)->nested.vmxon) {
3322 kvm_queue_exception(vcpu, UD_VECTOR);
3323 return 0;
3324 }
3325
3326 if (vmx_get_cpl(vcpu)) {
3327 kvm_inject_gp(vcpu, 0);
3328 return 0;
3329 }
3330
3331 return 1;
3332}
3333
3334static u8 vmx_has_apicv_interrupt(struct kvm_vcpu *vcpu)
3335{
3336 u8 rvi = vmx_get_rvi();
3337 u8 vppr = kvm_lapic_get_reg(vcpu->arch.apic, APIC_PROCPRI);
3338
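        /*
         * Only the priority class (bits 7:4) matters: the highest pending
         * virtual interrupt (RVI) can be delivered only if its class is
         * above the processor-priority class in VPPR.
         */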
3339 return ((rvi & 0xf0) > (vppr & 0xf0));
3340}
3341
3342static void load_vmcs12_host_state(struct kvm_vcpu *vcpu,
3343 struct vmcs12 *vmcs12);
3344
3345/*
3346 * If from_vmentry is false, this is being called from state restore (either RSM
3347 * or KVM_SET_NESTED_STATE). Otherwise it's called from vmlaunch/vmresume.
Jim Mattson671ddc72019-10-15 10:44:05 -07003348 *
3349 * Returns:
Miaohe Lin463bfee2020-02-14 10:44:05 +08003350 * NVMX_VMENTRY_SUCCESS: Entered VMX non-root mode
3351 * NVMX_VMENTRY_VMFAIL: Consistency check VMFail
3352 * NVMX_VMENTRY_VMEXIT: Consistency check VMExit
3353 * NVMX_VMENTRY_KVM_INTERNAL_ERROR: KVM internal error
Sean Christopherson55d23752018-12-03 13:53:18 -08003354 */
Jim Mattson671ddc72019-10-15 10:44:05 -07003355enum nvmx_vmentry_status nested_vmx_enter_non_root_mode(struct kvm_vcpu *vcpu,
3356 bool from_vmentry)
Sean Christopherson55d23752018-12-03 13:53:18 -08003357{
3358 struct vcpu_vmx *vmx = to_vmx(vcpu);
3359 struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
Sean Christopherson68cda402020-05-11 15:05:29 -07003360 enum vm_entry_failure_code entry_failure_code;
Sean Christopherson55d23752018-12-03 13:53:18 -08003361 bool evaluate_pending_interrupts;
Sean Christopherson8e533242020-11-06 17:03:12 +08003362 union vmx_exit_reason exit_reason = {
3363 .basic = EXIT_REASON_INVALID_STATE,
3364 .failed_vmentry = 1,
3365 };
3366 u32 failed_index;
Sean Christopherson55d23752018-12-03 13:53:18 -08003367
Sean Christopherson40e5f9082021-11-25 01:49:43 +00003368 kvm_service_local_tlb_flush_requests(vcpu);
Sean Christophersoneeeb4f62020-03-20 14:28:20 -07003369
Sean Christopherson2183f562019-05-07 12:17:56 -07003370 evaluate_pending_interrupts = exec_controls_get(vmx) &
Xiaoyao Li4e2a0bc2019-12-06 16:45:25 +08003371 (CPU_BASED_INTR_WINDOW_EXITING | CPU_BASED_NMI_WINDOW_EXITING);
Sean Christopherson55d23752018-12-03 13:53:18 -08003372 if (likely(!evaluate_pending_interrupts) && kvm_vcpu_apicv_active(vcpu))
3373 evaluate_pending_interrupts |= vmx_has_apicv_interrupt(vcpu);
3374
3375 if (!(vmcs12->vm_entry_controls & VM_ENTRY_LOAD_DEBUG_CONTROLS))
3376 vmx->nested.vmcs01_debugctl = vmcs_read64(GUEST_IA32_DEBUGCTL);
3377 if (kvm_mpx_supported() &&
3378 !(vmcs12->vm_entry_controls & VM_ENTRY_LOAD_BNDCFGS))
3379 vmx->nested.vmcs01_guest_bndcfgs = vmcs_read64(GUEST_BNDCFGS);
3380
Sean Christophersonf087a022019-06-07 11:55:34 -07003381 /*
3382 * Overwrite vmcs01.GUEST_CR3 with L1's CR3 if EPT is disabled *and*
3383 * nested early checks are disabled. In the event of a "late" VM-Fail,
3384 * i.e. a VM-Fail detected by hardware but not KVM, KVM must unwind its
3385 * software model to the pre-VMEntry host state. When EPT is disabled,
3386 * GUEST_CR3 holds KVM's shadow CR3, not L1's "real" CR3, which causes
3387 * nested_vmx_restore_host_state() to corrupt vcpu->arch.cr3. Stuffing
3388 * vmcs01.GUEST_CR3 results in the unwind naturally setting arch.cr3 to
3389 * the correct value. Smashing vmcs01.GUEST_CR3 is safe because nested
3390 * VM-Exits, and the unwind, reset KVM's MMU, i.e. vmcs01.GUEST_CR3 is
3391 * guaranteed to be overwritten with a shadow CR3 prior to re-entering
3392 * L1. Don't stuff vmcs01.GUEST_CR3 when using nested early checks as
3393 * KVM modifies vcpu->arch.cr3 if and only if the early hardware checks
3394 * pass, and early VM-Fails do not reset KVM's MMU, i.e. the VM-Fail
3395 * path would need to manually save/restore vmcs01.GUEST_CR3.
3396 */
3397 if (!enable_ept && !nested_early_check)
3398 vmcs_writel(GUEST_CR3, vcpu->arch.cr3);
3399
Sean Christopherson55d23752018-12-03 13:53:18 -08003400 vmx_switch_vmcs(vcpu, &vmx->nested.vmcs02);
3401
Sean Christopherson389ab252021-08-10 10:19:50 -07003402 prepare_vmcs02_early(vmx, &vmx->vmcs01, vmcs12);
Sean Christopherson55d23752018-12-03 13:53:18 -08003403
3404 if (from_vmentry) {
Sean Christophersonb89d5ad2020-09-23 11:44:47 -07003405 if (unlikely(!nested_get_vmcs12_pages(vcpu))) {
3406 vmx_switch_vmcs(vcpu, &vmx->vmcs01);
Jim Mattson671ddc72019-10-15 10:44:05 -07003407 return NVMX_VMENTRY_KVM_INTERNAL_ERROR;
Sean Christophersonb89d5ad2020-09-23 11:44:47 -07003408 }
Sean Christopherson55d23752018-12-03 13:53:18 -08003409
3410 if (nested_vmx_check_vmentry_hw(vcpu)) {
3411 vmx_switch_vmcs(vcpu, &vmx->vmcs01);
Jim Mattson671ddc72019-10-15 10:44:05 -07003412 return NVMX_VMENTRY_VMFAIL;
Sean Christopherson55d23752018-12-03 13:53:18 -08003413 }
3414
Sean Christopherson68cda402020-05-11 15:05:29 -07003415 if (nested_vmx_check_guest_state(vcpu, vmcs12,
3416 &entry_failure_code)) {
Sean Christopherson8e533242020-11-06 17:03:12 +08003417 exit_reason.basic = EXIT_REASON_INVALID_STATE;
Sean Christopherson68cda402020-05-11 15:05:29 -07003418 vmcs12->exit_qualification = entry_failure_code;
Sean Christopherson55d23752018-12-03 13:53:18 -08003419 goto vmentry_fail_vmexit;
Sean Christopherson68cda402020-05-11 15:05:29 -07003420 }
Sean Christopherson55d23752018-12-03 13:53:18 -08003421 }
3422
3423 enter_guest_mode(vcpu);
Sean Christopherson55d23752018-12-03 13:53:18 -08003424
Maxim Levitsky0f857222021-06-07 12:02:00 +03003425 if (prepare_vmcs02(vcpu, vmcs12, from_vmentry, &entry_failure_code)) {
Sean Christopherson8e533242020-11-06 17:03:12 +08003426 exit_reason.basic = EXIT_REASON_INVALID_STATE;
Sean Christopherson68cda402020-05-11 15:05:29 -07003427 vmcs12->exit_qualification = entry_failure_code;
Sean Christopherson55d23752018-12-03 13:53:18 -08003428 goto vmentry_fail_vmexit_guest_mode;
Sean Christopherson68cda402020-05-11 15:05:29 -07003429 }
Sean Christopherson55d23752018-12-03 13:53:18 -08003430
3431 if (from_vmentry) {
Sean Christopherson68cda402020-05-11 15:05:29 -07003432 failed_index = nested_vmx_load_msr(vcpu,
3433 vmcs12->vm_entry_msr_load_addr,
3434 vmcs12->vm_entry_msr_load_count);
3435 if (failed_index) {
Sean Christopherson8e533242020-11-06 17:03:12 +08003436 exit_reason.basic = EXIT_REASON_MSR_LOAD_FAIL;
Sean Christopherson68cda402020-05-11 15:05:29 -07003437 vmcs12->exit_qualification = failed_index;
Sean Christopherson55d23752018-12-03 13:53:18 -08003438 goto vmentry_fail_vmexit_guest_mode;
Sean Christopherson68cda402020-05-11 15:05:29 -07003439 }
Sean Christopherson55d23752018-12-03 13:53:18 -08003440 } else {
3441 /*
3442 * The MMU is not initialized to point at the right entities yet and
3443 * "get pages" would need to read data from the guest (i.e. we will
3444 * need to perform gpa to hpa translation). Request a call
3445 * to nested_get_vmcs12_pages before the next VM-entry. The MSRs
3446 * have already been set at vmentry time and should not be reset.
3447 */
Paolo Bonzini729c15c2020-09-22 06:53:57 -04003448 kvm_make_request(KVM_REQ_GET_NESTED_STATE_PAGES, vcpu);
Sean Christopherson55d23752018-12-03 13:53:18 -08003449 }
3450
3451 /*
3452 * If L1 had a pending IRQ/NMI until it executed
3453 * VMLAUNCH/VMRESUME which wasn't delivered because it was
3454 * disallowed (e.g. interrupts disabled), L0 needs to
3455 * evaluate if this pending event should cause an exit from L2
 3456 * to L1 or be delivered directly to L2 (e.g. in case L1 doesn't
3457 * intercept EXTERNAL_INTERRUPT).
3458 *
3459 * Usually this would be handled by the processor noticing an
3460 * IRQ/NMI window request, or checking RVI during evaluation of
3461 * pending virtual interrupts. However, this setting was done
3462 * on VMCS01 and now VMCS02 is active instead. Thus, we force L0
3463 * to perform pending event evaluation by requesting a KVM_REQ_EVENT.
3464 */
3465 if (unlikely(evaluate_pending_interrupts))
3466 kvm_make_request(KVM_REQ_EVENT, vcpu);
3467
3468 /*
Paolo Bonzini359a6c32019-01-29 19:14:46 +01003469 * Do not start the preemption timer hrtimer until after we know
3470 * we are successful, so that only nested_vmx_vmexit needs to cancel
3471 * the timer.
3472 */
3473 vmx->nested.preemption_timer_expired = false;
Peter Shier850448f2020-05-26 14:51:06 -07003474 if (nested_cpu_has_preemption_timer(vmcs12)) {
3475 u64 timer_value = vmx_calc_preemption_timer_value(vcpu);
3476 vmx_start_preemption_timer(vcpu, timer_value);
3477 }
Paolo Bonzini359a6c32019-01-29 19:14:46 +01003478
3479 /*
Sean Christopherson55d23752018-12-03 13:53:18 -08003480 * Note no nested_vmx_succeed or nested_vmx_fail here. At this point
3481 * we are no longer running L1, and VMLAUNCH/VMRESUME has not yet
3482 * returned as far as L1 is concerned. It will only return (and set
3483 * the success flag) when L2 exits (see nested_vmx_vmexit()).
3484 */
Jim Mattson671ddc72019-10-15 10:44:05 -07003485 return NVMX_VMENTRY_SUCCESS;
Sean Christopherson55d23752018-12-03 13:53:18 -08003486
3487 /*
3488 * A failed consistency check that leads to a VMExit during L1's
3489 * VMEnter to L2 is a variation of a normal VMexit, as explained in
3490 * 26.7 "VM-entry failures during or after loading guest state".
3491 */
3492vmentry_fail_vmexit_guest_mode:
Xiaoyao Li5e3d3942019-12-06 16:45:26 +08003493 if (vmcs12->cpu_based_vm_exec_control & CPU_BASED_USE_TSC_OFFSETTING)
Sean Christopherson55d23752018-12-03 13:53:18 -08003494 vcpu->arch.tsc_offset -= vmcs12->tsc_offset;
3495 leave_guest_mode(vcpu);
3496
3497vmentry_fail_vmexit:
3498 vmx_switch_vmcs(vcpu, &vmx->vmcs01);
3499
3500 if (!from_vmentry)
Jim Mattson671ddc72019-10-15 10:44:05 -07003501 return NVMX_VMENTRY_VMEXIT;
Sean Christopherson55d23752018-12-03 13:53:18 -08003502
3503 load_vmcs12_host_state(vcpu, vmcs12);
Sean Christopherson8e533242020-11-06 17:03:12 +08003504 vmcs12->vm_exit_reason = exit_reason.full;
Vitaly Kuznetsov1e9dfbd2021-05-26 15:20:16 +02003505 if (enable_shadow_vmcs || evmptr_is_valid(vmx->nested.hv_evmcs_vmptr))
Sean Christopherson3731905ef2019-05-07 08:36:27 -07003506 vmx->nested.need_vmcs12_to_shadow_sync = true;
Jim Mattson671ddc72019-10-15 10:44:05 -07003507 return NVMX_VMENTRY_VMEXIT;
Sean Christopherson55d23752018-12-03 13:53:18 -08003508}
3509
3510/*
3511 * nested_vmx_run() handles a nested entry, i.e., a VMLAUNCH or VMRESUME on L1
3512 * for running an L2 nested guest.
3513 */
3514static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch)
3515{
3516 struct vmcs12 *vmcs12;
Jim Mattson671ddc72019-10-15 10:44:05 -07003517 enum nvmx_vmentry_status status;
Sean Christopherson55d23752018-12-03 13:53:18 -08003518 struct vcpu_vmx *vmx = to_vmx(vcpu);
3519 u32 interrupt_shadow = vmx_get_interrupt_shadow(vcpu);
Vitaly Kuznetsovb6a06532020-03-09 16:52:13 +01003520 enum nested_evmptrld_status evmptrld_status;
Sean Christopherson55d23752018-12-03 13:53:18 -08003521
3522 if (!nested_vmx_check_permission(vcpu))
3523 return 1;
3524
Vitaly Kuznetsovb6a06532020-03-09 16:52:13 +01003525 evmptrld_status = nested_vmx_handle_enlightened_vmptrld(vcpu, launch);
3526 if (evmptrld_status == EVMPTRLD_ERROR) {
3527 kvm_queue_exception(vcpu, UD_VECTOR);
Sean Christopherson55d23752018-12-03 13:53:18 -08003528 return 1;
Vitaly Kuznetsovb6a06532020-03-09 16:52:13 +01003529 }
Sean Christopherson55d23752018-12-03 13:53:18 -08003530
Eric Hankland018d70f2021-11-30 15:42:21 +08003531 kvm_pmu_trigger_event(vcpu, PERF_COUNT_HW_BRANCH_INSTRUCTIONS);
3532
3533 if (CC(evmptrld_status == EVMPTRLD_VMFAIL))
3534 return nested_vmx_failInvalid(vcpu);
3535
Vitaly Kuznetsov1e9dfbd2021-05-26 15:20:16 +02003536 if (CC(!evmptr_is_valid(vmx->nested.hv_evmcs_vmptr) &&
Yu Zhang64c78502021-09-30 01:51:53 +08003537 vmx->nested.current_vmptr == INVALID_GPA))
Sean Christopherson55d23752018-12-03 13:53:18 -08003538 return nested_vmx_failInvalid(vcpu);
3539
3540 vmcs12 = get_vmcs12(vcpu);
3541
3542 /*
3543 * Can't VMLAUNCH or VMRESUME a shadow VMCS. Despite the fact
3544 * that there *is* a valid VMCS pointer, RFLAGS.CF is set
3545 * rather than RFLAGS.ZF, and no error number is stored to the
3546 * VM-instruction error field.
3547 */
Sean Christophersonfc595f32020-08-12 11:06:15 -07003548 if (CC(vmcs12->hdr.shadow_vmcs))
Sean Christopherson55d23752018-12-03 13:53:18 -08003549 return nested_vmx_failInvalid(vcpu);
3550
Vitaly Kuznetsov1e9dfbd2021-05-26 15:20:16 +02003551 if (evmptr_is_valid(vmx->nested.hv_evmcs_vmptr)) {
Vitaly Kuznetsovd6bf71a2021-05-26 15:20:22 +02003552 copy_enlightened_to_vmcs12(vmx, vmx->nested.hv_evmcs->hv_clean_fields);
Sean Christopherson55d23752018-12-03 13:53:18 -08003553 /* Enlightened VMCS doesn't have launch state */
3554 vmcs12->launch_state = !launch;
3555 } else if (enable_shadow_vmcs) {
3556 copy_shadow_to_vmcs12(vmx);
3557 }
3558
3559 /*
3560 * The nested entry process starts with enforcing various prerequisites
 3561 * on vmcs12 as required by the Intel SDM, and acting appropriately when
3562 * they fail: As the SDM explains, some conditions should cause the
3563 * instruction to fail, while others will cause the instruction to seem
3564 * to succeed, but return an EXIT_REASON_INVALID_STATE.
3565 * To speed up the normal (success) code path, we should avoid checking
3566 * for misconfigurations which will anyway be caught by the processor
3567 * when using the merged vmcs02.
3568 */
Sean Christophersonfc595f32020-08-12 11:06:15 -07003569 if (CC(interrupt_shadow & KVM_X86_SHADOW_INT_MOV_SS))
Sean Christophersonb2656e42020-06-08 18:56:07 -07003570 return nested_vmx_fail(vcpu, VMXERR_ENTRY_EVENTS_BLOCKED_BY_MOV_SS);
Sean Christopherson55d23752018-12-03 13:53:18 -08003571
Sean Christophersonfc595f32020-08-12 11:06:15 -07003572 if (CC(vmcs12->launch_state == launch))
Sean Christophersonb2656e42020-06-08 18:56:07 -07003573 return nested_vmx_fail(vcpu,
Sean Christopherson55d23752018-12-03 13:53:18 -08003574 launch ? VMXERR_VMLAUNCH_NONCLEAR_VMCS
3575 : VMXERR_VMRESUME_NONLAUNCHED_VMCS);
3576
Paolo Bonzini98d9e852019-04-12 10:19:57 +02003577 if (nested_vmx_check_controls(vcpu, vmcs12))
Sean Christophersonb2656e42020-06-08 18:56:07 -07003578 return nested_vmx_fail(vcpu, VMXERR_ENTRY_INVALID_CONTROL_FIELD);
Sean Christopherson5478ba32019-04-11 12:18:06 -07003579
Maxim Levitskyaf957ee2021-11-15 15:18:36 +02003580 if (nested_vmx_check_address_space_size(vcpu, vmcs12))
3581 return nested_vmx_fail(vcpu, VMXERR_ENTRY_INVALID_HOST_STATE_FIELD);
3582
Paolo Bonzini98d9e852019-04-12 10:19:57 +02003583 if (nested_vmx_check_host_state(vcpu, vmcs12))
Sean Christophersonb2656e42020-06-08 18:56:07 -07003584 return nested_vmx_fail(vcpu, VMXERR_ENTRY_INVALID_HOST_STATE_FIELD);
Sean Christopherson55d23752018-12-03 13:53:18 -08003585
3586 /*
3587 * We're finally done with prerequisite checking, and can start with
3588 * the nested entry.
3589 */
3590 vmx->nested.nested_run_pending = 1;
Peter Shier850448f2020-05-26 14:51:06 -07003591 vmx->nested.has_preemption_timer_deadline = false;
Jim Mattson671ddc72019-10-15 10:44:05 -07003592 status = nested_vmx_enter_non_root_mode(vcpu, true);
3593 if (unlikely(status != NVMX_VMENTRY_SUCCESS))
3594 goto vmentry_failed;
Sean Christopherson55d23752018-12-03 13:53:18 -08003595
Sean Christopherson25bb2cf2020-08-12 10:51:29 -07003596 /* Emulate processing of posted interrupts on VM-Enter. */
3597 if (nested_cpu_has_posted_intr(vmcs12) &&
3598 kvm_apic_has_interrupt(vcpu) == vmx->nested.posted_intr_nv) {
3599 vmx->nested.pi_pending = true;
3600 kvm_make_request(KVM_REQ_EVENT, vcpu);
3601 kvm_apic_clear_irr(vcpu, vmx->nested.posted_intr_nv);
3602 }
3603
Sean Christopherson55d23752018-12-03 13:53:18 -08003604 /* Hide L1D cache contents from the nested guest. */
3605 vmx->vcpu.arch.l1tf_flush_l1d = true;
3606
3607 /*
3608 * Must happen outside of nested_vmx_enter_non_root_mode() as it will
3609 * also be used as part of restoring nVMX state for
3610 * snapshot restore (migration).
3611 *
 3612 * In this flow, it is assumed that the vmcs12 cache was
Ingo Molnar163b0992021-03-21 22:28:53 +01003613 * transferred as part of the captured nVMX state and should
Sean Christopherson55d23752018-12-03 13:53:18 -08003614 * therefore not be read from guest memory (which may not
 3615 * exist on the destination host yet).
3616 */
3617 nested_cache_shadow_vmcs12(vcpu, vmcs12);
3618
Yadong Qibf0cd882020-11-06 14:51:22 +08003619 switch (vmcs12->guest_activity_state) {
3620 case GUEST_ACTIVITY_HLT:
3621 /*
3622 * If we're entering a halted L2 vcpu and the L2 vcpu won't be
3623 * awakened by event injection or by an NMI-window VM-exit or
3624 * by an interrupt-window VM-exit, halt the vcpu.
3625 */
3626 if (!(vmcs12->vm_entry_intr_info_field & INTR_INFO_VALID_MASK) &&
3627 !nested_cpu_has(vmcs12, CPU_BASED_NMI_WINDOW_EXITING) &&
3628 !(nested_cpu_has(vmcs12, CPU_BASED_INTR_WINDOW_EXITING) &&
3629 (vmcs12->guest_rflags & X86_EFLAGS_IF))) {
3630 vmx->nested.nested_run_pending = 0;
Sean Christopherson14601792021-10-08 19:12:05 -07003631 return kvm_emulate_halt_noskip(vcpu);
Yadong Qibf0cd882020-11-06 14:51:22 +08003632 }
3633 break;
3634 case GUEST_ACTIVITY_WAIT_SIPI:
Sean Christopherson55d23752018-12-03 13:53:18 -08003635 vmx->nested.nested_run_pending = 0;
Yadong Qibf0cd882020-11-06 14:51:22 +08003636 vcpu->arch.mp_state = KVM_MP_STATE_INIT_RECEIVED;
3637 break;
3638 default:
3639 break;
Sean Christopherson55d23752018-12-03 13:53:18 -08003640 }
Yadong Qibf0cd882020-11-06 14:51:22 +08003641
Sean Christopherson55d23752018-12-03 13:53:18 -08003642 return 1;
Jim Mattson671ddc72019-10-15 10:44:05 -07003643
3644vmentry_failed:
3645 vmx->nested.nested_run_pending = 0;
3646 if (status == NVMX_VMENTRY_KVM_INTERNAL_ERROR)
3647 return 0;
3648 if (status == NVMX_VMENTRY_VMEXIT)
3649 return 1;
3650 WARN_ON_ONCE(status != NVMX_VMENTRY_VMFAIL);
Sean Christophersonb2656e42020-06-08 18:56:07 -07003651 return nested_vmx_fail(vcpu, VMXERR_ENTRY_INVALID_CONTROL_FIELD);
Sean Christopherson55d23752018-12-03 13:53:18 -08003652}
3653
3654/*
3655 * On a nested exit from L2 to L1, vmcs12.guest_cr0 might not be up-to-date
Miaohe Lin67b0ae42019-12-11 14:26:22 +08003656 * because L2 may have changed some cr0 bits directly (CR0_GUEST_HOST_MASK).
Sean Christopherson55d23752018-12-03 13:53:18 -08003657 * This function returns the new value we should put in vmcs12.guest_cr0.
3658 * It's not enough to just return the vmcs02 GUEST_CR0. Rather,
3659 * 1. Bits that neither L0 nor L1 trapped, were set directly by L2 and are now
3660 * available in vmcs02 GUEST_CR0. (Note: It's enough to check that L0
3661 * didn't trap the bit, because if L1 did, so would L0).
3662 * 2. Bits that L1 asked to trap (and therefore L0 also did) could not have
3663 * been modified by L2, and L1 knows it. So just leave the old value of
3664 * the bit from vmcs12.guest_cr0. Note that the bit from vmcs02 GUEST_CR0
3665 * isn't relevant, because if L0 traps this bit it can set it to anything.
3666 * 3. Bits that L1 didn't trap, but L0 did. L1 believes the guest could have
3667 * changed these bits, and therefore they need to be updated, but L0
3668 * didn't necessarily allow them to be changed in GUEST_CR0 - and rather
3669 * put them in vmcs02 CR0_READ_SHADOW. So take these bits from there.
3670 */
3671static inline unsigned long
3672vmcs12_guest_cr0(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
3673{
3674 return
3675 /*1*/ (vmcs_readl(GUEST_CR0) & vcpu->arch.cr0_guest_owned_bits) |
3676 /*2*/ (vmcs12->guest_cr0 & vmcs12->cr0_guest_host_mask) |
3677 /*3*/ (vmcs_readl(CR0_READ_SHADOW) & ~(vmcs12->cr0_guest_host_mask |
3678 vcpu->arch.cr0_guest_owned_bits));
3679}
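
/*
 * Illustrative sketch (not compiled): the same three-way merge performed by
 * vmcs12_guest_cr0() above, spelled out per-bit for a single bit, X86_CR0_TS.
 * The helper name is made up purely for illustration.
 */
#if 0
static unsigned long example_vmcs12_guest_cr0_ts(struct kvm_vcpu *vcpu,
						 struct vmcs12 *vmcs12)
{
	const unsigned long ts = X86_CR0_TS;

	/* Class 1: neither L0 nor L1 traps TS, so L2 owns the real bit. */
	if (vcpu->arch.cr0_guest_owned_bits & ts)
		return vmcs_readl(GUEST_CR0) & ts;

	/* Class 2: L1 traps TS, so L2 cannot have changed L1's view of it. */
	if (vmcs12->cr0_guest_host_mask & ts)
		return vmcs12->guest_cr0 & ts;

	/* Class 3: only L0 traps TS; L2's attempted value lives in the shadow. */
	return vmcs_readl(CR0_READ_SHADOW) & ts;
}
#endif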
3680
3681static inline unsigned long
3682vmcs12_guest_cr4(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
3683{
3684 return
3685 /*1*/ (vmcs_readl(GUEST_CR4) & vcpu->arch.cr4_guest_owned_bits) |
3686 /*2*/ (vmcs12->guest_cr4 & vmcs12->cr4_guest_host_mask) |
3687 /*3*/ (vmcs_readl(CR4_READ_SHADOW) & ~(vmcs12->cr4_guest_host_mask |
3688 vcpu->arch.cr4_guest_owned_bits));
3689}
3690
3691static void vmcs12_save_pending_event(struct kvm_vcpu *vcpu,
3692 struct vmcs12 *vmcs12)
3693{
3694 u32 idt_vectoring;
3695 unsigned int nr;
3696
3697 if (vcpu->arch.exception.injected) {
3698 nr = vcpu->arch.exception.nr;
3699 idt_vectoring = nr | VECTORING_INFO_VALID_MASK;
3700
3701 if (kvm_exception_is_soft(nr)) {
3702 vmcs12->vm_exit_instruction_len =
3703 vcpu->arch.event_exit_inst_len;
3704 idt_vectoring |= INTR_TYPE_SOFT_EXCEPTION;
3705 } else
3706 idt_vectoring |= INTR_TYPE_HARD_EXCEPTION;
3707
3708 if (vcpu->arch.exception.has_error_code) {
3709 idt_vectoring |= VECTORING_INFO_DELIVER_CODE_MASK;
3710 vmcs12->idt_vectoring_error_code =
3711 vcpu->arch.exception.error_code;
3712 }
3713
3714 vmcs12->idt_vectoring_info_field = idt_vectoring;
3715 } else if (vcpu->arch.nmi_injected) {
3716 vmcs12->idt_vectoring_info_field =
3717 INTR_TYPE_NMI_INTR | INTR_INFO_VALID_MASK | NMI_VECTOR;
3718 } else if (vcpu->arch.interrupt.injected) {
3719 nr = vcpu->arch.interrupt.nr;
3720 idt_vectoring = nr | VECTORING_INFO_VALID_MASK;
3721
3722 if (vcpu->arch.interrupt.soft) {
3723 idt_vectoring |= INTR_TYPE_SOFT_INTR;
3724 vmcs12->vm_entry_instruction_len =
3725 vcpu->arch.event_exit_inst_len;
3726 } else
3727 idt_vectoring |= INTR_TYPE_EXT_INTR;
3728
3729 vmcs12->idt_vectoring_info_field = idt_vectoring;
3730 }
3731}
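
/*
 * Worked example of the encoding above (vector chosen for illustration):
 * re-injecting a hardware #PF (vector 14) with an error code produces
 *
 *	idt_vectoring_info_field = 14					(vector)
 *				 | INTR_TYPE_HARD_EXCEPTION		(3 << 8)
 *				 | VECTORING_INFO_DELIVER_CODE_MASK	(bit 11)
 *				 | VECTORING_INFO_VALID_MASK		(bit 31)
 *				 = 0x80000b0e
 *
 * with the error code itself placed in idt_vectoring_error_code.
 */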
3732
3733
Paolo Bonzini96b100c2020-03-17 18:32:50 +01003734void nested_mark_vmcs12_pages_dirty(struct kvm_vcpu *vcpu)
Sean Christopherson55d23752018-12-03 13:53:18 -08003735{
3736 struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
3737 gfn_t gfn;
3738
3739 /*
3740 * Don't need to mark the APIC access page dirty; it is never
3741 * written to by the CPU during APIC virtualization.
3742 */
3743
3744 if (nested_cpu_has(vmcs12, CPU_BASED_TPR_SHADOW)) {
3745 gfn = vmcs12->virtual_apic_page_addr >> PAGE_SHIFT;
3746 kvm_vcpu_mark_page_dirty(vcpu, gfn);
3747 }
3748
3749 if (nested_cpu_has_posted_intr(vmcs12)) {
3750 gfn = vmcs12->posted_intr_desc_addr >> PAGE_SHIFT;
3751 kvm_vcpu_mark_page_dirty(vcpu, gfn);
3752 }
3753}
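
/*
 * Example of the gfn calculation above (address chosen for illustration):
 * a virtual_apic_page_addr of 0x123456000 yields gfn 0x123456, i.e. the
 * guest-physical address shifted right by PAGE_SHIFT (12).
 */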
3754
Jim Mattson650293c2021-06-04 10:26:02 -07003755static int vmx_complete_nested_posted_interrupt(struct kvm_vcpu *vcpu)
Sean Christopherson55d23752018-12-03 13:53:18 -08003756{
3757 struct vcpu_vmx *vmx = to_vmx(vcpu);
3758 int max_irr;
3759 void *vapic_page;
3760 u16 status;
3761
Jim Mattson966eefb2021-06-04 10:26:06 -07003762 if (!vmx->nested.pi_pending)
Jim Mattson650293c2021-06-04 10:26:02 -07003763 return 0;
Sean Christopherson55d23752018-12-03 13:53:18 -08003764
Jim Mattson966eefb2021-06-04 10:26:06 -07003765 if (!vmx->nested.pi_desc)
3766 goto mmio_needed;
3767
Sean Christopherson55d23752018-12-03 13:53:18 -08003768 vmx->nested.pi_pending = false;
Jim Mattson966eefb2021-06-04 10:26:06 -07003769
Sean Christopherson55d23752018-12-03 13:53:18 -08003770 if (!pi_test_and_clear_on(vmx->nested.pi_desc))
Jim Mattson650293c2021-06-04 10:26:02 -07003771 return 0;
Sean Christopherson55d23752018-12-03 13:53:18 -08003772
3773 max_irr = find_last_bit((unsigned long *)vmx->nested.pi_desc->pir, 256);
3774 if (max_irr != 256) {
KarimAllah Ahmed96c66e82019-01-31 21:24:37 +01003775 vapic_page = vmx->nested.virtual_apic_map.hva;
3776 if (!vapic_page)
Jim Mattson0fe998b2021-06-04 10:26:05 -07003777 goto mmio_needed;
KarimAllah Ahmed96c66e82019-01-31 21:24:37 +01003778
Sean Christopherson55d23752018-12-03 13:53:18 -08003779 __kvm_apic_update_irr(vmx->nested.pi_desc->pir,
3780 vapic_page, &max_irr);
Sean Christopherson55d23752018-12-03 13:53:18 -08003781 status = vmcs_read16(GUEST_INTR_STATUS);
3782 if ((u8)max_irr > ((u8)status & 0xff)) {
3783 status &= ~0xff;
3784 status |= (u8)max_irr;
3785 vmcs_write16(GUEST_INTR_STATUS, status);
3786 }
3787 }
3788
3789 nested_mark_vmcs12_pages_dirty(vcpu);
Jim Mattson650293c2021-06-04 10:26:02 -07003790 return 0;
Jim Mattson0fe998b2021-06-04 10:26:05 -07003791
3792mmio_needed:
3793 kvm_handle_memory_failure(vcpu, X86EMUL_IO_NEEDED, NULL);
3794 return -ENXIO;
Sean Christopherson55d23752018-12-03 13:53:18 -08003795}
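
/*
 * Illustrative sketch (not compiled) of the GUEST_INTR_STATUS update above:
 * bits 7:0 of the field hold RVI and bits 15:8 hold SVI, so only the low
 * byte is raised to the highest vector found in the PIR.  The helper name
 * and the example vector are made up for illustration.
 */
#if 0
static void example_raise_rvi(u8 max_irr)	/* e.g. max_irr = 0x91 */
{
	u16 status = vmcs_read16(GUEST_INTR_STATUS);

	if (max_irr > (u8)status) {
		status &= ~0xff;	/* keep SVI in the high byte */
		status |= max_irr;	/* raise RVI to the new maximum */
		vmcs_write16(GUEST_INTR_STATUS, status);
	}
}
#endif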
3796
3797static void nested_vmx_inject_exception_vmexit(struct kvm_vcpu *vcpu,
3798 unsigned long exit_qual)
3799{
3800 struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
3801 unsigned int nr = vcpu->arch.exception.nr;
3802 u32 intr_info = nr | INTR_INFO_VALID_MASK;
3803
3804 if (vcpu->arch.exception.has_error_code) {
3805 vmcs12->vm_exit_intr_error_code = vcpu->arch.exception.error_code;
3806 intr_info |= INTR_INFO_DELIVER_CODE_MASK;
3807 }
3808
3809 if (kvm_exception_is_soft(nr))
3810 intr_info |= INTR_TYPE_SOFT_EXCEPTION;
3811 else
3812 intr_info |= INTR_TYPE_HARD_EXCEPTION;
3813
3814 if (!(vmcs12->idt_vectoring_info_field & VECTORING_INFO_VALID_MASK) &&
3815 vmx_get_nmi_mask(vcpu))
3816 intr_info |= INTR_INFO_UNBLOCK_NMI;
3817
3818 nested_vmx_vmexit(vcpu, EXIT_REASON_EXCEPTION_NMI, intr_info, exit_qual);
3819}
3820
Oliver Upton684c0422020-02-07 02:36:05 -08003821/*
3822 * Returns true if a debug trap is pending delivery.
3823 *
3824 * In KVM, debug traps bear an exception payload. As such, the class of a #DB
3825 * exception may be inferred from the presence of an exception payload.
3826 */
3827static inline bool vmx_pending_dbg_trap(struct kvm_vcpu *vcpu)
3828{
3829 return vcpu->arch.exception.pending &&
3830 vcpu->arch.exception.nr == DB_VECTOR &&
3831 vcpu->arch.exception.payload;
3832}
3833
3834/*
3835 * Certain VM-exits set the 'pending debug exceptions' field to indicate a
3836 * recognized #DB (data or single-step) that has yet to be delivered. Since KVM
3837 * represents these debug traps with a payload that is said to be compatible
3838 * with the 'pending debug exceptions' field, write the payload to the VMCS
3839 * field if a VM-exit is delivered before the debug trap.
3840 */
3841static void nested_vmx_update_pending_dbg(struct kvm_vcpu *vcpu)
3842{
3843 if (vmx_pending_dbg_trap(vcpu))
3844 vmcs_writel(GUEST_PENDING_DBG_EXCEPTIONS,
3845 vcpu->arch.exception.payload);
3846}
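
/*
 * Example of the payload format (value chosen for illustration): a pending
 * single-step #DB carries a DR6-style payload with BS (bit 14) set, i.e.
 * 0x4000, and writing it to GUEST_PENDING_DBG_EXCEPTIONS marks the trap as
 * pending in the 'pending debug exceptions' format (B0-B3 in bits 3:0,
 * BS in bit 14).
 */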
3847
Sean Christophersond2060bd2020-04-22 19:25:39 -07003848static bool nested_vmx_preemption_timer_pending(struct kvm_vcpu *vcpu)
3849{
3850 return nested_cpu_has_preemption_timer(get_vmcs12(vcpu)) &&
3851 to_vmx(vcpu)->nested.preemption_timer_expired;
3852}
3853
Sean Christophersona1c77ab2020-03-02 22:27:35 -08003854static int vmx_check_nested_events(struct kvm_vcpu *vcpu)
Sean Christopherson55d23752018-12-03 13:53:18 -08003855{
3856 struct vcpu_vmx *vmx = to_vmx(vcpu);
3857 unsigned long exit_qual;
3858 bool block_nested_events =
3859 vmx->nested.nested_run_pending || kvm_event_needs_reinjection(vcpu);
Oliver Upton5ef8acb2020-02-07 02:36:07 -08003860 bool mtf_pending = vmx->nested.mtf_pending;
Liran Alon4b9852f2019-08-26 13:24:49 +03003861 struct kvm_lapic *apic = vcpu->arch.apic;
3862
Oliver Upton5ef8acb2020-02-07 02:36:07 -08003863 /*
3864 * Clear the MTF state. If a higher priority VM-exit is delivered first,
3865 * this state is discarded.
3866 */
Oliver Upton5c8beb42020-04-06 20:12:37 +00003867 if (!block_nested_events)
3868 vmx->nested.mtf_pending = false;
Oliver Upton5ef8acb2020-02-07 02:36:07 -08003869
Liran Alon4b9852f2019-08-26 13:24:49 +03003870 if (lapic_in_kernel(vcpu) &&
3871 test_bit(KVM_APIC_INIT, &apic->pending_events)) {
3872 if (block_nested_events)
3873 return -EBUSY;
Oliver Upton684c0422020-02-07 02:36:05 -08003874 nested_vmx_update_pending_dbg(vcpu);
Liran Alone64a8502019-11-11 14:16:05 +02003875 clear_bit(KVM_APIC_INIT, &apic->pending_events);
Yadong Qibf0cd882020-11-06 14:51:22 +08003876 if (vcpu->arch.mp_state != KVM_MP_STATE_INIT_RECEIVED)
3877 nested_vmx_vmexit(vcpu, EXIT_REASON_INIT_SIGNAL, 0, 0);
3878 return 0;
3879 }
3880
3881 if (lapic_in_kernel(vcpu) &&
3882 test_bit(KVM_APIC_SIPI, &apic->pending_events)) {
3883 if (block_nested_events)
3884 return -EBUSY;
3885
3886 clear_bit(KVM_APIC_SIPI, &apic->pending_events);
3887 if (vcpu->arch.mp_state == KVM_MP_STATE_INIT_RECEIVED)
3888 nested_vmx_vmexit(vcpu, EXIT_REASON_SIPI_SIGNAL, 0,
3889 apic->sipi_vector & 0xFFUL);
Liran Alon4b9852f2019-08-26 13:24:49 +03003890 return 0;
3891 }
Sean Christopherson55d23752018-12-03 13:53:18 -08003892
Oliver Upton5ef8acb2020-02-07 02:36:07 -08003893 /*
3894 * Process any exceptions that are not debug traps before MTF.
Maxim Levitsky4020da32021-04-01 17:38:14 +03003895 *
3896 * Note that only a pending nested run can block a pending exception.
	3897	 * Otherwise an injected NMI/interrupt would either be
	3898	 * lost or delivered to the nested hypervisor via IDT_VECTORING_INFO
	3899	 * while the pending exception is delivered.
Oliver Upton5ef8acb2020-02-07 02:36:07 -08003900 */
Maxim Levitsky4020da32021-04-01 17:38:14 +03003901
Sean Christopherson6ce347a2020-04-22 19:25:38 -07003902 if (vcpu->arch.exception.pending && !vmx_pending_dbg_trap(vcpu)) {
Maxim Levitsky4020da32021-04-01 17:38:14 +03003903 if (vmx->nested.nested_run_pending)
Oliver Upton5ef8acb2020-02-07 02:36:07 -08003904 return -EBUSY;
Sean Christopherson6ce347a2020-04-22 19:25:38 -07003905 if (!nested_vmx_check_exception(vcpu, &exit_qual))
3906 goto no_vmexit;
Oliver Upton5ef8acb2020-02-07 02:36:07 -08003907 nested_vmx_inject_exception_vmexit(vcpu, exit_qual);
3908 return 0;
3909 }
3910
3911 if (mtf_pending) {
3912 if (block_nested_events)
3913 return -EBUSY;
3914 nested_vmx_update_pending_dbg(vcpu);
3915 nested_vmx_vmexit(vcpu, EXIT_REASON_MONITOR_TRAP_FLAG, 0, 0);
3916 return 0;
3917 }
3918
Sean Christopherson6ce347a2020-04-22 19:25:38 -07003919 if (vcpu->arch.exception.pending) {
Maxim Levitsky4020da32021-04-01 17:38:14 +03003920 if (vmx->nested.nested_run_pending)
Sean Christopherson55d23752018-12-03 13:53:18 -08003921 return -EBUSY;
Sean Christopherson6ce347a2020-04-22 19:25:38 -07003922 if (!nested_vmx_check_exception(vcpu, &exit_qual))
3923 goto no_vmexit;
Sean Christopherson55d23752018-12-03 13:53:18 -08003924 nested_vmx_inject_exception_vmexit(vcpu, exit_qual);
3925 return 0;
3926 }
3927
Sean Christophersond2060bd2020-04-22 19:25:39 -07003928 if (nested_vmx_preemption_timer_pending(vcpu)) {
Sean Christopherson55d23752018-12-03 13:53:18 -08003929 if (block_nested_events)
3930 return -EBUSY;
3931 nested_vmx_vmexit(vcpu, EXIT_REASON_PREEMPTION_TIMER, 0, 0);
3932 return 0;
3933 }
3934
Sean Christopherson1cd2f0b2020-04-22 19:25:46 -07003935 if (vcpu->arch.smi_pending && !is_smm(vcpu)) {
3936 if (block_nested_events)
3937 return -EBUSY;
3938 goto no_vmexit;
3939 }
3940
Sean Christopherson15ff0b42020-04-22 19:25:45 -07003941 if (vcpu->arch.nmi_pending && !vmx_nmi_blocked(vcpu)) {
Sean Christopherson55d23752018-12-03 13:53:18 -08003942 if (block_nested_events)
3943 return -EBUSY;
Sean Christopherson15ff0b42020-04-22 19:25:45 -07003944 if (!nested_exit_on_nmi(vcpu))
3945 goto no_vmexit;
3946
Sean Christopherson55d23752018-12-03 13:53:18 -08003947 nested_vmx_vmexit(vcpu, EXIT_REASON_EXCEPTION_NMI,
3948 NMI_VECTOR | INTR_TYPE_NMI_INTR |
3949 INTR_INFO_VALID_MASK, 0);
3950 /*
3951 * The NMI-triggered VM exit counts as injection:
3952 * clear this one and block further NMIs.
3953 */
3954 vcpu->arch.nmi_pending = 0;
3955 vmx_set_nmi_mask(vcpu, true);
3956 return 0;
3957 }
3958
Sean Christopherson15ff0b42020-04-22 19:25:45 -07003959 if (kvm_cpu_has_interrupt(vcpu) && !vmx_interrupt_blocked(vcpu)) {
Sean Christopherson55d23752018-12-03 13:53:18 -08003960 if (block_nested_events)
3961 return -EBUSY;
Sean Christopherson15ff0b42020-04-22 19:25:45 -07003962 if (!nested_exit_on_intr(vcpu))
3963 goto no_vmexit;
Sean Christopherson55d23752018-12-03 13:53:18 -08003964 nested_vmx_vmexit(vcpu, EXIT_REASON_EXTERNAL_INTERRUPT, 0, 0);
3965 return 0;
3966 }
3967
Sean Christopherson6ce347a2020-04-22 19:25:38 -07003968no_vmexit:
Jim Mattson650293c2021-06-04 10:26:02 -07003969 return vmx_complete_nested_posted_interrupt(vcpu);
Sean Christopherson55d23752018-12-03 13:53:18 -08003970}
3971
3972static u32 vmx_get_preemption_timer_value(struct kvm_vcpu *vcpu)
3973{
3974 ktime_t remaining =
3975 hrtimer_get_remaining(&to_vmx(vcpu)->nested.preemption_timer);
3976 u64 value;
3977
3978 if (ktime_to_ns(remaining) <= 0)
3979 return 0;
3980
3981 value = ktime_to_ns(remaining) * vcpu->arch.virtual_tsc_khz;
3982 do_div(value, 1000000);
3983 return value >> VMX_MISC_EMULATED_PREEMPTION_TIMER_RATE;
3984}
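
/*
 * Worked example of the conversion above (numbers chosen for illustration):
 * with 250,000 ns left on the hrtimer and virtual_tsc_khz = 2,000,000
 * (a 2 GHz guest TSC), value = 250,000 * 2,000,000 / 1,000,000 = 500,000
 * TSC ticks, and shifting right by VMX_MISC_EMULATED_PREEMPTION_TIMER_RATE
 * (5) reports 500,000 / 32 = 15,625 preemption-timer units to L1.
 */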
3985
Sean Christopherson7952d762019-05-07 08:36:29 -07003986static bool is_vmcs12_ext_field(unsigned long field)
Sean Christopherson55d23752018-12-03 13:53:18 -08003987{
Sean Christopherson7952d762019-05-07 08:36:29 -07003988 switch (field) {
3989 case GUEST_ES_SELECTOR:
3990 case GUEST_CS_SELECTOR:
3991 case GUEST_SS_SELECTOR:
3992 case GUEST_DS_SELECTOR:
3993 case GUEST_FS_SELECTOR:
3994 case GUEST_GS_SELECTOR:
3995 case GUEST_LDTR_SELECTOR:
3996 case GUEST_TR_SELECTOR:
3997 case GUEST_ES_LIMIT:
3998 case GUEST_CS_LIMIT:
3999 case GUEST_SS_LIMIT:
4000 case GUEST_DS_LIMIT:
4001 case GUEST_FS_LIMIT:
4002 case GUEST_GS_LIMIT:
4003 case GUEST_LDTR_LIMIT:
4004 case GUEST_TR_LIMIT:
4005 case GUEST_GDTR_LIMIT:
4006 case GUEST_IDTR_LIMIT:
4007 case GUEST_ES_AR_BYTES:
4008 case GUEST_DS_AR_BYTES:
4009 case GUEST_FS_AR_BYTES:
4010 case GUEST_GS_AR_BYTES:
4011 case GUEST_LDTR_AR_BYTES:
4012 case GUEST_TR_AR_BYTES:
4013 case GUEST_ES_BASE:
4014 case GUEST_CS_BASE:
4015 case GUEST_SS_BASE:
4016 case GUEST_DS_BASE:
4017 case GUEST_FS_BASE:
4018 case GUEST_GS_BASE:
4019 case GUEST_LDTR_BASE:
4020 case GUEST_TR_BASE:
4021 case GUEST_GDTR_BASE:
4022 case GUEST_IDTR_BASE:
4023 case GUEST_PENDING_DBG_EXCEPTIONS:
4024 case GUEST_BNDCFGS:
4025 return true;
4026 default:
4027 break;
4028 }
Sean Christopherson55d23752018-12-03 13:53:18 -08004029
Sean Christopherson7952d762019-05-07 08:36:29 -07004030 return false;
4031}
4032
4033static void sync_vmcs02_to_vmcs12_rare(struct kvm_vcpu *vcpu,
4034 struct vmcs12 *vmcs12)
4035{
4036 struct vcpu_vmx *vmx = to_vmx(vcpu);
Sean Christopherson55d23752018-12-03 13:53:18 -08004037
4038 vmcs12->guest_es_selector = vmcs_read16(GUEST_ES_SELECTOR);
4039 vmcs12->guest_cs_selector = vmcs_read16(GUEST_CS_SELECTOR);
4040 vmcs12->guest_ss_selector = vmcs_read16(GUEST_SS_SELECTOR);
4041 vmcs12->guest_ds_selector = vmcs_read16(GUEST_DS_SELECTOR);
4042 vmcs12->guest_fs_selector = vmcs_read16(GUEST_FS_SELECTOR);
4043 vmcs12->guest_gs_selector = vmcs_read16(GUEST_GS_SELECTOR);
4044 vmcs12->guest_ldtr_selector = vmcs_read16(GUEST_LDTR_SELECTOR);
4045 vmcs12->guest_tr_selector = vmcs_read16(GUEST_TR_SELECTOR);
4046 vmcs12->guest_es_limit = vmcs_read32(GUEST_ES_LIMIT);
4047 vmcs12->guest_cs_limit = vmcs_read32(GUEST_CS_LIMIT);
4048 vmcs12->guest_ss_limit = vmcs_read32(GUEST_SS_LIMIT);
4049 vmcs12->guest_ds_limit = vmcs_read32(GUEST_DS_LIMIT);
4050 vmcs12->guest_fs_limit = vmcs_read32(GUEST_FS_LIMIT);
4051 vmcs12->guest_gs_limit = vmcs_read32(GUEST_GS_LIMIT);
4052 vmcs12->guest_ldtr_limit = vmcs_read32(GUEST_LDTR_LIMIT);
4053 vmcs12->guest_tr_limit = vmcs_read32(GUEST_TR_LIMIT);
4054 vmcs12->guest_gdtr_limit = vmcs_read32(GUEST_GDTR_LIMIT);
4055 vmcs12->guest_idtr_limit = vmcs_read32(GUEST_IDTR_LIMIT);
4056 vmcs12->guest_es_ar_bytes = vmcs_read32(GUEST_ES_AR_BYTES);
Sean Christopherson55d23752018-12-03 13:53:18 -08004057 vmcs12->guest_ds_ar_bytes = vmcs_read32(GUEST_DS_AR_BYTES);
4058 vmcs12->guest_fs_ar_bytes = vmcs_read32(GUEST_FS_AR_BYTES);
4059 vmcs12->guest_gs_ar_bytes = vmcs_read32(GUEST_GS_AR_BYTES);
4060 vmcs12->guest_ldtr_ar_bytes = vmcs_read32(GUEST_LDTR_AR_BYTES);
4061 vmcs12->guest_tr_ar_bytes = vmcs_read32(GUEST_TR_AR_BYTES);
4062 vmcs12->guest_es_base = vmcs_readl(GUEST_ES_BASE);
4063 vmcs12->guest_cs_base = vmcs_readl(GUEST_CS_BASE);
4064 vmcs12->guest_ss_base = vmcs_readl(GUEST_SS_BASE);
4065 vmcs12->guest_ds_base = vmcs_readl(GUEST_DS_BASE);
4066 vmcs12->guest_fs_base = vmcs_readl(GUEST_FS_BASE);
4067 vmcs12->guest_gs_base = vmcs_readl(GUEST_GS_BASE);
4068 vmcs12->guest_ldtr_base = vmcs_readl(GUEST_LDTR_BASE);
4069 vmcs12->guest_tr_base = vmcs_readl(GUEST_TR_BASE);
4070 vmcs12->guest_gdtr_base = vmcs_readl(GUEST_GDTR_BASE);
4071 vmcs12->guest_idtr_base = vmcs_readl(GUEST_IDTR_BASE);
Sean Christopherson7952d762019-05-07 08:36:29 -07004072 vmcs12->guest_pending_dbg_exceptions =
4073 vmcs_readl(GUEST_PENDING_DBG_EXCEPTIONS);
4074 if (kvm_mpx_supported())
4075 vmcs12->guest_bndcfgs = vmcs_read64(GUEST_BNDCFGS);
4076
4077 vmx->nested.need_sync_vmcs02_to_vmcs12_rare = false;
4078}
4079
4080static void copy_vmcs02_to_vmcs12_rare(struct kvm_vcpu *vcpu,
4081 struct vmcs12 *vmcs12)
4082{
4083 struct vcpu_vmx *vmx = to_vmx(vcpu);
4084 int cpu;
4085
4086 if (!vmx->nested.need_sync_vmcs02_to_vmcs12_rare)
4087 return;
	4088
4090 WARN_ON_ONCE(vmx->loaded_vmcs != &vmx->vmcs01);
4091
4092 cpu = get_cpu();
4093 vmx->loaded_vmcs = &vmx->nested.vmcs02;
Sean Christopherson1af1bb02020-05-06 16:58:50 -07004094 vmx_vcpu_load_vmcs(vcpu, cpu, &vmx->vmcs01);
Sean Christopherson7952d762019-05-07 08:36:29 -07004095
4096 sync_vmcs02_to_vmcs12_rare(vcpu, vmcs12);
4097
4098 vmx->loaded_vmcs = &vmx->vmcs01;
Sean Christopherson1af1bb02020-05-06 16:58:50 -07004099 vmx_vcpu_load_vmcs(vcpu, cpu, &vmx->nested.vmcs02);
Sean Christopherson7952d762019-05-07 08:36:29 -07004100 put_cpu();
4101}
4102
4103/*
4104 * Update the guest state fields of vmcs12 to reflect changes that
4105 * occurred while L2 was running. (The "IA-32e mode guest" bit of the
4106 * VM-entry controls is also updated, since this is really a guest
4107 * state bit.)
4108 */
4109static void sync_vmcs02_to_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
4110{
4111 struct vcpu_vmx *vmx = to_vmx(vcpu);
4112
Vitaly Kuznetsov1e9dfbd2021-05-26 15:20:16 +02004113 if (evmptr_is_valid(vmx->nested.hv_evmcs_vmptr))
Sean Christopherson7952d762019-05-07 08:36:29 -07004114 sync_vmcs02_to_vmcs12_rare(vcpu, vmcs12);
4115
Vitaly Kuznetsov1e9dfbd2021-05-26 15:20:16 +02004116 vmx->nested.need_sync_vmcs02_to_vmcs12_rare =
4117 !evmptr_is_valid(vmx->nested.hv_evmcs_vmptr);
Sean Christopherson7952d762019-05-07 08:36:29 -07004118
4119 vmcs12->guest_cr0 = vmcs12_guest_cr0(vcpu, vmcs12);
4120 vmcs12->guest_cr4 = vmcs12_guest_cr4(vcpu, vmcs12);
4121
4122 vmcs12->guest_rsp = kvm_rsp_read(vcpu);
4123 vmcs12->guest_rip = kvm_rip_read(vcpu);
4124 vmcs12->guest_rflags = vmcs_readl(GUEST_RFLAGS);
4125
4126 vmcs12->guest_cs_ar_bytes = vmcs_read32(GUEST_CS_AR_BYTES);
4127 vmcs12->guest_ss_ar_bytes = vmcs_read32(GUEST_SS_AR_BYTES);
Sean Christopherson55d23752018-12-03 13:53:18 -08004128
4129 vmcs12->guest_interruptibility_info =
4130 vmcs_read32(GUEST_INTERRUPTIBILITY_INFO);
Sean Christopherson7952d762019-05-07 08:36:29 -07004131
Sean Christopherson55d23752018-12-03 13:53:18 -08004132 if (vcpu->arch.mp_state == KVM_MP_STATE_HALTED)
4133 vmcs12->guest_activity_state = GUEST_ACTIVITY_HLT;
Yadong Qibf0cd882020-11-06 14:51:22 +08004134 else if (vcpu->arch.mp_state == KVM_MP_STATE_INIT_RECEIVED)
4135 vmcs12->guest_activity_state = GUEST_ACTIVITY_WAIT_SIPI;
Sean Christopherson55d23752018-12-03 13:53:18 -08004136 else
4137 vmcs12->guest_activity_state = GUEST_ACTIVITY_ACTIVE;
4138
Paolo Bonzinib4b65b52019-01-29 19:12:35 +01004139 if (nested_cpu_has_preemption_timer(vmcs12) &&
Peter Shier850448f2020-05-26 14:51:06 -07004140 vmcs12->vm_exit_controls & VM_EXIT_SAVE_VMX_PREEMPTION_TIMER &&
4141 !vmx->nested.nested_run_pending)
4142 vmcs12->vmx_preemption_timer_value =
4143 vmx_get_preemption_timer_value(vcpu);
Sean Christopherson55d23752018-12-03 13:53:18 -08004144
4145 /*
4146 * In some cases (usually, nested EPT), L2 is allowed to change its
4147 * own CR3 without exiting. If it has changed it, we must keep it.
4148 * Of course, if L0 is using shadow page tables, GUEST_CR3 was defined
4149 * by L0, not L1 or L2, so we mustn't unconditionally copy it to vmcs12.
4150 *
4151 * Additionally, restore L2's PDPTR to vmcs12.
4152 */
4153 if (enable_ept) {
4154 vmcs12->guest_cr3 = vmcs_readl(GUEST_CR3);
Sean Christophersonc7554efc2019-05-07 09:06:40 -07004155 if (nested_cpu_has_ept(vmcs12) && is_pae_paging(vcpu)) {
4156 vmcs12->guest_pdptr0 = vmcs_read64(GUEST_PDPTR0);
4157 vmcs12->guest_pdptr1 = vmcs_read64(GUEST_PDPTR1);
4158 vmcs12->guest_pdptr2 = vmcs_read64(GUEST_PDPTR2);
4159 vmcs12->guest_pdptr3 = vmcs_read64(GUEST_PDPTR3);
4160 }
Sean Christopherson55d23752018-12-03 13:53:18 -08004161 }
4162
4163 vmcs12->guest_linear_address = vmcs_readl(GUEST_LINEAR_ADDRESS);
4164
4165 if (nested_cpu_has_vid(vmcs12))
4166 vmcs12->guest_intr_status = vmcs_read16(GUEST_INTR_STATUS);
4167
4168 vmcs12->vm_entry_controls =
4169 (vmcs12->vm_entry_controls & ~VM_ENTRY_IA32E_MODE) |
4170 (vm_entry_controls_get(to_vmx(vcpu)) & VM_ENTRY_IA32E_MODE);
4171
Sean Christopherson699a1ac2019-05-07 09:06:37 -07004172 if (vmcs12->vm_exit_controls & VM_EXIT_SAVE_DEBUG_CONTROLS)
Sean Christopherson55d23752018-12-03 13:53:18 -08004173 kvm_get_dr(vcpu, 7, (unsigned long *)&vmcs12->guest_dr7);
Sean Christopherson55d23752018-12-03 13:53:18 -08004174
Sean Christopherson55d23752018-12-03 13:53:18 -08004175 if (vmcs12->vm_exit_controls & VM_EXIT_SAVE_IA32_EFER)
4176 vmcs12->guest_ia32_efer = vcpu->arch.efer;
Sean Christopherson55d23752018-12-03 13:53:18 -08004177}
4178
4179/*
4180 * prepare_vmcs12 is part of what we need to do when the nested L2 guest exits
4181 * and we want to prepare to run its L1 parent. L1 keeps a vmcs for L2 (vmcs12),
4182 * and this function updates it to reflect the changes to the guest state while
4183 * L2 was running (and perhaps made some exits which were handled directly by L0
4184 * without going back to L1), and to reflect the exit reason.
	4185 * Note that we do not have to copy all VMCS fields here, just those that
	4186 * could have been changed by the L2 guest or the exit - i.e., the guest-state
	4187 * and exit-information fields only. Other fields are modified by L1 with VMWRITE,
4188 * which already writes to vmcs12 directly.
4189 */
4190static void prepare_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
Sean Christopherson4dcefa32020-04-15 10:55:18 -07004191 u32 vm_exit_reason, u32 exit_intr_info,
Sean Christopherson55d23752018-12-03 13:53:18 -08004192 unsigned long exit_qualification)
4193{
Sean Christopherson55d23752018-12-03 13:53:18 -08004194 /* update exit information fields: */
Sean Christopherson4dcefa32020-04-15 10:55:18 -07004195 vmcs12->vm_exit_reason = vm_exit_reason;
Sean Christopherson3c0c2ad2021-04-12 16:21:37 +12004196 if (to_vmx(vcpu)->exit_reason.enclave_mode)
4197 vmcs12->vm_exit_reason |= VMX_EXIT_REASONS_SGX_ENCLAVE_MODE;
Sean Christopherson55d23752018-12-03 13:53:18 -08004198 vmcs12->exit_qualification = exit_qualification;
4199 vmcs12->vm_exit_intr_info = exit_intr_info;
4200
4201 vmcs12->idt_vectoring_info_field = 0;
4202 vmcs12->vm_exit_instruction_len = vmcs_read32(VM_EXIT_INSTRUCTION_LEN);
4203 vmcs12->vmx_instruction_info = vmcs_read32(VMX_INSTRUCTION_INFO);
4204
4205 if (!(vmcs12->vm_exit_reason & VMX_EXIT_REASONS_FAILED_VMENTRY)) {
4206 vmcs12->launch_state = 1;
4207
4208 /* vm_entry_intr_info_field is cleared on exit. Emulate this
4209 * instead of reading the real value. */
4210 vmcs12->vm_entry_intr_info_field &= ~INTR_INFO_VALID_MASK;
4211
4212 /*
	4213	 * Transfer the event that L0 or L1 may have wanted to inject into
4214 * L2 to IDT_VECTORING_INFO_FIELD.
4215 */
4216 vmcs12_save_pending_event(vcpu, vmcs12);
Krish Sadhukhana0d4f802018-12-04 19:00:13 -05004217
4218 /*
	4219	 * According to the spec, there's no need to store the guest's
4220 * MSRs if the exit is due to a VM-entry failure that occurs
4221 * during or after loading the guest state. Since this exit
4222 * does not fall in that category, we need to save the MSRs.
4223 */
4224 if (nested_vmx_store_msr(vcpu,
4225 vmcs12->vm_exit_msr_store_addr,
4226 vmcs12->vm_exit_msr_store_count))
4227 nested_vmx_abort(vcpu,
4228 VMX_ABORT_SAVE_GUEST_MSR_FAIL);
Sean Christopherson55d23752018-12-03 13:53:18 -08004229 }
4230
4231 /*
4232 * Drop what we picked up for L2 via vmx_complete_interrupts. It is
4233 * preserved above and would only end up incorrectly in L1.
4234 */
4235 vcpu->arch.nmi_injected = false;
4236 kvm_clear_exception_queue(vcpu);
4237 kvm_clear_interrupt_queue(vcpu);
4238}
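
/*
 * Example of the exit-reason encoding built above (reason chosen for
 * illustration): an EPT violation (basic exit reason 48) taken while L2 was
 * executing inside an SGX enclave is reported to L1 as
 * 48 | VMX_EXIT_REASONS_SGX_ENCLAVE_MODE = 0x08000030, whereas a failed
 * VM-entry would instead carry VMX_EXIT_REASONS_FAILED_VMENTRY (bit 31) and
 * skip the launch_state/MSR-store handling in prepare_vmcs12().
 */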
4239
4240/*
	4241 * A part of what we need to do when the nested L2 guest exits and we want to
	4242 * run its L1 parent is to reset L1's guest state to the host state specified
4243 * in vmcs12.
4244 * This function is to be called not only on normal nested exit, but also on
4245 * a nested entry failure, as explained in Intel's spec, 3B.23.7 ("VM-Entry
4246 * Failures During or After Loading Guest State").
4247 * This function should be called when the active VMCS is L1's (vmcs01).
4248 */
4249static void load_vmcs12_host_state(struct kvm_vcpu *vcpu,
4250 struct vmcs12 *vmcs12)
4251{
Sean Christopherson68cda402020-05-11 15:05:29 -07004252 enum vm_entry_failure_code ignored;
Sean Christopherson55d23752018-12-03 13:53:18 -08004253 struct kvm_segment seg;
Sean Christopherson55d23752018-12-03 13:53:18 -08004254
4255 if (vmcs12->vm_exit_controls & VM_EXIT_LOAD_IA32_EFER)
4256 vcpu->arch.efer = vmcs12->host_ia32_efer;
4257 else if (vmcs12->vm_exit_controls & VM_EXIT_HOST_ADDR_SPACE_SIZE)
4258 vcpu->arch.efer |= (EFER_LMA | EFER_LME);
4259 else
4260 vcpu->arch.efer &= ~(EFER_LMA | EFER_LME);
4261 vmx_set_efer(vcpu, vcpu->arch.efer);
4262
Paolo Bonzinie9c16c72019-04-30 22:07:26 +02004263 kvm_rsp_write(vcpu, vmcs12->host_rsp);
4264 kvm_rip_write(vcpu, vmcs12->host_rip);
Sean Christopherson55d23752018-12-03 13:53:18 -08004265 vmx_set_rflags(vcpu, X86_EFLAGS_FIXED);
4266 vmx_set_interrupt_shadow(vcpu, 0);
4267
4268 /*
4269 * Note that calling vmx_set_cr0 is important, even if cr0 hasn't
4270 * actually changed, because vmx_set_cr0 refers to efer set above.
4271 *
4272 * CR0_GUEST_HOST_MASK is already set in the original vmcs01
	4273	 * (KVM doesn't change it).
4274 */
Sean Christophersonfa71e952020-07-02 21:04:22 -07004275 vcpu->arch.cr0_guest_owned_bits = KVM_POSSIBLE_CR0_GUEST_BITS;
Sean Christopherson55d23752018-12-03 13:53:18 -08004276 vmx_set_cr0(vcpu, vmcs12->host_cr0);
4277
4278 /* Same as above - no reason to call set_cr4_guest_host_mask(). */
4279 vcpu->arch.cr4_guest_owned_bits = ~vmcs_readl(CR4_GUEST_HOST_MASK);
4280 vmx_set_cr4(vcpu, vmcs12->host_cr4);
4281
4282 nested_ept_uninit_mmu_context(vcpu);
4283
4284 /*
4285 * Only PDPTE load can fail as the value of cr3 was checked on entry and
4286 * couldn't have changed.
4287 */
Maxim Levitsky0f857222021-06-07 12:02:00 +03004288 if (nested_vmx_load_cr3(vcpu, vmcs12->host_cr3, false, true, &ignored))
Sean Christopherson55d23752018-12-03 13:53:18 -08004289 nested_vmx_abort(vcpu, VMX_ABORT_LOAD_HOST_PDPTE_FAIL);
4290
Sean Christopherson50b265a2020-03-20 14:28:19 -07004291 nested_vmx_transition_tlb_flush(vcpu, vmcs12, false);
Sean Christopherson55d23752018-12-03 13:53:18 -08004292
4293 vmcs_write32(GUEST_SYSENTER_CS, vmcs12->host_ia32_sysenter_cs);
4294 vmcs_writel(GUEST_SYSENTER_ESP, vmcs12->host_ia32_sysenter_esp);
4295 vmcs_writel(GUEST_SYSENTER_EIP, vmcs12->host_ia32_sysenter_eip);
4296 vmcs_writel(GUEST_IDTR_BASE, vmcs12->host_idtr_base);
4297 vmcs_writel(GUEST_GDTR_BASE, vmcs12->host_gdtr_base);
4298 vmcs_write32(GUEST_IDTR_LIMIT, 0xFFFF);
4299 vmcs_write32(GUEST_GDTR_LIMIT, 0xFFFF);
4300
4301 /* If not VM_EXIT_CLEAR_BNDCFGS, the L2 value propagates to L1. */
4302 if (vmcs12->vm_exit_controls & VM_EXIT_CLEAR_BNDCFGS)
4303 vmcs_write64(GUEST_BNDCFGS, 0);
4304
4305 if (vmcs12->vm_exit_controls & VM_EXIT_LOAD_IA32_PAT) {
4306 vmcs_write64(GUEST_IA32_PAT, vmcs12->host_ia32_pat);
4307 vcpu->arch.pat = vmcs12->host_ia32_pat;
4308 }
4309 if (vmcs12->vm_exit_controls & VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL)
Oliver Uptond1968422019-12-13 16:33:58 -08004310 WARN_ON_ONCE(kvm_set_msr(vcpu, MSR_CORE_PERF_GLOBAL_CTRL,
4311 vmcs12->host_ia32_perf_global_ctrl));
Sean Christopherson55d23752018-12-03 13:53:18 -08004312
4313 /* Set L1 segment info according to Intel SDM
4314 27.5.2 Loading Host Segment and Descriptor-Table Registers */
4315 seg = (struct kvm_segment) {
4316 .base = 0,
4317 .limit = 0xFFFFFFFF,
4318 .selector = vmcs12->host_cs_selector,
4319 .type = 11,
4320 .present = 1,
4321 .s = 1,
4322 .g = 1
4323 };
4324 if (vmcs12->vm_exit_controls & VM_EXIT_HOST_ADDR_SPACE_SIZE)
4325 seg.l = 1;
4326 else
4327 seg.db = 1;
Sean Christopherson816be9e2021-07-13 09:33:07 -07004328 __vmx_set_segment(vcpu, &seg, VCPU_SREG_CS);
Sean Christopherson55d23752018-12-03 13:53:18 -08004329 seg = (struct kvm_segment) {
4330 .base = 0,
4331 .limit = 0xFFFFFFFF,
4332 .type = 3,
4333 .present = 1,
4334 .s = 1,
4335 .db = 1,
4336 .g = 1
4337 };
4338 seg.selector = vmcs12->host_ds_selector;
Sean Christopherson816be9e2021-07-13 09:33:07 -07004339 __vmx_set_segment(vcpu, &seg, VCPU_SREG_DS);
Sean Christopherson55d23752018-12-03 13:53:18 -08004340 seg.selector = vmcs12->host_es_selector;
Sean Christopherson816be9e2021-07-13 09:33:07 -07004341 __vmx_set_segment(vcpu, &seg, VCPU_SREG_ES);
Sean Christopherson55d23752018-12-03 13:53:18 -08004342 seg.selector = vmcs12->host_ss_selector;
Sean Christopherson816be9e2021-07-13 09:33:07 -07004343 __vmx_set_segment(vcpu, &seg, VCPU_SREG_SS);
Sean Christopherson55d23752018-12-03 13:53:18 -08004344 seg.selector = vmcs12->host_fs_selector;
4345 seg.base = vmcs12->host_fs_base;
Sean Christopherson816be9e2021-07-13 09:33:07 -07004346 __vmx_set_segment(vcpu, &seg, VCPU_SREG_FS);
Sean Christopherson55d23752018-12-03 13:53:18 -08004347 seg.selector = vmcs12->host_gs_selector;
4348 seg.base = vmcs12->host_gs_base;
Sean Christopherson816be9e2021-07-13 09:33:07 -07004349 __vmx_set_segment(vcpu, &seg, VCPU_SREG_GS);
Sean Christopherson55d23752018-12-03 13:53:18 -08004350 seg = (struct kvm_segment) {
4351 .base = vmcs12->host_tr_base,
4352 .limit = 0x67,
4353 .selector = vmcs12->host_tr_selector,
4354 .type = 11,
4355 .present = 1
4356 };
Sean Christopherson816be9e2021-07-13 09:33:07 -07004357 __vmx_set_segment(vcpu, &seg, VCPU_SREG_TR);
Sean Christopherson55d23752018-12-03 13:53:18 -08004358
Sean Christophersonafc8de02021-07-13 09:32:40 -07004359 memset(&seg, 0, sizeof(seg));
4360 seg.unusable = 1;
Sean Christopherson816be9e2021-07-13 09:33:07 -07004361 __vmx_set_segment(vcpu, &seg, VCPU_SREG_LDTR);
Sean Christopherson55d23752018-12-03 13:53:18 -08004362
4363 kvm_set_dr(vcpu, 7, 0x400);
4364 vmcs_write64(GUEST_IA32_DEBUGCTL, 0);
4365
Sean Christopherson55d23752018-12-03 13:53:18 -08004366 if (nested_vmx_load_msr(vcpu, vmcs12->vm_exit_msr_load_addr,
4367 vmcs12->vm_exit_msr_load_count))
4368 nested_vmx_abort(vcpu, VMX_ABORT_LOAD_HOST_MSR_FAIL);
Maxim Levitskydbab6102021-09-13 17:09:54 +03004369
4370 to_vmx(vcpu)->emulation_required = vmx_emulation_required(vcpu);
Sean Christopherson55d23752018-12-03 13:53:18 -08004371}
4372
4373static inline u64 nested_vmx_get_vmcs01_guest_efer(struct vcpu_vmx *vmx)
4374{
Sean Christophersoneb3db1b2020-09-23 11:03:58 -07004375 struct vmx_uret_msr *efer_msr;
Sean Christopherson55d23752018-12-03 13:53:18 -08004376 unsigned int i;
4377
4378 if (vm_entry_controls_get(vmx) & VM_ENTRY_LOAD_IA32_EFER)
4379 return vmcs_read64(GUEST_IA32_EFER);
4380
4381 if (cpu_has_load_ia32_efer())
4382 return host_efer;
4383
4384 for (i = 0; i < vmx->msr_autoload.guest.nr; ++i) {
4385 if (vmx->msr_autoload.guest.val[i].index == MSR_EFER)
4386 return vmx->msr_autoload.guest.val[i].value;
4387 }
4388
Sean Christophersond85a8032020-09-23 11:04:06 -07004389 efer_msr = vmx_find_uret_msr(vmx, MSR_EFER);
Sean Christopherson55d23752018-12-03 13:53:18 -08004390 if (efer_msr)
4391 return efer_msr->data;
4392
4393 return host_efer;
4394}
4395
4396static void nested_vmx_restore_host_state(struct kvm_vcpu *vcpu)
4397{
4398 struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
4399 struct vcpu_vmx *vmx = to_vmx(vcpu);
4400 struct vmx_msr_entry g, h;
Sean Christopherson55d23752018-12-03 13:53:18 -08004401 gpa_t gpa;
4402 u32 i, j;
4403
4404 vcpu->arch.pat = vmcs_read64(GUEST_IA32_PAT);
4405
4406 if (vmcs12->vm_entry_controls & VM_ENTRY_LOAD_DEBUG_CONTROLS) {
4407 /*
4408 * L1's host DR7 is lost if KVM_GUESTDBG_USE_HW_BP is set
4409 * as vmcs01.GUEST_DR7 contains a userspace defined value
4410 * and vcpu->arch.dr7 is not squirreled away before the
4411 * nested VMENTER (not worth adding a variable in nested_vmx).
4412 */
4413 if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)
4414 kvm_set_dr(vcpu, 7, DR7_FIXED_1);
4415 else
4416 WARN_ON(kvm_set_dr(vcpu, 7, vmcs_readl(GUEST_DR7)));
4417 }
4418
4419 /*
4420 * Note that calling vmx_set_{efer,cr0,cr4} is important as they
4421 * handle a variety of side effects to KVM's software model.
4422 */
4423 vmx_set_efer(vcpu, nested_vmx_get_vmcs01_guest_efer(vmx));
4424
Sean Christophersonfa71e952020-07-02 21:04:22 -07004425 vcpu->arch.cr0_guest_owned_bits = KVM_POSSIBLE_CR0_GUEST_BITS;
Sean Christopherson55d23752018-12-03 13:53:18 -08004426 vmx_set_cr0(vcpu, vmcs_readl(CR0_READ_SHADOW));
4427
4428 vcpu->arch.cr4_guest_owned_bits = ~vmcs_readl(CR4_GUEST_HOST_MASK);
4429 vmx_set_cr4(vcpu, vmcs_readl(CR4_READ_SHADOW));
4430
4431 nested_ept_uninit_mmu_context(vcpu);
Sean Christophersonf087a022019-06-07 11:55:34 -07004432 vcpu->arch.cr3 = vmcs_readl(GUEST_CR3);
Sean Christophersoncb3c1e22019-09-27 14:45:22 -07004433 kvm_register_mark_available(vcpu, VCPU_EXREG_CR3);
Sean Christopherson55d23752018-12-03 13:53:18 -08004434
4435 /*
4436 * Use ept_save_pdptrs(vcpu) to load the MMU's cached PDPTRs
4437 * from vmcs01 (if necessary). The PDPTRs are not loaded on
	4438	 * VMFail; like everything else, we just need to ensure our
4439 * software model is up-to-date.
4440 */
Sean Christopherson9932b492020-04-15 13:34:50 -07004441 if (enable_ept && is_pae_paging(vcpu))
Sean Christophersonf087a022019-06-07 11:55:34 -07004442 ept_save_pdptrs(vcpu);
Sean Christopherson55d23752018-12-03 13:53:18 -08004443
4444 kvm_mmu_reset_context(vcpu);
4445
Sean Christopherson55d23752018-12-03 13:53:18 -08004446 /*
4447 * This nasty bit of open coding is a compromise between blindly
4448 * loading L1's MSRs using the exit load lists (incorrect emulation
4449 * of VMFail), leaving the nested VM's MSRs in the software model
4450 * (incorrect behavior) and snapshotting the modified MSRs (too
	4451	 * expensive since the lists are unbounded by hardware). For each
4452 * MSR that was (prematurely) loaded from the nested VMEntry load
4453 * list, reload it from the exit load list if it exists and differs
4454 * from the guest value. The intent is to stuff host state as
4455 * silently as possible, not to fully process the exit load list.
4456 */
Sean Christopherson55d23752018-12-03 13:53:18 -08004457 for (i = 0; i < vmcs12->vm_entry_msr_load_count; i++) {
4458 gpa = vmcs12->vm_entry_msr_load_addr + (i * sizeof(g));
4459 if (kvm_vcpu_read_guest(vcpu, gpa, &g, sizeof(g))) {
4460 pr_debug_ratelimited(
4461 "%s read MSR index failed (%u, 0x%08llx)\n",
4462 __func__, i, gpa);
4463 goto vmabort;
4464 }
4465
4466 for (j = 0; j < vmcs12->vm_exit_msr_load_count; j++) {
4467 gpa = vmcs12->vm_exit_msr_load_addr + (j * sizeof(h));
4468 if (kvm_vcpu_read_guest(vcpu, gpa, &h, sizeof(h))) {
4469 pr_debug_ratelimited(
4470 "%s read MSR failed (%u, 0x%08llx)\n",
4471 __func__, j, gpa);
4472 goto vmabort;
4473 }
4474 if (h.index != g.index)
4475 continue;
4476 if (h.value == g.value)
4477 break;
4478
4479 if (nested_vmx_load_msr_check(vcpu, &h)) {
4480 pr_debug_ratelimited(
4481 "%s check failed (%u, 0x%x, 0x%x)\n",
4482 __func__, j, h.index, h.reserved);
4483 goto vmabort;
4484 }
4485
Sean Christophersonf20935d2019-09-05 14:22:54 -07004486 if (kvm_set_msr(vcpu, h.index, h.value)) {
Sean Christopherson55d23752018-12-03 13:53:18 -08004487 pr_debug_ratelimited(
4488 "%s WRMSR failed (%u, 0x%x, 0x%llx)\n",
4489 __func__, j, h.index, h.value);
4490 goto vmabort;
4491 }
4492 }
4493 }
4494
4495 return;
4496
4497vmabort:
4498 nested_vmx_abort(vcpu, VMX_ABORT_LOAD_HOST_MSR_FAIL);
4499}
4500
4501/*
4502 * Emulate an exit from nested guest (L2) to L1, i.e., prepare to run L1
4503 * and modify vmcs12 to make it see what it would expect to see there if
4504 * L2 was its real guest. Must only be called when in L2 (is_guest_mode())
4505 */
Sean Christopherson4dcefa32020-04-15 10:55:18 -07004506void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 vm_exit_reason,
Sean Christopherson55d23752018-12-03 13:53:18 -08004507 u32 exit_intr_info, unsigned long exit_qualification)
4508{
4509 struct vcpu_vmx *vmx = to_vmx(vcpu);
4510 struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
4511
4512 /* trying to cancel vmlaunch/vmresume is a bug */
4513 WARN_ON_ONCE(vmx->nested.nested_run_pending);
4514
Sean Christophersoncb6a32c2021-03-02 09:45:14 -08004515 /* Similarly, triple faults in L2 should never escape. */
4516 WARN_ON_ONCE(kvm_check_request(KVM_REQ_TRIPLE_FAULT, vcpu));
4517
Vitaly Kuznetsovf5c7e842021-05-03 17:08:51 +02004518 if (kvm_check_request(KVM_REQ_GET_NESTED_STATE_PAGES, vcpu)) {
4519 /*
4520 * KVM_REQ_GET_NESTED_STATE_PAGES is also used to map
	4521	 * the Enlightened VMCS after migration, and we still need to
	4522	 * do that when something forces an L2->L1 exit prior to
4523 * the first L2 run.
4524 */
4525 (void)nested_get_evmcs_page(vcpu);
4526 }
Maxim Levitskyf2c7ef32021-01-07 11:38:51 +02004527
Sean Christopherson40e5f9082021-11-25 01:49:43 +00004528 /* Service pending TLB flush requests for L2 before switching to L1. */
4529 kvm_service_local_tlb_flush_requests(vcpu);
Sean Christophersoneeeb4f62020-03-20 14:28:20 -07004530
Peter Shier43fea4e2020-08-20 16:05:45 -07004531 /*
4532 * VCPU_EXREG_PDPTR will be clobbered in arch/x86/kvm/vmx/vmx.h between
4533 * now and the new vmentry. Ensure that the VMCS02 PDPTR fields are
4534 * up-to-date before switching to L1.
4535 */
4536 if (enable_ept && is_pae_paging(vcpu))
4537 vmx_ept_load_pdptrs(vcpu);
4538
Sean Christopherson55d23752018-12-03 13:53:18 -08004539 leave_guest_mode(vcpu);
4540
Paolo Bonzinib4b65b52019-01-29 19:12:35 +01004541 if (nested_cpu_has_preemption_timer(vmcs12))
4542 hrtimer_cancel(&to_vmx(vcpu)->nested.preemption_timer);
4543
Ilias Stamatisd041b5e2021-05-26 19:44:17 +01004544 if (nested_cpu_has(vmcs12, CPU_BASED_USE_TSC_OFFSETTING)) {
4545 vcpu->arch.tsc_offset = vcpu->arch.l1_tsc_offset;
4546 if (nested_cpu_has2(vmcs12, SECONDARY_EXEC_TSC_SCALING))
4547 vcpu->arch.tsc_scaling_ratio = vcpu->arch.l1_tsc_scaling_ratio;
4548 }
Sean Christopherson55d23752018-12-03 13:53:18 -08004549
4550 if (likely(!vmx->fail)) {
Sean Christopherson3731905ef2019-05-07 08:36:27 -07004551 sync_vmcs02_to_vmcs12(vcpu, vmcs12);
Sean Christophersonf4f83162019-05-07 08:36:26 -07004552
Sean Christopherson4dcefa32020-04-15 10:55:18 -07004553 if (vm_exit_reason != -1)
4554 prepare_vmcs12(vcpu, vmcs12, vm_exit_reason,
4555 exit_intr_info, exit_qualification);
Sean Christopherson55d23752018-12-03 13:53:18 -08004556
4557 /*
Sean Christopherson3731905ef2019-05-07 08:36:27 -07004558 * Must happen outside of sync_vmcs02_to_vmcs12() as it will
Sean Christopherson55d23752018-12-03 13:53:18 -08004559 * also be used to capture vmcs12 cache as part of
4560 * capturing nVMX state for snapshot (migration).
4561 *
4562 * Otherwise, this flush will dirty guest memory at a
	4563	 * point where it is already assumed by user-space to be
4564 * immutable.
4565 */
4566 nested_flush_cached_shadow_vmcs12(vcpu, vmcs12);
Sean Christopherson55d23752018-12-03 13:53:18 -08004567 } else {
4568 /*
4569 * The only expected VM-instruction error is "VM entry with
4570 * invalid control field(s)." Anything else indicates a
4571 * problem with L0. And we should never get here with a
4572 * VMFail of any type if early consistency checks are enabled.
4573 */
4574 WARN_ON_ONCE(vmcs_read32(VM_INSTRUCTION_ERROR) !=
4575 VMXERR_ENTRY_INVALID_CONTROL_FIELD);
4576 WARN_ON_ONCE(nested_early_check);
4577 }
4578
4579 vmx_switch_vmcs(vcpu, &vmx->vmcs01);
4580
4581 /* Update any VMCS fields that might have changed while L2 ran */
4582 vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, vmx->msr_autoload.host.nr);
4583 vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, vmx->msr_autoload.guest.nr);
4584 vmcs_write64(TSC_OFFSET, vcpu->arch.tsc_offset);
Ilias Stamatis1ab92872021-06-07 11:54:38 +01004585 if (kvm_has_tsc_control)
4586 vmcs_write64(TSC_MULTIPLIER, vcpu->arch.tsc_scaling_ratio);
4587
Liran Alon02d496cf2019-11-11 14:30:55 +02004588 if (vmx->nested.l1_tpr_threshold != -1)
4589 vmcs_write32(TPR_THRESHOLD, vmx->nested.l1_tpr_threshold);
Sean Christopherson55d23752018-12-03 13:53:18 -08004590
Sean Christopherson55d23752018-12-03 13:53:18 -08004591 if (vmx->nested.change_vmcs01_virtual_apic_mode) {
4592 vmx->nested.change_vmcs01_virtual_apic_mode = false;
4593 vmx_set_virtual_apic_mode(vcpu);
Sean Christopherson55d23752018-12-03 13:53:18 -08004594 }
4595
Makarand Sonarea85863c2021-02-12 16:50:12 -08004596 if (vmx->nested.update_vmcs01_cpu_dirty_logging) {
4597 vmx->nested.update_vmcs01_cpu_dirty_logging = false;
4598 vmx_update_cpu_dirty_logging(vcpu);
4599 }
4600
Sean Christopherson55d23752018-12-03 13:53:18 -08004601 /* Unpin physical memory we referred to in vmcs02 */
4602 if (vmx->nested.apic_access_page) {
Liran Alonb11494b2019-11-21 00:31:47 +02004603 kvm_release_page_clean(vmx->nested.apic_access_page);
Sean Christopherson55d23752018-12-03 13:53:18 -08004604 vmx->nested.apic_access_page = NULL;
4605 }
KarimAllah Ahmed96c66e82019-01-31 21:24:37 +01004606 kvm_vcpu_unmap(vcpu, &vmx->nested.virtual_apic_map, true);
KarimAllah Ahmed3278e042019-01-31 21:24:38 +01004607 kvm_vcpu_unmap(vcpu, &vmx->nested.pi_desc_map, true);
4608 vmx->nested.pi_desc = NULL;
Sean Christopherson55d23752018-12-03 13:53:18 -08004609
Sean Christopherson1196cb92020-03-20 14:28:23 -07004610 if (vmx->nested.reload_vmcs01_apic_access_page) {
4611 vmx->nested.reload_vmcs01_apic_access_page = false;
4612 kvm_make_request(KVM_REQ_APIC_PAGE_RELOAD, vcpu);
4613 }
Sean Christopherson55d23752018-12-03 13:53:18 -08004614
Sean Christopherson4dcefa32020-04-15 10:55:18 -07004615 if ((vm_exit_reason != -1) &&
Vitaly Kuznetsov1e9dfbd2021-05-26 15:20:16 +02004616 (enable_shadow_vmcs || evmptr_is_valid(vmx->nested.hv_evmcs_vmptr)))
Sean Christopherson3731905ef2019-05-07 08:36:27 -07004617 vmx->nested.need_vmcs12_to_shadow_sync = true;
Sean Christopherson55d23752018-12-03 13:53:18 -08004618
4619 /* in case we halted in L2 */
4620 vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
4621
4622 if (likely(!vmx->fail)) {
Sean Christopherson4dcefa32020-04-15 10:55:18 -07004623 if ((u16)vm_exit_reason == EXIT_REASON_EXTERNAL_INTERRUPT &&
Sean Christophersona1c77ab2020-03-02 22:27:35 -08004624 nested_exit_intr_ack_set(vcpu)) {
Sean Christopherson55d23752018-12-03 13:53:18 -08004625 int irq = kvm_cpu_get_interrupt(vcpu);
4626 WARN_ON(irq < 0);
4627 vmcs12->vm_exit_intr_info = irq |
4628 INTR_INFO_VALID_MASK | INTR_TYPE_EXT_INTR;
4629 }
4630
Sean Christopherson4dcefa32020-04-15 10:55:18 -07004631 if (vm_exit_reason != -1)
Sean Christopherson55d23752018-12-03 13:53:18 -08004632 trace_kvm_nested_vmexit_inject(vmcs12->vm_exit_reason,
4633 vmcs12->exit_qualification,
4634 vmcs12->idt_vectoring_info_field,
4635 vmcs12->vm_exit_intr_info,
4636 vmcs12->vm_exit_intr_error_code,
4637 KVM_ISA_VMX);
4638
4639 load_vmcs12_host_state(vcpu, vmcs12);
4640
4641 return;
4642 }
4643
4644 /*
4645 * After an early L2 VM-entry failure, we're now back
4646 * in L1 which thinks it just finished a VMLAUNCH or
4647 * VMRESUME instruction, so we need to set the failure
4648 * flag and the VM-instruction error field of the VMCS
4649 * accordingly, and skip the emulated instruction.
4650 */
Sean Christophersonb2656e42020-06-08 18:56:07 -07004651 (void)nested_vmx_fail(vcpu, VMXERR_ENTRY_INVALID_CONTROL_FIELD);
Sean Christopherson55d23752018-12-03 13:53:18 -08004652
4653 /*
4654 * Restore L1's host state to KVM's software model. We're here
4655 * because a consistency check was caught by hardware, which
4656 * means some amount of guest state has been propagated to KVM's
4657 * model and needs to be unwound to the host's state.
4658 */
4659 nested_vmx_restore_host_state(vcpu);
4660
4661 vmx->fail = 0;
4662}
4663
Sean Christophersoncb6a32c2021-03-02 09:45:14 -08004664static void nested_vmx_triple_fault(struct kvm_vcpu *vcpu)
4665{
4666 nested_vmx_vmexit(vcpu, EXIT_REASON_TRIPLE_FAULT, 0, 0);
4667}
4668
Sean Christopherson55d23752018-12-03 13:53:18 -08004669/*
4670 * Decode the memory-address operand of a vmx instruction, as recorded on an
4671 * exit caused by such an instruction (run by a guest hypervisor).
4672 * On success, returns 0. When the operand is invalid, returns 1 and throws
Miaohe Lin49f933d2020-02-27 11:20:54 +08004673 * #UD, #GP, or #SS.
Sean Christopherson55d23752018-12-03 13:53:18 -08004674 */
4675int get_vmx_mem_address(struct kvm_vcpu *vcpu, unsigned long exit_qualification,
Eugene Korenevskyfdb28612019-06-06 00:19:16 +03004676 u32 vmx_instruction_info, bool wr, int len, gva_t *ret)
Sean Christopherson55d23752018-12-03 13:53:18 -08004677{
4678 gva_t off;
4679 bool exn;
4680 struct kvm_segment s;
4681
4682 /*
4683 * According to Vol. 3B, "Information for VM Exits Due to Instruction
4684 * Execution", on an exit, vmx_instruction_info holds most of the
4685 * addressing components of the operand. Only the displacement part
4686 * is put in exit_qualification (see 3B, "Basic VM-Exit Information").
4687 * For how an actual address is calculated from all these components,
4688 * refer to Vol. 1, "Operand Addressing".
4689 */
4690 int scaling = vmx_instruction_info & 3;
4691 int addr_size = (vmx_instruction_info >> 7) & 7;
4692 bool is_reg = vmx_instruction_info & (1u << 10);
4693 int seg_reg = (vmx_instruction_info >> 15) & 7;
4694 int index_reg = (vmx_instruction_info >> 18) & 0xf;
4695 bool index_is_valid = !(vmx_instruction_info & (1u << 22));
4696 int base_reg = (vmx_instruction_info >> 23) & 0xf;
4697 bool base_is_valid = !(vmx_instruction_info & (1u << 27));
4698
4699 if (is_reg) {
4700 kvm_queue_exception(vcpu, UD_VECTOR);
4701 return 1;
4702 }
4703
4704 /* Addr = segment_base + offset */
4705 /* offset = base + [index * scale] + displacement */
4706 off = exit_qualification; /* holds the displacement */
Sean Christopherson946c5222019-01-23 14:39:23 -08004707 if (addr_size == 1)
4708 off = (gva_t)sign_extend64(off, 31);
4709 else if (addr_size == 0)
4710 off = (gva_t)sign_extend64(off, 15);
Sean Christopherson55d23752018-12-03 13:53:18 -08004711 if (base_is_valid)
4712 off += kvm_register_read(vcpu, base_reg);
4713 if (index_is_valid)
Miaohe Line6302692020-02-15 10:44:22 +08004714 off += kvm_register_read(vcpu, index_reg) << scaling;
Sean Christopherson55d23752018-12-03 13:53:18 -08004715 vmx_get_segment(vcpu, &s, seg_reg);
Sean Christopherson55d23752018-12-03 13:53:18 -08004716
Sean Christopherson8570f9e2019-01-23 14:39:24 -08004717 /*
4718 * The effective address, i.e. @off, of a memory operand is truncated
4719 * based on the address size of the instruction. Note that this is
4720 * the *effective address*, i.e. the address prior to accounting for
4721 * the segment's base.
4722 */
Sean Christopherson55d23752018-12-03 13:53:18 -08004723 if (addr_size == 1) /* 32 bit */
Sean Christopherson8570f9e2019-01-23 14:39:24 -08004724 off &= 0xffffffff;
4725 else if (addr_size == 0) /* 16 bit */
4726 off &= 0xffff;
Sean Christopherson55d23752018-12-03 13:53:18 -08004727
4728 /* Checks for #GP/#SS exceptions. */
4729 exn = false;
4730 if (is_long_mode(vcpu)) {
Sean Christopherson8570f9e2019-01-23 14:39:24 -08004731 /*
4732 * The virtual/linear address is never truncated in 64-bit
4733 * mode, e.g. a 32-bit address size can yield a 64-bit virtual
4734 * address when using FS/GS with a non-zero base.
4735 */
Liran Alon6694e482019-07-15 18:47:44 +03004736 if (seg_reg == VCPU_SREG_FS || seg_reg == VCPU_SREG_GS)
4737 *ret = s.base + off;
4738 else
4739 *ret = off;
Sean Christopherson8570f9e2019-01-23 14:39:24 -08004740
Sean Christopherson55d23752018-12-03 13:53:18 -08004741 /* Long mode: #GP(0)/#SS(0) if the memory address is in a
4742 * non-canonical form. This is the only check on the memory
4743 * destination for long mode!
4744 */
4745 exn = is_noncanonical_address(*ret, vcpu);
Paolo Bonzinie0dfacb2019-01-30 17:25:38 +01004746 } else {
Sean Christopherson8570f9e2019-01-23 14:39:24 -08004747 /*
4748 * When not in long mode, the virtual/linear address is
4749 * unconditionally truncated to 32 bits regardless of the
4750 * address size.
4751 */
4752 *ret = (s.base + off) & 0xffffffff;
4753
Sean Christopherson55d23752018-12-03 13:53:18 -08004754 /* Protected mode: apply checks for segment validity in the
4755 * following order:
4756 * - segment type check (#GP(0) may be thrown)
4757 * - usability check (#GP(0)/#SS(0))
4758 * - limit check (#GP(0)/#SS(0))
4759 */
4760 if (wr)
4761 /* #GP(0) if the destination operand is located in a
4762 * read-only data segment or any code segment.
4763 */
4764 exn = ((s.type & 0xa) == 0 || (s.type & 8));
4765 else
4766 /* #GP(0) if the source operand is located in an
4767 * execute-only code segment
4768 */
4769 exn = ((s.type & 0xa) == 8);
4770 if (exn) {
4771 kvm_queue_exception_e(vcpu, GP_VECTOR, 0);
4772 return 1;
4773 }
4774 /* Protected mode: #GP(0)/#SS(0) if the segment is unusable.
4775 */
4776 exn = (s.unusable != 0);
Sean Christopherson34333cc2019-01-23 14:39:25 -08004777
4778 /*
4779 * Protected mode: #GP(0)/#SS(0) if the memory operand is
4780 * outside the segment limit. All CPUs that support VMX ignore
4781 * limit checks for flat segments, i.e. segments with base==0,
4782 * limit==0xffffffff and of type expand-up data or code.
Sean Christopherson55d23752018-12-03 13:53:18 -08004783 */
Sean Christopherson34333cc2019-01-23 14:39:25 -08004784 if (!(s.base == 0 && s.limit == 0xffffffff &&
4785 ((s.type & 8) || !(s.type & 4))))
Eugene Korenevskyfdb28612019-06-06 00:19:16 +03004786 exn = exn || ((u64)off + len - 1 > s.limit);
Sean Christopherson55d23752018-12-03 13:53:18 -08004787 }
4788 if (exn) {
4789 kvm_queue_exception_e(vcpu,
4790 seg_reg == VCPU_SREG_SS ?
4791 SS_VECTOR : GP_VECTOR,
4792 0);
4793 return 1;
4794 }
4795
4796 return 0;
4797}
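
/*
 * Worked decode (value constructed for illustration): vmx_instruction_info =
 * 0x58103 describes a 64-bit memory operand addressed as disp(%rax,%rcx,8)
 * through DS: scaling = 3 (bits 1:0), addr_size = 2/64-bit (bits 9:7),
 * is_reg = 0 (bit 10), seg_reg = 3/DS (bits 17:15), index_reg = 1/RCX
 * (bits 21:18, valid), base_reg = 0/RAX (bits 26:23, valid).  The resulting
 * address is RAX + (RCX << 3) + exit_qualification; the DS base is ignored
 * in 64-bit mode and the sum is only checked for canonicality.
 */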
4798
Oliver Upton03a8871a2019-11-13 16:17:20 -08004799void nested_vmx_pmu_entry_exit_ctls_update(struct kvm_vcpu *vcpu)
4800{
4801 struct vcpu_vmx *vmx;
4802
4803 if (!nested_vmx_allowed(vcpu))
4804 return;
4805
4806 vmx = to_vmx(vcpu);
Sean Christophersonafaf0b22020-03-21 13:26:00 -07004807 if (kvm_x86_ops.pmu_ops->is_valid_msr(vcpu, MSR_CORE_PERF_GLOBAL_CTRL)) {
Oliver Upton03a8871a2019-11-13 16:17:20 -08004808 vmx->nested.msrs.entry_ctls_high |=
4809 VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL;
4810 vmx->nested.msrs.exit_ctls_high |=
4811 VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL;
4812 } else {
4813 vmx->nested.msrs.entry_ctls_high &=
4814 ~VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL;
4815 vmx->nested.msrs.exit_ctls_high &=
Chenyi Qiangc6b177a2020-08-28 16:56:21 +08004816 ~VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL;
Oliver Upton03a8871a2019-11-13 16:17:20 -08004817 }
4818}
4819
Vitaly Kuznetsov7a35e512020-06-05 13:59:05 +02004820static int nested_vmx_get_vmptr(struct kvm_vcpu *vcpu, gpa_t *vmpointer,
4821 int *ret)
Sean Christopherson55d23752018-12-03 13:53:18 -08004822{
4823 gva_t gva;
4824 struct x86_exception e;
Vitaly Kuznetsov7a35e512020-06-05 13:59:05 +02004825 int r;
Sean Christopherson55d23752018-12-03 13:53:18 -08004826
Sean Christopherson5addc232020-04-15 13:34:53 -07004827 if (get_vmx_mem_address(vcpu, vmx_get_exit_qual(vcpu),
Eugene Korenevskyfdb28612019-06-06 00:19:16 +03004828 vmcs_read32(VMX_INSTRUCTION_INFO), false,
Vitaly Kuznetsov7a35e512020-06-05 13:59:05 +02004829 sizeof(*vmpointer), &gva)) {
4830 *ret = 1;
4831 return -EINVAL;
4832 }
Sean Christopherson55d23752018-12-03 13:53:18 -08004833
Vitaly Kuznetsov7a35e512020-06-05 13:59:05 +02004834 r = kvm_read_guest_virt(vcpu, gva, vmpointer, sizeof(*vmpointer), &e);
4835 if (r != X86EMUL_CONTINUE) {
Babu Moger3f3393b2020-09-11 14:29:05 -05004836 *ret = kvm_handle_memory_failure(vcpu, r, &e);
Vitaly Kuznetsov7a35e512020-06-05 13:59:05 +02004837 return -EINVAL;
Sean Christopherson55d23752018-12-03 13:53:18 -08004838 }
4839
4840 return 0;
4841}
4842
4843/*
4844 * Allocate a shadow VMCS and associate it with the currently loaded
4845 * VMCS, unless such a shadow VMCS already exists. The newly allocated
4846 * VMCS is also VMCLEARed, so that it is ready for use.
4847 */
4848static struct vmcs *alloc_shadow_vmcs(struct kvm_vcpu *vcpu)
4849{
4850 struct vcpu_vmx *vmx = to_vmx(vcpu);
4851 struct loaded_vmcs *loaded_vmcs = vmx->loaded_vmcs;
4852
4853 /*
4854 * We should allocate a shadow vmcs for vmcs01 only when L1
4855 * executes VMXON and free it when L1 executes VMXOFF.
4856 * As it is invalid to execute VMXON twice, we shouldn't reach
4857 * here when vmcs01 already have an allocated shadow vmcs.
	4858	 * here when vmcs01 already has an allocated shadow vmcs.
4859 WARN_ON(loaded_vmcs == &vmx->vmcs01 && loaded_vmcs->shadow_vmcs);
4860
4861 if (!loaded_vmcs->shadow_vmcs) {
4862 loaded_vmcs->shadow_vmcs = alloc_vmcs(true);
4863 if (loaded_vmcs->shadow_vmcs)
4864 vmcs_clear(loaded_vmcs->shadow_vmcs);
4865 }
4866 return loaded_vmcs->shadow_vmcs;
4867}
4868
4869static int enter_vmx_operation(struct kvm_vcpu *vcpu)
4870{
4871 struct vcpu_vmx *vmx = to_vmx(vcpu);
4872 int r;
4873
4874 r = alloc_loaded_vmcs(&vmx->nested.vmcs02);
4875 if (r < 0)
4876 goto out_vmcs02;
4877
Ben Gardon41836832019-02-11 11:02:52 -08004878 vmx->nested.cached_vmcs12 = kzalloc(VMCS12_SIZE, GFP_KERNEL_ACCOUNT);
Sean Christopherson55d23752018-12-03 13:53:18 -08004879 if (!vmx->nested.cached_vmcs12)
4880 goto out_cached_vmcs12;
4881
Paolo Bonzini8503fea2021-11-22 18:20:16 -05004882 vmx->nested.shadow_vmcs12_cache.gpa = INVALID_GPA;
Ben Gardon41836832019-02-11 11:02:52 -08004883 vmx->nested.cached_shadow_vmcs12 = kzalloc(VMCS12_SIZE, GFP_KERNEL_ACCOUNT);
Sean Christopherson55d23752018-12-03 13:53:18 -08004884 if (!vmx->nested.cached_shadow_vmcs12)
4885 goto out_cached_shadow_vmcs12;
4886
4887 if (enable_shadow_vmcs && !alloc_shadow_vmcs(vcpu))
4888 goto out_shadow_vmcs;
4889
4890 hrtimer_init(&vmx->nested.preemption_timer, CLOCK_MONOTONIC,
Jim Mattsonada00982020-05-08 13:36:42 -07004891 HRTIMER_MODE_ABS_PINNED);
Sean Christopherson55d23752018-12-03 13:53:18 -08004892 vmx->nested.preemption_timer.function = vmx_preemption_timer_fn;
4893
4894 vmx->nested.vpid02 = allocate_vpid();
4895
4896 vmx->nested.vmcs02_initialized = false;
4897 vmx->nested.vmxon = true;
Luwei Kangee85dec2018-10-24 16:05:16 +08004898
Sean Christopherson2ef76192020-03-02 15:56:22 -08004899 if (vmx_pt_mode_is_host_guest()) {
Luwei Kangee85dec2018-10-24 16:05:16 +08004900 vmx->pt_desc.guest.ctl = 0;
Aaron Lewis476c9bd2020-09-25 16:34:18 +02004901 pt_update_intercept_for_msr(vcpu);
Luwei Kangee85dec2018-10-24 16:05:16 +08004902 }
4903
Sean Christopherson55d23752018-12-03 13:53:18 -08004904 return 0;
4905
4906out_shadow_vmcs:
4907 kfree(vmx->nested.cached_shadow_vmcs12);
4908
4909out_cached_shadow_vmcs12:
4910 kfree(vmx->nested.cached_vmcs12);
4911
4912out_cached_vmcs12:
4913 free_loaded_vmcs(&vmx->nested.vmcs02);
4914
4915out_vmcs02:
4916 return -ENOMEM;
4917}
4918
Yu Zhanged7023a2021-09-09 01:17:31 +08004919/* Emulate the VMXON instruction. */
Sean Christopherson55d23752018-12-03 13:53:18 -08004920static int handle_vmon(struct kvm_vcpu *vcpu)
4921{
4922 int ret;
4923 gpa_t vmptr;
KarimAllah Ahmed2e408932019-01-31 21:24:31 +01004924 uint32_t revision;
Sean Christopherson55d23752018-12-03 13:53:18 -08004925 struct vcpu_vmx *vmx = to_vmx(vcpu);
Sean Christopherson32ad73d2019-12-20 20:44:55 -08004926 const u64 VMXON_NEEDED_FEATURES = FEAT_CTL_LOCKED
4927 | FEAT_CTL_VMX_ENABLED_OUTSIDE_SMX;
Sean Christopherson55d23752018-12-03 13:53:18 -08004928
4929 /*
4930 * The Intel VMX Instruction Reference lists a bunch of bits that are
4931 * prerequisite to running VMXON, most notably cr4.VMXE must be set to
Sean Christophersonc2fe3cd2020-10-06 18:44:15 -07004932 * 1 (see vmx_is_valid_cr4() for when we allow the guest to set this).
Sean Christopherson55d23752018-12-03 13:53:18 -08004933 * Otherwise, we should fail with #UD. But most faulting conditions
4934 * have already been checked by hardware, prior to the VM-exit for
4935 * VMXON. We do test guest cr4.VMXE because processor CR4 always has
4936 * that bit set to 1 in non-root mode.
4937 */
4938 if (!kvm_read_cr4_bits(vcpu, X86_CR4_VMXE)) {
4939 kvm_queue_exception(vcpu, UD_VECTOR);
4940 return 1;
4941 }
4942
4943 /* CPL=0 must be checked manually. */
4944 if (vmx_get_cpl(vcpu)) {
4945 kvm_inject_gp(vcpu, 0);
4946 return 1;
4947 }
4948
4949 if (vmx->nested.vmxon)
Sean Christophersonb2656e42020-06-08 18:56:07 -07004950 return nested_vmx_fail(vcpu, VMXERR_VMXON_IN_VMX_ROOT_OPERATION);
Sean Christopherson55d23752018-12-03 13:53:18 -08004951
4952 if ((vmx->msr_ia32_feature_control & VMXON_NEEDED_FEATURES)
4953 != VMXON_NEEDED_FEATURES) {
4954 kvm_inject_gp(vcpu, 0);
4955 return 1;
4956 }
4957
Vitaly Kuznetsov7a35e512020-06-05 13:59:05 +02004958 if (nested_vmx_get_vmptr(vcpu, &vmptr, &ret))
4959 return ret;
Sean Christopherson55d23752018-12-03 13:53:18 -08004960
4961 /*
4962 * SDM 3: 24.11.5
4963 * The first 4 bytes of VMXON region contain the supported
4964 * VMCS revision identifier
4965 *
4966	 * Note - IA32_VMX_BASIC[48] will never be 1 for the nested case,
4967	 * i.e. the VMXON pointer is never limited to a 32-bit physical address
4968 */
KarimAllah Ahmede0bf2662019-01-31 21:24:43 +01004969 if (!page_address_valid(vcpu, vmptr))
Sean Christopherson55d23752018-12-03 13:53:18 -08004970 return nested_vmx_failInvalid(vcpu);
4971
KarimAllah Ahmed2e408932019-01-31 21:24:31 +01004972 if (kvm_read_guest(vcpu->kvm, vmptr, &revision, sizeof(revision)) ||
4973 revision != VMCS12_REVISION)
Sean Christopherson55d23752018-12-03 13:53:18 -08004974 return nested_vmx_failInvalid(vcpu);
4975
Sean Christopherson55d23752018-12-03 13:53:18 -08004976 vmx->nested.vmxon_ptr = vmptr;
4977 ret = enter_vmx_operation(vcpu);
4978 if (ret)
4979 return ret;
4980
4981 return nested_vmx_succeed(vcpu);
4982}
4983
4984static inline void nested_release_vmcs12(struct kvm_vcpu *vcpu)
4985{
4986 struct vcpu_vmx *vmx = to_vmx(vcpu);
4987
Yu Zhang64c78502021-09-30 01:51:53 +08004988 if (vmx->nested.current_vmptr == INVALID_GPA)
Sean Christopherson55d23752018-12-03 13:53:18 -08004989 return;
4990
Sean Christopherson7952d762019-05-07 08:36:29 -07004991 copy_vmcs02_to_vmcs12_rare(vcpu, get_vmcs12(vcpu));
4992
Sean Christopherson55d23752018-12-03 13:53:18 -08004993 if (enable_shadow_vmcs) {
4994		/* copy to memory all shadowed fields in case they were modified */
4996 copy_shadow_to_vmcs12(vmx);
Sean Christopherson55d23752018-12-03 13:53:18 -08004997 vmx_disable_shadow_vmcs(vmx);
4998 }
4999 vmx->nested.posted_intr_nv = -1;
5000
5001 /* Flush VMCS12 to guest memory */
5002 kvm_vcpu_write_guest_page(vcpu,
5003 vmx->nested.current_vmptr >> PAGE_SHIFT,
5004 vmx->nested.cached_vmcs12, 0, VMCS12_SIZE);
5005
5006 kvm_mmu_free_roots(vcpu, &vcpu->arch.guest_mmu, KVM_MMU_ROOTS_ALL);
5007
Yu Zhang64c78502021-09-30 01:51:53 +08005008 vmx->nested.current_vmptr = INVALID_GPA;
Sean Christopherson55d23752018-12-03 13:53:18 -08005009}
5010
5011/* Emulate the VMXOFF instruction */
5012static int handle_vmoff(struct kvm_vcpu *vcpu)
5013{
5014 if (!nested_vmx_check_permission(vcpu))
5015 return 1;
Liran Alon4b9852f2019-08-26 13:24:49 +03005016
Sean Christopherson55d23752018-12-03 13:53:18 -08005017 free_nested(vcpu);
Liran Alon4b9852f2019-08-26 13:24:49 +03005018
5019 /* Process a latched INIT during time CPU was in VMX operation */
5020 kvm_make_request(KVM_REQ_EVENT, vcpu);
5021
Sean Christopherson55d23752018-12-03 13:53:18 -08005022 return nested_vmx_succeed(vcpu);
5023}
5024
5025/* Emulate the VMCLEAR instruction */
5026static int handle_vmclear(struct kvm_vcpu *vcpu)
5027{
5028 struct vcpu_vmx *vmx = to_vmx(vcpu);
5029 u32 zero = 0;
5030 gpa_t vmptr;
Vitaly Kuznetsov11e34912019-06-28 13:23:33 +02005031 u64 evmcs_gpa;
Vitaly Kuznetsov7a35e512020-06-05 13:59:05 +02005032 int r;
Sean Christopherson55d23752018-12-03 13:53:18 -08005033
5034 if (!nested_vmx_check_permission(vcpu))
5035 return 1;
5036
Vitaly Kuznetsov7a35e512020-06-05 13:59:05 +02005037 if (nested_vmx_get_vmptr(vcpu, &vmptr, &r))
5038 return r;
Sean Christopherson55d23752018-12-03 13:53:18 -08005039
KarimAllah Ahmede0bf2662019-01-31 21:24:43 +01005040 if (!page_address_valid(vcpu, vmptr))
Sean Christophersonb2656e42020-06-08 18:56:07 -07005041 return nested_vmx_fail(vcpu, VMXERR_VMCLEAR_INVALID_ADDRESS);
Sean Christopherson55d23752018-12-03 13:53:18 -08005042
5043 if (vmptr == vmx->nested.vmxon_ptr)
Sean Christophersonb2656e42020-06-08 18:56:07 -07005044 return nested_vmx_fail(vcpu, VMXERR_VMCLEAR_VMXON_POINTER);
Sean Christopherson55d23752018-12-03 13:53:18 -08005045
Vitaly Kuznetsov11e34912019-06-28 13:23:33 +02005046 /*
5047 * When Enlightened VMEntry is enabled on the calling CPU we treat
5048	 * the memory area pointed to by vmptr as an Enlightened VMCS (as there's
5049	 * no good way to distinguish it from VMCS12) and we must not corrupt it by
5050 * writing to the non-existent 'launch_state' field. The area doesn't
5051 * have to be the currently active EVMCS on the calling CPU and there's
5052 * nothing KVM has to do to transition it from 'active' to 'non-active'
5053 * state. It is possible that the area will stay mapped as
5054 * vmx->nested.hv_evmcs but this shouldn't be a problem.
5055 */
5056 if (likely(!vmx->nested.enlightened_vmcs_enabled ||
5057 !nested_enlightened_vmentry(vcpu, &evmcs_gpa))) {
Sean Christopherson55d23752018-12-03 13:53:18 -08005058 if (vmptr == vmx->nested.current_vmptr)
5059 nested_release_vmcs12(vcpu);
5060
5061 kvm_vcpu_write_guest(vcpu,
5062 vmptr + offsetof(struct vmcs12,
5063 launch_state),
5064 &zero, sizeof(zero));
Vitaly Kuznetsov3b19b812021-05-26 15:20:21 +02005065 } else if (vmx->nested.hv_evmcs && vmptr == vmx->nested.hv_evmcs_vmptr) {
5066 nested_release_evmcs(vcpu);
Sean Christopherson55d23752018-12-03 13:53:18 -08005067 }
5068
5069 return nested_vmx_succeed(vcpu);
5070}
5071
Sean Christopherson55d23752018-12-03 13:53:18 -08005072/* Emulate the VMLAUNCH instruction */
5073static int handle_vmlaunch(struct kvm_vcpu *vcpu)
5074{
5075 return nested_vmx_run(vcpu, true);
5076}
5077
5078/* Emulate the VMRESUME instruction */
5079static int handle_vmresume(struct kvm_vcpu *vcpu)
5080{
5081
5082 return nested_vmx_run(vcpu, false);
5083}
5084
5085static int handle_vmread(struct kvm_vcpu *vcpu)
5086{
Jim Mattsondd2d6042019-12-06 15:46:35 -08005087 struct vmcs12 *vmcs12 = is_guest_mode(vcpu) ? get_shadow_vmcs12(vcpu)
5088 : get_vmcs12(vcpu);
Sean Christopherson5addc232020-04-15 13:34:53 -07005089 unsigned long exit_qualification = vmx_get_exit_qual(vcpu);
Jim Mattsonc90f4d02019-12-06 15:46:37 -08005090 u32 instr_info = vmcs_read32(VMX_INSTRUCTION_INFO);
5091 struct vcpu_vmx *vmx = to_vmx(vcpu);
Paolo Bonzinif7eea632019-09-14 00:26:27 +02005092 struct x86_exception e;
Jim Mattsonc90f4d02019-12-06 15:46:37 -08005093 unsigned long field;
5094 u64 value;
5095 gva_t gva = 0;
Sean Christopherson1c6f0b42019-05-07 08:36:25 -07005096 short offset;
Vitaly Kuznetsov7a35e512020-06-05 13:59:05 +02005097 int len, r;
Sean Christopherson55d23752018-12-03 13:53:18 -08005098
5099 if (!nested_vmx_check_permission(vcpu))
5100 return 1;
5101
Jim Mattsondd2d6042019-12-06 15:46:35 -08005102 /*
Yu Zhang64c78502021-09-30 01:51:53 +08005103 * In VMX non-root operation, when the VMCS-link pointer is INVALID_GPA,
Jim Mattsondd2d6042019-12-06 15:46:35 -08005104 * any VMREAD sets the ALU flags for VMfailInvalid.
5105 */
Yu Zhang64c78502021-09-30 01:51:53 +08005106 if (vmx->nested.current_vmptr == INVALID_GPA ||
Jim Mattsondd2d6042019-12-06 15:46:35 -08005107 (is_guest_mode(vcpu) &&
Yu Zhang64c78502021-09-30 01:51:53 +08005108 get_vmcs12(vcpu)->vmcs_link_pointer == INVALID_GPA))
Sean Christopherson55d23752018-12-03 13:53:18 -08005109 return nested_vmx_failInvalid(vcpu);
5110
Sean Christopherson55d23752018-12-03 13:53:18 -08005111 /* Decode instruction info and find the field to read */
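	/*
	 * VMX instruction-information layout assumed below (per the SDM):
	 * bits 31:28 hold the register with the VMCS field encoding, bit 10
	 * selects a register (1) vs. memory (0) destination, and bits 6:3
	 * hold the destination register when bit 10 is set.
	 */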
Sean Christopherson27b4a9c42021-04-21 19:21:28 -07005112 field = kvm_register_read(vcpu, (((instr_info) >> 28) & 0xf));
Sean Christopherson1c6f0b42019-05-07 08:36:25 -07005113
5114 offset = vmcs_field_to_offset(field);
5115 if (offset < 0)
Sean Christophersonb2656e42020-06-08 18:56:07 -07005116 return nested_vmx_fail(vcpu, VMXERR_UNSUPPORTED_VMCS_COMPONENT);
Sean Christopherson55d23752018-12-03 13:53:18 -08005117
Sean Christopherson7952d762019-05-07 08:36:29 -07005118 if (!is_guest_mode(vcpu) && is_vmcs12_ext_field(field))
5119 copy_vmcs02_to_vmcs12_rare(vcpu, vmcs12);
5120
Jim Mattsonc90f4d02019-12-06 15:46:37 -08005121 /* Read the field, zero-extended to a u64 value */
5122 value = vmcs12_read_any(vmcs12, field, offset);
Sean Christopherson1c6f0b42019-05-07 08:36:25 -07005123
Sean Christopherson55d23752018-12-03 13:53:18 -08005124 /*
5125 * Now copy part of this value to register or memory, as requested.
5126 * Note that the number of bits actually copied is 32 or 64 depending
5127 * on the guest's mode (32 or 64 bit), not on the given field's length.
5128 */
Jim Mattsonc90f4d02019-12-06 15:46:37 -08005129 if (instr_info & BIT(10)) {
Sean Christopherson27b4a9c42021-04-21 19:21:28 -07005130 kvm_register_write(vcpu, (((instr_info) >> 3) & 0xf), value);
Sean Christopherson55d23752018-12-03 13:53:18 -08005131 } else {
Eugene Korenevskyfdb28612019-06-06 00:19:16 +03005132 len = is_64_bit_mode(vcpu) ? 8 : 4;
Sean Christopherson55d23752018-12-03 13:53:18 -08005133 if (get_vmx_mem_address(vcpu, exit_qualification,
Jim Mattsonc90f4d02019-12-06 15:46:37 -08005134 instr_info, true, len, &gva))
Sean Christopherson55d23752018-12-03 13:53:18 -08005135 return 1;
5136 /* _system ok, nested_vmx_check_permission has verified cpl=0 */
Vitaly Kuznetsov7a35e512020-06-05 13:59:05 +02005137 r = kvm_write_guest_virt_system(vcpu, gva, &value, len, &e);
5138 if (r != X86EMUL_CONTINUE)
Babu Moger3f3393b2020-09-11 14:29:05 -05005139 return kvm_handle_memory_failure(vcpu, r, &e);
Sean Christopherson55d23752018-12-03 13:53:18 -08005140 }
5141
5142 return nested_vmx_succeed(vcpu);
5143}
5144
Sean Christophersone2174292019-05-07 08:36:28 -07005145static bool is_shadow_field_rw(unsigned long field)
5146{
5147 switch (field) {
5148#define SHADOW_FIELD_RW(x, y) case x:
5149#include "vmcs_shadow_fields.h"
5150 return true;
5151 default:
5152 break;
5153 }
5154 return false;
5155}
5156
5157static bool is_shadow_field_ro(unsigned long field)
5158{
5159 switch (field) {
5160#define SHADOW_FIELD_RO(x, y) case x:
5161#include "vmcs_shadow_fields.h"
5162 return true;
5163 default:
5164 break;
5165 }
5166 return false;
5167}
Sean Christopherson55d23752018-12-03 13:53:18 -08005168
5169static int handle_vmwrite(struct kvm_vcpu *vcpu)
5170{
Jim Mattsondd2d6042019-12-06 15:46:35 -08005171 struct vmcs12 *vmcs12 = is_guest_mode(vcpu) ? get_shadow_vmcs12(vcpu)
5172 : get_vmcs12(vcpu);
Sean Christopherson5addc232020-04-15 13:34:53 -07005173 unsigned long exit_qualification = vmx_get_exit_qual(vcpu);
Jim Mattsonc90f4d02019-12-06 15:46:37 -08005174 u32 instr_info = vmcs_read32(VMX_INSTRUCTION_INFO);
5175 struct vcpu_vmx *vmx = to_vmx(vcpu);
5176 struct x86_exception e;
5177 unsigned long field;
Sean Christopherson1c6f0b42019-05-07 08:36:25 -07005178 short offset;
Jim Mattsonc90f4d02019-12-06 15:46:37 -08005179 gva_t gva;
Vitaly Kuznetsov7a35e512020-06-05 13:59:05 +02005180 int len, r;
Sean Christopherson55d23752018-12-03 13:53:18 -08005181
Jim Mattsonc90f4d02019-12-06 15:46:37 -08005182 /*
5183 * The value to write might be 32 or 64 bits, depending on L1's long
Sean Christopherson55d23752018-12-03 13:53:18 -08005184 * mode, and eventually we need to write that into a field of several
5185 * possible lengths. The code below first zero-extends the value to 64
Jim Mattsonc90f4d02019-12-06 15:46:37 -08005186 * bit (value), and then copies only the appropriate number of
Sean Christopherson55d23752018-12-03 13:53:18 -08005187 * bits into the vmcs12 field.
5188 */
Jim Mattsonc90f4d02019-12-06 15:46:37 -08005189 u64 value = 0;
Sean Christopherson55d23752018-12-03 13:53:18 -08005190
5191 if (!nested_vmx_check_permission(vcpu))
5192 return 1;
5193
Jim Mattsondd2d6042019-12-06 15:46:35 -08005194 /*
Yu Zhang64c78502021-09-30 01:51:53 +08005195 * In VMX non-root operation, when the VMCS-link pointer is INVALID_GPA,
Jim Mattsondd2d6042019-12-06 15:46:35 -08005196 * any VMWRITE sets the ALU flags for VMfailInvalid.
5197 */
Yu Zhang64c78502021-09-30 01:51:53 +08005198 if (vmx->nested.current_vmptr == INVALID_GPA ||
Jim Mattsondd2d6042019-12-06 15:46:35 -08005199 (is_guest_mode(vcpu) &&
Yu Zhang64c78502021-09-30 01:51:53 +08005200 get_vmcs12(vcpu)->vmcs_link_pointer == INVALID_GPA))
Sean Christopherson55d23752018-12-03 13:53:18 -08005201 return nested_vmx_failInvalid(vcpu);
5202
Jim Mattsonc90f4d02019-12-06 15:46:37 -08005203 if (instr_info & BIT(10))
Sean Christopherson27b4a9c42021-04-21 19:21:28 -07005204 value = kvm_register_read(vcpu, (((instr_info) >> 3) & 0xf));
Sean Christopherson55d23752018-12-03 13:53:18 -08005205 else {
Eugene Korenevskyfdb28612019-06-06 00:19:16 +03005206 len = is_64_bit_mode(vcpu) ? 8 : 4;
Sean Christopherson55d23752018-12-03 13:53:18 -08005207 if (get_vmx_mem_address(vcpu, exit_qualification,
Jim Mattsonc90f4d02019-12-06 15:46:37 -08005208 instr_info, false, len, &gva))
Sean Christopherson55d23752018-12-03 13:53:18 -08005209 return 1;
Vitaly Kuznetsov7a35e512020-06-05 13:59:05 +02005210 r = kvm_read_guest_virt(vcpu, gva, &value, len, &e);
5211 if (r != X86EMUL_CONTINUE)
Babu Moger3f3393b2020-09-11 14:29:05 -05005212 return kvm_handle_memory_failure(vcpu, r, &e);
Sean Christopherson55d23752018-12-03 13:53:18 -08005213 }
5214
Sean Christopherson27b4a9c42021-04-21 19:21:28 -07005215 field = kvm_register_read(vcpu, (((instr_info) >> 28) & 0xf));
Sean Christopherson55d23752018-12-03 13:53:18 -08005216
Jim Mattson693e02c2019-12-06 15:46:36 -08005217 offset = vmcs_field_to_offset(field);
5218 if (offset < 0)
Sean Christophersonb2656e42020-06-08 18:56:07 -07005219 return nested_vmx_fail(vcpu, VMXERR_UNSUPPORTED_VMCS_COMPONENT);
Jim Mattson693e02c2019-12-06 15:46:36 -08005220
Sean Christopherson55d23752018-12-03 13:53:18 -08005221 /*
5222 * If the vCPU supports "VMWRITE to any supported field in the
5223 * VMCS," then the "read-only" fields are actually read/write.
5224 */
5225 if (vmcs_field_readonly(field) &&
5226 !nested_cpu_has_vmwrite_any_field(vcpu))
Sean Christophersonb2656e42020-06-08 18:56:07 -07005227 return nested_vmx_fail(vcpu, VMXERR_VMWRITE_READ_ONLY_VMCS_COMPONENT);
Sean Christopherson55d23752018-12-03 13:53:18 -08005228
Jim Mattsondd2d6042019-12-06 15:46:35 -08005229 /*
5230 * Ensure vmcs12 is up-to-date before any VMWRITE that dirties
5231	 * vmcs12, else we may clobber a field or consume a stale value.
5232 */
5233 if (!is_guest_mode(vcpu) && !is_shadow_field_rw(field))
5234 copy_vmcs02_to_vmcs12_rare(vcpu, vmcs12);
Sean Christopherson55d23752018-12-03 13:53:18 -08005235
5236 /*
Sean Christophersonb6437802019-05-07 08:36:24 -07005237 * Some Intel CPUs intentionally drop the reserved bits of the AR byte
5238 * fields on VMWRITE. Emulate this behavior to ensure consistent KVM
5239 * behavior regardless of the underlying hardware, e.g. if an AR_BYTE
5240 * field is intercepted for VMWRITE but not VMREAD (in L1), then VMREAD
5241 * from L1 will return a different value than VMREAD from L2 (L1 sees
5242 * the stripped down value, L2 sees the full value as stored by KVM).
Sean Christopherson55d23752018-12-03 13:53:18 -08005243 */
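	/*
	 * The 0x1f0ff mask keeps bits 7:0 (type, S, DPL, P) and bits 16:12
	 * (AVL, L, D/B, G, unusable) of the access-rights field and clears
	 * the reserved bits 11:8, per the SDM's segment AR-byte format.
	 */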
Sean Christophersonb6437802019-05-07 08:36:24 -07005244 if (field >= GUEST_ES_AR_BYTES && field <= GUEST_TR_AR_BYTES)
Jim Mattsonc90f4d02019-12-06 15:46:37 -08005245 value &= 0x1f0ff;
Sean Christophersonb6437802019-05-07 08:36:24 -07005246
Jim Mattsonc90f4d02019-12-06 15:46:37 -08005247 vmcs12_write_any(vmcs12, field, offset, value);
Sean Christopherson55d23752018-12-03 13:53:18 -08005248
5249 /*
Sean Christophersone2174292019-05-07 08:36:28 -07005250 * Do not track vmcs12 dirty-state if in guest-mode as we actually
5251 * dirty shadow vmcs12 instead of vmcs12. Fields that can be updated
5252 * by L1 without a vmexit are always updated in the vmcs02, i.e. don't
5253 * "dirty" vmcs12, all others go down the prepare_vmcs02() slow path.
Sean Christopherson55d23752018-12-03 13:53:18 -08005254 */
Sean Christophersone2174292019-05-07 08:36:28 -07005255 if (!is_guest_mode(vcpu) && !is_shadow_field_rw(field)) {
5256 /*
5257 * L1 can read these fields without exiting, ensure the
5258 * shadow VMCS is up-to-date.
5259 */
5260 if (enable_shadow_vmcs && is_shadow_field_ro(field)) {
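			/*
			 * VMWRITE operates on the current VMCS, so briefly
			 * make the vmcs01 shadow VMCS current (with preemption
			 * disabled to stay on the same pCPU), write the field,
			 * then restore the previously loaded VMCS.
			 */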
5261 preempt_disable();
5262 vmcs_load(vmx->vmcs01.shadow_vmcs);
Sean Christophersonfadcead2019-05-07 08:36:23 -07005263
Jim Mattsonc90f4d02019-12-06 15:46:37 -08005264 __vmcs_writel(field, value);
Sean Christophersonfadcead2019-05-07 08:36:23 -07005265
Sean Christophersone2174292019-05-07 08:36:28 -07005266 vmcs_clear(vmx->vmcs01.shadow_vmcs);
5267 vmcs_load(vmx->loaded_vmcs->vmcs);
5268 preempt_enable();
Sean Christopherson55d23752018-12-03 13:53:18 -08005269 }
Sean Christophersone2174292019-05-07 08:36:28 -07005270 vmx->nested.dirty_vmcs12 = true;
Sean Christopherson55d23752018-12-03 13:53:18 -08005271 }
5272
5273 return nested_vmx_succeed(vcpu);
5274}
5275
5276static void set_current_vmptr(struct vcpu_vmx *vmx, gpa_t vmptr)
5277{
5278 vmx->nested.current_vmptr = vmptr;
5279 if (enable_shadow_vmcs) {
Sean Christophersonfe7f895d2019-05-07 12:17:57 -07005280 secondary_exec_controls_setbit(vmx, SECONDARY_EXEC_SHADOW_VMCS);
Sean Christopherson55d23752018-12-03 13:53:18 -08005281 vmcs_write64(VMCS_LINK_POINTER,
5282 __pa(vmx->vmcs01.shadow_vmcs));
Sean Christopherson3731905ef2019-05-07 08:36:27 -07005283 vmx->nested.need_vmcs12_to_shadow_sync = true;
Sean Christopherson55d23752018-12-03 13:53:18 -08005284 }
5285 vmx->nested.dirty_vmcs12 = true;
Vitaly Kuznetsoved2a4802021-11-29 10:47:03 +01005286 vmx->nested.force_msr_bitmap_recalc = true;
Sean Christopherson55d23752018-12-03 13:53:18 -08005287}
5288
5289/* Emulate the VMPTRLD instruction */
5290static int handle_vmptrld(struct kvm_vcpu *vcpu)
5291{
5292 struct vcpu_vmx *vmx = to_vmx(vcpu);
5293 gpa_t vmptr;
Vitaly Kuznetsov7a35e512020-06-05 13:59:05 +02005294 int r;
Sean Christopherson55d23752018-12-03 13:53:18 -08005295
5296 if (!nested_vmx_check_permission(vcpu))
5297 return 1;
5298
Vitaly Kuznetsov7a35e512020-06-05 13:59:05 +02005299 if (nested_vmx_get_vmptr(vcpu, &vmptr, &r))
5300 return r;
Sean Christopherson55d23752018-12-03 13:53:18 -08005301
KarimAllah Ahmede0bf2662019-01-31 21:24:43 +01005302 if (!page_address_valid(vcpu, vmptr))
Sean Christophersonb2656e42020-06-08 18:56:07 -07005303 return nested_vmx_fail(vcpu, VMXERR_VMPTRLD_INVALID_ADDRESS);
Sean Christopherson55d23752018-12-03 13:53:18 -08005304
5305 if (vmptr == vmx->nested.vmxon_ptr)
Sean Christophersonb2656e42020-06-08 18:56:07 -07005306 return nested_vmx_fail(vcpu, VMXERR_VMPTRLD_VMXON_POINTER);
Sean Christopherson55d23752018-12-03 13:53:18 -08005307
5308 /* Forbid normal VMPTRLD if Enlightened version was used */
Vitaly Kuznetsov1e9dfbd2021-05-26 15:20:16 +02005309 if (evmptr_is_valid(vmx->nested.hv_evmcs_vmptr))
Sean Christopherson55d23752018-12-03 13:53:18 -08005310 return 1;
5311
5312 if (vmx->nested.current_vmptr != vmptr) {
David Woodhousecee66662021-11-15 16:50:26 +00005313 struct gfn_to_hva_cache *ghc = &vmx->nested.vmcs12_cache;
5314 struct vmcs_hdr hdr;
Sean Christopherson55d23752018-12-03 13:53:18 -08005315
Paolo Bonzini8503fea2021-11-22 18:20:16 -05005316 if (kvm_gfn_to_hva_cache_init(vcpu->kvm, ghc, vmptr, VMCS12_SIZE)) {
Sean Christopherson55d23752018-12-03 13:53:18 -08005317 /*
5318 * Reads from an unbacked page return all 1s,
5319 * which means that the 32 bits located at the
5320 * given physical address won't match the required
5321 * VMCS12_REVISION identifier.
5322 */
Sean Christophersonb2656e42020-06-08 18:56:07 -07005323 return nested_vmx_fail(vcpu,
Sean Christopherson55d23752018-12-03 13:53:18 -08005324 VMXERR_VMPTRLD_INCORRECT_VMCS_REVISION_ID);
Sean Christopherson55d23752018-12-03 13:53:18 -08005325 }
KarimAllah Ahmedb146b832019-01-31 21:24:35 +01005326
David Woodhousecee66662021-11-15 16:50:26 +00005327 if (kvm_read_guest_offset_cached(vcpu->kvm, ghc, &hdr,
5328 offsetof(struct vmcs12, hdr),
5329 sizeof(hdr))) {
5330 return nested_vmx_fail(vcpu,
5331 VMXERR_VMPTRLD_INCORRECT_VMCS_REVISION_ID);
5332 }
KarimAllah Ahmedb146b832019-01-31 21:24:35 +01005333
David Woodhousecee66662021-11-15 16:50:26 +00005334 if (hdr.revision_id != VMCS12_REVISION ||
5335 (hdr.shadow_vmcs &&
Sean Christopherson55d23752018-12-03 13:53:18 -08005336 !nested_cpu_has_vmx_shadow_vmcs(vcpu))) {
Sean Christophersonb2656e42020-06-08 18:56:07 -07005337 return nested_vmx_fail(vcpu,
Sean Christopherson55d23752018-12-03 13:53:18 -08005338 VMXERR_VMPTRLD_INCORRECT_VMCS_REVISION_ID);
5339 }
5340
5341 nested_release_vmcs12(vcpu);
5342
5343 /*
5344 * Load VMCS12 from guest memory since it is not already
5345 * cached.
5346 */
David Woodhousecee66662021-11-15 16:50:26 +00005347 if (kvm_read_guest_cached(vcpu->kvm, ghc, vmx->nested.cached_vmcs12,
5348 VMCS12_SIZE)) {
5349 return nested_vmx_fail(vcpu,
5350 VMXERR_VMPTRLD_INCORRECT_VMCS_REVISION_ID);
5351 }
Sean Christopherson55d23752018-12-03 13:53:18 -08005352
5353 set_current_vmptr(vmx, vmptr);
5354 }
5355
5356 return nested_vmx_succeed(vcpu);
5357}
5358
5359/* Emulate the VMPTRST instruction */
5360static int handle_vmptrst(struct kvm_vcpu *vcpu)
5361{
Sean Christopherson5addc232020-04-15 13:34:53 -07005362 unsigned long exit_qual = vmx_get_exit_qual(vcpu);
Sean Christopherson55d23752018-12-03 13:53:18 -08005363 u32 instr_info = vmcs_read32(VMX_INSTRUCTION_INFO);
5364 gpa_t current_vmptr = to_vmx(vcpu)->nested.current_vmptr;
5365 struct x86_exception e;
5366 gva_t gva;
Vitaly Kuznetsov7a35e512020-06-05 13:59:05 +02005367 int r;
Sean Christopherson55d23752018-12-03 13:53:18 -08005368
5369 if (!nested_vmx_check_permission(vcpu))
5370 return 1;
5371
Vitaly Kuznetsov1e9dfbd2021-05-26 15:20:16 +02005372 if (unlikely(evmptr_is_valid(to_vmx(vcpu)->nested.hv_evmcs_vmptr)))
Sean Christopherson55d23752018-12-03 13:53:18 -08005373 return 1;
5374
Eugene Korenevskyfdb28612019-06-06 00:19:16 +03005375 if (get_vmx_mem_address(vcpu, exit_qual, instr_info,
5376 true, sizeof(gpa_t), &gva))
Sean Christopherson55d23752018-12-03 13:53:18 -08005377 return 1;
5378 /* *_system ok, nested_vmx_check_permission has verified cpl=0 */
Vitaly Kuznetsov7a35e512020-06-05 13:59:05 +02005379 r = kvm_write_guest_virt_system(vcpu, gva, (void *)&current_vmptr,
5380 sizeof(gpa_t), &e);
5381 if (r != X86EMUL_CONTINUE)
Babu Moger3f3393b2020-09-11 14:29:05 -05005382 return kvm_handle_memory_failure(vcpu, r, &e);
Vitaly Kuznetsov7a35e512020-06-05 13:59:05 +02005383
Sean Christopherson55d23752018-12-03 13:53:18 -08005384 return nested_vmx_succeed(vcpu);
5385}
5386
5387/* Emulate the INVEPT instruction */
5388static int handle_invept(struct kvm_vcpu *vcpu)
5389{
5390 struct vcpu_vmx *vmx = to_vmx(vcpu);
5391 u32 vmx_instruction_info, types;
Sean Christophersonce8fe7b2020-03-20 14:28:31 -07005392 unsigned long type, roots_to_free;
5393 struct kvm_mmu *mmu;
Sean Christopherson55d23752018-12-03 13:53:18 -08005394 gva_t gva;
5395 struct x86_exception e;
5396 struct {
5397 u64 eptp, gpa;
5398 } operand;
Vipin Sharma329bd562021-11-09 17:44:25 +00005399 int i, r, gpr_index;
Sean Christopherson55d23752018-12-03 13:53:18 -08005400
5401 if (!(vmx->nested.msrs.secondary_ctls_high &
5402 SECONDARY_EXEC_ENABLE_EPT) ||
5403 !(vmx->nested.msrs.ept_caps & VMX_EPT_INVEPT_BIT)) {
5404 kvm_queue_exception(vcpu, UD_VECTOR);
5405 return 1;
5406 }
5407
5408 if (!nested_vmx_check_permission(vcpu))
5409 return 1;
5410
5411 vmx_instruction_info = vmcs_read32(VMX_INSTRUCTION_INFO);
Vipin Sharma329bd562021-11-09 17:44:25 +00005412 gpr_index = vmx_get_instr_info_reg2(vmx_instruction_info);
5413 type = kvm_register_read(vcpu, gpr_index);
Sean Christopherson55d23752018-12-03 13:53:18 -08005414
5415 types = (vmx->nested.msrs.ept_caps >> VMX_EPT_EXTENT_SHIFT) & 6;
5416
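	/*
	 * After the shift, bit 1 of 'types' advertises single-context and
	 * bit 2 all-context invalidation (the "& 6"), matching the only
	 * architecturally defined INVEPT types, VMX_EPT_EXTENT_CONTEXT (1)
	 * and VMX_EPT_EXTENT_GLOBAL (2).
	 */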
5417 if (type >= 32 || !(types & (1 << type)))
Sean Christophersonb2656e42020-06-08 18:56:07 -07005418 return nested_vmx_fail(vcpu, VMXERR_INVALID_OPERAND_TO_INVEPT_INVVPID);
Sean Christopherson55d23752018-12-03 13:53:18 -08005419
5420 /* According to the Intel VMX instruction reference, the memory
5421 * operand is read even if it isn't needed (e.g., for type==global)
5422 */
Sean Christopherson5addc232020-04-15 13:34:53 -07005423 if (get_vmx_mem_address(vcpu, vmx_get_exit_qual(vcpu),
Eugene Korenevskyfdb28612019-06-06 00:19:16 +03005424 vmx_instruction_info, false, sizeof(operand), &gva))
Sean Christopherson55d23752018-12-03 13:53:18 -08005425 return 1;
Vitaly Kuznetsov7a35e512020-06-05 13:59:05 +02005426 r = kvm_read_guest_virt(vcpu, gva, &operand, sizeof(operand), &e);
5427 if (r != X86EMUL_CONTINUE)
Babu Moger3f3393b2020-09-11 14:29:05 -05005428 return kvm_handle_memory_failure(vcpu, r, &e);
Sean Christopherson55d23752018-12-03 13:53:18 -08005429
Sean Christophersonce8fe7b2020-03-20 14:28:31 -07005430 /*
5431 * Nested EPT roots are always held through guest_mmu,
5432 * not root_mmu.
5433 */
5434 mmu = &vcpu->arch.guest_mmu;
5435
Sean Christopherson55d23752018-12-03 13:53:18 -08005436 switch (type) {
Sean Christopherson55d23752018-12-03 13:53:18 -08005437 case VMX_EPT_EXTENT_CONTEXT:
Sean Christophersoneed00302020-03-20 14:27:58 -07005438 if (!nested_vmx_check_eptp(vcpu, operand.eptp))
Sean Christophersonb2656e42020-06-08 18:56:07 -07005439 return nested_vmx_fail(vcpu,
Sean Christophersoneed00302020-03-20 14:27:58 -07005440 VMXERR_INVALID_OPERAND_TO_INVEPT_INVVPID);
Sean Christophersonf8aa7e32020-03-20 14:27:59 -07005441
Sean Christophersonce8fe7b2020-03-20 14:28:31 -07005442 roots_to_free = 0;
Sean Christophersonbe01e8e2020-03-20 14:28:32 -07005443 if (nested_ept_root_matches(mmu->root_hpa, mmu->root_pgd,
Sean Christophersonce8fe7b2020-03-20 14:28:31 -07005444 operand.eptp))
5445 roots_to_free |= KVM_MMU_ROOT_CURRENT;
5446
5447 for (i = 0; i < KVM_MMU_NUM_PREV_ROOTS; i++) {
5448 if (nested_ept_root_matches(mmu->prev_roots[i].hpa,
Sean Christophersonbe01e8e2020-03-20 14:28:32 -07005449 mmu->prev_roots[i].pgd,
Sean Christophersonce8fe7b2020-03-20 14:28:31 -07005450 operand.eptp))
5451 roots_to_free |= KVM_MMU_ROOT_PREVIOUS(i);
5452 }
5453 break;
Sean Christophersoneed00302020-03-20 14:27:58 -07005454 case VMX_EPT_EXTENT_GLOBAL:
Sean Christophersonce8fe7b2020-03-20 14:28:31 -07005455 roots_to_free = KVM_MMU_ROOTS_ALL;
Sean Christopherson55d23752018-12-03 13:53:18 -08005456 break;
5457 default:
Sean Christophersonf9336e32020-05-04 08:35:06 -07005458 BUG();
Sean Christopherson55d23752018-12-03 13:53:18 -08005459 break;
5460 }
5461
Sean Christophersonce8fe7b2020-03-20 14:28:31 -07005462 if (roots_to_free)
5463 kvm_mmu_free_roots(vcpu, mmu, roots_to_free);
5464
Sean Christopherson55d23752018-12-03 13:53:18 -08005465 return nested_vmx_succeed(vcpu);
5466}
5467
5468static int handle_invvpid(struct kvm_vcpu *vcpu)
5469{
5470 struct vcpu_vmx *vmx = to_vmx(vcpu);
5471 u32 vmx_instruction_info;
5472 unsigned long type, types;
5473 gva_t gva;
5474 struct x86_exception e;
5475 struct {
5476 u64 vpid;
5477 u64 gla;
5478 } operand;
5479 u16 vpid02;
Vipin Sharma329bd562021-11-09 17:44:25 +00005480 int r, gpr_index;
Sean Christopherson55d23752018-12-03 13:53:18 -08005481
5482 if (!(vmx->nested.msrs.secondary_ctls_high &
5483 SECONDARY_EXEC_ENABLE_VPID) ||
5484 !(vmx->nested.msrs.vpid_caps & VMX_VPID_INVVPID_BIT)) {
5485 kvm_queue_exception(vcpu, UD_VECTOR);
5486 return 1;
5487 }
5488
5489 if (!nested_vmx_check_permission(vcpu))
5490 return 1;
5491
5492 vmx_instruction_info = vmcs_read32(VMX_INSTRUCTION_INFO);
Vipin Sharma329bd562021-11-09 17:44:25 +00005493 gpr_index = vmx_get_instr_info_reg2(vmx_instruction_info);
5494 type = kvm_register_read(vcpu, gpr_index);
Sean Christopherson55d23752018-12-03 13:53:18 -08005495
5496 types = (vmx->nested.msrs.vpid_caps &
5497 VMX_VPID_EXTENT_SUPPORTED_MASK) >> 8;
5498
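	/*
	 * After the shift, bit N of 'types' corresponds to INVVPID type N:
	 * 0 = individual address, 1 = single context, 2 = all context,
	 * 3 = single context retaining globals.
	 */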
5499 if (type >= 32 || !(types & (1 << type)))
Sean Christophersonb2656e42020-06-08 18:56:07 -07005500 return nested_vmx_fail(vcpu,
Sean Christopherson55d23752018-12-03 13:53:18 -08005501 VMXERR_INVALID_OPERAND_TO_INVEPT_INVVPID);
5502
5503	/* According to the Intel VMX instruction reference, the memory
5504 * operand is read even if it isn't needed (e.g., for type==global)
5505 */
Sean Christopherson5addc232020-04-15 13:34:53 -07005506 if (get_vmx_mem_address(vcpu, vmx_get_exit_qual(vcpu),
Eugene Korenevskyfdb28612019-06-06 00:19:16 +03005507 vmx_instruction_info, false, sizeof(operand), &gva))
Sean Christopherson55d23752018-12-03 13:53:18 -08005508 return 1;
Vitaly Kuznetsov7a35e512020-06-05 13:59:05 +02005509 r = kvm_read_guest_virt(vcpu, gva, &operand, sizeof(operand), &e);
5510 if (r != X86EMUL_CONTINUE)
Babu Moger3f3393b2020-09-11 14:29:05 -05005511 return kvm_handle_memory_failure(vcpu, r, &e);
Vitaly Kuznetsov7a35e512020-06-05 13:59:05 +02005512
Sean Christopherson55d23752018-12-03 13:53:18 -08005513 if (operand.vpid >> 16)
Sean Christophersonb2656e42020-06-08 18:56:07 -07005514 return nested_vmx_fail(vcpu,
Sean Christopherson55d23752018-12-03 13:53:18 -08005515 VMXERR_INVALID_OPERAND_TO_INVEPT_INVVPID);
5516
5517 vpid02 = nested_get_vpid02(vcpu);
5518 switch (type) {
5519 case VMX_VPID_EXTENT_INDIVIDUAL_ADDR:
5520 if (!operand.vpid ||
5521 is_noncanonical_address(operand.gla, vcpu))
Sean Christophersonb2656e42020-06-08 18:56:07 -07005522 return nested_vmx_fail(vcpu,
Sean Christopherson55d23752018-12-03 13:53:18 -08005523 VMXERR_INVALID_OPERAND_TO_INVEPT_INVVPID);
Sean Christophersonbc41d0c2020-03-20 14:28:09 -07005524 vpid_sync_vcpu_addr(vpid02, operand.gla);
Sean Christopherson55d23752018-12-03 13:53:18 -08005525 break;
5526 case VMX_VPID_EXTENT_SINGLE_CONTEXT:
5527 case VMX_VPID_EXTENT_SINGLE_NON_GLOBAL:
5528 if (!operand.vpid)
Sean Christophersonb2656e42020-06-08 18:56:07 -07005529 return nested_vmx_fail(vcpu,
Sean Christopherson55d23752018-12-03 13:53:18 -08005530 VMXERR_INVALID_OPERAND_TO_INVEPT_INVVPID);
Sean Christopherson446ace42020-03-20 14:28:05 -07005531 vpid_sync_context(vpid02);
Sean Christopherson55d23752018-12-03 13:53:18 -08005532 break;
5533 case VMX_VPID_EXTENT_ALL_CONTEXT:
Sean Christopherson446ace42020-03-20 14:28:05 -07005534 vpid_sync_context(vpid02);
Sean Christopherson55d23752018-12-03 13:53:18 -08005535 break;
5536 default:
5537 WARN_ON_ONCE(1);
5538 return kvm_skip_emulated_instruction(vcpu);
5539 }
5540
Junaid Shahidd6e3f832020-03-20 14:28:00 -07005541 /*
5542 * Sync the shadow page tables if EPT is disabled, L1 is invalidating
Sean Christopherson25b62c62021-06-09 16:42:29 -07005543 * linear mappings for L2 (tagged with L2's VPID). Free all guest
5544 * roots as VPIDs are not tracked in the MMU role.
Junaid Shahidd6e3f832020-03-20 14:28:00 -07005545 *
5546 * Note, this operates on root_mmu, not guest_mmu, as L1 and L2 share
5547 * an MMU when EPT is disabled.
5548 *
5549	 * TODO: sync only the affected SPTEs for INDIVIDUAL_ADDR.
5550 */
5551 if (!enable_ept)
Sean Christopherson25b62c62021-06-09 16:42:29 -07005552 kvm_mmu_free_guest_mode_roots(vcpu, &vcpu->arch.root_mmu);
Junaid Shahidd6e3f832020-03-20 14:28:00 -07005553
Sean Christopherson55d23752018-12-03 13:53:18 -08005554 return nested_vmx_succeed(vcpu);
5555}
5556
5557static int nested_vmx_eptp_switching(struct kvm_vcpu *vcpu,
5558 struct vmcs12 *vmcs12)
5559{
Sean Christopherson2b3eaf82019-04-30 10:36:19 -07005560 u32 index = kvm_rcx_read(vcpu);
Sean Christophersonac6389a2020-03-02 18:02:38 -08005561 u64 new_eptp;
Sean Christopherson55d23752018-12-03 13:53:18 -08005562
Sean Christophersonc5ffd402021-06-09 16:42:35 -07005563 if (WARN_ON_ONCE(!nested_cpu_has_ept(vmcs12)))
Sean Christopherson55d23752018-12-03 13:53:18 -08005564 return 1;
Sean Christopherson55d23752018-12-03 13:53:18 -08005565 if (index >= VMFUNC_EPTP_ENTRIES)
5566 return 1;
5567
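	/*
	 * The EPTP list is a 4-KByte page of 512 8-byte entries; fetch the
	 * candidate EPTP that L1 placed at 'index' (taken from ECX above).
	 */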
Sean Christopherson55d23752018-12-03 13:53:18 -08005568 if (kvm_vcpu_read_guest_page(vcpu, vmcs12->eptp_list_address >> PAGE_SHIFT,
Sean Christophersonac6389a2020-03-02 18:02:38 -08005569 &new_eptp, index * 8, 8))
Sean Christopherson55d23752018-12-03 13:53:18 -08005570 return 1;
5571
Sean Christopherson55d23752018-12-03 13:53:18 -08005572 /*
5573 * If the (L2) guest does a vmfunc to the currently
5574 * active ept pointer, we don't have to do anything else
5575 */
Sean Christophersonac6389a2020-03-02 18:02:38 -08005576 if (vmcs12->ept_pointer != new_eptp) {
5577 if (!nested_vmx_check_eptp(vcpu, new_eptp))
Sean Christopherson55d23752018-12-03 13:53:18 -08005578 return 1;
5579
Sean Christophersonac6389a2020-03-02 18:02:38 -08005580 vmcs12->ept_pointer = new_eptp;
Sean Christopherson39353ab2021-06-09 16:42:31 -07005581 nested_ept_new_eptp(vcpu);
Sean Christophersonc805f5d2021-03-04 17:10:57 -08005582
Sean Christopherson39353ab2021-06-09 16:42:31 -07005583 if (!nested_cpu_has_vpid(vmcs12))
5584 kvm_make_request(KVM_REQ_TLB_FLUSH_GUEST, vcpu);
Sean Christopherson55d23752018-12-03 13:53:18 -08005585 }
5586
5587 return 0;
5588}
5589
5590static int handle_vmfunc(struct kvm_vcpu *vcpu)
5591{
5592 struct vcpu_vmx *vmx = to_vmx(vcpu);
5593 struct vmcs12 *vmcs12;
Sean Christopherson2b3eaf82019-04-30 10:36:19 -07005594 u32 function = kvm_rax_read(vcpu);
Sean Christopherson55d23752018-12-03 13:53:18 -08005595
5596 /*
5597 * VMFUNC is only supported for nested guests, but we always enable the
5598 * secondary control for simplicity; for non-nested mode, fake that we
5599	 * didn't enable it by injecting #UD.
5600 */
5601 if (!is_guest_mode(vcpu)) {
5602 kvm_queue_exception(vcpu, UD_VECTOR);
5603 return 1;
5604 }
5605
5606 vmcs12 = get_vmcs12(vcpu);
Sean Christopherson546e8392021-06-09 16:42:34 -07005607
5608 /*
5609 * #UD on out-of-bounds function has priority over VM-Exit, and VMFUNC
5610 * is enabled in vmcs02 if and only if it's enabled in vmcs12.
5611 */
5612 if (WARN_ON_ONCE((function > 63) || !nested_cpu_has_vmfunc(vmcs12))) {
5613 kvm_queue_exception(vcpu, UD_VECTOR);
5614 return 1;
5615 }
5616
Sean Christopherson0e752252021-06-09 16:42:22 -07005617 if (!(vmcs12->vm_function_control & BIT_ULL(function)))
Sean Christopherson55d23752018-12-03 13:53:18 -08005618 goto fail;
5619
5620 switch (function) {
5621 case 0:
5622 if (nested_vmx_eptp_switching(vcpu, vmcs12))
5623 goto fail;
5624 break;
5625 default:
5626 goto fail;
5627 }
5628 return kvm_skip_emulated_instruction(vcpu);
5629
5630fail:
Sean Christopherson8e533242020-11-06 17:03:12 +08005631 /*
5632 * This is effectively a reflected VM-Exit, as opposed to a synthesized
5633 * nested VM-Exit. Pass the original exit reason, i.e. don't hardcode
5634 * EXIT_REASON_VMFUNC as the exit reason.
5635 */
5636 nested_vmx_vmexit(vcpu, vmx->exit_reason.full,
Sean Christopherson87915852020-04-15 13:34:54 -07005637 vmx_get_intr_info(vcpu),
Sean Christopherson5addc232020-04-15 13:34:53 -07005638 vmx_get_exit_qual(vcpu));
Sean Christopherson55d23752018-12-03 13:53:18 -08005639 return 1;
5640}
5641
Oliver Uptone71237d2020-02-04 15:26:30 -08005642/*
5643 * Return true if an IO instruction with the specified port and size should cause
5644 * a VM-exit into L1.
5645 */
5646bool nested_vmx_check_io_bitmaps(struct kvm_vcpu *vcpu, unsigned int port,
5647 int size)
Sean Christopherson55d23752018-12-03 13:53:18 -08005648{
Oliver Uptone71237d2020-02-04 15:26:30 -08005649 struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
Sean Christopherson55d23752018-12-03 13:53:18 -08005650 gpa_t bitmap, last_bitmap;
Sean Christopherson55d23752018-12-03 13:53:18 -08005651 u8 b;
5652
Yu Zhang64c78502021-09-30 01:51:53 +08005653 last_bitmap = INVALID_GPA;
Sean Christopherson55d23752018-12-03 13:53:18 -08005654 b = -1;
5655
5656 while (size > 0) {
5657 if (port < 0x8000)
5658 bitmap = vmcs12->io_bitmap_a;
5659 else if (port < 0x10000)
5660 bitmap = vmcs12->io_bitmap_b;
5661 else
5662 return true;
5663 bitmap += (port & 0x7fff) / 8;
5664
5665 if (last_bitmap != bitmap)
5666 if (kvm_vcpu_read_guest(vcpu, bitmap, &b, 1))
5667 return true;
5668 if (b & (1 << (port & 7)))
5669 return true;
5670
5671 port++;
5672 size--;
5673 last_bitmap = bitmap;
5674 }
5675
5676 return false;
5677}
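
/*
 * Worked example for nested_vmx_check_io_bitmaps(): an access to port 0x3f8
 * with size 1 maps to byte 0x7f, bit 0 of io_bitmap_a; the access causes a
 * VM-exit to L1 iff that bit is set (or the bitmap can't be read).
 */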
5678
Oliver Uptone71237d2020-02-04 15:26:30 -08005679static bool nested_vmx_exit_handled_io(struct kvm_vcpu *vcpu,
5680 struct vmcs12 *vmcs12)
5681{
5682 unsigned long exit_qualification;
Oliver Upton35a57132020-02-04 15:26:31 -08005683 unsigned short port;
Oliver Uptone71237d2020-02-04 15:26:30 -08005684 int size;
5685
5686 if (!nested_cpu_has(vmcs12, CPU_BASED_USE_IO_BITMAPS))
5687 return nested_cpu_has(vmcs12, CPU_BASED_UNCOND_IO_EXITING);
5688
Sean Christopherson5addc232020-04-15 13:34:53 -07005689 exit_qualification = vmx_get_exit_qual(vcpu);
Oliver Uptone71237d2020-02-04 15:26:30 -08005690
5691 port = exit_qualification >> 16;
5692 size = (exit_qualification & 7) + 1;
5693
5694 return nested_vmx_check_io_bitmaps(vcpu, port, size);
5695}
5696
Sean Christopherson55d23752018-12-03 13:53:18 -08005697/*
Miaohe Lin463bfee2020-02-14 10:44:05 +08005698 * Return true if we should exit from L2 to L1 to handle an MSR access,
Sean Christopherson55d23752018-12-03 13:53:18 -08005699 * rather than handle it ourselves in L0. I.e., check whether L1 expressed
5700 * disinterest in the current event (read or write a specific MSR) by using an
5701 * MSR bitmap. This may be the case even when L0 doesn't use MSR bitmaps.
5702 */
5703static bool nested_vmx_exit_handled_msr(struct kvm_vcpu *vcpu,
Sean Christopherson8e533242020-11-06 17:03:12 +08005704 struct vmcs12 *vmcs12,
5705 union vmx_exit_reason exit_reason)
Sean Christopherson55d23752018-12-03 13:53:18 -08005706{
Sean Christopherson2b3eaf82019-04-30 10:36:19 -07005707 u32 msr_index = kvm_rcx_read(vcpu);
Sean Christopherson55d23752018-12-03 13:53:18 -08005708 gpa_t bitmap;
5709
5710 if (!nested_cpu_has(vmcs12, CPU_BASED_USE_MSR_BITMAPS))
5711 return true;
5712
5713 /*
5714 * The MSR_BITMAP page is divided into four 1024-byte bitmaps,
5715 * for the four combinations of read/write and low/high MSR numbers.
5716 * First we need to figure out which of the four to use:
5717 */
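	/*
	 * Per the SDM the page is laid out as: bytes 0x000-0x3ff read bitmap
	 * for low MSRs (0x00000000-0x00001fff), 0x400-0x7ff read bitmap for
	 * high MSRs (0xc0000000-0xc0001fff), 0x800-0xbff write bitmap for
	 * low MSRs, 0xc00-0xfff write bitmap for high MSRs; hence "+ 2048"
	 * below selects the write half and "+ 1024" the high-MSR quarter.
	 */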
5718 bitmap = vmcs12->msr_bitmap;
Sean Christopherson8e533242020-11-06 17:03:12 +08005719 if (exit_reason.basic == EXIT_REASON_MSR_WRITE)
Sean Christopherson55d23752018-12-03 13:53:18 -08005720 bitmap += 2048;
5721 if (msr_index >= 0xc0000000) {
5722 msr_index -= 0xc0000000;
5723 bitmap += 1024;
5724 }
5725
5726 /* Then read the msr_index'th bit from this bitmap: */
5727 if (msr_index < 1024*8) {
5728 unsigned char b;
5729 if (kvm_vcpu_read_guest(vcpu, bitmap + msr_index/8, &b, 1))
5730 return true;
5731 return 1 & (b >> (msr_index & 7));
5732 } else
5733 return true; /* let L1 handle the wrong parameter */
5734}
5735
5736/*
5737 * Return true if we should exit from L2 to L1 to handle a CR access exit,
5738 * rather than handle it ourselves in L0. I.e., check if L1 wanted to
5739 * intercept (via guest_host_mask etc.) the current event.
5740 */
5741static bool nested_vmx_exit_handled_cr(struct kvm_vcpu *vcpu,
5742 struct vmcs12 *vmcs12)
5743{
Sean Christopherson5addc232020-04-15 13:34:53 -07005744 unsigned long exit_qualification = vmx_get_exit_qual(vcpu);
Sean Christopherson55d23752018-12-03 13:53:18 -08005745 int cr = exit_qualification & 15;
5746 int reg;
5747 unsigned long val;
5748
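	/*
	 * Exit-qualification layout for CR accesses (per the SDM): bits 3:0
	 * give the CR number, bits 5:4 the access type (0 = MOV to CR,
	 * 1 = MOV from CR, 2 = CLTS, 3 = LMSW), bits 11:8 the GPR used by
	 * MOV, and bits 31:16 the LMSW source data.
	 */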
5749 switch ((exit_qualification >> 4) & 3) {
5750 case 0: /* mov to cr */
5751 reg = (exit_qualification >> 8) & 15;
Sean Christopherson27b4a9c42021-04-21 19:21:28 -07005752 val = kvm_register_read(vcpu, reg);
Sean Christopherson55d23752018-12-03 13:53:18 -08005753 switch (cr) {
5754 case 0:
5755 if (vmcs12->cr0_guest_host_mask &
5756 (val ^ vmcs12->cr0_read_shadow))
5757 return true;
5758 break;
5759 case 3:
Sean Christopherson55d23752018-12-03 13:53:18 -08005760 if (nested_cpu_has(vmcs12, CPU_BASED_CR3_LOAD_EXITING))
5761 return true;
5762 break;
5763 case 4:
5764 if (vmcs12->cr4_guest_host_mask &
5765 (vmcs12->cr4_read_shadow ^ val))
5766 return true;
5767 break;
5768 case 8:
5769 if (nested_cpu_has(vmcs12, CPU_BASED_CR8_LOAD_EXITING))
5770 return true;
5771 break;
5772 }
5773 break;
5774 case 2: /* clts */
5775 if ((vmcs12->cr0_guest_host_mask & X86_CR0_TS) &&
5776 (vmcs12->cr0_read_shadow & X86_CR0_TS))
5777 return true;
5778 break;
5779 case 1: /* mov from cr */
5780 switch (cr) {
5781 case 3:
5782 if (vmcs12->cpu_based_vm_exec_control &
5783 CPU_BASED_CR3_STORE_EXITING)
5784 return true;
5785 break;
5786 case 8:
5787 if (vmcs12->cpu_based_vm_exec_control &
5788 CPU_BASED_CR8_STORE_EXITING)
5789 return true;
5790 break;
5791 }
5792 break;
5793 case 3: /* lmsw */
5794 /*
5795 * lmsw can change bits 1..3 of cr0, and only set bit 0 of
5796 * cr0. Other attempted changes are ignored, with no exit.
5797 */
5798 val = (exit_qualification >> LMSW_SOURCE_DATA_SHIFT) & 0x0f;
5799 if (vmcs12->cr0_guest_host_mask & 0xe &
5800 (val ^ vmcs12->cr0_read_shadow))
5801 return true;
5802 if ((vmcs12->cr0_guest_host_mask & 0x1) &&
5803 !(vmcs12->cr0_read_shadow & 0x1) &&
5804 (val & 0x1))
5805 return true;
5806 break;
5807 }
5808 return false;
5809}
5810
Sean Christopherson72add912021-04-12 16:21:42 +12005811static bool nested_vmx_exit_handled_encls(struct kvm_vcpu *vcpu,
5812 struct vmcs12 *vmcs12)
5813{
5814 u32 encls_leaf;
5815
5816 if (!guest_cpuid_has(vcpu, X86_FEATURE_SGX) ||
5817 !nested_cpu_has2(vmcs12, SECONDARY_EXEC_ENCLS_EXITING))
5818 return false;
5819
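	/*
	 * ENCLS leaves 63 and above are all governed by bit 63 of the
	 * ENCLS-exiting bitmap, hence the clamp below.
	 */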
5820 encls_leaf = kvm_rax_read(vcpu);
5821 if (encls_leaf > 62)
5822 encls_leaf = 63;
5823 return vmcs12->encls_exiting_bitmap & BIT_ULL(encls_leaf);
5824}
5825
Sean Christopherson55d23752018-12-03 13:53:18 -08005826static bool nested_vmx_exit_handled_vmcs_access(struct kvm_vcpu *vcpu,
5827 struct vmcs12 *vmcs12, gpa_t bitmap)
5828{
5829 u32 vmx_instruction_info;
5830 unsigned long field;
5831 u8 b;
5832
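	/*
	 * The VMREAD/VMWRITE bitmap is a 4-KByte page indexed by the low
	 * 15 bits of the field encoding; encodings with bit 15 set fall
	 * outside the bitmap and unconditionally exit to L1.
	 */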
5833 if (!nested_cpu_has_shadow_vmcs(vmcs12))
5834 return true;
5835
5836 /* Decode instruction info and find the field to access */
5837 vmx_instruction_info = vmcs_read32(VMX_INSTRUCTION_INFO);
5838 field = kvm_register_read(vcpu, (((vmx_instruction_info) >> 28) & 0xf));
5839
5840 /* Out-of-range fields always cause a VM exit from L2 to L1 */
5841 if (field >> 15)
5842 return true;
5843
5844 if (kvm_vcpu_read_guest(vcpu, bitmap + field/8, &b, 1))
5845 return true;
5846
5847 return 1 & (b >> (field & 7));
5848}
5849
Oliver Uptonb045ae92020-04-14 22:47:45 +00005850static bool nested_vmx_exit_handled_mtf(struct vmcs12 *vmcs12)
5851{
5852 u32 entry_intr_info = vmcs12->vm_entry_intr_info_field;
5853
5854 if (nested_cpu_has_mtf(vmcs12))
5855 return true;
5856
5857 /*
5858 * An MTF VM-exit may be injected into the guest by setting the
5859 * interruption-type to 7 (other event) and the vector field to 0. Such
5860 * is the case regardless of the 'monitor trap flag' VM-execution
5861 * control.
5862 */
5863 return entry_intr_info == (INTR_INFO_VALID_MASK
5864 | INTR_TYPE_OTHER_EVENT);
5865}
5866
Sean Christopherson55d23752018-12-03 13:53:18 -08005867/*
Sean Christopherson2c1f3322020-04-15 10:55:14 -07005868 * Return true if L0 wants to handle an exit from L2 regardless of whether or not
5869 * L1 wants the exit. Only call this when in is_guest_mode (L2).
Sean Christopherson55d23752018-12-03 13:53:18 -08005870 */
Sean Christopherson8e533242020-11-06 17:03:12 +08005871static bool nested_vmx_l0_wants_exit(struct kvm_vcpu *vcpu,
5872 union vmx_exit_reason exit_reason)
Sean Christopherson55d23752018-12-03 13:53:18 -08005873{
Sean Christopherson2c1f3322020-04-15 10:55:14 -07005874 u32 intr_info;
5875
Sean Christopherson8e533242020-11-06 17:03:12 +08005876 switch ((u16)exit_reason.basic) {
Sean Christopherson2c1f3322020-04-15 10:55:14 -07005877 case EXIT_REASON_EXCEPTION_NMI:
Sean Christopherson87915852020-04-15 13:34:54 -07005878 intr_info = vmx_get_intr_info(vcpu);
Sean Christopherson2c1f3322020-04-15 10:55:14 -07005879 if (is_nmi(intr_info))
5880 return true;
5881 else if (is_page_fault(intr_info))
Sean Christopherson18712c12021-08-11 21:56:15 -07005882 return vcpu->arch.apf.host_apf_flags ||
5883 vmx_need_pf_intercept(vcpu);
Sean Christopherson2c1f3322020-04-15 10:55:14 -07005884 else if (is_debug(intr_info) &&
5885 vcpu->guest_debug &
5886 (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP))
5887 return true;
5888 else if (is_breakpoint(intr_info) &&
5889 vcpu->guest_debug & KVM_GUESTDBG_USE_SW_BP)
5890 return true;
Sean Christophersonb33bb782021-06-22 10:22:44 -07005891 else if (is_alignment_check(intr_info) &&
5892 !vmx_guest_inject_ac(vcpu))
5893 return true;
Sean Christopherson2c1f3322020-04-15 10:55:14 -07005894 return false;
5895 case EXIT_REASON_EXTERNAL_INTERRUPT:
5896 return true;
5897 case EXIT_REASON_MCE_DURING_VMENTRY:
5898 return true;
5899 case EXIT_REASON_EPT_VIOLATION:
5900 /*
5901 * L0 always deals with the EPT violation. If nested EPT is
5902 * used, and the nested mmu code discovers that the address is
5903 * missing in the guest EPT table (EPT12), the EPT violation
5904 * will be injected with nested_ept_inject_page_fault()
5905 */
5906 return true;
5907 case EXIT_REASON_EPT_MISCONFIG:
5908 /*
5909 * L2 never uses directly L1's EPT, but rather L0's own EPT
5910 * table (shadow on EPT) or a merged EPT table that L0 built
5911 * (EPT on EPT). So any problems with the structure of the
5912 * table is L0's fault.
5913 */
5914 return true;
5915 case EXIT_REASON_PREEMPTION_TIMER:
5916 return true;
5917 case EXIT_REASON_PML_FULL:
Sean Christophersonc3bb9a22021-02-12 16:50:07 -08005918 /*
5919 * PML is emulated for an L1 VMM and should never be enabled in
5920 * vmcs02, always "handle" PML_FULL by exiting to userspace.
5921 */
Sean Christopherson2c1f3322020-04-15 10:55:14 -07005922 return true;
5923 case EXIT_REASON_VMFUNC:
5924 /* VM functions are emulated through L2->L0 vmexits. */
5925 return true;
Chenyi Qiang24a996a2021-09-14 17:50:41 +08005926 case EXIT_REASON_BUS_LOCK:
5927 /*
5928 * At present, bus lock VM exit is never exposed to L1.
5929 * Handle L2's bus locks in L0 directly.
5930 */
5931 return true;
Sean Christopherson2c1f3322020-04-15 10:55:14 -07005932 default:
5933 break;
5934 }
5935 return false;
5936}
5937
5938/*
5939 * Return true if L1 wants to intercept an exit from L2. Only call this when in
5940 * is_guest_mode (L2).
5941 */
Sean Christopherson8e533242020-11-06 17:03:12 +08005942static bool nested_vmx_l1_wants_exit(struct kvm_vcpu *vcpu,
5943 union vmx_exit_reason exit_reason)
Sean Christopherson2c1f3322020-04-15 10:55:14 -07005944{
Sean Christopherson55d23752018-12-03 13:53:18 -08005945 struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
Sean Christopherson9bd4af22020-04-21 00:53:27 -07005946 u32 intr_info;
Sean Christopherson55d23752018-12-03 13:53:18 -08005947
Sean Christopherson8e533242020-11-06 17:03:12 +08005948 switch ((u16)exit_reason.basic) {
Sean Christopherson55d23752018-12-03 13:53:18 -08005949 case EXIT_REASON_EXCEPTION_NMI:
Sean Christopherson87915852020-04-15 13:34:54 -07005950 intr_info = vmx_get_intr_info(vcpu);
Sean Christopherson55d23752018-12-03 13:53:18 -08005951 if (is_nmi(intr_info))
Sean Christopherson2c1f3322020-04-15 10:55:14 -07005952 return true;
Sean Christopherson55d23752018-12-03 13:53:18 -08005953 else if (is_page_fault(intr_info))
Sean Christopherson2c1f3322020-04-15 10:55:14 -07005954 return true;
Sean Christopherson55d23752018-12-03 13:53:18 -08005955 return vmcs12->exception_bitmap &
5956 (1u << (intr_info & INTR_INFO_VECTOR_MASK));
5957 case EXIT_REASON_EXTERNAL_INTERRUPT:
Sean Christopherson2c1f3322020-04-15 10:55:14 -07005958 return nested_exit_on_intr(vcpu);
Sean Christopherson55d23752018-12-03 13:53:18 -08005959 case EXIT_REASON_TRIPLE_FAULT:
5960 return true;
Xiaoyao Li9dadc2f2019-12-06 16:45:24 +08005961 case EXIT_REASON_INTERRUPT_WINDOW:
5962 return nested_cpu_has(vmcs12, CPU_BASED_INTR_WINDOW_EXITING);
Sean Christopherson55d23752018-12-03 13:53:18 -08005963 case EXIT_REASON_NMI_WINDOW:
Xiaoyao Li4e2a0bc2019-12-06 16:45:25 +08005964 return nested_cpu_has(vmcs12, CPU_BASED_NMI_WINDOW_EXITING);
Sean Christopherson55d23752018-12-03 13:53:18 -08005965 case EXIT_REASON_TASK_SWITCH:
5966 return true;
5967 case EXIT_REASON_CPUID:
5968 return true;
5969 case EXIT_REASON_HLT:
5970 return nested_cpu_has(vmcs12, CPU_BASED_HLT_EXITING);
5971 case EXIT_REASON_INVD:
5972 return true;
5973 case EXIT_REASON_INVLPG:
5974 return nested_cpu_has(vmcs12, CPU_BASED_INVLPG_EXITING);
5975 case EXIT_REASON_RDPMC:
5976 return nested_cpu_has(vmcs12, CPU_BASED_RDPMC_EXITING);
5977 case EXIT_REASON_RDRAND:
5978 return nested_cpu_has2(vmcs12, SECONDARY_EXEC_RDRAND_EXITING);
5979 case EXIT_REASON_RDSEED:
5980 return nested_cpu_has2(vmcs12, SECONDARY_EXEC_RDSEED_EXITING);
5981 case EXIT_REASON_RDTSC: case EXIT_REASON_RDTSCP:
5982 return nested_cpu_has(vmcs12, CPU_BASED_RDTSC_EXITING);
5983 case EXIT_REASON_VMREAD:
5984 return nested_vmx_exit_handled_vmcs_access(vcpu, vmcs12,
5985 vmcs12->vmread_bitmap);
5986 case EXIT_REASON_VMWRITE:
5987 return nested_vmx_exit_handled_vmcs_access(vcpu, vmcs12,
5988 vmcs12->vmwrite_bitmap);
5989 case EXIT_REASON_VMCALL: case EXIT_REASON_VMCLEAR:
5990 case EXIT_REASON_VMLAUNCH: case EXIT_REASON_VMPTRLD:
5991 case EXIT_REASON_VMPTRST: case EXIT_REASON_VMRESUME:
5992 case EXIT_REASON_VMOFF: case EXIT_REASON_VMON:
5993 case EXIT_REASON_INVEPT: case EXIT_REASON_INVVPID:
5994 /*
5995 * VMX instructions trap unconditionally. This allows L1 to
5996 * emulate them for its L2 guest, i.e., allows 3-level nesting!
5997 */
5998 return true;
5999 case EXIT_REASON_CR_ACCESS:
6000 return nested_vmx_exit_handled_cr(vcpu, vmcs12);
6001 case EXIT_REASON_DR_ACCESS:
6002 return nested_cpu_has(vmcs12, CPU_BASED_MOV_DR_EXITING);
6003 case EXIT_REASON_IO_INSTRUCTION:
6004 return nested_vmx_exit_handled_io(vcpu, vmcs12);
6005 case EXIT_REASON_GDTR_IDTR: case EXIT_REASON_LDTR_TR:
6006 return nested_cpu_has2(vmcs12, SECONDARY_EXEC_DESC);
6007 case EXIT_REASON_MSR_READ:
6008 case EXIT_REASON_MSR_WRITE:
6009 return nested_vmx_exit_handled_msr(vcpu, vmcs12, exit_reason);
6010 case EXIT_REASON_INVALID_STATE:
6011 return true;
6012 case EXIT_REASON_MWAIT_INSTRUCTION:
6013 return nested_cpu_has(vmcs12, CPU_BASED_MWAIT_EXITING);
6014 case EXIT_REASON_MONITOR_TRAP_FLAG:
Oliver Uptonb045ae92020-04-14 22:47:45 +00006015 return nested_vmx_exit_handled_mtf(vmcs12);
Sean Christopherson55d23752018-12-03 13:53:18 -08006016 case EXIT_REASON_MONITOR_INSTRUCTION:
6017 return nested_cpu_has(vmcs12, CPU_BASED_MONITOR_EXITING);
6018 case EXIT_REASON_PAUSE_INSTRUCTION:
6019 return nested_cpu_has(vmcs12, CPU_BASED_PAUSE_EXITING) ||
6020 nested_cpu_has2(vmcs12,
6021 SECONDARY_EXEC_PAUSE_LOOP_EXITING);
6022 case EXIT_REASON_MCE_DURING_VMENTRY:
Sean Christopherson2c1f3322020-04-15 10:55:14 -07006023 return true;
Sean Christopherson55d23752018-12-03 13:53:18 -08006024 case EXIT_REASON_TPR_BELOW_THRESHOLD:
6025 return nested_cpu_has(vmcs12, CPU_BASED_TPR_SHADOW);
6026 case EXIT_REASON_APIC_ACCESS:
6027 case EXIT_REASON_APIC_WRITE:
6028 case EXIT_REASON_EOI_INDUCED:
6029 /*
6030 * The controls for "virtualize APIC accesses," "APIC-
6031 * register virtualization," and "virtual-interrupt
6032 * delivery" only come from vmcs12.
6033 */
6034 return true;
Sean Christopherson55d23752018-12-03 13:53:18 -08006035 case EXIT_REASON_INVPCID:
6036 return
6037 nested_cpu_has2(vmcs12, SECONDARY_EXEC_ENABLE_INVPCID) &&
6038 nested_cpu_has(vmcs12, CPU_BASED_INVLPG_EXITING);
6039 case EXIT_REASON_WBINVD:
6040 return nested_cpu_has2(vmcs12, SECONDARY_EXEC_WBINVD_EXITING);
6041 case EXIT_REASON_XSETBV:
6042 return true;
6043 case EXIT_REASON_XSAVES: case EXIT_REASON_XRSTORS:
6044 /*
6045 * This should never happen, since it is not possible to
6046		 * set XSS to a non-zero value, neither in L1 nor in L2.
6047		 * If it were, XSS would have to be checked against
6048 * the XSS exit bitmap in vmcs12.
6049 */
6050 return nested_cpu_has2(vmcs12, SECONDARY_EXEC_XSAVES);
Tao Xubf653b72019-07-16 14:55:51 +08006051 case EXIT_REASON_UMWAIT:
6052 case EXIT_REASON_TPAUSE:
6053 return nested_cpu_has2(vmcs12,
6054 SECONDARY_EXEC_ENABLE_USR_WAIT_PAUSE);
Sean Christopherson72add912021-04-12 16:21:42 +12006055 case EXIT_REASON_ENCLS:
6056 return nested_vmx_exit_handled_encls(vcpu, vmcs12);
Sean Christopherson55d23752018-12-03 13:53:18 -08006057 default:
6058 return true;
6059 }
6060}
6061
Sean Christopherson7b7bd872020-04-15 10:55:11 -07006062/*
6063 * Conditionally reflect a VM-Exit into L1. Returns %true if the VM-Exit was
6064 * reflected into L1.
6065 */
Sean Christophersonf47baae2020-04-15 10:55:16 -07006066bool nested_vmx_reflect_vmexit(struct kvm_vcpu *vcpu)
Sean Christopherson7b7bd872020-04-15 10:55:11 -07006067{
Sean Christophersonfbdd5022020-04-15 10:55:12 -07006068 struct vcpu_vmx *vmx = to_vmx(vcpu);
Sean Christopherson8e533242020-11-06 17:03:12 +08006069 union vmx_exit_reason exit_reason = vmx->exit_reason;
Sean Christopherson87796552020-04-22 17:11:27 -07006070 unsigned long exit_qual;
6071 u32 exit_intr_info;
Sean Christophersonfbdd5022020-04-15 10:55:12 -07006072
6073 WARN_ON_ONCE(vmx->nested.nested_run_pending);
6074
6075 /*
6076 * Late nested VM-Fail shares the same flow as nested VM-Exit since KVM
6077 * has already loaded L2's state.
6078 */
6079 if (unlikely(vmx->fail)) {
6080 trace_kvm_nested_vmenter_failed(
6081 "hardware VM-instruction error: ",
6082 vmcs_read32(VM_INSTRUCTION_ERROR));
6083 exit_intr_info = 0;
6084 exit_qual = 0;
6085 goto reflect_vmexit;
6086 }
Sean Christopherson7b7bd872020-04-15 10:55:11 -07006087
David Edmondson0a62a032021-09-20 11:37:35 +01006088 trace_kvm_nested_vmexit(vcpu, KVM_ISA_VMX);
Sean Christopherson236871b2020-04-15 10:55:13 -07006089
Sean Christopherson2c1f3322020-04-15 10:55:14 -07006090 /* If L0 (KVM) wants the exit, it trumps L1's desires. */
6091 if (nested_vmx_l0_wants_exit(vcpu, exit_reason))
6092 return false;
6093
6094 /* If L1 doesn't want the exit, handle it in L0. */
6095 if (!nested_vmx_l1_wants_exit(vcpu, exit_reason))
Sean Christopherson7b7bd872020-04-15 10:55:11 -07006096 return false;
6097
6098 /*
Sean Christopherson1d283062020-04-15 10:55:15 -07006099 * vmcs.VM_EXIT_INTR_INFO is only valid for EXCEPTION_NMI exits. For
6100 * EXTERNAL_INTERRUPT, the value for vmcs12->vm_exit_intr_info would
6101 * need to be synthesized by querying the in-kernel LAPIC, but external
6102 * interrupts are never reflected to L1 so it's a non-issue.
Sean Christopherson7b7bd872020-04-15 10:55:11 -07006103 */
Sean Christopherson02f19652020-09-23 13:13:49 -07006104 exit_intr_info = vmx_get_intr_info(vcpu);
Sean Christophersonf315f2b2020-09-23 13:13:45 -07006105 if (is_exception_with_error_code(exit_intr_info)) {
Sean Christopherson7b7bd872020-04-15 10:55:11 -07006106 struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
6107
6108 vmcs12->vm_exit_intr_error_code =
6109 vmcs_read32(VM_EXIT_INTR_ERROR_CODE);
6110 }
Sean Christopherson02f19652020-09-23 13:13:49 -07006111 exit_qual = vmx_get_exit_qual(vcpu);
Sean Christopherson7b7bd872020-04-15 10:55:11 -07006112
Sean Christophersonfbdd5022020-04-15 10:55:12 -07006113reflect_vmexit:
Sean Christopherson8e533242020-11-06 17:03:12 +08006114 nested_vmx_vmexit(vcpu, exit_reason.full, exit_intr_info, exit_qual);
Sean Christopherson7b7bd872020-04-15 10:55:11 -07006115 return true;
6116}
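/*
 * Usage sketch (illustrative only; the real call site lives in vmx.c's exit
 * handling and may differ in detail):
 *
 *	if (is_guest_mode(vcpu) && nested_vmx_reflect_vmexit(vcpu))
 *		return 1;
 *
 * i.e. L0 emulates the exit itself only when the helper above returns false,
 * either because L0 wants the exit for its own handling or because L1 did
 * not ask for it.
 */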
Sean Christopherson55d23752018-12-03 13:53:18 -08006117
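/*
 * Layout sketch of the blob produced below (sizes per the uapi definitions,
 * stated here only for orientation): a fixed struct kvm_nested_state header,
 * then data.vmx[0].vmcs12 (VMCS12_SIZE bytes) whenever a vmcs12 is valid,
 * then data.vmx[0].shadow_vmcs12 (another VMCS12_SIZE bytes) when L2 is
 * active with a valid vmcs_link_pointer. kvm_state.size reports how much of
 * that layout was actually filled in.
 */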
6118static int vmx_get_nested_state(struct kvm_vcpu *vcpu,
6119 struct kvm_nested_state __user *user_kvm_nested_state,
6120 u32 user_data_size)
6121{
6122 struct vcpu_vmx *vmx;
6123 struct vmcs12 *vmcs12;
6124 struct kvm_nested_state kvm_state = {
6125 .flags = 0,
Liran Alon6ca00df2019-06-16 15:03:10 +03006126 .format = KVM_STATE_NESTED_FORMAT_VMX,
Sean Christopherson55d23752018-12-03 13:53:18 -08006127 .size = sizeof(kvm_state),
Peter Shier850448f2020-05-26 14:51:06 -07006128 .hdr.vmx.flags = 0,
Yu Zhang64c78502021-09-30 01:51:53 +08006129 .hdr.vmx.vmxon_pa = INVALID_GPA,
6130 .hdr.vmx.vmcs12_pa = INVALID_GPA,
Peter Shier850448f2020-05-26 14:51:06 -07006131 .hdr.vmx.preemption_timer_deadline = 0,
Sean Christopherson55d23752018-12-03 13:53:18 -08006132 };
Liran Alon6ca00df2019-06-16 15:03:10 +03006133 struct kvm_vmx_nested_state_data __user *user_vmx_nested_state =
6134 &user_kvm_nested_state->data.vmx[0];
Sean Christopherson55d23752018-12-03 13:53:18 -08006135
6136 if (!vcpu)
Liran Alon6ca00df2019-06-16 15:03:10 +03006137 return kvm_state.size + sizeof(*user_vmx_nested_state);
Sean Christopherson55d23752018-12-03 13:53:18 -08006138
6139 vmx = to_vmx(vcpu);
6140 vmcs12 = get_vmcs12(vcpu);
6141
Sean Christopherson55d23752018-12-03 13:53:18 -08006142 if (nested_vmx_allowed(vcpu) &&
6143 (vmx->nested.vmxon || vmx->nested.smm.vmxon)) {
Liran Alon6ca00df2019-06-16 15:03:10 +03006144 kvm_state.hdr.vmx.vmxon_pa = vmx->nested.vmxon_ptr;
6145 kvm_state.hdr.vmx.vmcs12_pa = vmx->nested.current_vmptr;
Sean Christopherson55d23752018-12-03 13:53:18 -08006146
6147 if (vmx_has_valid_vmcs12(vcpu)) {
Liran Alon6ca00df2019-06-16 15:03:10 +03006148 kvm_state.size += sizeof(user_vmx_nested_state->vmcs12);
Sean Christopherson55d23752018-12-03 13:53:18 -08006149
Vitaly Kuznetsov27849962021-05-26 15:20:20 +02006150 /* 'hv_evmcs_vmptr' can also be EVMPTR_MAP_PENDING here */
6151 if (vmx->nested.hv_evmcs_vmptr != EVMPTR_INVALID)
Liran Alon323d73a2019-06-26 16:09:27 +03006152 kvm_state.flags |= KVM_STATE_NESTED_EVMCS;
6153
Sean Christopherson55d23752018-12-03 13:53:18 -08006154 if (is_guest_mode(vcpu) &&
6155 nested_cpu_has_shadow_vmcs(vmcs12) &&
Yu Zhang64c78502021-09-30 01:51:53 +08006156 vmcs12->vmcs_link_pointer != INVALID_GPA)
Liran Alon6ca00df2019-06-16 15:03:10 +03006157 kvm_state.size += sizeof(user_vmx_nested_state->shadow_vmcs12);
Sean Christopherson55d23752018-12-03 13:53:18 -08006158 }
6159
6160 if (vmx->nested.smm.vmxon)
Liran Alon6ca00df2019-06-16 15:03:10 +03006161 kvm_state.hdr.vmx.smm.flags |= KVM_STATE_NESTED_SMM_VMXON;
Sean Christopherson55d23752018-12-03 13:53:18 -08006162
6163 if (vmx->nested.smm.guest_mode)
Liran Alon6ca00df2019-06-16 15:03:10 +03006164 kvm_state.hdr.vmx.smm.flags |= KVM_STATE_NESTED_SMM_GUEST_MODE;
Sean Christopherson55d23752018-12-03 13:53:18 -08006165
6166 if (is_guest_mode(vcpu)) {
6167 kvm_state.flags |= KVM_STATE_NESTED_GUEST_MODE;
6168
6169 if (vmx->nested.nested_run_pending)
6170 kvm_state.flags |= KVM_STATE_NESTED_RUN_PENDING;
Oliver Upton5ef8acb2020-02-07 02:36:07 -08006171
6172 if (vmx->nested.mtf_pending)
6173 kvm_state.flags |= KVM_STATE_NESTED_MTF_PENDING;
Peter Shier850448f2020-05-26 14:51:06 -07006174
6175 if (nested_cpu_has_preemption_timer(vmcs12) &&
6176 vmx->nested.has_preemption_timer_deadline) {
6177 kvm_state.hdr.vmx.flags |=
6178 KVM_STATE_VMX_PREEMPTION_TIMER_DEADLINE;
6179 kvm_state.hdr.vmx.preemption_timer_deadline =
6180 vmx->nested.preemption_timer_deadline;
6181 }
Sean Christopherson55d23752018-12-03 13:53:18 -08006182 }
6183 }
6184
6185 if (user_data_size < kvm_state.size)
6186 goto out;
6187
6188 if (copy_to_user(user_kvm_nested_state, &kvm_state, sizeof(kvm_state)))
6189 return -EFAULT;
6190
6191 if (!vmx_has_valid_vmcs12(vcpu))
6192 goto out;
6193
6194 /*
6195 * When running L2, the authoritative vmcs12 state is in the
6196 * vmcs02. When running L1, the authoritative vmcs12 state is
6197 * in the shadow or enlightened vmcs linked to vmcs01, unless
Sean Christopherson3731905ef2019-05-07 08:36:27 -07006198 * need_vmcs12_to_shadow_sync is set, in which case, the authoritative
Sean Christopherson55d23752018-12-03 13:53:18 -08006199 * vmcs12 state is in the vmcs12 already.
6200 */
6201 if (is_guest_mode(vcpu)) {
Sean Christopherson3731905ef2019-05-07 08:36:27 -07006202 sync_vmcs02_to_vmcs12(vcpu, vmcs12);
Sean Christopherson7952d762019-05-07 08:36:29 -07006203 sync_vmcs02_to_vmcs12_rare(vcpu, vmcs12);
Maxim Levitskyd51e1d32021-01-14 22:54:47 +02006204 } else {
6205 copy_vmcs02_to_vmcs12_rare(vcpu, get_vmcs12(vcpu));
6206 if (!vmx->nested.need_vmcs12_to_shadow_sync) {
Vitaly Kuznetsov1e9dfbd2021-05-26 15:20:16 +02006207 if (evmptr_is_valid(vmx->nested.hv_evmcs_vmptr))
Vitaly Kuznetsovd6bf71a2021-05-26 15:20:22 +02006208 /*
 6209			 * The L1 hypervisor is not obliged to keep the eVMCS
 6210			 * clean fields data up-to-date while not in guest
 6211			 * mode; 'hv_clean_fields' is only guaranteed to be
 6212			 * accurate at vmentry, so ignore it here and do a
 6213			 * full copy.
6214 */
6215 copy_enlightened_to_vmcs12(vmx, 0);
Maxim Levitskyd51e1d32021-01-14 22:54:47 +02006216 else if (enable_shadow_vmcs)
6217 copy_shadow_to_vmcs12(vmx);
6218 }
Sean Christopherson55d23752018-12-03 13:53:18 -08006219 }
6220
Liran Alon6ca00df2019-06-16 15:03:10 +03006221 BUILD_BUG_ON(sizeof(user_vmx_nested_state->vmcs12) < VMCS12_SIZE);
6222 BUILD_BUG_ON(sizeof(user_vmx_nested_state->shadow_vmcs12) < VMCS12_SIZE);
6223
Tom Roeder3a33d032019-01-24 13:48:20 -08006224 /*
6225 * Copy over the full allocated size of vmcs12 rather than just the size
6226 * of the struct.
6227 */
Liran Alon6ca00df2019-06-16 15:03:10 +03006228 if (copy_to_user(user_vmx_nested_state->vmcs12, vmcs12, VMCS12_SIZE))
Sean Christopherson55d23752018-12-03 13:53:18 -08006229 return -EFAULT;
6230
6231 if (nested_cpu_has_shadow_vmcs(vmcs12) &&
Yu Zhang64c78502021-09-30 01:51:53 +08006232 vmcs12->vmcs_link_pointer != INVALID_GPA) {
Liran Alon6ca00df2019-06-16 15:03:10 +03006233 if (copy_to_user(user_vmx_nested_state->shadow_vmcs12,
Tom Roeder3a33d032019-01-24 13:48:20 -08006234 get_shadow_vmcs12(vcpu), VMCS12_SIZE))
Sean Christopherson55d23752018-12-03 13:53:18 -08006235 return -EFAULT;
6236 }
Sean Christopherson55d23752018-12-03 13:53:18 -08006237out:
6238 return kvm_state.size;
6239}
6240
6241/*
6242 * Forcibly leave nested mode in order to be able to reset the VCPU later on.
6243 */
6244void vmx_leave_nested(struct kvm_vcpu *vcpu)
6245{
6246 if (is_guest_mode(vcpu)) {
6247 to_vmx(vcpu)->nested.nested_run_pending = 0;
6248 nested_vmx_vmexit(vcpu, -1, 0, 0);
6249 }
6250 free_nested(vcpu);
6251}
6252
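/*
 * Restore-order sketch for the function below: validate the header and
 * flags, enter VMX operation, re-point the current or enlightened VMCS,
 * replay the SMM flags, copy vmcs12 (and shadow vmcs12, if present) from
 * userspace, re-check the vmcs12 controls/host/guest state, and only then
 * re-enter non-root mode when KVM_STATE_NESTED_GUEST_MODE is set.
 */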
6253static int vmx_set_nested_state(struct kvm_vcpu *vcpu,
6254 struct kvm_nested_state __user *user_kvm_nested_state,
6255 struct kvm_nested_state *kvm_state)
6256{
6257 struct vcpu_vmx *vmx = to_vmx(vcpu);
6258 struct vmcs12 *vmcs12;
Sean Christopherson68cda402020-05-11 15:05:29 -07006259 enum vm_entry_failure_code ignored;
Liran Alon6ca00df2019-06-16 15:03:10 +03006260 struct kvm_vmx_nested_state_data __user *user_vmx_nested_state =
6261 &user_kvm_nested_state->data.vmx[0];
Sean Christopherson55d23752018-12-03 13:53:18 -08006262 int ret;
6263
Liran Alon6ca00df2019-06-16 15:03:10 +03006264 if (kvm_state->format != KVM_STATE_NESTED_FORMAT_VMX)
Sean Christopherson55d23752018-12-03 13:53:18 -08006265 return -EINVAL;
6266
Yu Zhang64c78502021-09-30 01:51:53 +08006267 if (kvm_state->hdr.vmx.vmxon_pa == INVALID_GPA) {
Liran Alon6ca00df2019-06-16 15:03:10 +03006268 if (kvm_state->hdr.vmx.smm.flags)
Sean Christopherson55d23752018-12-03 13:53:18 -08006269 return -EINVAL;
6270
Yu Zhang64c78502021-09-30 01:51:53 +08006271 if (kvm_state->hdr.vmx.vmcs12_pa != INVALID_GPA)
Sean Christopherson55d23752018-12-03 13:53:18 -08006272 return -EINVAL;
6273
Liran Alon323d73a2019-06-26 16:09:27 +03006274 /*
 6275		 * KVM_STATE_NESTED_EVMCS used to signal that KVM should
 6276		 * enable the eVMCS capability on the vCPU. However, the code
 6277		 * has since been changed so that the flag now signals that
 6278		 * vmcs12 should be copied into the eVMCS in guest memory.
 6279		 *
 6280		 * To preserve backwards compatibility, allow userspace
 6281		 * to set this flag even when there is no VMXON region.
6282 */
Paolo Bonzini9fd58872019-06-19 16:52:27 +02006283 if (kvm_state->flags & ~KVM_STATE_NESTED_EVMCS)
6284 return -EINVAL;
6285 } else {
6286 if (!nested_vmx_allowed(vcpu))
6287 return -EINVAL;
Sean Christopherson55d23752018-12-03 13:53:18 -08006288
Paolo Bonzini9fd58872019-06-19 16:52:27 +02006289 if (!page_address_valid(vcpu, kvm_state->hdr.vmx.vmxon_pa))
6290 return -EINVAL;
Liran Alon323d73a2019-06-26 16:09:27 +03006291 }
Sean Christopherson55d23752018-12-03 13:53:18 -08006292
Liran Alon6ca00df2019-06-16 15:03:10 +03006293 if ((kvm_state->hdr.vmx.smm.flags & KVM_STATE_NESTED_SMM_GUEST_MODE) &&
Sean Christopherson55d23752018-12-03 13:53:18 -08006294 (kvm_state->flags & KVM_STATE_NESTED_GUEST_MODE))
6295 return -EINVAL;
6296
Liran Alon6ca00df2019-06-16 15:03:10 +03006297 if (kvm_state->hdr.vmx.smm.flags &
Sean Christopherson55d23752018-12-03 13:53:18 -08006298 ~(KVM_STATE_NESTED_SMM_GUEST_MODE | KVM_STATE_NESTED_SMM_VMXON))
6299 return -EINVAL;
6300
Paolo Bonzini5e105c82020-07-27 08:55:09 -04006301 if (kvm_state->hdr.vmx.flags & ~KVM_STATE_VMX_PREEMPTION_TIMER_DEADLINE)
6302 return -EINVAL;
6303
Sean Christopherson55d23752018-12-03 13:53:18 -08006304 /*
6305 * SMM temporarily disables VMX, so we cannot be in guest mode,
6306 * nor can VMLAUNCH/VMRESUME be pending. Outside SMM, SMM flags
6307 * must be zero.
6308 */
Liran Alon65b712f12019-06-25 14:26:42 +03006309 if (is_smm(vcpu) ?
6310 (kvm_state->flags &
6311 (KVM_STATE_NESTED_GUEST_MODE | KVM_STATE_NESTED_RUN_PENDING))
6312 : kvm_state->hdr.vmx.smm.flags)
Sean Christopherson55d23752018-12-03 13:53:18 -08006313 return -EINVAL;
6314
Liran Alon6ca00df2019-06-16 15:03:10 +03006315 if ((kvm_state->hdr.vmx.smm.flags & KVM_STATE_NESTED_SMM_GUEST_MODE) &&
6316 !(kvm_state->hdr.vmx.smm.flags & KVM_STATE_NESTED_SMM_VMXON))
Sean Christopherson55d23752018-12-03 13:53:18 -08006317 return -EINVAL;
6318
Liran Alon323d73a2019-06-26 16:09:27 +03006319 if ((kvm_state->flags & KVM_STATE_NESTED_EVMCS) &&
6320 (!nested_vmx_allowed(vcpu) || !vmx->nested.enlightened_vmcs_enabled))
Paolo Bonzini9fd58872019-06-19 16:52:27 +02006321 return -EINVAL;
6322
Liran Alon323d73a2019-06-26 16:09:27 +03006323 vmx_leave_nested(vcpu);
Paolo Bonzini9fd58872019-06-19 16:52:27 +02006324
Yu Zhang64c78502021-09-30 01:51:53 +08006325 if (kvm_state->hdr.vmx.vmxon_pa == INVALID_GPA)
Sean Christopherson55d23752018-12-03 13:53:18 -08006326 return 0;
6327
Liran Alon6ca00df2019-06-16 15:03:10 +03006328 vmx->nested.vmxon_ptr = kvm_state->hdr.vmx.vmxon_pa;
Sean Christopherson55d23752018-12-03 13:53:18 -08006329 ret = enter_vmx_operation(vcpu);
6330 if (ret)
6331 return ret;
6332
Paolo Bonzini0f02bd02020-07-27 09:00:37 -04006333 /* Empty 'VMXON' state is permitted if no VMCS loaded */
6334 if (kvm_state->size < sizeof(*kvm_state) + sizeof(*vmcs12)) {
6335 /* See vmx_has_valid_vmcs12. */
6336 if ((kvm_state->flags & KVM_STATE_NESTED_GUEST_MODE) ||
6337 (kvm_state->flags & KVM_STATE_NESTED_EVMCS) ||
Yu Zhang64c78502021-09-30 01:51:53 +08006338 (kvm_state->hdr.vmx.vmcs12_pa != INVALID_GPA))
Paolo Bonzini0f02bd02020-07-27 09:00:37 -04006339 return -EINVAL;
6340 else
6341 return 0;
6342 }
Sean Christopherson55d23752018-12-03 13:53:18 -08006343
Yu Zhang64c78502021-09-30 01:51:53 +08006344 if (kvm_state->hdr.vmx.vmcs12_pa != INVALID_GPA) {
Liran Alon6ca00df2019-06-16 15:03:10 +03006345 if (kvm_state->hdr.vmx.vmcs12_pa == kvm_state->hdr.vmx.vmxon_pa ||
6346 !page_address_valid(vcpu, kvm_state->hdr.vmx.vmcs12_pa))
Sean Christopherson55d23752018-12-03 13:53:18 -08006347 return -EINVAL;
6348
Liran Alon6ca00df2019-06-16 15:03:10 +03006349 set_current_vmptr(vmx, kvm_state->hdr.vmx.vmcs12_pa);
Sean Christopherson55d23752018-12-03 13:53:18 -08006350 } else if (kvm_state->flags & KVM_STATE_NESTED_EVMCS) {
6351 /*
Vitaly Kuznetsove942dbf2020-03-09 16:52:12 +01006352 * nested_vmx_handle_enlightened_vmptrld() cannot be called
6353 * directly from here as HV_X64_MSR_VP_ASSIST_PAGE may not be
6354 * restored yet. EVMCS will be mapped from
6355 * nested_get_vmcs12_pages().
Sean Christopherson55d23752018-12-03 13:53:18 -08006356 */
Vitaly Kuznetsov27849962021-05-26 15:20:20 +02006357 vmx->nested.hv_evmcs_vmptr = EVMPTR_MAP_PENDING;
Paolo Bonzini729c15c2020-09-22 06:53:57 -04006358 kvm_make_request(KVM_REQ_GET_NESTED_STATE_PAGES, vcpu);
Sean Christopherson55d23752018-12-03 13:53:18 -08006359 } else {
6360 return -EINVAL;
6361 }
6362
Liran Alon6ca00df2019-06-16 15:03:10 +03006363 if (kvm_state->hdr.vmx.smm.flags & KVM_STATE_NESTED_SMM_VMXON) {
Sean Christopherson55d23752018-12-03 13:53:18 -08006364 vmx->nested.smm.vmxon = true;
6365 vmx->nested.vmxon = false;
6366
Liran Alon6ca00df2019-06-16 15:03:10 +03006367 if (kvm_state->hdr.vmx.smm.flags & KVM_STATE_NESTED_SMM_GUEST_MODE)
Sean Christopherson55d23752018-12-03 13:53:18 -08006368 vmx->nested.smm.guest_mode = true;
6369 }
6370
6371 vmcs12 = get_vmcs12(vcpu);
Liran Alon6ca00df2019-06-16 15:03:10 +03006372 if (copy_from_user(vmcs12, user_vmx_nested_state->vmcs12, sizeof(*vmcs12)))
Sean Christopherson55d23752018-12-03 13:53:18 -08006373 return -EFAULT;
6374
6375 if (vmcs12->hdr.revision_id != VMCS12_REVISION)
6376 return -EINVAL;
6377
6378 if (!(kvm_state->flags & KVM_STATE_NESTED_GUEST_MODE))
6379 return 0;
6380
Sean Christopherson21be4ca2019-05-08 11:04:32 -07006381 vmx->nested.nested_run_pending =
6382 !!(kvm_state->flags & KVM_STATE_NESTED_RUN_PENDING);
6383
Oliver Upton5ef8acb2020-02-07 02:36:07 -08006384 vmx->nested.mtf_pending =
6385 !!(kvm_state->flags & KVM_STATE_NESTED_MTF_PENDING);
6386
Sean Christopherson21be4ca2019-05-08 11:04:32 -07006387 ret = -EINVAL;
Sean Christopherson55d23752018-12-03 13:53:18 -08006388 if (nested_cpu_has_shadow_vmcs(vmcs12) &&
Yu Zhang64c78502021-09-30 01:51:53 +08006389 vmcs12->vmcs_link_pointer != INVALID_GPA) {
Sean Christopherson55d23752018-12-03 13:53:18 -08006390 struct vmcs12 *shadow_vmcs12 = get_shadow_vmcs12(vcpu);
6391
Liran Alon6ca00df2019-06-16 15:03:10 +03006392 if (kvm_state->size <
6393 sizeof(*kvm_state) +
6394 sizeof(user_vmx_nested_state->vmcs12) + sizeof(*shadow_vmcs12))
Sean Christopherson21be4ca2019-05-08 11:04:32 -07006395 goto error_guest_mode;
Sean Christopherson55d23752018-12-03 13:53:18 -08006396
6397 if (copy_from_user(shadow_vmcs12,
Liran Alon6ca00df2019-06-16 15:03:10 +03006398 user_vmx_nested_state->shadow_vmcs12,
6399 sizeof(*shadow_vmcs12))) {
Sean Christopherson21be4ca2019-05-08 11:04:32 -07006400 ret = -EFAULT;
6401 goto error_guest_mode;
6402 }
Sean Christopherson55d23752018-12-03 13:53:18 -08006403
6404 if (shadow_vmcs12->hdr.revision_id != VMCS12_REVISION ||
6405 !shadow_vmcs12->hdr.shadow_vmcs)
Sean Christopherson21be4ca2019-05-08 11:04:32 -07006406 goto error_guest_mode;
Sean Christopherson55d23752018-12-03 13:53:18 -08006407 }
6408
Paolo Bonzini83d31e52020-07-09 13:12:09 -04006409 vmx->nested.has_preemption_timer_deadline = false;
Peter Shier850448f2020-05-26 14:51:06 -07006410 if (kvm_state->hdr.vmx.flags & KVM_STATE_VMX_PREEMPTION_TIMER_DEADLINE) {
6411 vmx->nested.has_preemption_timer_deadline = true;
6412 vmx->nested.preemption_timer_deadline =
6413 kvm_state->hdr.vmx.preemption_timer_deadline;
6414 }
6415
Sean Christopherson5478ba32019-04-11 12:18:06 -07006416 if (nested_vmx_check_controls(vcpu, vmcs12) ||
6417 nested_vmx_check_host_state(vcpu, vmcs12) ||
Sean Christopherson68cda402020-05-11 15:05:29 -07006418 nested_vmx_check_guest_state(vcpu, vmcs12, &ignored))
Sean Christopherson21be4ca2019-05-08 11:04:32 -07006419 goto error_guest_mode;
Sean Christopherson55d23752018-12-03 13:53:18 -08006420
6421 vmx->nested.dirty_vmcs12 = true;
Vitaly Kuznetsoved2a4802021-11-29 10:47:03 +01006422 vmx->nested.force_msr_bitmap_recalc = true;
Sean Christopherson55d23752018-12-03 13:53:18 -08006423 ret = nested_vmx_enter_non_root_mode(vcpu, false);
Sean Christopherson21be4ca2019-05-08 11:04:32 -07006424 if (ret)
6425 goto error_guest_mode;
Sean Christopherson55d23752018-12-03 13:53:18 -08006426
6427 return 0;
Sean Christopherson21be4ca2019-05-08 11:04:32 -07006428
6429error_guest_mode:
6430 vmx->nested.nested_run_pending = 0;
6431 return ret;
Sean Christopherson55d23752018-12-03 13:53:18 -08006432}
6433
Xiaoyao Li1b842922019-10-20 17:11:01 +08006434void nested_vmx_set_vmcs_shadowing_bitmap(void)
Sean Christopherson55d23752018-12-03 13:53:18 -08006435{
6436 if (enable_shadow_vmcs) {
Sean Christopherson55d23752018-12-03 13:53:18 -08006437 vmcs_write64(VMREAD_BITMAP, __pa(vmx_vmread_bitmap));
Sean Christophersonfadcead2019-05-07 08:36:23 -07006438 vmcs_write64(VMWRITE_BITMAP, __pa(vmx_vmwrite_bitmap));
Sean Christopherson55d23752018-12-03 13:53:18 -08006439 }
6440}
6441
6442/*
Sean Christophersonba1f8242021-06-18 14:46:58 -07006443 * Indexing into the vmcs12 uses the VMCS encoding rotated left by 6. Undo
6444 * that madness to get the encoding for comparison.
6445 */
6446#define VMCS12_IDX_TO_ENC(idx) ((u16)(((u16)(idx) >> 6) | ((u16)(idx) << 10)))
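/*
 * Worked example (an illustrative sketch, not used by the code below): the
 * encoding of VM_INSTRUCTION_ERROR is 0x4400; rotating it left by 6 within
 * 16 bits yields table index 0x0011, and VMCS12_IDX_TO_ENC(0x0011) recovers
 * 0x4400. The forward direction of that rotation looks like this:
 */
static inline u16 vmcs12_enc_to_idx(u16 encoding)
{
	return (u16)((encoding << 6) | (encoding >> 10));
}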
6447
6448static u64 nested_vmx_calc_vmcs_enum_msr(void)
6449{
6450 /*
 6451	 * Note these are the so-called "index" of the VMCS field encoding, not
6452 * the index into vmcs12.
6453 */
6454 unsigned int max_idx, idx;
6455 int i;
6456
6457 /*
6458 * For better or worse, KVM allows VMREAD/VMWRITE to all fields in
6459 * vmcs12, regardless of whether or not the associated feature is
6460 * exposed to L1. Simply find the field with the highest index.
6461 */
6462 max_idx = 0;
6463 for (i = 0; i < nr_vmcs12_fields; i++) {
6464 /* The vmcs12 table is very, very sparsely populated. */
6465 if (!vmcs_field_to_offset_table[i])
6466 continue;
6467
6468 idx = vmcs_field_index(VMCS12_IDX_TO_ENC(i));
6469 if (idx > max_idx)
6470 max_idx = idx;
6471 }
6472
6473 return (u64)max_idx << VMCS_FIELD_INDEX_SHIFT;
6474}
6475
6476/*
Sean Christopherson55d23752018-12-03 13:53:18 -08006477 * nested_vmx_setup_ctls_msrs() sets up variables containing the values to be
6478 * returned for the various VMX controls MSRs when nested VMX is enabled.
6479 * The same values should also be used to verify that vmcs12 control fields are
6480 * valid during nested entry from L1 to L2.
6481 * Each of these control msrs has a low and high 32-bit half: A low bit is on
6482 * if the corresponding bit in the (32-bit) control field *must* be on, and a
6483 * bit in the high half is on if the corresponding bit in the control field
6484 * may be on. See also vmx_control_verify().
6485 */
Vitaly Kuznetsova4443262020-02-20 18:22:04 +01006486void nested_vmx_setup_ctls_msrs(struct nested_vmx_msrs *msrs, u32 ept_caps)
Sean Christopherson55d23752018-12-03 13:53:18 -08006487{
6488 /*
6489 * Note that as a general rule, the high half of the MSRs (bits in
6490 * the control fields which may be 1) should be initialized by the
6491 * intersection of the underlying hardware's MSR (i.e., features which
6492 * can be supported) and the list of features we want to expose -
6493 * because they are known to be properly supported in our code.
6494 * Also, usually, the low half of the MSRs (bits which must be 1) can
6495 * be set to 0, meaning that L1 may turn off any of these bits. The
 6496	 * reason is that if one of these bits is necessary, it will already be
 6497	 * set in vmcs01, and prepare_vmcs02, which bitwise-ORs the control
 6498	 * fields of vmcs01 and vmcs12, will keep it set in vmcs02 - and
Sean Christopherson2c1f3322020-04-15 10:55:14 -07006499 * nested_vmx_l1_wants_exit() will not pass related exits to L1.
Sean Christopherson55d23752018-12-03 13:53:18 -08006500 * These rules have exceptions below.
6501 */
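	/*
	 * Sketch of the verification rule referenced above (the in-tree
	 * check is vmx_control_verify()): a control value 'ctl' requested
	 * by L1 is acceptable for a given low/high pair iff
	 *
	 *	(ctl & low) == low && (ctl & ~high) == 0
	 *
	 * i.e. every must-be-1 bit is set and no bit outside the may-be-1
	 * mask is set.
	 */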
6502
6503 /* pin-based controls */
6504 rdmsr(MSR_IA32_VMX_PINBASED_CTLS,
6505 msrs->pinbased_ctls_low,
6506 msrs->pinbased_ctls_high);
6507 msrs->pinbased_ctls_low |=
6508 PIN_BASED_ALWAYSON_WITHOUT_TRUE_MSR;
6509 msrs->pinbased_ctls_high &=
6510 PIN_BASED_EXT_INTR_MASK |
6511 PIN_BASED_NMI_EXITING |
6512 PIN_BASED_VIRTUAL_NMIS |
Vitaly Kuznetsova4443262020-02-20 18:22:04 +01006513 (enable_apicv ? PIN_BASED_POSTED_INTR : 0);
Sean Christopherson55d23752018-12-03 13:53:18 -08006514 msrs->pinbased_ctls_high |=
6515 PIN_BASED_ALWAYSON_WITHOUT_TRUE_MSR |
6516 PIN_BASED_VMX_PREEMPTION_TIMER;
6517
6518 /* exit controls */
6519 rdmsr(MSR_IA32_VMX_EXIT_CTLS,
6520 msrs->exit_ctls_low,
6521 msrs->exit_ctls_high);
6522 msrs->exit_ctls_low =
6523 VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR;
6524
6525 msrs->exit_ctls_high &=
6526#ifdef CONFIG_X86_64
6527 VM_EXIT_HOST_ADDR_SPACE_SIZE |
6528#endif
Chenyi Qiangefc83132020-08-28 16:56:18 +08006529 VM_EXIT_LOAD_IA32_PAT | VM_EXIT_SAVE_IA32_PAT |
6530 VM_EXIT_CLEAR_BNDCFGS | VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL;
Sean Christopherson55d23752018-12-03 13:53:18 -08006531 msrs->exit_ctls_high |=
6532 VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR |
6533 VM_EXIT_LOAD_IA32_EFER | VM_EXIT_SAVE_IA32_EFER |
6534 VM_EXIT_SAVE_VMX_PREEMPTION_TIMER | VM_EXIT_ACK_INTR_ON_EXIT;
6535
6536 /* We support free control of debug control saving. */
6537 msrs->exit_ctls_low &= ~VM_EXIT_SAVE_DEBUG_CONTROLS;
6538
6539 /* entry controls */
6540 rdmsr(MSR_IA32_VMX_ENTRY_CTLS,
6541 msrs->entry_ctls_low,
6542 msrs->entry_ctls_high);
6543 msrs->entry_ctls_low =
6544 VM_ENTRY_ALWAYSON_WITHOUT_TRUE_MSR;
6545 msrs->entry_ctls_high &=
6546#ifdef CONFIG_X86_64
6547 VM_ENTRY_IA32E_MODE |
6548#endif
Chenyi Qiangefc83132020-08-28 16:56:18 +08006549 VM_ENTRY_LOAD_IA32_PAT | VM_ENTRY_LOAD_BNDCFGS |
6550 VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL;
Sean Christopherson55d23752018-12-03 13:53:18 -08006551 msrs->entry_ctls_high |=
6552 (VM_ENTRY_ALWAYSON_WITHOUT_TRUE_MSR | VM_ENTRY_LOAD_IA32_EFER);
6553
6554 /* We support free control of debug control loading. */
6555 msrs->entry_ctls_low &= ~VM_ENTRY_LOAD_DEBUG_CONTROLS;
6556
6557 /* cpu-based controls */
6558 rdmsr(MSR_IA32_VMX_PROCBASED_CTLS,
6559 msrs->procbased_ctls_low,
6560 msrs->procbased_ctls_high);
6561 msrs->procbased_ctls_low =
6562 CPU_BASED_ALWAYSON_WITHOUT_TRUE_MSR;
6563 msrs->procbased_ctls_high &=
Xiaoyao Li9dadc2f2019-12-06 16:45:24 +08006564 CPU_BASED_INTR_WINDOW_EXITING |
Xiaoyao Li5e3d3942019-12-06 16:45:26 +08006565 CPU_BASED_NMI_WINDOW_EXITING | CPU_BASED_USE_TSC_OFFSETTING |
Sean Christopherson55d23752018-12-03 13:53:18 -08006566 CPU_BASED_HLT_EXITING | CPU_BASED_INVLPG_EXITING |
6567 CPU_BASED_MWAIT_EXITING | CPU_BASED_CR3_LOAD_EXITING |
6568 CPU_BASED_CR3_STORE_EXITING |
6569#ifdef CONFIG_X86_64
6570 CPU_BASED_CR8_LOAD_EXITING | CPU_BASED_CR8_STORE_EXITING |
6571#endif
6572 CPU_BASED_MOV_DR_EXITING | CPU_BASED_UNCOND_IO_EXITING |
6573 CPU_BASED_USE_IO_BITMAPS | CPU_BASED_MONITOR_TRAP_FLAG |
6574 CPU_BASED_MONITOR_EXITING | CPU_BASED_RDPMC_EXITING |
6575 CPU_BASED_RDTSC_EXITING | CPU_BASED_PAUSE_EXITING |
6576 CPU_BASED_TPR_SHADOW | CPU_BASED_ACTIVATE_SECONDARY_CONTROLS;
6577 /*
6578 * We can allow some features even when not supported by the
6579 * hardware. For example, L1 can specify an MSR bitmap - and we
6580 * can use it to avoid exits to L1 - even when L0 runs L2
6581 * without MSR bitmaps.
6582 */
6583 msrs->procbased_ctls_high |=
6584 CPU_BASED_ALWAYSON_WITHOUT_TRUE_MSR |
6585 CPU_BASED_USE_MSR_BITMAPS;
6586
6587 /* We support free control of CR3 access interception. */
6588 msrs->procbased_ctls_low &=
6589 ~(CPU_BASED_CR3_LOAD_EXITING | CPU_BASED_CR3_STORE_EXITING);
6590
6591 /*
6592 * secondary cpu-based controls. Do not include those that
Xiaoyao Li7c1b7612020-07-09 12:34:25 +08006593 * depend on CPUID bits, they are added later by
6594 * vmx_vcpu_after_set_cpuid.
Sean Christopherson55d23752018-12-03 13:53:18 -08006595 */
Vitaly Kuznetsov6b1971c2019-02-07 11:42:14 +01006596 if (msrs->procbased_ctls_high & CPU_BASED_ACTIVATE_SECONDARY_CONTROLS)
6597 rdmsr(MSR_IA32_VMX_PROCBASED_CTLS2,
6598 msrs->secondary_ctls_low,
6599 msrs->secondary_ctls_high);
6600
Sean Christopherson55d23752018-12-03 13:53:18 -08006601 msrs->secondary_ctls_low = 0;
6602 msrs->secondary_ctls_high &=
6603 SECONDARY_EXEC_DESC |
Sean Christopherson7f3603b2020-09-23 09:50:47 -07006604 SECONDARY_EXEC_ENABLE_RDTSCP |
Sean Christopherson55d23752018-12-03 13:53:18 -08006605 SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE |
Paolo Bonzini6defc592019-07-02 14:39:29 +02006606 SECONDARY_EXEC_WBINVD_EXITING |
Sean Christopherson55d23752018-12-03 13:53:18 -08006607 SECONDARY_EXEC_APIC_REGISTER_VIRT |
6608 SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY |
Paolo Bonzini6defc592019-07-02 14:39:29 +02006609 SECONDARY_EXEC_RDRAND_EXITING |
6610 SECONDARY_EXEC_ENABLE_INVPCID |
6611 SECONDARY_EXEC_RDSEED_EXITING |
Ilias Stamatisd041b5e2021-05-26 19:44:17 +01006612 SECONDARY_EXEC_XSAVES |
6613 SECONDARY_EXEC_TSC_SCALING;
Sean Christopherson55d23752018-12-03 13:53:18 -08006614
6615 /*
6616 * We can emulate "VMCS shadowing," even if the hardware
6617 * doesn't support it.
6618 */
6619 msrs->secondary_ctls_high |=
6620 SECONDARY_EXEC_SHADOW_VMCS;
6621
6622 if (enable_ept) {
6623 /* nested EPT: emulate EPT also to L1 */
6624 msrs->secondary_ctls_high |=
6625 SECONDARY_EXEC_ENABLE_EPT;
Sean Christophersonbb1fcc72020-03-02 18:02:36 -08006626 msrs->ept_caps =
6627 VMX_EPT_PAGE_WALK_4_BIT |
6628 VMX_EPT_PAGE_WALK_5_BIT |
6629 VMX_EPTP_WB_BIT |
Sean Christopherson96d47012020-03-02 18:02:40 -08006630 VMX_EPT_INVEPT_BIT |
6631 VMX_EPT_EXECUTE_ONLY_BIT;
6632
Sean Christopherson55d23752018-12-03 13:53:18 -08006633 msrs->ept_caps &= ept_caps;
6634 msrs->ept_caps |= VMX_EPT_EXTENT_GLOBAL_BIT |
6635 VMX_EPT_EXTENT_CONTEXT_BIT | VMX_EPT_2MB_PAGE_BIT |
6636 VMX_EPT_1GB_PAGE_BIT;
6637 if (enable_ept_ad_bits) {
6638 msrs->secondary_ctls_high |=
6639 SECONDARY_EXEC_ENABLE_PML;
6640 msrs->ept_caps |= VMX_EPT_AD_BIT;
6641 }
6642 }
6643
6644 if (cpu_has_vmx_vmfunc()) {
6645 msrs->secondary_ctls_high |=
6646 SECONDARY_EXEC_ENABLE_VMFUNC;
6647 /*
6648 * Advertise EPTP switching unconditionally
6649 * since we emulate it
6650 */
6651 if (enable_ept)
6652 msrs->vmfunc_controls =
6653 VMX_VMFUNC_EPTP_SWITCHING;
6654 }
6655
6656 /*
6657 * Old versions of KVM use the single-context version without
6658 * checking for support, so declare that it is supported even
6659 * though it is treated as global context. The alternative is
6660 * not failing the single-context invvpid, and it is worse.
6661 */
6662 if (enable_vpid) {
6663 msrs->secondary_ctls_high |=
6664 SECONDARY_EXEC_ENABLE_VPID;
6665 msrs->vpid_caps = VMX_VPID_INVVPID_BIT |
6666 VMX_VPID_EXTENT_SUPPORTED_MASK;
6667 }
6668
6669 if (enable_unrestricted_guest)
6670 msrs->secondary_ctls_high |=
6671 SECONDARY_EXEC_UNRESTRICTED_GUEST;
6672
6673 if (flexpriority_enabled)
6674 msrs->secondary_ctls_high |=
6675 SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES;
6676
Sean Christopherson72add912021-04-12 16:21:42 +12006677 if (enable_sgx)
6678 msrs->secondary_ctls_high |= SECONDARY_EXEC_ENCLS_EXITING;
6679
Sean Christopherson55d23752018-12-03 13:53:18 -08006680 /* miscellaneous data */
6681 rdmsr(MSR_IA32_VMX_MISC,
6682 msrs->misc_low,
6683 msrs->misc_high);
6684 msrs->misc_low &= VMX_MISC_SAVE_EFER_LMA;
6685 msrs->misc_low |=
6686 MSR_IA32_VMX_MISC_VMWRITE_SHADOW_RO_FIELDS |
6687 VMX_MISC_EMULATED_PREEMPTION_TIMER_RATE |
Yadong Qibf0cd882020-11-06 14:51:22 +08006688 VMX_MISC_ACTIVITY_HLT |
6689 VMX_MISC_ACTIVITY_WAIT_SIPI;
Sean Christopherson55d23752018-12-03 13:53:18 -08006690 msrs->misc_high = 0;
6691
6692 /*
6693 * This MSR reports some information about VMX support. We
6694 * should return information about the VMX we emulate for the
6695 * guest, and the VMCS structure we give it - not about the
6696 * VMX support of the underlying hardware.
6697 */
6698 msrs->basic =
6699 VMCS12_REVISION |
6700 VMX_BASIC_TRUE_CTLS |
6701 ((u64)VMCS12_SIZE << VMX_BASIC_VMCS_SIZE_SHIFT) |
6702 (VMX_BASIC_MEM_TYPE_WB << VMX_BASIC_MEM_TYPE_SHIFT);
6703
6704 if (cpu_has_vmx_basic_inout())
6705 msrs->basic |= VMX_BASIC_INOUT;
6706
6707 /*
6708 * These MSRs specify bits which the guest must keep fixed on
6709 * while L1 is in VMXON mode (in L1's root mode, or running an L2).
6710 * We picked the standard core2 setting.
6711 */
6712#define VMXON_CR0_ALWAYSON (X86_CR0_PE | X86_CR0_PG | X86_CR0_NE)
6713#define VMXON_CR4_ALWAYSON X86_CR4_VMXE
6714 msrs->cr0_fixed0 = VMXON_CR0_ALWAYSON;
6715 msrs->cr4_fixed0 = VMXON_CR4_ALWAYSON;
6716
6717 /* These MSRs specify bits which the guest must keep fixed off. */
6718 rdmsrl(MSR_IA32_VMX_CR0_FIXED1, msrs->cr0_fixed1);
6719 rdmsrl(MSR_IA32_VMX_CR4_FIXED1, msrs->cr4_fixed1);
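	/*
	 * Sketch of how the four values above constrain the guest, assuming
	 * the usual fixed-bits convention: a CR0 value is legal under VMX iff
	 *
	 *	(cr0 | cr0_fixed0) == cr0 && (cr0 & cr0_fixed1) == cr0
	 *
	 * i.e. all must-be-1 bits are set and only may-be-1 bits are set;
	 * CR4 is checked the same way against cr4_fixed0/cr4_fixed1.
	 */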
6720
Sean Christophersonba1f8242021-06-18 14:46:58 -07006721 msrs->vmcs_enum = nested_vmx_calc_vmcs_enum_msr();
Sean Christopherson55d23752018-12-03 13:53:18 -08006722}
6723
6724void nested_vmx_hardware_unsetup(void)
6725{
6726 int i;
6727
6728 if (enable_shadow_vmcs) {
6729 for (i = 0; i < VMX_BITMAP_NR; i++)
6730 free_page((unsigned long)vmx_bitmap[i]);
6731 }
6732}
6733
Sean Christopherson6c1c6e52020-05-06 13:46:53 -07006734__init int nested_vmx_hardware_setup(int (*exit_handlers[])(struct kvm_vcpu *))
Sean Christopherson55d23752018-12-03 13:53:18 -08006735{
6736 int i;
6737
6738 if (!cpu_has_vmx_shadow_vmcs())
6739 enable_shadow_vmcs = 0;
6740 if (enable_shadow_vmcs) {
6741 for (i = 0; i < VMX_BITMAP_NR; i++) {
Ben Gardon41836832019-02-11 11:02:52 -08006742 /*
6743 * The vmx_bitmap is not tied to a VM and so should
6744 * not be charged to a memcg.
6745 */
Sean Christopherson55d23752018-12-03 13:53:18 -08006746 vmx_bitmap[i] = (unsigned long *)
6747 __get_free_page(GFP_KERNEL);
6748 if (!vmx_bitmap[i]) {
6749 nested_vmx_hardware_unsetup();
6750 return -ENOMEM;
6751 }
6752 }
6753
6754 init_vmcs_shadow_fields();
6755 }
6756
Liran Aloncc877672019-11-18 21:11:21 +02006757 exit_handlers[EXIT_REASON_VMCLEAR] = handle_vmclear;
6758 exit_handlers[EXIT_REASON_VMLAUNCH] = handle_vmlaunch;
6759 exit_handlers[EXIT_REASON_VMPTRLD] = handle_vmptrld;
6760 exit_handlers[EXIT_REASON_VMPTRST] = handle_vmptrst;
6761 exit_handlers[EXIT_REASON_VMREAD] = handle_vmread;
6762 exit_handlers[EXIT_REASON_VMRESUME] = handle_vmresume;
6763 exit_handlers[EXIT_REASON_VMWRITE] = handle_vmwrite;
6764 exit_handlers[EXIT_REASON_VMOFF] = handle_vmoff;
6765 exit_handlers[EXIT_REASON_VMON] = handle_vmon;
6766 exit_handlers[EXIT_REASON_INVEPT] = handle_invept;
6767 exit_handlers[EXIT_REASON_INVVPID] = handle_invvpid;
6768 exit_handlers[EXIT_REASON_VMFUNC] = handle_vmfunc;
Sean Christopherson55d23752018-12-03 13:53:18 -08006769
Sean Christopherson55d23752018-12-03 13:53:18 -08006770 return 0;
6771}
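/*
 * The handlers installed above overwrite the VMX-instruction slots of the
 * caller-supplied exit_handlers table (in-tree, vmx.c passes its
 * vmx_exit_handlers), so VMX instructions executed by L1 are emulated only
 * once nested support has been set up; each handler still performs its own
 * permission checks (e.g. handle_vmon()).
 */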
Paolo Bonzini33b22172020-04-17 10:24:18 -04006772
6773struct kvm_x86_nested_ops vmx_nested_ops = {
6774 .check_events = vmx_check_nested_events,
Sean Christophersond2060bd2020-04-22 19:25:39 -07006775 .hv_timer_pending = nested_vmx_preemption_timer_pending,
Sean Christophersoncb6a32c2021-03-02 09:45:14 -08006776 .triple_fault = nested_vmx_triple_fault,
Paolo Bonzini33b22172020-04-17 10:24:18 -04006777 .get_state = vmx_get_nested_state,
6778 .set_state = vmx_set_nested_state,
Paolo Bonzini9a78e152021-01-08 11:43:08 -05006779 .get_nested_state_pages = vmx_get_nested_state_pages,
Sean Christopherson02f5fb22020-06-22 14:58:32 -07006780 .write_log_dirty = nested_vmx_write_pml_buffer,
Paolo Bonzini33b22172020-04-17 10:24:18 -04006781 .enable_evmcs = nested_enable_evmcs,
6782 .get_evmcs_version = nested_get_evmcs_version,
6783};