// SPDX-License-Identifier: GPL-2.0

#include <linux/objtool.h>
#include <linux/percpu.h>

#include <asm/debugreg.h>
#include <asm/mmu_context.h>

#include "cpuid.h"
#include "hyperv.h"
#include "mmu.h"
#include "nested.h"
#include "pmu.h"
#include "sgx.h"
#include "trace.h"
#include "vmx.h"
#include "x86.h"

static bool __read_mostly enable_shadow_vmcs = 1;
module_param_named(enable_shadow_vmcs, enable_shadow_vmcs, bool, S_IRUGO);

static bool __read_mostly nested_early_check = 0;
module_param(nested_early_check, bool, S_IRUGO);

#define CC KVM_NESTED_VMENTER_CONSISTENCY_CHECK

/*
 * Hyper-V requires all of these, so mark them as supported even though
 * they are just treated the same as all-context.
 */
#define VMX_VPID_EXTENT_SUPPORTED_MASK		\
	(VMX_VPID_EXTENT_INDIVIDUAL_ADDR_BIT |	\
	VMX_VPID_EXTENT_SINGLE_CONTEXT_BIT |	\
	VMX_VPID_EXTENT_GLOBAL_CONTEXT_BIT |	\
	VMX_VPID_EXTENT_SINGLE_NON_GLOBAL_BIT)

#define VMX_MISC_EMULATED_PREEMPTION_TIMER_RATE 5

enum {
	VMX_VMREAD_BITMAP,
	VMX_VMWRITE_BITMAP,
	VMX_BITMAP_NR
};
static unsigned long *vmx_bitmap[VMX_BITMAP_NR];

#define vmx_vmread_bitmap	(vmx_bitmap[VMX_VMREAD_BITMAP])
#define vmx_vmwrite_bitmap	(vmx_bitmap[VMX_VMWRITE_BITMAP])

struct shadow_vmcs_field {
	u16	encoding;
	u16	offset;
};
static struct shadow_vmcs_field shadow_read_only_fields[] = {
#define SHADOW_FIELD_RO(x, y) { x, offsetof(struct vmcs12, y) },
#include "vmcs_shadow_fields.h"
};
static int max_shadow_read_only_fields =
	ARRAY_SIZE(shadow_read_only_fields);

static struct shadow_vmcs_field shadow_read_write_fields[] = {
#define SHADOW_FIELD_RW(x, y) { x, offsetof(struct vmcs12, y) },
#include "vmcs_shadow_fields.h"
};
static int max_shadow_read_write_fields =
	ARRAY_SIZE(shadow_read_write_fields);

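/*
 * Build the VMREAD/VMWRITE shadow bitmaps and compact the shadow field
 * tables: read/write fields the CPU can't shadow (e.g. the PML index or
 * the preemption timer value when the feature is unsupported) are
 * dropped, and on 32-bit kernels odd 64-bit field encodings are pointed
 * at the high half of the corresponding vmcs12 field.
 */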
static void init_vmcs_shadow_fields(void)
{
	int i, j;

	memset(vmx_vmread_bitmap, 0xff, PAGE_SIZE);
	memset(vmx_vmwrite_bitmap, 0xff, PAGE_SIZE);

	for (i = j = 0; i < max_shadow_read_only_fields; i++) {
		struct shadow_vmcs_field entry = shadow_read_only_fields[i];
		u16 field = entry.encoding;

		if (vmcs_field_width(field) == VMCS_FIELD_WIDTH_U64 &&
		    (i + 1 == max_shadow_read_only_fields ||
		     shadow_read_only_fields[i + 1].encoding != field + 1))
			pr_err("Missing field from shadow_read_only_field %x\n",
			       field + 1);

		clear_bit(field, vmx_vmread_bitmap);
		if (field & 1)
#ifdef CONFIG_X86_64
			continue;
#else
			entry.offset += sizeof(u32);
#endif
		shadow_read_only_fields[j++] = entry;
	}
	max_shadow_read_only_fields = j;

	for (i = j = 0; i < max_shadow_read_write_fields; i++) {
		struct shadow_vmcs_field entry = shadow_read_write_fields[i];
		u16 field = entry.encoding;

		if (vmcs_field_width(field) == VMCS_FIELD_WIDTH_U64 &&
		    (i + 1 == max_shadow_read_write_fields ||
		     shadow_read_write_fields[i + 1].encoding != field + 1))
			pr_err("Missing field from shadow_read_write_field %x\n",
			       field + 1);

		WARN_ONCE(field >= GUEST_ES_AR_BYTES &&
			  field <= GUEST_TR_AR_BYTES,
			  "Update vmcs12_write_any() to drop reserved bits from AR_BYTES");

		/*
		 * PML and the preemption timer can be emulated, but the
		 * processor cannot vmwrite to fields that don't exist
		 * on bare metal.
		 */
		switch (field) {
		case GUEST_PML_INDEX:
			if (!cpu_has_vmx_pml())
				continue;
			break;
		case VMX_PREEMPTION_TIMER_VALUE:
			if (!cpu_has_vmx_preemption_timer())
				continue;
			break;
		case GUEST_INTR_STATUS:
			if (!cpu_has_vmx_apicv())
				continue;
			break;
		default:
			break;
		}

		clear_bit(field, vmx_vmwrite_bitmap);
		clear_bit(field, vmx_vmread_bitmap);
		if (field & 1)
#ifdef CONFIG_X86_64
			continue;
#else
			entry.offset += sizeof(u32);
#endif
		shadow_read_write_fields[j++] = entry;
	}
	max_shadow_read_write_fields = j;
}

/*
 * The following 3 functions, nested_vmx_succeed()/failValid()/failInvalid(),
 * set the success or error code of an emulated VMX instruction (as specified
 * by Vol 2B, VMX Instruction Reference, "Conventions"), and skip the emulated
 * instruction.
 */
static int nested_vmx_succeed(struct kvm_vcpu *vcpu)
{
	vmx_set_rflags(vcpu, vmx_get_rflags(vcpu)
			& ~(X86_EFLAGS_CF | X86_EFLAGS_PF | X86_EFLAGS_AF |
			    X86_EFLAGS_ZF | X86_EFLAGS_SF | X86_EFLAGS_OF));
	return kvm_skip_emulated_instruction(vcpu);
}

static int nested_vmx_failInvalid(struct kvm_vcpu *vcpu)
{
	vmx_set_rflags(vcpu, (vmx_get_rflags(vcpu)
			& ~(X86_EFLAGS_PF | X86_EFLAGS_AF | X86_EFLAGS_ZF |
			    X86_EFLAGS_SF | X86_EFLAGS_OF))
			| X86_EFLAGS_CF);
	return kvm_skip_emulated_instruction(vcpu);
}

static int nested_vmx_failValid(struct kvm_vcpu *vcpu,
				u32 vm_instruction_error)
{
	vmx_set_rflags(vcpu, (vmx_get_rflags(vcpu)
			& ~(X86_EFLAGS_CF | X86_EFLAGS_PF | X86_EFLAGS_AF |
			    X86_EFLAGS_SF | X86_EFLAGS_OF))
			| X86_EFLAGS_ZF);
	get_vmcs12(vcpu)->vm_instruction_error = vm_instruction_error;
	/*
	 * We don't need to force sync to shadow VMCS because
	 * VM_INSTRUCTION_ERROR is not shadowed. Enlightened VMCS 'shadows' all
	 * fields and thus must be synced.
	 */
	if (to_vmx(vcpu)->nested.hv_evmcs_vmptr != EVMPTR_INVALID)
		to_vmx(vcpu)->nested.need_vmcs12_to_shadow_sync = true;

	return kvm_skip_emulated_instruction(vcpu);
}

static int nested_vmx_fail(struct kvm_vcpu *vcpu, u32 vm_instruction_error)
{
	struct vcpu_vmx *vmx = to_vmx(vcpu);

	/*
	 * failValid writes the error number to the current VMCS, which
	 * can't be done if there isn't a current VMCS.
	 */
	if (vmx->nested.current_vmptr == INVALID_GPA &&
	    !evmptr_is_valid(vmx->nested.hv_evmcs_vmptr))
		return nested_vmx_failInvalid(vcpu);

	return nested_vmx_failValid(vcpu, vm_instruction_error);
}

static void nested_vmx_abort(struct kvm_vcpu *vcpu, u32 indicator)
{
	/* TODO: not to reset guest simply here. */
	kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu);
	pr_debug_ratelimited("kvm: nested vmx abort, indicator %d\n", indicator);
}

static inline bool vmx_control_verify(u32 control, u32 low, u32 high)
{
	return fixed_bits_valid(control, low, high);
}

static inline u64 vmx_control_msr(u32 low, u32 high)
{
	return low | ((u64)high << 32);
}

static void vmx_disable_shadow_vmcs(struct vcpu_vmx *vmx)
{
	secondary_exec_controls_clearbit(vmx, SECONDARY_EXEC_SHADOW_VMCS);
	vmcs_write64(VMCS_LINK_POINTER, INVALID_GPA);
	vmx->nested.need_vmcs12_to_shadow_sync = false;
}

static inline void nested_release_evmcs(struct kvm_vcpu *vcpu)
{
	struct vcpu_vmx *vmx = to_vmx(vcpu);

	if (evmptr_is_valid(vmx->nested.hv_evmcs_vmptr)) {
		kvm_vcpu_unmap(vcpu, &vmx->nested.hv_evmcs_map, true);
		vmx->nested.hv_evmcs = NULL;
	}

	vmx->nested.hv_evmcs_vmptr = EVMPTR_INVALID;
}

static void vmx_sync_vmcs_host_state(struct vcpu_vmx *vmx,
				     struct loaded_vmcs *prev)
{
	struct vmcs_host_state *dest, *src;

	if (unlikely(!vmx->guest_state_loaded))
		return;

	src = &prev->host_state;
	dest = &vmx->loaded_vmcs->host_state;

	vmx_set_host_fs_gs(dest, src->fs_sel, src->gs_sel, src->fs_base, src->gs_base);
	dest->ldt_sel = src->ldt_sel;
#ifdef CONFIG_X86_64
	dest->ds_sel = src->ds_sel;
	dest->es_sel = src->es_sel;
#endif
}

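/*
 * Switch the currently loaded VMCS (vmcs01 <-> vmcs02) on this CPU,
 * carrying over the cached host state and invalidating all lazily
 * loaded guest registers so they are re-read from the new VMCS.
 */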
static void vmx_switch_vmcs(struct kvm_vcpu *vcpu, struct loaded_vmcs *vmcs)
{
	struct vcpu_vmx *vmx = to_vmx(vcpu);
	struct loaded_vmcs *prev;
	int cpu;

	if (WARN_ON_ONCE(vmx->loaded_vmcs == vmcs))
		return;

	cpu = get_cpu();
	prev = vmx->loaded_vmcs;
	vmx->loaded_vmcs = vmcs;
	vmx_vcpu_load_vmcs(vcpu, cpu, prev);
	vmx_sync_vmcs_host_state(vmx, prev);
	put_cpu();

	vcpu->arch.regs_avail = ~VMX_REGS_LAZY_LOAD_SET;

	/*
	 * All lazily updated registers will be reloaded from VMCS12 on both
	 * vmentry and vmexit.
	 */
	vcpu->arch.regs_dirty = 0;
}

/*
 * Free whatever needs to be freed from vmx->nested when L1 goes down, or
 * just stops using VMX.
 */
static void free_nested(struct kvm_vcpu *vcpu)
{
	struct vcpu_vmx *vmx = to_vmx(vcpu);

	if (WARN_ON_ONCE(vmx->loaded_vmcs != &vmx->vmcs01))
		vmx_switch_vmcs(vcpu, &vmx->vmcs01);

	if (!vmx->nested.vmxon && !vmx->nested.smm.vmxon)
		return;

	kvm_clear_request(KVM_REQ_GET_NESTED_STATE_PAGES, vcpu);

	vmx->nested.vmxon = false;
	vmx->nested.smm.vmxon = false;
	vmx->nested.vmxon_ptr = INVALID_GPA;
	free_vpid(vmx->nested.vpid02);
	vmx->nested.posted_intr_nv = -1;
	vmx->nested.current_vmptr = INVALID_GPA;
	if (enable_shadow_vmcs) {
		vmx_disable_shadow_vmcs(vmx);
		vmcs_clear(vmx->vmcs01.shadow_vmcs);
		free_vmcs(vmx->vmcs01.shadow_vmcs);
		vmx->vmcs01.shadow_vmcs = NULL;
	}
	kfree(vmx->nested.cached_vmcs12);
	vmx->nested.cached_vmcs12 = NULL;
	kfree(vmx->nested.cached_shadow_vmcs12);
	vmx->nested.cached_shadow_vmcs12 = NULL;
	/* Unpin physical memory we referred to in the vmcs02 */
	if (vmx->nested.apic_access_page) {
		kvm_release_page_clean(vmx->nested.apic_access_page);
		vmx->nested.apic_access_page = NULL;
	}
	kvm_vcpu_unmap(vcpu, &vmx->nested.virtual_apic_map, true);
	kvm_vcpu_unmap(vcpu, &vmx->nested.pi_desc_map, true);
	vmx->nested.pi_desc = NULL;

	kvm_mmu_free_roots(vcpu, &vcpu->arch.guest_mmu, KVM_MMU_ROOTS_ALL);

	nested_release_evmcs(vcpu);

	free_loaded_vmcs(&vmx->nested.vmcs02);
}

/*
 * Ensure that the current vmcs of the logical processor is the
 * vmcs01 of the vcpu before calling free_nested().
 */
void nested_vmx_free_vcpu(struct kvm_vcpu *vcpu)
{
	vcpu_load(vcpu);
	vmx_leave_nested(vcpu);
	vcpu_put(vcpu);
}

#define EPTP_PA_MASK	GENMASK_ULL(51, 12)

static bool nested_ept_root_matches(hpa_t root_hpa, u64 root_eptp, u64 eptp)
{
	return VALID_PAGE(root_hpa) &&
	       ((root_eptp & EPTP_PA_MASK) == (eptp & EPTP_PA_MASK));
}

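/*
 * Invalidate @addr in each previously cached shadow EPT root whose EPTP
 * shares the same address bits (EP4TA) as @eptp.
 */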
static void nested_ept_invalidate_addr(struct kvm_vcpu *vcpu, gpa_t eptp,
				       gpa_t addr)
{
	uint i;
	struct kvm_mmu_root_info *cached_root;

	WARN_ON_ONCE(!mmu_is_nested(vcpu));

	for (i = 0; i < KVM_MMU_NUM_PREV_ROOTS; i++) {
		cached_root = &vcpu->arch.mmu->prev_roots[i];

		if (nested_ept_root_matches(cached_root->hpa, cached_root->pgd,
					    eptp))
			vcpu->arch.mmu->invlpg(vcpu, addr, cached_root->hpa);
	}
}

static void nested_ept_inject_page_fault(struct kvm_vcpu *vcpu,
		struct x86_exception *fault)
{
	struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
	struct vcpu_vmx *vmx = to_vmx(vcpu);
	u32 vm_exit_reason;
	unsigned long exit_qualification = vcpu->arch.exit_qualification;

	if (vmx->nested.pml_full) {
		vm_exit_reason = EXIT_REASON_PML_FULL;
		vmx->nested.pml_full = false;
		exit_qualification &= INTR_INFO_UNBLOCK_NMI;
	} else {
		if (fault->error_code & PFERR_RSVD_MASK)
			vm_exit_reason = EXIT_REASON_EPT_MISCONFIG;
		else
			vm_exit_reason = EXIT_REASON_EPT_VIOLATION;

		/*
		 * Although the caller (kvm_inject_emulated_page_fault) would
		 * have already synced the faulting address in the shadow EPT
		 * tables for the current EPTP12, we also need to sync it for
		 * any other cached EPTP02s based on the same EP4TA, since the
		 * TLB associates mappings to the EP4TA rather than the full EPTP.
		 */
		nested_ept_invalidate_addr(vcpu, vmcs12->ept_pointer,
					   fault->address);
	}

	nested_vmx_vmexit(vcpu, vm_exit_reason, 0, exit_qualification);
	vmcs12->guest_physical_address = fault->address;
}

static void nested_ept_new_eptp(struct kvm_vcpu *vcpu)
{
	struct vcpu_vmx *vmx = to_vmx(vcpu);
	bool execonly = vmx->nested.msrs.ept_caps & VMX_EPT_EXECUTE_ONLY_BIT;
	int ept_lpage_level = ept_caps_to_lpage_level(vmx->nested.msrs.ept_caps);

	kvm_init_shadow_ept_mmu(vcpu, execonly, ept_lpage_level,
				nested_ept_ad_enabled(vcpu),
				nested_ept_get_eptp(vcpu));
}

static void nested_ept_init_mmu_context(struct kvm_vcpu *vcpu)
{
	WARN_ON(mmu_is_nested(vcpu));

	vcpu->arch.mmu = &vcpu->arch.guest_mmu;
	nested_ept_new_eptp(vcpu);
	vcpu->arch.mmu->get_guest_pgd     = nested_ept_get_eptp;
	vcpu->arch.mmu->inject_page_fault = nested_ept_inject_page_fault;
	vcpu->arch.mmu->get_pdptr         = kvm_pdptr_read;

	vcpu->arch.walk_mmu = &vcpu->arch.nested_mmu;
}

static void nested_ept_uninit_mmu_context(struct kvm_vcpu *vcpu)
{
	vcpu->arch.mmu = &vcpu->arch.root_mmu;
	vcpu->arch.walk_mmu = &vcpu->arch.root_mmu;
}

static bool nested_vmx_is_page_fault_vmexit(struct vmcs12 *vmcs12,
					    u16 error_code)
{
	bool inequality, bit;

	bit = (vmcs12->exception_bitmap & (1u << PF_VECTOR)) != 0;
	inequality =
		(error_code & vmcs12->page_fault_error_code_mask) !=
		 vmcs12->page_fault_error_code_match;
	return inequality ^ bit;
}


/*
 * KVM wants to inject page-faults which it got to the guest. This function
 * checks whether in a nested guest, we need to inject them to L1 or L2.
 */
static int nested_vmx_check_exception(struct kvm_vcpu *vcpu, unsigned long *exit_qual)
{
	struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
	unsigned int nr = vcpu->arch.exception.nr;
	bool has_payload = vcpu->arch.exception.has_payload;
	unsigned long payload = vcpu->arch.exception.payload;

	if (nr == PF_VECTOR) {
		if (vcpu->arch.exception.nested_apf) {
			*exit_qual = vcpu->arch.apf.nested_apf_token;
			return 1;
		}
		if (nested_vmx_is_page_fault_vmexit(vmcs12,
						    vcpu->arch.exception.error_code)) {
			*exit_qual = has_payload ? payload : vcpu->arch.cr2;
			return 1;
		}
	} else if (vmcs12->exception_bitmap & (1u << nr)) {
		if (nr == DB_VECTOR) {
			if (!has_payload) {
				payload = vcpu->arch.dr6;
				payload &= ~DR6_BT;
				payload ^= DR6_ACTIVE_LOW;
			}
			*exit_qual = payload;
		} else
			*exit_qual = 0;
		return 1;
	}

	return 0;
}


static void vmx_inject_page_fault_nested(struct kvm_vcpu *vcpu,
		struct x86_exception *fault)
{
	struct vmcs12 *vmcs12 = get_vmcs12(vcpu);

	WARN_ON(!is_guest_mode(vcpu));

	if (nested_vmx_is_page_fault_vmexit(vmcs12, fault->error_code) &&
	    !to_vmx(vcpu)->nested.nested_run_pending) {
		vmcs12->vm_exit_intr_error_code = fault->error_code;
		nested_vmx_vmexit(vcpu, EXIT_REASON_EXCEPTION_NMI,
				  PF_VECTOR | INTR_TYPE_HARD_EXCEPTION |
				  INTR_INFO_DELIVER_CODE_MASK | INTR_INFO_VALID_MASK,
				  fault->address);
	} else {
		kvm_inject_page_fault(vcpu, fault);
	}
}

static int nested_vmx_check_io_bitmap_controls(struct kvm_vcpu *vcpu,
					       struct vmcs12 *vmcs12)
{
	if (!nested_cpu_has(vmcs12, CPU_BASED_USE_IO_BITMAPS))
		return 0;

	if (CC(!page_address_valid(vcpu, vmcs12->io_bitmap_a)) ||
	    CC(!page_address_valid(vcpu, vmcs12->io_bitmap_b)))
		return -EINVAL;

	return 0;
}

static int nested_vmx_check_msr_bitmap_controls(struct kvm_vcpu *vcpu,
						struct vmcs12 *vmcs12)
{
	if (!nested_cpu_has(vmcs12, CPU_BASED_USE_MSR_BITMAPS))
		return 0;

	if (CC(!page_address_valid(vcpu, vmcs12->msr_bitmap)))
		return -EINVAL;

	return 0;
}

static int nested_vmx_check_tpr_shadow_controls(struct kvm_vcpu *vcpu,
						struct vmcs12 *vmcs12)
{
	if (!nested_cpu_has(vmcs12, CPU_BASED_TPR_SHADOW))
		return 0;

	if (CC(!page_address_valid(vcpu, vmcs12->virtual_apic_page_addr)))
		return -EINVAL;

	return 0;
}

/*
 * For x2APIC MSRs, ignore the vmcs01 bitmap.  L1 can enable x2APIC without L1
 * itself utilizing x2APIC.  All MSRs were previously set to be intercepted,
 * only the "disable intercept" case needs to be handled.
 */
static void nested_vmx_disable_intercept_for_x2apic_msr(unsigned long *msr_bitmap_l1,
							unsigned long *msr_bitmap_l0,
							u32 msr, int type)
{
	if (type & MSR_TYPE_R && !vmx_test_msr_bitmap_read(msr_bitmap_l1, msr))
		vmx_clear_msr_bitmap_read(msr_bitmap_l0, msr);

	if (type & MSR_TYPE_W && !vmx_test_msr_bitmap_write(msr_bitmap_l1, msr))
		vmx_clear_msr_bitmap_write(msr_bitmap_l0, msr);
}

static inline void enable_x2apic_msr_intercepts(unsigned long *msr_bitmap)
{
	int msr;

	for (msr = 0x800; msr <= 0x8ff; msr += BITS_PER_LONG) {
		unsigned word = msr / BITS_PER_LONG;

		msr_bitmap[word] = ~0;
		msr_bitmap[word + (0x800 / sizeof(long))] = ~0;
	}
}

#define BUILD_NVMX_MSR_INTERCEPT_HELPER(rw)					\
static inline									\
void nested_vmx_set_msr_##rw##_intercept(struct vcpu_vmx *vmx,			\
					 unsigned long *msr_bitmap_l1,		\
					 unsigned long *msr_bitmap_l0, u32 msr)	\
{										\
	if (vmx_test_msr_bitmap_##rw(vmx->vmcs01.msr_bitmap, msr) ||		\
	    vmx_test_msr_bitmap_##rw(msr_bitmap_l1, msr))			\
		vmx_set_msr_bitmap_##rw(msr_bitmap_l0, msr);			\
	else									\
		vmx_clear_msr_bitmap_##rw(msr_bitmap_l0, msr);			\
}
BUILD_NVMX_MSR_INTERCEPT_HELPER(read)
BUILD_NVMX_MSR_INTERCEPT_HELPER(write)

static inline void nested_vmx_set_intercept_for_msr(struct vcpu_vmx *vmx,
						    unsigned long *msr_bitmap_l1,
						    unsigned long *msr_bitmap_l0,
						    u32 msr, int types)
{
	if (types & MSR_TYPE_R)
		nested_vmx_set_msr_read_intercept(vmx, msr_bitmap_l1,
						  msr_bitmap_l0, msr);
	if (types & MSR_TYPE_W)
		nested_vmx_set_msr_write_intercept(vmx, msr_bitmap_l1,
						   msr_bitmap_l0, msr);
}

/*
 * Merge L0's and L1's MSR bitmap, return false to indicate that
 * we do not use the hardware.
 */
static inline bool nested_vmx_prepare_msr_bitmap(struct kvm_vcpu *vcpu,
						 struct vmcs12 *vmcs12)
{
	struct vcpu_vmx *vmx = to_vmx(vcpu);
	int msr;
	unsigned long *msr_bitmap_l1;
	unsigned long *msr_bitmap_l0 = vmx->nested.vmcs02.msr_bitmap;
	struct kvm_host_map *map = &vmx->nested.msr_bitmap_map;

	/* Nothing to do if the MSR bitmap is not in use. */
	if (!cpu_has_vmx_msr_bitmap() ||
	    !nested_cpu_has(vmcs12, CPU_BASED_USE_MSR_BITMAPS))
		return false;

	if (kvm_vcpu_map(vcpu, gpa_to_gfn(vmcs12->msr_bitmap), map))
		return false;

	msr_bitmap_l1 = (unsigned long *)map->hva;

	/*
	 * To keep the control flow simple, pay eight 8-byte writes (sixteen
	 * 4-byte writes on 32-bit systems) up front to enable intercepts for
	 * the x2APIC MSR range and selectively toggle those relevant to L2.
	 */
	enable_x2apic_msr_intercepts(msr_bitmap_l0);

	if (nested_cpu_has_virt_x2apic_mode(vmcs12)) {
		if (nested_cpu_has_apic_reg_virt(vmcs12)) {
			/*
			 * L0 need not intercept reads for MSRs between 0x800
			 * and 0x8ff, it just lets the processor take the value
			 * from the virtual-APIC page; take those 256 bits
			 * directly from the L1 bitmap.
			 */
			for (msr = 0x800; msr <= 0x8ff; msr += BITS_PER_LONG) {
				unsigned word = msr / BITS_PER_LONG;

				msr_bitmap_l0[word] = msr_bitmap_l1[word];
			}
		}

		nested_vmx_disable_intercept_for_x2apic_msr(
			msr_bitmap_l1, msr_bitmap_l0,
			X2APIC_MSR(APIC_TASKPRI),
			MSR_TYPE_R | MSR_TYPE_W);

		if (nested_cpu_has_vid(vmcs12)) {
			nested_vmx_disable_intercept_for_x2apic_msr(
				msr_bitmap_l1, msr_bitmap_l0,
				X2APIC_MSR(APIC_EOI),
				MSR_TYPE_W);
			nested_vmx_disable_intercept_for_x2apic_msr(
				msr_bitmap_l1, msr_bitmap_l0,
				X2APIC_MSR(APIC_SELF_IPI),
				MSR_TYPE_W);
		}
	}

	/*
	 * Always check vmcs01's bitmap to honor userspace MSR filters and any
	 * other runtime changes to vmcs01's bitmap, e.g. dynamic pass-through.
	 */
#ifdef CONFIG_X86_64
	nested_vmx_set_intercept_for_msr(vmx, msr_bitmap_l1, msr_bitmap_l0,
					 MSR_FS_BASE, MSR_TYPE_RW);

	nested_vmx_set_intercept_for_msr(vmx, msr_bitmap_l1, msr_bitmap_l0,
					 MSR_GS_BASE, MSR_TYPE_RW);

	nested_vmx_set_intercept_for_msr(vmx, msr_bitmap_l1, msr_bitmap_l0,
					 MSR_KERNEL_GS_BASE, MSR_TYPE_RW);
#endif
	nested_vmx_set_intercept_for_msr(vmx, msr_bitmap_l1, msr_bitmap_l0,
					 MSR_IA32_SPEC_CTRL, MSR_TYPE_RW);

	nested_vmx_set_intercept_for_msr(vmx, msr_bitmap_l1, msr_bitmap_l0,
					 MSR_IA32_PRED_CMD, MSR_TYPE_W);

	kvm_vcpu_unmap(vcpu, &vmx->nested.msr_bitmap_map, false);

	vmx->nested.force_msr_bitmap_recalc = false;

	return true;
}

static void nested_cache_shadow_vmcs12(struct kvm_vcpu *vcpu,
				       struct vmcs12 *vmcs12)
{
	struct vcpu_vmx *vmx = to_vmx(vcpu);
	struct gfn_to_hva_cache *ghc = &vmx->nested.shadow_vmcs12_cache;

	if (!nested_cpu_has_shadow_vmcs(vmcs12) ||
	    vmcs12->vmcs_link_pointer == INVALID_GPA)
		return;

	if (ghc->gpa != vmcs12->vmcs_link_pointer &&
	    kvm_gfn_to_hva_cache_init(vcpu->kvm, ghc,
				      vmcs12->vmcs_link_pointer, VMCS12_SIZE))
		return;

	kvm_read_guest_cached(vmx->vcpu.kvm, ghc, get_shadow_vmcs12(vcpu),
			      VMCS12_SIZE);
}

static void nested_flush_cached_shadow_vmcs12(struct kvm_vcpu *vcpu,
					      struct vmcs12 *vmcs12)
{
	struct vcpu_vmx *vmx = to_vmx(vcpu);
	struct gfn_to_hva_cache *ghc = &vmx->nested.shadow_vmcs12_cache;

	if (!nested_cpu_has_shadow_vmcs(vmcs12) ||
	    vmcs12->vmcs_link_pointer == INVALID_GPA)
		return;

	if (ghc->gpa != vmcs12->vmcs_link_pointer &&
	    kvm_gfn_to_hva_cache_init(vcpu->kvm, ghc,
				      vmcs12->vmcs_link_pointer, VMCS12_SIZE))
		return;

	kvm_write_guest_cached(vmx->vcpu.kvm, ghc, get_shadow_vmcs12(vcpu),
			       VMCS12_SIZE);
}

/*
 * In nested virtualization, check if L1 has set
 * VM_EXIT_ACK_INTR_ON_EXIT
 */
static bool nested_exit_intr_ack_set(struct kvm_vcpu *vcpu)
{
	return get_vmcs12(vcpu)->vm_exit_controls &
		VM_EXIT_ACK_INTR_ON_EXIT;
}

static int nested_vmx_check_apic_access_controls(struct kvm_vcpu *vcpu,
					  struct vmcs12 *vmcs12)
{
	if (nested_cpu_has2(vmcs12, SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES) &&
	    CC(!page_address_valid(vcpu, vmcs12->apic_access_addr)))
		return -EINVAL;
	else
		return 0;
}

static int nested_vmx_check_apicv_controls(struct kvm_vcpu *vcpu,
					   struct vmcs12 *vmcs12)
{
	if (!nested_cpu_has_virt_x2apic_mode(vmcs12) &&
	    !nested_cpu_has_apic_reg_virt(vmcs12) &&
	    !nested_cpu_has_vid(vmcs12) &&
	    !nested_cpu_has_posted_intr(vmcs12))
		return 0;

	/*
	 * If virtualize x2apic mode is enabled,
	 * virtualize apic access must be disabled.
	 */
	if (CC(nested_cpu_has_virt_x2apic_mode(vmcs12) &&
	       nested_cpu_has2(vmcs12, SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES)))
		return -EINVAL;

	/*
	 * If virtual interrupt delivery is enabled,
	 * we must exit on external interrupts.
	 */
	if (CC(nested_cpu_has_vid(vmcs12) && !nested_exit_on_intr(vcpu)))
		return -EINVAL;

	/*
	 * bits 15:8 should be zero in posted_intr_nv,
	 * the descriptor address has been already checked
	 * in nested_get_vmcs12_pages.
	 *
	 * bits 5:0 of posted_intr_desc_addr should be zero.
	 */
	if (nested_cpu_has_posted_intr(vmcs12) &&
	   (CC(!nested_cpu_has_vid(vmcs12)) ||
	    CC(!nested_exit_intr_ack_set(vcpu)) ||
	    CC((vmcs12->posted_intr_nv & 0xff00)) ||
	    CC(!kvm_vcpu_is_legal_aligned_gpa(vcpu, vmcs12->posted_intr_desc_addr, 64))))
		return -EINVAL;

	/* tpr shadow is needed by all apicv features. */
	if (CC(!nested_cpu_has(vmcs12, CPU_BASED_TPR_SHADOW)))
		return -EINVAL;

	return 0;
}

static int nested_vmx_check_msr_switch(struct kvm_vcpu *vcpu,
				       u32 count, u64 addr)
{
	if (count == 0)
		return 0;

	if (!kvm_vcpu_is_legal_aligned_gpa(vcpu, addr, 16) ||
	    !kvm_vcpu_is_legal_gpa(vcpu, (addr + count * sizeof(struct vmx_msr_entry) - 1)))
		return -EINVAL;

	return 0;
}

static int nested_vmx_check_exit_msr_switch_controls(struct kvm_vcpu *vcpu,
						     struct vmcs12 *vmcs12)
{
	if (CC(nested_vmx_check_msr_switch(vcpu,
					   vmcs12->vm_exit_msr_load_count,
					   vmcs12->vm_exit_msr_load_addr)) ||
	    CC(nested_vmx_check_msr_switch(vcpu,
					   vmcs12->vm_exit_msr_store_count,
					   vmcs12->vm_exit_msr_store_addr)))
		return -EINVAL;

	return 0;
}

static int nested_vmx_check_entry_msr_switch_controls(struct kvm_vcpu *vcpu,
						      struct vmcs12 *vmcs12)
{
	if (CC(nested_vmx_check_msr_switch(vcpu,
					   vmcs12->vm_entry_msr_load_count,
					   vmcs12->vm_entry_msr_load_addr)))
		return -EINVAL;

	return 0;
}

static int nested_vmx_check_pml_controls(struct kvm_vcpu *vcpu,
					 struct vmcs12 *vmcs12)
{
	if (!nested_cpu_has_pml(vmcs12))
		return 0;

	if (CC(!nested_cpu_has_ept(vmcs12)) ||
	    CC(!page_address_valid(vcpu, vmcs12->pml_address)))
		return -EINVAL;

	return 0;
}

static int nested_vmx_check_unrestricted_guest_controls(struct kvm_vcpu *vcpu,
							struct vmcs12 *vmcs12)
{
	if (CC(nested_cpu_has2(vmcs12, SECONDARY_EXEC_UNRESTRICTED_GUEST) &&
	       !nested_cpu_has_ept(vmcs12)))
		return -EINVAL;
	return 0;
}

static int nested_vmx_check_mode_based_ept_exec_controls(struct kvm_vcpu *vcpu,
							 struct vmcs12 *vmcs12)
{
	if (CC(nested_cpu_has2(vmcs12, SECONDARY_EXEC_MODE_BASED_EPT_EXEC) &&
	       !nested_cpu_has_ept(vmcs12)))
		return -EINVAL;
	return 0;
}

static int nested_vmx_check_shadow_vmcs_controls(struct kvm_vcpu *vcpu,
						 struct vmcs12 *vmcs12)
{
	if (!nested_cpu_has_shadow_vmcs(vmcs12))
		return 0;

	if (CC(!page_address_valid(vcpu, vmcs12->vmread_bitmap)) ||
	    CC(!page_address_valid(vcpu, vmcs12->vmwrite_bitmap)))
		return -EINVAL;

	return 0;
}

static int nested_vmx_msr_check_common(struct kvm_vcpu *vcpu,
				       struct vmx_msr_entry *e)
{
	/* x2APIC MSR accesses are not allowed */
	if (CC(vcpu->arch.apic_base & X2APIC_ENABLE && e->index >> 8 == 0x8))
		return -EINVAL;
	if (CC(e->index == MSR_IA32_UCODE_WRITE) || /* SDM Table 35-2 */
	    CC(e->index == MSR_IA32_UCODE_REV))
		return -EINVAL;
	if (CC(e->reserved != 0))
		return -EINVAL;
	return 0;
}

static int nested_vmx_load_msr_check(struct kvm_vcpu *vcpu,
				     struct vmx_msr_entry *e)
{
	if (CC(e->index == MSR_FS_BASE) ||
	    CC(e->index == MSR_GS_BASE) ||
	    CC(e->index == MSR_IA32_SMM_MONITOR_CTL) || /* SMM is not supported */
	    nested_vmx_msr_check_common(vcpu, e))
		return -EINVAL;
	return 0;
}

static int nested_vmx_store_msr_check(struct kvm_vcpu *vcpu,
				      struct vmx_msr_entry *e)
{
	if (CC(e->index == MSR_IA32_SMBASE) || /* SMM is not supported */
	    nested_vmx_msr_check_common(vcpu, e))
		return -EINVAL;
	return 0;
}

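/*
 * Maximum number of entries L1 may place in a VM-Enter/VM-Exit MSR load
 * or store list, as derived from the vCPU's virtual IA32_VMX_MISC MSR.
 */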
static u32 nested_vmx_max_atomic_switch_msrs(struct kvm_vcpu *vcpu)
{
	struct vcpu_vmx *vmx = to_vmx(vcpu);
	u64 vmx_misc = vmx_control_msr(vmx->nested.msrs.misc_low,
				       vmx->nested.msrs.misc_high);

	return (vmx_misc_max_msr(vmx_misc) + 1) * VMX_MISC_MSR_LIST_MULTIPLIER;
}

/*
 * Load guest's/host's msr at nested entry/exit.
 * return 0 for success, entry index for failure.
 *
 * One of the failure modes for MSR load/store is when a list exceeds the
 * virtual hardware's capacity. To maintain compatibility with hardware inasmuch
 * as possible, process all valid entries before failing rather than precheck
 * for a capacity violation.
 */
static u32 nested_vmx_load_msr(struct kvm_vcpu *vcpu, u64 gpa, u32 count)
{
	u32 i;
	struct vmx_msr_entry e;
	u32 max_msr_list_size = nested_vmx_max_atomic_switch_msrs(vcpu);

	for (i = 0; i < count; i++) {
		if (unlikely(i >= max_msr_list_size))
			goto fail;

		if (kvm_vcpu_read_guest(vcpu, gpa + i * sizeof(e),
					&e, sizeof(e))) {
			pr_debug_ratelimited(
				"%s cannot read MSR entry (%u, 0x%08llx)\n",
				__func__, i, gpa + i * sizeof(e));
			goto fail;
		}
		if (nested_vmx_load_msr_check(vcpu, &e)) {
			pr_debug_ratelimited(
				"%s check failed (%u, 0x%x, 0x%x)\n",
				__func__, i, e.index, e.reserved);
			goto fail;
		}
		if (kvm_set_msr(vcpu, e.index, e.value)) {
			pr_debug_ratelimited(
				"%s cannot write MSR (%u, 0x%x, 0x%llx)\n",
				__func__, i, e.index, e.value);
			goto fail;
		}
	}
	return 0;
fail:
	/* Note, max_msr_list_size is at most 4096, i.e. this can't wrap. */
	return i + 1;
}

static bool nested_vmx_get_vmexit_msr_value(struct kvm_vcpu *vcpu,
					    u32 msr_index,
					    u64 *data)
{
	struct vcpu_vmx *vmx = to_vmx(vcpu);

	/*
	 * If the L0 hypervisor stored a more accurate value for the TSC that
	 * does not include the time taken for emulation of the L2->L1
	 * VM-exit in L0, use the more accurate value.
	 */
	if (msr_index == MSR_IA32_TSC) {
		int i = vmx_find_loadstore_msr_slot(&vmx->msr_autostore.guest,
						    MSR_IA32_TSC);

		if (i >= 0) {
			u64 val = vmx->msr_autostore.guest.val[i].value;

			*data = kvm_read_l1_tsc(vcpu, val);
			return true;
		}
	}

	if (kvm_get_msr(vcpu, msr_index, data)) {
		pr_debug_ratelimited("%s cannot read MSR (0x%x)\n", __func__,
				     msr_index);
		return false;
	}
	return true;
}

static bool read_and_check_msr_entry(struct kvm_vcpu *vcpu, u64 gpa, int i,
				     struct vmx_msr_entry *e)
{
	if (kvm_vcpu_read_guest(vcpu,
				gpa + i * sizeof(*e),
				e, 2 * sizeof(u32))) {
		pr_debug_ratelimited(
			"%s cannot read MSR entry (%u, 0x%08llx)\n",
			__func__, i, gpa + i * sizeof(*e));
		return false;
	}
	if (nested_vmx_store_msr_check(vcpu, e)) {
		pr_debug_ratelimited(
			"%s check failed (%u, 0x%x, 0x%x)\n",
			__func__, i, e->index, e->reserved);
		return false;
	}
	return true;
}

static int nested_vmx_store_msr(struct kvm_vcpu *vcpu, u64 gpa, u32 count)
{
	u64 data;
	u32 i;
	struct vmx_msr_entry e;
	u32 max_msr_list_size = nested_vmx_max_atomic_switch_msrs(vcpu);

	for (i = 0; i < count; i++) {
		if (unlikely(i >= max_msr_list_size))
			return -EINVAL;

		if (!read_and_check_msr_entry(vcpu, gpa, i, &e))
			return -EINVAL;

		if (!nested_vmx_get_vmexit_msr_value(vcpu, e.index, &data))
			return -EINVAL;

		if (kvm_vcpu_write_guest(vcpu,
					 gpa + i * sizeof(e) +
					 offsetof(struct vmx_msr_entry, value),
					 &data, sizeof(data))) {
			pr_debug_ratelimited(
				"%s cannot write MSR (%u, 0x%x, 0x%llx)\n",
				__func__, i, e.index, data);
			return -EINVAL;
		}
	}
	return 0;
}

static bool nested_msr_store_list_has_msr(struct kvm_vcpu *vcpu, u32 msr_index)
{
	struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
	u32 count = vmcs12->vm_exit_msr_store_count;
	u64 gpa = vmcs12->vm_exit_msr_store_addr;
	struct vmx_msr_entry e;
	u32 i;

	for (i = 0; i < count; i++) {
		if (!read_and_check_msr_entry(vcpu, gpa, i, &e))
			return false;

		if (e.index == msr_index)
			return true;
	}
	return false;
}

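/*
 * Keep vmx->msr_autostore.guest (the physical VM-Exit MSR-store list) in
 * sync with the vmcs12 MSR-store list: add @msr_index if L1 wants it
 * stored on VM-Exit, drop it once L1 no longer does.
 */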
static void prepare_vmx_msr_autostore_list(struct kvm_vcpu *vcpu,
					   u32 msr_index)
{
	struct vcpu_vmx *vmx = to_vmx(vcpu);
	struct vmx_msrs *autostore = &vmx->msr_autostore.guest;
	bool in_vmcs12_store_list;
	int msr_autostore_slot;
	bool in_autostore_list;
	int last;

	msr_autostore_slot = vmx_find_loadstore_msr_slot(autostore, msr_index);
	in_autostore_list = msr_autostore_slot >= 0;
	in_vmcs12_store_list = nested_msr_store_list_has_msr(vcpu, msr_index);

	if (in_vmcs12_store_list && !in_autostore_list) {
		if (autostore->nr == MAX_NR_LOADSTORE_MSRS) {
			/*
			 * Emulated VMEntry does not fail here.  Instead a less
			 * accurate value will be returned by
			 * nested_vmx_get_vmexit_msr_value() using kvm_get_msr()
			 * instead of reading the value from the vmcs02 VMExit
			 * MSR-store area.
			 */
			pr_warn_ratelimited(
				"Not enough msr entries in msr_autostore.  Can't add msr %x\n",
				msr_index);
			return;
		}
		last = autostore->nr++;
		autostore->val[last].index = msr_index;
	} else if (!in_vmcs12_store_list && in_autostore_list) {
		last = --autostore->nr;
		autostore->val[msr_autostore_slot] = autostore->val[last];
	}
}

/*
 * Load guest's/host's cr3 at nested entry/exit.  @nested_ept is true if we are
 * emulating VM-Entry into a guest with EPT enabled.  On failure, the expected
 * Exit Qualification (for a VM-Entry consistency check VM-Exit) is assigned to
 * @entry_failure_code.
 */
static int nested_vmx_load_cr3(struct kvm_vcpu *vcpu, unsigned long cr3,
			       bool nested_ept, bool reload_pdptrs,
			       enum vm_entry_failure_code *entry_failure_code)
{
	if (CC(kvm_vcpu_is_illegal_gpa(vcpu, cr3))) {
		*entry_failure_code = ENTRY_FAIL_DEFAULT;
		return -EINVAL;
	}

	/*
	 * If PAE paging and EPT are both on, CR3 is not used by the CPU and
	 * must not be dereferenced.
	 */
	if (reload_pdptrs && !nested_ept && is_pae_paging(vcpu) &&
	    CC(!load_pdptrs(vcpu, cr3))) {
		*entry_failure_code = ENTRY_FAIL_PDPTE;
		return -EINVAL;
	}

	if (!nested_ept)
		kvm_mmu_new_pgd(vcpu, cr3);

	vcpu->arch.cr3 = cr3;
	kvm_register_mark_dirty(vcpu, VCPU_EXREG_CR3);

	/* Re-initialize the MMU, e.g. to pick up CR4 MMU role changes. */
	kvm_init_mmu(vcpu);

	return 0;
}

/*
 * Returns if KVM is able to config CPU to tag TLB entries
 * populated by L2 differently than TLB entries populated
 * by L1.
 *
 * If L0 uses EPT, L1 and L2 run with different EPTP because
 * guest_mode is part of kvm_mmu_page_role. Thus, TLB entries
 * are tagged with different EPTP.
 *
 * If L1 uses VPID and we allocated a vpid02, TLB entries are tagged
 * with different VPID (L1 entries are tagged with vmx->vpid
 * while L2 entries are tagged with vmx->nested.vpid02).
 */
static bool nested_has_guest_tlb_tag(struct kvm_vcpu *vcpu)
{
	struct vmcs12 *vmcs12 = get_vmcs12(vcpu);

	return enable_ept ||
	       (nested_cpu_has_vpid(vmcs12) && to_vmx(vcpu)->nested.vpid02);
}

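/*
 * Emulate the architectural TLB flush guarantees of a nested VMX
 * transition (VM-Enter or VM-Exit) as seen by L1 and L2, requesting the
 * appropriate flush from KVM's MMU.
 */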
static void nested_vmx_transition_tlb_flush(struct kvm_vcpu *vcpu,
					    struct vmcs12 *vmcs12,
					    bool is_vmenter)
{
	struct vcpu_vmx *vmx = to_vmx(vcpu);

	/*
	 * If vmcs12 doesn't use VPID, L1 expects linear and combined mappings
	 * for *all* contexts to be flushed on VM-Enter/VM-Exit, i.e. it's a
	 * full TLB flush from the guest's perspective.  This is required even
	 * if VPID is disabled in the host as KVM may need to synchronize the
	 * MMU in response to the guest TLB flush.
	 *
	 * Note, using TLB_FLUSH_GUEST is correct even if nested EPT is in use.
	 * EPT is a special snowflake, as guest-physical mappings aren't
	 * flushed on VPID invalidations, including VM-Enter or VM-Exit with
	 * VPID disabled.  As a result, KVM _never_ needs to sync nEPT
	 * entries on VM-Enter because L1 can't rely on VM-Enter to flush
	 * those mappings.
	 */
	if (!nested_cpu_has_vpid(vmcs12)) {
		kvm_make_request(KVM_REQ_TLB_FLUSH_GUEST, vcpu);
		return;
	}

	/* L2 should never have a VPID if VPID is disabled. */
	WARN_ON(!enable_vpid);

	/*
	 * VPID is enabled and in use by vmcs12.  If vpid12 is changing, then
	 * emulate a guest TLB flush as KVM does not track vpid12 history nor
	 * is the VPID incorporated into the MMU context.  I.e. KVM must assume
	 * that the new vpid12 has never been used and thus represents a new
	 * guest ASID that cannot have entries in the TLB.
	 */
	if (is_vmenter && vmcs12->virtual_processor_id != vmx->nested.last_vpid) {
		vmx->nested.last_vpid = vmcs12->virtual_processor_id;
		kvm_make_request(KVM_REQ_TLB_FLUSH_GUEST, vcpu);
		return;
	}

	/*
	 * If VPID is enabled, used by vmcs12, and vpid12 is not changing but
	 * does not have a unique TLB tag (ASID), i.e. EPT is disabled and
	 * KVM was unable to allocate a VPID for L2, flush the current context
	 * as the effective ASID is common to both L1 and L2.
	 */
	if (!nested_has_guest_tlb_tag(vcpu))
		kvm_make_request(KVM_REQ_TLB_FLUSH_CURRENT, vcpu);
}

static bool is_bitwise_subset(u64 superset, u64 subset, u64 mask)
{
	superset &= mask;
	subset &= mask;

	return (superset | subset) == superset;
}

static int vmx_restore_vmx_basic(struct vcpu_vmx *vmx, u64 data)
{
	const u64 feature_and_reserved =
		/* feature (except bit 48; see below) */
		BIT_ULL(49) | BIT_ULL(54) | BIT_ULL(55) |
		/* reserved */
		BIT_ULL(31) | GENMASK_ULL(47, 45) | GENMASK_ULL(63, 56);
	u64 vmx_basic = vmx->nested.msrs.basic;

	if (!is_bitwise_subset(vmx_basic, data, feature_and_reserved))
		return -EINVAL;

	/*
	 * KVM does not emulate a version of VMX that constrains physical
	 * addresses of VMX structures (e.g. VMCS) to 32-bits.
	 */
	if (data & BIT_ULL(48))
		return -EINVAL;

	if (vmx_basic_vmcs_revision_id(vmx_basic) !=
	    vmx_basic_vmcs_revision_id(data))
		return -EINVAL;

	if (vmx_basic_vmcs_size(vmx_basic) > vmx_basic_vmcs_size(data))
		return -EINVAL;

	vmx->nested.msrs.basic = data;
	return 0;
}

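/*
 * Restore a VMX control capability MSR (the "true" pin/proc/exit/entry
 * controls or the secondary proc-based controls) from userspace.  The new
 * value must not relax what KVM supports: must-be-1 bits (low 32 bits)
 * stay 1 and must-be-0 bits (high 32 bits) stay 0.
 */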
1235static int
1236vmx_restore_control_msr(struct vcpu_vmx *vmx, u32 msr_index, u64 data)
1237{
1238 u64 supported;
1239 u32 *lowp, *highp;
1240
1241 switch (msr_index) {
1242 case MSR_IA32_VMX_TRUE_PINBASED_CTLS:
1243 lowp = &vmx->nested.msrs.pinbased_ctls_low;
1244 highp = &vmx->nested.msrs.pinbased_ctls_high;
1245 break;
1246 case MSR_IA32_VMX_TRUE_PROCBASED_CTLS:
1247 lowp = &vmx->nested.msrs.procbased_ctls_low;
1248 highp = &vmx->nested.msrs.procbased_ctls_high;
1249 break;
1250 case MSR_IA32_VMX_TRUE_EXIT_CTLS:
1251 lowp = &vmx->nested.msrs.exit_ctls_low;
1252 highp = &vmx->nested.msrs.exit_ctls_high;
1253 break;
1254 case MSR_IA32_VMX_TRUE_ENTRY_CTLS:
1255 lowp = &vmx->nested.msrs.entry_ctls_low;
1256 highp = &vmx->nested.msrs.entry_ctls_high;
1257 break;
1258 case MSR_IA32_VMX_PROCBASED_CTLS2:
1259 lowp = &vmx->nested.msrs.secondary_ctls_low;
1260 highp = &vmx->nested.msrs.secondary_ctls_high;
1261 break;
1262 default:
1263 BUG();
1264 }
1265
1266 supported = vmx_control_msr(*lowp, *highp);
1267
1268 /* Check must-be-1 bits are still 1. */
1269 if (!is_bitwise_subset(data, supported, GENMASK_ULL(31, 0)))
1270 return -EINVAL;
1271
1272 /* Check must-be-0 bits are still 0. */
1273 if (!is_bitwise_subset(supported, data, GENMASK_ULL(63, 32)))
1274 return -EINVAL;
1275
1276 *lowp = data;
1277 *highp = data >> 32;
1278 return 0;
1279}
1280
1281static int vmx_restore_vmx_misc(struct vcpu_vmx *vmx, u64 data)
1282{
1283 const u64 feature_and_reserved_bits =
1284 /* feature */
1285 BIT_ULL(5) | GENMASK_ULL(8, 6) | BIT_ULL(14) | BIT_ULL(15) |
1286 BIT_ULL(28) | BIT_ULL(29) | BIT_ULL(30) |
1287 /* reserved */
1288 GENMASK_ULL(13, 9) | BIT_ULL(31);
1289 u64 vmx_misc;
1290
1291 vmx_misc = vmx_control_msr(vmx->nested.msrs.misc_low,
1292 vmx->nested.msrs.misc_high);
1293
1294 if (!is_bitwise_subset(vmx_misc, data, feature_and_reserved_bits))
1295 return -EINVAL;
1296
1297 if ((vmx->nested.msrs.pinbased_ctls_high &
1298 PIN_BASED_VMX_PREEMPTION_TIMER) &&
1299 vmx_misc_preemption_timer_rate(data) !=
1300 vmx_misc_preemption_timer_rate(vmx_misc))
1301 return -EINVAL;
1302
1303 if (vmx_misc_cr3_count(data) > vmx_misc_cr3_count(vmx_misc))
1304 return -EINVAL;
1305
1306 if (vmx_misc_max_msr(data) > vmx_misc_max_msr(vmx_misc))
1307 return -EINVAL;
1308
1309 if (vmx_misc_mseg_revid(data) != vmx_misc_mseg_revid(vmx_misc))
1310 return -EINVAL;
1311
1312 vmx->nested.msrs.misc_low = data;
1313 vmx->nested.msrs.misc_high = data >> 32;
1314
Sean Christopherson55d23752018-12-03 13:53:18 -08001315 return 0;
1316}
1317
1318static int vmx_restore_vmx_ept_vpid_cap(struct vcpu_vmx *vmx, u64 data)
1319{
1320 u64 vmx_ept_vpid_cap;
1321
1322 vmx_ept_vpid_cap = vmx_control_msr(vmx->nested.msrs.ept_caps,
1323 vmx->nested.msrs.vpid_caps);
1324
1325 /* Every bit is either reserved or a feature bit. */
1326 if (!is_bitwise_subset(vmx_ept_vpid_cap, data, -1ULL))
1327 return -EINVAL;
1328
1329 vmx->nested.msrs.ept_caps = data;
1330 vmx->nested.msrs.vpid_caps = data >> 32;
1331 return 0;
1332}
1333
1334static int vmx_restore_fixed0_msr(struct vcpu_vmx *vmx, u32 msr_index, u64 data)
1335{
1336 u64 *msr;
1337
1338 switch (msr_index) {
1339 case MSR_IA32_VMX_CR0_FIXED0:
1340 msr = &vmx->nested.msrs.cr0_fixed0;
1341 break;
1342 case MSR_IA32_VMX_CR4_FIXED0:
1343 msr = &vmx->nested.msrs.cr4_fixed0;
1344 break;
1345 default:
1346 BUG();
1347 }
1348
1349 /*
1350 * 1 bits (which indicates bits which "must-be-1" during VMX operation)
1351 * must be 1 in the restored value.
1352 */
1353 if (!is_bitwise_subset(data, *msr, -1ULL))
1354 return -EINVAL;
1355
1356 *msr = data;
1357 return 0;
1358}
1359
1360/*
1361 * Called when userspace is restoring VMX MSRs.
1362 *
1363 * Returns 0 on success, non-0 otherwise.
1364 */
1365int vmx_set_vmx_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data)
1366{
1367 struct vcpu_vmx *vmx = to_vmx(vcpu);
1368
1369 /*
1370 * Don't allow changes to the VMX capability MSRs while the vCPU
1371 * is in VMX operation.
1372 */
1373 if (vmx->nested.vmxon)
1374 return -EBUSY;
1375
1376 switch (msr_index) {
1377 case MSR_IA32_VMX_BASIC:
1378 return vmx_restore_vmx_basic(vmx, data);
1379 case MSR_IA32_VMX_PINBASED_CTLS:
1380 case MSR_IA32_VMX_PROCBASED_CTLS:
1381 case MSR_IA32_VMX_EXIT_CTLS:
1382 case MSR_IA32_VMX_ENTRY_CTLS:
1383 /*
1384 * The "non-true" VMX capability MSRs are generated from the
1385 * "true" MSRs, so we do not support restoring them directly.
1386 *
1387 * If userspace wants to emulate VMX_BASIC[55]=0, userspace
1388 * should restore the "true" MSRs with the must-be-1 bits
1389 * set according to the SDM Vol 3. A.2 "RESERVED CONTROLS AND
1390 * DEFAULT SETTINGS".
1391 */
1392 return -EINVAL;
1393 case MSR_IA32_VMX_TRUE_PINBASED_CTLS:
1394 case MSR_IA32_VMX_TRUE_PROCBASED_CTLS:
1395 case MSR_IA32_VMX_TRUE_EXIT_CTLS:
1396 case MSR_IA32_VMX_TRUE_ENTRY_CTLS:
1397 case MSR_IA32_VMX_PROCBASED_CTLS2:
1398 return vmx_restore_control_msr(vmx, msr_index, data);
1399 case MSR_IA32_VMX_MISC:
1400 return vmx_restore_vmx_misc(vmx, data);
1401 case MSR_IA32_VMX_CR0_FIXED0:
1402 case MSR_IA32_VMX_CR4_FIXED0:
1403 return vmx_restore_fixed0_msr(vmx, msr_index, data);
1404 case MSR_IA32_VMX_CR0_FIXED1:
1405 case MSR_IA32_VMX_CR4_FIXED1:
1406 /*
1407 * These MSRs are generated based on the vCPU's CPUID, so we
1408 * do not support restoring them directly.
1409 */
1410 return -EINVAL;
1411 case MSR_IA32_VMX_EPT_VPID_CAP:
1412 return vmx_restore_vmx_ept_vpid_cap(vmx, data);
1413 case MSR_IA32_VMX_VMCS_ENUM:
1414 vmx->nested.msrs.vmcs_enum = data;
1415 return 0;
Paolo Bonzinie8a70bd2019-07-02 14:40:40 +02001416 case MSR_IA32_VMX_VMFUNC:
1417 if (data & ~vmx->nested.msrs.vmfunc_controls)
1418 return -EINVAL;
1419 vmx->nested.msrs.vmfunc_controls = data;
1420 return 0;
Sean Christopherson55d23752018-12-03 13:53:18 -08001421 default:
1422 /*
1423 * The rest of the VMX capability MSRs do not support restore.
1424 */
1425 return -EINVAL;
1426 }
1427}
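
/*
 * For illustration only (not KVM code): from userspace, the restore path
 * above is reached via KVM_SET_MSRS on the vCPU fd, and it must happen
 * before the guest executes VMXON (later changes are rejected with -EBUSY).
 * A hedged userspace sketch; vcpu_fd and saved_true_pinbased_ctls are
 * assumed to come from the usual KVM_CREATE_VCPU / KVM_GET_MSRS flow:
 *
 *	struct {
 *		struct kvm_msrs hdr;
 *		struct kvm_msr_entry entries[1];
 *	} msrs = {
 *		.hdr.nmsrs = 1,
 *		.entries[0].index = MSR_IA32_VMX_TRUE_PINBASED_CTLS,
 *		.entries[0].data  = saved_true_pinbased_ctls,
 *	};
 *
 *	if (ioctl(vcpu_fd, KVM_SET_MSRS, &msrs) != 1)
 *		err(1, "KVM_SET_MSRS");
 */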
1428
1429/* Returns 0 on success, non-0 otherwise. */
1430int vmx_get_vmx_msr(struct nested_vmx_msrs *msrs, u32 msr_index, u64 *pdata)
1431{
1432 switch (msr_index) {
1433 case MSR_IA32_VMX_BASIC:
1434 *pdata = msrs->basic;
1435 break;
1436 case MSR_IA32_VMX_TRUE_PINBASED_CTLS:
1437 case MSR_IA32_VMX_PINBASED_CTLS:
1438 *pdata = vmx_control_msr(
1439 msrs->pinbased_ctls_low,
1440 msrs->pinbased_ctls_high);
1441 if (msr_index == MSR_IA32_VMX_PINBASED_CTLS)
1442 *pdata |= PIN_BASED_ALWAYSON_WITHOUT_TRUE_MSR;
1443 break;
1444 case MSR_IA32_VMX_TRUE_PROCBASED_CTLS:
1445 case MSR_IA32_VMX_PROCBASED_CTLS:
1446 *pdata = vmx_control_msr(
1447 msrs->procbased_ctls_low,
1448 msrs->procbased_ctls_high);
1449 if (msr_index == MSR_IA32_VMX_PROCBASED_CTLS)
1450 *pdata |= CPU_BASED_ALWAYSON_WITHOUT_TRUE_MSR;
1451 break;
1452 case MSR_IA32_VMX_TRUE_EXIT_CTLS:
1453 case MSR_IA32_VMX_EXIT_CTLS:
1454 *pdata = vmx_control_msr(
1455 msrs->exit_ctls_low,
1456 msrs->exit_ctls_high);
1457 if (msr_index == MSR_IA32_VMX_EXIT_CTLS)
1458 *pdata |= VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR;
1459 break;
1460 case MSR_IA32_VMX_TRUE_ENTRY_CTLS:
1461 case MSR_IA32_VMX_ENTRY_CTLS:
1462 *pdata = vmx_control_msr(
1463 msrs->entry_ctls_low,
1464 msrs->entry_ctls_high);
1465 if (msr_index == MSR_IA32_VMX_ENTRY_CTLS)
1466 *pdata |= VM_ENTRY_ALWAYSON_WITHOUT_TRUE_MSR;
1467 break;
1468 case MSR_IA32_VMX_MISC:
1469 *pdata = vmx_control_msr(
1470 msrs->misc_low,
1471 msrs->misc_high);
1472 break;
1473 case MSR_IA32_VMX_CR0_FIXED0:
1474 *pdata = msrs->cr0_fixed0;
1475 break;
1476 case MSR_IA32_VMX_CR0_FIXED1:
1477 *pdata = msrs->cr0_fixed1;
1478 break;
1479 case MSR_IA32_VMX_CR4_FIXED0:
1480 *pdata = msrs->cr4_fixed0;
1481 break;
1482 case MSR_IA32_VMX_CR4_FIXED1:
1483 *pdata = msrs->cr4_fixed1;
1484 break;
1485 case MSR_IA32_VMX_VMCS_ENUM:
1486 *pdata = msrs->vmcs_enum;
1487 break;
1488 case MSR_IA32_VMX_PROCBASED_CTLS2:
1489 *pdata = vmx_control_msr(
1490 msrs->secondary_ctls_low,
1491 msrs->secondary_ctls_high);
1492 break;
1493 case MSR_IA32_VMX_EPT_VPID_CAP:
1494 *pdata = msrs->ept_caps |
1495 ((u64)msrs->vpid_caps << 32);
1496 break;
1497 case MSR_IA32_VMX_VMFUNC:
1498 *pdata = msrs->vmfunc_controls;
1499 break;
1500 default:
1501 return 1;
1502 }
1503
1504 return 0;
1505}
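
/*
 * For illustration only (not KVM code): each *_CTLS value read above is
 * packed from a (low, high) pair - allowed-0 settings in bits 31:0 and
 * allowed-1 settings in bits 63:32 - which is what vmx_control_msr() does.
 * The non-TRUE variants additionally report the architectural "default1"
 * bits as always required, hence the *_ALWAYSON_WITHOUT_TRUE_MSR additions.
 * A sketch of that packing, assuming this layout; the helper name is
 * hypothetical.
 */
static u64 __maybe_unused example_pack_non_true_ctls(u32 allowed0, u32 allowed1,
						     u32 default1_bits)
{
	u64 msr = allowed0 | ((u64)allowed1 << 32);

	/* In the non-TRUE MSR, the default1 bits read as "must be 1". */
	return msr | default1_bits;
}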
1506
1507/*
Sean Christophersonfadcead2019-05-07 08:36:23 -07001508 * Copy the writable VMCS shadow fields back to the VMCS12, in case they have
1509 * been modified by the L1 guest. Note, "writable" in this context means
1510 * "writable by the guest", i.e. tagged SHADOW_FIELD_RW; the set of
1511 * fields tagged SHADOW_FIELD_RO may or may not align with the "read-only"
1512 * VM-exit information fields (which are actually writable if the vCPU is
1513 * configured to support "VMWRITE to any supported field in the VMCS").
Sean Christopherson55d23752018-12-03 13:53:18 -08001514 */
1515static void copy_shadow_to_vmcs12(struct vcpu_vmx *vmx)
1516{
Sean Christopherson55d23752018-12-03 13:53:18 -08001517 struct vmcs *shadow_vmcs = vmx->vmcs01.shadow_vmcs;
Sean Christophersonfadcead2019-05-07 08:36:23 -07001518 struct vmcs12 *vmcs12 = get_vmcs12(&vmx->vcpu);
Sean Christopherson1c6f0b42019-05-07 08:36:25 -07001519 struct shadow_vmcs_field field;
1520 unsigned long val;
Sean Christophersonfadcead2019-05-07 08:36:23 -07001521 int i;
Sean Christopherson55d23752018-12-03 13:53:18 -08001522
Paolo Bonzini88dddc12019-07-19 18:41:10 +02001523 if (WARN_ON(!shadow_vmcs))
1524 return;
1525
Sean Christopherson55d23752018-12-03 13:53:18 -08001526 preempt_disable();
1527
1528 vmcs_load(shadow_vmcs);
1529
Sean Christophersonfadcead2019-05-07 08:36:23 -07001530 for (i = 0; i < max_shadow_read_write_fields; i++) {
1531 field = shadow_read_write_fields[i];
Sean Christopherson1c6f0b42019-05-07 08:36:25 -07001532 val = __vmcs_readl(field.encoding);
1533 vmcs12_write_any(vmcs12, field.encoding, field.offset, val);
Sean Christopherson55d23752018-12-03 13:53:18 -08001534 }
1535
1536 vmcs_clear(shadow_vmcs);
1537 vmcs_load(vmx->loaded_vmcs->vmcs);
1538
1539 preempt_enable();
1540}
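
/*
 * For illustration only (not KVM code): the loop above walks {encoding,
 * offset} pairs generated from vmcs_shadow_fields.h - the encoding is what
 * VMREAD/VMWRITE understand, the offset locates the same field in struct
 * vmcs12.  A hedged sketch of a "store by offset" helper of the kind
 * vmcs12_write_any() provides, assuming the SDM rule that bits 14:13 of a
 * field encoding give its width; the helper name is hypothetical.
 */
static void __maybe_unused example_store_vmcs12_field(struct vmcs12 *vmcs12,
						      u16 encoding, u16 offset,
						      u64 val)
{
	char *p = (char *)vmcs12 + offset;

	switch ((encoding >> 13) & 0x3) {
	case 0: *(u16 *)p = (u16)val; break;		/* 16-bit field */
	case 1: *(u64 *)p = val; break;			/* 64-bit field */
	case 2: *(u32 *)p = (u32)val; break;		/* 32-bit field */
	case 3: *(unsigned long *)p = (unsigned long)val; break; /* natural width */
	}
}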
1541
1542static void copy_vmcs12_to_shadow(struct vcpu_vmx *vmx)
1543{
Sean Christopherson1c6f0b42019-05-07 08:36:25 -07001544 const struct shadow_vmcs_field *fields[] = {
Sean Christopherson55d23752018-12-03 13:53:18 -08001545 shadow_read_write_fields,
1546 shadow_read_only_fields
1547 };
1548 const int max_fields[] = {
1549 max_shadow_read_write_fields,
1550 max_shadow_read_only_fields
1551 };
Sean Christopherson55d23752018-12-03 13:53:18 -08001552 struct vmcs *shadow_vmcs = vmx->vmcs01.shadow_vmcs;
Sean Christopherson1c6f0b42019-05-07 08:36:25 -07001553 struct vmcs12 *vmcs12 = get_vmcs12(&vmx->vcpu);
1554 struct shadow_vmcs_field field;
1555 unsigned long val;
1556 int i, q;
Sean Christopherson55d23752018-12-03 13:53:18 -08001557
Paolo Bonzini88dddc12019-07-19 18:41:10 +02001558 if (WARN_ON(!shadow_vmcs))
1559 return;
1560
Sean Christopherson55d23752018-12-03 13:53:18 -08001561 vmcs_load(shadow_vmcs);
1562
1563 for (q = 0; q < ARRAY_SIZE(fields); q++) {
1564 for (i = 0; i < max_fields[q]; i++) {
1565 field = fields[q][i];
Sean Christopherson1c6f0b42019-05-07 08:36:25 -07001566 val = vmcs12_read_any(vmcs12, field.encoding,
1567 field.offset);
1568 __vmcs_writel(field.encoding, val);
Sean Christopherson55d23752018-12-03 13:53:18 -08001569 }
1570 }
1571
1572 vmcs_clear(shadow_vmcs);
1573 vmcs_load(vmx->loaded_vmcs->vmcs);
1574}
1575
Vitaly Kuznetsovd6bf71a2021-05-26 15:20:22 +02001576static void copy_enlightened_to_vmcs12(struct vcpu_vmx *vmx, u32 hv_clean_fields)
Sean Christopherson55d23752018-12-03 13:53:18 -08001577{
1578 struct vmcs12 *vmcs12 = vmx->nested.cached_vmcs12;
1579 struct hv_enlightened_vmcs *evmcs = vmx->nested.hv_evmcs;
1580
1581 /* HV_VMX_ENLIGHTENED_CLEAN_FIELD_NONE */
1582 vmcs12->tpr_threshold = evmcs->tpr_threshold;
1583 vmcs12->guest_rip = evmcs->guest_rip;
1584
Vitaly Kuznetsovd6bf71a2021-05-26 15:20:22 +02001585 if (unlikely(!(hv_clean_fields &
Sean Christopherson55d23752018-12-03 13:53:18 -08001586 HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_BASIC))) {
1587 vmcs12->guest_rsp = evmcs->guest_rsp;
1588 vmcs12->guest_rflags = evmcs->guest_rflags;
1589 vmcs12->guest_interruptibility_info =
1590 evmcs->guest_interruptibility_info;
1591 }
1592
Vitaly Kuznetsovd6bf71a2021-05-26 15:20:22 +02001593 if (unlikely(!(hv_clean_fields &
Sean Christopherson55d23752018-12-03 13:53:18 -08001594 HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_PROC))) {
1595 vmcs12->cpu_based_vm_exec_control =
1596 evmcs->cpu_based_vm_exec_control;
1597 }
1598
Vitaly Kuznetsovd6bf71a2021-05-26 15:20:22 +02001599 if (unlikely(!(hv_clean_fields &
Vitaly Kuznetsovf9bc5222019-06-13 13:35:02 +02001600 HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_EXCPN))) {
Sean Christopherson55d23752018-12-03 13:53:18 -08001601 vmcs12->exception_bitmap = evmcs->exception_bitmap;
1602 }
1603
Vitaly Kuznetsovd6bf71a2021-05-26 15:20:22 +02001604 if (unlikely(!(hv_clean_fields &
Sean Christopherson55d23752018-12-03 13:53:18 -08001605 HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_ENTRY))) {
1606 vmcs12->vm_entry_controls = evmcs->vm_entry_controls;
1607 }
1608
Vitaly Kuznetsovd6bf71a2021-05-26 15:20:22 +02001609 if (unlikely(!(hv_clean_fields &
Sean Christopherson55d23752018-12-03 13:53:18 -08001610 HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_EVENT))) {
1611 vmcs12->vm_entry_intr_info_field =
1612 evmcs->vm_entry_intr_info_field;
1613 vmcs12->vm_entry_exception_error_code =
1614 evmcs->vm_entry_exception_error_code;
1615 vmcs12->vm_entry_instruction_len =
1616 evmcs->vm_entry_instruction_len;
1617 }
1618
Vitaly Kuznetsovd6bf71a2021-05-26 15:20:22 +02001619 if (unlikely(!(hv_clean_fields &
Sean Christopherson55d23752018-12-03 13:53:18 -08001620 HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1))) {
1621 vmcs12->host_ia32_pat = evmcs->host_ia32_pat;
1622 vmcs12->host_ia32_efer = evmcs->host_ia32_efer;
1623 vmcs12->host_cr0 = evmcs->host_cr0;
1624 vmcs12->host_cr3 = evmcs->host_cr3;
1625 vmcs12->host_cr4 = evmcs->host_cr4;
1626 vmcs12->host_ia32_sysenter_esp = evmcs->host_ia32_sysenter_esp;
1627 vmcs12->host_ia32_sysenter_eip = evmcs->host_ia32_sysenter_eip;
1628 vmcs12->host_rip = evmcs->host_rip;
1629 vmcs12->host_ia32_sysenter_cs = evmcs->host_ia32_sysenter_cs;
1630 vmcs12->host_es_selector = evmcs->host_es_selector;
1631 vmcs12->host_cs_selector = evmcs->host_cs_selector;
1632 vmcs12->host_ss_selector = evmcs->host_ss_selector;
1633 vmcs12->host_ds_selector = evmcs->host_ds_selector;
1634 vmcs12->host_fs_selector = evmcs->host_fs_selector;
1635 vmcs12->host_gs_selector = evmcs->host_gs_selector;
1636 vmcs12->host_tr_selector = evmcs->host_tr_selector;
1637 }
1638
Vitaly Kuznetsovd6bf71a2021-05-26 15:20:22 +02001639 if (unlikely(!(hv_clean_fields &
Vitaly Kuznetsovf9bc5222019-06-13 13:35:02 +02001640 HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_GRP1))) {
Sean Christopherson55d23752018-12-03 13:53:18 -08001641 vmcs12->pin_based_vm_exec_control =
1642 evmcs->pin_based_vm_exec_control;
1643 vmcs12->vm_exit_controls = evmcs->vm_exit_controls;
1644 vmcs12->secondary_vm_exec_control =
1645 evmcs->secondary_vm_exec_control;
1646 }
1647
Vitaly Kuznetsovd6bf71a2021-05-26 15:20:22 +02001648 if (unlikely(!(hv_clean_fields &
Sean Christopherson55d23752018-12-03 13:53:18 -08001649 HV_VMX_ENLIGHTENED_CLEAN_FIELD_IO_BITMAP))) {
1650 vmcs12->io_bitmap_a = evmcs->io_bitmap_a;
1651 vmcs12->io_bitmap_b = evmcs->io_bitmap_b;
1652 }
1653
Vitaly Kuznetsovd6bf71a2021-05-26 15:20:22 +02001654 if (unlikely(!(hv_clean_fields &
Sean Christopherson55d23752018-12-03 13:53:18 -08001655 HV_VMX_ENLIGHTENED_CLEAN_FIELD_MSR_BITMAP))) {
1656 vmcs12->msr_bitmap = evmcs->msr_bitmap;
1657 }
1658
Vitaly Kuznetsovd6bf71a2021-05-26 15:20:22 +02001659 if (unlikely(!(hv_clean_fields &
Sean Christopherson55d23752018-12-03 13:53:18 -08001660 HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2))) {
1661 vmcs12->guest_es_base = evmcs->guest_es_base;
1662 vmcs12->guest_cs_base = evmcs->guest_cs_base;
1663 vmcs12->guest_ss_base = evmcs->guest_ss_base;
1664 vmcs12->guest_ds_base = evmcs->guest_ds_base;
1665 vmcs12->guest_fs_base = evmcs->guest_fs_base;
1666 vmcs12->guest_gs_base = evmcs->guest_gs_base;
1667 vmcs12->guest_ldtr_base = evmcs->guest_ldtr_base;
1668 vmcs12->guest_tr_base = evmcs->guest_tr_base;
1669 vmcs12->guest_gdtr_base = evmcs->guest_gdtr_base;
1670 vmcs12->guest_idtr_base = evmcs->guest_idtr_base;
1671 vmcs12->guest_es_limit = evmcs->guest_es_limit;
1672 vmcs12->guest_cs_limit = evmcs->guest_cs_limit;
1673 vmcs12->guest_ss_limit = evmcs->guest_ss_limit;
1674 vmcs12->guest_ds_limit = evmcs->guest_ds_limit;
1675 vmcs12->guest_fs_limit = evmcs->guest_fs_limit;
1676 vmcs12->guest_gs_limit = evmcs->guest_gs_limit;
1677 vmcs12->guest_ldtr_limit = evmcs->guest_ldtr_limit;
1678 vmcs12->guest_tr_limit = evmcs->guest_tr_limit;
1679 vmcs12->guest_gdtr_limit = evmcs->guest_gdtr_limit;
1680 vmcs12->guest_idtr_limit = evmcs->guest_idtr_limit;
1681 vmcs12->guest_es_ar_bytes = evmcs->guest_es_ar_bytes;
1682 vmcs12->guest_cs_ar_bytes = evmcs->guest_cs_ar_bytes;
1683 vmcs12->guest_ss_ar_bytes = evmcs->guest_ss_ar_bytes;
1684 vmcs12->guest_ds_ar_bytes = evmcs->guest_ds_ar_bytes;
1685 vmcs12->guest_fs_ar_bytes = evmcs->guest_fs_ar_bytes;
1686 vmcs12->guest_gs_ar_bytes = evmcs->guest_gs_ar_bytes;
1687 vmcs12->guest_ldtr_ar_bytes = evmcs->guest_ldtr_ar_bytes;
1688 vmcs12->guest_tr_ar_bytes = evmcs->guest_tr_ar_bytes;
1689 vmcs12->guest_es_selector = evmcs->guest_es_selector;
1690 vmcs12->guest_cs_selector = evmcs->guest_cs_selector;
1691 vmcs12->guest_ss_selector = evmcs->guest_ss_selector;
1692 vmcs12->guest_ds_selector = evmcs->guest_ds_selector;
1693 vmcs12->guest_fs_selector = evmcs->guest_fs_selector;
1694 vmcs12->guest_gs_selector = evmcs->guest_gs_selector;
1695 vmcs12->guest_ldtr_selector = evmcs->guest_ldtr_selector;
1696 vmcs12->guest_tr_selector = evmcs->guest_tr_selector;
1697 }
1698
Vitaly Kuznetsovd6bf71a2021-05-26 15:20:22 +02001699 if (unlikely(!(hv_clean_fields &
Sean Christopherson55d23752018-12-03 13:53:18 -08001700 HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_GRP2))) {
1701 vmcs12->tsc_offset = evmcs->tsc_offset;
1702 vmcs12->virtual_apic_page_addr = evmcs->virtual_apic_page_addr;
1703 vmcs12->xss_exit_bitmap = evmcs->xss_exit_bitmap;
1704 }
1705
Vitaly Kuznetsovd6bf71a2021-05-26 15:20:22 +02001706 if (unlikely(!(hv_clean_fields &
Sean Christopherson55d23752018-12-03 13:53:18 -08001707 HV_VMX_ENLIGHTENED_CLEAN_FIELD_CRDR))) {
1708 vmcs12->cr0_guest_host_mask = evmcs->cr0_guest_host_mask;
1709 vmcs12->cr4_guest_host_mask = evmcs->cr4_guest_host_mask;
1710 vmcs12->cr0_read_shadow = evmcs->cr0_read_shadow;
1711 vmcs12->cr4_read_shadow = evmcs->cr4_read_shadow;
1712 vmcs12->guest_cr0 = evmcs->guest_cr0;
1713 vmcs12->guest_cr3 = evmcs->guest_cr3;
1714 vmcs12->guest_cr4 = evmcs->guest_cr4;
1715 vmcs12->guest_dr7 = evmcs->guest_dr7;
1716 }
1717
Vitaly Kuznetsovd6bf71a2021-05-26 15:20:22 +02001718 if (unlikely(!(hv_clean_fields &
Sean Christopherson55d23752018-12-03 13:53:18 -08001719 HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_POINTER))) {
1720 vmcs12->host_fs_base = evmcs->host_fs_base;
1721 vmcs12->host_gs_base = evmcs->host_gs_base;
1722 vmcs12->host_tr_base = evmcs->host_tr_base;
1723 vmcs12->host_gdtr_base = evmcs->host_gdtr_base;
1724 vmcs12->host_idtr_base = evmcs->host_idtr_base;
1725 vmcs12->host_rsp = evmcs->host_rsp;
1726 }
1727
Vitaly Kuznetsovd6bf71a2021-05-26 15:20:22 +02001728 if (unlikely(!(hv_clean_fields &
Sean Christopherson55d23752018-12-03 13:53:18 -08001729 HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_XLAT))) {
1730 vmcs12->ept_pointer = evmcs->ept_pointer;
1731 vmcs12->virtual_processor_id = evmcs->virtual_processor_id;
1732 }
1733
Vitaly Kuznetsovd6bf71a2021-05-26 15:20:22 +02001734 if (unlikely(!(hv_clean_fields &
Sean Christopherson55d23752018-12-03 13:53:18 -08001735 HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1))) {
1736 vmcs12->vmcs_link_pointer = evmcs->vmcs_link_pointer;
1737 vmcs12->guest_ia32_debugctl = evmcs->guest_ia32_debugctl;
1738 vmcs12->guest_ia32_pat = evmcs->guest_ia32_pat;
1739 vmcs12->guest_ia32_efer = evmcs->guest_ia32_efer;
1740 vmcs12->guest_pdptr0 = evmcs->guest_pdptr0;
1741 vmcs12->guest_pdptr1 = evmcs->guest_pdptr1;
1742 vmcs12->guest_pdptr2 = evmcs->guest_pdptr2;
1743 vmcs12->guest_pdptr3 = evmcs->guest_pdptr3;
1744 vmcs12->guest_pending_dbg_exceptions =
1745 evmcs->guest_pending_dbg_exceptions;
1746 vmcs12->guest_sysenter_esp = evmcs->guest_sysenter_esp;
1747 vmcs12->guest_sysenter_eip = evmcs->guest_sysenter_eip;
1748 vmcs12->guest_bndcfgs = evmcs->guest_bndcfgs;
1749 vmcs12->guest_activity_state = evmcs->guest_activity_state;
1750 vmcs12->guest_sysenter_cs = evmcs->guest_sysenter_cs;
1751 }
1752
1753 /*
1754 * Not used?
1755 * vmcs12->vm_exit_msr_store_addr = evmcs->vm_exit_msr_store_addr;
1756 * vmcs12->vm_exit_msr_load_addr = evmcs->vm_exit_msr_load_addr;
1757 * vmcs12->vm_entry_msr_load_addr = evmcs->vm_entry_msr_load_addr;
Sean Christopherson55d23752018-12-03 13:53:18 -08001758 * vmcs12->page_fault_error_code_mask =
1759 * evmcs->page_fault_error_code_mask;
1760 * vmcs12->page_fault_error_code_match =
1761 * evmcs->page_fault_error_code_match;
1762 * vmcs12->cr3_target_count = evmcs->cr3_target_count;
1763 * vmcs12->vm_exit_msr_store_count = evmcs->vm_exit_msr_store_count;
1764 * vmcs12->vm_exit_msr_load_count = evmcs->vm_exit_msr_load_count;
1765 * vmcs12->vm_entry_msr_load_count = evmcs->vm_entry_msr_load_count;
1766 */
1767
1768 /*
1769 * Read only fields:
1770 * vmcs12->guest_physical_address = evmcs->guest_physical_address;
1771 * vmcs12->vm_instruction_error = evmcs->vm_instruction_error;
1772 * vmcs12->vm_exit_reason = evmcs->vm_exit_reason;
1773 * vmcs12->vm_exit_intr_info = evmcs->vm_exit_intr_info;
1774 * vmcs12->vm_exit_intr_error_code = evmcs->vm_exit_intr_error_code;
1775 * vmcs12->idt_vectoring_info_field = evmcs->idt_vectoring_info_field;
1776 * vmcs12->idt_vectoring_error_code = evmcs->idt_vectoring_error_code;
1777 * vmcs12->vm_exit_instruction_len = evmcs->vm_exit_instruction_len;
1778 * vmcs12->vmx_instruction_info = evmcs->vmx_instruction_info;
1779 * vmcs12->exit_qualification = evmcs->exit_qualification;
1780 * vmcs12->guest_linear_address = evmcs->guest_linear_address;
1781 *
1782 * Not present in struct vmcs12:
1783 * vmcs12->exit_io_instruction_ecx = evmcs->exit_io_instruction_ecx;
1784 * vmcs12->exit_io_instruction_esi = evmcs->exit_io_instruction_esi;
1785 * vmcs12->exit_io_instruction_edi = evmcs->exit_io_instruction_edi;
1786 * vmcs12->exit_io_instruction_eip = evmcs->exit_io_instruction_eip;
1787 */
1788
Vitaly Kuznetsov25641ca2021-05-26 15:20:19 +02001789 return;
Sean Christopherson55d23752018-12-03 13:53:18 -08001790}
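
/*
 * For illustration only (not KVM code): the function above is organized
 * around the Hyper-V "clean fields" bitmap - each bit covers a group of
 * eVMCS fields, and a set bit means the group was not modified since it was
 * last loaded, so copying it can be skipped.  A minimal sketch of that
 * gating pattern for a single group; the helper name is hypothetical.
 */
static void __maybe_unused example_copy_group_if_dirty(struct vmcs12 *vmcs12,
						       const struct hv_enlightened_vmcs *evmcs,
						       u32 hv_clean_fields)
{
	/* Copy only when the group's clean bit is NOT set (i.e. it is dirty). */
	if (!(hv_clean_fields & HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_PROC))
		vmcs12->cpu_based_vm_exec_control =
			evmcs->cpu_based_vm_exec_control;
}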
1791
Vitaly Kuznetsov25641ca2021-05-26 15:20:19 +02001792static void copy_vmcs12_to_enlightened(struct vcpu_vmx *vmx)
Sean Christopherson55d23752018-12-03 13:53:18 -08001793{
1794 struct vmcs12 *vmcs12 = vmx->nested.cached_vmcs12;
1795 struct hv_enlightened_vmcs *evmcs = vmx->nested.hv_evmcs;
1796
1797 /*
1798 * Should not be changed by KVM:
1799 *
1800 * evmcs->host_es_selector = vmcs12->host_es_selector;
1801 * evmcs->host_cs_selector = vmcs12->host_cs_selector;
1802 * evmcs->host_ss_selector = vmcs12->host_ss_selector;
1803 * evmcs->host_ds_selector = vmcs12->host_ds_selector;
1804 * evmcs->host_fs_selector = vmcs12->host_fs_selector;
1805 * evmcs->host_gs_selector = vmcs12->host_gs_selector;
1806 * evmcs->host_tr_selector = vmcs12->host_tr_selector;
1807 * evmcs->host_ia32_pat = vmcs12->host_ia32_pat;
1808 * evmcs->host_ia32_efer = vmcs12->host_ia32_efer;
1809 * evmcs->host_cr0 = vmcs12->host_cr0;
1810 * evmcs->host_cr3 = vmcs12->host_cr3;
1811 * evmcs->host_cr4 = vmcs12->host_cr4;
1812 * evmcs->host_ia32_sysenter_esp = vmcs12->host_ia32_sysenter_esp;
1813 * evmcs->host_ia32_sysenter_eip = vmcs12->host_ia32_sysenter_eip;
1814 * evmcs->host_rip = vmcs12->host_rip;
1815 * evmcs->host_ia32_sysenter_cs = vmcs12->host_ia32_sysenter_cs;
1816 * evmcs->host_fs_base = vmcs12->host_fs_base;
1817 * evmcs->host_gs_base = vmcs12->host_gs_base;
1818 * evmcs->host_tr_base = vmcs12->host_tr_base;
1819 * evmcs->host_gdtr_base = vmcs12->host_gdtr_base;
1820 * evmcs->host_idtr_base = vmcs12->host_idtr_base;
1821 * evmcs->host_rsp = vmcs12->host_rsp;
Sean Christopherson3731905ef2019-05-07 08:36:27 -07001822 * sync_vmcs02_to_vmcs12() doesn't read these:
Sean Christopherson55d23752018-12-03 13:53:18 -08001823 * evmcs->io_bitmap_a = vmcs12->io_bitmap_a;
1824 * evmcs->io_bitmap_b = vmcs12->io_bitmap_b;
1825 * evmcs->msr_bitmap = vmcs12->msr_bitmap;
1826 * evmcs->ept_pointer = vmcs12->ept_pointer;
1827 * evmcs->xss_exit_bitmap = vmcs12->xss_exit_bitmap;
1828 * evmcs->vm_exit_msr_store_addr = vmcs12->vm_exit_msr_store_addr;
1829 * evmcs->vm_exit_msr_load_addr = vmcs12->vm_exit_msr_load_addr;
1830 * evmcs->vm_entry_msr_load_addr = vmcs12->vm_entry_msr_load_addr;
Sean Christopherson55d23752018-12-03 13:53:18 -08001831 * evmcs->tpr_threshold = vmcs12->tpr_threshold;
1832 * evmcs->virtual_processor_id = vmcs12->virtual_processor_id;
1833 * evmcs->exception_bitmap = vmcs12->exception_bitmap;
1834 * evmcs->vmcs_link_pointer = vmcs12->vmcs_link_pointer;
1835 * evmcs->pin_based_vm_exec_control = vmcs12->pin_based_vm_exec_control;
1836 * evmcs->vm_exit_controls = vmcs12->vm_exit_controls;
1837 * evmcs->secondary_vm_exec_control = vmcs12->secondary_vm_exec_control;
1838 * evmcs->page_fault_error_code_mask =
1839 * vmcs12->page_fault_error_code_mask;
1840 * evmcs->page_fault_error_code_match =
1841 * vmcs12->page_fault_error_code_match;
1842 * evmcs->cr3_target_count = vmcs12->cr3_target_count;
1843 * evmcs->virtual_apic_page_addr = vmcs12->virtual_apic_page_addr;
1844 * evmcs->tsc_offset = vmcs12->tsc_offset;
1845 * evmcs->guest_ia32_debugctl = vmcs12->guest_ia32_debugctl;
1846 * evmcs->cr0_guest_host_mask = vmcs12->cr0_guest_host_mask;
1847 * evmcs->cr4_guest_host_mask = vmcs12->cr4_guest_host_mask;
1848 * evmcs->cr0_read_shadow = vmcs12->cr0_read_shadow;
1849 * evmcs->cr4_read_shadow = vmcs12->cr4_read_shadow;
1850 * evmcs->vm_exit_msr_store_count = vmcs12->vm_exit_msr_store_count;
1851 * evmcs->vm_exit_msr_load_count = vmcs12->vm_exit_msr_load_count;
1852 * evmcs->vm_entry_msr_load_count = vmcs12->vm_entry_msr_load_count;
1853 *
1854 * Not present in struct vmcs12:
1855 * evmcs->exit_io_instruction_ecx = vmcs12->exit_io_instruction_ecx;
1856 * evmcs->exit_io_instruction_esi = vmcs12->exit_io_instruction_esi;
1857 * evmcs->exit_io_instruction_edi = vmcs12->exit_io_instruction_edi;
1858 * evmcs->exit_io_instruction_eip = vmcs12->exit_io_instruction_eip;
1859 */
1860
1861 evmcs->guest_es_selector = vmcs12->guest_es_selector;
1862 evmcs->guest_cs_selector = vmcs12->guest_cs_selector;
1863 evmcs->guest_ss_selector = vmcs12->guest_ss_selector;
1864 evmcs->guest_ds_selector = vmcs12->guest_ds_selector;
1865 evmcs->guest_fs_selector = vmcs12->guest_fs_selector;
1866 evmcs->guest_gs_selector = vmcs12->guest_gs_selector;
1867 evmcs->guest_ldtr_selector = vmcs12->guest_ldtr_selector;
1868 evmcs->guest_tr_selector = vmcs12->guest_tr_selector;
1869
1870 evmcs->guest_es_limit = vmcs12->guest_es_limit;
1871 evmcs->guest_cs_limit = vmcs12->guest_cs_limit;
1872 evmcs->guest_ss_limit = vmcs12->guest_ss_limit;
1873 evmcs->guest_ds_limit = vmcs12->guest_ds_limit;
1874 evmcs->guest_fs_limit = vmcs12->guest_fs_limit;
1875 evmcs->guest_gs_limit = vmcs12->guest_gs_limit;
1876 evmcs->guest_ldtr_limit = vmcs12->guest_ldtr_limit;
1877 evmcs->guest_tr_limit = vmcs12->guest_tr_limit;
1878 evmcs->guest_gdtr_limit = vmcs12->guest_gdtr_limit;
1879 evmcs->guest_idtr_limit = vmcs12->guest_idtr_limit;
1880
1881 evmcs->guest_es_ar_bytes = vmcs12->guest_es_ar_bytes;
1882 evmcs->guest_cs_ar_bytes = vmcs12->guest_cs_ar_bytes;
1883 evmcs->guest_ss_ar_bytes = vmcs12->guest_ss_ar_bytes;
1884 evmcs->guest_ds_ar_bytes = vmcs12->guest_ds_ar_bytes;
1885 evmcs->guest_fs_ar_bytes = vmcs12->guest_fs_ar_bytes;
1886 evmcs->guest_gs_ar_bytes = vmcs12->guest_gs_ar_bytes;
1887 evmcs->guest_ldtr_ar_bytes = vmcs12->guest_ldtr_ar_bytes;
1888 evmcs->guest_tr_ar_bytes = vmcs12->guest_tr_ar_bytes;
1889
1890 evmcs->guest_es_base = vmcs12->guest_es_base;
1891 evmcs->guest_cs_base = vmcs12->guest_cs_base;
1892 evmcs->guest_ss_base = vmcs12->guest_ss_base;
1893 evmcs->guest_ds_base = vmcs12->guest_ds_base;
1894 evmcs->guest_fs_base = vmcs12->guest_fs_base;
1895 evmcs->guest_gs_base = vmcs12->guest_gs_base;
1896 evmcs->guest_ldtr_base = vmcs12->guest_ldtr_base;
1897 evmcs->guest_tr_base = vmcs12->guest_tr_base;
1898 evmcs->guest_gdtr_base = vmcs12->guest_gdtr_base;
1899 evmcs->guest_idtr_base = vmcs12->guest_idtr_base;
1900
1901 evmcs->guest_ia32_pat = vmcs12->guest_ia32_pat;
1902 evmcs->guest_ia32_efer = vmcs12->guest_ia32_efer;
1903
1904 evmcs->guest_pdptr0 = vmcs12->guest_pdptr0;
1905 evmcs->guest_pdptr1 = vmcs12->guest_pdptr1;
1906 evmcs->guest_pdptr2 = vmcs12->guest_pdptr2;
1907 evmcs->guest_pdptr3 = vmcs12->guest_pdptr3;
1908
1909 evmcs->guest_pending_dbg_exceptions =
1910 vmcs12->guest_pending_dbg_exceptions;
1911 evmcs->guest_sysenter_esp = vmcs12->guest_sysenter_esp;
1912 evmcs->guest_sysenter_eip = vmcs12->guest_sysenter_eip;
1913
1914 evmcs->guest_activity_state = vmcs12->guest_activity_state;
1915 evmcs->guest_sysenter_cs = vmcs12->guest_sysenter_cs;
1916
1917 evmcs->guest_cr0 = vmcs12->guest_cr0;
1918 evmcs->guest_cr3 = vmcs12->guest_cr3;
1919 evmcs->guest_cr4 = vmcs12->guest_cr4;
1920 evmcs->guest_dr7 = vmcs12->guest_dr7;
1921
1922 evmcs->guest_physical_address = vmcs12->guest_physical_address;
1923
1924 evmcs->vm_instruction_error = vmcs12->vm_instruction_error;
1925 evmcs->vm_exit_reason = vmcs12->vm_exit_reason;
1926 evmcs->vm_exit_intr_info = vmcs12->vm_exit_intr_info;
1927 evmcs->vm_exit_intr_error_code = vmcs12->vm_exit_intr_error_code;
1928 evmcs->idt_vectoring_info_field = vmcs12->idt_vectoring_info_field;
1929 evmcs->idt_vectoring_error_code = vmcs12->idt_vectoring_error_code;
1930 evmcs->vm_exit_instruction_len = vmcs12->vm_exit_instruction_len;
1931 evmcs->vmx_instruction_info = vmcs12->vmx_instruction_info;
1932
1933 evmcs->exit_qualification = vmcs12->exit_qualification;
1934
1935 evmcs->guest_linear_address = vmcs12->guest_linear_address;
1936 evmcs->guest_rsp = vmcs12->guest_rsp;
1937 evmcs->guest_rflags = vmcs12->guest_rflags;
1938
1939 evmcs->guest_interruptibility_info =
1940 vmcs12->guest_interruptibility_info;
1941 evmcs->cpu_based_vm_exec_control = vmcs12->cpu_based_vm_exec_control;
1942 evmcs->vm_entry_controls = vmcs12->vm_entry_controls;
1943 evmcs->vm_entry_intr_info_field = vmcs12->vm_entry_intr_info_field;
1944 evmcs->vm_entry_exception_error_code =
1945 vmcs12->vm_entry_exception_error_code;
1946 evmcs->vm_entry_instruction_len = vmcs12->vm_entry_instruction_len;
1947
1948 evmcs->guest_rip = vmcs12->guest_rip;
1949
1950 evmcs->guest_bndcfgs = vmcs12->guest_bndcfgs;
1951
Vitaly Kuznetsov25641ca2021-05-26 15:20:19 +02001952 return;
Sean Christopherson55d23752018-12-03 13:53:18 -08001953}
1954
1955/*
1956 * This is an equivalent of the nested hypervisor executing the vmptrld
1957 * instruction.
1958 */
Vitaly Kuznetsovb6a06532020-03-09 16:52:13 +01001959static enum nested_evmptrld_status nested_vmx_handle_enlightened_vmptrld(
1960 struct kvm_vcpu *vcpu, bool from_launch)
Sean Christopherson55d23752018-12-03 13:53:18 -08001961{
1962 struct vcpu_vmx *vmx = to_vmx(vcpu);
Vitaly Kuznetsova21a39c2019-06-28 13:23:32 +02001963 bool evmcs_gpa_changed = false;
Vitaly Kuznetsov11e34912019-06-28 13:23:33 +02001964 u64 evmcs_gpa;
Sean Christopherson55d23752018-12-03 13:53:18 -08001965
1966 if (likely(!vmx->nested.enlightened_vmcs_enabled))
Vitaly Kuznetsovb6a06532020-03-09 16:52:13 +01001967 return EVMPTRLD_DISABLED;
Sean Christopherson55d23752018-12-03 13:53:18 -08001968
Vitaly Kuznetsov02761712021-05-26 15:20:18 +02001969 if (!nested_enlightened_vmentry(vcpu, &evmcs_gpa)) {
1970 nested_release_evmcs(vcpu);
Vitaly Kuznetsovb6a06532020-03-09 16:52:13 +01001971 return EVMPTRLD_DISABLED;
Vitaly Kuznetsov02761712021-05-26 15:20:18 +02001972 }
Sean Christopherson55d23752018-12-03 13:53:18 -08001973
Vitaly Kuznetsov1e9dfbd2021-05-26 15:20:16 +02001974 if (unlikely(evmcs_gpa != vmx->nested.hv_evmcs_vmptr)) {
Yu Zhang64c78502021-09-30 01:51:53 +08001975 vmx->nested.current_vmptr = INVALID_GPA;
Sean Christopherson55d23752018-12-03 13:53:18 -08001976
1977 nested_release_evmcs(vcpu);
1978
Vitaly Kuznetsov11e34912019-06-28 13:23:33 +02001979 if (kvm_vcpu_map(vcpu, gpa_to_gfn(evmcs_gpa),
KarimAllah Ahmeddee9c042019-01-31 21:24:42 +01001980 &vmx->nested.hv_evmcs_map))
Vitaly Kuznetsovb6a06532020-03-09 16:52:13 +01001981 return EVMPTRLD_ERROR;
Sean Christopherson55d23752018-12-03 13:53:18 -08001982
KarimAllah Ahmeddee9c042019-01-31 21:24:42 +01001983 vmx->nested.hv_evmcs = vmx->nested.hv_evmcs_map.hva;
Sean Christopherson55d23752018-12-03 13:53:18 -08001984
1985 /*
1986 * Currently, KVM only supports eVMCS version 1
1987 * (== KVM_EVMCS_VERSION), and thus we expect the guest to set the
1988 * first u32 field of the eVMCS to this value, which should specify
1989 * the eVMCS VersionNumber.
1990 *
1991 * The guest should learn the eVMCS versions supported by the host
1992 * by examining CPUID.0x4000000A.EAX[0:15]. The host userspace VMM is
1993 * expected to set this CPUID leaf according to the value
1994 * returned in vmcs_version from nested_enable_evmcs().
1995 *
1996 * However, it turns out that Microsoft Hyper-V fails to comply
1997 * with its own invented interface: when Hyper-V uses eVMCS, it
1998 * just sets the first u32 field of the eVMCS to the revision_id
1999 * specified in MSR_IA32_VMX_BASIC, instead of an eVMCS version
2000 * number from the supported versions specified in
2001 * CPUID.0x4000000A.EAX[0:15].
2002 *
2003 * To work around this Hyper-V bug, we accept here either a
2004 * supported eVMCS version or the VMCS12 revision_id as valid
2005 * values for the first u32 field of the eVMCS.
2006 */
2007 if ((vmx->nested.hv_evmcs->revision_id != KVM_EVMCS_VERSION) &&
2008 (vmx->nested.hv_evmcs->revision_id != VMCS12_REVISION)) {
2009 nested_release_evmcs(vcpu);
Vitaly Kuznetsovb6a06532020-03-09 16:52:13 +01002010 return EVMPTRLD_VMFAIL;
Sean Christopherson55d23752018-12-03 13:53:18 -08002011 }
2012
Vitaly Kuznetsov11e34912019-06-28 13:23:33 +02002013 vmx->nested.hv_evmcs_vmptr = evmcs_gpa;
Sean Christopherson55d23752018-12-03 13:53:18 -08002014
Vitaly Kuznetsova21a39c2019-06-28 13:23:32 +02002015 evmcs_gpa_changed = true;
Sean Christopherson55d23752018-12-03 13:53:18 -08002016 /*
2017 * Unlike normal vmcs12, enlightened vmcs12 is not fully
2018 * reloaded from the guest's memory (read-only fields, fields not
2019 * present in struct hv_enlightened_vmcs, ...). Make sure there
2020 * are no leftovers.
2021 */
2022 if (from_launch) {
2023 struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
2024 memset(vmcs12, 0, sizeof(*vmcs12));
2025 vmcs12->hdr.revision_id = VMCS12_REVISION;
2026 }
2027
2028 }
Vitaly Kuznetsova21a39c2019-06-28 13:23:32 +02002029
2030 * Clean fields data can't be used on VMLAUNCH or when we switch
Miaohe Linffdbd502020-02-07 23:22:45 +08002031 * between different L2 guests, as KVM keeps a single VMCS12 per L1.
Vitaly Kuznetsova21a39c2019-06-28 13:23:32 +02002032 * between different L2 guests as KVM keeps a single VMCS12 per L1.
2033 */
Vitaly Kuznetsoved2a4802021-11-29 10:47:03 +01002034 if (from_launch || evmcs_gpa_changed) {
Vitaly Kuznetsova21a39c2019-06-28 13:23:32 +02002035 vmx->nested.hv_evmcs->hv_clean_fields &=
2036 ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL;
2037
Vitaly Kuznetsoved2a4802021-11-29 10:47:03 +01002038 vmx->nested.force_msr_bitmap_recalc = true;
2039 }
2040
Vitaly Kuznetsovb6a06532020-03-09 16:52:13 +01002041 return EVMPTRLD_SUCCEEDED;
Sean Christopherson55d23752018-12-03 13:53:18 -08002042}
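
/*
 * For illustration only (not KVM code): condensing the long comment above,
 * the first u32 of the eVMCS should hold a supported eVMCS version, but
 * Hyper-V itself writes the VMCS12 revision_id there, so both values are
 * accepted.  Sketch; the helper name is hypothetical.
 */
static bool __maybe_unused example_evmcs_revision_ok(u32 first_u32)
{
	return first_u32 == KVM_EVMCS_VERSION || first_u32 == VMCS12_REVISION;
}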
2043
Sean Christopherson3731905ef2019-05-07 08:36:27 -07002044void nested_sync_vmcs12_to_shadow(struct kvm_vcpu *vcpu)
Sean Christopherson55d23752018-12-03 13:53:18 -08002045{
2046 struct vcpu_vmx *vmx = to_vmx(vcpu);
2047
Vitaly Kuznetsovdc313382021-05-26 15:20:24 +02002048 if (evmptr_is_valid(vmx->nested.hv_evmcs_vmptr))
Sean Christopherson55d23752018-12-03 13:53:18 -08002049 copy_vmcs12_to_enlightened(vmx);
Vitaly Kuznetsovdc313382021-05-26 15:20:24 +02002050 else
Sean Christopherson55d23752018-12-03 13:53:18 -08002051 copy_vmcs12_to_shadow(vmx);
Sean Christopherson55d23752018-12-03 13:53:18 -08002052
Sean Christopherson3731905ef2019-05-07 08:36:27 -07002053 vmx->nested.need_vmcs12_to_shadow_sync = false;
Sean Christopherson55d23752018-12-03 13:53:18 -08002054}
2055
2056static enum hrtimer_restart vmx_preemption_timer_fn(struct hrtimer *timer)
2057{
2058 struct vcpu_vmx *vmx =
2059 container_of(timer, struct vcpu_vmx, nested.preemption_timer);
2060
2061 vmx->nested.preemption_timer_expired = true;
2062 kvm_make_request(KVM_REQ_EVENT, &vmx->vcpu);
2063 kvm_vcpu_kick(&vmx->vcpu);
2064
2065 return HRTIMER_NORESTART;
2066}
2067
Peter Shier850448f2020-05-26 14:51:06 -07002068static u64 vmx_calc_preemption_timer_value(struct kvm_vcpu *vcpu)
Sean Christopherson55d23752018-12-03 13:53:18 -08002069{
Peter Shier850448f2020-05-26 14:51:06 -07002070 struct vcpu_vmx *vmx = to_vmx(vcpu);
2071 struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
Peter Shier850448f2020-05-26 14:51:06 -07002072
2073 u64 l1_scaled_tsc = kvm_read_l1_tsc(vcpu, rdtsc()) >>
2074 VMX_MISC_EMULATED_PREEMPTION_TIMER_RATE;
2075
2076 if (!vmx->nested.has_preemption_timer_deadline) {
Makarand Sonare8d7fbf02020-05-26 14:51:07 -07002077 vmx->nested.preemption_timer_deadline =
2078 vmcs12->vmx_preemption_timer_value + l1_scaled_tsc;
Peter Shier850448f2020-05-26 14:51:06 -07002079 vmx->nested.has_preemption_timer_deadline = true;
Makarand Sonare8d7fbf02020-05-26 14:51:07 -07002080 }
2081 return vmx->nested.preemption_timer_deadline - l1_scaled_tsc;
Peter Shier850448f2020-05-26 14:51:06 -07002082}
2083
2084static void vmx_start_preemption_timer(struct kvm_vcpu *vcpu,
2085 u64 preemption_timeout)
2086{
Sean Christopherson55d23752018-12-03 13:53:18 -08002087 struct vcpu_vmx *vmx = to_vmx(vcpu);
2088
2089 /*
2090 * A timer value of zero is architecturally guaranteed to cause
2091 * a VMExit prior to executing any instructions in the guest.
2092 */
2093 if (preemption_timeout == 0) {
2094 vmx_preemption_timer_fn(&vmx->nested.preemption_timer);
2095 return;
2096 }
2097
2098 if (vcpu->arch.virtual_tsc_khz == 0)
2099 return;
2100
2101 preemption_timeout <<= VMX_MISC_EMULATED_PREEMPTION_TIMER_RATE;
2102 preemption_timeout *= 1000000;
2103 do_div(preemption_timeout, vcpu->arch.virtual_tsc_khz);
2104 hrtimer_start(&vmx->nested.preemption_timer,
Jim Mattsonada00982020-05-08 13:36:42 -07002105 ktime_add_ns(ktime_get(), preemption_timeout),
2106 HRTIMER_MODE_ABS_PINNED);
Sean Christopherson55d23752018-12-03 13:53:18 -08002107}
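
/*
 * For illustration only (not KVM code): the conversion above turns a
 * VMX-preemption-timer value, which counts in units of
 * 2^VMX_MISC_EMULATED_PREEMPTION_TIMER_RATE TSC cycles (32 with the
 * emulated rate of 5), into nanoseconds for the hrtimer.  Worked example,
 * assuming a guest TSC of 2 GHz (virtual_tsc_khz = 2,000,000):
 *
 *	timer value 1000
 *	  << 5		-> 32,000 TSC cycles
 *	  * 1,000,000	-> 32,000,000,000
 *	  / 2,000,000	-> 16,000 ns = 16 us
 */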
2108
2109static u64 nested_vmx_calc_efer(struct vcpu_vmx *vmx, struct vmcs12 *vmcs12)
2110{
2111 if (vmx->nested.nested_run_pending &&
2112 (vmcs12->vm_entry_controls & VM_ENTRY_LOAD_IA32_EFER))
2113 return vmcs12->guest_ia32_efer;
2114 else if (vmcs12->vm_entry_controls & VM_ENTRY_IA32E_MODE)
2115 return vmx->vcpu.arch.efer | (EFER_LMA | EFER_LME);
2116 else
2117 return vmx->vcpu.arch.efer & ~(EFER_LMA | EFER_LME);
2118}
2119
2120static void prepare_vmcs02_constant_state(struct vcpu_vmx *vmx)
2121{
2122 /*
2123 * If vmcs02 hasn't been initialized, set the constant vmcs02 state
2124 * according to L0's settings (vmcs12 is irrelevant here). Host
2125 * fields that come from L0 and are not constant, e.g. HOST_CR3,
2126 * will be set as needed prior to VMLAUNCH/VMRESUME.
2127 */
2128 if (vmx->nested.vmcs02_initialized)
2129 return;
2130 vmx->nested.vmcs02_initialized = true;
2131
2132 /*
2133 * We don't care what the EPTP value is; we just need to guarantee
2134 * it's valid so we don't get a false positive when doing early
2135 * consistency checks.
2136 */
2137 if (enable_ept && nested_early_check)
Sean Christopherson2a40b902020-07-15 20:41:18 -07002138 vmcs_write64(EPT_POINTER,
2139 construct_eptp(&vmx->vcpu, 0, PT64_ROOT_4LEVEL));
Sean Christopherson55d23752018-12-03 13:53:18 -08002140
2141 /* All VMFUNCs are currently emulated through L0 vmexits. */
2142 if (cpu_has_vmx_vmfunc())
2143 vmcs_write64(VM_FUNCTION_CONTROL, 0);
2144
2145 if (cpu_has_vmx_posted_intr())
2146 vmcs_write16(POSTED_INTR_NV, POSTED_INTR_NESTED_VECTOR);
2147
2148 if (cpu_has_vmx_msr_bitmap())
2149 vmcs_write64(MSR_BITMAP, __pa(vmx->nested.vmcs02.msr_bitmap));
2150
Sean Christopherson4d6c9892019-05-07 09:06:30 -07002151 /*
Sean Christophersonc3bb9a22021-02-12 16:50:07 -08002152 * PML is emulated for L2, but never enabled in hardware as the MMU
2153 * handles A/D emulation. Disabling PML for L2 also avoids having to
2154 * deal with filtering out L2 GPAs from the buffer.
Sean Christopherson4d6c9892019-05-07 09:06:30 -07002155 */
2156 if (enable_pml) {
Sean Christophersonc3bb9a22021-02-12 16:50:07 -08002157 vmcs_write64(PML_ADDRESS, 0);
2158 vmcs_write16(GUEST_PML_INDEX, -1);
Sean Christopherson4d6c9892019-05-07 09:06:30 -07002159 }
Sean Christopherson55d23752018-12-03 13:53:18 -08002160
Sean Christophersonc538d572019-05-07 09:06:29 -07002161 if (cpu_has_vmx_encls_vmexit())
Yu Zhang64c78502021-09-30 01:51:53 +08002162 vmcs_write64(ENCLS_EXITING_BITMAP, INVALID_GPA);
Sean Christopherson55d23752018-12-03 13:53:18 -08002163
2164 /*
2165 * Set the MSR load/store lists to match L0's settings. Only the
2166 * addresses are constant (for vmcs02); the counts can change based
2167 * on L2's behavior, e.g. switching to/from long mode.
2168 */
Aaron Lewis662f1d12019-11-07 21:14:39 -08002169 vmcs_write64(VM_EXIT_MSR_STORE_ADDR, __pa(vmx->msr_autostore.guest.val));
Sean Christopherson55d23752018-12-03 13:53:18 -08002170 vmcs_write64(VM_EXIT_MSR_LOAD_ADDR, __pa(vmx->msr_autoload.host.val));
2171 vmcs_write64(VM_ENTRY_MSR_LOAD_ADDR, __pa(vmx->msr_autoload.guest.val));
2172
2173 vmx_set_constant_host_state(vmx);
2174}
2175
Paolo Bonzinib1346ab2019-06-06 17:24:00 +02002176static void prepare_vmcs02_early_rare(struct vcpu_vmx *vmx,
Sean Christopherson55d23752018-12-03 13:53:18 -08002177 struct vmcs12 *vmcs12)
2178{
2179 prepare_vmcs02_constant_state(vmx);
2180
Yu Zhang64c78502021-09-30 01:51:53 +08002181 vmcs_write64(VMCS_LINK_POINTER, INVALID_GPA);
Sean Christopherson55d23752018-12-03 13:53:18 -08002182
2183 if (enable_vpid) {
2184 if (nested_cpu_has_vpid(vmcs12) && vmx->nested.vpid02)
2185 vmcs_write16(VIRTUAL_PROCESSOR_ID, vmx->nested.vpid02);
2186 else
2187 vmcs_write16(VIRTUAL_PROCESSOR_ID, vmx->vpid);
2188 }
2189}
2190
Sean Christopherson389ab252021-08-10 10:19:50 -07002191static void prepare_vmcs02_early(struct vcpu_vmx *vmx, struct loaded_vmcs *vmcs01,
2192 struct vmcs12 *vmcs12)
Sean Christopherson55d23752018-12-03 13:53:18 -08002193{
Sean Christophersonc3bb9a22021-02-12 16:50:07 -08002194 u32 exec_control;
Sean Christopherson55d23752018-12-03 13:53:18 -08002195 u64 guest_efer = nested_vmx_calc_efer(vmx, vmcs12);
2196
Vitaly Kuznetsov1e9dfbd2021-05-26 15:20:16 +02002197 if (vmx->nested.dirty_vmcs12 || evmptr_is_valid(vmx->nested.hv_evmcs_vmptr))
Paolo Bonzinib1346ab2019-06-06 17:24:00 +02002198 prepare_vmcs02_early_rare(vmx, vmcs12);
Sean Christopherson55d23752018-12-03 13:53:18 -08002199
2200 /*
Sean Christopherson55d23752018-12-03 13:53:18 -08002201 * PIN CONTROLS
2202 */
Sean Christopherson389ab252021-08-10 10:19:50 -07002203 exec_control = __pin_controls_get(vmcs01);
Sean Christopherson804939e2019-05-07 12:18:05 -07002204 exec_control |= (vmcs12->pin_based_vm_exec_control &
2205 ~PIN_BASED_VMX_PREEMPTION_TIMER);
Sean Christopherson55d23752018-12-03 13:53:18 -08002206
2207 /* Posted interrupts setting is only taken from vmcs12. */
Sean Christophersonf7782bb82021-08-10 07:45:26 -07002208 vmx->nested.pi_pending = false;
2209 if (nested_cpu_has_posted_intr(vmcs12))
Sean Christopherson55d23752018-12-03 13:53:18 -08002210 vmx->nested.posted_intr_nv = vmcs12->posted_intr_nv;
Sean Christophersonf7782bb82021-08-10 07:45:26 -07002211 else
Sean Christopherson55d23752018-12-03 13:53:18 -08002212 exec_control &= ~PIN_BASED_POSTED_INTR;
Sean Christopherson3af80fe2019-05-07 12:18:00 -07002213 pin_controls_set(vmx, exec_control);
Sean Christopherson55d23752018-12-03 13:53:18 -08002214
2215 /*
2216 * EXEC CONTROLS
2217 */
Sean Christopherson389ab252021-08-10 10:19:50 -07002218 exec_control = __exec_controls_get(vmcs01); /* L0's desires */
Xiaoyao Li9dadc2f2019-12-06 16:45:24 +08002219 exec_control &= ~CPU_BASED_INTR_WINDOW_EXITING;
Xiaoyao Li4e2a0bc2019-12-06 16:45:25 +08002220 exec_control &= ~CPU_BASED_NMI_WINDOW_EXITING;
Sean Christopherson55d23752018-12-03 13:53:18 -08002221 exec_control &= ~CPU_BASED_TPR_SHADOW;
2222 exec_control |= vmcs12->cpu_based_vm_exec_control;
2223
Liran Alon02d496cf2019-11-11 14:30:55 +02002224 vmx->nested.l1_tpr_threshold = -1;
Sean Christophersonca2f5462019-05-07 09:06:33 -07002225 if (exec_control & CPU_BASED_TPR_SHADOW)
Sean Christopherson55d23752018-12-03 13:53:18 -08002226 vmcs_write32(TPR_THRESHOLD, vmcs12->tpr_threshold);
Sean Christopherson55d23752018-12-03 13:53:18 -08002227#ifdef CONFIG_X86_64
Sean Christophersonca2f5462019-05-07 09:06:33 -07002228 else
Sean Christopherson55d23752018-12-03 13:53:18 -08002229 exec_control |= CPU_BASED_CR8_LOAD_EXITING |
2230 CPU_BASED_CR8_STORE_EXITING;
2231#endif
Sean Christopherson55d23752018-12-03 13:53:18 -08002232
2233 /*
2234 * A vmexit (to either L1 hypervisor or L0 userspace) is always needed
2235 * for I/O port accesses.
2236 */
Sean Christopherson55d23752018-12-03 13:53:18 -08002237 exec_control |= CPU_BASED_UNCOND_IO_EXITING;
Sean Christophersonde0286b2019-05-07 12:18:01 -07002238 exec_control &= ~CPU_BASED_USE_IO_BITMAPS;
2239
2240 /*
2241 * This bit will be computed in nested_get_vmcs12_pages, because
2242 * we do not have access to L1's MSR bitmap yet. For now, keep
2243 * the same bit as before, hoping to avoid multiple VMWRITEs that
2244 * only set/clear this bit.
2245 */
2246 exec_control &= ~CPU_BASED_USE_MSR_BITMAPS;
2247 exec_control |= exec_controls_get(vmx) & CPU_BASED_USE_MSR_BITMAPS;
2248
Sean Christopherson3af80fe2019-05-07 12:18:00 -07002249 exec_controls_set(vmx, exec_control);
Sean Christopherson55d23752018-12-03 13:53:18 -08002250
2251 /*
2252 * SECONDARY EXEC CONTROLS
2253 */
2254 if (cpu_has_secondary_exec_ctrls()) {
Sean Christopherson389ab252021-08-10 10:19:50 -07002255 exec_control = __secondary_exec_controls_get(vmcs01);
Sean Christopherson55d23752018-12-03 13:53:18 -08002256
2257 /* Take the following fields only from vmcs12 */
2258 exec_control &= ~(SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES |
Sean Christopherson389ab252021-08-10 10:19:50 -07002259 SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE |
Sean Christopherson55d23752018-12-03 13:53:18 -08002260 SECONDARY_EXEC_ENABLE_INVPCID |
Sean Christopherson7f3603b2020-09-23 09:50:47 -07002261 SECONDARY_EXEC_ENABLE_RDTSCP |
Sean Christopherson55d23752018-12-03 13:53:18 -08002262 SECONDARY_EXEC_XSAVES |
Tao Xue69e72fa2019-07-16 14:55:49 +08002263 SECONDARY_EXEC_ENABLE_USR_WAIT_PAUSE |
Sean Christopherson55d23752018-12-03 13:53:18 -08002264 SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY |
2265 SECONDARY_EXEC_APIC_REGISTER_VIRT |
Ilias Stamatisd041b5e2021-05-26 19:44:17 +01002266 SECONDARY_EXEC_ENABLE_VMFUNC |
Sean Christopherson389ab252021-08-10 10:19:50 -07002267 SECONDARY_EXEC_TSC_SCALING |
2268 SECONDARY_EXEC_DESC);
2269
Sean Christopherson55d23752018-12-03 13:53:18 -08002270 if (nested_cpu_has(vmcs12,
Sean Christophersonc3bb9a22021-02-12 16:50:07 -08002271 CPU_BASED_ACTIVATE_SECONDARY_CONTROLS))
2272 exec_control |= vmcs12->secondary_vm_exec_control;
2273
2274 /* PML is emulated and never enabled in hardware for L2. */
2275 exec_control &= ~SECONDARY_EXEC_ENABLE_PML;
Sean Christopherson55d23752018-12-03 13:53:18 -08002276
2277 /* VMCS shadowing for L2 is emulated for now */
2278 exec_control &= ~SECONDARY_EXEC_SHADOW_VMCS;
2279
Sean Christopherson469debd2019-05-07 12:18:02 -07002280 /*
2281 * Preset *DT exiting when emulating UMIP, so that vmx_set_cr4()
2282 * will not have to rewrite the controls just for this bit.
2283 */
2284 if (!boot_cpu_has(X86_FEATURE_UMIP) && vmx_umip_emulated() &&
2285 (vmcs12->guest_cr4 & X86_CR4_UMIP))
2286 exec_control |= SECONDARY_EXEC_DESC;
2287
Sean Christopherson55d23752018-12-03 13:53:18 -08002288 if (exec_control & SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY)
2289 vmcs_write16(GUEST_INTR_STATUS,
2290 vmcs12->guest_intr_status);
2291
Krish Sadhukhanbddd82d2020-09-21 08:10:25 +00002292 if (!nested_cpu_has2(vmcs12, SECONDARY_EXEC_UNRESTRICTED_GUEST))
2293 exec_control &= ~SECONDARY_EXEC_UNRESTRICTED_GUEST;
2294
Sean Christopherson72add912021-04-12 16:21:42 +12002295 if (exec_control & SECONDARY_EXEC_ENCLS_EXITING)
2296 vmx_write_encls_bitmap(&vmx->vcpu, vmcs12);
2297
Sean Christopherson3af80fe2019-05-07 12:18:00 -07002298 secondary_exec_controls_set(vmx, exec_control);
Sean Christopherson55d23752018-12-03 13:53:18 -08002299 }
2300
2301 /*
2302 * ENTRY CONTROLS
2303 *
2304 * vmcs12's VM_{ENTRY,EXIT}_LOAD_IA32_EFER and VM_ENTRY_IA32E_MODE
2305 * are emulated by vmx_set_efer() in prepare_vmcs02(), but speculate
2306 * on the related bits (if supported by the CPU) in the hope that
2307 * we can avoid VMWrites during vmx_set_efer().
2308 */
Sean Christopherson389ab252021-08-10 10:19:50 -07002309 exec_control = __vm_entry_controls_get(vmcs01);
2310 exec_control |= vmcs12->vm_entry_controls;
2311 exec_control &= ~(VM_ENTRY_IA32E_MODE | VM_ENTRY_LOAD_IA32_EFER);
Sean Christopherson55d23752018-12-03 13:53:18 -08002312 if (cpu_has_load_ia32_efer()) {
2313 if (guest_efer & EFER_LMA)
2314 exec_control |= VM_ENTRY_IA32E_MODE;
2315 if (guest_efer != host_efer)
2316 exec_control |= VM_ENTRY_LOAD_IA32_EFER;
2317 }
Sean Christopherson3af80fe2019-05-07 12:18:00 -07002318 vm_entry_controls_set(vmx, exec_control);
Sean Christopherson55d23752018-12-03 13:53:18 -08002319
2320 /*
2321 * EXIT CONTROLS
2322 *
2323 * L2->L1 exit controls are emulated - the hardware exit is to L0 so
2324 * we should use its exit controls. Note that VM_EXIT_LOAD_IA32_EFER
2325 * bits may be modified by vmx_set_efer() in prepare_vmcs02().
2326 */
Sean Christopherson389ab252021-08-10 10:19:50 -07002327 exec_control = __vm_exit_controls_get(vmcs01);
Sean Christopherson55d23752018-12-03 13:53:18 -08002328 if (cpu_has_load_ia32_efer() && guest_efer != host_efer)
2329 exec_control |= VM_EXIT_LOAD_IA32_EFER;
Sean Christopherson389ab252021-08-10 10:19:50 -07002330 else
2331 exec_control &= ~VM_EXIT_LOAD_IA32_EFER;
Sean Christopherson3af80fe2019-05-07 12:18:00 -07002332 vm_exit_controls_set(vmx, exec_control);
Sean Christopherson55d23752018-12-03 13:53:18 -08002333
2334 /*
2335 * Interrupt/Exception Fields
2336 */
2337 if (vmx->nested.nested_run_pending) {
2338 vmcs_write32(VM_ENTRY_INTR_INFO_FIELD,
2339 vmcs12->vm_entry_intr_info_field);
2340 vmcs_write32(VM_ENTRY_EXCEPTION_ERROR_CODE,
2341 vmcs12->vm_entry_exception_error_code);
2342 vmcs_write32(VM_ENTRY_INSTRUCTION_LEN,
2343 vmcs12->vm_entry_instruction_len);
2344 vmcs_write32(GUEST_INTERRUPTIBILITY_INFO,
2345 vmcs12->guest_interruptibility_info);
2346 vmx->loaded_vmcs->nmi_known_unmasked =
2347 !(vmcs12->guest_interruptibility_info & GUEST_INTR_STATE_NMI);
2348 } else {
2349 vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, 0);
2350 }
2351}
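
/*
 * For illustration only (not KVM code): prepare_vmcs02_early() repeats one
 * pattern per control field - start from L0's vmcs01 value, strip the bits
 * KVM emulates or refuses to expose to L2, then fold in what vmcs12
 * requests.  A condensed sketch of that pattern with a hypothetical mask
 * and helper name.
 */
static u32 __maybe_unused example_merge_ctls(u32 vmcs01_ctls, u32 vmcs12_ctls,
					     u32 emulated_or_forbidden_bits)
{
	u32 ctls = vmcs01_ctls;			/* L0's own requirements */

	ctls &= ~emulated_or_forbidden_bits;	/* bits KVM handles on L1's behalf */
	ctls |= vmcs12_ctls;			/* L1's requests for L2 */

	return ctls;
}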
2352
Paolo Bonzinib1346ab2019-06-06 17:24:00 +02002353static void prepare_vmcs02_rare(struct vcpu_vmx *vmx, struct vmcs12 *vmcs12)
Sean Christopherson55d23752018-12-03 13:53:18 -08002354{
2355 struct hv_enlightened_vmcs *hv_evmcs = vmx->nested.hv_evmcs;
2356
2357 if (!hv_evmcs || !(hv_evmcs->hv_clean_fields &
2358 HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2)) {
2359 vmcs_write16(GUEST_ES_SELECTOR, vmcs12->guest_es_selector);
2360 vmcs_write16(GUEST_CS_SELECTOR, vmcs12->guest_cs_selector);
2361 vmcs_write16(GUEST_SS_SELECTOR, vmcs12->guest_ss_selector);
2362 vmcs_write16(GUEST_DS_SELECTOR, vmcs12->guest_ds_selector);
2363 vmcs_write16(GUEST_FS_SELECTOR, vmcs12->guest_fs_selector);
2364 vmcs_write16(GUEST_GS_SELECTOR, vmcs12->guest_gs_selector);
2365 vmcs_write16(GUEST_LDTR_SELECTOR, vmcs12->guest_ldtr_selector);
2366 vmcs_write16(GUEST_TR_SELECTOR, vmcs12->guest_tr_selector);
2367 vmcs_write32(GUEST_ES_LIMIT, vmcs12->guest_es_limit);
2368 vmcs_write32(GUEST_CS_LIMIT, vmcs12->guest_cs_limit);
2369 vmcs_write32(GUEST_SS_LIMIT, vmcs12->guest_ss_limit);
2370 vmcs_write32(GUEST_DS_LIMIT, vmcs12->guest_ds_limit);
2371 vmcs_write32(GUEST_FS_LIMIT, vmcs12->guest_fs_limit);
2372 vmcs_write32(GUEST_GS_LIMIT, vmcs12->guest_gs_limit);
2373 vmcs_write32(GUEST_LDTR_LIMIT, vmcs12->guest_ldtr_limit);
2374 vmcs_write32(GUEST_TR_LIMIT, vmcs12->guest_tr_limit);
2375 vmcs_write32(GUEST_GDTR_LIMIT, vmcs12->guest_gdtr_limit);
2376 vmcs_write32(GUEST_IDTR_LIMIT, vmcs12->guest_idtr_limit);
Sean Christopherson1c6f0b42019-05-07 08:36:25 -07002377 vmcs_write32(GUEST_CS_AR_BYTES, vmcs12->guest_cs_ar_bytes);
2378 vmcs_write32(GUEST_SS_AR_BYTES, vmcs12->guest_ss_ar_bytes);
Sean Christopherson55d23752018-12-03 13:53:18 -08002379 vmcs_write32(GUEST_ES_AR_BYTES, vmcs12->guest_es_ar_bytes);
2380 vmcs_write32(GUEST_DS_AR_BYTES, vmcs12->guest_ds_ar_bytes);
2381 vmcs_write32(GUEST_FS_AR_BYTES, vmcs12->guest_fs_ar_bytes);
2382 vmcs_write32(GUEST_GS_AR_BYTES, vmcs12->guest_gs_ar_bytes);
2383 vmcs_write32(GUEST_LDTR_AR_BYTES, vmcs12->guest_ldtr_ar_bytes);
2384 vmcs_write32(GUEST_TR_AR_BYTES, vmcs12->guest_tr_ar_bytes);
2385 vmcs_writel(GUEST_ES_BASE, vmcs12->guest_es_base);
2386 vmcs_writel(GUEST_CS_BASE, vmcs12->guest_cs_base);
2387 vmcs_writel(GUEST_SS_BASE, vmcs12->guest_ss_base);
2388 vmcs_writel(GUEST_DS_BASE, vmcs12->guest_ds_base);
2389 vmcs_writel(GUEST_FS_BASE, vmcs12->guest_fs_base);
2390 vmcs_writel(GUEST_GS_BASE, vmcs12->guest_gs_base);
2391 vmcs_writel(GUEST_LDTR_BASE, vmcs12->guest_ldtr_base);
2392 vmcs_writel(GUEST_TR_BASE, vmcs12->guest_tr_base);
2393 vmcs_writel(GUEST_GDTR_BASE, vmcs12->guest_gdtr_base);
2394 vmcs_writel(GUEST_IDTR_BASE, vmcs12->guest_idtr_base);
Sean Christophersonfc387d82020-09-23 11:44:46 -07002395
2396 vmx->segment_cache.bitmask = 0;
Sean Christopherson55d23752018-12-03 13:53:18 -08002397 }
2398
2399 if (!hv_evmcs || !(hv_evmcs->hv_clean_fields &
2400 HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1)) {
2401 vmcs_write32(GUEST_SYSENTER_CS, vmcs12->guest_sysenter_cs);
2402 vmcs_writel(GUEST_PENDING_DBG_EXCEPTIONS,
2403 vmcs12->guest_pending_dbg_exceptions);
2404 vmcs_writel(GUEST_SYSENTER_ESP, vmcs12->guest_sysenter_esp);
2405 vmcs_writel(GUEST_SYSENTER_EIP, vmcs12->guest_sysenter_eip);
2406
2407 /*
2408 * L1 may access L2's PDPTRs, so save them in order to construct
2409 * vmcs12.
2410 */
2411 if (enable_ept) {
2412 vmcs_write64(GUEST_PDPTR0, vmcs12->guest_pdptr0);
2413 vmcs_write64(GUEST_PDPTR1, vmcs12->guest_pdptr1);
2414 vmcs_write64(GUEST_PDPTR2, vmcs12->guest_pdptr2);
2415 vmcs_write64(GUEST_PDPTR3, vmcs12->guest_pdptr3);
2416 }
Sean Christophersonc27e5b02019-05-07 09:06:39 -07002417
2418 if (kvm_mpx_supported() && vmx->nested.nested_run_pending &&
2419 (vmcs12->vm_entry_controls & VM_ENTRY_LOAD_BNDCFGS))
2420 vmcs_write64(GUEST_BNDCFGS, vmcs12->guest_bndcfgs);
Sean Christopherson55d23752018-12-03 13:53:18 -08002421 }
2422
2423 if (nested_cpu_has_xsaves(vmcs12))
2424 vmcs_write64(XSS_EXIT_BITMAP, vmcs12->xss_exit_bitmap);
2425
2426 /*
2427 * Whether page-faults are trapped is determined by a combination of
Paolo Bonzinia0c13432020-07-10 17:48:08 +02002428 * 3 settings: PFEC_MASK, PFEC_MATCH and EXCEPTION_BITMAP.PF. If L0
2429 * doesn't care about page faults then we should set all of these to
2430 * L1's desires. However, if L0 does care about (some) page faults, it
2431 * is not easy (if at all possible?) to merge L0 and L1's desires, so we
2432 * simply ask to exit on each and every L2 page fault. This is done by
2433 * setting MASK=MATCH=0 and (see below) EB.PF=1.
Sean Christopherson55d23752018-12-03 13:53:18 -08002434 * Note that below we don't need special code to set EB.PF beyond the
2435 * "or"ing of the EB of vmcs01 and vmcs12, because when enable_ept,
2436 * vmcs01's EB.PF is 0 so the "or" will take vmcs12's value, and when
2437 * !enable_ept, EB.PF is 1, so the "or" will always be 1.
2438 */
Paolo Bonzinia0c13432020-07-10 17:48:08 +02002439 if (vmx_need_pf_intercept(&vmx->vcpu)) {
2440 /*
2441 * TODO: if both L0 and L1 need the same MASK and MATCH,
2442 * go ahead and use it?
2443 */
2444 vmcs_write32(PAGE_FAULT_ERROR_CODE_MASK, 0);
2445 vmcs_write32(PAGE_FAULT_ERROR_CODE_MATCH, 0);
2446 } else {
2447 vmcs_write32(PAGE_FAULT_ERROR_CODE_MASK, vmcs12->page_fault_error_code_mask);
2448 vmcs_write32(PAGE_FAULT_ERROR_CODE_MATCH, vmcs12->page_fault_error_code_match);
2449 }
Sean Christopherson55d23752018-12-03 13:53:18 -08002450
2451 if (cpu_has_vmx_apicv()) {
2452 vmcs_write64(EOI_EXIT_BITMAP0, vmcs12->eoi_exit_bitmap0);
2453 vmcs_write64(EOI_EXIT_BITMAP1, vmcs12->eoi_exit_bitmap1);
2454 vmcs_write64(EOI_EXIT_BITMAP2, vmcs12->eoi_exit_bitmap2);
2455 vmcs_write64(EOI_EXIT_BITMAP3, vmcs12->eoi_exit_bitmap3);
2456 }
2457
Aaron Lewis662f1d12019-11-07 21:14:39 -08002458 /*
2459 * Make sure the msr_autostore list is up to date before we set the
2460 * count in the vmcs02.
2461 */
2462 prepare_vmx_msr_autostore_list(&vmx->vcpu, MSR_IA32_TSC);
2463
2464 vmcs_write32(VM_EXIT_MSR_STORE_COUNT, vmx->msr_autostore.guest.nr);
Sean Christopherson55d23752018-12-03 13:53:18 -08002465 vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, vmx->msr_autoload.host.nr);
2466 vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, vmx->msr_autoload.guest.nr);
2467
2468 set_cr4_guest_host_mask(vmx);
Sean Christopherson55d23752018-12-03 13:53:18 -08002469}
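
/*
 * For illustration only (not KVM code): the PFEC_MASK/PFEC_MATCH handling
 * above relies on the architectural rule that a page fault causes a VM-exit
 * iff ((error_code & PFEC_MASK) == PFEC_MATCH) equals the value of
 * exception-bitmap bit 14, so MASK = MATCH = 0 together with EB.PF = 1
 * makes every #PF exit.  A sketch of that rule, assuming the SDM behaviour;
 * the helper name is hypothetical.
 */
static bool __maybe_unused example_pf_causes_vmexit(u32 error_code, u32 pfec_mask,
						    u32 pfec_match, bool eb_pf)
{
	bool match = (error_code & pfec_mask) == pfec_match;

	return match == eb_pf;
}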
2470
2471/*
2472 * prepare_vmcs02 is called when the L1 guest hypervisor runs its nested
2473 * L2 guest. L1 has a vmcs for L2 (vmcs12), and this function "merges" it
2474 * with L0's requirements for its guest (a.k.a. vmcs01), so we can run the L2
2475 * guest in a way that is appropriate both for L1's requests and for our own
2476 * needs. In addition to modifying the active vmcs (which is vmcs02), this
2477 * function also has necessary side effects, like setting various
2478 * vcpu->arch fields.
2479 * Returns 0 on success, non-zero on failure; on failure, the reason is
2480 * assigned to *entry_failure_code.
2481 */
2482static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
Maxim Levitsky0f857222021-06-07 12:02:00 +03002483 bool from_vmentry,
Sean Christopherson68cda402020-05-11 15:05:29 -07002484 enum vm_entry_failure_code *entry_failure_code)
Sean Christopherson55d23752018-12-03 13:53:18 -08002485{
2486 struct vcpu_vmx *vmx = to_vmx(vcpu);
Sean Christophersonc7554efc2019-05-07 09:06:40 -07002487 bool load_guest_pdptrs_vmcs12 = false;
Sean Christopherson55d23752018-12-03 13:53:18 -08002488
Vitaly Kuznetsov1e9dfbd2021-05-26 15:20:16 +02002489 if (vmx->nested.dirty_vmcs12 || evmptr_is_valid(vmx->nested.hv_evmcs_vmptr)) {
Paolo Bonzinib1346ab2019-06-06 17:24:00 +02002490 prepare_vmcs02_rare(vmx, vmcs12);
Sean Christopherson55d23752018-12-03 13:53:18 -08002491 vmx->nested.dirty_vmcs12 = false;
Sean Christopherson55d23752018-12-03 13:53:18 -08002492
Vitaly Kuznetsov1e9dfbd2021-05-26 15:20:16 +02002493 load_guest_pdptrs_vmcs12 = !evmptr_is_valid(vmx->nested.hv_evmcs_vmptr) ||
2494 !(vmx->nested.hv_evmcs->hv_clean_fields &
Sean Christophersonc7554efc2019-05-07 09:06:40 -07002495 HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1);
Sean Christopherson55d23752018-12-03 13:53:18 -08002496 }
2497
2498 if (vmx->nested.nested_run_pending &&
2499 (vmcs12->vm_entry_controls & VM_ENTRY_LOAD_DEBUG_CONTROLS)) {
2500 kvm_set_dr(vcpu, 7, vmcs12->guest_dr7);
2501 vmcs_write64(GUEST_IA32_DEBUGCTL, vmcs12->guest_ia32_debugctl);
2502 } else {
2503 kvm_set_dr(vcpu, 7, vcpu->arch.dr7);
2504 vmcs_write64(GUEST_IA32_DEBUGCTL, vmx->nested.vmcs01_debugctl);
2505 }
Sean Christopherson3b013a22019-05-07 09:06:28 -07002506 if (kvm_mpx_supported() && (!vmx->nested.nested_run_pending ||
2507 !(vmcs12->vm_entry_controls & VM_ENTRY_LOAD_BNDCFGS)))
2508 vmcs_write64(GUEST_BNDCFGS, vmx->nested.vmcs01_guest_bndcfgs);
Sean Christopherson55d23752018-12-03 13:53:18 -08002509 vmx_set_rflags(vcpu, vmcs12->guest_rflags);
2510
Sean Christopherson55d23752018-12-03 13:53:18 -08002511 /* EXCEPTION_BITMAP and CR0_GUEST_HOST_MASK should basically be the
2512 * bitwise-or of what L1 wants to trap for L2, and what we want to
2513 * trap. Note that CR0.TS also needs updating - we do this later.
2514 */
Jason Baronb6a7cc32021-01-14 22:27:54 -05002515 vmx_update_exception_bitmap(vcpu);
Sean Christopherson55d23752018-12-03 13:53:18 -08002516 vcpu->arch.cr0_guest_owned_bits &= ~vmcs12->cr0_guest_host_mask;
2517 vmcs_writel(CR0_GUEST_HOST_MASK, ~vcpu->arch.cr0_guest_owned_bits);
2518
2519 if (vmx->nested.nested_run_pending &&
2520 (vmcs12->vm_entry_controls & VM_ENTRY_LOAD_IA32_PAT)) {
2521 vmcs_write64(GUEST_IA32_PAT, vmcs12->guest_ia32_pat);
2522 vcpu->arch.pat = vmcs12->guest_ia32_pat;
2523 } else if (vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_IA32_PAT) {
2524 vmcs_write64(GUEST_IA32_PAT, vmx->vcpu.arch.pat);
2525 }
2526
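	/*
	 * Sketch of the nested TSC math computed below, assuming the helpers
	 * follow the SDM model in which scaling is applied before offsetting
	 * ("frac" being the fixed-point shift of the scaling ratio):
	 *
	 *   offset02 = ((s64)offset01 * mult12 >> frac) + offset12;
	 *   mult02   = (mult01 * mult12) >> frac;
	 *
	 * Treat these formulas as illustrative; the authoritative versions
	 * live in kvm_calc_nested_tsc_offset()/kvm_calc_nested_tsc_multiplier().
	 */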
Ilias Stamatisd041b5e2021-05-26 19:44:17 +01002527 vcpu->arch.tsc_offset = kvm_calc_nested_tsc_offset(
2528 vcpu->arch.l1_tsc_offset,
2529 vmx_get_l2_tsc_offset(vcpu),
2530 vmx_get_l2_tsc_multiplier(vcpu));
2531
2532 vcpu->arch.tsc_scaling_ratio = kvm_calc_nested_tsc_multiplier(
2533 vcpu->arch.l1_tsc_scaling_ratio,
2534 vmx_get_l2_tsc_multiplier(vcpu));
2535
Sean Christopherson55d23752018-12-03 13:53:18 -08002536 vmcs_write64(TSC_OFFSET, vcpu->arch.tsc_offset);
Sean Christopherson55d23752018-12-03 13:53:18 -08002537 if (kvm_has_tsc_control)
Ilias Stamatis1ab92872021-06-07 11:54:38 +01002538 vmcs_write64(TSC_MULTIPLIER, vcpu->arch.tsc_scaling_ratio);
Sean Christopherson55d23752018-12-03 13:53:18 -08002539
Sean Christopherson50b265a2020-03-20 14:28:19 -07002540 nested_vmx_transition_tlb_flush(vcpu, vmcs12, true);
Sean Christopherson55d23752018-12-03 13:53:18 -08002541
2542 if (nested_cpu_has_ept(vmcs12))
2543 nested_ept_init_mmu_context(vcpu);
Sean Christopherson55d23752018-12-03 13:53:18 -08002544
2545 /*
 2546 * This sets GUEST_CR0 to vmcs12->guest_cr0, possibly modifying the bits
 2547 * that we consider mandatory (i.e. forced to 1).
 2548 * The CR0_READ_SHADOW is what L2 should expect to read given the
 2549 * specification by L1; it's not enough to take vmcs12->cr0_read_shadow
 2550 * because our cr0_guest_host_mask may have more bits set than L1
 2551 * expected.
2552 */
2553 vmx_set_cr0(vcpu, vmcs12->guest_cr0);
2554 vmcs_writel(CR0_READ_SHADOW, nested_read_cr0(vmcs12));
2555
2556 vmx_set_cr4(vcpu, vmcs12->guest_cr4);
2557 vmcs_writel(CR4_READ_SHADOW, nested_read_cr4(vmcs12));
2558
2559 vcpu->arch.efer = nested_vmx_calc_efer(vmx, vmcs12);
2560 /* Note: may modify VM_ENTRY/EXIT_CONTROLS and GUEST/HOST_IA32_EFER */
2561 vmx_set_efer(vcpu, vcpu->arch.efer);
2562
2563 /*
2564 * Guest state is invalid and unrestricted guest is disabled,
2565 * which means L1 attempted VMEntry to L2 with invalid state.
2566 * Fail the VMEntry.
Maxim Levitskyc8607e42021-09-13 17:09:53 +03002567 *
 2568 * However, when force loading the guest state (on SMM exit, or when
 2569 * loading nested state after migration), it is possible to have
 2570 * invalid guest state at this point; it will be fixed up later when
 2571 * the L2 register state is restored.
Sean Christopherson55d23752018-12-03 13:53:18 -08002572 */
Maxim Levitskyc8607e42021-09-13 17:09:53 +03002573 if (CC(from_vmentry && !vmx_guest_state_valid(vcpu))) {
Sean Christopherson55d23752018-12-03 13:53:18 -08002574 *entry_failure_code = ENTRY_FAIL_DEFAULT;
Sean Christophersonc80add02019-04-11 12:18:09 -07002575 return -EINVAL;
Sean Christopherson55d23752018-12-03 13:53:18 -08002576 }
2577
 2578 /* Load vmcs12->guest_cr3, backed by either nested EPT or shadow page tables. */
2579 if (nested_vmx_load_cr3(vcpu, vmcs12->guest_cr3, nested_cpu_has_ept(vmcs12),
Maxim Levitsky0f857222021-06-07 12:02:00 +03002580 from_vmentry, entry_failure_code))
Sean Christophersonc80add02019-04-11 12:18:09 -07002581 return -EINVAL;
Sean Christopherson55d23752018-12-03 13:53:18 -08002582
Sean Christopherson04f11ef2019-09-27 14:45:16 -07002583 /*
2584 * Immediately write vmcs02.GUEST_CR3. It will be propagated to vmcs12
2585 * on nested VM-Exit, which can occur without actually running L2 and
Paolo Bonzini727a7e22020-03-05 03:52:50 -05002586 * thus without hitting vmx_load_mmu_pgd(), e.g. if L1 is entering L2 with
Sean Christopherson04f11ef2019-09-27 14:45:16 -07002587 * vmcs12.GUEST_ACTIVITY_STATE=HLT, in which case KVM will intercept the
2588 * transition to HLT instead of running L2.
2589 */
2590 if (enable_ept)
2591 vmcs_writel(GUEST_CR3, vmcs12->guest_cr3);
2592
Sean Christophersonc7554efc2019-05-07 09:06:40 -07002593 /* Late preparation of GUEST_PDPTRs now that EFER and CRs are set. */
2594 if (load_guest_pdptrs_vmcs12 && nested_cpu_has_ept(vmcs12) &&
2595 is_pae_paging(vcpu)) {
2596 vmcs_write64(GUEST_PDPTR0, vmcs12->guest_pdptr0);
2597 vmcs_write64(GUEST_PDPTR1, vmcs12->guest_pdptr1);
2598 vmcs_write64(GUEST_PDPTR2, vmcs12->guest_pdptr2);
2599 vmcs_write64(GUEST_PDPTR3, vmcs12->guest_pdptr3);
2600 }
2601
Sean Christopherson55d23752018-12-03 13:53:18 -08002602 if (!enable_ept)
2603 vcpu->arch.walk_mmu->inject_page_fault = vmx_inject_page_fault_nested;
2604
Oliver Upton71f73472019-11-13 16:17:19 -08002605 if ((vmcs12->vm_entry_controls & VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL) &&
Oliver Uptond1968422019-12-13 16:33:58 -08002606 WARN_ON_ONCE(kvm_set_msr(vcpu, MSR_CORE_PERF_GLOBAL_CTRL,
Dan Carpenterbfbb3072021-11-30 15:53:37 +03002607 vmcs12->guest_ia32_perf_global_ctrl))) {
2608 *entry_failure_code = ENTRY_FAIL_DEFAULT;
Oliver Upton71f73472019-11-13 16:17:19 -08002609 return -EINVAL;
Dan Carpenterbfbb3072021-11-30 15:53:37 +03002610 }
Oliver Upton71f73472019-11-13 16:17:19 -08002611
Paolo Bonzinie9c16c72019-04-30 22:07:26 +02002612 kvm_rsp_write(vcpu, vmcs12->guest_rsp);
2613 kvm_rip_write(vcpu, vmcs12->guest_rip);
Vitaly Kuznetsovdc313382021-05-26 15:20:24 +02002614
2615 /*
2616 * It was observed that genuine Hyper-V running in L1 doesn't reset
 2617 * 'hv_clean_fields' by itself; it only sets the corresponding dirty
 2618 * bits when it changes a field in the eVMCS. Mark all fields as clean
2619 * here.
2620 */
2621 if (evmptr_is_valid(vmx->nested.hv_evmcs_vmptr))
2622 vmx->nested.hv_evmcs->hv_clean_fields |=
2623 HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL;
2624
Sean Christopherson55d23752018-12-03 13:53:18 -08002625 return 0;
2626}
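
/*
 * Failure-handling note for prepare_vmcs02() above (descriptive, mirroring the
 * caller): when it returns -EINVAL, nested_vmx_enter_non_root_mode() below
 * synthesizes a VM-Exit with basic reason EXIT_REASON_INVALID_STATE and copies
 * entry_failure_code into vmcs12->exit_qualification.
 */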
2627
2628static int nested_vmx_check_nmi_controls(struct vmcs12 *vmcs12)
2629{
Sean Christopherson5497b952019-07-11 08:58:29 -07002630 if (CC(!nested_cpu_has_nmi_exiting(vmcs12) &&
2631 nested_cpu_has_virtual_nmis(vmcs12)))
Sean Christopherson55d23752018-12-03 13:53:18 -08002632 return -EINVAL;
2633
Sean Christopherson5497b952019-07-11 08:58:29 -07002634 if (CC(!nested_cpu_has_virtual_nmis(vmcs12) &&
Xiaoyao Li4e2a0bc2019-12-06 16:45:25 +08002635 nested_cpu_has(vmcs12, CPU_BASED_NMI_WINDOW_EXITING)))
Sean Christopherson55d23752018-12-03 13:53:18 -08002636 return -EINVAL;
2637
2638 return 0;
2639}
2640
Sean Christophersonac6389a2020-03-02 18:02:38 -08002641static bool nested_vmx_check_eptp(struct kvm_vcpu *vcpu, u64 new_eptp)
Sean Christopherson55d23752018-12-03 13:53:18 -08002642{
2643 struct vcpu_vmx *vmx = to_vmx(vcpu);
Sean Christopherson55d23752018-12-03 13:53:18 -08002644
2645 /* Check for memory type validity */
Sean Christophersonac6389a2020-03-02 18:02:38 -08002646 switch (new_eptp & VMX_EPTP_MT_MASK) {
Sean Christopherson55d23752018-12-03 13:53:18 -08002647 case VMX_EPTP_MT_UC:
Sean Christopherson5497b952019-07-11 08:58:29 -07002648 if (CC(!(vmx->nested.msrs.ept_caps & VMX_EPTP_UC_BIT)))
Sean Christopherson55d23752018-12-03 13:53:18 -08002649 return false;
2650 break;
2651 case VMX_EPTP_MT_WB:
Sean Christopherson5497b952019-07-11 08:58:29 -07002652 if (CC(!(vmx->nested.msrs.ept_caps & VMX_EPTP_WB_BIT)))
Sean Christopherson55d23752018-12-03 13:53:18 -08002653 return false;
2654 break;
2655 default:
2656 return false;
2657 }
2658
Sean Christophersonbb1fcc72020-03-02 18:02:36 -08002659 /* Page-walk levels validity. */
Sean Christophersonac6389a2020-03-02 18:02:38 -08002660 switch (new_eptp & VMX_EPTP_PWL_MASK) {
Sean Christophersonbb1fcc72020-03-02 18:02:36 -08002661 case VMX_EPTP_PWL_5:
2662 if (CC(!(vmx->nested.msrs.ept_caps & VMX_EPT_PAGE_WALK_5_BIT)))
2663 return false;
2664 break;
2665 case VMX_EPTP_PWL_4:
2666 if (CC(!(vmx->nested.msrs.ept_caps & VMX_EPT_PAGE_WALK_4_BIT)))
2667 return false;
2668 break;
2669 default:
Sean Christopherson55d23752018-12-03 13:53:18 -08002670 return false;
Sean Christophersonbb1fcc72020-03-02 18:02:36 -08002671 }
Sean Christopherson55d23752018-12-03 13:53:18 -08002672
2673 /* Reserved bits should not be set */
Sean Christopherson636e8b72021-02-03 16:01:10 -08002674 if (CC(kvm_vcpu_is_illegal_gpa(vcpu, new_eptp) || ((new_eptp >> 7) & 0x1f)))
Sean Christopherson55d23752018-12-03 13:53:18 -08002675 return false;
2676
2677 /* AD, if set, should be supported */
Sean Christophersonac6389a2020-03-02 18:02:38 -08002678 if (new_eptp & VMX_EPTP_AD_ENABLE_BIT) {
Sean Christopherson5497b952019-07-11 08:58:29 -07002679 if (CC(!(vmx->nested.msrs.ept_caps & VMX_EPT_AD_BIT)))
Sean Christopherson55d23752018-12-03 13:53:18 -08002680 return false;
2681 }
2682
2683 return true;
2684}
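
/*
 * Worked example for nested_vmx_check_eptp() above (sketch; bit layout per the
 * SDM rather than anything defined in this file): an EPTP of
 *
 *   root_hpa | VMX_EPTP_MT_WB | VMX_EPTP_PWL_4 | VMX_EPTP_AD_ENABLE_BIT
 *
 * encodes a write-back, 4-level EPT hierarchy with A/D bits enabled: bits 2:0
 * hold the memory type (6 = WB), bits 5:3 the page-walk length minus one
 * (3 = 4 levels), bit 6 the A/D enable, bits 11:7 must be zero, and the upper
 * bits the page-aligned root HPA.  Such a value is accepted when the matching
 * ept_caps bits are exposed to L1.
 */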
2685
Krish Sadhukhan461b4ba2018-12-12 13:30:07 -05002686/*
2687 * Checks related to VM-Execution Control Fields
2688 */
2689static int nested_check_vm_execution_controls(struct kvm_vcpu *vcpu,
2690 struct vmcs12 *vmcs12)
2691{
2692 struct vcpu_vmx *vmx = to_vmx(vcpu);
2693
Sean Christopherson5497b952019-07-11 08:58:29 -07002694 if (CC(!vmx_control_verify(vmcs12->pin_based_vm_exec_control,
2695 vmx->nested.msrs.pinbased_ctls_low,
2696 vmx->nested.msrs.pinbased_ctls_high)) ||
2697 CC(!vmx_control_verify(vmcs12->cpu_based_vm_exec_control,
2698 vmx->nested.msrs.procbased_ctls_low,
2699 vmx->nested.msrs.procbased_ctls_high)))
Krish Sadhukhan461b4ba2018-12-12 13:30:07 -05002700 return -EINVAL;
2701
2702 if (nested_cpu_has(vmcs12, CPU_BASED_ACTIVATE_SECONDARY_CONTROLS) &&
Sean Christopherson5497b952019-07-11 08:58:29 -07002703 CC(!vmx_control_verify(vmcs12->secondary_vm_exec_control,
2704 vmx->nested.msrs.secondary_ctls_low,
2705 vmx->nested.msrs.secondary_ctls_high)))
Krish Sadhukhan461b4ba2018-12-12 13:30:07 -05002706 return -EINVAL;
2707
Sean Christopherson5497b952019-07-11 08:58:29 -07002708 if (CC(vmcs12->cr3_target_count > nested_cpu_vmx_misc_cr3_count(vcpu)) ||
Krish Sadhukhan461b4ba2018-12-12 13:30:07 -05002709 nested_vmx_check_io_bitmap_controls(vcpu, vmcs12) ||
2710 nested_vmx_check_msr_bitmap_controls(vcpu, vmcs12) ||
2711 nested_vmx_check_tpr_shadow_controls(vcpu, vmcs12) ||
2712 nested_vmx_check_apic_access_controls(vcpu, vmcs12) ||
2713 nested_vmx_check_apicv_controls(vcpu, vmcs12) ||
2714 nested_vmx_check_nmi_controls(vmcs12) ||
2715 nested_vmx_check_pml_controls(vcpu, vmcs12) ||
2716 nested_vmx_check_unrestricted_guest_controls(vcpu, vmcs12) ||
2717 nested_vmx_check_mode_based_ept_exec_controls(vcpu, vmcs12) ||
2718 nested_vmx_check_shadow_vmcs_controls(vcpu, vmcs12) ||
Sean Christopherson5497b952019-07-11 08:58:29 -07002719 CC(nested_cpu_has_vpid(vmcs12) && !vmcs12->virtual_processor_id))
Krish Sadhukhan461b4ba2018-12-12 13:30:07 -05002720 return -EINVAL;
2721
Sean Christophersonbc441212019-02-12 16:42:23 -08002722 if (!nested_cpu_has_preemption_timer(vmcs12) &&
2723 nested_cpu_has_save_preemption_timer(vmcs12))
2724 return -EINVAL;
2725
Krish Sadhukhan461b4ba2018-12-12 13:30:07 -05002726 if (nested_cpu_has_ept(vmcs12) &&
Sean Christophersonac6389a2020-03-02 18:02:38 -08002727 CC(!nested_vmx_check_eptp(vcpu, vmcs12->ept_pointer)))
Krish Sadhukhan461b4ba2018-12-12 13:30:07 -05002728 return -EINVAL;
2729
2730 if (nested_cpu_has_vmfunc(vmcs12)) {
Sean Christopherson5497b952019-07-11 08:58:29 -07002731 if (CC(vmcs12->vm_function_control &
2732 ~vmx->nested.msrs.vmfunc_controls))
Krish Sadhukhan461b4ba2018-12-12 13:30:07 -05002733 return -EINVAL;
2734
2735 if (nested_cpu_has_eptp_switching(vmcs12)) {
Sean Christopherson5497b952019-07-11 08:58:29 -07002736 if (CC(!nested_cpu_has_ept(vmcs12)) ||
2737 CC(!page_address_valid(vcpu, vmcs12->eptp_list_address)))
Krish Sadhukhan461b4ba2018-12-12 13:30:07 -05002738 return -EINVAL;
2739 }
2740 }
2741
2742 return 0;
2743}
2744
Krish Sadhukhan61446ba2018-12-12 13:30:09 -05002745/*
2746 * Checks related to VM-Exit Control Fields
2747 */
2748static int nested_check_vm_exit_controls(struct kvm_vcpu *vcpu,
2749 struct vmcs12 *vmcs12)
2750{
2751 struct vcpu_vmx *vmx = to_vmx(vcpu);
2752
Sean Christopherson5497b952019-07-11 08:58:29 -07002753 if (CC(!vmx_control_verify(vmcs12->vm_exit_controls,
2754 vmx->nested.msrs.exit_ctls_low,
2755 vmx->nested.msrs.exit_ctls_high)) ||
2756 CC(nested_vmx_check_exit_msr_switch_controls(vcpu, vmcs12)))
Krish Sadhukhan61446ba2018-12-12 13:30:09 -05002757 return -EINVAL;
2758
2759 return 0;
2760}
2761
Krish Sadhukhan5fbf9632018-12-12 13:30:10 -05002762/*
2763 * Checks related to VM-Entry Control Fields
2764 */
2765static int nested_check_vm_entry_controls(struct kvm_vcpu *vcpu,
2766 struct vmcs12 *vmcs12)
Sean Christopherson55d23752018-12-03 13:53:18 -08002767{
2768 struct vcpu_vmx *vmx = to_vmx(vcpu);
Sean Christopherson55d23752018-12-03 13:53:18 -08002769
Sean Christopherson5497b952019-07-11 08:58:29 -07002770 if (CC(!vmx_control_verify(vmcs12->vm_entry_controls,
2771 vmx->nested.msrs.entry_ctls_low,
2772 vmx->nested.msrs.entry_ctls_high)))
Krish Sadhukhan5fbf9632018-12-12 13:30:10 -05002773 return -EINVAL;
Sean Christopherson55d23752018-12-03 13:53:18 -08002774
2775 /*
2776 * From the Intel SDM, volume 3:
2777 * Fields relevant to VM-entry event injection must be set properly.
2778 * These fields are the VM-entry interruption-information field, the
2779 * VM-entry exception error code, and the VM-entry instruction length.
2780 */
2781 if (vmcs12->vm_entry_intr_info_field & INTR_INFO_VALID_MASK) {
2782 u32 intr_info = vmcs12->vm_entry_intr_info_field;
2783 u8 vector = intr_info & INTR_INFO_VECTOR_MASK;
2784 u32 intr_type = intr_info & INTR_INFO_INTR_TYPE_MASK;
2785 bool has_error_code = intr_info & INTR_INFO_DELIVER_CODE_MASK;
2786 bool should_have_error_code;
2787 bool urg = nested_cpu_has2(vmcs12,
2788 SECONDARY_EXEC_UNRESTRICTED_GUEST);
2789 bool prot_mode = !urg || vmcs12->guest_cr0 & X86_CR0_PE;
2790
2791 /* VM-entry interruption-info field: interruption type */
Sean Christopherson5497b952019-07-11 08:58:29 -07002792 if (CC(intr_type == INTR_TYPE_RESERVED) ||
2793 CC(intr_type == INTR_TYPE_OTHER_EVENT &&
2794 !nested_cpu_supports_monitor_trap_flag(vcpu)))
Krish Sadhukhan5fbf9632018-12-12 13:30:10 -05002795 return -EINVAL;
Sean Christopherson55d23752018-12-03 13:53:18 -08002796
2797 /* VM-entry interruption-info field: vector */
Sean Christopherson5497b952019-07-11 08:58:29 -07002798 if (CC(intr_type == INTR_TYPE_NMI_INTR && vector != NMI_VECTOR) ||
2799 CC(intr_type == INTR_TYPE_HARD_EXCEPTION && vector > 31) ||
2800 CC(intr_type == INTR_TYPE_OTHER_EVENT && vector != 0))
Krish Sadhukhan5fbf9632018-12-12 13:30:10 -05002801 return -EINVAL;
Sean Christopherson55d23752018-12-03 13:53:18 -08002802
2803 /* VM-entry interruption-info field: deliver error code */
2804 should_have_error_code =
2805 intr_type == INTR_TYPE_HARD_EXCEPTION && prot_mode &&
2806 x86_exception_has_error_code(vector);
Sean Christopherson5497b952019-07-11 08:58:29 -07002807 if (CC(has_error_code != should_have_error_code))
Krish Sadhukhan5fbf9632018-12-12 13:30:10 -05002808 return -EINVAL;
Sean Christopherson55d23752018-12-03 13:53:18 -08002809
2810 /* VM-entry exception error code */
Sean Christopherson5497b952019-07-11 08:58:29 -07002811 if (CC(has_error_code &&
Sean Christopherson567926c2019-10-01 09:21:23 -07002812 vmcs12->vm_entry_exception_error_code & GENMASK(31, 16)))
Krish Sadhukhan5fbf9632018-12-12 13:30:10 -05002813 return -EINVAL;
Sean Christopherson55d23752018-12-03 13:53:18 -08002814
2815 /* VM-entry interruption-info field: reserved bits */
Sean Christopherson5497b952019-07-11 08:58:29 -07002816 if (CC(intr_info & INTR_INFO_RESVD_BITS_MASK))
Krish Sadhukhan5fbf9632018-12-12 13:30:10 -05002817 return -EINVAL;
Sean Christopherson55d23752018-12-03 13:53:18 -08002818
2819 /* VM-entry instruction length */
2820 switch (intr_type) {
2821 case INTR_TYPE_SOFT_EXCEPTION:
2822 case INTR_TYPE_SOFT_INTR:
2823 case INTR_TYPE_PRIV_SW_EXCEPTION:
Sean Christopherson5497b952019-07-11 08:58:29 -07002824 if (CC(vmcs12->vm_entry_instruction_len > 15) ||
2825 CC(vmcs12->vm_entry_instruction_len == 0 &&
2826 CC(!nested_cpu_has_zero_length_injection(vcpu))))
Krish Sadhukhan5fbf9632018-12-12 13:30:10 -05002827 return -EINVAL;
Sean Christopherson55d23752018-12-03 13:53:18 -08002828 }
2829 }
2830
Krish Sadhukhan5fbf9632018-12-12 13:30:10 -05002831 if (nested_vmx_check_entry_msr_switch_controls(vcpu, vmcs12))
2832 return -EINVAL;
2833
2834 return 0;
2835}
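
/*
 * Worked example for the event-injection checks above (sketch, using the
 * architectural interruption-info layout rather than anything defined in this
 * file): injecting #GP (vector 13) as a hardware exception with an error code
 * would encode as
 *
 *   intr_info = 13 | INTR_TYPE_HARD_EXCEPTION |
 *               INTR_INFO_DELIVER_CODE_MASK | INTR_INFO_VALID_MASK;
 *
 * i.e. 0x80000b0d, with the error code itself in the VM-entry exception
 * error-code field and no instruction-length requirement, since the type is
 * not one of the "soft" event types handled above.
 */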
2836
Sean Christopherson5478ba32019-04-11 12:18:06 -07002837static int nested_vmx_check_controls(struct kvm_vcpu *vcpu,
2838 struct vmcs12 *vmcs12)
2839{
2840 if (nested_check_vm_execution_controls(vcpu, vmcs12) ||
2841 nested_check_vm_exit_controls(vcpu, vmcs12) ||
2842 nested_check_vm_entry_controls(vcpu, vmcs12))
Paolo Bonzini98d9e852019-04-12 10:19:57 +02002843 return -EINVAL;
Sean Christopherson5478ba32019-04-11 12:18:06 -07002844
Vitaly Kuznetsova8350232020-02-05 13:30:34 +01002845 if (to_vmx(vcpu)->nested.enlightened_vmcs_enabled)
2846 return nested_evmcs_check_controls(vmcs12);
2847
Sean Christopherson5478ba32019-04-11 12:18:06 -07002848 return 0;
2849}
2850
Maxim Levitskyaf957ee2021-11-15 15:18:36 +02002851static int nested_vmx_check_address_space_size(struct kvm_vcpu *vcpu,
2852 struct vmcs12 *vmcs12)
2853{
2854#ifdef CONFIG_X86_64
2855 if (CC(!!(vmcs12->vm_exit_controls & VM_EXIT_HOST_ADDR_SPACE_SIZE) !=
2856 !!(vcpu->arch.efer & EFER_LMA)))
2857 return -EINVAL;
2858#endif
2859 return 0;
2860}
2861
Paolo Bonzini98d9e852019-04-12 10:19:57 +02002862static int nested_vmx_check_host_state(struct kvm_vcpu *vcpu,
2863 struct vmcs12 *vmcs12)
Krish Sadhukhan5fbf9632018-12-12 13:30:10 -05002864{
2865 bool ia32e;
2866
Sean Christopherson5497b952019-07-11 08:58:29 -07002867 if (CC(!nested_host_cr0_valid(vcpu, vmcs12->host_cr0)) ||
2868 CC(!nested_host_cr4_valid(vcpu, vmcs12->host_cr4)) ||
Sean Christopherson636e8b72021-02-03 16:01:10 -08002869 CC(kvm_vcpu_is_illegal_gpa(vcpu, vmcs12->host_cr3)))
Krish Sadhukhan254b2f32018-12-12 13:30:11 -05002870 return -EINVAL;
Krish Sadhukhan711eff32019-02-07 14:05:30 -05002871
Sean Christopherson5497b952019-07-11 08:58:29 -07002872 if (CC(is_noncanonical_address(vmcs12->host_ia32_sysenter_esp, vcpu)) ||
2873 CC(is_noncanonical_address(vmcs12->host_ia32_sysenter_eip, vcpu)))
Krish Sadhukhan711eff32019-02-07 14:05:30 -05002874 return -EINVAL;
2875
Krish Sadhukhanf6b0db1f2019-04-08 17:35:11 -04002876 if ((vmcs12->vm_exit_controls & VM_EXIT_LOAD_IA32_PAT) &&
Sean Christopherson5497b952019-07-11 08:58:29 -07002877 CC(!kvm_pat_valid(vmcs12->host_ia32_pat)))
Krish Sadhukhanf6b0db1f2019-04-08 17:35:11 -04002878 return -EINVAL;
2879
Oliver Uptonc547cb62019-11-13 16:17:17 -08002880 if ((vmcs12->vm_exit_controls & VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL) &&
2881 CC(!kvm_valid_perf_global_ctrl(vcpu_to_pmu(vcpu),
2882 vmcs12->host_ia32_perf_global_ctrl)))
2883 return -EINVAL;
2884
Paolo Bonzinifd3edd42019-09-25 18:33:53 +02002885#ifdef CONFIG_X86_64
Maxim Levitskyaf957ee2021-11-15 15:18:36 +02002886 ia32e = !!(vmcs12->vm_exit_controls & VM_EXIT_HOST_ADDR_SPACE_SIZE);
Paolo Bonzinifd3edd42019-09-25 18:33:53 +02002887#else
2888 ia32e = false;
2889#endif
2890
2891 if (ia32e) {
Maxim Levitskyaf957ee2021-11-15 15:18:36 +02002892 if (CC(!(vmcs12->host_cr4 & X86_CR4_PAE)))
Paolo Bonzinifd3edd42019-09-25 18:33:53 +02002893 return -EINVAL;
2894 } else {
Maxim Levitskyaf957ee2021-11-15 15:18:36 +02002895 if (CC(vmcs12->vm_entry_controls & VM_ENTRY_IA32E_MODE) ||
Paolo Bonzinifd3edd42019-09-25 18:33:53 +02002896 CC(vmcs12->host_cr4 & X86_CR4_PCIDE) ||
2897 CC((vmcs12->host_rip) >> 32))
2898 return -EINVAL;
2899 }
Krish Sadhukhan1ef23e12019-07-03 19:54:35 -04002900
Sean Christopherson5497b952019-07-11 08:58:29 -07002901 if (CC(vmcs12->host_cs_selector & (SEGMENT_RPL_MASK | SEGMENT_TI_MASK)) ||
2902 CC(vmcs12->host_ss_selector & (SEGMENT_RPL_MASK | SEGMENT_TI_MASK)) ||
2903 CC(vmcs12->host_ds_selector & (SEGMENT_RPL_MASK | SEGMENT_TI_MASK)) ||
2904 CC(vmcs12->host_es_selector & (SEGMENT_RPL_MASK | SEGMENT_TI_MASK)) ||
2905 CC(vmcs12->host_fs_selector & (SEGMENT_RPL_MASK | SEGMENT_TI_MASK)) ||
2906 CC(vmcs12->host_gs_selector & (SEGMENT_RPL_MASK | SEGMENT_TI_MASK)) ||
2907 CC(vmcs12->host_tr_selector & (SEGMENT_RPL_MASK | SEGMENT_TI_MASK)) ||
2908 CC(vmcs12->host_cs_selector == 0) ||
2909 CC(vmcs12->host_tr_selector == 0) ||
2910 CC(vmcs12->host_ss_selector == 0 && !ia32e))
Krish Sadhukhan1ef23e12019-07-03 19:54:35 -04002911 return -EINVAL;
2912
Sean Christopherson5497b952019-07-11 08:58:29 -07002913 if (CC(is_noncanonical_address(vmcs12->host_fs_base, vcpu)) ||
2914 CC(is_noncanonical_address(vmcs12->host_gs_base, vcpu)) ||
2915 CC(is_noncanonical_address(vmcs12->host_gdtr_base, vcpu)) ||
2916 CC(is_noncanonical_address(vmcs12->host_idtr_base, vcpu)) ||
Paolo Bonzinifd3edd42019-09-25 18:33:53 +02002917 CC(is_noncanonical_address(vmcs12->host_tr_base, vcpu)) ||
2918 CC(is_noncanonical_address(vmcs12->host_rip, vcpu)))
Krish Sadhukhan58450382019-08-09 12:26:19 -07002919 return -EINVAL;
Krish Sadhukhan1ef23e12019-07-03 19:54:35 -04002920
Krish Sadhukhan5fbf9632018-12-12 13:30:10 -05002921 /*
2922 * If the load IA32_EFER VM-exit control is 1, bits reserved in the
2923 * IA32_EFER MSR must be 0 in the field for that register. In addition,
2924 * the values of the LMA and LME bits in the field must each be that of
2925 * the host address-space size VM-exit control.
2926 */
2927 if (vmcs12->vm_exit_controls & VM_EXIT_LOAD_IA32_EFER) {
Sean Christopherson5497b952019-07-11 08:58:29 -07002928 if (CC(!kvm_valid_efer(vcpu, vmcs12->host_ia32_efer)) ||
2929 CC(ia32e != !!(vmcs12->host_ia32_efer & EFER_LMA)) ||
2930 CC(ia32e != !!(vmcs12->host_ia32_efer & EFER_LME)))
Krish Sadhukhan254b2f32018-12-12 13:30:11 -05002931 return -EINVAL;
Krish Sadhukhan5fbf9632018-12-12 13:30:10 -05002932 }
2933
Sean Christopherson55d23752018-12-03 13:53:18 -08002934 return 0;
2935}
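
/*
 * Condensed form of the host IA32_EFER rule enforced above (illustrative):
 * when VM_EXIT_LOAD_IA32_EFER is set, the host EFER must have no reserved
 * bits set and must satisfy
 *
 *   !!(efer & EFER_LMA) == ia32e && !!(efer & EFER_LME) == ia32e
 *
 * where ia32e reflects the "host address-space size" VM-exit control.
 */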
2936
2937static int nested_vmx_check_vmcs_link_ptr(struct kvm_vcpu *vcpu,
2938 struct vmcs12 *vmcs12)
2939{
David Woodhouse7d0172b2021-11-15 16:50:25 +00002940 struct vcpu_vmx *vmx = to_vmx(vcpu);
2941 struct gfn_to_hva_cache *ghc = &vmx->nested.shadow_vmcs12_cache;
2942 struct vmcs_hdr hdr;
Sean Christopherson55d23752018-12-03 13:53:18 -08002943
Yu Zhang64c78502021-09-30 01:51:53 +08002944 if (vmcs12->vmcs_link_pointer == INVALID_GPA)
Sean Christopherson55d23752018-12-03 13:53:18 -08002945 return 0;
2946
Sean Christopherson5497b952019-07-11 08:58:29 -07002947 if (CC(!page_address_valid(vcpu, vmcs12->vmcs_link_pointer)))
Sean Christopherson55d23752018-12-03 13:53:18 -08002948 return -EINVAL;
2949
David Woodhouse7d0172b2021-11-15 16:50:25 +00002950 if (ghc->gpa != vmcs12->vmcs_link_pointer &&
2951 CC(kvm_gfn_to_hva_cache_init(vcpu->kvm, ghc,
2952 vmcs12->vmcs_link_pointer, VMCS12_SIZE)))
2953 return -EINVAL;
2954
2955 if (CC(kvm_read_guest_offset_cached(vcpu->kvm, ghc, &hdr,
2956 offsetof(struct vmcs12, hdr),
2957 sizeof(hdr))))
Sean Christopherson55d23752018-12-03 13:53:18 -08002958 return -EINVAL;
2959
David Woodhouse7d0172b2021-11-15 16:50:25 +00002960 if (CC(hdr.revision_id != VMCS12_REVISION) ||
2961 CC(hdr.shadow_vmcs != nested_cpu_has_shadow_vmcs(vmcs12)))
2962 return -EINVAL;
KarimAllah Ahmed88925302019-01-31 21:24:41 +01002963
David Woodhouse7d0172b2021-11-15 16:50:25 +00002964 return 0;
Sean Christopherson55d23752018-12-03 13:53:18 -08002965}
2966
Sean Christopherson55d23752018-12-03 13:53:18 -08002967/*
2968 * Checks related to Guest Non-register State
2969 */
2970static int nested_check_guest_non_reg_state(struct vmcs12 *vmcs12)
2971{
Sean Christopherson5497b952019-07-11 08:58:29 -07002972 if (CC(vmcs12->guest_activity_state != GUEST_ACTIVITY_ACTIVE &&
Yadong Qibf0cd882020-11-06 14:51:22 +08002973 vmcs12->guest_activity_state != GUEST_ACTIVITY_HLT &&
2974 vmcs12->guest_activity_state != GUEST_ACTIVITY_WAIT_SIPI))
Sean Christopherson55d23752018-12-03 13:53:18 -08002975 return -EINVAL;
2976
2977 return 0;
2978}
2979
Sean Christopherson5478ba32019-04-11 12:18:06 -07002980static int nested_vmx_check_guest_state(struct kvm_vcpu *vcpu,
2981 struct vmcs12 *vmcs12,
Sean Christopherson68cda402020-05-11 15:05:29 -07002982 enum vm_entry_failure_code *entry_failure_code)
Sean Christopherson55d23752018-12-03 13:53:18 -08002983{
2984 bool ia32e;
2985
Sean Christopherson68cda402020-05-11 15:05:29 -07002986 *entry_failure_code = ENTRY_FAIL_DEFAULT;
Sean Christopherson55d23752018-12-03 13:53:18 -08002987
Sean Christopherson5497b952019-07-11 08:58:29 -07002988 if (CC(!nested_guest_cr0_valid(vcpu, vmcs12->guest_cr0)) ||
2989 CC(!nested_guest_cr4_valid(vcpu, vmcs12->guest_cr4)))
Sean Christophersonc80add02019-04-11 12:18:09 -07002990 return -EINVAL;
Sean Christopherson55d23752018-12-03 13:53:18 -08002991
Krish Sadhukhanb91991b2020-01-15 19:54:32 -05002992 if ((vmcs12->vm_entry_controls & VM_ENTRY_LOAD_DEBUG_CONTROLS) &&
2993 CC(!kvm_dr7_valid(vmcs12->guest_dr7)))
2994 return -EINVAL;
2995
Krish Sadhukhande2bc2b2019-04-08 17:35:12 -04002996 if ((vmcs12->vm_entry_controls & VM_ENTRY_LOAD_IA32_PAT) &&
Sean Christopherson5497b952019-07-11 08:58:29 -07002997 CC(!kvm_pat_valid(vmcs12->guest_ia32_pat)))
Sean Christophersonc80add02019-04-11 12:18:09 -07002998 return -EINVAL;
Sean Christopherson55d23752018-12-03 13:53:18 -08002999
3000 if (nested_vmx_check_vmcs_link_ptr(vcpu, vmcs12)) {
Sean Christopherson68cda402020-05-11 15:05:29 -07003001 *entry_failure_code = ENTRY_FAIL_VMCS_LINK_PTR;
Sean Christophersonc80add02019-04-11 12:18:09 -07003002 return -EINVAL;
Sean Christopherson55d23752018-12-03 13:53:18 -08003003 }
3004
Oliver Uptonbfc6ad62019-11-13 16:17:16 -08003005 if ((vmcs12->vm_entry_controls & VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL) &&
3006 CC(!kvm_valid_perf_global_ctrl(vcpu_to_pmu(vcpu),
3007 vmcs12->guest_ia32_perf_global_ctrl)))
3008 return -EINVAL;
3009
Sean Christopherson55d23752018-12-03 13:53:18 -08003010 /*
3011 * If the load IA32_EFER VM-entry control is 1, the following checks
3012 * are performed on the field for the IA32_EFER MSR:
3013 * - Bits reserved in the IA32_EFER MSR must be 0.
3014 * - Bit 10 (corresponding to IA32_EFER.LMA) must equal the value of
3015 * the IA-32e mode guest VM-exit control. It must also be identical
3016 * to bit 8 (LME) if bit 31 in the CR0 field (corresponding to
3017 * CR0.PG) is 1.
3018 */
3019 if (to_vmx(vcpu)->nested.nested_run_pending &&
3020 (vmcs12->vm_entry_controls & VM_ENTRY_LOAD_IA32_EFER)) {
3021 ia32e = (vmcs12->vm_entry_controls & VM_ENTRY_IA32E_MODE) != 0;
Sean Christopherson5497b952019-07-11 08:58:29 -07003022 if (CC(!kvm_valid_efer(vcpu, vmcs12->guest_ia32_efer)) ||
3023 CC(ia32e != !!(vmcs12->guest_ia32_efer & EFER_LMA)) ||
3024 CC(((vmcs12->guest_cr0 & X86_CR0_PG) &&
3025 ia32e != !!(vmcs12->guest_ia32_efer & EFER_LME))))
Sean Christophersonc80add02019-04-11 12:18:09 -07003026 return -EINVAL;
Sean Christopherson55d23752018-12-03 13:53:18 -08003027 }
3028
3029 if ((vmcs12->vm_entry_controls & VM_ENTRY_LOAD_BNDCFGS) &&
Sean Christopherson5497b952019-07-11 08:58:29 -07003030 (CC(is_noncanonical_address(vmcs12->guest_bndcfgs & PAGE_MASK, vcpu)) ||
3031 CC((vmcs12->guest_bndcfgs & MSR_IA32_BNDCFGS_RSVD))))
Sean Christophersonc80add02019-04-11 12:18:09 -07003032 return -EINVAL;
Sean Christopherson55d23752018-12-03 13:53:18 -08003033
Sean Christopherson9c3e9222019-04-11 12:18:05 -07003034 if (nested_check_guest_non_reg_state(vmcs12))
Sean Christophersonc80add02019-04-11 12:18:09 -07003035 return -EINVAL;
Sean Christopherson55d23752018-12-03 13:53:18 -08003036
3037 return 0;
3038}
3039
Sean Christopherson453eafb2018-12-20 12:25:17 -08003040static int nested_vmx_check_vmentry_hw(struct kvm_vcpu *vcpu)
Sean Christopherson55d23752018-12-03 13:53:18 -08003041{
3042 struct vcpu_vmx *vmx = to_vmx(vcpu);
Lai Jiangshan15ad9762021-11-18 19:08:03 +08003043 unsigned long cr4;
Sean Christophersonf1727b42019-01-25 07:40:58 -08003044 bool vm_fail;
Sean Christopherson55d23752018-12-03 13:53:18 -08003045
3046 if (!nested_early_check)
3047 return 0;
3048
3049 if (vmx->msr_autoload.host.nr)
3050 vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, 0);
3051 if (vmx->msr_autoload.guest.nr)
3052 vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, 0);
3053
3054 preempt_disable();
3055
3056 vmx_prepare_switch_to_guest(vcpu);
3057
3058 /*
3059 * Induce a consistency check VMExit by clearing bit 1 in GUEST_RFLAGS,
3060 * which is reserved to '1' by hardware. GUEST_RFLAGS is guaranteed to
Miaohe Lin49f933d2020-02-27 11:20:54 +08003061 * be written (by prepare_vmcs02()) before the "real" VMEnter, i.e.
Sean Christopherson55d23752018-12-03 13:53:18 -08003062 * there is no need to preserve other bits or save/restore the field.
3063 */
3064 vmcs_writel(GUEST_RFLAGS, 0);
3065
Sean Christopherson55d23752018-12-03 13:53:18 -08003066 cr4 = cr4_read_shadow();
3067 if (unlikely(cr4 != vmx->loaded_vmcs->host_state.cr4)) {
3068 vmcs_writel(HOST_CR4, cr4);
3069 vmx->loaded_vmcs->host_state.cr4 = cr4;
3070 }
3071
Uros Bizjak150f17b2020-12-30 16:26:57 -08003072 vm_fail = __vmx_vcpu_run(vmx, (unsigned long *)&vcpu->arch.regs,
3073 vmx->loaded_vmcs->launched);
Sean Christopherson55d23752018-12-03 13:53:18 -08003074
Sean Christopherson55d23752018-12-03 13:53:18 -08003075 if (vmx->msr_autoload.host.nr)
3076 vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, vmx->msr_autoload.host.nr);
3077 if (vmx->msr_autoload.guest.nr)
3078 vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, vmx->msr_autoload.guest.nr);
3079
Sean Christophersonf1727b42019-01-25 07:40:58 -08003080 if (vm_fail) {
Sean Christopherson380e0052019-07-11 08:58:30 -07003081 u32 error = vmcs_read32(VM_INSTRUCTION_ERROR);
3082
Wanpeng Li541e8862019-05-17 16:49:50 +08003083 preempt_enable();
Sean Christopherson380e0052019-07-11 08:58:30 -07003084
3085 trace_kvm_nested_vmenter_failed(
3086 "early hardware check VM-instruction error: ", error);
3087 WARN_ON_ONCE(error != VMXERR_ENTRY_INVALID_CONTROL_FIELD);
Sean Christopherson55d23752018-12-03 13:53:18 -08003088 return 1;
3089 }
3090
3091 /*
3092 * VMExit clears RFLAGS.IF and DR7, even on a consistency check.
3093 */
Sean Christopherson55d23752018-12-03 13:53:18 -08003094 if (hw_breakpoint_active())
3095 set_debugreg(__this_cpu_read(cpu_dr7), 7);
Peter Zijlstra84b6a342020-05-29 23:27:36 +02003096 local_irq_enable();
Wanpeng Li541e8862019-05-17 16:49:50 +08003097 preempt_enable();
Sean Christopherson55d23752018-12-03 13:53:18 -08003098
3099 /*
3100 * A non-failing VMEntry means we somehow entered guest mode with
3101 * an illegal RIP, and that's just the tip of the iceberg. There
3102 * is no telling what memory has been modified or what state has
3103 * been exposed to unknown code. Hitting this all but guarantees
3104 * a (very critical) hardware issue.
3105 */
3106 WARN_ON(!(vmcs_read32(VM_EXIT_REASON) &
3107 VMX_EXIT_REASONS_FAILED_VMENTRY));
3108
3109 return 0;
3110}
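
/*
 * Descriptive summary of nested_vmx_check_vmentry_hw() above (not from the
 * original source): with nested_early_check enabled, the throwaway
 * VMLAUNCH/VMRESUME lets hardware run the VMX consistency checks on vmcs02.
 * A VM-Fail is expected to report VMXERR_ENTRY_INVALID_CONTROL_FIELD and is
 * turned into a nested VMFail by the caller; otherwise the only legitimate
 * outcome is a failed-VM-Entry VM-Exit, since GUEST_RFLAGS was deliberately
 * made architecturally invalid.
 */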
Sean Christopherson55d23752018-12-03 13:53:18 -08003111
Paolo Bonzini9a78e152021-01-08 11:43:08 -05003112static bool nested_get_evmcs_page(struct kvm_vcpu *vcpu)
Sean Christopherson55d23752018-12-03 13:53:18 -08003113{
Sean Christopherson55d23752018-12-03 13:53:18 -08003114 struct vcpu_vmx *vmx = to_vmx(vcpu);
Sean Christopherson55d23752018-12-03 13:53:18 -08003115
Vitaly Kuznetsove942dbf2020-03-09 16:52:12 +01003116 /*
3117 * hv_evmcs may end up being not mapped after migration (when
3118 * L2 was running), map it here to make sure vmcs12 changes are
3119 * properly reflected.
3120 */
Vitaly Kuznetsov1e9dfbd2021-05-26 15:20:16 +02003121 if (vmx->nested.enlightened_vmcs_enabled &&
Vitaly Kuznetsov27849962021-05-26 15:20:20 +02003122 vmx->nested.hv_evmcs_vmptr == EVMPTR_MAP_PENDING) {
Vitaly Kuznetsovb6a06532020-03-09 16:52:13 +01003123 enum nested_evmptrld_status evmptrld_status =
3124 nested_vmx_handle_enlightened_vmptrld(vcpu, false);
3125
3126 if (evmptrld_status == EVMPTRLD_VMFAIL ||
Vitaly Kuznetsovf5c7e842021-05-03 17:08:51 +02003127 evmptrld_status == EVMPTRLD_ERROR)
Vitaly Kuznetsovb6a06532020-03-09 16:52:13 +01003128 return false;
Vitaly Kuznetsov8629b622021-05-26 15:20:25 +02003129
3130 /*
3131 * Post migration VMCS12 always provides the most actual
3132 * information, copy it to eVMCS upon entry.
3133 */
3134 vmx->nested.need_vmcs12_to_shadow_sync = true;
Vitaly Kuznetsovb6a06532020-03-09 16:52:13 +01003135 }
Vitaly Kuznetsove942dbf2020-03-09 16:52:12 +01003136
Paolo Bonzini9a78e152021-01-08 11:43:08 -05003137 return true;
3138}
3139
3140static bool nested_get_vmcs12_pages(struct kvm_vcpu *vcpu)
3141{
3142 struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
3143 struct vcpu_vmx *vmx = to_vmx(vcpu);
3144 struct kvm_host_map *map;
3145 struct page *page;
3146 u64 hpa;
3147
Maxim Levitsky158a48e2021-06-07 12:02:03 +03003148 if (!vcpu->arch.pdptrs_from_userspace &&
3149 !nested_cpu_has_ept(vmcs12) && is_pae_paging(vcpu)) {
Maxim Levitsky0f857222021-06-07 12:02:00 +03003150 /*
3151 * Reload the guest's PDPTRs since after a migration
3152 * the guest CR3 might be restored prior to setting the nested
3153 * state which can lead to a load of wrong PDPTRs.
3154 */
Lai Jiangshan2df4a5e2021-11-24 20:20:52 +08003155 if (CC(!load_pdptrs(vcpu, vcpu->arch.cr3)))
Maxim Levitsky0f857222021-06-07 12:02:00 +03003156 return false;
3157 }
3158
3159
Sean Christopherson55d23752018-12-03 13:53:18 -08003160 if (nested_cpu_has2(vmcs12, SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES)) {
3161 /*
3162 * Translate L1 physical address to host physical
3163 * address for vmcs02. Keep the page pinned, so this
3164 * physical address remains valid. We keep a reference
3165 * to it so we can release it later.
3166 */
3167 if (vmx->nested.apic_access_page) { /* shouldn't happen */
Liran Alonb11494b2019-11-21 00:31:47 +02003168 kvm_release_page_clean(vmx->nested.apic_access_page);
Sean Christopherson55d23752018-12-03 13:53:18 -08003169 vmx->nested.apic_access_page = NULL;
3170 }
3171 page = kvm_vcpu_gpa_to_page(vcpu, vmcs12->apic_access_addr);
Sean Christopherson55d23752018-12-03 13:53:18 -08003172 if (!is_error_page(page)) {
3173 vmx->nested.apic_access_page = page;
3174 hpa = page_to_phys(vmx->nested.apic_access_page);
3175 vmcs_write64(APIC_ACCESS_ADDR, hpa);
3176 } else {
Jim Mattson671ddc72019-10-15 10:44:05 -07003177 pr_debug_ratelimited("%s: no backing 'struct page' for APIC-access address in vmcs12\n",
3178 __func__);
3179 vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
3180 vcpu->run->internal.suberror =
3181 KVM_INTERNAL_ERROR_EMULATION;
3182 vcpu->run->internal.ndata = 0;
3183 return false;
Sean Christopherson55d23752018-12-03 13:53:18 -08003184 }
3185 }
3186
3187 if (nested_cpu_has(vmcs12, CPU_BASED_TPR_SHADOW)) {
KarimAllah Ahmed96c66e82019-01-31 21:24:37 +01003188 map = &vmx->nested.virtual_apic_map;
Sean Christopherson55d23752018-12-03 13:53:18 -08003189
KarimAllah Ahmed96c66e82019-01-31 21:24:37 +01003190 if (!kvm_vcpu_map(vcpu, gpa_to_gfn(vmcs12->virtual_apic_page_addr), map)) {
3191 vmcs_write64(VIRTUAL_APIC_PAGE_ADDR, pfn_to_hpa(map->pfn));
Paolo Bonzini69090812019-04-15 15:16:17 +02003192 } else if (nested_cpu_has(vmcs12, CPU_BASED_CR8_LOAD_EXITING) &&
3193 nested_cpu_has(vmcs12, CPU_BASED_CR8_STORE_EXITING) &&
3194 !nested_cpu_has2(vmcs12, SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES)) {
3195 /*
 3196 * The processor will never use the TPR shadow, so simply
 3197 * clear the bit from the execution control. Such a
3198 * configuration is useless, but it happens in tests.
3199 * For any other configuration, failing the vm entry is
3200 * _not_ what the processor does but it's basically the
3201 * only possibility we have.
3202 */
Sean Christopherson2183f562019-05-07 12:17:56 -07003203 exec_controls_clearbit(vmx, CPU_BASED_TPR_SHADOW);
Paolo Bonzini69090812019-04-15 15:16:17 +02003204 } else {
Sean Christophersonca2f5462019-05-07 09:06:33 -07003205 /*
3206 * Write an illegal value to VIRTUAL_APIC_PAGE_ADDR to
3207 * force VM-Entry to fail.
3208 */
Yu Zhang64c78502021-09-30 01:51:53 +08003209 vmcs_write64(VIRTUAL_APIC_PAGE_ADDR, INVALID_GPA);
Sean Christopherson55d23752018-12-03 13:53:18 -08003210 }
3211 }
3212
3213 if (nested_cpu_has_posted_intr(vmcs12)) {
KarimAllah Ahmed3278e042019-01-31 21:24:38 +01003214 map = &vmx->nested.pi_desc_map;
3215
3216 if (!kvm_vcpu_map(vcpu, gpa_to_gfn(vmcs12->posted_intr_desc_addr), map)) {
3217 vmx->nested.pi_desc =
3218 (struct pi_desc *)(((void *)map->hva) +
3219 offset_in_page(vmcs12->posted_intr_desc_addr));
3220 vmcs_write64(POSTED_INTR_DESC_ADDR,
3221 pfn_to_hpa(map->pfn) + offset_in_page(vmcs12->posted_intr_desc_addr));
Jim Mattson966eefb2021-06-04 10:26:06 -07003222 } else {
3223 /*
3224 * Defer the KVM_INTERNAL_EXIT until KVM tries to
3225 * access the contents of the VMCS12 posted interrupt
3226 * descriptor. (Note that KVM may do this when it
3227 * should not, per the architectural specification.)
3228 */
3229 vmx->nested.pi_desc = NULL;
3230 pin_controls_clearbit(vmx, PIN_BASED_POSTED_INTR);
Sean Christopherson55d23752018-12-03 13:53:18 -08003231 }
Sean Christopherson55d23752018-12-03 13:53:18 -08003232 }
3233 if (nested_vmx_prepare_msr_bitmap(vcpu, vmcs12))
Sean Christopherson2183f562019-05-07 12:17:56 -07003234 exec_controls_setbit(vmx, CPU_BASED_USE_MSR_BITMAPS);
Sean Christopherson55d23752018-12-03 13:53:18 -08003235 else
Sean Christopherson2183f562019-05-07 12:17:56 -07003236 exec_controls_clearbit(vmx, CPU_BASED_USE_MSR_BITMAPS);
Paolo Bonzini9a78e152021-01-08 11:43:08 -05003237
3238 return true;
3239}
3240
3241static bool vmx_get_nested_state_pages(struct kvm_vcpu *vcpu)
3242{
Vitaly Kuznetsovf5c7e842021-05-03 17:08:51 +02003243 if (!nested_get_evmcs_page(vcpu)) {
3244 pr_debug_ratelimited("%s: enlightened vmptrld failed\n",
3245 __func__);
3246 vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
3247 vcpu->run->internal.suberror =
3248 KVM_INTERNAL_ERROR_EMULATION;
3249 vcpu->run->internal.ndata = 0;
3250
Paolo Bonzini9a78e152021-01-08 11:43:08 -05003251 return false;
Vitaly Kuznetsovf5c7e842021-05-03 17:08:51 +02003252 }
Paolo Bonzini9a78e152021-01-08 11:43:08 -05003253
3254 if (is_guest_mode(vcpu) && !nested_get_vmcs12_pages(vcpu))
3255 return false;
3256
Jim Mattson671ddc72019-10-15 10:44:05 -07003257 return true;
Sean Christopherson55d23752018-12-03 13:53:18 -08003258}
3259
Sean Christopherson02f5fb22020-06-22 14:58:32 -07003260static int nested_vmx_write_pml_buffer(struct kvm_vcpu *vcpu, gpa_t gpa)
3261{
3262 struct vmcs12 *vmcs12;
3263 struct vcpu_vmx *vmx = to_vmx(vcpu);
3264 gpa_t dst;
3265
3266 if (WARN_ON_ONCE(!is_guest_mode(vcpu)))
3267 return 0;
3268
3269 if (WARN_ON_ONCE(vmx->nested.pml_full))
3270 return 1;
3271
3272 /*
3273 * Check if PML is enabled for the nested guest. Whether eptp bit 6 is
3274 * set is already checked as part of A/D emulation.
3275 */
3276 vmcs12 = get_vmcs12(vcpu);
3277 if (!nested_cpu_has_pml(vmcs12))
3278 return 0;
3279
3280 if (vmcs12->guest_pml_index >= PML_ENTITY_NUM) {
3281 vmx->nested.pml_full = true;
3282 return 1;
3283 }
3284
3285 gpa &= ~0xFFFull;
3286 dst = vmcs12->pml_address + sizeof(u64) * vmcs12->guest_pml_index;
3287
3288 if (kvm_write_guest_page(vcpu->kvm, gpa_to_gfn(dst), &gpa,
3289 offset_in_page(dst), sizeof(gpa)))
3290 return 0;
3291
3292 vmcs12->guest_pml_index--;
3293
3294 return 0;
3295}
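
/*
 * Worked example for nested_vmx_write_pml_buffer() above (sketch;
 * PML_ENTITY_NUM and the exact field width live in headers not shown here):
 * the nested PML log holds 8-byte GPA entries filled from the highest index
 * downward, so for guest_pml_index == 511
 *
 *   dst = vmcs12->pml_address + sizeof(u64) * 511;
 *
 * receives the page-aligned GPA (bits 11:0 cleared) and the index is then
 * decremented.  Once the index is no longer below PML_ENTITY_NUM, the check
 * above sets nested.pml_full so that a PML-full exit can be reflected to L1.
 */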
3296
Sean Christopherson55d23752018-12-03 13:53:18 -08003297/*
3298 * Intel's VMX Instruction Reference specifies a common set of prerequisites
3299 * for running VMX instructions (except VMXON, whose prerequisites are
3300 * slightly different). It also specifies what exception to inject otherwise.
3301 * Note that many of these exceptions have priority over VM exits, so they
3302 * don't have to be checked again here.
3303 */
3304static int nested_vmx_check_permission(struct kvm_vcpu *vcpu)
3305{
3306 if (!to_vmx(vcpu)->nested.vmxon) {
3307 kvm_queue_exception(vcpu, UD_VECTOR);
3308 return 0;
3309 }
3310
3311 if (vmx_get_cpl(vcpu)) {
3312 kvm_inject_gp(vcpu, 0);
3313 return 0;
3314 }
3315
3316 return 1;
3317}
3318
3319static u8 vmx_has_apicv_interrupt(struct kvm_vcpu *vcpu)
3320{
3321 u8 rvi = vmx_get_rvi();
3322 u8 vppr = kvm_lapic_get_reg(vcpu->arch.apic, APIC_PROCPRI);
3323
3324 return ((rvi & 0xf0) > (vppr & 0xf0));
3325}
3326
3327static void load_vmcs12_host_state(struct kvm_vcpu *vcpu,
3328 struct vmcs12 *vmcs12);
3329
3330/*
3331 * If from_vmentry is false, this is being called from state restore (either RSM
3332 * or KVM_SET_NESTED_STATE). Otherwise it's called from vmlaunch/vmresume.
Jim Mattson671ddc72019-10-15 10:44:05 -07003333 *
3334 * Returns:
Miaohe Lin463bfee2020-02-14 10:44:05 +08003335 * NVMX_VMENTRY_SUCCESS: Entered VMX non-root mode
3336 * NVMX_VMENTRY_VMFAIL: Consistency check VMFail
3337 * NVMX_VMENTRY_VMEXIT: Consistency check VMExit
3338 * NVMX_VMENTRY_KVM_INTERNAL_ERROR: KVM internal error
Sean Christopherson55d23752018-12-03 13:53:18 -08003339 */
Jim Mattson671ddc72019-10-15 10:44:05 -07003340enum nvmx_vmentry_status nested_vmx_enter_non_root_mode(struct kvm_vcpu *vcpu,
3341 bool from_vmentry)
Sean Christopherson55d23752018-12-03 13:53:18 -08003342{
3343 struct vcpu_vmx *vmx = to_vmx(vcpu);
3344 struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
Sean Christopherson68cda402020-05-11 15:05:29 -07003345 enum vm_entry_failure_code entry_failure_code;
Sean Christopherson55d23752018-12-03 13:53:18 -08003346 bool evaluate_pending_interrupts;
Sean Christopherson8e533242020-11-06 17:03:12 +08003347 union vmx_exit_reason exit_reason = {
3348 .basic = EXIT_REASON_INVALID_STATE,
3349 .failed_vmentry = 1,
3350 };
3351 u32 failed_index;
Sean Christopherson55d23752018-12-03 13:53:18 -08003352
Sean Christopherson40e5f9082021-11-25 01:49:43 +00003353 kvm_service_local_tlb_flush_requests(vcpu);
Sean Christophersoneeeb4f62020-03-20 14:28:20 -07003354
Sean Christopherson2183f562019-05-07 12:17:56 -07003355 evaluate_pending_interrupts = exec_controls_get(vmx) &
Xiaoyao Li4e2a0bc2019-12-06 16:45:25 +08003356 (CPU_BASED_INTR_WINDOW_EXITING | CPU_BASED_NMI_WINDOW_EXITING);
Sean Christopherson55d23752018-12-03 13:53:18 -08003357 if (likely(!evaluate_pending_interrupts) && kvm_vcpu_apicv_active(vcpu))
3358 evaluate_pending_interrupts |= vmx_has_apicv_interrupt(vcpu);
3359
3360 if (!(vmcs12->vm_entry_controls & VM_ENTRY_LOAD_DEBUG_CONTROLS))
3361 vmx->nested.vmcs01_debugctl = vmcs_read64(GUEST_IA32_DEBUGCTL);
3362 if (kvm_mpx_supported() &&
3363 !(vmcs12->vm_entry_controls & VM_ENTRY_LOAD_BNDCFGS))
3364 vmx->nested.vmcs01_guest_bndcfgs = vmcs_read64(GUEST_BNDCFGS);
3365
Sean Christophersonf087a022019-06-07 11:55:34 -07003366 /*
3367 * Overwrite vmcs01.GUEST_CR3 with L1's CR3 if EPT is disabled *and*
3368 * nested early checks are disabled. In the event of a "late" VM-Fail,
3369 * i.e. a VM-Fail detected by hardware but not KVM, KVM must unwind its
3370 * software model to the pre-VMEntry host state. When EPT is disabled,
3371 * GUEST_CR3 holds KVM's shadow CR3, not L1's "real" CR3, which causes
3372 * nested_vmx_restore_host_state() to corrupt vcpu->arch.cr3. Stuffing
3373 * vmcs01.GUEST_CR3 results in the unwind naturally setting arch.cr3 to
3374 * the correct value. Smashing vmcs01.GUEST_CR3 is safe because nested
3375 * VM-Exits, and the unwind, reset KVM's MMU, i.e. vmcs01.GUEST_CR3 is
3376 * guaranteed to be overwritten with a shadow CR3 prior to re-entering
3377 * L1. Don't stuff vmcs01.GUEST_CR3 when using nested early checks as
3378 * KVM modifies vcpu->arch.cr3 if and only if the early hardware checks
3379 * pass, and early VM-Fails do not reset KVM's MMU, i.e. the VM-Fail
3380 * path would need to manually save/restore vmcs01.GUEST_CR3.
3381 */
3382 if (!enable_ept && !nested_early_check)
3383 vmcs_writel(GUEST_CR3, vcpu->arch.cr3);
3384
Sean Christopherson55d23752018-12-03 13:53:18 -08003385 vmx_switch_vmcs(vcpu, &vmx->nested.vmcs02);
3386
Sean Christopherson389ab252021-08-10 10:19:50 -07003387 prepare_vmcs02_early(vmx, &vmx->vmcs01, vmcs12);
Sean Christopherson55d23752018-12-03 13:53:18 -08003388
3389 if (from_vmentry) {
Sean Christophersonb89d5ad2020-09-23 11:44:47 -07003390 if (unlikely(!nested_get_vmcs12_pages(vcpu))) {
3391 vmx_switch_vmcs(vcpu, &vmx->vmcs01);
Jim Mattson671ddc72019-10-15 10:44:05 -07003392 return NVMX_VMENTRY_KVM_INTERNAL_ERROR;
Sean Christophersonb89d5ad2020-09-23 11:44:47 -07003393 }
Sean Christopherson55d23752018-12-03 13:53:18 -08003394
3395 if (nested_vmx_check_vmentry_hw(vcpu)) {
3396 vmx_switch_vmcs(vcpu, &vmx->vmcs01);
Jim Mattson671ddc72019-10-15 10:44:05 -07003397 return NVMX_VMENTRY_VMFAIL;
Sean Christopherson55d23752018-12-03 13:53:18 -08003398 }
3399
Sean Christopherson68cda402020-05-11 15:05:29 -07003400 if (nested_vmx_check_guest_state(vcpu, vmcs12,
3401 &entry_failure_code)) {
Sean Christopherson8e533242020-11-06 17:03:12 +08003402 exit_reason.basic = EXIT_REASON_INVALID_STATE;
Sean Christopherson68cda402020-05-11 15:05:29 -07003403 vmcs12->exit_qualification = entry_failure_code;
Sean Christopherson55d23752018-12-03 13:53:18 -08003404 goto vmentry_fail_vmexit;
Sean Christopherson68cda402020-05-11 15:05:29 -07003405 }
Sean Christopherson55d23752018-12-03 13:53:18 -08003406 }
3407
3408 enter_guest_mode(vcpu);
Sean Christopherson55d23752018-12-03 13:53:18 -08003409
Maxim Levitsky0f857222021-06-07 12:02:00 +03003410 if (prepare_vmcs02(vcpu, vmcs12, from_vmentry, &entry_failure_code)) {
Sean Christopherson8e533242020-11-06 17:03:12 +08003411 exit_reason.basic = EXIT_REASON_INVALID_STATE;
Sean Christopherson68cda402020-05-11 15:05:29 -07003412 vmcs12->exit_qualification = entry_failure_code;
Sean Christopherson55d23752018-12-03 13:53:18 -08003413 goto vmentry_fail_vmexit_guest_mode;
Sean Christopherson68cda402020-05-11 15:05:29 -07003414 }
Sean Christopherson55d23752018-12-03 13:53:18 -08003415
3416 if (from_vmentry) {
Sean Christopherson68cda402020-05-11 15:05:29 -07003417 failed_index = nested_vmx_load_msr(vcpu,
3418 vmcs12->vm_entry_msr_load_addr,
3419 vmcs12->vm_entry_msr_load_count);
3420 if (failed_index) {
Sean Christopherson8e533242020-11-06 17:03:12 +08003421 exit_reason.basic = EXIT_REASON_MSR_LOAD_FAIL;
Sean Christopherson68cda402020-05-11 15:05:29 -07003422 vmcs12->exit_qualification = failed_index;
Sean Christopherson55d23752018-12-03 13:53:18 -08003423 goto vmentry_fail_vmexit_guest_mode;
Sean Christopherson68cda402020-05-11 15:05:29 -07003424 }
Sean Christopherson55d23752018-12-03 13:53:18 -08003425 } else {
3426 /*
3427 * The MMU is not initialized to point at the right entities yet and
3428 * "get pages" would need to read data from the guest (i.e. we will
3429 * need to perform gpa to hpa translation). Request a call
3430 * to nested_get_vmcs12_pages before the next VM-entry. The MSRs
3431 * have already been set at vmentry time and should not be reset.
3432 */
Paolo Bonzini729c15c2020-09-22 06:53:57 -04003433 kvm_make_request(KVM_REQ_GET_NESTED_STATE_PAGES, vcpu);
Sean Christopherson55d23752018-12-03 13:53:18 -08003434 }
3435
3436 /*
 3437 * If L1 had a pending IRQ/NMI when it executed
 3438 * VMLAUNCH/VMRESUME which wasn't delivered because it was
3439 * disallowed (e.g. interrupts disabled), L0 needs to
 3440 * evaluate whether this pending event should cause an exit from L2
 3441 * to L1 or be delivered directly to L2 (e.g. in case L1 doesn't
3442 * intercept EXTERNAL_INTERRUPT).
3443 *
3444 * Usually this would be handled by the processor noticing an
3445 * IRQ/NMI window request, or checking RVI during evaluation of
3446 * pending virtual interrupts. However, this setting was done
3447 * on VMCS01 and now VMCS02 is active instead. Thus, we force L0
3448 * to perform pending event evaluation by requesting a KVM_REQ_EVENT.
3449 */
3450 if (unlikely(evaluate_pending_interrupts))
3451 kvm_make_request(KVM_REQ_EVENT, vcpu);
3452
3453 /*
Paolo Bonzini359a6c32019-01-29 19:14:46 +01003454 * Do not start the preemption timer hrtimer until after we know
3455 * we are successful, so that only nested_vmx_vmexit needs to cancel
3456 * the timer.
3457 */
3458 vmx->nested.preemption_timer_expired = false;
Peter Shier850448f2020-05-26 14:51:06 -07003459 if (nested_cpu_has_preemption_timer(vmcs12)) {
3460 u64 timer_value = vmx_calc_preemption_timer_value(vcpu);
3461 vmx_start_preemption_timer(vcpu, timer_value);
3462 }
Paolo Bonzini359a6c32019-01-29 19:14:46 +01003463
3464 /*
Sean Christopherson55d23752018-12-03 13:53:18 -08003465 * Note no nested_vmx_succeed or nested_vmx_fail here. At this point
3466 * we are no longer running L1, and VMLAUNCH/VMRESUME has not yet
3467 * returned as far as L1 is concerned. It will only return (and set
3468 * the success flag) when L2 exits (see nested_vmx_vmexit()).
3469 */
Jim Mattson671ddc72019-10-15 10:44:05 -07003470 return NVMX_VMENTRY_SUCCESS;
Sean Christopherson55d23752018-12-03 13:53:18 -08003471
3472 /*
3473 * A failed consistency check that leads to a VMExit during L1's
3474 * VMEnter to L2 is a variation of a normal VMexit, as explained in
3475 * 26.7 "VM-entry failures during or after loading guest state".
3476 */
3477vmentry_fail_vmexit_guest_mode:
Xiaoyao Li5e3d3942019-12-06 16:45:26 +08003478 if (vmcs12->cpu_based_vm_exec_control & CPU_BASED_USE_TSC_OFFSETTING)
Sean Christopherson55d23752018-12-03 13:53:18 -08003479 vcpu->arch.tsc_offset -= vmcs12->tsc_offset;
3480 leave_guest_mode(vcpu);
3481
3482vmentry_fail_vmexit:
3483 vmx_switch_vmcs(vcpu, &vmx->vmcs01);
3484
3485 if (!from_vmentry)
Jim Mattson671ddc72019-10-15 10:44:05 -07003486 return NVMX_VMENTRY_VMEXIT;
Sean Christopherson55d23752018-12-03 13:53:18 -08003487
3488 load_vmcs12_host_state(vcpu, vmcs12);
Sean Christopherson8e533242020-11-06 17:03:12 +08003489 vmcs12->vm_exit_reason = exit_reason.full;
Vitaly Kuznetsov1e9dfbd2021-05-26 15:20:16 +02003490 if (enable_shadow_vmcs || evmptr_is_valid(vmx->nested.hv_evmcs_vmptr))
Sean Christopherson3731905ef2019-05-07 08:36:27 -07003491 vmx->nested.need_vmcs12_to_shadow_sync = true;
Jim Mattson671ddc72019-10-15 10:44:05 -07003492 return NVMX_VMENTRY_VMEXIT;
Sean Christopherson55d23752018-12-03 13:53:18 -08003493}
3494
3495/*
3496 * nested_vmx_run() handles a nested entry, i.e., a VMLAUNCH or VMRESUME on L1
3497 * for running an L2 nested guest.
3498 */
3499static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch)
3500{
3501 struct vmcs12 *vmcs12;
Jim Mattson671ddc72019-10-15 10:44:05 -07003502 enum nvmx_vmentry_status status;
Sean Christopherson55d23752018-12-03 13:53:18 -08003503 struct vcpu_vmx *vmx = to_vmx(vcpu);
3504 u32 interrupt_shadow = vmx_get_interrupt_shadow(vcpu);
Vitaly Kuznetsovb6a06532020-03-09 16:52:13 +01003505 enum nested_evmptrld_status evmptrld_status;
Sean Christopherson55d23752018-12-03 13:53:18 -08003506
3507 if (!nested_vmx_check_permission(vcpu))
3508 return 1;
3509
Vitaly Kuznetsovb6a06532020-03-09 16:52:13 +01003510 evmptrld_status = nested_vmx_handle_enlightened_vmptrld(vcpu, launch);
3511 if (evmptrld_status == EVMPTRLD_ERROR) {
3512 kvm_queue_exception(vcpu, UD_VECTOR);
Sean Christopherson55d23752018-12-03 13:53:18 -08003513 return 1;
Sean Christophersonfc595f32020-08-12 11:06:15 -07003514 } else if (CC(evmptrld_status == EVMPTRLD_VMFAIL)) {
Vitaly Kuznetsovb6a06532020-03-09 16:52:13 +01003515 return nested_vmx_failInvalid(vcpu);
3516 }
Sean Christopherson55d23752018-12-03 13:53:18 -08003517
Vitaly Kuznetsov1e9dfbd2021-05-26 15:20:16 +02003518 if (CC(!evmptr_is_valid(vmx->nested.hv_evmcs_vmptr) &&
Yu Zhang64c78502021-09-30 01:51:53 +08003519 vmx->nested.current_vmptr == INVALID_GPA))
Sean Christopherson55d23752018-12-03 13:53:18 -08003520 return nested_vmx_failInvalid(vcpu);
3521
3522 vmcs12 = get_vmcs12(vcpu);
3523
3524 /*
3525 * Can't VMLAUNCH or VMRESUME a shadow VMCS. Despite the fact
3526 * that there *is* a valid VMCS pointer, RFLAGS.CF is set
3527 * rather than RFLAGS.ZF, and no error number is stored to the
3528 * VM-instruction error field.
3529 */
Sean Christophersonfc595f32020-08-12 11:06:15 -07003530 if (CC(vmcs12->hdr.shadow_vmcs))
Sean Christopherson55d23752018-12-03 13:53:18 -08003531 return nested_vmx_failInvalid(vcpu);
3532
Vitaly Kuznetsov1e9dfbd2021-05-26 15:20:16 +02003533 if (evmptr_is_valid(vmx->nested.hv_evmcs_vmptr)) {
Vitaly Kuznetsovd6bf71a2021-05-26 15:20:22 +02003534 copy_enlightened_to_vmcs12(vmx, vmx->nested.hv_evmcs->hv_clean_fields);
Sean Christopherson55d23752018-12-03 13:53:18 -08003535 /* Enlightened VMCS doesn't have launch state */
3536 vmcs12->launch_state = !launch;
3537 } else if (enable_shadow_vmcs) {
3538 copy_shadow_to_vmcs12(vmx);
3539 }
3540
3541 /*
3542 * The nested entry process starts with enforcing various prerequisites
 3543 * on vmcs12 as required by the Intel SDM, and acting appropriately when
3544 * they fail: As the SDM explains, some conditions should cause the
3545 * instruction to fail, while others will cause the instruction to seem
3546 * to succeed, but return an EXIT_REASON_INVALID_STATE.
3547 * To speed up the normal (success) code path, we should avoid checking
3548 * for misconfigurations which will anyway be caught by the processor
3549 * when using the merged vmcs02.
3550 */
Sean Christophersonfc595f32020-08-12 11:06:15 -07003551 if (CC(interrupt_shadow & KVM_X86_SHADOW_INT_MOV_SS))
Sean Christophersonb2656e42020-06-08 18:56:07 -07003552 return nested_vmx_fail(vcpu, VMXERR_ENTRY_EVENTS_BLOCKED_BY_MOV_SS);
Sean Christopherson55d23752018-12-03 13:53:18 -08003553
Sean Christophersonfc595f32020-08-12 11:06:15 -07003554 if (CC(vmcs12->launch_state == launch))
Sean Christophersonb2656e42020-06-08 18:56:07 -07003555 return nested_vmx_fail(vcpu,
Sean Christopherson55d23752018-12-03 13:53:18 -08003556 launch ? VMXERR_VMLAUNCH_NONCLEAR_VMCS
3557 : VMXERR_VMRESUME_NONLAUNCHED_VMCS);
3558
Paolo Bonzini98d9e852019-04-12 10:19:57 +02003559 if (nested_vmx_check_controls(vcpu, vmcs12))
Sean Christophersonb2656e42020-06-08 18:56:07 -07003560 return nested_vmx_fail(vcpu, VMXERR_ENTRY_INVALID_CONTROL_FIELD);
Sean Christopherson5478ba32019-04-11 12:18:06 -07003561
Maxim Levitskyaf957ee2021-11-15 15:18:36 +02003562 if (nested_vmx_check_address_space_size(vcpu, vmcs12))
3563 return nested_vmx_fail(vcpu, VMXERR_ENTRY_INVALID_HOST_STATE_FIELD);
3564
Paolo Bonzini98d9e852019-04-12 10:19:57 +02003565 if (nested_vmx_check_host_state(vcpu, vmcs12))
Sean Christophersonb2656e42020-06-08 18:56:07 -07003566 return nested_vmx_fail(vcpu, VMXERR_ENTRY_INVALID_HOST_STATE_FIELD);
Sean Christopherson55d23752018-12-03 13:53:18 -08003567
3568 /*
3569 * We're finally done with prerequisite checking, and can start with
3570 * the nested entry.
3571 */
3572 vmx->nested.nested_run_pending = 1;
Peter Shier850448f2020-05-26 14:51:06 -07003573 vmx->nested.has_preemption_timer_deadline = false;
Jim Mattson671ddc72019-10-15 10:44:05 -07003574 status = nested_vmx_enter_non_root_mode(vcpu, true);
3575 if (unlikely(status != NVMX_VMENTRY_SUCCESS))
3576 goto vmentry_failed;
Sean Christopherson55d23752018-12-03 13:53:18 -08003577
Sean Christopherson25bb2cf2020-08-12 10:51:29 -07003578 /* Emulate processing of posted interrupts on VM-Enter. */
3579 if (nested_cpu_has_posted_intr(vmcs12) &&
3580 kvm_apic_has_interrupt(vcpu) == vmx->nested.posted_intr_nv) {
3581 vmx->nested.pi_pending = true;
3582 kvm_make_request(KVM_REQ_EVENT, vcpu);
3583 kvm_apic_clear_irr(vcpu, vmx->nested.posted_intr_nv);
3584 }
3585
Sean Christopherson55d23752018-12-03 13:53:18 -08003586 /* Hide L1D cache contents from the nested guest. */
3587 vmx->vcpu.arch.l1tf_flush_l1d = true;
3588
3589 /*
3590 * Must happen outside of nested_vmx_enter_non_root_mode() as it will
3591 * also be used as part of restoring nVMX state for
3592 * snapshot restore (migration).
3593 *
3594 * In this flow, it is assumed that vmcs12 cache was
Ingo Molnar163b0992021-03-21 22:28:53 +01003595 * transferred as part of captured nVMX state and should
Sean Christopherson55d23752018-12-03 13:53:18 -08003596 * therefore not be read from guest memory (which may not
3597 * exist on destination host yet).
3598 */
3599 nested_cache_shadow_vmcs12(vcpu, vmcs12);
3600
Yadong Qibf0cd882020-11-06 14:51:22 +08003601 switch (vmcs12->guest_activity_state) {
3602 case GUEST_ACTIVITY_HLT:
3603 /*
3604 * If we're entering a halted L2 vcpu and the L2 vcpu won't be
3605 * awakened by event injection or by an NMI-window VM-exit or
3606 * by an interrupt-window VM-exit, halt the vcpu.
3607 */
3608 if (!(vmcs12->vm_entry_intr_info_field & INTR_INFO_VALID_MASK) &&
3609 !nested_cpu_has(vmcs12, CPU_BASED_NMI_WINDOW_EXITING) &&
3610 !(nested_cpu_has(vmcs12, CPU_BASED_INTR_WINDOW_EXITING) &&
3611 (vmcs12->guest_rflags & X86_EFLAGS_IF))) {
3612 vmx->nested.nested_run_pending = 0;
Sean Christopherson14601792021-10-08 19:12:05 -07003613 return kvm_emulate_halt_noskip(vcpu);
Yadong Qibf0cd882020-11-06 14:51:22 +08003614 }
3615 break;
3616 case GUEST_ACTIVITY_WAIT_SIPI:
Sean Christopherson55d23752018-12-03 13:53:18 -08003617 vmx->nested.nested_run_pending = 0;
Yadong Qibf0cd882020-11-06 14:51:22 +08003618 vcpu->arch.mp_state = KVM_MP_STATE_INIT_RECEIVED;
3619 break;
3620 default:
3621 break;
Sean Christopherson55d23752018-12-03 13:53:18 -08003622 }
Yadong Qibf0cd882020-11-06 14:51:22 +08003623
Sean Christopherson55d23752018-12-03 13:53:18 -08003624 return 1;
Jim Mattson671ddc72019-10-15 10:44:05 -07003625
3626vmentry_failed:
3627 vmx->nested.nested_run_pending = 0;
3628 if (status == NVMX_VMENTRY_KVM_INTERNAL_ERROR)
3629 return 0;
3630 if (status == NVMX_VMENTRY_VMEXIT)
3631 return 1;
3632 WARN_ON_ONCE(status != NVMX_VMENTRY_VMFAIL);
Sean Christophersonb2656e42020-06-08 18:56:07 -07003633 return nested_vmx_fail(vcpu, VMXERR_ENTRY_INVALID_CONTROL_FIELD);
Sean Christopherson55d23752018-12-03 13:53:18 -08003634}
3635
3636/*
3637 * On a nested exit from L2 to L1, vmcs12.guest_cr0 might not be up-to-date
Miaohe Lin67b0ae42019-12-11 14:26:22 +08003638 * because L2 may have changed some cr0 bits directly (CR0_GUEST_HOST_MASK).
Sean Christopherson55d23752018-12-03 13:53:18 -08003639 * This function returns the new value we should put in vmcs12.guest_cr0.
3640 * It's not enough to just return the vmcs02 GUEST_CR0. Rather,
3641 * 1. Bits that neither L0 nor L1 trapped, were set directly by L2 and are now
3642 * available in vmcs02 GUEST_CR0. (Note: It's enough to check that L0
3643 * didn't trap the bit, because if L1 did, so would L0).
3644 * 2. Bits that L1 asked to trap (and therefore L0 also did) could not have
3645 * been modified by L2, and L1 knows it. So just leave the old value of
3646 * the bit from vmcs12.guest_cr0. Note that the bit from vmcs02 GUEST_CR0
3647 * isn't relevant, because if L0 traps this bit it can set it to anything.
3648 * 3. Bits that L1 didn't trap, but L0 did. L1 believes the guest could have
3649 * changed these bits, and therefore they need to be updated, but L0
3650 * didn't necessarily allow them to be changed in GUEST_CR0 - and rather
3651 * put them in vmcs02 CR0_READ_SHADOW. So take these bits from there.
3652 */
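/*
 * Illustrative example, using CR0.TS: if neither L0 nor L1 intercepts TS
 * (it is in cr0_guest_owned_bits), L2's writes landed in vmcs02 GUEST_CR0,
 * so case 1 applies.  If L1 intercepts TS (set in vmcs12->cr0_guest_host_mask),
 * L2 could not have changed it and the old vmcs12->guest_cr0 value is kept
 * (case 2).  If only L0 intercepts TS, the value L2 believes it wrote is
 * shadowed in vmcs02 CR0_READ_SHADOW and is taken from there (case 3).
 */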
3653static inline unsigned long
3654vmcs12_guest_cr0(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
3655{
3656 return
3657 /*1*/ (vmcs_readl(GUEST_CR0) & vcpu->arch.cr0_guest_owned_bits) |
3658 /*2*/ (vmcs12->guest_cr0 & vmcs12->cr0_guest_host_mask) |
3659 /*3*/ (vmcs_readl(CR0_READ_SHADOW) & ~(vmcs12->cr0_guest_host_mask |
3660 vcpu->arch.cr0_guest_owned_bits));
3661}
3662
3663static inline unsigned long
3664vmcs12_guest_cr4(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
3665{
3666 return
3667 /*1*/ (vmcs_readl(GUEST_CR4) & vcpu->arch.cr4_guest_owned_bits) |
3668 /*2*/ (vmcs12->guest_cr4 & vmcs12->cr4_guest_host_mask) |
3669 /*3*/ (vmcs_readl(CR4_READ_SHADOW) & ~(vmcs12->cr4_guest_host_mask |
3670 vcpu->arch.cr4_guest_owned_bits));
3671}
3672
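/*
 * Record any event (exception, NMI or external/soft interrupt) that KVM was
 * in the middle of injecting into L2 in vmcs12's IDT-vectoring info fields,
 * so that L1 sees it exactly as if delivery was interrupted by the VM-exit
 * and can re-inject it.
 */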
3673static void vmcs12_save_pending_event(struct kvm_vcpu *vcpu,
3674 struct vmcs12 *vmcs12)
3675{
3676 u32 idt_vectoring;
3677 unsigned int nr;
3678
3679 if (vcpu->arch.exception.injected) {
3680 nr = vcpu->arch.exception.nr;
3681 idt_vectoring = nr | VECTORING_INFO_VALID_MASK;
3682
3683 if (kvm_exception_is_soft(nr)) {
3684 vmcs12->vm_exit_instruction_len =
3685 vcpu->arch.event_exit_inst_len;
3686 idt_vectoring |= INTR_TYPE_SOFT_EXCEPTION;
3687 } else
3688 idt_vectoring |= INTR_TYPE_HARD_EXCEPTION;
3689
3690 if (vcpu->arch.exception.has_error_code) {
3691 idt_vectoring |= VECTORING_INFO_DELIVER_CODE_MASK;
3692 vmcs12->idt_vectoring_error_code =
3693 vcpu->arch.exception.error_code;
3694 }
3695
3696 vmcs12->idt_vectoring_info_field = idt_vectoring;
3697 } else if (vcpu->arch.nmi_injected) {
3698 vmcs12->idt_vectoring_info_field =
3699 INTR_TYPE_NMI_INTR | INTR_INFO_VALID_MASK | NMI_VECTOR;
3700 } else if (vcpu->arch.interrupt.injected) {
3701 nr = vcpu->arch.interrupt.nr;
3702 idt_vectoring = nr | VECTORING_INFO_VALID_MASK;
3703
3704 if (vcpu->arch.interrupt.soft) {
3705 idt_vectoring |= INTR_TYPE_SOFT_INTR;
3706 vmcs12->vm_entry_instruction_len =
3707 vcpu->arch.event_exit_inst_len;
3708 } else
3709 idt_vectoring |= INTR_TYPE_EXT_INTR;
3710
3711 vmcs12->idt_vectoring_info_field = idt_vectoring;
3712 }
3713}
3714
3715
Paolo Bonzini96b100c2020-03-17 18:32:50 +01003716void nested_mark_vmcs12_pages_dirty(struct kvm_vcpu *vcpu)
Sean Christopherson55d23752018-12-03 13:53:18 -08003717{
3718 struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
3719 gfn_t gfn;
3720
3721 /*
3722 * Don't need to mark the APIC access page dirty; it is never
3723 * written to by the CPU during APIC virtualization.
3724 */
3725
3726 if (nested_cpu_has(vmcs12, CPU_BASED_TPR_SHADOW)) {
3727 gfn = vmcs12->virtual_apic_page_addr >> PAGE_SHIFT;
3728 kvm_vcpu_mark_page_dirty(vcpu, gfn);
3729 }
3730
3731 if (nested_cpu_has_posted_intr(vmcs12)) {
3732 gfn = vmcs12->posted_intr_desc_addr >> PAGE_SHIFT;
3733 kvm_vcpu_mark_page_dirty(vcpu, gfn);
3734 }
3735}
3736
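/*
 * Complete emulation of a nested posted interrupt: if one is pending, clear
 * the descriptor's ON bit, merge the PIR into L2's virtual-APIC page IRR and,
 * if the new highest vector is larger, bump RVI (the low byte of
 * GUEST_INTR_STATUS).  Bails out to userspace (-ENXIO) if the PI descriptor
 * or the virtual-APIC page can't be accessed.
 */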
Jim Mattson650293c2021-06-04 10:26:02 -07003737static int vmx_complete_nested_posted_interrupt(struct kvm_vcpu *vcpu)
Sean Christopherson55d23752018-12-03 13:53:18 -08003738{
3739 struct vcpu_vmx *vmx = to_vmx(vcpu);
3740 int max_irr;
3741 void *vapic_page;
3742 u16 status;
3743
Jim Mattson966eefb2021-06-04 10:26:06 -07003744 if (!vmx->nested.pi_pending)
Jim Mattson650293c2021-06-04 10:26:02 -07003745 return 0;
Sean Christopherson55d23752018-12-03 13:53:18 -08003746
Jim Mattson966eefb2021-06-04 10:26:06 -07003747 if (!vmx->nested.pi_desc)
3748 goto mmio_needed;
3749
Sean Christopherson55d23752018-12-03 13:53:18 -08003750 vmx->nested.pi_pending = false;
Jim Mattson966eefb2021-06-04 10:26:06 -07003751
Sean Christopherson55d23752018-12-03 13:53:18 -08003752 if (!pi_test_and_clear_on(vmx->nested.pi_desc))
Jim Mattson650293c2021-06-04 10:26:02 -07003753 return 0;
Sean Christopherson55d23752018-12-03 13:53:18 -08003754
3755 max_irr = find_last_bit((unsigned long *)vmx->nested.pi_desc->pir, 256);
3756 if (max_irr != 256) {
KarimAllah Ahmed96c66e82019-01-31 21:24:37 +01003757 vapic_page = vmx->nested.virtual_apic_map.hva;
3758 if (!vapic_page)
Jim Mattson0fe998b2021-06-04 10:26:05 -07003759 goto mmio_needed;
KarimAllah Ahmed96c66e82019-01-31 21:24:37 +01003760
Sean Christopherson55d23752018-12-03 13:53:18 -08003761 __kvm_apic_update_irr(vmx->nested.pi_desc->pir,
3762 vapic_page, &max_irr);
Sean Christopherson55d23752018-12-03 13:53:18 -08003763 status = vmcs_read16(GUEST_INTR_STATUS);
3764 if ((u8)max_irr > ((u8)status & 0xff)) {
3765 status &= ~0xff;
3766 status |= (u8)max_irr;
3767 vmcs_write16(GUEST_INTR_STATUS, status);
3768 }
3769 }
3770
3771 nested_mark_vmcs12_pages_dirty(vcpu);
Jim Mattson650293c2021-06-04 10:26:02 -07003772 return 0;
Jim Mattson0fe998b2021-06-04 10:26:05 -07003773
3774mmio_needed:
3775 kvm_handle_memory_failure(vcpu, X86EMUL_IO_NEEDED, NULL);
3776 return -ENXIO;
Sean Christopherson55d23752018-12-03 13:53:18 -08003777}
3778
3779static void nested_vmx_inject_exception_vmexit(struct kvm_vcpu *vcpu,
3780 unsigned long exit_qual)
3781{
3782 struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
3783 unsigned int nr = vcpu->arch.exception.nr;
3784 u32 intr_info = nr | INTR_INFO_VALID_MASK;
3785
3786 if (vcpu->arch.exception.has_error_code) {
3787 vmcs12->vm_exit_intr_error_code = vcpu->arch.exception.error_code;
3788 intr_info |= INTR_INFO_DELIVER_CODE_MASK;
3789 }
3790
3791 if (kvm_exception_is_soft(nr))
3792 intr_info |= INTR_TYPE_SOFT_EXCEPTION;
3793 else
3794 intr_info |= INTR_TYPE_HARD_EXCEPTION;
3795
3796 if (!(vmcs12->idt_vectoring_info_field & VECTORING_INFO_VALID_MASK) &&
3797 vmx_get_nmi_mask(vcpu))
3798 intr_info |= INTR_INFO_UNBLOCK_NMI;
3799
3800 nested_vmx_vmexit(vcpu, EXIT_REASON_EXCEPTION_NMI, intr_info, exit_qual);
3801}
3802
Oliver Upton684c0422020-02-07 02:36:05 -08003803/*
3804 * Returns true if a debug trap is pending delivery.
3805 *
3806 * In KVM, debug traps bear an exception payload. As such, the class of a #DB
3807 * exception may be inferred from the presence of an exception payload.
3808 */
3809static inline bool vmx_pending_dbg_trap(struct kvm_vcpu *vcpu)
3810{
3811 return vcpu->arch.exception.pending &&
3812 vcpu->arch.exception.nr == DB_VECTOR &&
3813 vcpu->arch.exception.payload;
3814}
3815
3816/*
3817 * Certain VM-exits set the 'pending debug exceptions' field to indicate a
3818 * recognized #DB (data or single-step) that has yet to be delivered. Since KVM
3819 * represents these debug traps with a payload that is said to be compatible
3820 * with the 'pending debug exceptions' field, write the payload to the VMCS
3821 * field if a VM-exit is delivered before the debug trap.
3822 */
3823static void nested_vmx_update_pending_dbg(struct kvm_vcpu *vcpu)
3824{
3825 if (vmx_pending_dbg_trap(vcpu))
3826 vmcs_writel(GUEST_PENDING_DBG_EXCEPTIONS,
3827 vcpu->arch.exception.payload);
3828}
3829
Sean Christophersond2060bd2020-04-22 19:25:39 -07003830static bool nested_vmx_preemption_timer_pending(struct kvm_vcpu *vcpu)
3831{
3832 return nested_cpu_has_preemption_timer(get_vmcs12(vcpu)) &&
3833 to_vmx(vcpu)->nested.preemption_timer_expired;
3834}
3835
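/*
 * Pending events are evaluated in priority order: INIT, SIPI, exceptions
 * that are not debug traps, MTF, remaining exceptions, the VMX preemption
 * timer, SMI, NMI and finally external interrupts.  Returns -EBUSY when the
 * highest-priority event must be blocked (e.g. a nested VM-Enter is still
 * pending), 0 once a VM-exit has been synthesized; if nothing warrants a
 * VM-exit, fall through to finish any pending nested posted interrupt.
 */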
Sean Christophersona1c77ab2020-03-02 22:27:35 -08003836static int vmx_check_nested_events(struct kvm_vcpu *vcpu)
Sean Christopherson55d23752018-12-03 13:53:18 -08003837{
3838 struct vcpu_vmx *vmx = to_vmx(vcpu);
3839 unsigned long exit_qual;
3840 bool block_nested_events =
3841 vmx->nested.nested_run_pending || kvm_event_needs_reinjection(vcpu);
Oliver Upton5ef8acb2020-02-07 02:36:07 -08003842 bool mtf_pending = vmx->nested.mtf_pending;
Liran Alon4b9852f2019-08-26 13:24:49 +03003843 struct kvm_lapic *apic = vcpu->arch.apic;
3844
Oliver Upton5ef8acb2020-02-07 02:36:07 -08003845 /*
3846 * Clear the MTF state. If a higher priority VM-exit is delivered first,
3847 * this state is discarded.
3848 */
Oliver Upton5c8beb42020-04-06 20:12:37 +00003849 if (!block_nested_events)
3850 vmx->nested.mtf_pending = false;
Oliver Upton5ef8acb2020-02-07 02:36:07 -08003851
Liran Alon4b9852f2019-08-26 13:24:49 +03003852 if (lapic_in_kernel(vcpu) &&
3853 test_bit(KVM_APIC_INIT, &apic->pending_events)) {
3854 if (block_nested_events)
3855 return -EBUSY;
Oliver Upton684c0422020-02-07 02:36:05 -08003856 nested_vmx_update_pending_dbg(vcpu);
Liran Alone64a8502019-11-11 14:16:05 +02003857 clear_bit(KVM_APIC_INIT, &apic->pending_events);
Yadong Qibf0cd882020-11-06 14:51:22 +08003858 if (vcpu->arch.mp_state != KVM_MP_STATE_INIT_RECEIVED)
3859 nested_vmx_vmexit(vcpu, EXIT_REASON_INIT_SIGNAL, 0, 0);
3860 return 0;
3861 }
3862
3863 if (lapic_in_kernel(vcpu) &&
3864 test_bit(KVM_APIC_SIPI, &apic->pending_events)) {
3865 if (block_nested_events)
3866 return -EBUSY;
3867
3868 clear_bit(KVM_APIC_SIPI, &apic->pending_events);
3869 if (vcpu->arch.mp_state == KVM_MP_STATE_INIT_RECEIVED)
3870 nested_vmx_vmexit(vcpu, EXIT_REASON_SIPI_SIGNAL, 0,
3871 apic->sipi_vector & 0xFFUL);
Liran Alon4b9852f2019-08-26 13:24:49 +03003872 return 0;
3873 }
Sean Christopherson55d23752018-12-03 13:53:18 -08003874
Oliver Upton5ef8acb2020-02-07 02:36:07 -08003875 /*
3876 * Process any exceptions that are not debug traps before MTF.
Maxim Levitsky4020da32021-04-01 17:38:14 +03003877 *
3878 * Note that only a pending nested run can block a pending exception.
3879 * Otherwise an injected NMI/interrupt should either be
3880 * lost or delivered to the nested hypervisor in the IDT_VECTORING_INFO,
3881 * while delivering the pending exception.
Oliver Upton5ef8acb2020-02-07 02:36:07 -08003882 */
Maxim Levitsky4020da32021-04-01 17:38:14 +03003883
Sean Christopherson6ce347a2020-04-22 19:25:38 -07003884 if (vcpu->arch.exception.pending && !vmx_pending_dbg_trap(vcpu)) {
Maxim Levitsky4020da32021-04-01 17:38:14 +03003885 if (vmx->nested.nested_run_pending)
Oliver Upton5ef8acb2020-02-07 02:36:07 -08003886 return -EBUSY;
Sean Christopherson6ce347a2020-04-22 19:25:38 -07003887 if (!nested_vmx_check_exception(vcpu, &exit_qual))
3888 goto no_vmexit;
Oliver Upton5ef8acb2020-02-07 02:36:07 -08003889 nested_vmx_inject_exception_vmexit(vcpu, exit_qual);
3890 return 0;
3891 }
3892
3893 if (mtf_pending) {
3894 if (block_nested_events)
3895 return -EBUSY;
3896 nested_vmx_update_pending_dbg(vcpu);
3897 nested_vmx_vmexit(vcpu, EXIT_REASON_MONITOR_TRAP_FLAG, 0, 0);
3898 return 0;
3899 }
3900
Sean Christopherson6ce347a2020-04-22 19:25:38 -07003901 if (vcpu->arch.exception.pending) {
Maxim Levitsky4020da32021-04-01 17:38:14 +03003902 if (vmx->nested.nested_run_pending)
Sean Christopherson55d23752018-12-03 13:53:18 -08003903 return -EBUSY;
Sean Christopherson6ce347a2020-04-22 19:25:38 -07003904 if (!nested_vmx_check_exception(vcpu, &exit_qual))
3905 goto no_vmexit;
Sean Christopherson55d23752018-12-03 13:53:18 -08003906 nested_vmx_inject_exception_vmexit(vcpu, exit_qual);
3907 return 0;
3908 }
3909
Sean Christophersond2060bd2020-04-22 19:25:39 -07003910 if (nested_vmx_preemption_timer_pending(vcpu)) {
Sean Christopherson55d23752018-12-03 13:53:18 -08003911 if (block_nested_events)
3912 return -EBUSY;
3913 nested_vmx_vmexit(vcpu, EXIT_REASON_PREEMPTION_TIMER, 0, 0);
3914 return 0;
3915 }
3916
Sean Christopherson1cd2f0b2020-04-22 19:25:46 -07003917 if (vcpu->arch.smi_pending && !is_smm(vcpu)) {
3918 if (block_nested_events)
3919 return -EBUSY;
3920 goto no_vmexit;
3921 }
3922
Sean Christopherson15ff0b42020-04-22 19:25:45 -07003923 if (vcpu->arch.nmi_pending && !vmx_nmi_blocked(vcpu)) {
Sean Christopherson55d23752018-12-03 13:53:18 -08003924 if (block_nested_events)
3925 return -EBUSY;
Sean Christopherson15ff0b42020-04-22 19:25:45 -07003926 if (!nested_exit_on_nmi(vcpu))
3927 goto no_vmexit;
3928
Sean Christopherson55d23752018-12-03 13:53:18 -08003929 nested_vmx_vmexit(vcpu, EXIT_REASON_EXCEPTION_NMI,
3930 NMI_VECTOR | INTR_TYPE_NMI_INTR |
3931 INTR_INFO_VALID_MASK, 0);
3932 /*
3933 * The NMI-triggered VM exit counts as injection:
3934 * clear this one and block further NMIs.
3935 */
3936 vcpu->arch.nmi_pending = 0;
3937 vmx_set_nmi_mask(vcpu, true);
3938 return 0;
3939 }
3940
Sean Christopherson15ff0b42020-04-22 19:25:45 -07003941 if (kvm_cpu_has_interrupt(vcpu) && !vmx_interrupt_blocked(vcpu)) {
Sean Christopherson55d23752018-12-03 13:53:18 -08003942 if (block_nested_events)
3943 return -EBUSY;
Sean Christopherson15ff0b42020-04-22 19:25:45 -07003944 if (!nested_exit_on_intr(vcpu))
3945 goto no_vmexit;
Sean Christopherson55d23752018-12-03 13:53:18 -08003946 nested_vmx_vmexit(vcpu, EXIT_REASON_EXTERNAL_INTERRUPT, 0, 0);
3947 return 0;
3948 }
3949
Sean Christopherson6ce347a2020-04-22 19:25:38 -07003950no_vmexit:
Jim Mattson650293c2021-06-04 10:26:02 -07003951 return vmx_complete_nested_posted_interrupt(vcpu);
Sean Christopherson55d23752018-12-03 13:53:18 -08003952}
3953
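/*
 * Convert the time remaining on the emulated preemption timer back into
 * guest preemption-timer units: remaining_ns * virtual_tsc_khz / 10^6 gives
 * TSC ticks, and the shift divides by 2^5 because the emulated timer ticks
 * once every 32 TSC cycles (VMX_MISC_EMULATED_PREEMPTION_TIMER_RATE).  E.g.
 * 1,000,000 ns left on a 2 GHz virtual TSC -> 2,000,000 ticks -> 62,500 units.
 */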
3954static u32 vmx_get_preemption_timer_value(struct kvm_vcpu *vcpu)
3955{
3956 ktime_t remaining =
3957 hrtimer_get_remaining(&to_vmx(vcpu)->nested.preemption_timer);
3958 u64 value;
3959
3960 if (ktime_to_ns(remaining) <= 0)
3961 return 0;
3962
3963 value = ktime_to_ns(remaining) * vcpu->arch.virtual_tsc_khz;
3964 do_div(value, 1000000);
3965 return value >> VMX_MISC_EMULATED_PREEMPTION_TIMER_RATE;
3966}
3967
Sean Christopherson7952d762019-05-07 08:36:29 -07003968static bool is_vmcs12_ext_field(unsigned long field)
Sean Christopherson55d23752018-12-03 13:53:18 -08003969{
Sean Christopherson7952d762019-05-07 08:36:29 -07003970 switch (field) {
3971 case GUEST_ES_SELECTOR:
3972 case GUEST_CS_SELECTOR:
3973 case GUEST_SS_SELECTOR:
3974 case GUEST_DS_SELECTOR:
3975 case GUEST_FS_SELECTOR:
3976 case GUEST_GS_SELECTOR:
3977 case GUEST_LDTR_SELECTOR:
3978 case GUEST_TR_SELECTOR:
3979 case GUEST_ES_LIMIT:
3980 case GUEST_CS_LIMIT:
3981 case GUEST_SS_LIMIT:
3982 case GUEST_DS_LIMIT:
3983 case GUEST_FS_LIMIT:
3984 case GUEST_GS_LIMIT:
3985 case GUEST_LDTR_LIMIT:
3986 case GUEST_TR_LIMIT:
3987 case GUEST_GDTR_LIMIT:
3988 case GUEST_IDTR_LIMIT:
3989 case GUEST_ES_AR_BYTES:
3990 case GUEST_DS_AR_BYTES:
3991 case GUEST_FS_AR_BYTES:
3992 case GUEST_GS_AR_BYTES:
3993 case GUEST_LDTR_AR_BYTES:
3994 case GUEST_TR_AR_BYTES:
3995 case GUEST_ES_BASE:
3996 case GUEST_CS_BASE:
3997 case GUEST_SS_BASE:
3998 case GUEST_DS_BASE:
3999 case GUEST_FS_BASE:
4000 case GUEST_GS_BASE:
4001 case GUEST_LDTR_BASE:
4002 case GUEST_TR_BASE:
4003 case GUEST_GDTR_BASE:
4004 case GUEST_IDTR_BASE:
4005 case GUEST_PENDING_DBG_EXCEPTIONS:
4006 case GUEST_BNDCFGS:
4007 return true;
4008 default:
4009 break;
4010 }
Sean Christopherson55d23752018-12-03 13:53:18 -08004011
Sean Christopherson7952d762019-05-07 08:36:29 -07004012 return false;
4013}
4014
4015static void sync_vmcs02_to_vmcs12_rare(struct kvm_vcpu *vcpu,
4016 struct vmcs12 *vmcs12)
4017{
4018 struct vcpu_vmx *vmx = to_vmx(vcpu);
Sean Christopherson55d23752018-12-03 13:53:18 -08004019
4020 vmcs12->guest_es_selector = vmcs_read16(GUEST_ES_SELECTOR);
4021 vmcs12->guest_cs_selector = vmcs_read16(GUEST_CS_SELECTOR);
4022 vmcs12->guest_ss_selector = vmcs_read16(GUEST_SS_SELECTOR);
4023 vmcs12->guest_ds_selector = vmcs_read16(GUEST_DS_SELECTOR);
4024 vmcs12->guest_fs_selector = vmcs_read16(GUEST_FS_SELECTOR);
4025 vmcs12->guest_gs_selector = vmcs_read16(GUEST_GS_SELECTOR);
4026 vmcs12->guest_ldtr_selector = vmcs_read16(GUEST_LDTR_SELECTOR);
4027 vmcs12->guest_tr_selector = vmcs_read16(GUEST_TR_SELECTOR);
4028 vmcs12->guest_es_limit = vmcs_read32(GUEST_ES_LIMIT);
4029 vmcs12->guest_cs_limit = vmcs_read32(GUEST_CS_LIMIT);
4030 vmcs12->guest_ss_limit = vmcs_read32(GUEST_SS_LIMIT);
4031 vmcs12->guest_ds_limit = vmcs_read32(GUEST_DS_LIMIT);
4032 vmcs12->guest_fs_limit = vmcs_read32(GUEST_FS_LIMIT);
4033 vmcs12->guest_gs_limit = vmcs_read32(GUEST_GS_LIMIT);
4034 vmcs12->guest_ldtr_limit = vmcs_read32(GUEST_LDTR_LIMIT);
4035 vmcs12->guest_tr_limit = vmcs_read32(GUEST_TR_LIMIT);
4036 vmcs12->guest_gdtr_limit = vmcs_read32(GUEST_GDTR_LIMIT);
4037 vmcs12->guest_idtr_limit = vmcs_read32(GUEST_IDTR_LIMIT);
4038 vmcs12->guest_es_ar_bytes = vmcs_read32(GUEST_ES_AR_BYTES);
Sean Christopherson55d23752018-12-03 13:53:18 -08004039 vmcs12->guest_ds_ar_bytes = vmcs_read32(GUEST_DS_AR_BYTES);
4040 vmcs12->guest_fs_ar_bytes = vmcs_read32(GUEST_FS_AR_BYTES);
4041 vmcs12->guest_gs_ar_bytes = vmcs_read32(GUEST_GS_AR_BYTES);
4042 vmcs12->guest_ldtr_ar_bytes = vmcs_read32(GUEST_LDTR_AR_BYTES);
4043 vmcs12->guest_tr_ar_bytes = vmcs_read32(GUEST_TR_AR_BYTES);
4044 vmcs12->guest_es_base = vmcs_readl(GUEST_ES_BASE);
4045 vmcs12->guest_cs_base = vmcs_readl(GUEST_CS_BASE);
4046 vmcs12->guest_ss_base = vmcs_readl(GUEST_SS_BASE);
4047 vmcs12->guest_ds_base = vmcs_readl(GUEST_DS_BASE);
4048 vmcs12->guest_fs_base = vmcs_readl(GUEST_FS_BASE);
4049 vmcs12->guest_gs_base = vmcs_readl(GUEST_GS_BASE);
4050 vmcs12->guest_ldtr_base = vmcs_readl(GUEST_LDTR_BASE);
4051 vmcs12->guest_tr_base = vmcs_readl(GUEST_TR_BASE);
4052 vmcs12->guest_gdtr_base = vmcs_readl(GUEST_GDTR_BASE);
4053 vmcs12->guest_idtr_base = vmcs_readl(GUEST_IDTR_BASE);
Sean Christopherson7952d762019-05-07 08:36:29 -07004054 vmcs12->guest_pending_dbg_exceptions =
4055 vmcs_readl(GUEST_PENDING_DBG_EXCEPTIONS);
4056 if (kvm_mpx_supported())
4057 vmcs12->guest_bndcfgs = vmcs_read64(GUEST_BNDCFGS);
4058
4059 vmx->nested.need_sync_vmcs02_to_vmcs12_rare = false;
4060}
4061
4062static void copy_vmcs02_to_vmcs12_rare(struct kvm_vcpu *vcpu,
4063 struct vmcs12 *vmcs12)
4064{
4065 struct vcpu_vmx *vmx = to_vmx(vcpu);
4066 int cpu;
4067
4068 if (!vmx->nested.need_sync_vmcs02_to_vmcs12_rare)
4069 return;
4070
4071
4072 WARN_ON_ONCE(vmx->loaded_vmcs != &vmx->vmcs01);
4073
4074 cpu = get_cpu();
4075 vmx->loaded_vmcs = &vmx->nested.vmcs02;
Sean Christopherson1af1bb02020-05-06 16:58:50 -07004076 vmx_vcpu_load_vmcs(vcpu, cpu, &vmx->vmcs01);
Sean Christopherson7952d762019-05-07 08:36:29 -07004077
4078 sync_vmcs02_to_vmcs12_rare(vcpu, vmcs12);
4079
4080 vmx->loaded_vmcs = &vmx->vmcs01;
Sean Christopherson1af1bb02020-05-06 16:58:50 -07004081 vmx_vcpu_load_vmcs(vcpu, cpu, &vmx->nested.vmcs02);
Sean Christopherson7952d762019-05-07 08:36:29 -07004082 put_cpu();
4083}
4084
4085/*
4086 * Update the guest state fields of vmcs12 to reflect changes that
4087 * occurred while L2 was running. (The "IA-32e mode guest" bit of the
4088 * VM-entry controls is also updated, since this is really a guest
4089 * state bit.)
4090 */
4091static void sync_vmcs02_to_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
4092{
4093 struct vcpu_vmx *vmx = to_vmx(vcpu);
4094
Vitaly Kuznetsov1e9dfbd2021-05-26 15:20:16 +02004095 if (evmptr_is_valid(vmx->nested.hv_evmcs_vmptr))
Sean Christopherson7952d762019-05-07 08:36:29 -07004096 sync_vmcs02_to_vmcs12_rare(vcpu, vmcs12);
4097
Vitaly Kuznetsov1e9dfbd2021-05-26 15:20:16 +02004098 vmx->nested.need_sync_vmcs02_to_vmcs12_rare =
4099 !evmptr_is_valid(vmx->nested.hv_evmcs_vmptr);
Sean Christopherson7952d762019-05-07 08:36:29 -07004100
4101 vmcs12->guest_cr0 = vmcs12_guest_cr0(vcpu, vmcs12);
4102 vmcs12->guest_cr4 = vmcs12_guest_cr4(vcpu, vmcs12);
4103
4104 vmcs12->guest_rsp = kvm_rsp_read(vcpu);
4105 vmcs12->guest_rip = kvm_rip_read(vcpu);
4106 vmcs12->guest_rflags = vmcs_readl(GUEST_RFLAGS);
4107
4108 vmcs12->guest_cs_ar_bytes = vmcs_read32(GUEST_CS_AR_BYTES);
4109 vmcs12->guest_ss_ar_bytes = vmcs_read32(GUEST_SS_AR_BYTES);
Sean Christopherson55d23752018-12-03 13:53:18 -08004110
4111 vmcs12->guest_interruptibility_info =
4112 vmcs_read32(GUEST_INTERRUPTIBILITY_INFO);
Sean Christopherson7952d762019-05-07 08:36:29 -07004113
Sean Christopherson55d23752018-12-03 13:53:18 -08004114 if (vcpu->arch.mp_state == KVM_MP_STATE_HALTED)
4115 vmcs12->guest_activity_state = GUEST_ACTIVITY_HLT;
Yadong Qibf0cd882020-11-06 14:51:22 +08004116 else if (vcpu->arch.mp_state == KVM_MP_STATE_INIT_RECEIVED)
4117 vmcs12->guest_activity_state = GUEST_ACTIVITY_WAIT_SIPI;
Sean Christopherson55d23752018-12-03 13:53:18 -08004118 else
4119 vmcs12->guest_activity_state = GUEST_ACTIVITY_ACTIVE;
4120
Paolo Bonzinib4b65b52019-01-29 19:12:35 +01004121 if (nested_cpu_has_preemption_timer(vmcs12) &&
Peter Shier850448f2020-05-26 14:51:06 -07004122 vmcs12->vm_exit_controls & VM_EXIT_SAVE_VMX_PREEMPTION_TIMER &&
4123 !vmx->nested.nested_run_pending)
4124 vmcs12->vmx_preemption_timer_value =
4125 vmx_get_preemption_timer_value(vcpu);
Sean Christopherson55d23752018-12-03 13:53:18 -08004126
4127 /*
4128 * In some cases (usually, nested EPT), L2 is allowed to change its
4129 * own CR3 without exiting. If it has changed it, we must keep it.
4130 * Of course, if L0 is using shadow page tables, GUEST_CR3 was defined
4131 * by L0, not L1 or L2, so we mustn't unconditionally copy it to vmcs12.
4132 *
4133 * Additionally, restore L2's PDPTR to vmcs12.
4134 */
4135 if (enable_ept) {
4136 vmcs12->guest_cr3 = vmcs_readl(GUEST_CR3);
Sean Christophersonc7554efc2019-05-07 09:06:40 -07004137 if (nested_cpu_has_ept(vmcs12) && is_pae_paging(vcpu)) {
4138 vmcs12->guest_pdptr0 = vmcs_read64(GUEST_PDPTR0);
4139 vmcs12->guest_pdptr1 = vmcs_read64(GUEST_PDPTR1);
4140 vmcs12->guest_pdptr2 = vmcs_read64(GUEST_PDPTR2);
4141 vmcs12->guest_pdptr3 = vmcs_read64(GUEST_PDPTR3);
4142 }
Sean Christopherson55d23752018-12-03 13:53:18 -08004143 }
4144
4145 vmcs12->guest_linear_address = vmcs_readl(GUEST_LINEAR_ADDRESS);
4146
4147 if (nested_cpu_has_vid(vmcs12))
4148 vmcs12->guest_intr_status = vmcs_read16(GUEST_INTR_STATUS);
4149
4150 vmcs12->vm_entry_controls =
4151 (vmcs12->vm_entry_controls & ~VM_ENTRY_IA32E_MODE) |
4152 (vm_entry_controls_get(to_vmx(vcpu)) & VM_ENTRY_IA32E_MODE);
4153
Sean Christopherson699a1ac2019-05-07 09:06:37 -07004154 if (vmcs12->vm_exit_controls & VM_EXIT_SAVE_DEBUG_CONTROLS)
Sean Christopherson55d23752018-12-03 13:53:18 -08004155 kvm_get_dr(vcpu, 7, (unsigned long *)&vmcs12->guest_dr7);
Sean Christopherson55d23752018-12-03 13:53:18 -08004156
Sean Christopherson55d23752018-12-03 13:53:18 -08004157 if (vmcs12->vm_exit_controls & VM_EXIT_SAVE_IA32_EFER)
4158 vmcs12->guest_ia32_efer = vcpu->arch.efer;
Sean Christopherson55d23752018-12-03 13:53:18 -08004159}
4160
4161/*
4162 * prepare_vmcs12 is part of what we need to do when the nested L2 guest exits
4163 * and we want to prepare to run its L1 parent. L1 keeps a vmcs for L2 (vmcs12),
4164 * and this function updates it to reflect the changes to the guest state while
4165 * L2 was running (and perhaps made some exits which were handled directly by L0
4166 * without going back to L1), and to reflect the exit reason.
4167 * Note that we do not have to copy here all VMCS fields, just those that
4168 * could have changed by the L2 guest or the exit - i.e., the guest-state and
4169 * exit-information fields only. Other fields are modified by L1 with VMWRITE,
4170 * which already writes to vmcs12 directly.
4171 */
4172static void prepare_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
Sean Christopherson4dcefa32020-04-15 10:55:18 -07004173 u32 vm_exit_reason, u32 exit_intr_info,
Sean Christopherson55d23752018-12-03 13:53:18 -08004174 unsigned long exit_qualification)
4175{
Sean Christopherson55d23752018-12-03 13:53:18 -08004176 /* update exit information fields: */
Sean Christopherson4dcefa32020-04-15 10:55:18 -07004177 vmcs12->vm_exit_reason = vm_exit_reason;
Sean Christopherson3c0c2ad2021-04-12 16:21:37 +12004178 if (to_vmx(vcpu)->exit_reason.enclave_mode)
4179 vmcs12->vm_exit_reason |= VMX_EXIT_REASONS_SGX_ENCLAVE_MODE;
Sean Christopherson55d23752018-12-03 13:53:18 -08004180 vmcs12->exit_qualification = exit_qualification;
4181 vmcs12->vm_exit_intr_info = exit_intr_info;
4182
4183 vmcs12->idt_vectoring_info_field = 0;
4184 vmcs12->vm_exit_instruction_len = vmcs_read32(VM_EXIT_INSTRUCTION_LEN);
4185 vmcs12->vmx_instruction_info = vmcs_read32(VMX_INSTRUCTION_INFO);
4186
4187 if (!(vmcs12->vm_exit_reason & VMX_EXIT_REASONS_FAILED_VMENTRY)) {
4188 vmcs12->launch_state = 1;
4189
4190 /* vm_entry_intr_info_field is cleared on exit. Emulate this
4191 * instead of reading the real value. */
4192 vmcs12->vm_entry_intr_info_field &= ~INTR_INFO_VALID_MASK;
4193
4194 /*
4195 * Transfer the event that L0 or L1 may have wanted to inject into
4196 * L2 to IDT_VECTORING_INFO_FIELD.
4197 */
4198 vmcs12_save_pending_event(vcpu, vmcs12);
Krish Sadhukhana0d4f802018-12-04 19:00:13 -05004199
4200 /*
4201 * According to spec, there's no need to store the guest's
4202 * MSRs if the exit is due to a VM-entry failure that occurs
4203 * during or after loading the guest state. Since this exit
4204 * does not fall in that category, we need to save the MSRs.
4205 */
4206 if (nested_vmx_store_msr(vcpu,
4207 vmcs12->vm_exit_msr_store_addr,
4208 vmcs12->vm_exit_msr_store_count))
4209 nested_vmx_abort(vcpu,
4210 VMX_ABORT_SAVE_GUEST_MSR_FAIL);
Sean Christopherson55d23752018-12-03 13:53:18 -08004211 }
4212
4213 /*
4214 * Drop what we picked up for L2 via vmx_complete_interrupts. It is
4215 * preserved above and would only end up incorrectly in L1.
4216 */
4217 vcpu->arch.nmi_injected = false;
4218 kvm_clear_exception_queue(vcpu);
4219 kvm_clear_interrupt_queue(vcpu);
4220}
4221
4222/*
4223 * Part of what we need to do when the nested L2 guest exits and we want to
4224 * run its L1 parent is to reset L1's guest state to the host state specified
4225 * in vmcs12.
4226 * This function is to be called not only on normal nested exit, but also on
4227 * a nested entry failure, as explained in Intel's spec, 3B.23.7 ("VM-Entry
4228 * Failures During or After Loading Guest State").
4229 * This function should be called when the active VMCS is L1's (vmcs01).
4230 */
4231static void load_vmcs12_host_state(struct kvm_vcpu *vcpu,
4232 struct vmcs12 *vmcs12)
4233{
Sean Christopherson68cda402020-05-11 15:05:29 -07004234 enum vm_entry_failure_code ignored;
Sean Christopherson55d23752018-12-03 13:53:18 -08004235 struct kvm_segment seg;
Sean Christopherson55d23752018-12-03 13:53:18 -08004236
4237 if (vmcs12->vm_exit_controls & VM_EXIT_LOAD_IA32_EFER)
4238 vcpu->arch.efer = vmcs12->host_ia32_efer;
4239 else if (vmcs12->vm_exit_controls & VM_EXIT_HOST_ADDR_SPACE_SIZE)
4240 vcpu->arch.efer |= (EFER_LMA | EFER_LME);
4241 else
4242 vcpu->arch.efer &= ~(EFER_LMA | EFER_LME);
4243 vmx_set_efer(vcpu, vcpu->arch.efer);
4244
Paolo Bonzinie9c16c72019-04-30 22:07:26 +02004245 kvm_rsp_write(vcpu, vmcs12->host_rsp);
4246 kvm_rip_write(vcpu, vmcs12->host_rip);
Sean Christopherson55d23752018-12-03 13:53:18 -08004247 vmx_set_rflags(vcpu, X86_EFLAGS_FIXED);
4248 vmx_set_interrupt_shadow(vcpu, 0);
4249
4250 /*
4251 * Note that calling vmx_set_cr0 is important, even if cr0 hasn't
4252 * actually changed, because vmx_set_cr0 refers to efer set above.
4253 *
4254 * CR0_GUEST_HOST_MASK is already set in the original vmcs01
4255 * (KVM doesn't change it);
4256 */
Sean Christophersonfa71e952020-07-02 21:04:22 -07004257 vcpu->arch.cr0_guest_owned_bits = KVM_POSSIBLE_CR0_GUEST_BITS;
Sean Christopherson55d23752018-12-03 13:53:18 -08004258 vmx_set_cr0(vcpu, vmcs12->host_cr0);
4259
4260 /* Same as above - no reason to call set_cr4_guest_host_mask(). */
4261 vcpu->arch.cr4_guest_owned_bits = ~vmcs_readl(CR4_GUEST_HOST_MASK);
4262 vmx_set_cr4(vcpu, vmcs12->host_cr4);
4263
4264 nested_ept_uninit_mmu_context(vcpu);
4265
4266 /*
4267 * Only PDPTE load can fail as the value of cr3 was checked on entry and
4268 * couldn't have changed.
4269 */
Maxim Levitsky0f857222021-06-07 12:02:00 +03004270 if (nested_vmx_load_cr3(vcpu, vmcs12->host_cr3, false, true, &ignored))
Sean Christopherson55d23752018-12-03 13:53:18 -08004271 nested_vmx_abort(vcpu, VMX_ABORT_LOAD_HOST_PDPTE_FAIL);
4272
Sean Christopherson50b265a2020-03-20 14:28:19 -07004273 nested_vmx_transition_tlb_flush(vcpu, vmcs12, false);
Sean Christopherson55d23752018-12-03 13:53:18 -08004274
4275 vmcs_write32(GUEST_SYSENTER_CS, vmcs12->host_ia32_sysenter_cs);
4276 vmcs_writel(GUEST_SYSENTER_ESP, vmcs12->host_ia32_sysenter_esp);
4277 vmcs_writel(GUEST_SYSENTER_EIP, vmcs12->host_ia32_sysenter_eip);
4278 vmcs_writel(GUEST_IDTR_BASE, vmcs12->host_idtr_base);
4279 vmcs_writel(GUEST_GDTR_BASE, vmcs12->host_gdtr_base);
4280 vmcs_write32(GUEST_IDTR_LIMIT, 0xFFFF);
4281 vmcs_write32(GUEST_GDTR_LIMIT, 0xFFFF);
4282
4283 /* If not VM_EXIT_CLEAR_BNDCFGS, the L2 value propagates to L1. */
4284 if (vmcs12->vm_exit_controls & VM_EXIT_CLEAR_BNDCFGS)
4285 vmcs_write64(GUEST_BNDCFGS, 0);
4286
4287 if (vmcs12->vm_exit_controls & VM_EXIT_LOAD_IA32_PAT) {
4288 vmcs_write64(GUEST_IA32_PAT, vmcs12->host_ia32_pat);
4289 vcpu->arch.pat = vmcs12->host_ia32_pat;
4290 }
4291 if (vmcs12->vm_exit_controls & VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL)
Oliver Uptond1968422019-12-13 16:33:58 -08004292 WARN_ON_ONCE(kvm_set_msr(vcpu, MSR_CORE_PERF_GLOBAL_CTRL,
4293 vmcs12->host_ia32_perf_global_ctrl));
Sean Christopherson55d23752018-12-03 13:53:18 -08004294
4295 /* Set L1 segment info according to Intel SDM
4296 27.5.2 Loading Host Segment and Descriptor-Table Registers */
4297 seg = (struct kvm_segment) {
4298 .base = 0,
4299 .limit = 0xFFFFFFFF,
4300 .selector = vmcs12->host_cs_selector,
4301 .type = 11,
4302 .present = 1,
4303 .s = 1,
4304 .g = 1
4305 };
4306 if (vmcs12->vm_exit_controls & VM_EXIT_HOST_ADDR_SPACE_SIZE)
4307 seg.l = 1;
4308 else
4309 seg.db = 1;
Sean Christopherson816be9e2021-07-13 09:33:07 -07004310 __vmx_set_segment(vcpu, &seg, VCPU_SREG_CS);
Sean Christopherson55d23752018-12-03 13:53:18 -08004311 seg = (struct kvm_segment) {
4312 .base = 0,
4313 .limit = 0xFFFFFFFF,
4314 .type = 3,
4315 .present = 1,
4316 .s = 1,
4317 .db = 1,
4318 .g = 1
4319 };
4320 seg.selector = vmcs12->host_ds_selector;
Sean Christopherson816be9e2021-07-13 09:33:07 -07004321 __vmx_set_segment(vcpu, &seg, VCPU_SREG_DS);
Sean Christopherson55d23752018-12-03 13:53:18 -08004322 seg.selector = vmcs12->host_es_selector;
Sean Christopherson816be9e2021-07-13 09:33:07 -07004323 __vmx_set_segment(vcpu, &seg, VCPU_SREG_ES);
Sean Christopherson55d23752018-12-03 13:53:18 -08004324 seg.selector = vmcs12->host_ss_selector;
Sean Christopherson816be9e2021-07-13 09:33:07 -07004325 __vmx_set_segment(vcpu, &seg, VCPU_SREG_SS);
Sean Christopherson55d23752018-12-03 13:53:18 -08004326 seg.selector = vmcs12->host_fs_selector;
4327 seg.base = vmcs12->host_fs_base;
Sean Christopherson816be9e2021-07-13 09:33:07 -07004328 __vmx_set_segment(vcpu, &seg, VCPU_SREG_FS);
Sean Christopherson55d23752018-12-03 13:53:18 -08004329 seg.selector = vmcs12->host_gs_selector;
4330 seg.base = vmcs12->host_gs_base;
Sean Christopherson816be9e2021-07-13 09:33:07 -07004331 __vmx_set_segment(vcpu, &seg, VCPU_SREG_GS);
Sean Christopherson55d23752018-12-03 13:53:18 -08004332 seg = (struct kvm_segment) {
4333 .base = vmcs12->host_tr_base,
4334 .limit = 0x67,
4335 .selector = vmcs12->host_tr_selector,
4336 .type = 11,
4337 .present = 1
4338 };
Sean Christopherson816be9e2021-07-13 09:33:07 -07004339 __vmx_set_segment(vcpu, &seg, VCPU_SREG_TR);
Sean Christopherson55d23752018-12-03 13:53:18 -08004340
Sean Christophersonafc8de02021-07-13 09:32:40 -07004341 memset(&seg, 0, sizeof(seg));
4342 seg.unusable = 1;
Sean Christopherson816be9e2021-07-13 09:33:07 -07004343 __vmx_set_segment(vcpu, &seg, VCPU_SREG_LDTR);
Sean Christopherson55d23752018-12-03 13:53:18 -08004344
4345 kvm_set_dr(vcpu, 7, 0x400);
4346 vmcs_write64(GUEST_IA32_DEBUGCTL, 0);
4347
Sean Christopherson55d23752018-12-03 13:53:18 -08004348 if (nested_vmx_load_msr(vcpu, vmcs12->vm_exit_msr_load_addr,
4349 vmcs12->vm_exit_msr_load_count))
4350 nested_vmx_abort(vcpu, VMX_ABORT_LOAD_HOST_MSR_FAIL);
Maxim Levitskydbab6102021-09-13 17:09:54 +03004351
4352 to_vmx(vcpu)->emulation_required = vmx_emulation_required(vcpu);
Sean Christopherson55d23752018-12-03 13:53:18 -08004353}
4354
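/*
 * Recover the EFER value the guest runs with under vmcs01: prefer
 * GUEST_IA32_EFER if vmcs01 loads EFER at VM-Entry; if the CPU supports the
 * load-EFER controls but they weren't used, the guest runs with host_efer;
 * otherwise fall back to the MSR autoload list, then the user-return MSR
 * slot, and finally host_efer.
 */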
4355static inline u64 nested_vmx_get_vmcs01_guest_efer(struct vcpu_vmx *vmx)
4356{
Sean Christophersoneb3db1b2020-09-23 11:03:58 -07004357 struct vmx_uret_msr *efer_msr;
Sean Christopherson55d23752018-12-03 13:53:18 -08004358 unsigned int i;
4359
4360 if (vm_entry_controls_get(vmx) & VM_ENTRY_LOAD_IA32_EFER)
4361 return vmcs_read64(GUEST_IA32_EFER);
4362
4363 if (cpu_has_load_ia32_efer())
4364 return host_efer;
4365
4366 for (i = 0; i < vmx->msr_autoload.guest.nr; ++i) {
4367 if (vmx->msr_autoload.guest.val[i].index == MSR_EFER)
4368 return vmx->msr_autoload.guest.val[i].value;
4369 }
4370
Sean Christophersond85a8032020-09-23 11:04:06 -07004371 efer_msr = vmx_find_uret_msr(vmx, MSR_EFER);
Sean Christopherson55d23752018-12-03 13:53:18 -08004372 if (efer_msr)
4373 return efer_msr->data;
4374
4375 return host_efer;
4376}
4377
4378static void nested_vmx_restore_host_state(struct kvm_vcpu *vcpu)
4379{
4380 struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
4381 struct vcpu_vmx *vmx = to_vmx(vcpu);
4382 struct vmx_msr_entry g, h;
Sean Christopherson55d23752018-12-03 13:53:18 -08004383 gpa_t gpa;
4384 u32 i, j;
4385
4386 vcpu->arch.pat = vmcs_read64(GUEST_IA32_PAT);
4387
4388 if (vmcs12->vm_entry_controls & VM_ENTRY_LOAD_DEBUG_CONTROLS) {
4389 /*
4390 * L1's host DR7 is lost if KVM_GUESTDBG_USE_HW_BP is set
4391 * as vmcs01.GUEST_DR7 contains a userspace defined value
4392 * and vcpu->arch.dr7 is not squirreled away before the
4393 * nested VMENTER (not worth adding a variable in nested_vmx).
4394 */
4395 if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)
4396 kvm_set_dr(vcpu, 7, DR7_FIXED_1);
4397 else
4398 WARN_ON(kvm_set_dr(vcpu, 7, vmcs_readl(GUEST_DR7)));
4399 }
4400
4401 /*
4402 * Note that calling vmx_set_{efer,cr0,cr4} is important as they
4403 * handle a variety of side effects to KVM's software model.
4404 */
4405 vmx_set_efer(vcpu, nested_vmx_get_vmcs01_guest_efer(vmx));
4406
Sean Christophersonfa71e952020-07-02 21:04:22 -07004407 vcpu->arch.cr0_guest_owned_bits = KVM_POSSIBLE_CR0_GUEST_BITS;
Sean Christopherson55d23752018-12-03 13:53:18 -08004408 vmx_set_cr0(vcpu, vmcs_readl(CR0_READ_SHADOW));
4409
4410 vcpu->arch.cr4_guest_owned_bits = ~vmcs_readl(CR4_GUEST_HOST_MASK);
4411 vmx_set_cr4(vcpu, vmcs_readl(CR4_READ_SHADOW));
4412
4413 nested_ept_uninit_mmu_context(vcpu);
Sean Christophersonf087a022019-06-07 11:55:34 -07004414 vcpu->arch.cr3 = vmcs_readl(GUEST_CR3);
Sean Christophersoncb3c1e22019-09-27 14:45:22 -07004415 kvm_register_mark_available(vcpu, VCPU_EXREG_CR3);
Sean Christopherson55d23752018-12-03 13:53:18 -08004416
4417 /*
4418 * Use ept_save_pdptrs(vcpu) to load the MMU's cached PDPTRs
4419 * from vmcs01 (if necessary). The PDPTRs are not loaded on
4420 * VMFail, like everything else we just need to ensure our
4421 * software model is up-to-date.
4422 */
Sean Christopherson9932b492020-04-15 13:34:50 -07004423 if (enable_ept && is_pae_paging(vcpu))
Sean Christophersonf087a022019-06-07 11:55:34 -07004424 ept_save_pdptrs(vcpu);
Sean Christopherson55d23752018-12-03 13:53:18 -08004425
4426 kvm_mmu_reset_context(vcpu);
4427
Sean Christopherson55d23752018-12-03 13:53:18 -08004428 /*
4429 * This nasty bit of open coding is a compromise between blindly
4430 * loading L1's MSRs using the exit load lists (incorrect emulation
4431 * of VMFail), leaving the nested VM's MSRs in the software model
4432 * (incorrect behavior) and snapshotting the modified MSRs (too
4433 * expensive since the lists are unbound by hardware). For each
4434 * MSR that was (prematurely) loaded from the nested VMEntry load
4435 * list, reload it from the exit load list if it exists and differs
4436 * from the guest value. The intent is to stuff host state as
4437 * silently as possible, not to fully process the exit load list.
4438 */
Sean Christopherson55d23752018-12-03 13:53:18 -08004439 for (i = 0; i < vmcs12->vm_entry_msr_load_count; i++) {
4440 gpa = vmcs12->vm_entry_msr_load_addr + (i * sizeof(g));
4441 if (kvm_vcpu_read_guest(vcpu, gpa, &g, sizeof(g))) {
4442 pr_debug_ratelimited(
4443 "%s read MSR index failed (%u, 0x%08llx)\n",
4444 __func__, i, gpa);
4445 goto vmabort;
4446 }
4447
4448 for (j = 0; j < vmcs12->vm_exit_msr_load_count; j++) {
4449 gpa = vmcs12->vm_exit_msr_load_addr + (j * sizeof(h));
4450 if (kvm_vcpu_read_guest(vcpu, gpa, &h, sizeof(h))) {
4451 pr_debug_ratelimited(
4452 "%s read MSR failed (%u, 0x%08llx)\n",
4453 __func__, j, gpa);
4454 goto vmabort;
4455 }
4456 if (h.index != g.index)
4457 continue;
4458 if (h.value == g.value)
4459 break;
4460
4461 if (nested_vmx_load_msr_check(vcpu, &h)) {
4462 pr_debug_ratelimited(
4463 "%s check failed (%u, 0x%x, 0x%x)\n",
4464 __func__, j, h.index, h.reserved);
4465 goto vmabort;
4466 }
4467
Sean Christophersonf20935d2019-09-05 14:22:54 -07004468 if (kvm_set_msr(vcpu, h.index, h.value)) {
Sean Christopherson55d23752018-12-03 13:53:18 -08004469 pr_debug_ratelimited(
4470 "%s WRMSR failed (%u, 0x%x, 0x%llx)\n",
4471 __func__, j, h.index, h.value);
4472 goto vmabort;
4473 }
4474 }
4475 }
4476
4477 return;
4478
4479vmabort:
4480 nested_vmx_abort(vcpu, VMX_ABORT_LOAD_HOST_MSR_FAIL);
4481}
4482
4483/*
4484 * Emulate an exit from nested guest (L2) to L1, i.e., prepare to run L1
4485 * and modify vmcs12 to make it see what it would expect to see there if
4486 * L2 was its real guest. Must only be called when in L2 (is_guest_mode())
4487 */
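/*
 * Roughly: leave guest mode, sync the vmcs02 guest state back into vmcs12,
 * record the exit reason/qualification, switch the CPU back to vmcs01 and
 * load L1's host state.  On a VMFail (an early hardware consistency-check
 * failure), instead report the failure to L1 and unwind to the vmcs01 state
 * KVM last knew about.
 */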
Sean Christopherson4dcefa32020-04-15 10:55:18 -07004488void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 vm_exit_reason,
Sean Christopherson55d23752018-12-03 13:53:18 -08004489 u32 exit_intr_info, unsigned long exit_qualification)
4490{
4491 struct vcpu_vmx *vmx = to_vmx(vcpu);
4492 struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
4493
4494 /* trying to cancel vmlaunch/vmresume is a bug */
4495 WARN_ON_ONCE(vmx->nested.nested_run_pending);
4496
Sean Christophersoncb6a32c2021-03-02 09:45:14 -08004497 /* Similarly, triple faults in L2 should never escape. */
4498 WARN_ON_ONCE(kvm_check_request(KVM_REQ_TRIPLE_FAULT, vcpu));
4499
Vitaly Kuznetsovf5c7e842021-05-03 17:08:51 +02004500 if (kvm_check_request(KVM_REQ_GET_NESTED_STATE_PAGES, vcpu)) {
4501 /*
4502 * KVM_REQ_GET_NESTED_STATE_PAGES is also used to map
4503 * Enlightened VMCS after migration and we still need to
4504 * do that when something is forcing L2->L1 exit prior to
4505 * the first L2 run.
4506 */
4507 (void)nested_get_evmcs_page(vcpu);
4508 }
Maxim Levitskyf2c7ef32021-01-07 11:38:51 +02004509
Sean Christopherson40e5f9082021-11-25 01:49:43 +00004510 /* Service pending TLB flush requests for L2 before switching to L1. */
4511 kvm_service_local_tlb_flush_requests(vcpu);
Sean Christophersoneeeb4f62020-03-20 14:28:20 -07004512
Peter Shier43fea4e2020-08-20 16:05:45 -07004513 /*
4514 * VCPU_EXREG_PDPTR will be clobbered in arch/x86/kvm/vmx/vmx.h between
4515 * now and the new vmentry. Ensure that the VMCS02 PDPTR fields are
4516 * up-to-date before switching to L1.
4517 */
4518 if (enable_ept && is_pae_paging(vcpu))
4519 vmx_ept_load_pdptrs(vcpu);
4520
Sean Christopherson55d23752018-12-03 13:53:18 -08004521 leave_guest_mode(vcpu);
4522
Paolo Bonzinib4b65b52019-01-29 19:12:35 +01004523 if (nested_cpu_has_preemption_timer(vmcs12))
4524 hrtimer_cancel(&to_vmx(vcpu)->nested.preemption_timer);
4525
Ilias Stamatisd041b5e2021-05-26 19:44:17 +01004526 if (nested_cpu_has(vmcs12, CPU_BASED_USE_TSC_OFFSETTING)) {
4527 vcpu->arch.tsc_offset = vcpu->arch.l1_tsc_offset;
4528 if (nested_cpu_has2(vmcs12, SECONDARY_EXEC_TSC_SCALING))
4529 vcpu->arch.tsc_scaling_ratio = vcpu->arch.l1_tsc_scaling_ratio;
4530 }
Sean Christopherson55d23752018-12-03 13:53:18 -08004531
4532 if (likely(!vmx->fail)) {
Sean Christopherson3731905ef2019-05-07 08:36:27 -07004533 sync_vmcs02_to_vmcs12(vcpu, vmcs12);
Sean Christophersonf4f83162019-05-07 08:36:26 -07004534
Sean Christopherson4dcefa32020-04-15 10:55:18 -07004535 if (vm_exit_reason != -1)
4536 prepare_vmcs12(vcpu, vmcs12, vm_exit_reason,
4537 exit_intr_info, exit_qualification);
Sean Christopherson55d23752018-12-03 13:53:18 -08004538
4539 /*
Sean Christopherson3731905ef2019-05-07 08:36:27 -07004540 * Must happen outside of sync_vmcs02_to_vmcs12() as it will
Sean Christopherson55d23752018-12-03 13:53:18 -08004541 * also be used to capture vmcs12 cache as part of
4542 * capturing nVMX state for snapshot (migration).
4543 *
4544 * Otherwise, this flush will dirty guest memory at a
4545 * point it is already assumed by user-space to be
4546 * immutable.
4547 */
4548 nested_flush_cached_shadow_vmcs12(vcpu, vmcs12);
Sean Christopherson55d23752018-12-03 13:53:18 -08004549 } else {
4550 /*
4551 * The only expected VM-instruction error is "VM entry with
4552 * invalid control field(s)." Anything else indicates a
4553 * problem with L0. And we should never get here with a
4554 * VMFail of any type if early consistency checks are enabled.
4555 */
4556 WARN_ON_ONCE(vmcs_read32(VM_INSTRUCTION_ERROR) !=
4557 VMXERR_ENTRY_INVALID_CONTROL_FIELD);
4558 WARN_ON_ONCE(nested_early_check);
4559 }
4560
4561 vmx_switch_vmcs(vcpu, &vmx->vmcs01);
4562
4563 /* Update any VMCS fields that might have changed while L2 ran */
4564 vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, vmx->msr_autoload.host.nr);
4565 vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, vmx->msr_autoload.guest.nr);
4566 vmcs_write64(TSC_OFFSET, vcpu->arch.tsc_offset);
Ilias Stamatis1ab92872021-06-07 11:54:38 +01004567 if (kvm_has_tsc_control)
4568 vmcs_write64(TSC_MULTIPLIER, vcpu->arch.tsc_scaling_ratio);
4569
Liran Alon02d496cf2019-11-11 14:30:55 +02004570 if (vmx->nested.l1_tpr_threshold != -1)
4571 vmcs_write32(TPR_THRESHOLD, vmx->nested.l1_tpr_threshold);
Sean Christopherson55d23752018-12-03 13:53:18 -08004572
Sean Christopherson55d23752018-12-03 13:53:18 -08004573 if (vmx->nested.change_vmcs01_virtual_apic_mode) {
4574 vmx->nested.change_vmcs01_virtual_apic_mode = false;
4575 vmx_set_virtual_apic_mode(vcpu);
Sean Christopherson55d23752018-12-03 13:53:18 -08004576 }
4577
Makarand Sonarea85863c2021-02-12 16:50:12 -08004578 if (vmx->nested.update_vmcs01_cpu_dirty_logging) {
4579 vmx->nested.update_vmcs01_cpu_dirty_logging = false;
4580 vmx_update_cpu_dirty_logging(vcpu);
4581 }
4582
Sean Christopherson55d23752018-12-03 13:53:18 -08004583 /* Unpin physical memory we referred to in vmcs02 */
4584 if (vmx->nested.apic_access_page) {
Liran Alonb11494b2019-11-21 00:31:47 +02004585 kvm_release_page_clean(vmx->nested.apic_access_page);
Sean Christopherson55d23752018-12-03 13:53:18 -08004586 vmx->nested.apic_access_page = NULL;
4587 }
KarimAllah Ahmed96c66e82019-01-31 21:24:37 +01004588 kvm_vcpu_unmap(vcpu, &vmx->nested.virtual_apic_map, true);
KarimAllah Ahmed3278e042019-01-31 21:24:38 +01004589 kvm_vcpu_unmap(vcpu, &vmx->nested.pi_desc_map, true);
4590 vmx->nested.pi_desc = NULL;
Sean Christopherson55d23752018-12-03 13:53:18 -08004591
Sean Christopherson1196cb92020-03-20 14:28:23 -07004592 if (vmx->nested.reload_vmcs01_apic_access_page) {
4593 vmx->nested.reload_vmcs01_apic_access_page = false;
4594 kvm_make_request(KVM_REQ_APIC_PAGE_RELOAD, vcpu);
4595 }
Sean Christopherson55d23752018-12-03 13:53:18 -08004596
Sean Christopherson4dcefa32020-04-15 10:55:18 -07004597 if ((vm_exit_reason != -1) &&
Vitaly Kuznetsov1e9dfbd2021-05-26 15:20:16 +02004598 (enable_shadow_vmcs || evmptr_is_valid(vmx->nested.hv_evmcs_vmptr)))
Sean Christopherson3731905ef2019-05-07 08:36:27 -07004599 vmx->nested.need_vmcs12_to_shadow_sync = true;
Sean Christopherson55d23752018-12-03 13:53:18 -08004600
4601 /* in case we halted in L2 */
4602 vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
4603
4604 if (likely(!vmx->fail)) {
Sean Christopherson4dcefa32020-04-15 10:55:18 -07004605 if ((u16)vm_exit_reason == EXIT_REASON_EXTERNAL_INTERRUPT &&
Sean Christophersona1c77ab2020-03-02 22:27:35 -08004606 nested_exit_intr_ack_set(vcpu)) {
Sean Christopherson55d23752018-12-03 13:53:18 -08004607 int irq = kvm_cpu_get_interrupt(vcpu);
4608 WARN_ON(irq < 0);
4609 vmcs12->vm_exit_intr_info = irq |
4610 INTR_INFO_VALID_MASK | INTR_TYPE_EXT_INTR;
4611 }
4612
Sean Christopherson4dcefa32020-04-15 10:55:18 -07004613 if (vm_exit_reason != -1)
Sean Christopherson55d23752018-12-03 13:53:18 -08004614 trace_kvm_nested_vmexit_inject(vmcs12->vm_exit_reason,
4615 vmcs12->exit_qualification,
4616 vmcs12->idt_vectoring_info_field,
4617 vmcs12->vm_exit_intr_info,
4618 vmcs12->vm_exit_intr_error_code,
4619 KVM_ISA_VMX);
4620
4621 load_vmcs12_host_state(vcpu, vmcs12);
4622
4623 return;
4624 }
4625
4626 /*
4627 * After an early L2 VM-entry failure, we're now back
4628 * in L1 which thinks it just finished a VMLAUNCH or
4629 * VMRESUME instruction, so we need to set the failure
4630 * flag and the VM-instruction error field of the VMCS
4631 * accordingly, and skip the emulated instruction.
4632 */
Sean Christophersonb2656e42020-06-08 18:56:07 -07004633 (void)nested_vmx_fail(vcpu, VMXERR_ENTRY_INVALID_CONTROL_FIELD);
Sean Christopherson55d23752018-12-03 13:53:18 -08004634
4635 /*
4636 * Restore L1's host state to KVM's software model. We're here
4637 * because a consistency check was caught by hardware, which
4638 * means some amount of guest state has been propagated to KVM's
4639 * model and needs to be unwound to the host's state.
4640 */
4641 nested_vmx_restore_host_state(vcpu);
4642
4643 vmx->fail = 0;
4644}
4645
Sean Christophersoncb6a32c2021-03-02 09:45:14 -08004646static void nested_vmx_triple_fault(struct kvm_vcpu *vcpu)
4647{
4648 nested_vmx_vmexit(vcpu, EXIT_REASON_TRIPLE_FAULT, 0, 0);
4649}
4650
Sean Christopherson55d23752018-12-03 13:53:18 -08004651/*
4652 * Decode the memory-address operand of a vmx instruction, as recorded on an
4653 * exit caused by such an instruction (run by a guest hypervisor).
4654 * On success, returns 0. When the operand is invalid, returns 1 and throws
Miaohe Lin49f933d2020-02-27 11:20:54 +08004655 * #UD, #GP, or #SS.
Sean Christopherson55d23752018-12-03 13:53:18 -08004656 */
4657int get_vmx_mem_address(struct kvm_vcpu *vcpu, unsigned long exit_qualification,
Eugene Korenevskyfdb28612019-06-06 00:19:16 +03004658 u32 vmx_instruction_info, bool wr, int len, gva_t *ret)
Sean Christopherson55d23752018-12-03 13:53:18 -08004659{
4660 gva_t off;
4661 bool exn;
4662 struct kvm_segment s;
4663
4664 /*
4665 * According to Vol. 3B, "Information for VM Exits Due to Instruction
4666 * Execution", on an exit, vmx_instruction_info holds most of the
4667 * addressing components of the operand. Only the displacement part
4668 * is put in exit_qualification (see 3B, "Basic VM-Exit Information").
4669 * For how an actual address is calculated from all these components,
4670 * refer to Vol. 1, "Operand Addressing".
4671 */
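	/*
	 * Per the decode below, vmx_instruction_info is laid out as:
	 * bits 1:0 scaling, bits 9:7 address size (0=16, 1=32, 2=64 bit),
	 * bit 10 register operand, bits 17:15 segment register,
	 * bits 21:18 index register (bit 22 = index invalid),
	 * bits 26:23 base register (bit 27 = base invalid).
	 */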
4672 int scaling = vmx_instruction_info & 3;
4673 int addr_size = (vmx_instruction_info >> 7) & 7;
4674 bool is_reg = vmx_instruction_info & (1u << 10);
4675 int seg_reg = (vmx_instruction_info >> 15) & 7;
4676 int index_reg = (vmx_instruction_info >> 18) & 0xf;
4677 bool index_is_valid = !(vmx_instruction_info & (1u << 22));
4678 int base_reg = (vmx_instruction_info >> 23) & 0xf;
4679 bool base_is_valid = !(vmx_instruction_info & (1u << 27));
4680
4681 if (is_reg) {
4682 kvm_queue_exception(vcpu, UD_VECTOR);
4683 return 1;
4684 }
4685
4686 /* Addr = segment_base + offset */
4687 /* offset = base + [index * scale] + displacement */
4688 off = exit_qualification; /* holds the displacement */
Sean Christopherson946c5222019-01-23 14:39:23 -08004689 if (addr_size == 1)
4690 off = (gva_t)sign_extend64(off, 31);
4691 else if (addr_size == 0)
4692 off = (gva_t)sign_extend64(off, 15);
Sean Christopherson55d23752018-12-03 13:53:18 -08004693 if (base_is_valid)
4694 off += kvm_register_read(vcpu, base_reg);
4695 if (index_is_valid)
Miaohe Line6302692020-02-15 10:44:22 +08004696 off += kvm_register_read(vcpu, index_reg) << scaling;
Sean Christopherson55d23752018-12-03 13:53:18 -08004697 vmx_get_segment(vcpu, &s, seg_reg);
Sean Christopherson55d23752018-12-03 13:53:18 -08004698
Sean Christopherson8570f9e2019-01-23 14:39:24 -08004699 /*
4700 * The effective address, i.e. @off, of a memory operand is truncated
4701 * based on the address size of the instruction. Note that this is
4702 * the *effective address*, i.e. the address prior to accounting for
4703 * the segment's base.
4704 */
Sean Christopherson55d23752018-12-03 13:53:18 -08004705 if (addr_size == 1) /* 32 bit */
Sean Christopherson8570f9e2019-01-23 14:39:24 -08004706 off &= 0xffffffff;
4707 else if (addr_size == 0) /* 16 bit */
4708 off &= 0xffff;
Sean Christopherson55d23752018-12-03 13:53:18 -08004709
4710 /* Checks for #GP/#SS exceptions. */
4711 exn = false;
4712 if (is_long_mode(vcpu)) {
Sean Christopherson8570f9e2019-01-23 14:39:24 -08004713 /*
4714 * The virtual/linear address is never truncated in 64-bit
4715 * mode, e.g. a 32-bit address size can yield a 64-bit virtual
4716 * address when using FS/GS with a non-zero base.
4717 */
Liran Alon6694e482019-07-15 18:47:44 +03004718 if (seg_reg == VCPU_SREG_FS || seg_reg == VCPU_SREG_GS)
4719 *ret = s.base + off;
4720 else
4721 *ret = off;
Sean Christopherson8570f9e2019-01-23 14:39:24 -08004722
Sean Christopherson55d23752018-12-03 13:53:18 -08004723 /* Long mode: #GP(0)/#SS(0) if the memory address is in a
4724 * non-canonical form. This is the only check on the memory
4725 * destination for long mode!
4726 */
4727 exn = is_noncanonical_address(*ret, vcpu);
Paolo Bonzinie0dfacb2019-01-30 17:25:38 +01004728 } else {
Sean Christopherson8570f9e2019-01-23 14:39:24 -08004729 /*
4730 * When not in long mode, the virtual/linear address is
4731 * unconditionally truncated to 32 bits regardless of the
4732 * address size.
4733 */
4734 *ret = (s.base + off) & 0xffffffff;
4735
Sean Christopherson55d23752018-12-03 13:53:18 -08004736 /* Protected mode: apply checks for segment validity in the
4737 * following order:
4738 * - segment type check (#GP(0) may be thrown)
4739 * - usability check (#GP(0)/#SS(0))
4740 * - limit check (#GP(0)/#SS(0))
4741 */
4742 if (wr)
4743 /* #GP(0) if the destination operand is located in a
4744 * read-only data segment or any code segment.
4745 */
4746 exn = ((s.type & 0xa) == 0 || (s.type & 8));
4747 else
4748 /* #GP(0) if the source operand is located in an
4749 * execute-only code segment
4750 */
4751 exn = ((s.type & 0xa) == 8);
4752 if (exn) {
4753 kvm_queue_exception_e(vcpu, GP_VECTOR, 0);
4754 return 1;
4755 }
4756 /* Protected mode: #GP(0)/#SS(0) if the segment is unusable.
4757 */
4758 exn = (s.unusable != 0);
Sean Christopherson34333cc2019-01-23 14:39:25 -08004759
4760 /*
4761 * Protected mode: #GP(0)/#SS(0) if the memory operand is
4762 * outside the segment limit. All CPUs that support VMX ignore
4763 * limit checks for flat segments, i.e. segments with base==0,
4764 * limit==0xffffffff and of type expand-up data or code.
Sean Christopherson55d23752018-12-03 13:53:18 -08004765 */
Sean Christopherson34333cc2019-01-23 14:39:25 -08004766 if (!(s.base == 0 && s.limit == 0xffffffff &&
4767 ((s.type & 8) || !(s.type & 4))))
Eugene Korenevskyfdb28612019-06-06 00:19:16 +03004768 exn = exn || ((u64)off + len - 1 > s.limit);
Sean Christopherson55d23752018-12-03 13:53:18 -08004769 }
4770 if (exn) {
4771 kvm_queue_exception_e(vcpu,
4772 seg_reg == VCPU_SREG_SS ?
4773 SS_VECTOR : GP_VECTOR,
4774 0);
4775 return 1;
4776 }
4777
4778 return 0;
4779}
4780
Oliver Upton03a8871a2019-11-13 16:17:20 -08004781void nested_vmx_pmu_entry_exit_ctls_update(struct kvm_vcpu *vcpu)
4782{
4783 struct vcpu_vmx *vmx;
4784
4785 if (!nested_vmx_allowed(vcpu))
4786 return;
4787
4788 vmx = to_vmx(vcpu);
Sean Christophersonafaf0b22020-03-21 13:26:00 -07004789 if (kvm_x86_ops.pmu_ops->is_valid_msr(vcpu, MSR_CORE_PERF_GLOBAL_CTRL)) {
Oliver Upton03a8871a2019-11-13 16:17:20 -08004790 vmx->nested.msrs.entry_ctls_high |=
4791 VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL;
4792 vmx->nested.msrs.exit_ctls_high |=
4793 VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL;
4794 } else {
4795 vmx->nested.msrs.entry_ctls_high &=
4796 ~VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL;
4797 vmx->nested.msrs.exit_ctls_high &=
Chenyi Qiangc6b177a2020-08-28 16:56:21 +08004798 ~VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL;
Oliver Upton03a8871a2019-11-13 16:17:20 -08004799 }
4800}
4801
Vitaly Kuznetsov7a35e512020-06-05 13:59:05 +02004802static int nested_vmx_get_vmptr(struct kvm_vcpu *vcpu, gpa_t *vmpointer,
4803 int *ret)
Sean Christopherson55d23752018-12-03 13:53:18 -08004804{
4805 gva_t gva;
4806 struct x86_exception e;
Vitaly Kuznetsov7a35e512020-06-05 13:59:05 +02004807 int r;
Sean Christopherson55d23752018-12-03 13:53:18 -08004808
Sean Christopherson5addc232020-04-15 13:34:53 -07004809 if (get_vmx_mem_address(vcpu, vmx_get_exit_qual(vcpu),
Eugene Korenevskyfdb28612019-06-06 00:19:16 +03004810 vmcs_read32(VMX_INSTRUCTION_INFO), false,
Vitaly Kuznetsov7a35e512020-06-05 13:59:05 +02004811 sizeof(*vmpointer), &gva)) {
4812 *ret = 1;
4813 return -EINVAL;
4814 }
Sean Christopherson55d23752018-12-03 13:53:18 -08004815
Vitaly Kuznetsov7a35e512020-06-05 13:59:05 +02004816 r = kvm_read_guest_virt(vcpu, gva, vmpointer, sizeof(*vmpointer), &e);
4817 if (r != X86EMUL_CONTINUE) {
Babu Moger3f3393b2020-09-11 14:29:05 -05004818 *ret = kvm_handle_memory_failure(vcpu, r, &e);
Vitaly Kuznetsov7a35e512020-06-05 13:59:05 +02004819 return -EINVAL;
Sean Christopherson55d23752018-12-03 13:53:18 -08004820 }
4821
4822 return 0;
4823}
4824
4825/*
4826 * Allocate a shadow VMCS and associate it with the currently loaded
4827 * VMCS, unless such a shadow VMCS already exists. The newly allocated
4828 * VMCS is also VMCLEARed, so that it is ready for use.
4829 */
4830static struct vmcs *alloc_shadow_vmcs(struct kvm_vcpu *vcpu)
4831{
4832 struct vcpu_vmx *vmx = to_vmx(vcpu);
4833 struct loaded_vmcs *loaded_vmcs = vmx->loaded_vmcs;
4834
4835 /*
4836 * We should allocate a shadow vmcs for vmcs01 only when L1
4837 * executes VMXON and free it when L1 executes VMXOFF.
4838 * As it is invalid to execute VMXON twice, we shouldn't reach
4839 * here when vmcs01 already have an allocated shadow vmcs.
4840 * here when vmcs01 already has an allocated shadow vmcs.
4841 WARN_ON(loaded_vmcs == &vmx->vmcs01 && loaded_vmcs->shadow_vmcs);
4842
4843 if (!loaded_vmcs->shadow_vmcs) {
4844 loaded_vmcs->shadow_vmcs = alloc_vmcs(true);
4845 if (loaded_vmcs->shadow_vmcs)
4846 vmcs_clear(loaded_vmcs->shadow_vmcs);
4847 }
4848 return loaded_vmcs->shadow_vmcs;
4849}
4850
4851static int enter_vmx_operation(struct kvm_vcpu *vcpu)
4852{
4853 struct vcpu_vmx *vmx = to_vmx(vcpu);
4854 int r;
4855
4856 r = alloc_loaded_vmcs(&vmx->nested.vmcs02);
4857 if (r < 0)
4858 goto out_vmcs02;
4859
Ben Gardon41836832019-02-11 11:02:52 -08004860 vmx->nested.cached_vmcs12 = kzalloc(VMCS12_SIZE, GFP_KERNEL_ACCOUNT);
Sean Christopherson55d23752018-12-03 13:53:18 -08004861 if (!vmx->nested.cached_vmcs12)
4862 goto out_cached_vmcs12;
4863
Paolo Bonzini8503fea2021-11-22 18:20:16 -05004864 vmx->nested.shadow_vmcs12_cache.gpa = INVALID_GPA;
Ben Gardon41836832019-02-11 11:02:52 -08004865 vmx->nested.cached_shadow_vmcs12 = kzalloc(VMCS12_SIZE, GFP_KERNEL_ACCOUNT);
Sean Christopherson55d23752018-12-03 13:53:18 -08004866 if (!vmx->nested.cached_shadow_vmcs12)
4867 goto out_cached_shadow_vmcs12;
4868
4869 if (enable_shadow_vmcs && !alloc_shadow_vmcs(vcpu))
4870 goto out_shadow_vmcs;
4871
4872 hrtimer_init(&vmx->nested.preemption_timer, CLOCK_MONOTONIC,
Jim Mattsonada00982020-05-08 13:36:42 -07004873 HRTIMER_MODE_ABS_PINNED);
Sean Christopherson55d23752018-12-03 13:53:18 -08004874 vmx->nested.preemption_timer.function = vmx_preemption_timer_fn;
4875
4876 vmx->nested.vpid02 = allocate_vpid();
4877
4878 vmx->nested.vmcs02_initialized = false;
4879 vmx->nested.vmxon = true;
Luwei Kangee85dec2018-10-24 16:05:16 +08004880
Sean Christopherson2ef76192020-03-02 15:56:22 -08004881 if (vmx_pt_mode_is_host_guest()) {
Luwei Kangee85dec2018-10-24 16:05:16 +08004882 vmx->pt_desc.guest.ctl = 0;
Aaron Lewis476c9bd2020-09-25 16:34:18 +02004883 pt_update_intercept_for_msr(vcpu);
Luwei Kangee85dec2018-10-24 16:05:16 +08004884 }
4885
Sean Christopherson55d23752018-12-03 13:53:18 -08004886 return 0;
4887
4888out_shadow_vmcs:
4889 kfree(vmx->nested.cached_shadow_vmcs12);
4890
4891out_cached_shadow_vmcs12:
4892 kfree(vmx->nested.cached_vmcs12);
4893
4894out_cached_vmcs12:
4895 free_loaded_vmcs(&vmx->nested.vmcs02);
4896
4897out_vmcs02:
4898 return -ENOMEM;
4899}
4900
Yu Zhanged7023a2021-09-09 01:17:31 +08004901/* Emulate the VMXON instruction. */
Sean Christopherson55d23752018-12-03 13:53:18 -08004902static int handle_vmon(struct kvm_vcpu *vcpu)
4903{
4904 int ret;
4905 gpa_t vmptr;
KarimAllah Ahmed2e408932019-01-31 21:24:31 +01004906 uint32_t revision;
Sean Christopherson55d23752018-12-03 13:53:18 -08004907 struct vcpu_vmx *vmx = to_vmx(vcpu);
Sean Christopherson32ad73d2019-12-20 20:44:55 -08004908 const u64 VMXON_NEEDED_FEATURES = FEAT_CTL_LOCKED
4909 | FEAT_CTL_VMX_ENABLED_OUTSIDE_SMX;
Sean Christopherson55d23752018-12-03 13:53:18 -08004910
4911 /*
4912 * The Intel VMX Instruction Reference lists a bunch of bits that are
4913 * prerequisite to running VMXON, most notably cr4.VMXE must be set to
Sean Christophersonc2fe3cd2020-10-06 18:44:15 -07004914 * 1 (see vmx_is_valid_cr4() for when we allow the guest to set this).
Sean Christopherson55d23752018-12-03 13:53:18 -08004915 * Otherwise, we should fail with #UD. But most faulting conditions
4916 * have already been checked by hardware, prior to the VM-exit for
4917 * VMXON. We do test guest cr4.VMXE because processor CR4 always has
4918 * that bit set to 1 in non-root mode.
4919 */
4920 if (!kvm_read_cr4_bits(vcpu, X86_CR4_VMXE)) {
4921 kvm_queue_exception(vcpu, UD_VECTOR);
4922 return 1;
4923 }
4924
4925 /* CPL=0 must be checked manually. */
4926 if (vmx_get_cpl(vcpu)) {
4927 kvm_inject_gp(vcpu, 0);
4928 return 1;
4929 }
4930
4931 if (vmx->nested.vmxon)
Sean Christophersonb2656e42020-06-08 18:56:07 -07004932 return nested_vmx_fail(vcpu, VMXERR_VMXON_IN_VMX_ROOT_OPERATION);
Sean Christopherson55d23752018-12-03 13:53:18 -08004933
4934 if ((vmx->msr_ia32_feature_control & VMXON_NEEDED_FEATURES)
4935 != VMXON_NEEDED_FEATURES) {
4936 kvm_inject_gp(vcpu, 0);
4937 return 1;
4938 }
4939
Vitaly Kuznetsov7a35e512020-06-05 13:59:05 +02004940 if (nested_vmx_get_vmptr(vcpu, &vmptr, &ret))
4941 return ret;
Sean Christopherson55d23752018-12-03 13:53:18 -08004942
4943 /*
4944 * SDM 3: 24.11.5
4945 * The first 4 bytes of VMXON region contain the supported
4946 * VMCS revision identifier
4947 *
4948 * Note - IA32_VMX_BASIC[48] will never be 1 for the nested case, i.e.
4949 * the VMXON pointer is never limited to a 32-bit physical address width.
4950 */
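	/*
	 * Added illustration, not part of the original source: assuming
	 * page_address_valid() keeps its usual alignment plus guest
	 * MAXPHYADDR check, a guest reporting MAXPHYADDR = 39 gets VMfail
	 * for any vmptr that is not 4KiB aligned or that sets a bit at or
	 * above bit 39.
	 */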
KarimAllah Ahmede0bf2662019-01-31 21:24:43 +01004951 if (!page_address_valid(vcpu, vmptr))
Sean Christopherson55d23752018-12-03 13:53:18 -08004952 return nested_vmx_failInvalid(vcpu);
4953
KarimAllah Ahmed2e408932019-01-31 21:24:31 +01004954 if (kvm_read_guest(vcpu->kvm, vmptr, &revision, sizeof(revision)) ||
4955 revision != VMCS12_REVISION)
Sean Christopherson55d23752018-12-03 13:53:18 -08004956 return nested_vmx_failInvalid(vcpu);
4957
Sean Christopherson55d23752018-12-03 13:53:18 -08004958 vmx->nested.vmxon_ptr = vmptr;
4959 ret = enter_vmx_operation(vcpu);
4960 if (ret)
4961 return ret;
4962
4963 return nested_vmx_succeed(vcpu);
4964}
4965
4966static inline void nested_release_vmcs12(struct kvm_vcpu *vcpu)
4967{
4968 struct vcpu_vmx *vmx = to_vmx(vcpu);
4969
Yu Zhang64c78502021-09-30 01:51:53 +08004970 if (vmx->nested.current_vmptr == INVALID_GPA)
Sean Christopherson55d23752018-12-03 13:53:18 -08004971 return;
4972
Sean Christopherson7952d762019-05-07 08:36:29 -07004973 copy_vmcs02_to_vmcs12_rare(vcpu, get_vmcs12(vcpu));
4974
Sean Christopherson55d23752018-12-03 13:53:18 -08004975 if (enable_shadow_vmcs) {
4976 /* Copy to memory all shadowed fields in case they were modified. */
4978 copy_shadow_to_vmcs12(vmx);
Sean Christopherson55d23752018-12-03 13:53:18 -08004979 vmx_disable_shadow_vmcs(vmx);
4980 }
4981 vmx->nested.posted_intr_nv = -1;
4982
4983 /* Flush VMCS12 to guest memory */
4984 kvm_vcpu_write_guest_page(vcpu,
4985 vmx->nested.current_vmptr >> PAGE_SHIFT,
4986 vmx->nested.cached_vmcs12, 0, VMCS12_SIZE);
4987
4988 kvm_mmu_free_roots(vcpu, &vcpu->arch.guest_mmu, KVM_MMU_ROOTS_ALL);
4989
Yu Zhang64c78502021-09-30 01:51:53 +08004990 vmx->nested.current_vmptr = INVALID_GPA;
Sean Christopherson55d23752018-12-03 13:53:18 -08004991}
4992
4993/* Emulate the VMXOFF instruction */
4994static int handle_vmoff(struct kvm_vcpu *vcpu)
4995{
4996 if (!nested_vmx_check_permission(vcpu))
4997 return 1;
Liran Alon4b9852f2019-08-26 13:24:49 +03004998
Sean Christopherson55d23752018-12-03 13:53:18 -08004999 free_nested(vcpu);
Liran Alon4b9852f2019-08-26 13:24:49 +03005000
5001 /* Process a latched INIT during the time the CPU was in VMX operation */
5002 kvm_make_request(KVM_REQ_EVENT, vcpu);
5003
Sean Christopherson55d23752018-12-03 13:53:18 -08005004 return nested_vmx_succeed(vcpu);
5005}
5006
5007/* Emulate the VMCLEAR instruction */
5008static int handle_vmclear(struct kvm_vcpu *vcpu)
5009{
5010 struct vcpu_vmx *vmx = to_vmx(vcpu);
5011 u32 zero = 0;
5012 gpa_t vmptr;
Vitaly Kuznetsov11e34912019-06-28 13:23:33 +02005013 u64 evmcs_gpa;
Vitaly Kuznetsov7a35e512020-06-05 13:59:05 +02005014 int r;
Sean Christopherson55d23752018-12-03 13:53:18 -08005015
5016 if (!nested_vmx_check_permission(vcpu))
5017 return 1;
5018
Vitaly Kuznetsov7a35e512020-06-05 13:59:05 +02005019 if (nested_vmx_get_vmptr(vcpu, &vmptr, &r))
5020 return r;
Sean Christopherson55d23752018-12-03 13:53:18 -08005021
KarimAllah Ahmede0bf2662019-01-31 21:24:43 +01005022 if (!page_address_valid(vcpu, vmptr))
Sean Christophersonb2656e42020-06-08 18:56:07 -07005023 return nested_vmx_fail(vcpu, VMXERR_VMCLEAR_INVALID_ADDRESS);
Sean Christopherson55d23752018-12-03 13:53:18 -08005024
5025 if (vmptr == vmx->nested.vmxon_ptr)
Sean Christophersonb2656e42020-06-08 18:56:07 -07005026 return nested_vmx_fail(vcpu, VMXERR_VMCLEAR_VMXON_POINTER);
Sean Christopherson55d23752018-12-03 13:53:18 -08005027
Vitaly Kuznetsov11e34912019-06-28 13:23:33 +02005028 /*
5029 * When Enlightened VMEntry is enabled on the calling CPU we treat
5030 * the memory area pointed to by vmptr as Enlightened VMCS (as there's no good
5031 * way to distinguish it from VMCS12) and we must not corrupt it by
5032 * writing to the non-existent 'launch_state' field. The area doesn't
5033 * have to be the currently active EVMCS on the calling CPU and there's
5034 * nothing KVM has to do to transition it from 'active' to 'non-active'
5035 * state. It is possible that the area will stay mapped as
5036 * vmx->nested.hv_evmcs but this shouldn't be a problem.
5037 */
5038 if (likely(!vmx->nested.enlightened_vmcs_enabled ||
5039 !nested_enlightened_vmentry(vcpu, &evmcs_gpa))) {
Sean Christopherson55d23752018-12-03 13:53:18 -08005040 if (vmptr == vmx->nested.current_vmptr)
5041 nested_release_vmcs12(vcpu);
5042
5043 kvm_vcpu_write_guest(vcpu,
5044 vmptr + offsetof(struct vmcs12,
5045 launch_state),
5046 &zero, sizeof(zero));
Vitaly Kuznetsov3b19b812021-05-26 15:20:21 +02005047 } else if (vmx->nested.hv_evmcs && vmptr == vmx->nested.hv_evmcs_vmptr) {
5048 nested_release_evmcs(vcpu);
Sean Christopherson55d23752018-12-03 13:53:18 -08005049 }
5050
5051 return nested_vmx_succeed(vcpu);
5052}
5053
Sean Christopherson55d23752018-12-03 13:53:18 -08005054/* Emulate the VMLAUNCH instruction */
5055static int handle_vmlaunch(struct kvm_vcpu *vcpu)
5056{
5057 return nested_vmx_run(vcpu, true);
5058}
5059
5060/* Emulate the VMRESUME instruction */
5061static int handle_vmresume(struct kvm_vcpu *vcpu)
5062{
5064 return nested_vmx_run(vcpu, false);
5065}
5066
5067static int handle_vmread(struct kvm_vcpu *vcpu)
5068{
Jim Mattsondd2d6042019-12-06 15:46:35 -08005069 struct vmcs12 *vmcs12 = is_guest_mode(vcpu) ? get_shadow_vmcs12(vcpu)
5070 : get_vmcs12(vcpu);
Sean Christopherson5addc232020-04-15 13:34:53 -07005071 unsigned long exit_qualification = vmx_get_exit_qual(vcpu);
Jim Mattsonc90f4d02019-12-06 15:46:37 -08005072 u32 instr_info = vmcs_read32(VMX_INSTRUCTION_INFO);
5073 struct vcpu_vmx *vmx = to_vmx(vcpu);
Paolo Bonzinif7eea632019-09-14 00:26:27 +02005074 struct x86_exception e;
Jim Mattsonc90f4d02019-12-06 15:46:37 -08005075 unsigned long field;
5076 u64 value;
5077 gva_t gva = 0;
Sean Christopherson1c6f0b42019-05-07 08:36:25 -07005078 short offset;
Vitaly Kuznetsov7a35e512020-06-05 13:59:05 +02005079 int len, r;
Sean Christopherson55d23752018-12-03 13:53:18 -08005080
5081 if (!nested_vmx_check_permission(vcpu))
5082 return 1;
5083
Jim Mattsondd2d6042019-12-06 15:46:35 -08005084 /*
Yu Zhang64c78502021-09-30 01:51:53 +08005085 * In VMX non-root operation, when the VMCS-link pointer is INVALID_GPA,
Jim Mattsondd2d6042019-12-06 15:46:35 -08005086 * any VMREAD sets the ALU flags for VMfailInvalid.
5087 */
Yu Zhang64c78502021-09-30 01:51:53 +08005088 if (vmx->nested.current_vmptr == INVALID_GPA ||
Jim Mattsondd2d6042019-12-06 15:46:35 -08005089 (is_guest_mode(vcpu) &&
Yu Zhang64c78502021-09-30 01:51:53 +08005090 get_vmcs12(vcpu)->vmcs_link_pointer == INVALID_GPA))
Sean Christopherson55d23752018-12-03 13:53:18 -08005091 return nested_vmx_failInvalid(vcpu);
5092
Sean Christopherson55d23752018-12-03 13:53:18 -08005093 /* Decode instruction info and find the field to read */
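	/*
	 * Added note on the VMX instruction-information layout relied on
	 * below: bits 31:28 name the register holding the VMCS field
	 * encoding, bit 10 selects a register (1) or memory (0) destination,
	 * and bits 6:3 name that destination register.
	 */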
Sean Christopherson27b4a9c42021-04-21 19:21:28 -07005094 field = kvm_register_read(vcpu, (((instr_info) >> 28) & 0xf));
Sean Christopherson1c6f0b42019-05-07 08:36:25 -07005095
5096 offset = vmcs_field_to_offset(field);
5097 if (offset < 0)
Sean Christophersonb2656e42020-06-08 18:56:07 -07005098 return nested_vmx_fail(vcpu, VMXERR_UNSUPPORTED_VMCS_COMPONENT);
Sean Christopherson55d23752018-12-03 13:53:18 -08005099
Sean Christopherson7952d762019-05-07 08:36:29 -07005100 if (!is_guest_mode(vcpu) && is_vmcs12_ext_field(field))
5101 copy_vmcs02_to_vmcs12_rare(vcpu, vmcs12);
5102
Jim Mattsonc90f4d02019-12-06 15:46:37 -08005103 /* Read the field, zero-extended to a u64 value */
5104 value = vmcs12_read_any(vmcs12, field, offset);
Sean Christopherson1c6f0b42019-05-07 08:36:25 -07005105
Sean Christopherson55d23752018-12-03 13:53:18 -08005106 /*
5107 * Now copy part of this value to register or memory, as requested.
5108 * Note that the number of bits actually copied is 32 or 64 depending
5109 * on the guest's mode (32 or 64 bit), not on the given field's length.
5110 */
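	/*
	 * Added illustration: a VMREAD of the 16-bit GUEST_ES_SELECTOR by a
	 * 64-bit L1 with a memory destination stores a zero-extended 8-byte
	 * value (len == 8 below), while a 32-bit L1 stores 4 bytes.
	 */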
Jim Mattsonc90f4d02019-12-06 15:46:37 -08005111 if (instr_info & BIT(10)) {
Sean Christopherson27b4a9c42021-04-21 19:21:28 -07005112 kvm_register_write(vcpu, (((instr_info) >> 3) & 0xf), value);
Sean Christopherson55d23752018-12-03 13:53:18 -08005113 } else {
Eugene Korenevskyfdb28612019-06-06 00:19:16 +03005114 len = is_64_bit_mode(vcpu) ? 8 : 4;
Sean Christopherson55d23752018-12-03 13:53:18 -08005115 if (get_vmx_mem_address(vcpu, exit_qualification,
Jim Mattsonc90f4d02019-12-06 15:46:37 -08005116 instr_info, true, len, &gva))
Sean Christopherson55d23752018-12-03 13:53:18 -08005117 return 1;
5118 /* _system ok, nested_vmx_check_permission has verified cpl=0 */
Vitaly Kuznetsov7a35e512020-06-05 13:59:05 +02005119 r = kvm_write_guest_virt_system(vcpu, gva, &value, len, &e);
5120 if (r != X86EMUL_CONTINUE)
Babu Moger3f3393b2020-09-11 14:29:05 -05005121 return kvm_handle_memory_failure(vcpu, r, &e);
Sean Christopherson55d23752018-12-03 13:53:18 -08005122 }
5123
5124 return nested_vmx_succeed(vcpu);
5125}
5126
Sean Christophersone2174292019-05-07 08:36:28 -07005127static bool is_shadow_field_rw(unsigned long field)
5128{
5129 switch (field) {
5130#define SHADOW_FIELD_RW(x, y) case x:
5131#include "vmcs_shadow_fields.h"
5132 return true;
5133 default:
5134 break;
5135 }
5136 return false;
5137}
5138
5139static bool is_shadow_field_ro(unsigned long field)
5140{
5141 switch (field) {
5142#define SHADOW_FIELD_RO(x, y) case x:
5143#include "vmcs_shadow_fields.h"
5144 return true;
5145 default:
5146 break;
5147 }
5148 return false;
5149}
Sean Christopherson55d23752018-12-03 13:53:18 -08005150
5151static int handle_vmwrite(struct kvm_vcpu *vcpu)
5152{
Jim Mattsondd2d6042019-12-06 15:46:35 -08005153 struct vmcs12 *vmcs12 = is_guest_mode(vcpu) ? get_shadow_vmcs12(vcpu)
5154 : get_vmcs12(vcpu);
Sean Christopherson5addc232020-04-15 13:34:53 -07005155 unsigned long exit_qualification = vmx_get_exit_qual(vcpu);
Jim Mattsonc90f4d02019-12-06 15:46:37 -08005156 u32 instr_info = vmcs_read32(VMX_INSTRUCTION_INFO);
5157 struct vcpu_vmx *vmx = to_vmx(vcpu);
5158 struct x86_exception e;
5159 unsigned long field;
Sean Christopherson1c6f0b42019-05-07 08:36:25 -07005160 short offset;
Jim Mattsonc90f4d02019-12-06 15:46:37 -08005161 gva_t gva;
Vitaly Kuznetsov7a35e512020-06-05 13:59:05 +02005162 int len, r;
Sean Christopherson55d23752018-12-03 13:53:18 -08005163
Jim Mattsonc90f4d02019-12-06 15:46:37 -08005164 /*
5165 * The value to write might be 32 or 64 bits, depending on L1's long
Sean Christopherson55d23752018-12-03 13:53:18 -08005166 * mode, and eventually we need to write that into a field of several
5167 * possible lengths. The code below first zero-extends the value to 64
Jim Mattsonc90f4d02019-12-06 15:46:37 -08005168 * bit (value), and then copies only the appropriate number of
Sean Christopherson55d23752018-12-03 13:53:18 -08005169 * bits into the vmcs12 field.
5170 */
Jim Mattsonc90f4d02019-12-06 15:46:37 -08005171 u64 value = 0;
Sean Christopherson55d23752018-12-03 13:53:18 -08005172
5173 if (!nested_vmx_check_permission(vcpu))
5174 return 1;
5175
Jim Mattsondd2d6042019-12-06 15:46:35 -08005176 /*
Yu Zhang64c78502021-09-30 01:51:53 +08005177 * In VMX non-root operation, when the VMCS-link pointer is INVALID_GPA,
Jim Mattsondd2d6042019-12-06 15:46:35 -08005178 * any VMWRITE sets the ALU flags for VMfailInvalid.
5179 */
Yu Zhang64c78502021-09-30 01:51:53 +08005180 if (vmx->nested.current_vmptr == INVALID_GPA ||
Jim Mattsondd2d6042019-12-06 15:46:35 -08005181 (is_guest_mode(vcpu) &&
Yu Zhang64c78502021-09-30 01:51:53 +08005182 get_vmcs12(vcpu)->vmcs_link_pointer == INVALID_GPA))
Sean Christopherson55d23752018-12-03 13:53:18 -08005183 return nested_vmx_failInvalid(vcpu);
5184
Jim Mattsonc90f4d02019-12-06 15:46:37 -08005185 if (instr_info & BIT(10))
Sean Christopherson27b4a9c42021-04-21 19:21:28 -07005186 value = kvm_register_read(vcpu, (((instr_info) >> 3) & 0xf));
Sean Christopherson55d23752018-12-03 13:53:18 -08005187 else {
Eugene Korenevskyfdb28612019-06-06 00:19:16 +03005188 len = is_64_bit_mode(vcpu) ? 8 : 4;
Sean Christopherson55d23752018-12-03 13:53:18 -08005189 if (get_vmx_mem_address(vcpu, exit_qualification,
Jim Mattsonc90f4d02019-12-06 15:46:37 -08005190 instr_info, false, len, &gva))
Sean Christopherson55d23752018-12-03 13:53:18 -08005191 return 1;
Vitaly Kuznetsov7a35e512020-06-05 13:59:05 +02005192 r = kvm_read_guest_virt(vcpu, gva, &value, len, &e);
5193 if (r != X86EMUL_CONTINUE)
Babu Moger3f3393b2020-09-11 14:29:05 -05005194 return kvm_handle_memory_failure(vcpu, r, &e);
Sean Christopherson55d23752018-12-03 13:53:18 -08005195 }
5196
Sean Christopherson27b4a9c42021-04-21 19:21:28 -07005197 field = kvm_register_read(vcpu, (((instr_info) >> 28) & 0xf));
Sean Christopherson55d23752018-12-03 13:53:18 -08005198
Jim Mattson693e02c2019-12-06 15:46:36 -08005199 offset = vmcs_field_to_offset(field);
5200 if (offset < 0)
Sean Christophersonb2656e42020-06-08 18:56:07 -07005201 return nested_vmx_fail(vcpu, VMXERR_UNSUPPORTED_VMCS_COMPONENT);
Jim Mattson693e02c2019-12-06 15:46:36 -08005202
Sean Christopherson55d23752018-12-03 13:53:18 -08005203 /*
5204 * If the vCPU supports "VMWRITE to any supported field in the
5205 * VMCS," then the "read-only" fields are actually read/write.
5206 */
5207 if (vmcs_field_readonly(field) &&
5208 !nested_cpu_has_vmwrite_any_field(vcpu))
Sean Christophersonb2656e42020-06-08 18:56:07 -07005209 return nested_vmx_fail(vcpu, VMXERR_VMWRITE_READ_ONLY_VMCS_COMPONENT);
Sean Christopherson55d23752018-12-03 13:53:18 -08005210
Jim Mattsondd2d6042019-12-06 15:46:35 -08005211 /*
5212 * Ensure vmcs12 is up-to-date before any VMWRITE that dirties
5213 * vmcs12, else we may clobber a field or consume a stale value.
5214 */
5215 if (!is_guest_mode(vcpu) && !is_shadow_field_rw(field))
5216 copy_vmcs02_to_vmcs12_rare(vcpu, vmcs12);
Sean Christopherson55d23752018-12-03 13:53:18 -08005217
5218 /*
Sean Christophersonb6437802019-05-07 08:36:24 -07005219 * Some Intel CPUs intentionally drop the reserved bits of the AR byte
5220 * fields on VMWRITE. Emulate this behavior to ensure consistent KVM
5221 * behavior regardless of the underlying hardware, e.g. if an AR_BYTE
5222 * field is intercepted for VMWRITE but not VMREAD (in L1), then VMREAD
5223 * from L1 will return a different value than VMREAD from L2 (L1 sees
5224 * the stripped down value, L2 sees the full value as stored by KVM).
Sean Christopherson55d23752018-12-03 13:53:18 -08005225 */
Sean Christophersonb6437802019-05-07 08:36:24 -07005226 if (field >= GUEST_ES_AR_BYTES && field <= GUEST_TR_AR_BYTES)
Jim Mattsonc90f4d02019-12-06 15:46:37 -08005227 value &= 0x1f0ff;
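	/*
	 * Added note: 0x1f0ff keeps the type/S/DPL/P bits (7:0) and the
	 * AVL/L/D-B/G/unusable bits (16:12) of the access-rights field and
	 * strips the reserved bits 11:8, matching what hardware drops.
	 */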
Sean Christophersonb6437802019-05-07 08:36:24 -07005228
Jim Mattsonc90f4d02019-12-06 15:46:37 -08005229 vmcs12_write_any(vmcs12, field, offset, value);
Sean Christopherson55d23752018-12-03 13:53:18 -08005230
5231 /*
Sean Christophersone2174292019-05-07 08:36:28 -07005232 * Do not track vmcs12 dirty-state if in guest-mode as we actually
5233 * dirty shadow vmcs12 instead of vmcs12. Fields that can be updated
5234 * by L1 without a vmexit are always updated in the vmcs02, i.e. don't
5235 * "dirty" vmcs12, all others go down the prepare_vmcs02() slow path.
Sean Christopherson55d23752018-12-03 13:53:18 -08005236 */
Sean Christophersone2174292019-05-07 08:36:28 -07005237 if (!is_guest_mode(vcpu) && !is_shadow_field_rw(field)) {
5238 /*
5239 * L1 can read these fields without exiting, ensure the
5240 * shadow VMCS is up-to-date.
5241 */
5242 if (enable_shadow_vmcs && is_shadow_field_ro(field)) {
5243 preempt_disable();
5244 vmcs_load(vmx->vmcs01.shadow_vmcs);
Sean Christophersonfadcead2019-05-07 08:36:23 -07005245
Jim Mattsonc90f4d02019-12-06 15:46:37 -08005246 __vmcs_writel(field, value);
Sean Christophersonfadcead2019-05-07 08:36:23 -07005247
Sean Christophersone2174292019-05-07 08:36:28 -07005248 vmcs_clear(vmx->vmcs01.shadow_vmcs);
5249 vmcs_load(vmx->loaded_vmcs->vmcs);
5250 preempt_enable();
Sean Christopherson55d23752018-12-03 13:53:18 -08005251 }
Sean Christophersone2174292019-05-07 08:36:28 -07005252 vmx->nested.dirty_vmcs12 = true;
Sean Christopherson55d23752018-12-03 13:53:18 -08005253 }
5254
5255 return nested_vmx_succeed(vcpu);
5256}
5257
5258static void set_current_vmptr(struct vcpu_vmx *vmx, gpa_t vmptr)
5259{
5260 vmx->nested.current_vmptr = vmptr;
5261 if (enable_shadow_vmcs) {
Sean Christophersonfe7f895d2019-05-07 12:17:57 -07005262 secondary_exec_controls_setbit(vmx, SECONDARY_EXEC_SHADOW_VMCS);
Sean Christopherson55d23752018-12-03 13:53:18 -08005263 vmcs_write64(VMCS_LINK_POINTER,
5264 __pa(vmx->vmcs01.shadow_vmcs));
Sean Christopherson3731905ef2019-05-07 08:36:27 -07005265 vmx->nested.need_vmcs12_to_shadow_sync = true;
Sean Christopherson55d23752018-12-03 13:53:18 -08005266 }
5267 vmx->nested.dirty_vmcs12 = true;
Vitaly Kuznetsoved2a4802021-11-29 10:47:03 +01005268 vmx->nested.force_msr_bitmap_recalc = true;
Sean Christopherson55d23752018-12-03 13:53:18 -08005269}
5270
5271/* Emulate the VMPTRLD instruction */
5272static int handle_vmptrld(struct kvm_vcpu *vcpu)
5273{
5274 struct vcpu_vmx *vmx = to_vmx(vcpu);
5275 gpa_t vmptr;
Vitaly Kuznetsov7a35e512020-06-05 13:59:05 +02005276 int r;
Sean Christopherson55d23752018-12-03 13:53:18 -08005277
5278 if (!nested_vmx_check_permission(vcpu))
5279 return 1;
5280
Vitaly Kuznetsov7a35e512020-06-05 13:59:05 +02005281 if (nested_vmx_get_vmptr(vcpu, &vmptr, &r))
5282 return r;
Sean Christopherson55d23752018-12-03 13:53:18 -08005283
KarimAllah Ahmede0bf2662019-01-31 21:24:43 +01005284 if (!page_address_valid(vcpu, vmptr))
Sean Christophersonb2656e42020-06-08 18:56:07 -07005285 return nested_vmx_fail(vcpu, VMXERR_VMPTRLD_INVALID_ADDRESS);
Sean Christopherson55d23752018-12-03 13:53:18 -08005286
5287 if (vmptr == vmx->nested.vmxon_ptr)
Sean Christophersonb2656e42020-06-08 18:56:07 -07005288 return nested_vmx_fail(vcpu, VMXERR_VMPTRLD_VMXON_POINTER);
Sean Christopherson55d23752018-12-03 13:53:18 -08005289
5290 /* Forbid normal VMPTRLD if Enlightened version was used */
Vitaly Kuznetsov1e9dfbd2021-05-26 15:20:16 +02005291 if (evmptr_is_valid(vmx->nested.hv_evmcs_vmptr))
Sean Christopherson55d23752018-12-03 13:53:18 -08005292 return 1;
5293
5294 if (vmx->nested.current_vmptr != vmptr) {
David Woodhousecee66662021-11-15 16:50:26 +00005295 struct gfn_to_hva_cache *ghc = &vmx->nested.vmcs12_cache;
5296 struct vmcs_hdr hdr;
Sean Christopherson55d23752018-12-03 13:53:18 -08005297
Paolo Bonzini8503fea2021-11-22 18:20:16 -05005298 if (kvm_gfn_to_hva_cache_init(vcpu->kvm, ghc, vmptr, VMCS12_SIZE)) {
Sean Christopherson55d23752018-12-03 13:53:18 -08005299 /*
5300 * Reads from an unbacked page return all 1s,
5301 * which means that the 32 bits located at the
5302 * given physical address won't match the required
5303 * VMCS12_REVISION identifier.
5304 */
Sean Christophersonb2656e42020-06-08 18:56:07 -07005305 return nested_vmx_fail(vcpu,
Sean Christopherson55d23752018-12-03 13:53:18 -08005306 VMXERR_VMPTRLD_INCORRECT_VMCS_REVISION_ID);
Sean Christopherson55d23752018-12-03 13:53:18 -08005307 }
KarimAllah Ahmedb146b832019-01-31 21:24:35 +01005308
David Woodhousecee66662021-11-15 16:50:26 +00005309 if (kvm_read_guest_offset_cached(vcpu->kvm, ghc, &hdr,
5310 offsetof(struct vmcs12, hdr),
5311 sizeof(hdr))) {
5312 return nested_vmx_fail(vcpu,
5313 VMXERR_VMPTRLD_INCORRECT_VMCS_REVISION_ID);
5314 }
KarimAllah Ahmedb146b832019-01-31 21:24:35 +01005315
David Woodhousecee66662021-11-15 16:50:26 +00005316 if (hdr.revision_id != VMCS12_REVISION ||
5317 (hdr.shadow_vmcs &&
Sean Christopherson55d23752018-12-03 13:53:18 -08005318 !nested_cpu_has_vmx_shadow_vmcs(vcpu))) {
Sean Christophersonb2656e42020-06-08 18:56:07 -07005319 return nested_vmx_fail(vcpu,
Sean Christopherson55d23752018-12-03 13:53:18 -08005320 VMXERR_VMPTRLD_INCORRECT_VMCS_REVISION_ID);
5321 }
5322
5323 nested_release_vmcs12(vcpu);
5324
5325 /*
5326 * Load VMCS12 from guest memory since it is not already
5327 * cached.
5328 */
David Woodhousecee66662021-11-15 16:50:26 +00005329 if (kvm_read_guest_cached(vcpu->kvm, ghc, vmx->nested.cached_vmcs12,
5330 VMCS12_SIZE)) {
5331 return nested_vmx_fail(vcpu,
5332 VMXERR_VMPTRLD_INCORRECT_VMCS_REVISION_ID);
5333 }
Sean Christopherson55d23752018-12-03 13:53:18 -08005334
5335 set_current_vmptr(vmx, vmptr);
5336 }
5337
5338 return nested_vmx_succeed(vcpu);
5339}
5340
5341/* Emulate the VMPTRST instruction */
5342static int handle_vmptrst(struct kvm_vcpu *vcpu)
5343{
Sean Christopherson5addc232020-04-15 13:34:53 -07005344 unsigned long exit_qual = vmx_get_exit_qual(vcpu);
Sean Christopherson55d23752018-12-03 13:53:18 -08005345 u32 instr_info = vmcs_read32(VMX_INSTRUCTION_INFO);
5346 gpa_t current_vmptr = to_vmx(vcpu)->nested.current_vmptr;
5347 struct x86_exception e;
5348 gva_t gva;
Vitaly Kuznetsov7a35e512020-06-05 13:59:05 +02005349 int r;
Sean Christopherson55d23752018-12-03 13:53:18 -08005350
5351 if (!nested_vmx_check_permission(vcpu))
5352 return 1;
5353
Vitaly Kuznetsov1e9dfbd2021-05-26 15:20:16 +02005354 if (unlikely(evmptr_is_valid(to_vmx(vcpu)->nested.hv_evmcs_vmptr)))
Sean Christopherson55d23752018-12-03 13:53:18 -08005355 return 1;
5356
Eugene Korenevskyfdb28612019-06-06 00:19:16 +03005357 if (get_vmx_mem_address(vcpu, exit_qual, instr_info,
5358 true, sizeof(gpa_t), &gva))
Sean Christopherson55d23752018-12-03 13:53:18 -08005359 return 1;
5360 /* *_system ok, nested_vmx_check_permission has verified cpl=0 */
Vitaly Kuznetsov7a35e512020-06-05 13:59:05 +02005361 r = kvm_write_guest_virt_system(vcpu, gva, (void *)&current_vmptr,
5362 sizeof(gpa_t), &e);
5363 if (r != X86EMUL_CONTINUE)
Babu Moger3f3393b2020-09-11 14:29:05 -05005364 return kvm_handle_memory_failure(vcpu, r, &e);
Vitaly Kuznetsov7a35e512020-06-05 13:59:05 +02005365
Sean Christopherson55d23752018-12-03 13:53:18 -08005366 return nested_vmx_succeed(vcpu);
5367}
5368
5369/* Emulate the INVEPT instruction */
5370static int handle_invept(struct kvm_vcpu *vcpu)
5371{
5372 struct vcpu_vmx *vmx = to_vmx(vcpu);
5373 u32 vmx_instruction_info, types;
Sean Christophersonce8fe7b2020-03-20 14:28:31 -07005374 unsigned long type, roots_to_free;
5375 struct kvm_mmu *mmu;
Sean Christopherson55d23752018-12-03 13:53:18 -08005376 gva_t gva;
5377 struct x86_exception e;
5378 struct {
5379 u64 eptp, gpa;
5380 } operand;
Vipin Sharma329bd562021-11-09 17:44:25 +00005381 int i, r, gpr_index;
Sean Christopherson55d23752018-12-03 13:53:18 -08005382
5383 if (!(vmx->nested.msrs.secondary_ctls_high &
5384 SECONDARY_EXEC_ENABLE_EPT) ||
5385 !(vmx->nested.msrs.ept_caps & VMX_EPT_INVEPT_BIT)) {
5386 kvm_queue_exception(vcpu, UD_VECTOR);
5387 return 1;
5388 }
5389
5390 if (!nested_vmx_check_permission(vcpu))
5391 return 1;
5392
5393 vmx_instruction_info = vmcs_read32(VMX_INSTRUCTION_INFO);
Vipin Sharma329bd562021-11-09 17:44:25 +00005394 gpr_index = vmx_get_instr_info_reg2(vmx_instruction_info);
5395 type = kvm_register_read(vcpu, gpr_index);
Sean Christopherson55d23752018-12-03 13:53:18 -08005396
5397 types = (vmx->nested.msrs.ept_caps >> VMX_EPT_EXTENT_SHIFT) & 6;
5398
5399 if (type >= 32 || !(types & (1 << type)))
Sean Christophersonb2656e42020-06-08 18:56:07 -07005400 return nested_vmx_fail(vcpu, VMXERR_INVALID_OPERAND_TO_INVEPT_INVVPID);
Sean Christopherson55d23752018-12-03 13:53:18 -08005401
5402 /* According to the Intel VMX instruction reference, the memory
5403 * operand is read even if it isn't needed (e.g., for type==global)
5404 */
Sean Christopherson5addc232020-04-15 13:34:53 -07005405 if (get_vmx_mem_address(vcpu, vmx_get_exit_qual(vcpu),
Eugene Korenevskyfdb28612019-06-06 00:19:16 +03005406 vmx_instruction_info, false, sizeof(operand), &gva))
Sean Christopherson55d23752018-12-03 13:53:18 -08005407 return 1;
Vitaly Kuznetsov7a35e512020-06-05 13:59:05 +02005408 r = kvm_read_guest_virt(vcpu, gva, &operand, sizeof(operand), &e);
5409 if (r != X86EMUL_CONTINUE)
Babu Moger3f3393b2020-09-11 14:29:05 -05005410 return kvm_handle_memory_failure(vcpu, r, &e);
Sean Christopherson55d23752018-12-03 13:53:18 -08005411
Sean Christophersonce8fe7b2020-03-20 14:28:31 -07005412 /*
5413 * Nested EPT roots are always held through guest_mmu,
5414 * not root_mmu.
5415 */
5416 mmu = &vcpu->arch.guest_mmu;
5417
Sean Christopherson55d23752018-12-03 13:53:18 -08005418 switch (type) {
Sean Christopherson55d23752018-12-03 13:53:18 -08005419 case VMX_EPT_EXTENT_CONTEXT:
Sean Christophersoneed00302020-03-20 14:27:58 -07005420 if (!nested_vmx_check_eptp(vcpu, operand.eptp))
Sean Christophersonb2656e42020-06-08 18:56:07 -07005421 return nested_vmx_fail(vcpu,
Sean Christophersoneed00302020-03-20 14:27:58 -07005422 VMXERR_INVALID_OPERAND_TO_INVEPT_INVVPID);
Sean Christophersonf8aa7e32020-03-20 14:27:59 -07005423
Sean Christophersonce8fe7b2020-03-20 14:28:31 -07005424 roots_to_free = 0;
Sean Christophersonbe01e8e2020-03-20 14:28:32 -07005425 if (nested_ept_root_matches(mmu->root_hpa, mmu->root_pgd,
Sean Christophersonce8fe7b2020-03-20 14:28:31 -07005426 operand.eptp))
5427 roots_to_free |= KVM_MMU_ROOT_CURRENT;
5428
5429 for (i = 0; i < KVM_MMU_NUM_PREV_ROOTS; i++) {
5430 if (nested_ept_root_matches(mmu->prev_roots[i].hpa,
Sean Christophersonbe01e8e2020-03-20 14:28:32 -07005431 mmu->prev_roots[i].pgd,
Sean Christophersonce8fe7b2020-03-20 14:28:31 -07005432 operand.eptp))
5433 roots_to_free |= KVM_MMU_ROOT_PREVIOUS(i);
5434 }
5435 break;
Sean Christophersoneed00302020-03-20 14:27:58 -07005436 case VMX_EPT_EXTENT_GLOBAL:
Sean Christophersonce8fe7b2020-03-20 14:28:31 -07005437 roots_to_free = KVM_MMU_ROOTS_ALL;
Sean Christopherson55d23752018-12-03 13:53:18 -08005438 break;
5439 default:
Sean Christophersonf9336e32020-05-04 08:35:06 -07005440 BUG();
Sean Christopherson55d23752018-12-03 13:53:18 -08005441 break;
5442 }
5443
Sean Christophersonce8fe7b2020-03-20 14:28:31 -07005444 if (roots_to_free)
5445 kvm_mmu_free_roots(vcpu, mmu, roots_to_free);
5446
Sean Christopherson55d23752018-12-03 13:53:18 -08005447 return nested_vmx_succeed(vcpu);
5448}
5449
5450static int handle_invvpid(struct kvm_vcpu *vcpu)
5451{
5452 struct vcpu_vmx *vmx = to_vmx(vcpu);
5453 u32 vmx_instruction_info;
5454 unsigned long type, types;
5455 gva_t gva;
5456 struct x86_exception e;
5457 struct {
5458 u64 vpid;
5459 u64 gla;
5460 } operand;
5461 u16 vpid02;
Vipin Sharma329bd562021-11-09 17:44:25 +00005462 int r, gpr_index;
Sean Christopherson55d23752018-12-03 13:53:18 -08005463
5464 if (!(vmx->nested.msrs.secondary_ctls_high &
5465 SECONDARY_EXEC_ENABLE_VPID) ||
5466 !(vmx->nested.msrs.vpid_caps & VMX_VPID_INVVPID_BIT)) {
5467 kvm_queue_exception(vcpu, UD_VECTOR);
5468 return 1;
5469 }
5470
5471 if (!nested_vmx_check_permission(vcpu))
5472 return 1;
5473
5474 vmx_instruction_info = vmcs_read32(VMX_INSTRUCTION_INFO);
Vipin Sharma329bd562021-11-09 17:44:25 +00005475 gpr_index = vmx_get_instr_info_reg2(vmx_instruction_info);
5476 type = kvm_register_read(vcpu, gpr_index);
Sean Christopherson55d23752018-12-03 13:53:18 -08005477
5478 types = (vmx->nested.msrs.vpid_caps &
5479 VMX_VPID_EXTENT_SUPPORTED_MASK) >> 8;
5480
5481 if (type >= 32 || !(types & (1 << type)))
Sean Christophersonb2656e42020-06-08 18:56:07 -07005482 return nested_vmx_fail(vcpu,
Sean Christopherson55d23752018-12-03 13:53:18 -08005483 VMXERR_INVALID_OPERAND_TO_INVEPT_INVVPID);
5484
5485 /* According to the Intel VMX instruction reference, the memory
5486 * operand is read even if it isn't needed (e.g., for type==global)
5487 */
Sean Christopherson5addc232020-04-15 13:34:53 -07005488 if (get_vmx_mem_address(vcpu, vmx_get_exit_qual(vcpu),
Eugene Korenevskyfdb28612019-06-06 00:19:16 +03005489 vmx_instruction_info, false, sizeof(operand), &gva))
Sean Christopherson55d23752018-12-03 13:53:18 -08005490 return 1;
Vitaly Kuznetsov7a35e512020-06-05 13:59:05 +02005491 r = kvm_read_guest_virt(vcpu, gva, &operand, sizeof(operand), &e);
5492 if (r != X86EMUL_CONTINUE)
Babu Moger3f3393b2020-09-11 14:29:05 -05005493 return kvm_handle_memory_failure(vcpu, r, &e);
Vitaly Kuznetsov7a35e512020-06-05 13:59:05 +02005494
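	/*
	 * Added note: the INVVPID descriptor holds the VPID in bits 15:0;
	 * bits 63:16 are reserved, so a non-zero upper part fails with the
	 * "invalid operand" error below.
	 */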
Sean Christopherson55d23752018-12-03 13:53:18 -08005495 if (operand.vpid >> 16)
Sean Christophersonb2656e42020-06-08 18:56:07 -07005496 return nested_vmx_fail(vcpu,
Sean Christopherson55d23752018-12-03 13:53:18 -08005497 VMXERR_INVALID_OPERAND_TO_INVEPT_INVVPID);
5498
5499 vpid02 = nested_get_vpid02(vcpu);
5500 switch (type) {
5501 case VMX_VPID_EXTENT_INDIVIDUAL_ADDR:
5502 if (!operand.vpid ||
5503 is_noncanonical_address(operand.gla, vcpu))
Sean Christophersonb2656e42020-06-08 18:56:07 -07005504 return nested_vmx_fail(vcpu,
Sean Christopherson55d23752018-12-03 13:53:18 -08005505 VMXERR_INVALID_OPERAND_TO_INVEPT_INVVPID);
Sean Christophersonbc41d0c2020-03-20 14:28:09 -07005506 vpid_sync_vcpu_addr(vpid02, operand.gla);
Sean Christopherson55d23752018-12-03 13:53:18 -08005507 break;
5508 case VMX_VPID_EXTENT_SINGLE_CONTEXT:
5509 case VMX_VPID_EXTENT_SINGLE_NON_GLOBAL:
5510 if (!operand.vpid)
Sean Christophersonb2656e42020-06-08 18:56:07 -07005511 return nested_vmx_fail(vcpu,
Sean Christopherson55d23752018-12-03 13:53:18 -08005512 VMXERR_INVALID_OPERAND_TO_INVEPT_INVVPID);
Sean Christopherson446ace42020-03-20 14:28:05 -07005513 vpid_sync_context(vpid02);
Sean Christopherson55d23752018-12-03 13:53:18 -08005514 break;
5515 case VMX_VPID_EXTENT_ALL_CONTEXT:
Sean Christopherson446ace42020-03-20 14:28:05 -07005516 vpid_sync_context(vpid02);
Sean Christopherson55d23752018-12-03 13:53:18 -08005517 break;
5518 default:
5519 WARN_ON_ONCE(1);
5520 return kvm_skip_emulated_instruction(vcpu);
5521 }
5522
Junaid Shahidd6e3f832020-03-20 14:28:00 -07005523 /*
5524 * Sync the shadow page tables if EPT is disabled, L1 is invalidating
Sean Christopherson25b62c62021-06-09 16:42:29 -07005525 * linear mappings for L2 (tagged with L2's VPID). Free all guest
5526 * roots as VPIDs are not tracked in the MMU role.
Junaid Shahidd6e3f832020-03-20 14:28:00 -07005527 *
5528 * Note, this operates on root_mmu, not guest_mmu, as L1 and L2 share
5529 * an MMU when EPT is disabled.
5530 *
5531 * TODO: sync only the affected SPTEs for INDIVIDUAL_ADDR.
5532 */
5533 if (!enable_ept)
Sean Christopherson25b62c62021-06-09 16:42:29 -07005534 kvm_mmu_free_guest_mode_roots(vcpu, &vcpu->arch.root_mmu);
Junaid Shahidd6e3f832020-03-20 14:28:00 -07005535
Sean Christopherson55d23752018-12-03 13:53:18 -08005536 return nested_vmx_succeed(vcpu);
5537}
5538
5539static int nested_vmx_eptp_switching(struct kvm_vcpu *vcpu,
5540 struct vmcs12 *vmcs12)
5541{
Sean Christopherson2b3eaf82019-04-30 10:36:19 -07005542 u32 index = kvm_rcx_read(vcpu);
Sean Christophersonac6389a2020-03-02 18:02:38 -08005543 u64 new_eptp;
Sean Christopherson55d23752018-12-03 13:53:18 -08005544
Sean Christophersonc5ffd402021-06-09 16:42:35 -07005545 if (WARN_ON_ONCE(!nested_cpu_has_ept(vmcs12)))
Sean Christopherson55d23752018-12-03 13:53:18 -08005546 return 1;
Sean Christopherson55d23752018-12-03 13:53:18 -08005547 if (index >= VMFUNC_EPTP_ENTRIES)
5548 return 1;
5549
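	/*
	 * Added note: the EPTP list is a single 4KiB page of 512 eight-byte
	 * entries indexed by ECX; the read below fetches entry 'index' only.
	 */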
Sean Christopherson55d23752018-12-03 13:53:18 -08005550 if (kvm_vcpu_read_guest_page(vcpu, vmcs12->eptp_list_address >> PAGE_SHIFT,
Sean Christophersonac6389a2020-03-02 18:02:38 -08005551 &new_eptp, index * 8, 8))
Sean Christopherson55d23752018-12-03 13:53:18 -08005552 return 1;
5553
Sean Christopherson55d23752018-12-03 13:53:18 -08005554 /*
5555 * If the (L2) guest does a vmfunc to the currently
5556 * active EPT pointer, we don't have to do anything else.
5557 */
Sean Christophersonac6389a2020-03-02 18:02:38 -08005558 if (vmcs12->ept_pointer != new_eptp) {
5559 if (!nested_vmx_check_eptp(vcpu, new_eptp))
Sean Christopherson55d23752018-12-03 13:53:18 -08005560 return 1;
5561
Sean Christophersonac6389a2020-03-02 18:02:38 -08005562 vmcs12->ept_pointer = new_eptp;
Sean Christopherson39353ab2021-06-09 16:42:31 -07005563 nested_ept_new_eptp(vcpu);
Sean Christophersonc805f5d2021-03-04 17:10:57 -08005564
Sean Christopherson39353ab2021-06-09 16:42:31 -07005565 if (!nested_cpu_has_vpid(vmcs12))
5566 kvm_make_request(KVM_REQ_TLB_FLUSH_GUEST, vcpu);
Sean Christopherson55d23752018-12-03 13:53:18 -08005567 }
5568
5569 return 0;
5570}
5571
5572static int handle_vmfunc(struct kvm_vcpu *vcpu)
5573{
5574 struct vcpu_vmx *vmx = to_vmx(vcpu);
5575 struct vmcs12 *vmcs12;
Sean Christopherson2b3eaf82019-04-30 10:36:19 -07005576 u32 function = kvm_rax_read(vcpu);
Sean Christopherson55d23752018-12-03 13:53:18 -08005577
5578 /*
5579 * VMFUNC is only supported for nested guests, but we always enable the
5580 * secondary control for simplicity; for non-nested mode, fake that we
5581 * didn't by injecting #UD.
5582 */
5583 if (!is_guest_mode(vcpu)) {
5584 kvm_queue_exception(vcpu, UD_VECTOR);
5585 return 1;
5586 }
5587
5588 vmcs12 = get_vmcs12(vcpu);
Sean Christopherson546e8392021-06-09 16:42:34 -07005589
5590 /*
5591 * #UD on out-of-bounds function has priority over VM-Exit, and VMFUNC
5592 * is enabled in vmcs02 if and only if it's enabled in vmcs12.
5593 */
5594 if (WARN_ON_ONCE((function > 63) || !nested_cpu_has_vmfunc(vmcs12))) {
5595 kvm_queue_exception(vcpu, UD_VECTOR);
5596 return 1;
5597 }
5598
Sean Christopherson0e752252021-06-09 16:42:22 -07005599 if (!(vmcs12->vm_function_control & BIT_ULL(function)))
Sean Christopherson55d23752018-12-03 13:53:18 -08005600 goto fail;
5601
5602 switch (function) {
5603 case 0:
5604 if (nested_vmx_eptp_switching(vcpu, vmcs12))
5605 goto fail;
5606 break;
5607 default:
5608 goto fail;
5609 }
5610 return kvm_skip_emulated_instruction(vcpu);
5611
5612fail:
Sean Christopherson8e533242020-11-06 17:03:12 +08005613 /*
5614 * This is effectively a reflected VM-Exit, as opposed to a synthesized
5615 * nested VM-Exit. Pass the original exit reason, i.e. don't hardcode
5616 * EXIT_REASON_VMFUNC as the exit reason.
5617 */
5618 nested_vmx_vmexit(vcpu, vmx->exit_reason.full,
Sean Christopherson87915852020-04-15 13:34:54 -07005619 vmx_get_intr_info(vcpu),
Sean Christopherson5addc232020-04-15 13:34:53 -07005620 vmx_get_exit_qual(vcpu));
Sean Christopherson55d23752018-12-03 13:53:18 -08005621 return 1;
5622}
5623
Oliver Uptone71237d2020-02-04 15:26:30 -08005624/*
5625 * Return true if an IO instruction with the specified port and size should cause
5626 * a VM-exit into L1.
5627 */
5628bool nested_vmx_check_io_bitmaps(struct kvm_vcpu *vcpu, unsigned int port,
5629 int size)
Sean Christopherson55d23752018-12-03 13:53:18 -08005630{
Oliver Uptone71237d2020-02-04 15:26:30 -08005631 struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
Sean Christopherson55d23752018-12-03 13:53:18 -08005632 gpa_t bitmap, last_bitmap;
Sean Christopherson55d23752018-12-03 13:53:18 -08005633 u8 b;
5634
Yu Zhang64c78502021-09-30 01:51:53 +08005635 last_bitmap = INVALID_GPA;
Sean Christopherson55d23752018-12-03 13:53:18 -08005636 b = -1;
5637
5638 while (size > 0) {
5639 if (port < 0x8000)
5640 bitmap = vmcs12->io_bitmap_a;
5641 else if (port < 0x10000)
5642 bitmap = vmcs12->io_bitmap_b;
5643 else
5644 return true;
5645 bitmap += (port & 0x7fff) / 8;
5646
5647 if (last_bitmap != bitmap)
5648 if (kvm_vcpu_read_guest(vcpu, bitmap, &b, 1))
5649 return true;
5650 if (b & (1 << (port & 7)))
5651 return true;
5652
5653 port++;
5654 size--;
5655 last_bitmap = bitmap;
5656 }
5657
5658 return false;
5659}
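/*
 * Added illustration of the helper above, not in the original source: a
 * one-byte access to port 0x3f8 tests bit 0 of byte 0x3f8 / 8 = 127 in
 * io_bitmap_a, ports 0x8000-0xffff are covered by io_bitmap_b, and an
 * access that runs past port 0xffff unconditionally exits to L1.
 */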
5660
Oliver Uptone71237d2020-02-04 15:26:30 -08005661static bool nested_vmx_exit_handled_io(struct kvm_vcpu *vcpu,
5662 struct vmcs12 *vmcs12)
5663{
5664 unsigned long exit_qualification;
Oliver Upton35a57132020-02-04 15:26:31 -08005665 unsigned short port;
Oliver Uptone71237d2020-02-04 15:26:30 -08005666 int size;
5667
5668 if (!nested_cpu_has(vmcs12, CPU_BASED_USE_IO_BITMAPS))
5669 return nested_cpu_has(vmcs12, CPU_BASED_UNCOND_IO_EXITING);
5670
Sean Christopherson5addc232020-04-15 13:34:53 -07005671 exit_qualification = vmx_get_exit_qual(vcpu);
Oliver Uptone71237d2020-02-04 15:26:30 -08005672
5673 port = exit_qualification >> 16;
5674 size = (exit_qualification & 7) + 1;
5675
5676 return nested_vmx_check_io_bitmaps(vcpu, port, size);
5677}
5678
Sean Christopherson55d23752018-12-03 13:53:18 -08005679/*
Miaohe Lin463bfee2020-02-14 10:44:05 +08005680 * Return 1 if we should exit from L2 to L1 to handle an MSR access,
Sean Christopherson55d23752018-12-03 13:53:18 -08005681 * rather than handle it ourselves in L0. I.e., check whether L1 expressed
5682 * disinterest in the current event (read or write a specific MSR) by using an
5683 * MSR bitmap. This may be the case even when L0 doesn't use MSR bitmaps.
5684 */
5685static bool nested_vmx_exit_handled_msr(struct kvm_vcpu *vcpu,
Sean Christopherson8e533242020-11-06 17:03:12 +08005686 struct vmcs12 *vmcs12,
5687 union vmx_exit_reason exit_reason)
Sean Christopherson55d23752018-12-03 13:53:18 -08005688{
Sean Christopherson2b3eaf82019-04-30 10:36:19 -07005689 u32 msr_index = kvm_rcx_read(vcpu);
Sean Christopherson55d23752018-12-03 13:53:18 -08005690 gpa_t bitmap;
5691
5692 if (!nested_cpu_has(vmcs12, CPU_BASED_USE_MSR_BITMAPS))
5693 return true;
5694
5695 /*
5696 * The MSR_BITMAP page is divided into four 1024-byte bitmaps,
5697 * for the four combinations of read/write and low/high MSR numbers.
5698 * First we need to figure out which of the four to use:
5699 */
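	/*
	 * Added illustration, not in the original source: a WRMSR to
	 * MSR_EFER (0xc0000080) uses the write bitmaps at offset 2048; the
	 * index is >= 0xc0000000, so another 1024 bytes are skipped and the
	 * index becomes 0x80, landing on byte 2048 + 1024 + 0x80 / 8 = 3088,
	 * bit 0.
	 */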
5700 bitmap = vmcs12->msr_bitmap;
Sean Christopherson8e533242020-11-06 17:03:12 +08005701 if (exit_reason.basic == EXIT_REASON_MSR_WRITE)
Sean Christopherson55d23752018-12-03 13:53:18 -08005702 bitmap += 2048;
5703 if (msr_index >= 0xc0000000) {
5704 msr_index -= 0xc0000000;
5705 bitmap += 1024;
5706 }
5707
5708 /* Then read the msr_index'th bit from this bitmap: */
5709 if (msr_index < 1024*8) {
5710 unsigned char b;
5711 if (kvm_vcpu_read_guest(vcpu, bitmap + msr_index/8, &b, 1))
5712 return true;
5713 return 1 & (b >> (msr_index & 7));
5714 } else
5715 return true; /* let L1 handle the wrong parameter */
5716}
5717
5718/*
5719 * Return 1 if we should exit from L2 to L1 to handle a CR access exit,
5720 * rather than handle it ourselves in L0. I.e., check if L1 wanted to
5721 * intercept (via guest_host_mask etc.) the current event.
5722 */
5723static bool nested_vmx_exit_handled_cr(struct kvm_vcpu *vcpu,
5724 struct vmcs12 *vmcs12)
5725{
Sean Christopherson5addc232020-04-15 13:34:53 -07005726 unsigned long exit_qualification = vmx_get_exit_qual(vcpu);
Sean Christopherson55d23752018-12-03 13:53:18 -08005727 int cr = exit_qualification & 15;
5728 int reg;
5729 unsigned long val;
5730
5731 switch ((exit_qualification >> 4) & 3) {
5732 case 0: /* mov to cr */
5733 reg = (exit_qualification >> 8) & 15;
Sean Christopherson27b4a9c42021-04-21 19:21:28 -07005734 val = kvm_register_read(vcpu, reg);
Sean Christopherson55d23752018-12-03 13:53:18 -08005735 switch (cr) {
5736 case 0:
5737 if (vmcs12->cr0_guest_host_mask &
5738 (val ^ vmcs12->cr0_read_shadow))
5739 return true;
5740 break;
5741 case 3:
Sean Christopherson55d23752018-12-03 13:53:18 -08005742 if (nested_cpu_has(vmcs12, CPU_BASED_CR3_LOAD_EXITING))
5743 return true;
5744 break;
5745 case 4:
5746 if (vmcs12->cr4_guest_host_mask &
5747 (vmcs12->cr4_read_shadow ^ val))
5748 return true;
5749 break;
5750 case 8:
5751 if (nested_cpu_has(vmcs12, CPU_BASED_CR8_LOAD_EXITING))
5752 return true;
5753 break;
5754 }
5755 break;
5756 case 2: /* clts */
5757 if ((vmcs12->cr0_guest_host_mask & X86_CR0_TS) &&
5758 (vmcs12->cr0_read_shadow & X86_CR0_TS))
5759 return true;
5760 break;
5761 case 1: /* mov from cr */
5762 switch (cr) {
5763 case 3:
5764 if (vmcs12->cpu_based_vm_exec_control &
5765 CPU_BASED_CR3_STORE_EXITING)
5766 return true;
5767 break;
5768 case 8:
5769 if (vmcs12->cpu_based_vm_exec_control &
5770 CPU_BASED_CR8_STORE_EXITING)
5771 return true;
5772 break;
5773 }
5774 break;
5775 case 3: /* lmsw */
5776 /*
5777 * lmsw can change bits 1..3 of cr0, and only set bit 0 of
5778 * cr0. Other attempted changes are ignored, with no exit.
5779 */
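		/*
		 * Added note: the 0xe mask below covers CR0.MP/EM/TS (bits
		 * 3:1); CR0.PE (bit 0) is handled separately because LMSW
		 * can set it but never clear it.
		 */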
5780 val = (exit_qualification >> LMSW_SOURCE_DATA_SHIFT) & 0x0f;
5781 if (vmcs12->cr0_guest_host_mask & 0xe &
5782 (val ^ vmcs12->cr0_read_shadow))
5783 return true;
5784 if ((vmcs12->cr0_guest_host_mask & 0x1) &&
5785 !(vmcs12->cr0_read_shadow & 0x1) &&
5786 (val & 0x1))
5787 return true;
5788 break;
5789 }
5790 return false;
5791}
5792
Sean Christopherson72add912021-04-12 16:21:42 +12005793static bool nested_vmx_exit_handled_encls(struct kvm_vcpu *vcpu,
5794 struct vmcs12 *vmcs12)
5795{
5796 u32 encls_leaf;
5797
5798 if (!guest_cpuid_has(vcpu, X86_FEATURE_SGX) ||
5799 !nested_cpu_has2(vmcs12, SECONDARY_EXEC_ENCLS_EXITING))
5800 return false;
5801
5802 encls_leaf = kvm_rax_read(vcpu);
5803 if (encls_leaf > 62)
5804 encls_leaf = 63;
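	/*
	 * Added note: bit 63 of the ENCLS-exiting bitmap is the catch-all
	 * for leaves 63 and above, hence the clamp above.
	 */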
5805 return vmcs12->encls_exiting_bitmap & BIT_ULL(encls_leaf);
5806}
5807
Sean Christopherson55d23752018-12-03 13:53:18 -08005808static bool nested_vmx_exit_handled_vmcs_access(struct kvm_vcpu *vcpu,
5809 struct vmcs12 *vmcs12, gpa_t bitmap)
5810{
5811 u32 vmx_instruction_info;
5812 unsigned long field;
5813 u8 b;
5814
5815 if (!nested_cpu_has_shadow_vmcs(vmcs12))
5816 return true;
5817
5818 /* Decode instruction info and find the field to access */
5819 vmx_instruction_info = vmcs_read32(VMX_INSTRUCTION_INFO);
5820 field = kvm_register_read(vcpu, (((vmx_instruction_info) >> 28) & 0xf));
5821
5822 /* Out-of-range fields always cause a VM exit from L2 to L1 */
5823 if (field >> 15)
5824 return true;
5825
5826 if (kvm_vcpu_read_guest(vcpu, bitmap + field/8, &b, 1))
5827 return true;
5828
5829 return 1 & (b >> (field & 7));
5830}
5831
Oliver Uptonb045ae92020-04-14 22:47:45 +00005832static bool nested_vmx_exit_handled_mtf(struct vmcs12 *vmcs12)
5833{
5834 u32 entry_intr_info = vmcs12->vm_entry_intr_info_field;
5835
5836 if (nested_cpu_has_mtf(vmcs12))
5837 return true;
5838
5839 /*
5840 * An MTF VM-exit may be injected into the guest by setting the
5841 * interruption-type to 7 (other event) and the vector field to 0. Such
5842 * is the case regardless of the 'monitor trap flag' VM-execution
5843 * control.
5844 */
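	/*
	 * Added note: the comparison below matches exactly a valid event
	 * (bit 31) of type 7 (other event) with vector 0 and no error code,
	 * i.e. the synthetic MTF encoding described above.
	 */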
5845 return entry_intr_info == (INTR_INFO_VALID_MASK
5846 | INTR_TYPE_OTHER_EVENT);
5847}
5848
Sean Christopherson55d23752018-12-03 13:53:18 -08005849/*
Sean Christopherson2c1f3322020-04-15 10:55:14 -07005850 * Return true if L0 wants to handle an exit from L2 regardless of whether or not
5851 * L1 wants the exit. Only call this when in is_guest_mode (L2).
Sean Christopherson55d23752018-12-03 13:53:18 -08005852 */
Sean Christopherson8e533242020-11-06 17:03:12 +08005853static bool nested_vmx_l0_wants_exit(struct kvm_vcpu *vcpu,
5854 union vmx_exit_reason exit_reason)
Sean Christopherson55d23752018-12-03 13:53:18 -08005855{
Sean Christopherson2c1f3322020-04-15 10:55:14 -07005856 u32 intr_info;
5857
Sean Christopherson8e533242020-11-06 17:03:12 +08005858 switch ((u16)exit_reason.basic) {
Sean Christopherson2c1f3322020-04-15 10:55:14 -07005859 case EXIT_REASON_EXCEPTION_NMI:
Sean Christopherson87915852020-04-15 13:34:54 -07005860 intr_info = vmx_get_intr_info(vcpu);
Sean Christopherson2c1f3322020-04-15 10:55:14 -07005861 if (is_nmi(intr_info))
5862 return true;
5863 else if (is_page_fault(intr_info))
Sean Christopherson18712c12021-08-11 21:56:15 -07005864 return vcpu->arch.apf.host_apf_flags ||
5865 vmx_need_pf_intercept(vcpu);
Sean Christopherson2c1f3322020-04-15 10:55:14 -07005866 else if (is_debug(intr_info) &&
5867 vcpu->guest_debug &
5868 (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP))
5869 return true;
5870 else if (is_breakpoint(intr_info) &&
5871 vcpu->guest_debug & KVM_GUESTDBG_USE_SW_BP)
5872 return true;
Sean Christophersonb33bb782021-06-22 10:22:44 -07005873 else if (is_alignment_check(intr_info) &&
5874 !vmx_guest_inject_ac(vcpu))
5875 return true;
Sean Christopherson2c1f3322020-04-15 10:55:14 -07005876 return false;
5877 case EXIT_REASON_EXTERNAL_INTERRUPT:
5878 return true;
5879 case EXIT_REASON_MCE_DURING_VMENTRY:
5880 return true;
5881 case EXIT_REASON_EPT_VIOLATION:
5882 /*
5883 * L0 always deals with the EPT violation. If nested EPT is
5884 * used, and the nested mmu code discovers that the address is
5885 * missing in the guest EPT table (EPT12), the EPT violation
5886 * will be injected with nested_ept_inject_page_fault()
5887 */
5888 return true;
5889 case EXIT_REASON_EPT_MISCONFIG:
5890 /*
5891 * L2 never uses directly L1's EPT, but rather L0's own EPT
5892 * table (shadow on EPT) or a merged EPT table that L0 built
5893 * (EPT on EPT). So any problems with the structure of the
5894 * table is L0's fault.
5895 */
5896 return true;
5897 case EXIT_REASON_PREEMPTION_TIMER:
5898 return true;
5899 case EXIT_REASON_PML_FULL:
Sean Christophersonc3bb9a22021-02-12 16:50:07 -08005900 /*
5901 * PML is emulated for an L1 VMM and should never be enabled in
5902 * vmcs02, always "handle" PML_FULL by exiting to userspace.
5903 */
Sean Christopherson2c1f3322020-04-15 10:55:14 -07005904 return true;
5905 case EXIT_REASON_VMFUNC:
5906 /* VM functions are emulated through L2->L0 vmexits. */
5907 return true;
Chenyi Qiang24a996a2021-09-14 17:50:41 +08005908 case EXIT_REASON_BUS_LOCK:
5909 /*
5910 * At present, bus lock VM exit is never exposed to L1.
5911 * Handle L2's bus locks in L0 directly.
5912 */
5913 return true;
Sean Christopherson2c1f3322020-04-15 10:55:14 -07005914 default:
5915 break;
5916 }
5917 return false;
5918}
5919
5920/*
5921 * Return 1 if L1 wants to intercept an exit from L2. Only call this when in
5922 * is_guest_mode (L2).
5923 */
Sean Christopherson8e533242020-11-06 17:03:12 +08005924static bool nested_vmx_l1_wants_exit(struct kvm_vcpu *vcpu,
5925 union vmx_exit_reason exit_reason)
Sean Christopherson2c1f3322020-04-15 10:55:14 -07005926{
Sean Christopherson55d23752018-12-03 13:53:18 -08005927 struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
Sean Christopherson9bd4af22020-04-21 00:53:27 -07005928 u32 intr_info;
Sean Christopherson55d23752018-12-03 13:53:18 -08005929
Sean Christopherson8e533242020-11-06 17:03:12 +08005930 switch ((u16)exit_reason.basic) {
Sean Christopherson55d23752018-12-03 13:53:18 -08005931 case EXIT_REASON_EXCEPTION_NMI:
Sean Christopherson87915852020-04-15 13:34:54 -07005932 intr_info = vmx_get_intr_info(vcpu);
Sean Christopherson55d23752018-12-03 13:53:18 -08005933 if (is_nmi(intr_info))
Sean Christopherson2c1f3322020-04-15 10:55:14 -07005934 return true;
Sean Christopherson55d23752018-12-03 13:53:18 -08005935 else if (is_page_fault(intr_info))
Sean Christopherson2c1f3322020-04-15 10:55:14 -07005936 return true;
Sean Christopherson55d23752018-12-03 13:53:18 -08005937 return vmcs12->exception_bitmap &
5938 (1u << (intr_info & INTR_INFO_VECTOR_MASK));
5939 case EXIT_REASON_EXTERNAL_INTERRUPT:
Sean Christopherson2c1f3322020-04-15 10:55:14 -07005940 return nested_exit_on_intr(vcpu);
Sean Christopherson55d23752018-12-03 13:53:18 -08005941 case EXIT_REASON_TRIPLE_FAULT:
5942 return true;
Xiaoyao Li9dadc2f2019-12-06 16:45:24 +08005943 case EXIT_REASON_INTERRUPT_WINDOW:
5944 return nested_cpu_has(vmcs12, CPU_BASED_INTR_WINDOW_EXITING);
Sean Christopherson55d23752018-12-03 13:53:18 -08005945 case EXIT_REASON_NMI_WINDOW:
Xiaoyao Li4e2a0bc2019-12-06 16:45:25 +08005946 return nested_cpu_has(vmcs12, CPU_BASED_NMI_WINDOW_EXITING);
Sean Christopherson55d23752018-12-03 13:53:18 -08005947 case EXIT_REASON_TASK_SWITCH:
5948 return true;
5949 case EXIT_REASON_CPUID:
5950 return true;
5951 case EXIT_REASON_HLT:
5952 return nested_cpu_has(vmcs12, CPU_BASED_HLT_EXITING);
5953 case EXIT_REASON_INVD:
5954 return true;
5955 case EXIT_REASON_INVLPG:
5956 return nested_cpu_has(vmcs12, CPU_BASED_INVLPG_EXITING);
5957 case EXIT_REASON_RDPMC:
5958 return nested_cpu_has(vmcs12, CPU_BASED_RDPMC_EXITING);
5959 case EXIT_REASON_RDRAND:
5960 return nested_cpu_has2(vmcs12, SECONDARY_EXEC_RDRAND_EXITING);
5961 case EXIT_REASON_RDSEED:
5962 return nested_cpu_has2(vmcs12, SECONDARY_EXEC_RDSEED_EXITING);
5963 case EXIT_REASON_RDTSC: case EXIT_REASON_RDTSCP:
5964 return nested_cpu_has(vmcs12, CPU_BASED_RDTSC_EXITING);
5965 case EXIT_REASON_VMREAD:
5966 return nested_vmx_exit_handled_vmcs_access(vcpu, vmcs12,
5967 vmcs12->vmread_bitmap);
5968 case EXIT_REASON_VMWRITE:
5969 return nested_vmx_exit_handled_vmcs_access(vcpu, vmcs12,
5970 vmcs12->vmwrite_bitmap);
5971 case EXIT_REASON_VMCALL: case EXIT_REASON_VMCLEAR:
5972 case EXIT_REASON_VMLAUNCH: case EXIT_REASON_VMPTRLD:
5973 case EXIT_REASON_VMPTRST: case EXIT_REASON_VMRESUME:
5974 case EXIT_REASON_VMOFF: case EXIT_REASON_VMON:
5975 case EXIT_REASON_INVEPT: case EXIT_REASON_INVVPID:
5976 /*
5977 * VMX instructions trap unconditionally. This allows L1 to
5978 * emulate them for its L2 guest, i.e., allows 3-level nesting!
5979 */
5980 return true;
5981 case EXIT_REASON_CR_ACCESS:
5982 return nested_vmx_exit_handled_cr(vcpu, vmcs12);
5983 case EXIT_REASON_DR_ACCESS:
5984 return nested_cpu_has(vmcs12, CPU_BASED_MOV_DR_EXITING);
5985 case EXIT_REASON_IO_INSTRUCTION:
5986 return nested_vmx_exit_handled_io(vcpu, vmcs12);
5987 case EXIT_REASON_GDTR_IDTR: case EXIT_REASON_LDTR_TR:
5988 return nested_cpu_has2(vmcs12, SECONDARY_EXEC_DESC);
5989 case EXIT_REASON_MSR_READ:
5990 case EXIT_REASON_MSR_WRITE:
5991 return nested_vmx_exit_handled_msr(vcpu, vmcs12, exit_reason);
5992 case EXIT_REASON_INVALID_STATE:
5993 return true;
5994 case EXIT_REASON_MWAIT_INSTRUCTION:
5995 return nested_cpu_has(vmcs12, CPU_BASED_MWAIT_EXITING);
5996 case EXIT_REASON_MONITOR_TRAP_FLAG:
Oliver Uptonb045ae92020-04-14 22:47:45 +00005997 return nested_vmx_exit_handled_mtf(vmcs12);
Sean Christopherson55d23752018-12-03 13:53:18 -08005998 case EXIT_REASON_MONITOR_INSTRUCTION:
5999 return nested_cpu_has(vmcs12, CPU_BASED_MONITOR_EXITING);
6000 case EXIT_REASON_PAUSE_INSTRUCTION:
6001 return nested_cpu_has(vmcs12, CPU_BASED_PAUSE_EXITING) ||
6002 nested_cpu_has2(vmcs12,
6003 SECONDARY_EXEC_PAUSE_LOOP_EXITING);
6004 case EXIT_REASON_MCE_DURING_VMENTRY:
Sean Christopherson2c1f3322020-04-15 10:55:14 -07006005 return true;
Sean Christopherson55d23752018-12-03 13:53:18 -08006006 case EXIT_REASON_TPR_BELOW_THRESHOLD:
6007 return nested_cpu_has(vmcs12, CPU_BASED_TPR_SHADOW);
6008 case EXIT_REASON_APIC_ACCESS:
6009 case EXIT_REASON_APIC_WRITE:
6010 case EXIT_REASON_EOI_INDUCED:
6011 /*
6012 * The controls for "virtualize APIC accesses," "APIC-
6013 * register virtualization," and "virtual-interrupt
6014 * delivery" only come from vmcs12.
6015 */
6016 return true;
Sean Christopherson55d23752018-12-03 13:53:18 -08006017 case EXIT_REASON_INVPCID:
6018 return
6019 nested_cpu_has2(vmcs12, SECONDARY_EXEC_ENABLE_INVPCID) &&
6020 nested_cpu_has(vmcs12, CPU_BASED_INVLPG_EXITING);
6021 case EXIT_REASON_WBINVD:
6022 return nested_cpu_has2(vmcs12, SECONDARY_EXEC_WBINVD_EXITING);
6023 case EXIT_REASON_XSETBV:
6024 return true;
6025 case EXIT_REASON_XSAVES: case EXIT_REASON_XRSTORS:
6026 /*
6027 * This should never happen, since it is not possible to
6028 * set XSS to a non-zero value---neither in L1 nor in L2.
6029 * If it were, XSS would have to be checked against
6030 * the XSS exit bitmap in vmcs12.
6031 */
6032 return nested_cpu_has2(vmcs12, SECONDARY_EXEC_XSAVES);
Tao Xubf653b72019-07-16 14:55:51 +08006033 case EXIT_REASON_UMWAIT:
6034 case EXIT_REASON_TPAUSE:
6035 return nested_cpu_has2(vmcs12,
6036 SECONDARY_EXEC_ENABLE_USR_WAIT_PAUSE);
Sean Christopherson72add912021-04-12 16:21:42 +12006037 case EXIT_REASON_ENCLS:
6038 return nested_vmx_exit_handled_encls(vcpu, vmcs12);
Sean Christopherson55d23752018-12-03 13:53:18 -08006039 default:
6040 return true;
6041 }
6042}
6043
Sean Christopherson7b7bd872020-04-15 10:55:11 -07006044/*
6045 * Conditionally reflect a VM-Exit into L1. Returns %true if the VM-Exit was
6046 * reflected into L1.
6047 */
Sean Christophersonf47baae2020-04-15 10:55:16 -07006048bool nested_vmx_reflect_vmexit(struct kvm_vcpu *vcpu)
Sean Christopherson7b7bd872020-04-15 10:55:11 -07006049{
Sean Christophersonfbdd5022020-04-15 10:55:12 -07006050 struct vcpu_vmx *vmx = to_vmx(vcpu);
Sean Christopherson8e533242020-11-06 17:03:12 +08006051 union vmx_exit_reason exit_reason = vmx->exit_reason;
Sean Christopherson87796552020-04-22 17:11:27 -07006052 unsigned long exit_qual;
6053 u32 exit_intr_info;
Sean Christophersonfbdd5022020-04-15 10:55:12 -07006054
6055 WARN_ON_ONCE(vmx->nested.nested_run_pending);
6056
6057 /*
6058 * Late nested VM-Fail shares the same flow as nested VM-Exit since KVM
6059 * has already loaded L2's state.
6060 */
6061 if (unlikely(vmx->fail)) {
6062 trace_kvm_nested_vmenter_failed(
6063 "hardware VM-instruction error: ",
6064 vmcs_read32(VM_INSTRUCTION_ERROR));
6065 exit_intr_info = 0;
6066 exit_qual = 0;
6067 goto reflect_vmexit;
6068 }
Sean Christopherson7b7bd872020-04-15 10:55:11 -07006069
David Edmondson0a62a032021-09-20 11:37:35 +01006070 trace_kvm_nested_vmexit(vcpu, KVM_ISA_VMX);
Sean Christopherson236871b2020-04-15 10:55:13 -07006071
Sean Christopherson2c1f3322020-04-15 10:55:14 -07006072 /* If L0 (KVM) wants the exit, it trumps L1's desires. */
6073 if (nested_vmx_l0_wants_exit(vcpu, exit_reason))
6074 return false;
6075
6076 /* If L1 doesn't want the exit, handle it in L0. */
6077 if (!nested_vmx_l1_wants_exit(vcpu, exit_reason))
Sean Christopherson7b7bd872020-04-15 10:55:11 -07006078 return false;
6079
6080 /*
Sean Christopherson1d283062020-04-15 10:55:15 -07006081 * vmcs.VM_EXIT_INTR_INFO is only valid for EXCEPTION_NMI exits. For
6082 * EXTERNAL_INTERRUPT, the value for vmcs12->vm_exit_intr_info would
6083 * need to be synthesized by querying the in-kernel LAPIC, but external
6084 * interrupts are never reflected to L1 so it's a non-issue.
Sean Christopherson7b7bd872020-04-15 10:55:11 -07006085 */
Sean Christopherson02f19652020-09-23 13:13:49 -07006086 exit_intr_info = vmx_get_intr_info(vcpu);
Sean Christophersonf315f2b2020-09-23 13:13:45 -07006087 if (is_exception_with_error_code(exit_intr_info)) {
Sean Christopherson7b7bd872020-04-15 10:55:11 -07006088 struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
6089
6090 vmcs12->vm_exit_intr_error_code =
6091 vmcs_read32(VM_EXIT_INTR_ERROR_CODE);
6092 }
Sean Christopherson02f19652020-09-23 13:13:49 -07006093 exit_qual = vmx_get_exit_qual(vcpu);
Sean Christopherson7b7bd872020-04-15 10:55:11 -07006094
Sean Christophersonfbdd5022020-04-15 10:55:12 -07006095reflect_vmexit:
Sean Christopherson8e533242020-11-06 17:03:12 +08006096 nested_vmx_vmexit(vcpu, exit_reason.full, exit_intr_info, exit_qual);
Sean Christopherson7b7bd872020-04-15 10:55:11 -07006097 return true;
6098}
Sean Christopherson55d23752018-12-03 13:53:18 -08006099
6100static int vmx_get_nested_state(struct kvm_vcpu *vcpu,
6101 struct kvm_nested_state __user *user_kvm_nested_state,
6102 u32 user_data_size)
6103{
6104 struct vcpu_vmx *vmx;
6105 struct vmcs12 *vmcs12;
6106 struct kvm_nested_state kvm_state = {
6107 .flags = 0,
Liran Alon6ca00df2019-06-16 15:03:10 +03006108 .format = KVM_STATE_NESTED_FORMAT_VMX,
Sean Christopherson55d23752018-12-03 13:53:18 -08006109 .size = sizeof(kvm_state),
Peter Shier850448f2020-05-26 14:51:06 -07006110 .hdr.vmx.flags = 0,
Yu Zhang64c78502021-09-30 01:51:53 +08006111 .hdr.vmx.vmxon_pa = INVALID_GPA,
6112 .hdr.vmx.vmcs12_pa = INVALID_GPA,
Peter Shier850448f2020-05-26 14:51:06 -07006113 .hdr.vmx.preemption_timer_deadline = 0,
Sean Christopherson55d23752018-12-03 13:53:18 -08006114 };
Liran Alon6ca00df2019-06-16 15:03:10 +03006115 struct kvm_vmx_nested_state_data __user *user_vmx_nested_state =
6116 &user_kvm_nested_state->data.vmx[0];
Sean Christopherson55d23752018-12-03 13:53:18 -08006117
6118 if (!vcpu)
Liran Alon6ca00df2019-06-16 15:03:10 +03006119 return kvm_state.size + sizeof(*user_vmx_nested_state);
Sean Christopherson55d23752018-12-03 13:53:18 -08006120
6121 vmx = to_vmx(vcpu);
6122 vmcs12 = get_vmcs12(vcpu);
6123
Sean Christopherson55d23752018-12-03 13:53:18 -08006124 if (nested_vmx_allowed(vcpu) &&
6125 (vmx->nested.vmxon || vmx->nested.smm.vmxon)) {
Liran Alon6ca00df2019-06-16 15:03:10 +03006126 kvm_state.hdr.vmx.vmxon_pa = vmx->nested.vmxon_ptr;
6127 kvm_state.hdr.vmx.vmcs12_pa = vmx->nested.current_vmptr;
Sean Christopherson55d23752018-12-03 13:53:18 -08006128
6129 if (vmx_has_valid_vmcs12(vcpu)) {
Liran Alon6ca00df2019-06-16 15:03:10 +03006130 kvm_state.size += sizeof(user_vmx_nested_state->vmcs12);
Sean Christopherson55d23752018-12-03 13:53:18 -08006131
Vitaly Kuznetsov27849962021-05-26 15:20:20 +02006132 /* 'hv_evmcs_vmptr' can also be EVMPTR_MAP_PENDING here */
6133 if (vmx->nested.hv_evmcs_vmptr != EVMPTR_INVALID)
Liran Alon323d73a2019-06-26 16:09:27 +03006134 kvm_state.flags |= KVM_STATE_NESTED_EVMCS;
6135
Sean Christopherson55d23752018-12-03 13:53:18 -08006136 if (is_guest_mode(vcpu) &&
6137 nested_cpu_has_shadow_vmcs(vmcs12) &&
Yu Zhang64c78502021-09-30 01:51:53 +08006138 vmcs12->vmcs_link_pointer != INVALID_GPA)
Liran Alon6ca00df2019-06-16 15:03:10 +03006139 kvm_state.size += sizeof(user_vmx_nested_state->shadow_vmcs12);
Sean Christopherson55d23752018-12-03 13:53:18 -08006140 }
6141
6142 if (vmx->nested.smm.vmxon)
Liran Alon6ca00df2019-06-16 15:03:10 +03006143 kvm_state.hdr.vmx.smm.flags |= KVM_STATE_NESTED_SMM_VMXON;
Sean Christopherson55d23752018-12-03 13:53:18 -08006144
6145 if (vmx->nested.smm.guest_mode)
Liran Alon6ca00df2019-06-16 15:03:10 +03006146 kvm_state.hdr.vmx.smm.flags |= KVM_STATE_NESTED_SMM_GUEST_MODE;
Sean Christopherson55d23752018-12-03 13:53:18 -08006147
6148 if (is_guest_mode(vcpu)) {
6149 kvm_state.flags |= KVM_STATE_NESTED_GUEST_MODE;
6150
6151 if (vmx->nested.nested_run_pending)
6152 kvm_state.flags |= KVM_STATE_NESTED_RUN_PENDING;
Oliver Upton5ef8acb2020-02-07 02:36:07 -08006153
6154 if (vmx->nested.mtf_pending)
6155 kvm_state.flags |= KVM_STATE_NESTED_MTF_PENDING;
Peter Shier850448f2020-05-26 14:51:06 -07006156
6157 if (nested_cpu_has_preemption_timer(vmcs12) &&
6158 vmx->nested.has_preemption_timer_deadline) {
6159 kvm_state.hdr.vmx.flags |=
6160 KVM_STATE_VMX_PREEMPTION_TIMER_DEADLINE;
6161 kvm_state.hdr.vmx.preemption_timer_deadline =
6162 vmx->nested.preemption_timer_deadline;
6163 }
Sean Christopherson55d23752018-12-03 13:53:18 -08006164 }
6165 }
6166
6167 if (user_data_size < kvm_state.size)
6168 goto out;
6169
6170 if (copy_to_user(user_kvm_nested_state, &kvm_state, sizeof(kvm_state)))
6171 return -EFAULT;
6172
6173 if (!vmx_has_valid_vmcs12(vcpu))
6174 goto out;
6175
6176 /*
6177 * When running L2, the authoritative vmcs12 state is in the
6178 * vmcs02. When running L1, the authoritative vmcs12 state is
6179 * in the shadow or enlightened vmcs linked to vmcs01, unless
Sean Christopherson3731905ef2019-05-07 08:36:27 -07006180 * need_vmcs12_to_shadow_sync is set, in which case, the authoritative
Sean Christopherson55d23752018-12-03 13:53:18 -08006181 * vmcs12 state is in the vmcs12 already.
6182 */
6183 if (is_guest_mode(vcpu)) {
Sean Christopherson3731905ef2019-05-07 08:36:27 -07006184 sync_vmcs02_to_vmcs12(vcpu, vmcs12);
Sean Christopherson7952d762019-05-07 08:36:29 -07006185 sync_vmcs02_to_vmcs12_rare(vcpu, vmcs12);
Maxim Levitskyd51e1d32021-01-14 22:54:47 +02006186 } else {
6187 copy_vmcs02_to_vmcs12_rare(vcpu, get_vmcs12(vcpu));
6188 if (!vmx->nested.need_vmcs12_to_shadow_sync) {
Vitaly Kuznetsov1e9dfbd2021-05-26 15:20:16 +02006189 if (evmptr_is_valid(vmx->nested.hv_evmcs_vmptr))
Vitaly Kuznetsovd6bf71a2021-05-26 15:20:22 +02006190 /*
6191			 * The L1 hypervisor is not obliged to keep the eVMCS
6192			 * clean fields up-to-date while not in guest mode;
6193			 * 'hv_clean_fields' is only guaranteed to be valid
6194			 * at VM-entry, so we need to ignore it here and do
6195			 * a full copy.
6196 */
6197 copy_enlightened_to_vmcs12(vmx, 0);
Maxim Levitskyd51e1d32021-01-14 22:54:47 +02006198 else if (enable_shadow_vmcs)
6199 copy_shadow_to_vmcs12(vmx);
6200 }
Sean Christopherson55d23752018-12-03 13:53:18 -08006201 }
6202
Liran Alon6ca00df2019-06-16 15:03:10 +03006203 BUILD_BUG_ON(sizeof(user_vmx_nested_state->vmcs12) < VMCS12_SIZE);
6204 BUILD_BUG_ON(sizeof(user_vmx_nested_state->shadow_vmcs12) < VMCS12_SIZE);
6205
Tom Roeder3a33d032019-01-24 13:48:20 -08006206 /*
6207 * Copy over the full allocated size of vmcs12 rather than just the size
6208 * of the struct.
6209 */
Liran Alon6ca00df2019-06-16 15:03:10 +03006210 if (copy_to_user(user_vmx_nested_state->vmcs12, vmcs12, VMCS12_SIZE))
Sean Christopherson55d23752018-12-03 13:53:18 -08006211 return -EFAULT;
6212
6213 if (nested_cpu_has_shadow_vmcs(vmcs12) &&
Yu Zhang64c78502021-09-30 01:51:53 +08006214 vmcs12->vmcs_link_pointer != INVALID_GPA) {
Liran Alon6ca00df2019-06-16 15:03:10 +03006215 if (copy_to_user(user_vmx_nested_state->shadow_vmcs12,
Tom Roeder3a33d032019-01-24 13:48:20 -08006216 get_shadow_vmcs12(vcpu), VMCS12_SIZE))
Sean Christopherson55d23752018-12-03 13:53:18 -08006217 return -EFAULT;
6218 }
Sean Christopherson55d23752018-12-03 13:53:18 -08006219out:
6220 return kvm_state.size;
6221}
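
/*
 * A rough userspace-side sketch of saving the state produced above, assuming
 * the KVM_GET_NESTED_STATE semantics documented in the KVM API (the ioctl
 * fails with E2BIG and writes back the required size when the supplied size
 * is too small) and the uapi KVM_STATE_NESTED_VMX_VMCS_SIZE constant; the
 * buffer sizing, vcpu_fd name and error handling are illustrative only:
 *
 *	struct kvm_nested_state *state;
 *	uint32_t size = sizeof(*state) + 2 * KVM_STATE_NESTED_VMX_VMCS_SIZE;
 *
 *	state = calloc(1, size);
 *	state->size = size;
 *	if (ioctl(vcpu_fd, KVM_GET_NESTED_STATE, state) < 0) {
 *		if (errno == E2BIG) {
 *			// grow the buffer to state->size and retry
 *		}
 *	}
 */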
6222
6223/*
6224 * Forcibly leave nested mode in order to be able to reset the VCPU later on.
6225 */
6226void vmx_leave_nested(struct kvm_vcpu *vcpu)
6227{
6228 if (is_guest_mode(vcpu)) {
6229 to_vmx(vcpu)->nested.nested_run_pending = 0;
6230 nested_vmx_vmexit(vcpu, -1, 0, 0);
6231 }
6232 free_nested(vcpu);
6233}
6234
6235static int vmx_set_nested_state(struct kvm_vcpu *vcpu,
6236 struct kvm_nested_state __user *user_kvm_nested_state,
6237 struct kvm_nested_state *kvm_state)
6238{
6239 struct vcpu_vmx *vmx = to_vmx(vcpu);
6240 struct vmcs12 *vmcs12;
Sean Christopherson68cda402020-05-11 15:05:29 -07006241 enum vm_entry_failure_code ignored;
Liran Alon6ca00df2019-06-16 15:03:10 +03006242 struct kvm_vmx_nested_state_data __user *user_vmx_nested_state =
6243 &user_kvm_nested_state->data.vmx[0];
Sean Christopherson55d23752018-12-03 13:53:18 -08006244 int ret;
6245
Liran Alon6ca00df2019-06-16 15:03:10 +03006246 if (kvm_state->format != KVM_STATE_NESTED_FORMAT_VMX)
Sean Christopherson55d23752018-12-03 13:53:18 -08006247 return -EINVAL;
6248
Yu Zhang64c78502021-09-30 01:51:53 +08006249 if (kvm_state->hdr.vmx.vmxon_pa == INVALID_GPA) {
Liran Alon6ca00df2019-06-16 15:03:10 +03006250 if (kvm_state->hdr.vmx.smm.flags)
Sean Christopherson55d23752018-12-03 13:53:18 -08006251 return -EINVAL;
6252
Yu Zhang64c78502021-09-30 01:51:53 +08006253 if (kvm_state->hdr.vmx.vmcs12_pa != INVALID_GPA)
Sean Christopherson55d23752018-12-03 13:53:18 -08006254 return -EINVAL;
6255
Liran Alon323d73a2019-06-26 16:09:27 +03006256 /*
6257 * KVM_STATE_NESTED_EVMCS used to signal that KVM should
6258		 * enable the eVMCS capability on the vCPU. However, the
6259		 * code has since been changed so that the flag signals that
6260		 * vmcs12 should be copied into the eVMCS in guest memory.
6261 *
6262		 * To preserve backwards compatibility, allow userspace
6263		 * to set this flag even when there is no VMXON region.
6264 */
Paolo Bonzini9fd58872019-06-19 16:52:27 +02006265 if (kvm_state->flags & ~KVM_STATE_NESTED_EVMCS)
6266 return -EINVAL;
6267 } else {
6268 if (!nested_vmx_allowed(vcpu))
6269 return -EINVAL;
Sean Christopherson55d23752018-12-03 13:53:18 -08006270
Paolo Bonzini9fd58872019-06-19 16:52:27 +02006271 if (!page_address_valid(vcpu, kvm_state->hdr.vmx.vmxon_pa))
6272 return -EINVAL;
Liran Alon323d73a2019-06-26 16:09:27 +03006273 }
Sean Christopherson55d23752018-12-03 13:53:18 -08006274
Liran Alon6ca00df2019-06-16 15:03:10 +03006275 if ((kvm_state->hdr.vmx.smm.flags & KVM_STATE_NESTED_SMM_GUEST_MODE) &&
Sean Christopherson55d23752018-12-03 13:53:18 -08006276 (kvm_state->flags & KVM_STATE_NESTED_GUEST_MODE))
6277 return -EINVAL;
6278
Liran Alon6ca00df2019-06-16 15:03:10 +03006279 if (kvm_state->hdr.vmx.smm.flags &
Sean Christopherson55d23752018-12-03 13:53:18 -08006280 ~(KVM_STATE_NESTED_SMM_GUEST_MODE | KVM_STATE_NESTED_SMM_VMXON))
6281 return -EINVAL;
6282
Paolo Bonzini5e105c82020-07-27 08:55:09 -04006283 if (kvm_state->hdr.vmx.flags & ~KVM_STATE_VMX_PREEMPTION_TIMER_DEADLINE)
6284 return -EINVAL;
6285
Sean Christopherson55d23752018-12-03 13:53:18 -08006286 /*
6287 * SMM temporarily disables VMX, so we cannot be in guest mode,
6288 * nor can VMLAUNCH/VMRESUME be pending. Outside SMM, SMM flags
6289 * must be zero.
6290 */
Liran Alon65b712f12019-06-25 14:26:42 +03006291 if (is_smm(vcpu) ?
6292 (kvm_state->flags &
6293 (KVM_STATE_NESTED_GUEST_MODE | KVM_STATE_NESTED_RUN_PENDING))
6294 : kvm_state->hdr.vmx.smm.flags)
Sean Christopherson55d23752018-12-03 13:53:18 -08006295 return -EINVAL;
6296
Liran Alon6ca00df2019-06-16 15:03:10 +03006297 if ((kvm_state->hdr.vmx.smm.flags & KVM_STATE_NESTED_SMM_GUEST_MODE) &&
6298 !(kvm_state->hdr.vmx.smm.flags & KVM_STATE_NESTED_SMM_VMXON))
Sean Christopherson55d23752018-12-03 13:53:18 -08006299 return -EINVAL;
6300
Liran Alon323d73a2019-06-26 16:09:27 +03006301 if ((kvm_state->flags & KVM_STATE_NESTED_EVMCS) &&
6302 (!nested_vmx_allowed(vcpu) || !vmx->nested.enlightened_vmcs_enabled))
Paolo Bonzini9fd58872019-06-19 16:52:27 +02006303 return -EINVAL;
6304
Liran Alon323d73a2019-06-26 16:09:27 +03006305 vmx_leave_nested(vcpu);
Paolo Bonzini9fd58872019-06-19 16:52:27 +02006306
Yu Zhang64c78502021-09-30 01:51:53 +08006307 if (kvm_state->hdr.vmx.vmxon_pa == INVALID_GPA)
Sean Christopherson55d23752018-12-03 13:53:18 -08006308 return 0;
6309
Liran Alon6ca00df2019-06-16 15:03:10 +03006310 vmx->nested.vmxon_ptr = kvm_state->hdr.vmx.vmxon_pa;
Sean Christopherson55d23752018-12-03 13:53:18 -08006311 ret = enter_vmx_operation(vcpu);
6312 if (ret)
6313 return ret;
6314
Paolo Bonzini0f02bd02020-07-27 09:00:37 -04006315	/* Empty 'VMXON' state is permitted if no VMCS is loaded. */
6316 if (kvm_state->size < sizeof(*kvm_state) + sizeof(*vmcs12)) {
6317 /* See vmx_has_valid_vmcs12. */
6318 if ((kvm_state->flags & KVM_STATE_NESTED_GUEST_MODE) ||
6319 (kvm_state->flags & KVM_STATE_NESTED_EVMCS) ||
Yu Zhang64c78502021-09-30 01:51:53 +08006320 (kvm_state->hdr.vmx.vmcs12_pa != INVALID_GPA))
Paolo Bonzini0f02bd02020-07-27 09:00:37 -04006321 return -EINVAL;
6322 else
6323 return 0;
6324 }
Sean Christopherson55d23752018-12-03 13:53:18 -08006325
Yu Zhang64c78502021-09-30 01:51:53 +08006326 if (kvm_state->hdr.vmx.vmcs12_pa != INVALID_GPA) {
Liran Alon6ca00df2019-06-16 15:03:10 +03006327 if (kvm_state->hdr.vmx.vmcs12_pa == kvm_state->hdr.vmx.vmxon_pa ||
6328 !page_address_valid(vcpu, kvm_state->hdr.vmx.vmcs12_pa))
Sean Christopherson55d23752018-12-03 13:53:18 -08006329 return -EINVAL;
6330
Liran Alon6ca00df2019-06-16 15:03:10 +03006331 set_current_vmptr(vmx, kvm_state->hdr.vmx.vmcs12_pa);
Sean Christopherson55d23752018-12-03 13:53:18 -08006332 } else if (kvm_state->flags & KVM_STATE_NESTED_EVMCS) {
6333 /*
Vitaly Kuznetsove942dbf2020-03-09 16:52:12 +01006334 * nested_vmx_handle_enlightened_vmptrld() cannot be called
6335 * directly from here as HV_X64_MSR_VP_ASSIST_PAGE may not be
6336 * restored yet. EVMCS will be mapped from
6337 * nested_get_vmcs12_pages().
Sean Christopherson55d23752018-12-03 13:53:18 -08006338 */
Vitaly Kuznetsov27849962021-05-26 15:20:20 +02006339 vmx->nested.hv_evmcs_vmptr = EVMPTR_MAP_PENDING;
Paolo Bonzini729c15c2020-09-22 06:53:57 -04006340 kvm_make_request(KVM_REQ_GET_NESTED_STATE_PAGES, vcpu);
Sean Christopherson55d23752018-12-03 13:53:18 -08006341 } else {
6342 return -EINVAL;
6343 }
6344
Liran Alon6ca00df2019-06-16 15:03:10 +03006345 if (kvm_state->hdr.vmx.smm.flags & KVM_STATE_NESTED_SMM_VMXON) {
Sean Christopherson55d23752018-12-03 13:53:18 -08006346 vmx->nested.smm.vmxon = true;
6347 vmx->nested.vmxon = false;
6348
Liran Alon6ca00df2019-06-16 15:03:10 +03006349 if (kvm_state->hdr.vmx.smm.flags & KVM_STATE_NESTED_SMM_GUEST_MODE)
Sean Christopherson55d23752018-12-03 13:53:18 -08006350 vmx->nested.smm.guest_mode = true;
6351 }
6352
6353 vmcs12 = get_vmcs12(vcpu);
Liran Alon6ca00df2019-06-16 15:03:10 +03006354 if (copy_from_user(vmcs12, user_vmx_nested_state->vmcs12, sizeof(*vmcs12)))
Sean Christopherson55d23752018-12-03 13:53:18 -08006355 return -EFAULT;
6356
6357 if (vmcs12->hdr.revision_id != VMCS12_REVISION)
6358 return -EINVAL;
6359
6360 if (!(kvm_state->flags & KVM_STATE_NESTED_GUEST_MODE))
6361 return 0;
6362
Sean Christopherson21be4ca2019-05-08 11:04:32 -07006363 vmx->nested.nested_run_pending =
6364 !!(kvm_state->flags & KVM_STATE_NESTED_RUN_PENDING);
6365
Oliver Upton5ef8acb2020-02-07 02:36:07 -08006366 vmx->nested.mtf_pending =
6367 !!(kvm_state->flags & KVM_STATE_NESTED_MTF_PENDING);
6368
Sean Christopherson21be4ca2019-05-08 11:04:32 -07006369 ret = -EINVAL;
Sean Christopherson55d23752018-12-03 13:53:18 -08006370 if (nested_cpu_has_shadow_vmcs(vmcs12) &&
Yu Zhang64c78502021-09-30 01:51:53 +08006371 vmcs12->vmcs_link_pointer != INVALID_GPA) {
Sean Christopherson55d23752018-12-03 13:53:18 -08006372 struct vmcs12 *shadow_vmcs12 = get_shadow_vmcs12(vcpu);
6373
Liran Alon6ca00df2019-06-16 15:03:10 +03006374 if (kvm_state->size <
6375 sizeof(*kvm_state) +
6376 sizeof(user_vmx_nested_state->vmcs12) + sizeof(*shadow_vmcs12))
Sean Christopherson21be4ca2019-05-08 11:04:32 -07006377 goto error_guest_mode;
Sean Christopherson55d23752018-12-03 13:53:18 -08006378
6379 if (copy_from_user(shadow_vmcs12,
Liran Alon6ca00df2019-06-16 15:03:10 +03006380 user_vmx_nested_state->shadow_vmcs12,
6381 sizeof(*shadow_vmcs12))) {
Sean Christopherson21be4ca2019-05-08 11:04:32 -07006382 ret = -EFAULT;
6383 goto error_guest_mode;
6384 }
Sean Christopherson55d23752018-12-03 13:53:18 -08006385
6386 if (shadow_vmcs12->hdr.revision_id != VMCS12_REVISION ||
6387 !shadow_vmcs12->hdr.shadow_vmcs)
Sean Christopherson21be4ca2019-05-08 11:04:32 -07006388 goto error_guest_mode;
Sean Christopherson55d23752018-12-03 13:53:18 -08006389 }
6390
Paolo Bonzini83d31e52020-07-09 13:12:09 -04006391 vmx->nested.has_preemption_timer_deadline = false;
Peter Shier850448f2020-05-26 14:51:06 -07006392 if (kvm_state->hdr.vmx.flags & KVM_STATE_VMX_PREEMPTION_TIMER_DEADLINE) {
6393 vmx->nested.has_preemption_timer_deadline = true;
6394 vmx->nested.preemption_timer_deadline =
6395 kvm_state->hdr.vmx.preemption_timer_deadline;
6396 }
6397
Sean Christopherson5478ba32019-04-11 12:18:06 -07006398 if (nested_vmx_check_controls(vcpu, vmcs12) ||
6399 nested_vmx_check_host_state(vcpu, vmcs12) ||
Sean Christopherson68cda402020-05-11 15:05:29 -07006400 nested_vmx_check_guest_state(vcpu, vmcs12, &ignored))
Sean Christopherson21be4ca2019-05-08 11:04:32 -07006401 goto error_guest_mode;
Sean Christopherson55d23752018-12-03 13:53:18 -08006402
6403 vmx->nested.dirty_vmcs12 = true;
Vitaly Kuznetsoved2a4802021-11-29 10:47:03 +01006404 vmx->nested.force_msr_bitmap_recalc = true;
Sean Christopherson55d23752018-12-03 13:53:18 -08006405 ret = nested_vmx_enter_non_root_mode(vcpu, false);
Sean Christopherson21be4ca2019-05-08 11:04:32 -07006406 if (ret)
6407 goto error_guest_mode;
Sean Christopherson55d23752018-12-03 13:53:18 -08006408
6409 return 0;
Sean Christopherson21be4ca2019-05-08 11:04:32 -07006410
6411error_guest_mode:
6412 vmx->nested.nested_run_pending = 0;
6413 return ret;
Sean Christopherson55d23752018-12-03 13:53:18 -08006414}
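
/*
 * The matching restore-side sketch, again with illustrative userspace names;
 * the blob saved via KVM_GET_NESTED_STATE is fed back verbatim, assuming
 * guest CPUID and the VMX capability MSRs were restored beforehand:
 *
 *	if (ioctl(vcpu_fd, KVM_SET_NESTED_STATE, state) < 0)
 *		err(1, "KVM_SET_NESTED_STATE");
 */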
6415
Xiaoyao Li1b842922019-10-20 17:11:01 +08006416void nested_vmx_set_vmcs_shadowing_bitmap(void)
Sean Christopherson55d23752018-12-03 13:53:18 -08006417{
6418 if (enable_shadow_vmcs) {
Sean Christopherson55d23752018-12-03 13:53:18 -08006419 vmcs_write64(VMREAD_BITMAP, __pa(vmx_vmread_bitmap));
Sean Christophersonfadcead2019-05-07 08:36:23 -07006420 vmcs_write64(VMWRITE_BITMAP, __pa(vmx_vmwrite_bitmap));
Sean Christopherson55d23752018-12-03 13:53:18 -08006421 }
6422}
6423
6424/*
Sean Christophersonba1f8242021-06-18 14:46:58 -07006425 * Indexing into the vmcs12 uses the VMCS encoding rotated left by 6. Undo
6426 * that madness to get the encoding for comparison.
6427 */
6428#define VMCS12_IDX_TO_ENC(idx) ((u16)(((u16)(idx) >> 6) | ((u16)(idx) << 10)))
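
/*
 * Worked example of the rotation above, using the 64-bit field encoding
 * 0x2800: rotating it left by 6 gives table index 0x000a, and
 * VMCS12_IDX_TO_ENC(0x000a) = (0x000a >> 6) | (0x000a << 10) = 0x2800
 * recovers the original encoding.
 */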
6429
6430static u64 nested_vmx_calc_vmcs_enum_msr(void)
6431{
6432 /*
6433	 * Note that these are the so-called "index" of the VMCS field encoding, not
6434 * the index into vmcs12.
6435 */
6436 unsigned int max_idx, idx;
6437 int i;
6438
6439 /*
6440 * For better or worse, KVM allows VMREAD/VMWRITE to all fields in
6441 * vmcs12, regardless of whether or not the associated feature is
6442 * exposed to L1. Simply find the field with the highest index.
6443 */
6444 max_idx = 0;
6445 for (i = 0; i < nr_vmcs12_fields; i++) {
6446 /* The vmcs12 table is very, very sparsely populated. */
6447 if (!vmcs_field_to_offset_table[i])
6448 continue;
6449
6450 idx = vmcs_field_index(VMCS12_IDX_TO_ENC(i));
6451 if (idx > max_idx)
6452 max_idx = idx;
6453 }
6454
6455 return (u64)max_idx << VMCS_FIELD_INDEX_SHIFT;
6456}
6457
6458/*
Sean Christopherson55d23752018-12-03 13:53:18 -08006459 * nested_vmx_setup_ctls_msrs() sets up variables containing the values to be
6460 * returned for the various VMX controls MSRs when nested VMX is enabled.
6461 * The same values should also be used to verify that vmcs12 control fields are
6462 * valid during nested entry from L1 to L2.
6463 * Each of these control msrs has a low and high 32-bit half: A low bit is on
6464 * if the corresponding bit in the (32-bit) control field *must* be on, and a
6465 * bit in the high half is on if the corresponding bit in the control field
6466 * may be on. See also vmx_control_verify().
6467 */
Vitaly Kuznetsova4443262020-02-20 18:22:04 +01006468void nested_vmx_setup_ctls_msrs(struct nested_vmx_msrs *msrs, u32 ept_caps)
Sean Christopherson55d23752018-12-03 13:53:18 -08006469{
6470 /*
6471 * Note that as a general rule, the high half of the MSRs (bits in
6472 * the control fields which may be 1) should be initialized by the
6473 * intersection of the underlying hardware's MSR (i.e., features which
6474 * can be supported) and the list of features we want to expose -
6475 * because they are known to be properly supported in our code.
6476 * Also, usually, the low half of the MSRs (bits which must be 1) can
6477 * be set to 0, meaning that L1 may turn off any of these bits. The
6478 * reason is that if one of these bits is necessary, it will appear
6479 * in vmcs01 and prepare_vmcs02, when it bitwise-or's the control
6480 * fields of vmcs01 and vmcs02, will turn these bits off - and
Sean Christopherson2c1f3322020-04-15 10:55:14 -07006481 * nested_vmx_l1_wants_exit() will not pass related exits to L1.
Sean Christopherson55d23752018-12-03 13:53:18 -08006482 * These rules have exceptions below.
6483 */
6484
6485 /* pin-based controls */
6486 rdmsr(MSR_IA32_VMX_PINBASED_CTLS,
6487 msrs->pinbased_ctls_low,
6488 msrs->pinbased_ctls_high);
6489 msrs->pinbased_ctls_low |=
6490 PIN_BASED_ALWAYSON_WITHOUT_TRUE_MSR;
6491 msrs->pinbased_ctls_high &=
6492 PIN_BASED_EXT_INTR_MASK |
6493 PIN_BASED_NMI_EXITING |
6494 PIN_BASED_VIRTUAL_NMIS |
Vitaly Kuznetsova4443262020-02-20 18:22:04 +01006495 (enable_apicv ? PIN_BASED_POSTED_INTR : 0);
Sean Christopherson55d23752018-12-03 13:53:18 -08006496 msrs->pinbased_ctls_high |=
6497 PIN_BASED_ALWAYSON_WITHOUT_TRUE_MSR |
6498 PIN_BASED_VMX_PREEMPTION_TIMER;
6499
6500 /* exit controls */
6501 rdmsr(MSR_IA32_VMX_EXIT_CTLS,
6502 msrs->exit_ctls_low,
6503 msrs->exit_ctls_high);
6504 msrs->exit_ctls_low =
6505 VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR;
6506
6507 msrs->exit_ctls_high &=
6508#ifdef CONFIG_X86_64
6509 VM_EXIT_HOST_ADDR_SPACE_SIZE |
6510#endif
Chenyi Qiangefc83132020-08-28 16:56:18 +08006511 VM_EXIT_LOAD_IA32_PAT | VM_EXIT_SAVE_IA32_PAT |
6512 VM_EXIT_CLEAR_BNDCFGS | VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL;
Sean Christopherson55d23752018-12-03 13:53:18 -08006513 msrs->exit_ctls_high |=
6514 VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR |
6515 VM_EXIT_LOAD_IA32_EFER | VM_EXIT_SAVE_IA32_EFER |
6516 VM_EXIT_SAVE_VMX_PREEMPTION_TIMER | VM_EXIT_ACK_INTR_ON_EXIT;
6517
6518 /* We support free control of debug control saving. */
6519 msrs->exit_ctls_low &= ~VM_EXIT_SAVE_DEBUG_CONTROLS;
6520
6521 /* entry controls */
6522 rdmsr(MSR_IA32_VMX_ENTRY_CTLS,
6523 msrs->entry_ctls_low,
6524 msrs->entry_ctls_high);
6525 msrs->entry_ctls_low =
6526 VM_ENTRY_ALWAYSON_WITHOUT_TRUE_MSR;
6527 msrs->entry_ctls_high &=
6528#ifdef CONFIG_X86_64
6529 VM_ENTRY_IA32E_MODE |
6530#endif
Chenyi Qiangefc83132020-08-28 16:56:18 +08006531 VM_ENTRY_LOAD_IA32_PAT | VM_ENTRY_LOAD_BNDCFGS |
6532 VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL;
Sean Christopherson55d23752018-12-03 13:53:18 -08006533 msrs->entry_ctls_high |=
6534 (VM_ENTRY_ALWAYSON_WITHOUT_TRUE_MSR | VM_ENTRY_LOAD_IA32_EFER);
6535
6536 /* We support free control of debug control loading. */
6537 msrs->entry_ctls_low &= ~VM_ENTRY_LOAD_DEBUG_CONTROLS;
6538
6539 /* cpu-based controls */
6540 rdmsr(MSR_IA32_VMX_PROCBASED_CTLS,
6541 msrs->procbased_ctls_low,
6542 msrs->procbased_ctls_high);
6543 msrs->procbased_ctls_low =
6544 CPU_BASED_ALWAYSON_WITHOUT_TRUE_MSR;
6545 msrs->procbased_ctls_high &=
Xiaoyao Li9dadc2f2019-12-06 16:45:24 +08006546 CPU_BASED_INTR_WINDOW_EXITING |
Xiaoyao Li5e3d3942019-12-06 16:45:26 +08006547 CPU_BASED_NMI_WINDOW_EXITING | CPU_BASED_USE_TSC_OFFSETTING |
Sean Christopherson55d23752018-12-03 13:53:18 -08006548 CPU_BASED_HLT_EXITING | CPU_BASED_INVLPG_EXITING |
6549 CPU_BASED_MWAIT_EXITING | CPU_BASED_CR3_LOAD_EXITING |
6550 CPU_BASED_CR3_STORE_EXITING |
6551#ifdef CONFIG_X86_64
6552 CPU_BASED_CR8_LOAD_EXITING | CPU_BASED_CR8_STORE_EXITING |
6553#endif
6554 CPU_BASED_MOV_DR_EXITING | CPU_BASED_UNCOND_IO_EXITING |
6555 CPU_BASED_USE_IO_BITMAPS | CPU_BASED_MONITOR_TRAP_FLAG |
6556 CPU_BASED_MONITOR_EXITING | CPU_BASED_RDPMC_EXITING |
6557 CPU_BASED_RDTSC_EXITING | CPU_BASED_PAUSE_EXITING |
6558 CPU_BASED_TPR_SHADOW | CPU_BASED_ACTIVATE_SECONDARY_CONTROLS;
6559 /*
6560 * We can allow some features even when not supported by the
6561 * hardware. For example, L1 can specify an MSR bitmap - and we
6562 * can use it to avoid exits to L1 - even when L0 runs L2
6563 * without MSR bitmaps.
6564 */
6565 msrs->procbased_ctls_high |=
6566 CPU_BASED_ALWAYSON_WITHOUT_TRUE_MSR |
6567 CPU_BASED_USE_MSR_BITMAPS;
6568
6569 /* We support free control of CR3 access interception. */
6570 msrs->procbased_ctls_low &=
6571 ~(CPU_BASED_CR3_LOAD_EXITING | CPU_BASED_CR3_STORE_EXITING);
6572
6573 /*
6574 * secondary cpu-based controls. Do not include those that
Xiaoyao Li7c1b7612020-07-09 12:34:25 +08006575	 * depend on CPUID bits; they are added later by
6576 * vmx_vcpu_after_set_cpuid.
Sean Christopherson55d23752018-12-03 13:53:18 -08006577 */
Vitaly Kuznetsov6b1971c2019-02-07 11:42:14 +01006578 if (msrs->procbased_ctls_high & CPU_BASED_ACTIVATE_SECONDARY_CONTROLS)
6579 rdmsr(MSR_IA32_VMX_PROCBASED_CTLS2,
6580 msrs->secondary_ctls_low,
6581 msrs->secondary_ctls_high);
6582
Sean Christopherson55d23752018-12-03 13:53:18 -08006583 msrs->secondary_ctls_low = 0;
6584 msrs->secondary_ctls_high &=
6585 SECONDARY_EXEC_DESC |
Sean Christopherson7f3603b2020-09-23 09:50:47 -07006586 SECONDARY_EXEC_ENABLE_RDTSCP |
Sean Christopherson55d23752018-12-03 13:53:18 -08006587 SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE |
Paolo Bonzini6defc592019-07-02 14:39:29 +02006588 SECONDARY_EXEC_WBINVD_EXITING |
Sean Christopherson55d23752018-12-03 13:53:18 -08006589 SECONDARY_EXEC_APIC_REGISTER_VIRT |
6590 SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY |
Paolo Bonzini6defc592019-07-02 14:39:29 +02006591 SECONDARY_EXEC_RDRAND_EXITING |
6592 SECONDARY_EXEC_ENABLE_INVPCID |
6593 SECONDARY_EXEC_RDSEED_EXITING |
Ilias Stamatisd041b5e2021-05-26 19:44:17 +01006594 SECONDARY_EXEC_XSAVES |
6595 SECONDARY_EXEC_TSC_SCALING;
Sean Christopherson55d23752018-12-03 13:53:18 -08006596
6597 /*
6598 * We can emulate "VMCS shadowing," even if the hardware
6599 * doesn't support it.
6600 */
6601 msrs->secondary_ctls_high |=
6602 SECONDARY_EXEC_SHADOW_VMCS;
6603
6604 if (enable_ept) {
6605 /* nested EPT: emulate EPT also to L1 */
6606 msrs->secondary_ctls_high |=
6607 SECONDARY_EXEC_ENABLE_EPT;
Sean Christophersonbb1fcc72020-03-02 18:02:36 -08006608 msrs->ept_caps =
6609 VMX_EPT_PAGE_WALK_4_BIT |
6610 VMX_EPT_PAGE_WALK_5_BIT |
6611 VMX_EPTP_WB_BIT |
Sean Christopherson96d47012020-03-02 18:02:40 -08006612 VMX_EPT_INVEPT_BIT |
6613 VMX_EPT_EXECUTE_ONLY_BIT;
6614
Sean Christopherson55d23752018-12-03 13:53:18 -08006615 msrs->ept_caps &= ept_caps;
6616 msrs->ept_caps |= VMX_EPT_EXTENT_GLOBAL_BIT |
6617 VMX_EPT_EXTENT_CONTEXT_BIT | VMX_EPT_2MB_PAGE_BIT |
6618 VMX_EPT_1GB_PAGE_BIT;
6619 if (enable_ept_ad_bits) {
6620 msrs->secondary_ctls_high |=
6621 SECONDARY_EXEC_ENABLE_PML;
6622 msrs->ept_caps |= VMX_EPT_AD_BIT;
6623 }
6624 }
6625
6626 if (cpu_has_vmx_vmfunc()) {
6627 msrs->secondary_ctls_high |=
6628 SECONDARY_EXEC_ENABLE_VMFUNC;
6629 /*
6630 * Advertise EPTP switching unconditionally
6631 * since we emulate it
6632 */
6633 if (enable_ept)
6634 msrs->vmfunc_controls =
6635 VMX_VMFUNC_EPTP_SWITCHING;
6636 }
6637
6638 /*
6639 * Old versions of KVM use the single-context version without
6640 * checking for support, so declare that it is supported even
6641	 * though it is treated as global context. The alternative,
6642	 * accepting single-context INVVPID without advertising it, is worse.
6643 */
6644 if (enable_vpid) {
6645 msrs->secondary_ctls_high |=
6646 SECONDARY_EXEC_ENABLE_VPID;
6647 msrs->vpid_caps = VMX_VPID_INVVPID_BIT |
6648 VMX_VPID_EXTENT_SUPPORTED_MASK;
6649 }
6650
6651 if (enable_unrestricted_guest)
6652 msrs->secondary_ctls_high |=
6653 SECONDARY_EXEC_UNRESTRICTED_GUEST;
6654
6655 if (flexpriority_enabled)
6656 msrs->secondary_ctls_high |=
6657 SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES;
6658
Sean Christopherson72add912021-04-12 16:21:42 +12006659 if (enable_sgx)
6660 msrs->secondary_ctls_high |= SECONDARY_EXEC_ENCLS_EXITING;
6661
Sean Christopherson55d23752018-12-03 13:53:18 -08006662 /* miscellaneous data */
6663 rdmsr(MSR_IA32_VMX_MISC,
6664 msrs->misc_low,
6665 msrs->misc_high);
6666 msrs->misc_low &= VMX_MISC_SAVE_EFER_LMA;
6667 msrs->misc_low |=
6668 MSR_IA32_VMX_MISC_VMWRITE_SHADOW_RO_FIELDS |
6669 VMX_MISC_EMULATED_PREEMPTION_TIMER_RATE |
Yadong Qibf0cd882020-11-06 14:51:22 +08006670 VMX_MISC_ACTIVITY_HLT |
6671 VMX_MISC_ACTIVITY_WAIT_SIPI;
Sean Christopherson55d23752018-12-03 13:53:18 -08006672 msrs->misc_high = 0;
6673
6674 /*
6675 * This MSR reports some information about VMX support. We
6676 * should return information about the VMX we emulate for the
6677 * guest, and the VMCS structure we give it - not about the
6678 * VMX support of the underlying hardware.
6679 */
6680 msrs->basic =
6681 VMCS12_REVISION |
6682 VMX_BASIC_TRUE_CTLS |
6683 ((u64)VMCS12_SIZE << VMX_BASIC_VMCS_SIZE_SHIFT) |
6684 (VMX_BASIC_MEM_TYPE_WB << VMX_BASIC_MEM_TYPE_SHIFT);
6685
6686 if (cpu_has_vmx_basic_inout())
6687 msrs->basic |= VMX_BASIC_INOUT;
6688
6689 /*
6690 * These MSRs specify bits which the guest must keep fixed on
6691 * while L1 is in VMXON mode (in L1's root mode, or running an L2).
6692 * We picked the standard core2 setting.
6693 */
6694#define VMXON_CR0_ALWAYSON (X86_CR0_PE | X86_CR0_PG | X86_CR0_NE)
6695#define VMXON_CR4_ALWAYSON X86_CR4_VMXE
6696 msrs->cr0_fixed0 = VMXON_CR0_ALWAYSON;
6697 msrs->cr4_fixed0 = VMXON_CR4_ALWAYSON;
6698
6699 /* These MSRs specify bits which the guest must keep fixed off. */
6700 rdmsrl(MSR_IA32_VMX_CR0_FIXED1, msrs->cr0_fixed1);
6701 rdmsrl(MSR_IA32_VMX_CR4_FIXED1, msrs->cr4_fixed1);
6702
Sean Christophersonba1f8242021-06-18 14:46:58 -07006703 msrs->vmcs_enum = nested_vmx_calc_vmcs_enum_msr();
Sean Christopherson55d23752018-12-03 13:53:18 -08006704}
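
/*
 * A small sketch of the allowed-0/allowed-1 check that the comment at the top
 * of nested_vmx_setup_ctls_msrs() alludes to; it mirrors the idea behind
 * vmx_control_verify() rather than quoting it, and the helper name is
 * illustrative:
 *
 *	static bool control_value_ok(u32 control, u32 low, u32 high)
 *	{
 *		// all must-be-1 bits (low) are set and nothing outside
 *		// the may-be-1 mask (high) is set
 *		return ((control & high) | low) == control;
 *	}
 */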
6705
6706void nested_vmx_hardware_unsetup(void)
6707{
6708 int i;
6709
6710 if (enable_shadow_vmcs) {
6711 for (i = 0; i < VMX_BITMAP_NR; i++)
6712 free_page((unsigned long)vmx_bitmap[i]);
6713 }
6714}
6715
Sean Christopherson6c1c6e52020-05-06 13:46:53 -07006716__init int nested_vmx_hardware_setup(int (*exit_handlers[])(struct kvm_vcpu *))
Sean Christopherson55d23752018-12-03 13:53:18 -08006717{
6718 int i;
6719
6720 if (!cpu_has_vmx_shadow_vmcs())
6721 enable_shadow_vmcs = 0;
6722 if (enable_shadow_vmcs) {
6723 for (i = 0; i < VMX_BITMAP_NR; i++) {
Ben Gardon41836832019-02-11 11:02:52 -08006724 /*
6725 * The vmx_bitmap is not tied to a VM and so should
6726 * not be charged to a memcg.
6727 */
Sean Christopherson55d23752018-12-03 13:53:18 -08006728 vmx_bitmap[i] = (unsigned long *)
6729 __get_free_page(GFP_KERNEL);
6730 if (!vmx_bitmap[i]) {
6731 nested_vmx_hardware_unsetup();
6732 return -ENOMEM;
6733 }
6734 }
6735
6736 init_vmcs_shadow_fields();
6737 }
6738
Liran Aloncc877672019-11-18 21:11:21 +02006739 exit_handlers[EXIT_REASON_VMCLEAR] = handle_vmclear;
6740 exit_handlers[EXIT_REASON_VMLAUNCH] = handle_vmlaunch;
6741 exit_handlers[EXIT_REASON_VMPTRLD] = handle_vmptrld;
6742 exit_handlers[EXIT_REASON_VMPTRST] = handle_vmptrst;
6743 exit_handlers[EXIT_REASON_VMREAD] = handle_vmread;
6744 exit_handlers[EXIT_REASON_VMRESUME] = handle_vmresume;
6745 exit_handlers[EXIT_REASON_VMWRITE] = handle_vmwrite;
6746 exit_handlers[EXIT_REASON_VMOFF] = handle_vmoff;
6747 exit_handlers[EXIT_REASON_VMON] = handle_vmon;
6748 exit_handlers[EXIT_REASON_INVEPT] = handle_invept;
6749 exit_handlers[EXIT_REASON_INVVPID] = handle_invvpid;
6750 exit_handlers[EXIT_REASON_VMFUNC] = handle_vmfunc;
Sean Christopherson55d23752018-12-03 13:53:18 -08006751
Sean Christopherson55d23752018-12-03 13:53:18 -08006752 return 0;
6753}
Paolo Bonzini33b22172020-04-17 10:24:18 -04006754
6755struct kvm_x86_nested_ops vmx_nested_ops = {
6756 .check_events = vmx_check_nested_events,
Sean Christophersond2060bd2020-04-22 19:25:39 -07006757 .hv_timer_pending = nested_vmx_preemption_timer_pending,
Sean Christophersoncb6a32c2021-03-02 09:45:14 -08006758 .triple_fault = nested_vmx_triple_fault,
Paolo Bonzini33b22172020-04-17 10:24:18 -04006759 .get_state = vmx_get_nested_state,
6760 .set_state = vmx_set_nested_state,
Paolo Bonzini9a78e152021-01-08 11:43:08 -05006761 .get_nested_state_pages = vmx_get_nested_state_pages,
Sean Christopherson02f5fb22020-06-22 14:58:32 -07006762 .write_log_dirty = nested_vmx_write_pml_buffer,
Paolo Bonzini33b22172020-04-17 10:24:18 -04006763 .enable_evmcs = nested_enable_evmcs,
6764 .get_evmcs_version = nested_get_evmcs_version,
6765};
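
/*
 * Roughly how common x86 code is expected to dispatch through this ops table;
 * the call site below is an illustrative assumption, not a quote of x86.c:
 *
 *	if (is_guest_mode(vcpu))
 *		r = kvm_x86_ops.nested_ops->check_events(vcpu);
 */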