/*
 * Kernel-based Virtual Machine driver for Linux
 *
 * This module enables machines with Intel VT-x extensions to run virtual
 * machines without emulation or binary translation.
 *
 * Copyright (C) 2006 Qumranet, Inc.
 * Copyright 2010 Red Hat, Inc. and/or its affiliates.
 *
 * Authors:
 *   Avi Kivity   <avi@qumranet.com>
 *   Yaniv Kamay  <yaniv@qumranet.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2.  See
 * the COPYING file in the top-level directory.
 *
 */

#include "irq.h"
#include "mmu.h"
#include "cpuid.h"
#include "lapic.h"

#include <linux/kvm_host.h>
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/highmem.h>
#include <linux/sched.h>
#include <linux/moduleparam.h>
#include <linux/mod_devicetable.h>
#include <linux/trace_events.h>
#include <linux/slab.h>
#include <linux/tboot.h>
#include <linux/hrtimer.h>
#include <linux/frame.h>
#include <linux/nospec.h>
#include "kvm_cache_regs.h"
#include "x86.h"

#include <asm/cpu.h>
#include <asm/io.h>
#include <asm/desc.h>
#include <asm/vmx.h>
#include <asm/virtext.h>
#include <asm/mce.h>
#include <asm/fpu/internal.h>
#include <asm/perf_event.h>
#include <asm/debugreg.h>
#include <asm/kexec.h>
#include <asm/apic.h>
#include <asm/irq_remapping.h>
#include <asm/mmu_context.h>
#include <asm/spec-ctrl.h>
#include <asm/mshyperv.h>

#include "trace.h"
#include "pmu.h"
#include "vmx_evmcs.h"

#define __ex(x) __kvm_handle_fault_on_reboot(x)
#define __ex_clear(x, reg) \
        ____kvm_handle_fault_on_reboot(x, "xor " reg " , " reg)

MODULE_AUTHOR("Qumranet");
MODULE_LICENSE("GPL");

static const struct x86_cpu_id vmx_cpu_id[] = {
        X86_FEATURE_MATCH(X86_FEATURE_VMX),
        {}
};
MODULE_DEVICE_TABLE(x86cpu, vmx_cpu_id);

static bool __read_mostly enable_vpid = 1;
module_param_named(vpid, enable_vpid, bool, 0444);

static bool __read_mostly enable_vnmi = 1;
module_param_named(vnmi, enable_vnmi, bool, S_IRUGO);

static bool __read_mostly flexpriority_enabled = 1;
module_param_named(flexpriority, flexpriority_enabled, bool, S_IRUGO);

static bool __read_mostly enable_ept = 1;
module_param_named(ept, enable_ept, bool, S_IRUGO);

static bool __read_mostly enable_unrestricted_guest = 1;
module_param_named(unrestricted_guest,
                   enable_unrestricted_guest, bool, S_IRUGO);

static bool __read_mostly enable_ept_ad_bits = 1;
module_param_named(eptad, enable_ept_ad_bits, bool, S_IRUGO);

static bool __read_mostly emulate_invalid_guest_state = true;
module_param(emulate_invalid_guest_state, bool, S_IRUGO);

static bool __read_mostly fasteoi = 1;
module_param(fasteoi, bool, S_IRUGO);

static bool __read_mostly enable_apicv = 1;
module_param(enable_apicv, bool, S_IRUGO);

static bool __read_mostly enable_shadow_vmcs = 1;
module_param_named(enable_shadow_vmcs, enable_shadow_vmcs, bool, S_IRUGO);
/*
 * If nested=1, nested virtualization is supported, i.e., guests may use
 * VMX and be a hypervisor for their own guests. If nested=0, guests may not
 * use VMX instructions.
 */
static bool __read_mostly nested = 0;
module_param(nested, bool, S_IRUGO);

static u64 __read_mostly host_xss;

static bool __read_mostly enable_pml = 1;
module_param_named(pml, enable_pml, bool, S_IRUGO);

#define MSR_TYPE_R	1
#define MSR_TYPE_W	2
#define MSR_TYPE_RW	3

#define MSR_BITMAP_MODE_X2APIC		1
#define MSR_BITMAP_MODE_X2APIC_APICV	2
#define MSR_BITMAP_MODE_LM		4
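/*
 * Explanatory note (an illustration, not new functionality): the
 * MSR_BITMAP_MODE_* values are combined as a bitmask in vcpu_vmx's
 * msr_bitmap_mode field.  A 64-bit guest with APICv active would typically
 * end up with something like
 *
 *      mode = MSR_BITMAP_MODE_LM | MSR_BITMAP_MODE_X2APIC |
 *             MSR_BITMAP_MODE_X2APIC_APICV;
 *
 * and vmx_update_msr_bitmap() reprograms the bitmap whenever that mask
 * changes.
 */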
#define KVM_VMX_TSC_MULTIPLIER_MAX     0xffffffffffffffffULL

/* Guest_tsc -> host_tsc conversion requires 64-bit division.  */
static int __read_mostly cpu_preemption_timer_multi;
static bool __read_mostly enable_preemption_timer = 1;
#ifdef CONFIG_X86_64
module_param_named(preemption_timer, enable_preemption_timer, bool, S_IRUGO);
#endif

#define KVM_GUEST_CR0_MASK (X86_CR0_NW | X86_CR0_CD)
#define KVM_VM_CR0_ALWAYS_ON_UNRESTRICTED_GUEST X86_CR0_NE
#define KVM_VM_CR0_ALWAYS_ON				\
	(KVM_VM_CR0_ALWAYS_ON_UNRESTRICTED_GUEST |	\
	 X86_CR0_WP | X86_CR0_PG | X86_CR0_PE)
#define KVM_CR4_GUEST_OWNED_BITS				\
	(X86_CR4_PVI | X86_CR4_DE | X86_CR4_PCE | X86_CR4_OSFXSR	\
	 | X86_CR4_OSXMMEXCPT | X86_CR4_LA57 | X86_CR4_TSD)

#define KVM_VM_CR4_ALWAYS_ON_UNRESTRICTED_GUEST X86_CR4_VMXE
#define KVM_PMODE_VM_CR4_ALWAYS_ON (X86_CR4_PAE | X86_CR4_VMXE)
#define KVM_RMODE_VM_CR4_ALWAYS_ON (X86_CR4_VME | X86_CR4_PAE | X86_CR4_VMXE)

#define RMODE_GUEST_OWNED_EFLAGS_BITS (~(X86_EFLAGS_IOPL | X86_EFLAGS_VM))

#define VMX_MISC_EMULATED_PREEMPTION_TIMER_RATE 5

/*
 * Hyper-V requires all of these, so mark them as supported even though
 * they are just treated the same as all-context.
 */
#define VMX_VPID_EXTENT_SUPPORTED_MASK		\
	(VMX_VPID_EXTENT_INDIVIDUAL_ADDR_BIT |	\
	VMX_VPID_EXTENT_SINGLE_CONTEXT_BIT |	\
	VMX_VPID_EXTENT_GLOBAL_CONTEXT_BIT |	\
	VMX_VPID_EXTENT_SINGLE_NON_GLOBAL_BIT)

/*
 * These two parameters are used to configure the controls for Pause-Loop
 * Exiting:
 * ple_gap:    upper bound on the amount of time between two successive
 *             executions of PAUSE in a loop. Also indicates whether PLE is
 *             enabled.
 *             According to tests, this time is usually smaller than 128 cycles.
 * ple_window: upper bound on the amount of time a guest is allowed to execute
 *             in a PAUSE loop. Tests indicate that most spinlocks are held for
 *             less than 2^12 cycles.
 * Time is measured based on a counter that runs at the same rate as the TSC,
 * refer to SDM volume 3b section 21.6.13 & 22.1.3.
 */
static unsigned int ple_gap = KVM_DEFAULT_PLE_GAP;

static unsigned int ple_window = KVM_VMX_DEFAULT_PLE_WINDOW;
module_param(ple_window, uint, 0444);

/* Default doubles per-vcpu window every exit. */
static unsigned int ple_window_grow = KVM_DEFAULT_PLE_WINDOW_GROW;
module_param(ple_window_grow, uint, 0444);

/* Default resets per-vcpu window every exit to ple_window. */
static unsigned int ple_window_shrink = KVM_DEFAULT_PLE_WINDOW_SHRINK;
module_param(ple_window_shrink, uint, 0444);

/* Default is to compute the maximum so we can never overflow. */
static unsigned int ple_window_max = KVM_VMX_DEFAULT_PLE_WINDOW_MAX;
module_param(ple_window_max, uint, 0444);
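/*
 * For illustration only (the grow/shrink helpers live further down in this
 * file): on a PAUSE-loop exit the per-vcpu window is scaled up, and on a
 * useful exit it is scaled back down, roughly:
 *
 *      grow:   new = min(old * ple_window_grow, ple_window_max);
 *      shrink: new = old / ple_window_shrink, or simply ple_window when
 *              ple_window_shrink is 0 (the default, i.e. a full reset).
 *
 * With the usual defaults (ple_window 4096, grow 2) one PLE exit doubles
 * the window to 8192 cycles, the next to 16384, and so on until
 * ple_window_max caps it.
 */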
extern const ulong vmx_return;

enum ept_pointers_status {
	EPT_POINTERS_CHECK = 0,
	EPT_POINTERS_MATCH = 1,
	EPT_POINTERS_MISMATCH = 2
};

struct kvm_vmx {
	struct kvm kvm;

	unsigned int tss_addr;
	bool ept_identity_pagetable_done;
	gpa_t ept_identity_map_addr;

	enum ept_pointers_status ept_pointers_match;
	spinlock_t ept_pointer_lock;
};

#define NR_AUTOLOAD_MSRS 8

struct vmcs_hdr {
	u32 revision_id:31;
	u32 shadow_vmcs:1;
};

struct vmcs {
	struct vmcs_hdr hdr;
	u32 abort;
	char data[0];
};
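/*
 * This mirrors the layout the SDM mandates for a VMCS region: a 31-bit
 * revision identifier plus the shadow-VMCS indicator bit in the first
 * dword, the VMX-abort indicator in the second, and implementation-specific
 * data after that.  Software typically only writes the header directly,
 * e.g. "vmcs->hdr.revision_id = vmcs_config.revision_id;" before handing
 * the region to VMXON/VMPTRLD; everything behind data[] belongs to the CPU
 * and is accessed through VMREAD/VMWRITE.
 */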
/*
 * vmcs_host_state tracks registers that are loaded from the VMCS on VMEXIT
 * and whose values change infrequently, but are not constant.  I.e. this is
 * used as a write-through cache of the corresponding VMCS fields.
 */
struct vmcs_host_state {
	unsigned long cr3;	/* May not match real cr3 */
	unsigned long cr4;	/* May not match real cr4 */
	unsigned long gs_base;
	unsigned long fs_base;

	u16           fs_sel, gs_sel, ldt_sel;
#ifdef CONFIG_X86_64
	u16           ds_sel, es_sel;
#endif
};

/*
 * Track a VMCS that may be loaded on a certain CPU. If it is (cpu!=-1), also
 * remember whether it was VMLAUNCHed, and maintain a linked list of all VMCSs
 * loaded on this CPU (so we can clear them if the CPU goes down).
 */
struct loaded_vmcs {
	struct vmcs *vmcs;
	struct vmcs *shadow_vmcs;
	int cpu;
	bool launched;
	bool nmi_known_unmasked;
	/* Support for vnmi-less CPUs */
	int soft_vnmi_blocked;
	ktime_t entry_time;
	s64 vnmi_blocked_time;
	unsigned long *msr_bitmap;
	struct list_head loaded_vmcss_on_cpu_link;
	struct vmcs_host_state host_state;
};

struct shared_msr_entry {
	unsigned index;
	u64 data;
	u64 mask;
};

/*
 * struct vmcs12 describes the state that our guest hypervisor (L1) keeps for a
 * single nested guest (L2), hence the name vmcs12. Any VMX implementation has
 * a VMCS structure, and vmcs12 is our emulated VMX's VMCS. This structure is
 * stored in guest memory specified by VMPTRLD, but is opaque to the guest,
 * which must access it using VMREAD/VMWRITE/VMCLEAR instructions.
 * More than one of these structures may exist, if L1 runs multiple L2 guests.
 * nested_vmx_run() will use the data here to build the vmcs02: a VMCS for the
 * underlying hardware which will be used to run L2.
 * This structure is packed to ensure that its layout is identical across
 * machines (necessary for live migration).
 *
 * IMPORTANT: Changing the layout of existing fields in this structure
 * will break save/restore compatibility with older kvm releases. When
 * adding new fields, either use space in the reserved padding* arrays
 * or add the new fields to the end of the structure.
 */
typedef u64 natural_width;
struct __packed vmcs12 {
	/* According to the Intel spec, a VMCS region must start with the
	 * following two fields. Then follow implementation-specific data.
	 */
	struct vmcs_hdr hdr;
	u32 abort;

	u32 launch_state; /* set to 0 by VMCLEAR, to 1 by VMLAUNCH */
	u32 padding[7]; /* room for future expansion */

	u64 io_bitmap_a;
	u64 io_bitmap_b;
	u64 msr_bitmap;
	u64 vm_exit_msr_store_addr;
	u64 vm_exit_msr_load_addr;
	u64 vm_entry_msr_load_addr;
	u64 tsc_offset;
	u64 virtual_apic_page_addr;
	u64 apic_access_addr;
	u64 posted_intr_desc_addr;
	u64 ept_pointer;
	u64 eoi_exit_bitmap0;
	u64 eoi_exit_bitmap1;
	u64 eoi_exit_bitmap2;
	u64 eoi_exit_bitmap3;
	u64 xss_exit_bitmap;
	u64 guest_physical_address;
	u64 vmcs_link_pointer;
	u64 guest_ia32_debugctl;
	u64 guest_ia32_pat;
	u64 guest_ia32_efer;
	u64 guest_ia32_perf_global_ctrl;
	u64 guest_pdptr0;
	u64 guest_pdptr1;
	u64 guest_pdptr2;
	u64 guest_pdptr3;
	u64 guest_bndcfgs;
	u64 host_ia32_pat;
	u64 host_ia32_efer;
	u64 host_ia32_perf_global_ctrl;
	u64 vmread_bitmap;
	u64 vmwrite_bitmap;
	u64 vm_function_control;
	u64 eptp_list_address;
	u64 pml_address;
	u64 padding64[3]; /* room for future expansion */
	/*
	 * To allow migration of L1 (complete with its L2 guests) between
	 * machines of different natural widths (32 or 64 bit), we cannot have
	 * unsigned long fields with no explicit size. We use u64 (aliased
	 * natural_width) instead. Luckily, x86 is little-endian.
	 */
	natural_width cr0_guest_host_mask;
	natural_width cr4_guest_host_mask;
	natural_width cr0_read_shadow;
	natural_width cr4_read_shadow;
	natural_width cr3_target_value0;
	natural_width cr3_target_value1;
	natural_width cr3_target_value2;
	natural_width cr3_target_value3;
	natural_width exit_qualification;
	natural_width guest_linear_address;
	natural_width guest_cr0;
	natural_width guest_cr3;
	natural_width guest_cr4;
	natural_width guest_es_base;
	natural_width guest_cs_base;
	natural_width guest_ss_base;
	natural_width guest_ds_base;
	natural_width guest_fs_base;
	natural_width guest_gs_base;
	natural_width guest_ldtr_base;
	natural_width guest_tr_base;
	natural_width guest_gdtr_base;
	natural_width guest_idtr_base;
	natural_width guest_dr7;
	natural_width guest_rsp;
	natural_width guest_rip;
	natural_width guest_rflags;
	natural_width guest_pending_dbg_exceptions;
	natural_width guest_sysenter_esp;
	natural_width guest_sysenter_eip;
	natural_width host_cr0;
	natural_width host_cr3;
	natural_width host_cr4;
	natural_width host_fs_base;
	natural_width host_gs_base;
	natural_width host_tr_base;
	natural_width host_gdtr_base;
	natural_width host_idtr_base;
	natural_width host_ia32_sysenter_esp;
	natural_width host_ia32_sysenter_eip;
	natural_width host_rsp;
	natural_width host_rip;
	natural_width paddingl[8]; /* room for future expansion */
	u32 pin_based_vm_exec_control;
	u32 cpu_based_vm_exec_control;
	u32 exception_bitmap;
	u32 page_fault_error_code_mask;
	u32 page_fault_error_code_match;
	u32 cr3_target_count;
	u32 vm_exit_controls;
	u32 vm_exit_msr_store_count;
	u32 vm_exit_msr_load_count;
	u32 vm_entry_controls;
	u32 vm_entry_msr_load_count;
	u32 vm_entry_intr_info_field;
	u32 vm_entry_exception_error_code;
	u32 vm_entry_instruction_len;
	u32 tpr_threshold;
	u32 secondary_vm_exec_control;
	u32 vm_instruction_error;
	u32 vm_exit_reason;
	u32 vm_exit_intr_info;
	u32 vm_exit_intr_error_code;
	u32 idt_vectoring_info_field;
	u32 idt_vectoring_error_code;
	u32 vm_exit_instruction_len;
	u32 vmx_instruction_info;
	u32 guest_es_limit;
	u32 guest_cs_limit;
	u32 guest_ss_limit;
	u32 guest_ds_limit;
	u32 guest_fs_limit;
	u32 guest_gs_limit;
	u32 guest_ldtr_limit;
	u32 guest_tr_limit;
	u32 guest_gdtr_limit;
	u32 guest_idtr_limit;
	u32 guest_es_ar_bytes;
	u32 guest_cs_ar_bytes;
	u32 guest_ss_ar_bytes;
	u32 guest_ds_ar_bytes;
	u32 guest_fs_ar_bytes;
	u32 guest_gs_ar_bytes;
	u32 guest_ldtr_ar_bytes;
	u32 guest_tr_ar_bytes;
	u32 guest_interruptibility_info;
	u32 guest_activity_state;
	u32 guest_sysenter_cs;
	u32 host_ia32_sysenter_cs;
	u32 vmx_preemption_timer_value;
	u32 padding32[7]; /* room for future expansion */
	u16 virtual_processor_id;
	u16 posted_intr_nv;
	u16 guest_es_selector;
	u16 guest_cs_selector;
	u16 guest_ss_selector;
	u16 guest_ds_selector;
	u16 guest_fs_selector;
	u16 guest_gs_selector;
	u16 guest_ldtr_selector;
	u16 guest_tr_selector;
	u16 guest_intr_status;
	u16 host_es_selector;
	u16 host_cs_selector;
	u16 host_ss_selector;
	u16 host_ds_selector;
	u16 host_fs_selector;
	u16 host_gs_selector;
	u16 host_tr_selector;
	u16 guest_pml_index;
};

/*
 * For save/restore compatibility, the vmcs12 field offsets must not change.
 */
#define CHECK_OFFSET(field, loc)				\
	BUILD_BUG_ON_MSG(offsetof(struct vmcs12, field) != (loc),	\
		"Offset of " #field " in struct vmcs12 has changed.")

static inline void vmx_check_vmcs12_offsets(void) {
	CHECK_OFFSET(hdr, 0);
	CHECK_OFFSET(abort, 4);
	CHECK_OFFSET(launch_state, 8);
	CHECK_OFFSET(io_bitmap_a, 40);
	CHECK_OFFSET(io_bitmap_b, 48);
	CHECK_OFFSET(msr_bitmap, 56);
	CHECK_OFFSET(vm_exit_msr_store_addr, 64);
	CHECK_OFFSET(vm_exit_msr_load_addr, 72);
	CHECK_OFFSET(vm_entry_msr_load_addr, 80);
	CHECK_OFFSET(tsc_offset, 88);
	CHECK_OFFSET(virtual_apic_page_addr, 96);
	CHECK_OFFSET(apic_access_addr, 104);
	CHECK_OFFSET(posted_intr_desc_addr, 112);
	CHECK_OFFSET(ept_pointer, 120);
	CHECK_OFFSET(eoi_exit_bitmap0, 128);
	CHECK_OFFSET(eoi_exit_bitmap1, 136);
	CHECK_OFFSET(eoi_exit_bitmap2, 144);
	CHECK_OFFSET(eoi_exit_bitmap3, 152);
	CHECK_OFFSET(xss_exit_bitmap, 160);
	CHECK_OFFSET(guest_physical_address, 168);
	CHECK_OFFSET(vmcs_link_pointer, 176);
	CHECK_OFFSET(guest_ia32_debugctl, 184);
	CHECK_OFFSET(guest_ia32_pat, 192);
	CHECK_OFFSET(guest_ia32_efer, 200);
	CHECK_OFFSET(guest_ia32_perf_global_ctrl, 208);
	CHECK_OFFSET(guest_pdptr0, 216);
	CHECK_OFFSET(guest_pdptr1, 224);
	CHECK_OFFSET(guest_pdptr2, 232);
	CHECK_OFFSET(guest_pdptr3, 240);
	CHECK_OFFSET(guest_bndcfgs, 248);
	CHECK_OFFSET(host_ia32_pat, 256);
	CHECK_OFFSET(host_ia32_efer, 264);
	CHECK_OFFSET(host_ia32_perf_global_ctrl, 272);
	CHECK_OFFSET(vmread_bitmap, 280);
	CHECK_OFFSET(vmwrite_bitmap, 288);
	CHECK_OFFSET(vm_function_control, 296);
	CHECK_OFFSET(eptp_list_address, 304);
	CHECK_OFFSET(pml_address, 312);
	CHECK_OFFSET(cr0_guest_host_mask, 344);
	CHECK_OFFSET(cr4_guest_host_mask, 352);
	CHECK_OFFSET(cr0_read_shadow, 360);
	CHECK_OFFSET(cr4_read_shadow, 368);
	CHECK_OFFSET(cr3_target_value0, 376);
	CHECK_OFFSET(cr3_target_value1, 384);
	CHECK_OFFSET(cr3_target_value2, 392);
	CHECK_OFFSET(cr3_target_value3, 400);
	CHECK_OFFSET(exit_qualification, 408);
	CHECK_OFFSET(guest_linear_address, 416);
	CHECK_OFFSET(guest_cr0, 424);
	CHECK_OFFSET(guest_cr3, 432);
	CHECK_OFFSET(guest_cr4, 440);
	CHECK_OFFSET(guest_es_base, 448);
	CHECK_OFFSET(guest_cs_base, 456);
	CHECK_OFFSET(guest_ss_base, 464);
	CHECK_OFFSET(guest_ds_base, 472);
	CHECK_OFFSET(guest_fs_base, 480);
	CHECK_OFFSET(guest_gs_base, 488);
	CHECK_OFFSET(guest_ldtr_base, 496);
	CHECK_OFFSET(guest_tr_base, 504);
	CHECK_OFFSET(guest_gdtr_base, 512);
	CHECK_OFFSET(guest_idtr_base, 520);
	CHECK_OFFSET(guest_dr7, 528);
	CHECK_OFFSET(guest_rsp, 536);
	CHECK_OFFSET(guest_rip, 544);
	CHECK_OFFSET(guest_rflags, 552);
	CHECK_OFFSET(guest_pending_dbg_exceptions, 560);
	CHECK_OFFSET(guest_sysenter_esp, 568);
	CHECK_OFFSET(guest_sysenter_eip, 576);
	CHECK_OFFSET(host_cr0, 584);
	CHECK_OFFSET(host_cr3, 592);
	CHECK_OFFSET(host_cr4, 600);
	CHECK_OFFSET(host_fs_base, 608);
	CHECK_OFFSET(host_gs_base, 616);
	CHECK_OFFSET(host_tr_base, 624);
	CHECK_OFFSET(host_gdtr_base, 632);
	CHECK_OFFSET(host_idtr_base, 640);
	CHECK_OFFSET(host_ia32_sysenter_esp, 648);
	CHECK_OFFSET(host_ia32_sysenter_eip, 656);
	CHECK_OFFSET(host_rsp, 664);
	CHECK_OFFSET(host_rip, 672);
	CHECK_OFFSET(pin_based_vm_exec_control, 744);
	CHECK_OFFSET(cpu_based_vm_exec_control, 748);
	CHECK_OFFSET(exception_bitmap, 752);
	CHECK_OFFSET(page_fault_error_code_mask, 756);
	CHECK_OFFSET(page_fault_error_code_match, 760);
	CHECK_OFFSET(cr3_target_count, 764);
	CHECK_OFFSET(vm_exit_controls, 768);
	CHECK_OFFSET(vm_exit_msr_store_count, 772);
	CHECK_OFFSET(vm_exit_msr_load_count, 776);
	CHECK_OFFSET(vm_entry_controls, 780);
	CHECK_OFFSET(vm_entry_msr_load_count, 784);
	CHECK_OFFSET(vm_entry_intr_info_field, 788);
	CHECK_OFFSET(vm_entry_exception_error_code, 792);
	CHECK_OFFSET(vm_entry_instruction_len, 796);
	CHECK_OFFSET(tpr_threshold, 800);
	CHECK_OFFSET(secondary_vm_exec_control, 804);
	CHECK_OFFSET(vm_instruction_error, 808);
	CHECK_OFFSET(vm_exit_reason, 812);
	CHECK_OFFSET(vm_exit_intr_info, 816);
	CHECK_OFFSET(vm_exit_intr_error_code, 820);
	CHECK_OFFSET(idt_vectoring_info_field, 824);
	CHECK_OFFSET(idt_vectoring_error_code, 828);
	CHECK_OFFSET(vm_exit_instruction_len, 832);
	CHECK_OFFSET(vmx_instruction_info, 836);
	CHECK_OFFSET(guest_es_limit, 840);
	CHECK_OFFSET(guest_cs_limit, 844);
	CHECK_OFFSET(guest_ss_limit, 848);
	CHECK_OFFSET(guest_ds_limit, 852);
	CHECK_OFFSET(guest_fs_limit, 856);
	CHECK_OFFSET(guest_gs_limit, 860);
	CHECK_OFFSET(guest_ldtr_limit, 864);
	CHECK_OFFSET(guest_tr_limit, 868);
	CHECK_OFFSET(guest_gdtr_limit, 872);
	CHECK_OFFSET(guest_idtr_limit, 876);
	CHECK_OFFSET(guest_es_ar_bytes, 880);
	CHECK_OFFSET(guest_cs_ar_bytes, 884);
	CHECK_OFFSET(guest_ss_ar_bytes, 888);
	CHECK_OFFSET(guest_ds_ar_bytes, 892);
	CHECK_OFFSET(guest_fs_ar_bytes, 896);
	CHECK_OFFSET(guest_gs_ar_bytes, 900);
	CHECK_OFFSET(guest_ldtr_ar_bytes, 904);
	CHECK_OFFSET(guest_tr_ar_bytes, 908);
	CHECK_OFFSET(guest_interruptibility_info, 912);
	CHECK_OFFSET(guest_activity_state, 916);
	CHECK_OFFSET(guest_sysenter_cs, 920);
	CHECK_OFFSET(host_ia32_sysenter_cs, 924);
	CHECK_OFFSET(vmx_preemption_timer_value, 928);
	CHECK_OFFSET(virtual_processor_id, 960);
	CHECK_OFFSET(posted_intr_nv, 962);
	CHECK_OFFSET(guest_es_selector, 964);
	CHECK_OFFSET(guest_cs_selector, 966);
	CHECK_OFFSET(guest_ss_selector, 968);
	CHECK_OFFSET(guest_ds_selector, 970);
	CHECK_OFFSET(guest_fs_selector, 972);
	CHECK_OFFSET(guest_gs_selector, 974);
	CHECK_OFFSET(guest_ldtr_selector, 976);
	CHECK_OFFSET(guest_tr_selector, 978);
	CHECK_OFFSET(guest_intr_status, 980);
	CHECK_OFFSET(host_es_selector, 982);
	CHECK_OFFSET(host_cs_selector, 984);
	CHECK_OFFSET(host_ss_selector, 986);
	CHECK_OFFSET(host_ds_selector, 988);
	CHECK_OFFSET(host_fs_selector, 990);
	CHECK_OFFSET(host_gs_selector, 992);
	CHECK_OFFSET(host_tr_selector, 994);
	CHECK_OFFSET(guest_pml_index, 996);
}
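/*
 * The checks above are purely compile-time: BUILD_BUG_ON_MSG() compiles to
 * nothing when the offset matches and breaks the build when it does not.
 * As an illustration, inserting a new u64 in front of io_bitmap_a would make
 * every later CHECK_OFFSET() trip with an error along the lines of
 * "Offset of io_bitmap_a in struct vmcs12 has changed.", which is the
 * intended guard against silently breaking the save/restore ABI described
 * above struct vmcs12.
 */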
/*
 * VMCS12_REVISION is an arbitrary id that should be changed if the content or
 * layout of struct vmcs12 is changed. MSR_IA32_VMX_BASIC returns this id, and
 * VMPTRLD verifies that the VMCS region that L1 is loading contains this id.
 *
 * IMPORTANT: Changing this value will break save/restore compatibility with
 * older kvm releases.
 */
#define VMCS12_REVISION 0x11e57ed0

/*
 * VMCS12_SIZE is the number of bytes L1 should allocate for the VMXON region
 * and any VMCS region. Although only sizeof(struct vmcs12) bytes are used by
 * the current implementation, 4K are reserved to avoid future complications.
 */
#define VMCS12_SIZE 0x1000

/*
 * VMCS12_MAX_FIELD_INDEX is the highest index value used in any
 * supported VMCS12 field encoding.
 */
#define VMCS12_MAX_FIELD_INDEX 0x17

struct nested_vmx_msrs {
	/*
	 * We only store the "true" versions of the VMX capability MSRs. We
	 * generate the "non-true" versions by setting the must-be-1 bits
	 * according to the SDM.
	 */
	u32 procbased_ctls_low;
	u32 procbased_ctls_high;
	u32 secondary_ctls_low;
	u32 secondary_ctls_high;
	u32 pinbased_ctls_low;
	u32 pinbased_ctls_high;
	u32 exit_ctls_low;
	u32 exit_ctls_high;
	u32 entry_ctls_low;
	u32 entry_ctls_high;
	u32 misc_low;
	u32 misc_high;
	u32 ept_caps;
	u32 vpid_caps;
	u64 basic;
	u64 cr0_fixed0;
	u64 cr0_fixed1;
	u64 cr4_fixed0;
	u64 cr4_fixed1;
	u64 vmcs_enum;
	u64 vmfunc_controls;
};
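/*
 * Illustration of the "true"/"non-true" split described in the structure
 * (a sketch, not code taken from this file): when L1 reads a non-true
 * control MSR such as MSR_IA32_VMX_PINBASED_CTLS, KVM starts from the
 * stored "true" value and ORs the SDM's default1 bits back into the
 * allowed-0 half, roughly
 *
 *      non_true_low = true_low | PIN_BASED_ALWAYSON_WITHOUT_TRUE_MSR;
 *
 * so only the relaxed "true" variants need to be kept here.
 */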
/*
 * The nested_vmx structure is part of vcpu_vmx, and holds information we need
 * for correct emulation of VMX (i.e., nested VMX) on this vcpu.
 */
struct nested_vmx {
	/* Has the level1 guest done vmxon? */
	bool vmxon;
	gpa_t vmxon_ptr;
	bool pml_full;

	/* The guest-physical address of the current VMCS L1 keeps for L2 */
	gpa_t current_vmptr;
	/*
	 * Cache of the guest's VMCS, existing outside of guest memory.
	 * Loaded from guest memory during VMPTRLD. Flushed to guest
	 * memory during VMCLEAR and VMPTRLD.
	 */
	struct vmcs12 *cached_vmcs12;
	/*
	 * Cache of the guest's shadow VMCS, existing outside of guest
	 * memory. Loaded from guest memory during VM entry. Flushed
	 * to guest memory during VM exit.
	 */
	struct vmcs12 *cached_shadow_vmcs12;
	/*
	 * Indicates if the shadow vmcs must be updated with the
	 * data held by vmcs12
	 */
	bool sync_shadow_vmcs;
	bool dirty_vmcs12;

	bool change_vmcs01_virtual_apic_mode;

	/* L2 must run next, and mustn't decide to exit to L1. */
	bool nested_run_pending;

	struct loaded_vmcs vmcs02;

	/*
	 * Guest pages referred to in the vmcs02 with host-physical
	 * pointers, so we must keep them pinned while L2 runs.
	 */
	struct page *apic_access_page;
	struct page *virtual_apic_page;
	struct page *pi_desc_page;
	struct pi_desc *pi_desc;
	bool pi_pending;
	u16 posted_intr_nv;

	struct hrtimer preemption_timer;
	bool preemption_timer_expired;

	/* to migrate it to L2 if VM_ENTRY_LOAD_DEBUG_CONTROLS is off */
	u64 vmcs01_debugctl;

	u16 vpid02;
	u16 last_vpid;

	struct nested_vmx_msrs msrs;

	/* SMM related state */
	struct {
		/* in VMX operation on SMM entry? */
		bool vmxon;
		/* in guest mode on SMM entry? */
		bool guest_mode;
	} smm;
};

#define POSTED_INTR_ON  0
#define POSTED_INTR_SN  1

/* Posted-Interrupt Descriptor */
struct pi_desc {
	u32 pir[8];     /* Posted interrupt requested */
	union {
		struct {
			/* bit 256 - Outstanding Notification */
			u16 on : 1,
			/* bit 257 - Suppress Notification */
			    sn : 1,
			/* bit 271:258 - Reserved */
			    rsvd_1 : 14;
			/* bit 279:272 - Notification Vector */
			u8 nv;
			/* bit 287:280 - Reserved */
			u8 rsvd_2;
			/* bit 319:288 - Notification Destination */
			u32 ndst;
		};
		u64 control;
	};
	u32 rsvd[6];
} __aligned(64);
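/*
 * Layout note (numbers taken from the field comments above): pir[8] covers
 * descriptor bits 0-255, one bit per interrupt vector, and "control"
 * aliases the 64-bit word holding bits 256-319.  Within that word,
 * POSTED_INTR_ON (bit 0 of control, bit 256 of the descriptor) is the
 * Outstanding Notification flag and POSTED_INTR_SN (bit 1 of control,
 * bit 257) is the Suppress Notification flag, which is how the pi_test_on(),
 * pi_set_sn() and related helpers below address them.
 */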
static bool pi_test_and_set_on(struct pi_desc *pi_desc)
{
	return test_and_set_bit(POSTED_INTR_ON,
			(unsigned long *)&pi_desc->control);
}

static bool pi_test_and_clear_on(struct pi_desc *pi_desc)
{
	return test_and_clear_bit(POSTED_INTR_ON,
			(unsigned long *)&pi_desc->control);
}

static int pi_test_and_set_pir(int vector, struct pi_desc *pi_desc)
{
	return test_and_set_bit(vector, (unsigned long *)pi_desc->pir);
}

static inline void pi_clear_sn(struct pi_desc *pi_desc)
{
	return clear_bit(POSTED_INTR_SN,
			(unsigned long *)&pi_desc->control);
}

static inline void pi_set_sn(struct pi_desc *pi_desc)
{
	return set_bit(POSTED_INTR_SN,
			(unsigned long *)&pi_desc->control);
}

static inline void pi_clear_on(struct pi_desc *pi_desc)
{
	clear_bit(POSTED_INTR_ON,
		  (unsigned long *)&pi_desc->control);
}

static inline int pi_test_on(struct pi_desc *pi_desc)
{
	return test_bit(POSTED_INTR_ON,
			(unsigned long *)&pi_desc->control);
}

static inline int pi_test_sn(struct pi_desc *pi_desc)
{
	return test_bit(POSTED_INTR_SN,
			(unsigned long *)&pi_desc->control);
}

struct vcpu_vmx {
	struct kvm_vcpu       vcpu;
	unsigned long         host_rsp;
	u8                    fail;
	u8                    msr_bitmap_mode;
	u32                   exit_intr_info;
	u32                   idt_vectoring_info;
	ulong                 rflags;
	struct shared_msr_entry *guest_msrs;
	int                   nmsrs;
	int                   save_nmsrs;
	unsigned long         host_idt_base;
#ifdef CONFIG_X86_64
	u64                   msr_host_kernel_gs_base;
	u64                   msr_guest_kernel_gs_base;
#endif

	u64                   arch_capabilities;
	u64                   spec_ctrl;

	u32 vm_entry_controls_shadow;
	u32 vm_exit_controls_shadow;
	u32 secondary_exec_control;

	/*
	 * loaded_vmcs points to the VMCS currently used in this vcpu. For a
	 * non-nested (L1) guest, it always points to vmcs01. For a nested
	 * guest (L2), it points to a different VMCS.  loaded_cpu_state points
	 * to the VMCS whose state is loaded into the CPU registers that only
	 * need to be switched when transitioning to/from the kernel; a NULL
	 * value indicates that host state is loaded.
	 */
	struct loaded_vmcs    vmcs01;
	struct loaded_vmcs   *loaded_vmcs;
	struct loaded_vmcs   *loaded_cpu_state;
	bool                  __launched; /* temporary, used in vmx_vcpu_run */
	struct msr_autoload {
		unsigned nr;
		struct vmx_msr_entry guest[NR_AUTOLOAD_MSRS];
		struct vmx_msr_entry host[NR_AUTOLOAD_MSRS];
	} msr_autoload;

	struct {
		int vm86_active;
		ulong save_rflags;
		struct kvm_segment segs[8];
	} rmode;
	struct {
		u32 bitmask; /* 4 bits per segment (1 bit per field) */
		struct kvm_save_segment {
			u16 selector;
			unsigned long base;
			u32 limit;
			u32 ar;
		} seg[8];
	} segment_cache;
	int vpid;
	bool emulation_required;

	u32 exit_reason;

	/* Posted interrupt descriptor */
	struct pi_desc pi_desc;

	/* Support for a guest hypervisor (nested VMX) */
	struct nested_vmx nested;

	/* Dynamic PLE window. */
	int ple_window;
	bool ple_window_dirty;

	/* Support for PML */
#define PML_ENTITY_NUM 512
	struct page *pml_pg;

	/* apic deadline value in host tsc */
	u64 hv_deadline_tsc;

	u64 current_tsc_ratio;

	u32 host_pkru;

	unsigned long host_debugctlmsr;

	/*
	 * Only bits masked by msr_ia32_feature_control_valid_bits can be set in
	 * msr_ia32_feature_control. FEATURE_CONTROL_LOCKED is always included
	 * in msr_ia32_feature_control_valid_bits.
	 */
	u64 msr_ia32_feature_control;
	u64 msr_ia32_feature_control_valid_bits;
	u64 ept_pointer;
};

enum segment_cache_field {
	SEG_FIELD_SEL = 0,
	SEG_FIELD_BASE = 1,
	SEG_FIELD_LIMIT = 2,
	SEG_FIELD_AR = 3,

	SEG_FIELD_NR = 4
};

static inline struct kvm_vmx *to_kvm_vmx(struct kvm *kvm)
{
	return container_of(kvm, struct kvm_vmx, kvm);
}

static inline struct vcpu_vmx *to_vmx(struct kvm_vcpu *vcpu)
{
	return container_of(vcpu, struct vcpu_vmx, vcpu);
}

static struct pi_desc *vcpu_to_pi_desc(struct kvm_vcpu *vcpu)
{
	return &(to_vmx(vcpu)->pi_desc);
}

#define ROL16(val, n) ((u16)(((u16)(val) << (n)) | ((u16)(val) >> (16 - (n)))))
#define VMCS12_OFFSET(x) offsetof(struct vmcs12, x)
#define FIELD(number, name)	[ROL16(number, 6)] = VMCS12_OFFSET(name)
#define FIELD64(number, name)						\
	FIELD(number, name),						\
	[ROL16(number##_HIGH, 6)] = VMCS12_OFFSET(name) + sizeof(u32)
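/*
 * A worked example of the FIELD() indexing scheme (for illustration): a VMCS
 * field encoding keeps its 9-bit index in bits 9:1, the access type in
 * bit 0 and the width/type in the upper bits.  ROL16(encoding, 6) rotates
 * the whole encoding left by 6, packing index and access type into the high
 * bits and type/width into the low bits of a compact table index.  E.g.
 * GUEST_ES_SELECTOR (encoding 0x0800) gives ROL16(0x0800, 6) = 2, so
 * FIELD(GUEST_ES_SELECTOR, guest_es_selector) lands in slot 2 of
 * vmcs_field_to_offset_table below.
 */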
static u16 shadow_read_only_fields[] = {
#define SHADOW_FIELD_RO(x) x,
#include "vmx_shadow_fields.h"
};
static int max_shadow_read_only_fields =
	ARRAY_SIZE(shadow_read_only_fields);

static u16 shadow_read_write_fields[] = {
#define SHADOW_FIELD_RW(x) x,
#include "vmx_shadow_fields.h"
};
static int max_shadow_read_write_fields =
	ARRAY_SIZE(shadow_read_write_fields);

static const unsigned short vmcs_field_to_offset_table[] = {
	FIELD(VIRTUAL_PROCESSOR_ID, virtual_processor_id),
	FIELD(POSTED_INTR_NV, posted_intr_nv),
	FIELD(GUEST_ES_SELECTOR, guest_es_selector),
	FIELD(GUEST_CS_SELECTOR, guest_cs_selector),
	FIELD(GUEST_SS_SELECTOR, guest_ss_selector),
	FIELD(GUEST_DS_SELECTOR, guest_ds_selector),
	FIELD(GUEST_FS_SELECTOR, guest_fs_selector),
	FIELD(GUEST_GS_SELECTOR, guest_gs_selector),
	FIELD(GUEST_LDTR_SELECTOR, guest_ldtr_selector),
	FIELD(GUEST_TR_SELECTOR, guest_tr_selector),
	FIELD(GUEST_INTR_STATUS, guest_intr_status),
	FIELD(GUEST_PML_INDEX, guest_pml_index),
	FIELD(HOST_ES_SELECTOR, host_es_selector),
	FIELD(HOST_CS_SELECTOR, host_cs_selector),
	FIELD(HOST_SS_SELECTOR, host_ss_selector),
	FIELD(HOST_DS_SELECTOR, host_ds_selector),
	FIELD(HOST_FS_SELECTOR, host_fs_selector),
	FIELD(HOST_GS_SELECTOR, host_gs_selector),
	FIELD(HOST_TR_SELECTOR, host_tr_selector),
	FIELD64(IO_BITMAP_A, io_bitmap_a),
	FIELD64(IO_BITMAP_B, io_bitmap_b),
	FIELD64(MSR_BITMAP, msr_bitmap),
	FIELD64(VM_EXIT_MSR_STORE_ADDR, vm_exit_msr_store_addr),
	FIELD64(VM_EXIT_MSR_LOAD_ADDR, vm_exit_msr_load_addr),
	FIELD64(VM_ENTRY_MSR_LOAD_ADDR, vm_entry_msr_load_addr),
	FIELD64(PML_ADDRESS, pml_address),
	FIELD64(TSC_OFFSET, tsc_offset),
	FIELD64(VIRTUAL_APIC_PAGE_ADDR, virtual_apic_page_addr),
	FIELD64(APIC_ACCESS_ADDR, apic_access_addr),
	FIELD64(POSTED_INTR_DESC_ADDR, posted_intr_desc_addr),
	FIELD64(VM_FUNCTION_CONTROL, vm_function_control),
	FIELD64(EPT_POINTER, ept_pointer),
	FIELD64(EOI_EXIT_BITMAP0, eoi_exit_bitmap0),
	FIELD64(EOI_EXIT_BITMAP1, eoi_exit_bitmap1),
	FIELD64(EOI_EXIT_BITMAP2, eoi_exit_bitmap2),
	FIELD64(EOI_EXIT_BITMAP3, eoi_exit_bitmap3),
	FIELD64(EPTP_LIST_ADDRESS, eptp_list_address),
	FIELD64(VMREAD_BITMAP, vmread_bitmap),
	FIELD64(VMWRITE_BITMAP, vmwrite_bitmap),
	FIELD64(XSS_EXIT_BITMAP, xss_exit_bitmap),
	FIELD64(GUEST_PHYSICAL_ADDRESS, guest_physical_address),
	FIELD64(VMCS_LINK_POINTER, vmcs_link_pointer),
	FIELD64(GUEST_IA32_DEBUGCTL, guest_ia32_debugctl),
	FIELD64(GUEST_IA32_PAT, guest_ia32_pat),
	FIELD64(GUEST_IA32_EFER, guest_ia32_efer),
	FIELD64(GUEST_IA32_PERF_GLOBAL_CTRL, guest_ia32_perf_global_ctrl),
	FIELD64(GUEST_PDPTR0, guest_pdptr0),
	FIELD64(GUEST_PDPTR1, guest_pdptr1),
	FIELD64(GUEST_PDPTR2, guest_pdptr2),
	FIELD64(GUEST_PDPTR3, guest_pdptr3),
	FIELD64(GUEST_BNDCFGS, guest_bndcfgs),
	FIELD64(HOST_IA32_PAT, host_ia32_pat),
	FIELD64(HOST_IA32_EFER, host_ia32_efer),
	FIELD64(HOST_IA32_PERF_GLOBAL_CTRL, host_ia32_perf_global_ctrl),
	FIELD(PIN_BASED_VM_EXEC_CONTROL, pin_based_vm_exec_control),
	FIELD(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control),
	FIELD(EXCEPTION_BITMAP, exception_bitmap),
	FIELD(PAGE_FAULT_ERROR_CODE_MASK, page_fault_error_code_mask),
	FIELD(PAGE_FAULT_ERROR_CODE_MATCH, page_fault_error_code_match),
	FIELD(CR3_TARGET_COUNT, cr3_target_count),
	FIELD(VM_EXIT_CONTROLS, vm_exit_controls),
	FIELD(VM_EXIT_MSR_STORE_COUNT, vm_exit_msr_store_count),
	FIELD(VM_EXIT_MSR_LOAD_COUNT, vm_exit_msr_load_count),
	FIELD(VM_ENTRY_CONTROLS, vm_entry_controls),
	FIELD(VM_ENTRY_MSR_LOAD_COUNT, vm_entry_msr_load_count),
	FIELD(VM_ENTRY_INTR_INFO_FIELD, vm_entry_intr_info_field),
	FIELD(VM_ENTRY_EXCEPTION_ERROR_CODE, vm_entry_exception_error_code),
	FIELD(VM_ENTRY_INSTRUCTION_LEN, vm_entry_instruction_len),
	FIELD(TPR_THRESHOLD, tpr_threshold),
	FIELD(SECONDARY_VM_EXEC_CONTROL, secondary_vm_exec_control),
	FIELD(VM_INSTRUCTION_ERROR, vm_instruction_error),
	FIELD(VM_EXIT_REASON, vm_exit_reason),
	FIELD(VM_EXIT_INTR_INFO, vm_exit_intr_info),
	FIELD(VM_EXIT_INTR_ERROR_CODE, vm_exit_intr_error_code),
	FIELD(IDT_VECTORING_INFO_FIELD, idt_vectoring_info_field),
	FIELD(IDT_VECTORING_ERROR_CODE, idt_vectoring_error_code),
	FIELD(VM_EXIT_INSTRUCTION_LEN, vm_exit_instruction_len),
	FIELD(VMX_INSTRUCTION_INFO, vmx_instruction_info),
	FIELD(GUEST_ES_LIMIT, guest_es_limit),
	FIELD(GUEST_CS_LIMIT, guest_cs_limit),
	FIELD(GUEST_SS_LIMIT, guest_ss_limit),
	FIELD(GUEST_DS_LIMIT, guest_ds_limit),
	FIELD(GUEST_FS_LIMIT, guest_fs_limit),
	FIELD(GUEST_GS_LIMIT, guest_gs_limit),
	FIELD(GUEST_LDTR_LIMIT, guest_ldtr_limit),
	FIELD(GUEST_TR_LIMIT, guest_tr_limit),
	FIELD(GUEST_GDTR_LIMIT, guest_gdtr_limit),
	FIELD(GUEST_IDTR_LIMIT, guest_idtr_limit),
	FIELD(GUEST_ES_AR_BYTES, guest_es_ar_bytes),
	FIELD(GUEST_CS_AR_BYTES, guest_cs_ar_bytes),
	FIELD(GUEST_SS_AR_BYTES, guest_ss_ar_bytes),
	FIELD(GUEST_DS_AR_BYTES, guest_ds_ar_bytes),
	FIELD(GUEST_FS_AR_BYTES, guest_fs_ar_bytes),
	FIELD(GUEST_GS_AR_BYTES, guest_gs_ar_bytes),
	FIELD(GUEST_LDTR_AR_BYTES, guest_ldtr_ar_bytes),
	FIELD(GUEST_TR_AR_BYTES, guest_tr_ar_bytes),
	FIELD(GUEST_INTERRUPTIBILITY_INFO, guest_interruptibility_info),
	FIELD(GUEST_ACTIVITY_STATE, guest_activity_state),
	FIELD(GUEST_SYSENTER_CS, guest_sysenter_cs),
	FIELD(HOST_IA32_SYSENTER_CS, host_ia32_sysenter_cs),
	FIELD(VMX_PREEMPTION_TIMER_VALUE, vmx_preemption_timer_value),
	FIELD(CR0_GUEST_HOST_MASK, cr0_guest_host_mask),
	FIELD(CR4_GUEST_HOST_MASK, cr4_guest_host_mask),
	FIELD(CR0_READ_SHADOW, cr0_read_shadow),
	FIELD(CR4_READ_SHADOW, cr4_read_shadow),
	FIELD(CR3_TARGET_VALUE0, cr3_target_value0),
	FIELD(CR3_TARGET_VALUE1, cr3_target_value1),
	FIELD(CR3_TARGET_VALUE2, cr3_target_value2),
	FIELD(CR3_TARGET_VALUE3, cr3_target_value3),
	FIELD(EXIT_QUALIFICATION, exit_qualification),
	FIELD(GUEST_LINEAR_ADDRESS, guest_linear_address),
	FIELD(GUEST_CR0, guest_cr0),
	FIELD(GUEST_CR3, guest_cr3),
	FIELD(GUEST_CR4, guest_cr4),
	FIELD(GUEST_ES_BASE, guest_es_base),
	FIELD(GUEST_CS_BASE, guest_cs_base),
	FIELD(GUEST_SS_BASE, guest_ss_base),
	FIELD(GUEST_DS_BASE, guest_ds_base),
	FIELD(GUEST_FS_BASE, guest_fs_base),
	FIELD(GUEST_GS_BASE, guest_gs_base),
	FIELD(GUEST_LDTR_BASE, guest_ldtr_base),
	FIELD(GUEST_TR_BASE, guest_tr_base),
	FIELD(GUEST_GDTR_BASE, guest_gdtr_base),
	FIELD(GUEST_IDTR_BASE, guest_idtr_base),
	FIELD(GUEST_DR7, guest_dr7),
	FIELD(GUEST_RSP, guest_rsp),
	FIELD(GUEST_RIP, guest_rip),
	FIELD(GUEST_RFLAGS, guest_rflags),
	FIELD(GUEST_PENDING_DBG_EXCEPTIONS, guest_pending_dbg_exceptions),
	FIELD(GUEST_SYSENTER_ESP, guest_sysenter_esp),
	FIELD(GUEST_SYSENTER_EIP, guest_sysenter_eip),
	FIELD(HOST_CR0, host_cr0),
	FIELD(HOST_CR3, host_cr3),
	FIELD(HOST_CR4, host_cr4),
	FIELD(HOST_FS_BASE, host_fs_base),
	FIELD(HOST_GS_BASE, host_gs_base),
	FIELD(HOST_TR_BASE, host_tr_base),
	FIELD(HOST_GDTR_BASE, host_gdtr_base),
	FIELD(HOST_IDTR_BASE, host_idtr_base),
	FIELD(HOST_IA32_SYSENTER_ESP, host_ia32_sysenter_esp),
	FIELD(HOST_IA32_SYSENTER_EIP, host_ia32_sysenter_eip),
	FIELD(HOST_RSP, host_rsp),
	FIELD(HOST_RIP, host_rip),
};

static inline short vmcs_field_to_offset(unsigned long field)
{
	const size_t size = ARRAY_SIZE(vmcs_field_to_offset_table);
	unsigned short offset;
	unsigned index;

	if (field >> 15)
		return -ENOENT;

	index = ROL16(field, 6);
	if (index >= size)
		return -ENOENT;

	index = array_index_nospec(index, size);
	offset = vmcs_field_to_offset_table[index];
	if (offset == 0)
		return -ENOENT;
	return offset;
}
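/*
 * Note on the helper above (explanatory only): the table is sparse, so a
 * zero entry means "no such field" and is reported as -ENOENT; offset 0
 * itself can never be a legitimate result because it would alias the vmcs12
 * header.  The array_index_nospec() clamp keeps the guest-supplied field
 * encoding from speculatively indexing past the end of the table
 * (Spectre v1 hardening).
 */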
static inline struct vmcs12 *get_vmcs12(struct kvm_vcpu *vcpu)
{
	return to_vmx(vcpu)->nested.cached_vmcs12;
}

static inline struct vmcs12 *get_shadow_vmcs12(struct kvm_vcpu *vcpu)
{
	return to_vmx(vcpu)->nested.cached_shadow_vmcs12;
}

static bool nested_ept_ad_enabled(struct kvm_vcpu *vcpu);
static unsigned long nested_ept_get_cr3(struct kvm_vcpu *vcpu);
static u64 construct_eptp(struct kvm_vcpu *vcpu, unsigned long root_hpa);
static bool vmx_xsaves_supported(void);
static void vmx_set_segment(struct kvm_vcpu *vcpu,
			    struct kvm_segment *var, int seg);
static void vmx_get_segment(struct kvm_vcpu *vcpu,
			    struct kvm_segment *var, int seg);
static bool guest_state_valid(struct kvm_vcpu *vcpu);
static u32 vmx_segment_access_rights(struct kvm_segment *var);
static void copy_shadow_to_vmcs12(struct vcpu_vmx *vmx);
static bool vmx_get_nmi_mask(struct kvm_vcpu *vcpu);
static void vmx_set_nmi_mask(struct kvm_vcpu *vcpu, bool masked);
static bool nested_vmx_is_page_fault_vmexit(struct vmcs12 *vmcs12,
					    u16 error_code);
static void vmx_update_msr_bitmap(struct kvm_vcpu *vcpu);
static void __always_inline vmx_disable_intercept_for_msr(unsigned long *msr_bitmap,
							  u32 msr, int type);

static DEFINE_PER_CPU(struct vmcs *, vmxarea);
static DEFINE_PER_CPU(struct vmcs *, current_vmcs);
/*
 * We maintain a per-CPU linked-list of VMCS loaded on that CPU. This is needed
 * when a CPU is brought down, and we need to VMCLEAR all VMCSs loaded on it.
 */
static DEFINE_PER_CPU(struct list_head, loaded_vmcss_on_cpu);

/*
 * We maintain a per-CPU linked-list of vCPUs, so in wakeup_handler() we
 * can find which vCPU should be woken up.
 */
static DEFINE_PER_CPU(struct list_head, blocked_vcpu_on_cpu);
static DEFINE_PER_CPU(spinlock_t, blocked_vcpu_on_cpu_lock);
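/*
 * Clarifying note (no functional content): blocked_vcpu_on_cpu holds the
 * vCPUs that blocked on this CPU while VT-d posted interrupts were
 * configured for them.  When such an interrupt arrives, the posted-interrupt
 * wakeup vector runs wakeup_handler(), which walks this list under
 * blocked_vcpu_on_cpu_lock and kicks the vCPU the interrupt targets.
 */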
1145
Radim Krčmář23611332016-09-29 22:41:33 +02001146enum {
Radim Krčmář23611332016-09-29 22:41:33 +02001147 VMX_VMREAD_BITMAP,
1148 VMX_VMWRITE_BITMAP,
1149 VMX_BITMAP_NR
1150};
1151
1152static unsigned long *vmx_bitmap[VMX_BITMAP_NR];
1153
Radim Krčmář23611332016-09-29 22:41:33 +02001154#define vmx_vmread_bitmap (vmx_bitmap[VMX_VMREAD_BITMAP])
1155#define vmx_vmwrite_bitmap (vmx_bitmap[VMX_VMWRITE_BITMAP])
He, Qingfdef3ad2007-04-30 09:45:24 +03001156
Avi Kivity110312c2010-12-21 12:54:20 +02001157static bool cpu_has_load_ia32_efer;
Gleb Natapov8bf00a52011-10-05 14:01:22 +02001158static bool cpu_has_load_perf_global_ctrl;
Avi Kivity110312c2010-12-21 12:54:20 +02001159
Sheng Yang2384d2b2008-01-17 15:14:33 +08001160static DECLARE_BITMAP(vmx_vpid_bitmap, VMX_NR_VPIDS);
1161static DEFINE_SPINLOCK(vmx_vpid_lock);
1162
Yang, Sheng1c3d14fe2007-07-29 11:07:42 +03001163static struct vmcs_config {
Avi Kivity6aa8b732006-12-10 02:21:36 -08001164 int size;
1165 int order;
Jan Dakinevich9ac7e3e2016-09-04 21:23:15 +03001166 u32 basic_cap;
Avi Kivity6aa8b732006-12-10 02:21:36 -08001167 u32 revision_id;
Yang, Sheng1c3d14fe2007-07-29 11:07:42 +03001168 u32 pin_based_exec_ctrl;
1169 u32 cpu_based_exec_ctrl;
Sheng Yangf78e0e22007-10-29 09:40:42 +08001170 u32 cpu_based_2nd_exec_ctrl;
Yang, Sheng1c3d14fe2007-07-29 11:07:42 +03001171 u32 vmexit_ctrl;
1172 u32 vmentry_ctrl;
Paolo Bonzini13893092018-02-26 13:40:09 +01001173 struct nested_vmx_msrs nested;
Yang, Sheng1c3d14fe2007-07-29 11:07:42 +03001174} vmcs_config;
Avi Kivity6aa8b732006-12-10 02:21:36 -08001175
Hannes Ederefff9e52008-11-28 17:02:06 +01001176static struct vmx_capability {
Sheng Yangd56f5462008-04-25 10:13:16 +08001177 u32 ept;
1178 u32 vpid;
1179} vmx_capability;
1180
Avi Kivity6aa8b732006-12-10 02:21:36 -08001181#define VMX_SEGMENT_FIELD(seg) \
1182 [VCPU_SREG_##seg] = { \
1183 .selector = GUEST_##seg##_SELECTOR, \
1184 .base = GUEST_##seg##_BASE, \
1185 .limit = GUEST_##seg##_LIMIT, \
1186 .ar_bytes = GUEST_##seg##_AR_BYTES, \
1187 }
1188
Mathias Krause772e0312012-08-30 01:30:19 +02001189static const struct kvm_vmx_segment_field {
Avi Kivity6aa8b732006-12-10 02:21:36 -08001190 unsigned selector;
1191 unsigned base;
1192 unsigned limit;
1193 unsigned ar_bytes;
1194} kvm_vmx_segment_fields[] = {
1195 VMX_SEGMENT_FIELD(CS),
1196 VMX_SEGMENT_FIELD(DS),
1197 VMX_SEGMENT_FIELD(ES),
1198 VMX_SEGMENT_FIELD(FS),
1199 VMX_SEGMENT_FIELD(GS),
1200 VMX_SEGMENT_FIELD(SS),
1201 VMX_SEGMENT_FIELD(TR),
1202 VMX_SEGMENT_FIELD(LDTR),
1203};
1204
Avi Kivity26bb0982009-09-07 11:14:12 +03001205static u64 host_efer;
1206
Avi Kivity6de4f3a2009-05-31 22:58:47 +03001207static void ept_save_pdptrs(struct kvm_vcpu *vcpu);
1208
Avi Kivity4d56c8a2007-04-19 14:28:44 +03001209/*
Brian Gerst8c065852010-07-17 09:03:26 -04001210 * Keep MSR_STAR at the end, as setup_msrs() will try to optimize it
Avi Kivity4d56c8a2007-04-19 14:28:44 +03001211 * away by decrementing the array size.
1212 */
Avi Kivity6aa8b732006-12-10 02:21:36 -08001213static const u32 vmx_msr_index[] = {
Avi Kivity05b3e0c2006-12-13 00:33:45 -08001214#ifdef CONFIG_X86_64
Avi Kivity44ea2b12009-09-06 15:55:37 +03001215 MSR_SYSCALL_MASK, MSR_LSTAR, MSR_CSTAR,
Avi Kivity6aa8b732006-12-10 02:21:36 -08001216#endif
Brian Gerst8c065852010-07-17 09:03:26 -04001217 MSR_EFER, MSR_TSC_AUX, MSR_STAR,
Avi Kivity6aa8b732006-12-10 02:21:36 -08001218};
Avi Kivity6aa8b732006-12-10 02:21:36 -08001219
Vitaly Kuznetsov773e8a02018-03-20 15:02:11 +01001220DEFINE_STATIC_KEY_FALSE(enable_evmcs);
1221
1222#define current_evmcs ((struct hv_enlightened_vmcs *)this_cpu_read(current_vmcs))
1223
1224#define KVM_EVMCS_VERSION 1
1225
1226#if IS_ENABLED(CONFIG_HYPERV)
1227static bool __read_mostly enlightened_vmcs = true;
1228module_param(enlightened_vmcs, bool, 0444);
1229
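/*
 * The evmcs_write*()/evmcs_read*() accessors below operate directly on the
 * enlightened VMCS in memory. Every write also clears the corresponding
 * "clean field" bit so that Hyper-V reloads that group of fields on the
 * next enlightened VM entry.
 */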
1230static inline void evmcs_write64(unsigned long field, u64 value)
1231{
1232 u16 clean_field;
1233 int offset = get_evmcs_offset(field, &clean_field);
1234
1235 if (offset < 0)
1236 return;
1237
1238 *(u64 *)((char *)current_evmcs + offset) = value;
1239
1240 current_evmcs->hv_clean_fields &= ~clean_field;
1241}
1242
1243static inline void evmcs_write32(unsigned long field, u32 value)
1244{
1245 u16 clean_field;
1246 int offset = get_evmcs_offset(field, &clean_field);
1247
1248 if (offset < 0)
1249 return;
1250
1251 *(u32 *)((char *)current_evmcs + offset) = value;
1252 current_evmcs->hv_clean_fields &= ~clean_field;
1253}
1254
1255static inline void evmcs_write16(unsigned long field, u16 value)
1256{
1257 u16 clean_field;
1258 int offset = get_evmcs_offset(field, &clean_field);
1259
1260 if (offset < 0)
1261 return;
1262
1263 *(u16 *)((char *)current_evmcs + offset) = value;
1264 current_evmcs->hv_clean_fields &= ~clean_field;
1265}
1266
1267static inline u64 evmcs_read64(unsigned long field)
1268{
1269 int offset = get_evmcs_offset(field, NULL);
1270
1271 if (offset < 0)
1272 return 0;
1273
1274 return *(u64 *)((char *)current_evmcs + offset);
1275}
1276
1277static inline u32 evmcs_read32(unsigned long field)
1278{
1279 int offset = get_evmcs_offset(field, NULL);
1280
1281 if (offset < 0)
1282 return 0;
1283
1284 return *(u32 *)((char *)current_evmcs + offset);
1285}
1286
1287static inline u16 evmcs_read16(unsigned long field)
1288{
1289 int offset = get_evmcs_offset(field, NULL);
1290
1291 if (offset < 0)
1292 return 0;
1293
1294 return *(u16 *)((char *)current_evmcs + offset);
1295}
1296
Vitaly Kuznetsovceef7d12018-04-16 12:50:33 +02001297static inline void evmcs_touch_msr_bitmap(void)
1298{
1299 if (unlikely(!current_evmcs))
1300 return;
1301
1302 if (current_evmcs->hv_enlightenments_control.msr_bitmap)
1303 current_evmcs->hv_clean_fields &=
1304 ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_MSR_BITMAP;
1305}
1306
Vitaly Kuznetsov773e8a02018-03-20 15:02:11 +01001307static void evmcs_load(u64 phys_addr)
1308{
1309 struct hv_vp_assist_page *vp_ap =
1310 hv_get_vp_assist_page(smp_processor_id());
1311
1312 vp_ap->current_nested_vmcs = phys_addr;
1313 vp_ap->enlighten_vmentry = 1;
1314}
1315
1316static void evmcs_sanitize_exec_ctrls(struct vmcs_config *vmcs_conf)
1317{
1318 /*
1319 * Enlightened VMCSv1 doesn't support these:
1320 *
1321 * POSTED_INTR_NV = 0x00000002,
1322 * GUEST_INTR_STATUS = 0x00000810,
1323 * APIC_ACCESS_ADDR = 0x00002014,
1324 * POSTED_INTR_DESC_ADDR = 0x00002016,
1325 * EOI_EXIT_BITMAP0 = 0x0000201c,
1326 * EOI_EXIT_BITMAP1 = 0x0000201e,
1327 * EOI_EXIT_BITMAP2 = 0x00002020,
1328 * EOI_EXIT_BITMAP3 = 0x00002022,
1329 */
1330 vmcs_conf->pin_based_exec_ctrl &= ~PIN_BASED_POSTED_INTR;
1331 vmcs_conf->cpu_based_2nd_exec_ctrl &=
1332 ~SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY;
1333 vmcs_conf->cpu_based_2nd_exec_ctrl &=
1334 ~SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES;
1335 vmcs_conf->cpu_based_2nd_exec_ctrl &=
1336 ~SECONDARY_EXEC_APIC_REGISTER_VIRT;
1337
1338 /*
1339 * GUEST_PML_INDEX = 0x00000812,
1340 * PML_ADDRESS = 0x0000200e,
1341 */
1342 vmcs_conf->cpu_based_2nd_exec_ctrl &= ~SECONDARY_EXEC_ENABLE_PML;
1343
1344 /* VM_FUNCTION_CONTROL = 0x00002018, */
1345 vmcs_conf->cpu_based_2nd_exec_ctrl &= ~SECONDARY_EXEC_ENABLE_VMFUNC;
1346
1347 /*
1348 * EPTP_LIST_ADDRESS = 0x00002024,
1349 * VMREAD_BITMAP = 0x00002026,
1350 * VMWRITE_BITMAP = 0x00002028,
1351 */
1352 vmcs_conf->cpu_based_2nd_exec_ctrl &= ~SECONDARY_EXEC_SHADOW_VMCS;
1353
1354 /*
1355 * TSC_MULTIPLIER = 0x00002032,
1356 */
1357 vmcs_conf->cpu_based_2nd_exec_ctrl &= ~SECONDARY_EXEC_TSC_SCALING;
1358
1359 /*
1360 * PLE_GAP = 0x00004020,
1361 * PLE_WINDOW = 0x00004022,
1362 */
1363 vmcs_conf->cpu_based_2nd_exec_ctrl &= ~SECONDARY_EXEC_PAUSE_LOOP_EXITING;
1364
1365 /*
1366 * VMX_PREEMPTION_TIMER_VALUE = 0x0000482E,
1367 */
1368 vmcs_conf->pin_based_exec_ctrl &= ~PIN_BASED_VMX_PREEMPTION_TIMER;
1369
1370 /*
1371 * GUEST_IA32_PERF_GLOBAL_CTRL = 0x00002808,
1372 * HOST_IA32_PERF_GLOBAL_CTRL = 0x00002c04,
1373 */
1374 vmcs_conf->vmexit_ctrl &= ~VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL;
1375 vmcs_conf->vmentry_ctrl &= ~VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL;
1376
1377 /*
1378 * Currently unsupported in KVM:
1379 * GUEST_IA32_RTIT_CTL = 0x00002814,
1380 */
1381}
Tianyu Lan877ad952018-07-19 08:40:23 +00001382
 1383/* check_ept_pointer_match() must be called under protection of ept_pointer_lock. */
1384static void check_ept_pointer_match(struct kvm *kvm)
1385{
1386 struct kvm_vcpu *vcpu;
1387 u64 tmp_eptp = INVALID_PAGE;
1388 int i;
1389
1390 kvm_for_each_vcpu(i, vcpu, kvm) {
1391 if (!VALID_PAGE(tmp_eptp)) {
1392 tmp_eptp = to_vmx(vcpu)->ept_pointer;
1393 } else if (tmp_eptp != to_vmx(vcpu)->ept_pointer) {
1394 to_kvm_vmx(kvm)->ept_pointers_match
1395 = EPT_POINTERS_MISMATCH;
1396 return;
1397 }
1398 }
1399
1400 to_kvm_vmx(kvm)->ept_pointers_match = EPT_POINTERS_MATCH;
1401}
1402
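/*
 * Hyper-V enlightened remote TLB flush. This only works when all vCPUs of
 * the VM share the same EPT pointer; on a mismatch we return -ENOTSUPP,
 * presumably letting the caller fall back to the generic flush path.
 */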
1403static int vmx_hv_remote_flush_tlb(struct kvm *kvm)
1404{
1405 int ret;
1406
1407 spin_lock(&to_kvm_vmx(kvm)->ept_pointer_lock);
1408
1409 if (to_kvm_vmx(kvm)->ept_pointers_match == EPT_POINTERS_CHECK)
1410 check_ept_pointer_match(kvm);
1411
1412 if (to_kvm_vmx(kvm)->ept_pointers_match != EPT_POINTERS_MATCH) {
1413 ret = -ENOTSUPP;
1414 goto out;
1415 }
1416
1417 ret = hyperv_flush_guest_mapping(
1418 to_vmx(kvm_get_vcpu(kvm, 0))->ept_pointer);
1419
1420out:
1421 spin_unlock(&to_kvm_vmx(kvm)->ept_pointer_lock);
1422 return ret;
1423}
Vitaly Kuznetsov773e8a02018-03-20 15:02:11 +01001424#else /* !IS_ENABLED(CONFIG_HYPERV) */
1425static inline void evmcs_write64(unsigned long field, u64 value) {}
1426static inline void evmcs_write32(unsigned long field, u32 value) {}
1427static inline void evmcs_write16(unsigned long field, u16 value) {}
1428static inline u64 evmcs_read64(unsigned long field) { return 0; }
1429static inline u32 evmcs_read32(unsigned long field) { return 0; }
1430static inline u16 evmcs_read16(unsigned long field) { return 0; }
1431static inline void evmcs_load(u64 phys_addr) {}
1432static inline void evmcs_sanitize_exec_ctrls(struct vmcs_config *vmcs_conf) {}
Vitaly Kuznetsovceef7d12018-04-16 12:50:33 +02001433static inline void evmcs_touch_msr_bitmap(void) {}
Vitaly Kuznetsov773e8a02018-03-20 15:02:11 +01001434#endif /* IS_ENABLED(CONFIG_HYPERV) */
1435
Jan Kiszka5bb16012016-02-09 20:14:21 +01001436static inline bool is_exception_n(u32 intr_info, u8 vector)
Avi Kivity6aa8b732006-12-10 02:21:36 -08001437{
1438 return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VECTOR_MASK |
1439 INTR_INFO_VALID_MASK)) ==
Jan Kiszka5bb16012016-02-09 20:14:21 +01001440 (INTR_TYPE_HARD_EXCEPTION | vector | INTR_INFO_VALID_MASK);
1441}
1442
Jan Kiszka6f054852016-02-09 20:15:18 +01001443static inline bool is_debug(u32 intr_info)
1444{
1445 return is_exception_n(intr_info, DB_VECTOR);
1446}
1447
1448static inline bool is_breakpoint(u32 intr_info)
1449{
1450 return is_exception_n(intr_info, BP_VECTOR);
1451}
1452
Jan Kiszka5bb16012016-02-09 20:14:21 +01001453static inline bool is_page_fault(u32 intr_info)
1454{
1455 return is_exception_n(intr_info, PF_VECTOR);
Avi Kivity6aa8b732006-12-10 02:21:36 -08001456}
1457
Gui Jianfeng31299942010-03-15 17:29:09 +08001458static inline bool is_no_device(u32 intr_info)
Anthony Liguori2ab455c2007-04-27 09:29:49 +03001459{
Jan Kiszka5bb16012016-02-09 20:14:21 +01001460 return is_exception_n(intr_info, NM_VECTOR);
Anthony Liguori2ab455c2007-04-27 09:29:49 +03001461}
1462
Gui Jianfeng31299942010-03-15 17:29:09 +08001463static inline bool is_invalid_opcode(u32 intr_info)
Anthony Liguori7aa81cc2007-09-17 14:57:50 -05001464{
Jan Kiszka5bb16012016-02-09 20:14:21 +01001465 return is_exception_n(intr_info, UD_VECTOR);
Anthony Liguori7aa81cc2007-09-17 14:57:50 -05001466}
1467
Liran Alon9e869482018-03-12 13:12:51 +02001468static inline bool is_gp_fault(u32 intr_info)
1469{
1470 return is_exception_n(intr_info, GP_VECTOR);
1471}
1472
Gui Jianfeng31299942010-03-15 17:29:09 +08001473static inline bool is_external_interrupt(u32 intr_info)
Avi Kivity6aa8b732006-12-10 02:21:36 -08001474{
1475 return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VALID_MASK))
1476 == (INTR_TYPE_EXT_INTR | INTR_INFO_VALID_MASK);
1477}
1478
Gui Jianfeng31299942010-03-15 17:29:09 +08001479static inline bool is_machine_check(u32 intr_info)
Andi Kleena0861c02009-06-08 17:37:09 +08001480{
1481 return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VECTOR_MASK |
1482 INTR_INFO_VALID_MASK)) ==
1483 (INTR_TYPE_HARD_EXCEPTION | MC_VECTOR | INTR_INFO_VALID_MASK);
1484}
1485
Linus Torvalds32d43cd2018-03-20 12:16:59 -07001486/* Undocumented: icebp/int1 */
1487static inline bool is_icebp(u32 intr_info)
1488{
1489 return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VALID_MASK))
1490 == (INTR_TYPE_PRIV_SW_EXCEPTION | INTR_INFO_VALID_MASK);
1491}
1492
Gui Jianfeng31299942010-03-15 17:29:09 +08001493static inline bool cpu_has_vmx_msr_bitmap(void)
Sheng Yang25c5f222008-03-28 13:18:56 +08001494{
Sheng Yang04547152009-04-01 15:52:31 +08001495 return vmcs_config.cpu_based_exec_ctrl & CPU_BASED_USE_MSR_BITMAPS;
Sheng Yang25c5f222008-03-28 13:18:56 +08001496}
1497
Gui Jianfeng31299942010-03-15 17:29:09 +08001498static inline bool cpu_has_vmx_tpr_shadow(void)
Yang, Sheng6e5d8652007-09-12 18:03:11 +08001499{
Sheng Yang04547152009-04-01 15:52:31 +08001500 return vmcs_config.cpu_based_exec_ctrl & CPU_BASED_TPR_SHADOW;
Yang, Sheng6e5d8652007-09-12 18:03:11 +08001501}
1502
Paolo Bonzini35754c92015-07-29 12:05:37 +02001503static inline bool cpu_need_tpr_shadow(struct kvm_vcpu *vcpu)
Yang, Sheng6e5d8652007-09-12 18:03:11 +08001504{
Paolo Bonzini35754c92015-07-29 12:05:37 +02001505 return cpu_has_vmx_tpr_shadow() && lapic_in_kernel(vcpu);
Yang, Sheng6e5d8652007-09-12 18:03:11 +08001506}
1507
Gui Jianfeng31299942010-03-15 17:29:09 +08001508static inline bool cpu_has_secondary_exec_ctrls(void)
Sheng Yangf78e0e22007-10-29 09:40:42 +08001509{
Sheng Yang04547152009-04-01 15:52:31 +08001510 return vmcs_config.cpu_based_exec_ctrl &
1511 CPU_BASED_ACTIVATE_SECONDARY_CONTROLS;
Sheng Yangf78e0e22007-10-29 09:40:42 +08001512}
1513
Avi Kivity774ead32007-12-26 13:57:04 +02001514static inline bool cpu_has_vmx_virtualize_apic_accesses(void)
Sheng Yangf78e0e22007-10-29 09:40:42 +08001515{
Sheng Yang04547152009-04-01 15:52:31 +08001516 return vmcs_config.cpu_based_2nd_exec_ctrl &
1517 SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES;
1518}
1519
Yang Zhang8d146952013-01-25 10:18:50 +08001520static inline bool cpu_has_vmx_virtualize_x2apic_mode(void)
1521{
1522 return vmcs_config.cpu_based_2nd_exec_ctrl &
1523 SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE;
1524}
1525
Yang Zhang83d4c282013-01-25 10:18:49 +08001526static inline bool cpu_has_vmx_apic_register_virt(void)
1527{
1528 return vmcs_config.cpu_based_2nd_exec_ctrl &
1529 SECONDARY_EXEC_APIC_REGISTER_VIRT;
1530}
1531
Yang Zhangc7c9c562013-01-25 10:18:51 +08001532static inline bool cpu_has_vmx_virtual_intr_delivery(void)
1533{
1534 return vmcs_config.cpu_based_2nd_exec_ctrl &
1535 SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY;
1536}
1537
Yunhong Jiang64672c92016-06-13 14:19:59 -07001538/*
1539 * Comment's format: document - errata name - stepping - processor name.
 1540 * Taken from
1541 * https://www.virtualbox.org/svn/vbox/trunk/src/VBox/VMM/VMMR0/HMR0.cpp
1542 */
1543static u32 vmx_preemption_cpu_tfms[] = {
1544/* 323344.pdf - BA86 - D0 - Xeon 7500 Series */
15450x000206E6,
1546/* 323056.pdf - AAX65 - C2 - Xeon L3406 */
1547/* 322814.pdf - AAT59 - C2 - i7-600, i5-500, i5-400 and i3-300 Mobile */
1548/* 322911.pdf - AAU65 - C2 - i5-600, i3-500 Desktop and Pentium G6950 */
15490x00020652,
1550/* 322911.pdf - AAU65 - K0 - i5-600, i3-500 Desktop and Pentium G6950 */
15510x00020655,
1552/* 322373.pdf - AAO95 - B1 - Xeon 3400 Series */
1553/* 322166.pdf - AAN92 - B1 - i7-800 and i5-700 Desktop */
1554/*
1555 * 320767.pdf - AAP86 - B1 -
1556 * i7-900 Mobile Extreme, i7-800 and i7-700 Mobile
1557 */
15580x000106E5,
1559/* 321333.pdf - AAM126 - C0 - Xeon 3500 */
15600x000106A0,
1561/* 321333.pdf - AAM126 - C1 - Xeon 3500 */
15620x000106A1,
1563/* 320836.pdf - AAJ124 - C0 - i7-900 Desktop Extreme and i7-900 Desktop */
15640x000106A4,
1565 /* 321333.pdf - AAM126 - D0 - Xeon 3500 */
1566 /* 321324.pdf - AAK139 - D0 - Xeon 5500 */
1567 /* 320836.pdf - AAJ124 - D0 - i7-900 Extreme and i7-900 Desktop */
15680x000106A5,
1569};
1570
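/*
 * Returns true if the host CPU's family/model/stepping signature
 * (CPUID.01H:EAX with reserved bits masked) matches one of the affected
 * parts listed above.
 */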
1571static inline bool cpu_has_broken_vmx_preemption_timer(void)
1572{
1573 u32 eax = cpuid_eax(0x00000001), i;
1574
1575 /* Clear the reserved bits */
1576 eax &= ~(0x3U << 14 | 0xfU << 28);
Wei Yongjun03f6a222016-07-04 15:13:07 +00001577 for (i = 0; i < ARRAY_SIZE(vmx_preemption_cpu_tfms); i++)
Yunhong Jiang64672c92016-06-13 14:19:59 -07001578 if (eax == vmx_preemption_cpu_tfms[i])
1579 return true;
1580
1581 return false;
1582}
1583
1584static inline bool cpu_has_vmx_preemption_timer(void)
1585{
Yunhong Jiang64672c92016-06-13 14:19:59 -07001586 return vmcs_config.pin_based_exec_ctrl &
1587 PIN_BASED_VMX_PREEMPTION_TIMER;
1588}
1589
Yang Zhang01e439b2013-04-11 19:25:12 +08001590static inline bool cpu_has_vmx_posted_intr(void)
1591{
Paolo Bonzinid6a858d2015-09-28 11:58:14 +02001592 return IS_ENABLED(CONFIG_X86_LOCAL_APIC) &&
1593 vmcs_config.pin_based_exec_ctrl & PIN_BASED_POSTED_INTR;
Yang Zhang01e439b2013-04-11 19:25:12 +08001594}
1595
1596static inline bool cpu_has_vmx_apicv(void)
1597{
1598 return cpu_has_vmx_apic_register_virt() &&
1599 cpu_has_vmx_virtual_intr_delivery() &&
1600 cpu_has_vmx_posted_intr();
1601}
1602
Sheng Yang04547152009-04-01 15:52:31 +08001603static inline bool cpu_has_vmx_flexpriority(void)
1604{
1605 return cpu_has_vmx_tpr_shadow() &&
1606 cpu_has_vmx_virtualize_apic_accesses();
Sheng Yangf78e0e22007-10-29 09:40:42 +08001607}
1608
Marcelo Tosattie7997942009-06-11 12:07:40 -03001609static inline bool cpu_has_vmx_ept_execute_only(void)
1610{
Gui Jianfeng31299942010-03-15 17:29:09 +08001611 return vmx_capability.ept & VMX_EPT_EXECUTE_ONLY_BIT;
Marcelo Tosattie7997942009-06-11 12:07:40 -03001612}
1613
Marcelo Tosattie7997942009-06-11 12:07:40 -03001614static inline bool cpu_has_vmx_ept_2m_page(void)
1615{
Gui Jianfeng31299942010-03-15 17:29:09 +08001616 return vmx_capability.ept & VMX_EPT_2MB_PAGE_BIT;
Marcelo Tosattie7997942009-06-11 12:07:40 -03001617}
1618
Sheng Yang878403b2010-01-05 19:02:29 +08001619static inline bool cpu_has_vmx_ept_1g_page(void)
1620{
Gui Jianfeng31299942010-03-15 17:29:09 +08001621 return vmx_capability.ept & VMX_EPT_1GB_PAGE_BIT;
Sheng Yang878403b2010-01-05 19:02:29 +08001622}
1623
Sheng Yang4bc9b982010-06-02 14:05:24 +08001624static inline bool cpu_has_vmx_ept_4levels(void)
1625{
1626 return vmx_capability.ept & VMX_EPT_PAGE_WALK_4_BIT;
1627}
1628
David Hildenbrand42aa53b2017-08-10 23:15:29 +02001629static inline bool cpu_has_vmx_ept_mt_wb(void)
1630{
1631 return vmx_capability.ept & VMX_EPTP_WB_BIT;
1632}
1633
Yu Zhang855feb62017-08-24 20:27:55 +08001634static inline bool cpu_has_vmx_ept_5levels(void)
1635{
1636 return vmx_capability.ept & VMX_EPT_PAGE_WALK_5_BIT;
1637}
1638
Xudong Hao83c3a332012-05-28 19:33:35 +08001639static inline bool cpu_has_vmx_ept_ad_bits(void)
1640{
1641 return vmx_capability.ept & VMX_EPT_AD_BIT;
1642}
1643
Gui Jianfeng31299942010-03-15 17:29:09 +08001644static inline bool cpu_has_vmx_invept_context(void)
Sheng Yangd56f5462008-04-25 10:13:16 +08001645{
Gui Jianfeng31299942010-03-15 17:29:09 +08001646 return vmx_capability.ept & VMX_EPT_EXTENT_CONTEXT_BIT;
Sheng Yangd56f5462008-04-25 10:13:16 +08001647}
1648
Gui Jianfeng31299942010-03-15 17:29:09 +08001649static inline bool cpu_has_vmx_invept_global(void)
Sheng Yangd56f5462008-04-25 10:13:16 +08001650{
Gui Jianfeng31299942010-03-15 17:29:09 +08001651 return vmx_capability.ept & VMX_EPT_EXTENT_GLOBAL_BIT;
Sheng Yangd56f5462008-04-25 10:13:16 +08001652}
1653
Liran Aloncd9a4912018-05-22 17:16:15 +03001654static inline bool cpu_has_vmx_invvpid_individual_addr(void)
1655{
1656 return vmx_capability.vpid & VMX_VPID_EXTENT_INDIVIDUAL_ADDR_BIT;
1657}
1658
Gui Jianfeng518c8ae2010-06-04 08:51:39 +08001659static inline bool cpu_has_vmx_invvpid_single(void)
1660{
1661 return vmx_capability.vpid & VMX_VPID_EXTENT_SINGLE_CONTEXT_BIT;
1662}
1663
Gui Jianfengb9d762f2010-06-07 10:32:29 +08001664static inline bool cpu_has_vmx_invvpid_global(void)
1665{
1666 return vmx_capability.vpid & VMX_VPID_EXTENT_GLOBAL_CONTEXT_BIT;
1667}
1668
Wanpeng Li08d839c2017-03-23 05:30:08 -07001669static inline bool cpu_has_vmx_invvpid(void)
1670{
1671 return vmx_capability.vpid & VMX_VPID_INVVPID_BIT;
1672}
1673
Gui Jianfeng31299942010-03-15 17:29:09 +08001674static inline bool cpu_has_vmx_ept(void)
Sheng Yangd56f5462008-04-25 10:13:16 +08001675{
Sheng Yang04547152009-04-01 15:52:31 +08001676 return vmcs_config.cpu_based_2nd_exec_ctrl &
1677 SECONDARY_EXEC_ENABLE_EPT;
Sheng Yangd56f5462008-04-25 10:13:16 +08001678}
1679
Gui Jianfeng31299942010-03-15 17:29:09 +08001680static inline bool cpu_has_vmx_unrestricted_guest(void)
Nitin A Kamble3a624e22009-06-08 11:34:16 -07001681{
1682 return vmcs_config.cpu_based_2nd_exec_ctrl &
1683 SECONDARY_EXEC_UNRESTRICTED_GUEST;
1684}
1685
Gui Jianfeng31299942010-03-15 17:29:09 +08001686static inline bool cpu_has_vmx_ple(void)
Zhai, Edwin4b8d54f2009-10-09 18:03:20 +08001687{
1688 return vmcs_config.cpu_based_2nd_exec_ctrl &
1689 SECONDARY_EXEC_PAUSE_LOOP_EXITING;
1690}
1691
Jan Dakinevich9ac7e3e2016-09-04 21:23:15 +03001692static inline bool cpu_has_vmx_basic_inout(void)
1693{
1694 return (((u64)vmcs_config.basic_cap << 32) & VMX_BASIC_INOUT);
1695}
1696
Paolo Bonzini35754c92015-07-29 12:05:37 +02001697static inline bool cpu_need_virtualize_apic_accesses(struct kvm_vcpu *vcpu)
Sheng Yangf78e0e22007-10-29 09:40:42 +08001698{
Paolo Bonzini35754c92015-07-29 12:05:37 +02001699 return flexpriority_enabled && lapic_in_kernel(vcpu);
Sheng Yangf78e0e22007-10-29 09:40:42 +08001700}
1701
Gui Jianfeng31299942010-03-15 17:29:09 +08001702static inline bool cpu_has_vmx_vpid(void)
Sheng Yang2384d2b2008-01-17 15:14:33 +08001703{
Sheng Yang04547152009-04-01 15:52:31 +08001704 return vmcs_config.cpu_based_2nd_exec_ctrl &
1705 SECONDARY_EXEC_ENABLE_VPID;
Sheng Yang2384d2b2008-01-17 15:14:33 +08001706}
1707
Gui Jianfeng31299942010-03-15 17:29:09 +08001708static inline bool cpu_has_vmx_rdtscp(void)
Sheng Yang4e47c7a2009-12-18 16:48:47 +08001709{
1710 return vmcs_config.cpu_based_2nd_exec_ctrl &
1711 SECONDARY_EXEC_RDTSCP;
1712}
1713
Mao, Junjiead756a12012-07-02 01:18:48 +00001714static inline bool cpu_has_vmx_invpcid(void)
1715{
1716 return vmcs_config.cpu_based_2nd_exec_ctrl &
1717 SECONDARY_EXEC_ENABLE_INVPCID;
1718}
1719
Paolo Bonzini8a1b4392017-11-06 13:31:12 +01001720static inline bool cpu_has_virtual_nmis(void)
1721{
1722 return vmcs_config.pin_based_exec_ctrl & PIN_BASED_VIRTUAL_NMIS;
1723}
1724
Sheng Yangf5f48ee2010-06-30 12:25:15 +08001725static inline bool cpu_has_vmx_wbinvd_exit(void)
1726{
1727 return vmcs_config.cpu_based_2nd_exec_ctrl &
1728 SECONDARY_EXEC_WBINVD_EXITING;
1729}
1730
Abel Gordonabc4fc52013-04-18 14:35:25 +03001731static inline bool cpu_has_vmx_shadow_vmcs(void)
1732{
1733 u64 vmx_msr;
1734 rdmsrl(MSR_IA32_VMX_MISC, vmx_msr);
1735 /* check if the cpu supports writing r/o exit information fields */
1736 if (!(vmx_msr & MSR_IA32_VMX_MISC_VMWRITE_SHADOW_RO_FIELDS))
1737 return false;
1738
1739 return vmcs_config.cpu_based_2nd_exec_ctrl &
1740 SECONDARY_EXEC_SHADOW_VMCS;
1741}
1742
Kai Huang843e4332015-01-28 10:54:28 +08001743static inline bool cpu_has_vmx_pml(void)
1744{
1745 return vmcs_config.cpu_based_2nd_exec_ctrl & SECONDARY_EXEC_ENABLE_PML;
1746}
1747
Haozhong Zhang64903d62015-10-20 15:39:09 +08001748static inline bool cpu_has_vmx_tsc_scaling(void)
1749{
1750 return vmcs_config.cpu_based_2nd_exec_ctrl &
1751 SECONDARY_EXEC_TSC_SCALING;
1752}
1753
Bandan Das2a499e42017-08-03 15:54:41 -04001754static inline bool cpu_has_vmx_vmfunc(void)
1755{
1756 return vmcs_config.cpu_based_2nd_exec_ctrl &
1757 SECONDARY_EXEC_ENABLE_VMFUNC;
1758}
1759
Sean Christopherson64f7a112018-04-30 10:01:06 -07001760static bool vmx_umip_emulated(void)
1761{
1762 return vmcs_config.cpu_based_2nd_exec_ctrl &
1763 SECONDARY_EXEC_DESC;
1764}
1765
Sheng Yang04547152009-04-01 15:52:31 +08001766static inline bool report_flexpriority(void)
1767{
1768 return flexpriority_enabled;
1769}
1770
Jim Mattsonc7c2c7092017-05-05 11:28:09 -07001771static inline unsigned nested_cpu_vmx_misc_cr3_count(struct kvm_vcpu *vcpu)
1772{
Paolo Bonzini6677f3d2018-02-26 13:40:08 +01001773 return vmx_misc_cr3_count(to_vmx(vcpu)->nested.msrs.misc_low);
Jim Mattsonc7c2c7092017-05-05 11:28:09 -07001774}
1775
Jim Mattsonf4160e42018-05-29 09:11:33 -07001776/*
1777 * Do the virtual VMX capability MSRs specify that L1 can use VMWRITE
1778 * to modify any valid field of the VMCS, or are the VM-exit
1779 * information fields read-only?
1780 */
1781static inline bool nested_cpu_has_vmwrite_any_field(struct kvm_vcpu *vcpu)
1782{
1783 return to_vmx(vcpu)->nested.msrs.misc_low &
1784 MSR_IA32_VMX_MISC_VMWRITE_SHADOW_RO_FIELDS;
1785}
1786
Marc Orr04473782018-06-20 17:21:29 -07001787static inline bool nested_cpu_has_zero_length_injection(struct kvm_vcpu *vcpu)
1788{
1789 return to_vmx(vcpu)->nested.msrs.misc_low & VMX_MISC_ZERO_LEN_INS;
1790}
1791
1792static inline bool nested_cpu_supports_monitor_trap_flag(struct kvm_vcpu *vcpu)
1793{
1794 return to_vmx(vcpu)->nested.msrs.procbased_ctls_high &
1795 CPU_BASED_MONITOR_TRAP_FLAG;
1796}
1797
Liran Alonfa97d7d2018-07-18 14:07:59 +02001798static inline bool nested_cpu_has_vmx_shadow_vmcs(struct kvm_vcpu *vcpu)
1799{
1800 return to_vmx(vcpu)->nested.msrs.secondary_ctls_high &
1801 SECONDARY_EXEC_SHADOW_VMCS;
1802}
1803
Nadav Har'Elfe3ef052011-05-25 23:10:02 +03001804static inline bool nested_cpu_has(struct vmcs12 *vmcs12, u32 bit)
1805{
1806 return vmcs12->cpu_based_vm_exec_control & bit;
1807}
1808
1809static inline bool nested_cpu_has2(struct vmcs12 *vmcs12, u32 bit)
1810{
1811 return (vmcs12->cpu_based_vm_exec_control &
1812 CPU_BASED_ACTIVATE_SECONDARY_CONTROLS) &&
1813 (vmcs12->secondary_vm_exec_control & bit);
1814}
1815
Jan Kiszkaf41245002014-03-07 20:03:13 +01001816static inline bool nested_cpu_has_preemption_timer(struct vmcs12 *vmcs12)
1817{
1818 return vmcs12->pin_based_vm_exec_control &
1819 PIN_BASED_VMX_PREEMPTION_TIMER;
1820}
1821
Krish Sadhukhan0c7f6502018-02-20 21:24:39 -05001822static inline bool nested_cpu_has_nmi_exiting(struct vmcs12 *vmcs12)
1823{
1824 return vmcs12->pin_based_vm_exec_control & PIN_BASED_NMI_EXITING;
1825}
1826
1827static inline bool nested_cpu_has_virtual_nmis(struct vmcs12 *vmcs12)
1828{
1829 return vmcs12->pin_based_vm_exec_control & PIN_BASED_VIRTUAL_NMIS;
1830}
1831
Nadav Har'El155a97a2013-08-05 11:07:16 +03001832static inline int nested_cpu_has_ept(struct vmcs12 *vmcs12)
1833{
1834 return nested_cpu_has2(vmcs12, SECONDARY_EXEC_ENABLE_EPT);
1835}
1836
Wanpeng Li81dc01f2014-12-04 19:11:07 +08001837static inline bool nested_cpu_has_xsaves(struct vmcs12 *vmcs12)
1838{
Paolo Bonzini3db13482017-08-24 14:48:03 +02001839 return nested_cpu_has2(vmcs12, SECONDARY_EXEC_XSAVES);
Wanpeng Li81dc01f2014-12-04 19:11:07 +08001840}
1841
Bandan Dasc5f983f2017-05-05 15:25:14 -04001842static inline bool nested_cpu_has_pml(struct vmcs12 *vmcs12)
1843{
1844 return nested_cpu_has2(vmcs12, SECONDARY_EXEC_ENABLE_PML);
1845}
1846
Wincy Vanf2b93282015-02-03 23:56:03 +08001847static inline bool nested_cpu_has_virt_x2apic_mode(struct vmcs12 *vmcs12)
1848{
1849 return nested_cpu_has2(vmcs12, SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE);
1850}
1851
Wanpeng Li5c614b32015-10-13 09:18:36 -07001852static inline bool nested_cpu_has_vpid(struct vmcs12 *vmcs12)
1853{
1854 return nested_cpu_has2(vmcs12, SECONDARY_EXEC_ENABLE_VPID);
1855}
1856
Wincy Van82f0dd42015-02-03 23:57:18 +08001857static inline bool nested_cpu_has_apic_reg_virt(struct vmcs12 *vmcs12)
1858{
1859 return nested_cpu_has2(vmcs12, SECONDARY_EXEC_APIC_REGISTER_VIRT);
1860}
1861
Wincy Van608406e2015-02-03 23:57:51 +08001862static inline bool nested_cpu_has_vid(struct vmcs12 *vmcs12)
1863{
1864 return nested_cpu_has2(vmcs12, SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY);
1865}
1866
Wincy Van705699a2015-02-03 23:58:17 +08001867static inline bool nested_cpu_has_posted_intr(struct vmcs12 *vmcs12)
1868{
1869 return vmcs12->pin_based_vm_exec_control & PIN_BASED_POSTED_INTR;
1870}
1871
Bandan Das27c42a12017-08-03 15:54:42 -04001872static inline bool nested_cpu_has_vmfunc(struct vmcs12 *vmcs12)
1873{
1874 return nested_cpu_has2(vmcs12, SECONDARY_EXEC_ENABLE_VMFUNC);
1875}
1876
Bandan Das41ab9372017-08-03 15:54:43 -04001877static inline bool nested_cpu_has_eptp_switching(struct vmcs12 *vmcs12)
1878{
1879 return nested_cpu_has_vmfunc(vmcs12) &&
1880 (vmcs12->vm_function_control &
1881 VMX_VMFUNC_EPTP_SWITCHING);
1882}
1883
Liran Alonf792d272018-06-23 02:35:05 +03001884static inline bool nested_cpu_has_shadow_vmcs(struct vmcs12 *vmcs12)
1885{
1886 return nested_cpu_has2(vmcs12, SECONDARY_EXEC_SHADOW_VMCS);
1887}
1888
Jim Mattsonef85b672016-12-12 11:01:37 -08001889static inline bool is_nmi(u32 intr_info)
Nadav Har'El644d7112011-05-25 23:12:35 +03001890{
1891 return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VALID_MASK))
Jim Mattsonef85b672016-12-12 11:01:37 -08001892 == (INTR_TYPE_NMI_INTR | INTR_INFO_VALID_MASK);
Nadav Har'El644d7112011-05-25 23:12:35 +03001893}
1894
Jan Kiszka533558b2014-01-04 18:47:20 +01001895static void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason,
1896 u32 exit_intr_info,
1897 unsigned long exit_qualification);
Nadav Har'El7c177932011-05-25 23:12:04 +03001898static void nested_vmx_entry_failure(struct kvm_vcpu *vcpu,
1899 struct vmcs12 *vmcs12,
1900 u32 reason, unsigned long qualification);
1901
Rusty Russell8b9cf982007-07-30 16:31:43 +10001902static int __find_msr_index(struct vcpu_vmx *vmx, u32 msr)
Avi Kivity7725f0b2006-12-13 00:34:01 -08001903{
1904 int i;
1905
Gregory Haskinsa2fa3e92007-07-27 08:13:10 -04001906 for (i = 0; i < vmx->nmsrs; ++i)
Avi Kivity26bb0982009-09-07 11:14:12 +03001907 if (vmx_msr_index[vmx->guest_msrs[i].index] == msr)
Eddie Donga75beee2007-05-17 18:55:15 +03001908 return i;
1909 return -1;
1910}
1911
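/*
 * INVVPID takes the flush type in a register and a 128-bit descriptor in
 * memory: the VPID in bits 15:0, reserved bits, and the guest linear
 * address in the upper 64 bits, mirrored by the operand struct below.
 */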
Sheng Yang2384d2b2008-01-17 15:14:33 +08001912static inline void __invvpid(int ext, u16 vpid, gva_t gva)
1913{
1914 struct {
1915 u64 vpid : 16;
1916 u64 rsvd : 48;
1917 u64 gva;
1918 } operand = { vpid, 0, gva };
1919
Avi Kivity4ecac3f2008-05-13 13:23:38 +03001920 asm volatile (__ex(ASM_VMX_INVVPID)
Sheng Yang2384d2b2008-01-17 15:14:33 +08001921 /* CF==1 or ZF==1 --> rc = -1 */
1922 "; ja 1f ; ud2 ; 1:"
1923 : : "a"(&operand), "c"(ext) : "cc", "memory");
1924}
1925
Sheng Yang14394422008-04-28 12:24:45 +08001926static inline void __invept(int ext, u64 eptp, gpa_t gpa)
1927{
1928 struct {
1929 u64 eptp, gpa;
1930 } operand = {eptp, gpa};
1931
Avi Kivity4ecac3f2008-05-13 13:23:38 +03001932 asm volatile (__ex(ASM_VMX_INVEPT)
Sheng Yang14394422008-04-28 12:24:45 +08001933 /* CF==1 or ZF==1 --> rc = -1 */
1934 "; ja 1f ; ud2 ; 1:\n"
1935 : : "a" (&operand), "c" (ext) : "cc", "memory");
1936}
1937
Avi Kivity26bb0982009-09-07 11:14:12 +03001938static struct shared_msr_entry *find_msr_entry(struct vcpu_vmx *vmx, u32 msr)
Eddie Donga75beee2007-05-17 18:55:15 +03001939{
1940 int i;
1941
Rusty Russell8b9cf982007-07-30 16:31:43 +10001942 i = __find_msr_index(vmx, msr);
Eddie Donga75beee2007-05-17 18:55:15 +03001943 if (i >= 0)
Gregory Haskinsa2fa3e92007-07-27 08:13:10 -04001944 return &vmx->guest_msrs[i];
Al Viro8b6d44c2007-02-09 16:38:40 +00001945 return NULL;
Avi Kivity7725f0b2006-12-13 00:34:01 -08001946}
1947
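/*
 * VMCLEAR the given VMCS so its data is flushed to memory and it can later
 * be loaded on another CPU; failures are only reported via printk.
 */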
Avi Kivity6aa8b732006-12-10 02:21:36 -08001948static void vmcs_clear(struct vmcs *vmcs)
1949{
1950 u64 phys_addr = __pa(vmcs);
1951 u8 error;
1952
Avi Kivity4ecac3f2008-05-13 13:23:38 +03001953 asm volatile (__ex(ASM_VMX_VMCLEAR_RAX) "; setna %0"
Avi Kivity16d8f722010-12-21 16:51:50 +02001954 : "=qm"(error) : "a"(&phys_addr), "m"(phys_addr)
Avi Kivity6aa8b732006-12-10 02:21:36 -08001955 : "cc", "memory");
1956 if (error)
1957 printk(KERN_ERR "kvm: vmclear fail: %p/%llx\n",
1958 vmcs, phys_addr);
1959}
1960
Nadav Har'Eld462b812011-05-24 15:26:10 +03001961static inline void loaded_vmcs_init(struct loaded_vmcs *loaded_vmcs)
1962{
1963 vmcs_clear(loaded_vmcs->vmcs);
Jim Mattson355f4fb2016-10-28 08:29:39 -07001964 if (loaded_vmcs->shadow_vmcs && loaded_vmcs->launched)
1965 vmcs_clear(loaded_vmcs->shadow_vmcs);
Nadav Har'Eld462b812011-05-24 15:26:10 +03001966 loaded_vmcs->cpu = -1;
1967 loaded_vmcs->launched = 0;
1968}
1969
Dongxiao Xu7725b892010-05-11 18:29:38 +08001970static void vmcs_load(struct vmcs *vmcs)
1971{
1972 u64 phys_addr = __pa(vmcs);
1973 u8 error;
1974
Vitaly Kuznetsov773e8a02018-03-20 15:02:11 +01001975 if (static_branch_unlikely(&enable_evmcs))
1976 return evmcs_load(phys_addr);
1977
Dongxiao Xu7725b892010-05-11 18:29:38 +08001978 asm volatile (__ex(ASM_VMX_VMPTRLD_RAX) "; setna %0"
Avi Kivity16d8f722010-12-21 16:51:50 +02001979 : "=qm"(error) : "a"(&phys_addr), "m"(phys_addr)
Dongxiao Xu7725b892010-05-11 18:29:38 +08001980 : "cc", "memory");
1981 if (error)
Nadav Har'El2844d842011-05-25 23:16:40 +03001982 printk(KERN_ERR "kvm: vmptrld %p/%llx failed\n",
Dongxiao Xu7725b892010-05-11 18:29:38 +08001983 vmcs, phys_addr);
1984}
1985
Dave Young2965faa2015-09-09 15:38:55 -07001986#ifdef CONFIG_KEXEC_CORE
Zhang Yanfei8f536b72012-12-06 23:43:34 +08001987/*
1988 * This bitmap is used to indicate whether the vmclear
1989 * operation is enabled on all cpus. All disabled by
1990 * default.
1991 */
1992static cpumask_t crash_vmclear_enabled_bitmap = CPU_MASK_NONE;
1993
1994static inline void crash_enable_local_vmclear(int cpu)
1995{
1996 cpumask_set_cpu(cpu, &crash_vmclear_enabled_bitmap);
1997}
1998
1999static inline void crash_disable_local_vmclear(int cpu)
2000{
2001 cpumask_clear_cpu(cpu, &crash_vmclear_enabled_bitmap);
2002}
2003
2004static inline int crash_local_vmclear_enabled(int cpu)
2005{
2006 return cpumask_test_cpu(cpu, &crash_vmclear_enabled_bitmap);
2007}
2008
2009static void crash_vmclear_local_loaded_vmcss(void)
2010{
2011 int cpu = raw_smp_processor_id();
2012 struct loaded_vmcs *v;
2013
2014 if (!crash_local_vmclear_enabled(cpu))
2015 return;
2016
2017 list_for_each_entry(v, &per_cpu(loaded_vmcss_on_cpu, cpu),
2018 loaded_vmcss_on_cpu_link)
2019 vmcs_clear(v->vmcs);
2020}
2021#else
2022static inline void crash_enable_local_vmclear(int cpu) { }
2023static inline void crash_disable_local_vmclear(int cpu) { }
Dave Young2965faa2015-09-09 15:38:55 -07002024#endif /* CONFIG_KEXEC_CORE */
Zhang Yanfei8f536b72012-12-06 23:43:34 +08002025
Nadav Har'Eld462b812011-05-24 15:26:10 +03002026static void __loaded_vmcs_clear(void *arg)
Avi Kivity6aa8b732006-12-10 02:21:36 -08002027{
Nadav Har'Eld462b812011-05-24 15:26:10 +03002028 struct loaded_vmcs *loaded_vmcs = arg;
Ingo Molnard3b2c332007-01-05 16:36:23 -08002029 int cpu = raw_smp_processor_id();
Avi Kivity6aa8b732006-12-10 02:21:36 -08002030
Nadav Har'Eld462b812011-05-24 15:26:10 +03002031 if (loaded_vmcs->cpu != cpu)
2032 return; /* vcpu migration can race with cpu offline */
2033 if (per_cpu(current_vmcs, cpu) == loaded_vmcs->vmcs)
Avi Kivity6aa8b732006-12-10 02:21:36 -08002034 per_cpu(current_vmcs, cpu) = NULL;
Zhang Yanfei8f536b72012-12-06 23:43:34 +08002035 crash_disable_local_vmclear(cpu);
Nadav Har'Eld462b812011-05-24 15:26:10 +03002036 list_del(&loaded_vmcs->loaded_vmcss_on_cpu_link);
Xiao Guangrong5a560f82012-11-28 20:54:14 +08002037
2038 /*
 2039	 * Ensure that the update of loaded_vmcs->loaded_vmcss_on_cpu_link is
 2040	 * visible before loaded_vmcs->cpu is set to -1 in loaded_vmcs_init().
 2041	 * Otherwise another CPU could observe cpu == -1 first and add the VMCS
 2042	 * to its per-cpu list before it is deleted here.
2043 */
2044 smp_wmb();
2045
Nadav Har'Eld462b812011-05-24 15:26:10 +03002046 loaded_vmcs_init(loaded_vmcs);
Zhang Yanfei8f536b72012-12-06 23:43:34 +08002047 crash_enable_local_vmclear(cpu);
Avi Kivity6aa8b732006-12-10 02:21:36 -08002048}
2049
Nadav Har'Eld462b812011-05-24 15:26:10 +03002050static void loaded_vmcs_clear(struct loaded_vmcs *loaded_vmcs)
Avi Kivity8d0be2b2007-02-12 00:54:46 -08002051{
Xiao Guangronge6c7d322012-11-28 20:53:15 +08002052 int cpu = loaded_vmcs->cpu;
2053
2054 if (cpu != -1)
2055 smp_call_function_single(cpu,
2056 __loaded_vmcs_clear, loaded_vmcs, 1);
Avi Kivity8d0be2b2007-02-12 00:54:46 -08002057}
2058
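/*
 * Flush the TLB entry for a single guest virtual address within the given
 * VPID when individual-address INVVPID is supported; returns false so the
 * caller can fall back to a coarser flush otherwise.
 */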
Junaid Shahidfaff8752018-06-29 13:10:05 -07002059static inline bool vpid_sync_vcpu_addr(int vpid, gva_t addr)
2060{
2061 if (vpid == 0)
2062 return true;
2063
2064 if (cpu_has_vmx_invvpid_individual_addr()) {
2065 __invvpid(VMX_VPID_EXTENT_INDIVIDUAL_ADDR, vpid, addr);
2066 return true;
2067 }
2068
2069 return false;
2070}
2071
Wanpeng Lidd5f5342015-09-23 18:26:57 +08002072static inline void vpid_sync_vcpu_single(int vpid)
Sheng Yang2384d2b2008-01-17 15:14:33 +08002073{
Wanpeng Lidd5f5342015-09-23 18:26:57 +08002074 if (vpid == 0)
Sheng Yang2384d2b2008-01-17 15:14:33 +08002075 return;
2076
Gui Jianfeng518c8ae2010-06-04 08:51:39 +08002077 if (cpu_has_vmx_invvpid_single())
Wanpeng Lidd5f5342015-09-23 18:26:57 +08002078 __invvpid(VMX_VPID_EXTENT_SINGLE_CONTEXT, vpid, 0);
Sheng Yang2384d2b2008-01-17 15:14:33 +08002079}
2080
Gui Jianfengb9d762f2010-06-07 10:32:29 +08002081static inline void vpid_sync_vcpu_global(void)
2082{
2083 if (cpu_has_vmx_invvpid_global())
2084 __invvpid(VMX_VPID_EXTENT_ALL_CONTEXT, 0, 0);
2085}
2086
Wanpeng Lidd5f5342015-09-23 18:26:57 +08002087static inline void vpid_sync_context(int vpid)
Gui Jianfengb9d762f2010-06-07 10:32:29 +08002088{
2089 if (cpu_has_vmx_invvpid_single())
Wanpeng Lidd5f5342015-09-23 18:26:57 +08002090 vpid_sync_vcpu_single(vpid);
Gui Jianfengb9d762f2010-06-07 10:32:29 +08002091 else
2092 vpid_sync_vcpu_global();
2093}
2094
Sheng Yang14394422008-04-28 12:24:45 +08002095static inline void ept_sync_global(void)
2096{
David Hildenbrandf5f51582017-08-24 20:51:30 +02002097 __invept(VMX_EPT_EXTENT_GLOBAL, 0, 0);
Sheng Yang14394422008-04-28 12:24:45 +08002098}
2099
2100static inline void ept_sync_context(u64 eptp)
2101{
David Hildenbrand0e1252d2017-08-24 20:51:28 +02002102 if (cpu_has_vmx_invept_context())
2103 __invept(VMX_EPT_EXTENT_CONTEXT, eptp, 0);
2104 else
2105 ept_sync_global();
Sheng Yang14394422008-04-28 12:24:45 +08002106}
2107
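/*
 * Compile-time checks on VMCS field encodings: bits 14:13 of the encoding
 * give the field width (0 = 16-bit, 1 = 64-bit, 2 = 32-bit, 3 = natural
 * width) and bit 0 selects the high half of a 64-bit field, which is what
 * the 0x6000/0x6001 masks below test.
 */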
Paolo Bonzini8a86aea92015-12-03 15:56:55 +01002108static __always_inline void vmcs_check16(unsigned long field)
2109{
2110 BUILD_BUG_ON_MSG(__builtin_constant_p(field) && ((field) & 0x6001) == 0x2000,
2111 "16-bit accessor invalid for 64-bit field");
2112 BUILD_BUG_ON_MSG(__builtin_constant_p(field) && ((field) & 0x6001) == 0x2001,
2113 "16-bit accessor invalid for 64-bit high field");
2114 BUILD_BUG_ON_MSG(__builtin_constant_p(field) && ((field) & 0x6000) == 0x4000,
2115 "16-bit accessor invalid for 32-bit high field");
2116 BUILD_BUG_ON_MSG(__builtin_constant_p(field) && ((field) & 0x6000) == 0x6000,
2117 "16-bit accessor invalid for natural width field");
2118}
2119
2120static __always_inline void vmcs_check32(unsigned long field)
2121{
2122 BUILD_BUG_ON_MSG(__builtin_constant_p(field) && ((field) & 0x6000) == 0,
2123 "32-bit accessor invalid for 16-bit field");
2124 BUILD_BUG_ON_MSG(__builtin_constant_p(field) && ((field) & 0x6000) == 0x6000,
2125 "32-bit accessor invalid for natural width field");
2126}
2127
2128static __always_inline void vmcs_check64(unsigned long field)
2129{
2130 BUILD_BUG_ON_MSG(__builtin_constant_p(field) && ((field) & 0x6000) == 0,
2131 "64-bit accessor invalid for 16-bit field");
2132 BUILD_BUG_ON_MSG(__builtin_constant_p(field) && ((field) & 0x6001) == 0x2001,
2133 "64-bit accessor invalid for 64-bit high field");
2134 BUILD_BUG_ON_MSG(__builtin_constant_p(field) && ((field) & 0x6000) == 0x4000,
2135 "64-bit accessor invalid for 32-bit field");
2136 BUILD_BUG_ON_MSG(__builtin_constant_p(field) && ((field) & 0x6000) == 0x6000,
2137 "64-bit accessor invalid for natural width field");
2138}
2139
2140static __always_inline void vmcs_checkl(unsigned long field)
2141{
2142 BUILD_BUG_ON_MSG(__builtin_constant_p(field) && ((field) & 0x6000) == 0,
2143 "Natural width accessor invalid for 16-bit field");
2144 BUILD_BUG_ON_MSG(__builtin_constant_p(field) && ((field) & 0x6001) == 0x2000,
2145 "Natural width accessor invalid for 64-bit field");
2146 BUILD_BUG_ON_MSG(__builtin_constant_p(field) && ((field) & 0x6001) == 0x2001,
2147 "Natural width accessor invalid for 64-bit high field");
2148 BUILD_BUG_ON_MSG(__builtin_constant_p(field) && ((field) & 0x6000) == 0x4000,
2149 "Natural width accessor invalid for 32-bit field");
2150}
2151
2152static __always_inline unsigned long __vmcs_readl(unsigned long field)
Avi Kivity6aa8b732006-12-10 02:21:36 -08002153{
Avi Kivity5e520e62011-05-15 10:13:12 -04002154 unsigned long value;
Avi Kivity6aa8b732006-12-10 02:21:36 -08002155
Avi Kivity5e520e62011-05-15 10:13:12 -04002156 asm volatile (__ex_clear(ASM_VMX_VMREAD_RDX_RAX, "%0")
2157 : "=a"(value) : "d"(field) : "cc");
Avi Kivity6aa8b732006-12-10 02:21:36 -08002158 return value;
2159}
2160
Avi Kivity96304212011-05-15 10:13:13 -04002161static __always_inline u16 vmcs_read16(unsigned long field)
Avi Kivity6aa8b732006-12-10 02:21:36 -08002162{
Paolo Bonzini8a86aea92015-12-03 15:56:55 +01002163 vmcs_check16(field);
Vitaly Kuznetsov773e8a02018-03-20 15:02:11 +01002164 if (static_branch_unlikely(&enable_evmcs))
2165 return evmcs_read16(field);
Paolo Bonzini8a86aea92015-12-03 15:56:55 +01002166 return __vmcs_readl(field);
Avi Kivity6aa8b732006-12-10 02:21:36 -08002167}
2168
Avi Kivity96304212011-05-15 10:13:13 -04002169static __always_inline u32 vmcs_read32(unsigned long field)
Avi Kivity6aa8b732006-12-10 02:21:36 -08002170{
Paolo Bonzini8a86aea92015-12-03 15:56:55 +01002171 vmcs_check32(field);
Vitaly Kuznetsov773e8a02018-03-20 15:02:11 +01002172 if (static_branch_unlikely(&enable_evmcs))
2173 return evmcs_read32(field);
Paolo Bonzini8a86aea92015-12-03 15:56:55 +01002174 return __vmcs_readl(field);
Avi Kivity6aa8b732006-12-10 02:21:36 -08002175}
2176
Avi Kivity96304212011-05-15 10:13:13 -04002177static __always_inline u64 vmcs_read64(unsigned long field)
Avi Kivity6aa8b732006-12-10 02:21:36 -08002178{
Paolo Bonzini8a86aea92015-12-03 15:56:55 +01002179 vmcs_check64(field);
Vitaly Kuznetsov773e8a02018-03-20 15:02:11 +01002180 if (static_branch_unlikely(&enable_evmcs))
2181 return evmcs_read64(field);
Avi Kivity05b3e0c2006-12-13 00:33:45 -08002182#ifdef CONFIG_X86_64
Paolo Bonzini8a86aea92015-12-03 15:56:55 +01002183 return __vmcs_readl(field);
Avi Kivity6aa8b732006-12-10 02:21:36 -08002184#else
Paolo Bonzini8a86aea92015-12-03 15:56:55 +01002185 return __vmcs_readl(field) | ((u64)__vmcs_readl(field+1) << 32);
Avi Kivity6aa8b732006-12-10 02:21:36 -08002186#endif
2187}
2188
Paolo Bonzini8a86aea92015-12-03 15:56:55 +01002189static __always_inline unsigned long vmcs_readl(unsigned long field)
2190{
2191 vmcs_checkl(field);
Vitaly Kuznetsov773e8a02018-03-20 15:02:11 +01002192 if (static_branch_unlikely(&enable_evmcs))
2193 return evmcs_read64(field);
Paolo Bonzini8a86aea92015-12-03 15:56:55 +01002194 return __vmcs_readl(field);
2195}
2196
Avi Kivitye52de1b2007-01-05 16:36:56 -08002197static noinline void vmwrite_error(unsigned long field, unsigned long value)
2198{
2199 printk(KERN_ERR "vmwrite error: reg %lx value %lx (err %d)\n",
2200 field, value, vmcs_read32(VM_INSTRUCTION_ERROR));
2201 dump_stack();
2202}
2203
Paolo Bonzini8a86aea92015-12-03 15:56:55 +01002204static __always_inline void __vmcs_writel(unsigned long field, unsigned long value)
Avi Kivity6aa8b732006-12-10 02:21:36 -08002205{
2206 u8 error;
2207
Avi Kivity4ecac3f2008-05-13 13:23:38 +03002208 asm volatile (__ex(ASM_VMX_VMWRITE_RAX_RDX) "; setna %0"
Mike Dayd77c26f2007-10-08 09:02:08 -04002209 : "=q"(error) : "a"(value), "d"(field) : "cc");
Avi Kivitye52de1b2007-01-05 16:36:56 -08002210 if (unlikely(error))
2211 vmwrite_error(field, value);
Avi Kivity6aa8b732006-12-10 02:21:36 -08002212}
2213
Paolo Bonzini8a86aea92015-12-03 15:56:55 +01002214static __always_inline void vmcs_write16(unsigned long field, u16 value)
Avi Kivity6aa8b732006-12-10 02:21:36 -08002215{
Paolo Bonzini8a86aea92015-12-03 15:56:55 +01002216 vmcs_check16(field);
Vitaly Kuznetsov773e8a02018-03-20 15:02:11 +01002217 if (static_branch_unlikely(&enable_evmcs))
2218 return evmcs_write16(field, value);
2219
Paolo Bonzini8a86aea92015-12-03 15:56:55 +01002220 __vmcs_writel(field, value);
Avi Kivity6aa8b732006-12-10 02:21:36 -08002221}
2222
Paolo Bonzini8a86aea92015-12-03 15:56:55 +01002223static __always_inline void vmcs_write32(unsigned long field, u32 value)
Avi Kivity6aa8b732006-12-10 02:21:36 -08002224{
Paolo Bonzini8a86aea92015-12-03 15:56:55 +01002225 vmcs_check32(field);
Vitaly Kuznetsov773e8a02018-03-20 15:02:11 +01002226 if (static_branch_unlikely(&enable_evmcs))
2227 return evmcs_write32(field, value);
2228
Paolo Bonzini8a86aea92015-12-03 15:56:55 +01002229 __vmcs_writel(field, value);
Avi Kivity6aa8b732006-12-10 02:21:36 -08002230}
2231
Paolo Bonzini8a86aea92015-12-03 15:56:55 +01002232static __always_inline void vmcs_write64(unsigned long field, u64 value)
Avi Kivity6aa8b732006-12-10 02:21:36 -08002233{
Paolo Bonzini8a86aea92015-12-03 15:56:55 +01002234 vmcs_check64(field);
Vitaly Kuznetsov773e8a02018-03-20 15:02:11 +01002235 if (static_branch_unlikely(&enable_evmcs))
2236 return evmcs_write64(field, value);
2237
Paolo Bonzini8a86aea92015-12-03 15:56:55 +01002238 __vmcs_writel(field, value);
Avi Kivity7682f2d2008-05-12 19:25:43 +03002239#ifndef CONFIG_X86_64
Avi Kivity6aa8b732006-12-10 02:21:36 -08002240 asm volatile ("");
Paolo Bonzini8a86aea92015-12-03 15:56:55 +01002241 __vmcs_writel(field+1, value >> 32);
Avi Kivity6aa8b732006-12-10 02:21:36 -08002242#endif
2243}
2244
Paolo Bonzini8a86aea92015-12-03 15:56:55 +01002245static __always_inline void vmcs_writel(unsigned long field, unsigned long value)
Anthony Liguori2ab455c2007-04-27 09:29:49 +03002246{
Paolo Bonzini8a86aea92015-12-03 15:56:55 +01002247 vmcs_checkl(field);
Vitaly Kuznetsov773e8a02018-03-20 15:02:11 +01002248 if (static_branch_unlikely(&enable_evmcs))
2249 return evmcs_write64(field, value);
2250
Paolo Bonzini8a86aea92015-12-03 15:56:55 +01002251 __vmcs_writel(field, value);
Anthony Liguori2ab455c2007-04-27 09:29:49 +03002252}
2253
Paolo Bonzini8a86aea92015-12-03 15:56:55 +01002254static __always_inline void vmcs_clear_bits(unsigned long field, u32 mask)
Anthony Liguori2ab455c2007-04-27 09:29:49 +03002255{
Paolo Bonzini8a86aea92015-12-03 15:56:55 +01002256 BUILD_BUG_ON_MSG(__builtin_constant_p(field) && ((field) & 0x6000) == 0x2000,
2257 "vmcs_clear_bits does not support 64-bit fields");
Vitaly Kuznetsov773e8a02018-03-20 15:02:11 +01002258 if (static_branch_unlikely(&enable_evmcs))
2259 return evmcs_write32(field, evmcs_read32(field) & ~mask);
2260
Paolo Bonzini8a86aea92015-12-03 15:56:55 +01002261 __vmcs_writel(field, __vmcs_readl(field) & ~mask);
2262}
2263
2264static __always_inline void vmcs_set_bits(unsigned long field, u32 mask)
2265{
2266 BUILD_BUG_ON_MSG(__builtin_constant_p(field) && ((field) & 0x6000) == 0x2000,
2267 "vmcs_set_bits does not support 64-bit fields");
Vitaly Kuznetsov773e8a02018-03-20 15:02:11 +01002268 if (static_branch_unlikely(&enable_evmcs))
2269 return evmcs_write32(field, evmcs_read32(field) | mask);
2270
Paolo Bonzini8a86aea92015-12-03 15:56:55 +01002271 __vmcs_writel(field, __vmcs_readl(field) | mask);
Anthony Liguori2ab455c2007-04-27 09:29:49 +03002272}
2273
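/*
 * VM_ENTRY_CONTROLS/VM_EXIT_CONTROLS shadowing: the helpers below cache the
 * current control values in struct vcpu_vmx so that set/clear operations
 * can skip the VMWRITE when the value does not change.
 */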
Paolo Bonzini8391ce42016-07-07 14:58:33 +02002274static inline void vm_entry_controls_reset_shadow(struct vcpu_vmx *vmx)
2275{
2276 vmx->vm_entry_controls_shadow = vmcs_read32(VM_ENTRY_CONTROLS);
2277}
2278
Gleb Natapov2961e8762013-11-25 15:37:13 +02002279static inline void vm_entry_controls_init(struct vcpu_vmx *vmx, u32 val)
2280{
2281 vmcs_write32(VM_ENTRY_CONTROLS, val);
2282 vmx->vm_entry_controls_shadow = val;
2283}
2284
2285static inline void vm_entry_controls_set(struct vcpu_vmx *vmx, u32 val)
2286{
2287 if (vmx->vm_entry_controls_shadow != val)
2288 vm_entry_controls_init(vmx, val);
2289}
2290
2291static inline u32 vm_entry_controls_get(struct vcpu_vmx *vmx)
2292{
2293 return vmx->vm_entry_controls_shadow;
2294}
2295
2296
2297static inline void vm_entry_controls_setbit(struct vcpu_vmx *vmx, u32 val)
2298{
2299 vm_entry_controls_set(vmx, vm_entry_controls_get(vmx) | val);
2300}
2301
2302static inline void vm_entry_controls_clearbit(struct vcpu_vmx *vmx, u32 val)
2303{
2304 vm_entry_controls_set(vmx, vm_entry_controls_get(vmx) & ~val);
2305}
2306
Paolo Bonzini8391ce42016-07-07 14:58:33 +02002307static inline void vm_exit_controls_reset_shadow(struct vcpu_vmx *vmx)
2308{
2309 vmx->vm_exit_controls_shadow = vmcs_read32(VM_EXIT_CONTROLS);
2310}
2311
Gleb Natapov2961e8762013-11-25 15:37:13 +02002312static inline void vm_exit_controls_init(struct vcpu_vmx *vmx, u32 val)
2313{
2314 vmcs_write32(VM_EXIT_CONTROLS, val);
2315 vmx->vm_exit_controls_shadow = val;
2316}
2317
2318static inline void vm_exit_controls_set(struct vcpu_vmx *vmx, u32 val)
2319{
2320 if (vmx->vm_exit_controls_shadow != val)
2321 vm_exit_controls_init(vmx, val);
2322}
2323
2324static inline u32 vm_exit_controls_get(struct vcpu_vmx *vmx)
2325{
2326 return vmx->vm_exit_controls_shadow;
2327}
2328
2329
2330static inline void vm_exit_controls_setbit(struct vcpu_vmx *vmx, u32 val)
2331{
2332 vm_exit_controls_set(vmx, vm_exit_controls_get(vmx) | val);
2333}
2334
2335static inline void vm_exit_controls_clearbit(struct vcpu_vmx *vmx, u32 val)
2336{
2337 vm_exit_controls_set(vmx, vm_exit_controls_get(vmx) & ~val);
2338}
2339
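/*
 * Guest segment register cache: selector/base/limit/AR bytes are read from
 * the VMCS at most once until the cache is cleared; segment_cache.bitmask
 * tracks which fields are currently valid.
 */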
Avi Kivity2fb92db2011-04-27 19:42:18 +03002340static void vmx_segment_cache_clear(struct vcpu_vmx *vmx)
2341{
2342 vmx->segment_cache.bitmask = 0;
2343}
2344
2345static bool vmx_segment_cache_test_set(struct vcpu_vmx *vmx, unsigned seg,
2346 unsigned field)
2347{
2348 bool ret;
2349 u32 mask = 1 << (seg * SEG_FIELD_NR + field);
2350
2351 if (!(vmx->vcpu.arch.regs_avail & (1 << VCPU_EXREG_SEGMENTS))) {
2352 vmx->vcpu.arch.regs_avail |= (1 << VCPU_EXREG_SEGMENTS);
2353 vmx->segment_cache.bitmask = 0;
2354 }
2355 ret = vmx->segment_cache.bitmask & mask;
2356 vmx->segment_cache.bitmask |= mask;
2357 return ret;
2358}
2359
2360static u16 vmx_read_guest_seg_selector(struct vcpu_vmx *vmx, unsigned seg)
2361{
2362 u16 *p = &vmx->segment_cache.seg[seg].selector;
2363
2364 if (!vmx_segment_cache_test_set(vmx, seg, SEG_FIELD_SEL))
2365 *p = vmcs_read16(kvm_vmx_segment_fields[seg].selector);
2366 return *p;
2367}
2368
2369static ulong vmx_read_guest_seg_base(struct vcpu_vmx *vmx, unsigned seg)
2370{
2371 ulong *p = &vmx->segment_cache.seg[seg].base;
2372
2373 if (!vmx_segment_cache_test_set(vmx, seg, SEG_FIELD_BASE))
2374 *p = vmcs_readl(kvm_vmx_segment_fields[seg].base);
2375 return *p;
2376}
2377
2378static u32 vmx_read_guest_seg_limit(struct vcpu_vmx *vmx, unsigned seg)
2379{
2380 u32 *p = &vmx->segment_cache.seg[seg].limit;
2381
2382 if (!vmx_segment_cache_test_set(vmx, seg, SEG_FIELD_LIMIT))
2383 *p = vmcs_read32(kvm_vmx_segment_fields[seg].limit);
2384 return *p;
2385}
2386
2387static u32 vmx_read_guest_seg_ar(struct vcpu_vmx *vmx, unsigned seg)
2388{
2389 u32 *p = &vmx->segment_cache.seg[seg].ar;
2390
2391 if (!vmx_segment_cache_test_set(vmx, seg, SEG_FIELD_AR))
2392 *p = vmcs_read32(kvm_vmx_segment_fields[seg].ar_bytes);
2393 return *p;
2394}
2395
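/*
 * Recompute which guest exceptions must cause a VM exit. The result
 * depends on guest-debug settings, real-mode emulation, EPT and, when
 * running a nested guest, on the exception bitmap requested by L1.
 */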
Avi Kivityabd3f2d2007-05-02 17:57:40 +03002396static void update_exception_bitmap(struct kvm_vcpu *vcpu)
2397{
2398 u32 eb;
2399
Jan Kiszkafd7373c2010-01-20 18:20:20 +01002400 eb = (1u << PF_VECTOR) | (1u << UD_VECTOR) | (1u << MC_VECTOR) |
Paolo Bonzinibd7e5b02017-02-03 21:18:52 -08002401 (1u << DB_VECTOR) | (1u << AC_VECTOR);
Liran Alon9e869482018-03-12 13:12:51 +02002402 /*
2403 * Guest access to VMware backdoor ports could legitimately
2404 * trigger #GP because of TSS I/O permission bitmap.
2405 * We intercept those #GP and allow access to them anyway
2406 * as VMware does.
2407 */
2408 if (enable_vmware_backdoor)
2409 eb |= (1u << GP_VECTOR);
Jan Kiszkafd7373c2010-01-20 18:20:20 +01002410 if ((vcpu->guest_debug &
2411 (KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP)) ==
2412 (KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP))
2413 eb |= 1u << BP_VECTOR;
Avi Kivity7ffd92c2009-06-09 14:10:45 +03002414 if (to_vmx(vcpu)->rmode.vm86_active)
Avi Kivityabd3f2d2007-05-02 17:57:40 +03002415 eb = ~0;
Avi Kivity089d0342009-03-23 18:26:32 +02002416 if (enable_ept)
Sheng Yang14394422008-04-28 12:24:45 +08002417 eb &= ~(1u << PF_VECTOR); /* bypass_guest_pf = 0 */
Nadav Har'El36cf24e2011-05-25 23:15:08 +03002418
2419 /* When we are running a nested L2 guest and L1 specified for it a
2420 * certain exception bitmap, we must trap the same exceptions and pass
2421 * them to L1. When running L2, we will only handle the exceptions
2422 * specified above if L1 did not want them.
2423 */
2424 if (is_guest_mode(vcpu))
2425 eb |= get_vmcs12(vcpu)->exception_bitmap;
2426
Avi Kivityabd3f2d2007-05-02 17:57:40 +03002427 vmcs_write32(EXCEPTION_BITMAP, eb);
2428}
2429
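/*
 * MSR bitmap layout (one 4 KiB page): the write-intercept bits for MSRs
 * 0x00000000-0x00001fff start at offset 0x800 and those for MSRs
 * 0xc0000000-0xc0001fff at offset 0xc00, one bit per MSR, hence the offsets
 * used by the two helpers below.
 */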
Ashok Raj15d45072018-02-01 22:59:43 +01002430/*
KarimAllah Ahmedd28b3872018-02-01 22:59:45 +01002431 * Check if MSR is intercepted for currently loaded MSR bitmap.
2432 */
2433static bool msr_write_intercepted(struct kvm_vcpu *vcpu, u32 msr)
2434{
2435 unsigned long *msr_bitmap;
2436 int f = sizeof(unsigned long);
2437
2438 if (!cpu_has_vmx_msr_bitmap())
2439 return true;
2440
2441 msr_bitmap = to_vmx(vcpu)->loaded_vmcs->msr_bitmap;
2442
2443 if (msr <= 0x1fff) {
2444 return !!test_bit(msr, msr_bitmap + 0x800 / f);
2445 } else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff)) {
2446 msr &= 0x1fff;
2447 return !!test_bit(msr, msr_bitmap + 0xc00 / f);
2448 }
2449
2450 return true;
2451}
2452
2453/*
Ashok Raj15d45072018-02-01 22:59:43 +01002454 * Check if MSR is intercepted for L01 MSR bitmap.
2455 */
2456static bool msr_write_intercepted_l01(struct kvm_vcpu *vcpu, u32 msr)
2457{
2458 unsigned long *msr_bitmap;
2459 int f = sizeof(unsigned long);
2460
2461 if (!cpu_has_vmx_msr_bitmap())
2462 return true;
2463
2464 msr_bitmap = to_vmx(vcpu)->vmcs01.msr_bitmap;
2465
2466 if (msr <= 0x1fff) {
2467 return !!test_bit(msr, msr_bitmap + 0x800 / f);
2468 } else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff)) {
2469 msr &= 0x1fff;
2470 return !!test_bit(msr, msr_bitmap + 0xc00 / f);
2471 }
2472
2473 return true;
2474}
2475
Gleb Natapov2961e8762013-11-25 15:37:13 +02002476static void clear_atomic_switch_msr_special(struct vcpu_vmx *vmx,
2477 unsigned long entry, unsigned long exit)
Gleb Natapov8bf00a52011-10-05 14:01:22 +02002478{
Gleb Natapov2961e8762013-11-25 15:37:13 +02002479 vm_entry_controls_clearbit(vmx, entry);
2480 vm_exit_controls_clearbit(vmx, exit);
Gleb Natapov8bf00a52011-10-05 14:01:22 +02002481}
2482
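/*
 * EFER and PERF_GLOBAL_CTRL can be switched via dedicated VM-entry/VM-exit
 * controls when the CPU supports them; the clear/add helpers below prefer
 * those and otherwise fall back to the MSR autoload lists.
 */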
Avi Kivity61d2ef22010-04-28 16:40:38 +03002483static void clear_atomic_switch_msr(struct vcpu_vmx *vmx, unsigned msr)
2484{
2485 unsigned i;
2486 struct msr_autoload *m = &vmx->msr_autoload;
2487
Gleb Natapov8bf00a52011-10-05 14:01:22 +02002488 switch (msr) {
2489 case MSR_EFER:
2490 if (cpu_has_load_ia32_efer) {
Gleb Natapov2961e8762013-11-25 15:37:13 +02002491 clear_atomic_switch_msr_special(vmx,
2492 VM_ENTRY_LOAD_IA32_EFER,
Gleb Natapov8bf00a52011-10-05 14:01:22 +02002493 VM_EXIT_LOAD_IA32_EFER);
2494 return;
2495 }
2496 break;
2497 case MSR_CORE_PERF_GLOBAL_CTRL:
2498 if (cpu_has_load_perf_global_ctrl) {
Gleb Natapov2961e8762013-11-25 15:37:13 +02002499 clear_atomic_switch_msr_special(vmx,
Gleb Natapov8bf00a52011-10-05 14:01:22 +02002500 VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL,
2501 VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL);
2502 return;
2503 }
2504 break;
Avi Kivity110312c2010-12-21 12:54:20 +02002505 }
2506
Avi Kivity61d2ef22010-04-28 16:40:38 +03002507 for (i = 0; i < m->nr; ++i)
2508 if (m->guest[i].index == msr)
2509 break;
2510
2511 if (i == m->nr)
2512 return;
2513 --m->nr;
2514 m->guest[i] = m->guest[m->nr];
2515 m->host[i] = m->host[m->nr];
2516 vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, m->nr);
2517 vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, m->nr);
2518}
2519
Gleb Natapov2961e8762013-11-25 15:37:13 +02002520static void add_atomic_switch_msr_special(struct vcpu_vmx *vmx,
2521 unsigned long entry, unsigned long exit,
2522 unsigned long guest_val_vmcs, unsigned long host_val_vmcs,
2523 u64 guest_val, u64 host_val)
Gleb Natapov8bf00a52011-10-05 14:01:22 +02002524{
2525 vmcs_write64(guest_val_vmcs, guest_val);
2526 vmcs_write64(host_val_vmcs, host_val);
Gleb Natapov2961e8762013-11-25 15:37:13 +02002527 vm_entry_controls_setbit(vmx, entry);
2528 vm_exit_controls_setbit(vmx, exit);
Gleb Natapov8bf00a52011-10-05 14:01:22 +02002529}
2530
Avi Kivity61d2ef22010-04-28 16:40:38 +03002531static void add_atomic_switch_msr(struct vcpu_vmx *vmx, unsigned msr,
2532 u64 guest_val, u64 host_val)
2533{
2534 unsigned i;
2535 struct msr_autoload *m = &vmx->msr_autoload;
2536
Gleb Natapov8bf00a52011-10-05 14:01:22 +02002537 switch (msr) {
2538 case MSR_EFER:
2539 if (cpu_has_load_ia32_efer) {
Gleb Natapov2961e8762013-11-25 15:37:13 +02002540 add_atomic_switch_msr_special(vmx,
2541 VM_ENTRY_LOAD_IA32_EFER,
Gleb Natapov8bf00a52011-10-05 14:01:22 +02002542 VM_EXIT_LOAD_IA32_EFER,
2543 GUEST_IA32_EFER,
2544 HOST_IA32_EFER,
2545 guest_val, host_val);
2546 return;
2547 }
2548 break;
2549 case MSR_CORE_PERF_GLOBAL_CTRL:
2550 if (cpu_has_load_perf_global_ctrl) {
Gleb Natapov2961e8762013-11-25 15:37:13 +02002551 add_atomic_switch_msr_special(vmx,
Gleb Natapov8bf00a52011-10-05 14:01:22 +02002552 VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL,
2553 VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL,
2554 GUEST_IA32_PERF_GLOBAL_CTRL,
2555 HOST_IA32_PERF_GLOBAL_CTRL,
2556 guest_val, host_val);
2557 return;
2558 }
2559 break;
Radim Krčmář7099e2e2016-03-04 15:08:42 +01002560 case MSR_IA32_PEBS_ENABLE:
2561 /* PEBS needs a quiescent period after being disabled (to write
2562 * a record). Disabling PEBS through VMX MSR swapping doesn't
2563 * provide that period, so a CPU could write host's record into
2564 * guest's memory.
2565 */
2566 wrmsrl(MSR_IA32_PEBS_ENABLE, 0);
Avi Kivity110312c2010-12-21 12:54:20 +02002567 }
2568
Avi Kivity61d2ef22010-04-28 16:40:38 +03002569 for (i = 0; i < m->nr; ++i)
2570 if (m->guest[i].index == msr)
2571 break;
2572
Gleb Natapove7fc6f93b2011-10-05 14:01:24 +02002573 if (i == NR_AUTOLOAD_MSRS) {
Michael S. Tsirkin60266202013-10-31 00:34:56 +02002574 printk_once(KERN_WARNING "Not enough msr switch entries. "
Gleb Natapove7fc6f93b2011-10-05 14:01:24 +02002575 "Can't add msr %x\n", msr);
2576 return;
2577 } else if (i == m->nr) {
Avi Kivity61d2ef22010-04-28 16:40:38 +03002578 ++m->nr;
2579 vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, m->nr);
2580 vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, m->nr);
2581 }
2582
2583 m->guest[i].index = msr;
2584 m->guest[i].value = guest_val;
2585 m->host[i].index = msr;
2586 m->host[i].value = host_val;
2587}
2588
Avi Kivity92c0d902009-10-29 11:00:16 +02002589static bool update_transition_efer(struct vcpu_vmx *vmx, int efer_offset)
Eddie Dong2cc51562007-05-21 07:28:09 +03002590{
Paolo Bonzini844a5fe2016-03-08 12:13:39 +01002591 u64 guest_efer = vmx->vcpu.arch.efer;
2592 u64 ignore_bits = 0;
Eddie Dong2cc51562007-05-21 07:28:09 +03002593
Paolo Bonzini844a5fe2016-03-08 12:13:39 +01002594 if (!enable_ept) {
2595 /*
2596 * NX is needed to handle CR0.WP=1, CR4.SMEP=1. Testing
2597 * host CPUID is more efficient than testing guest CPUID
2598 * or CR4. Host SMEP is anyway a requirement for guest SMEP.
2599 */
2600 if (boot_cpu_has(X86_FEATURE_SMEP))
2601 guest_efer |= EFER_NX;
2602 else if (!(guest_efer & EFER_NX))
2603 ignore_bits |= EFER_NX;
2604 }
Roel Kluin3a34a882009-08-04 02:08:45 -07002605
Avi Kivity51c6cf62007-08-29 03:48:05 +03002606 /*
Paolo Bonzini844a5fe2016-03-08 12:13:39 +01002607 * LMA and LME handled by hardware; SCE meaningless outside long mode.
Avi Kivity51c6cf62007-08-29 03:48:05 +03002608 */
Paolo Bonzini844a5fe2016-03-08 12:13:39 +01002609 ignore_bits |= EFER_SCE;
Avi Kivity51c6cf62007-08-29 03:48:05 +03002610#ifdef CONFIG_X86_64
2611 ignore_bits |= EFER_LMA | EFER_LME;
2612 /* SCE is meaningful only in long mode on Intel */
2613 if (guest_efer & EFER_LMA)
2614 ignore_bits &= ~(u64)EFER_SCE;
2615#endif
Avi Kivity84ad33e2010-04-28 16:42:29 +03002616
2617 clear_atomic_switch_msr(vmx, MSR_EFER);
Andy Lutomirskif6577a5f2014-11-07 18:25:18 -08002618
2619 /*
2620 * On EPT, we can't emulate NX, so we must switch EFER atomically.
2621 * On CPUs that support "load IA32_EFER", always switch EFER
2622 * atomically, since it's faster than switching it manually.
2623 */
2624 if (cpu_has_load_ia32_efer ||
2625 (enable_ept && ((vmx->vcpu.arch.efer ^ host_efer) & EFER_NX))) {
Avi Kivity84ad33e2010-04-28 16:42:29 +03002626 if (!(guest_efer & EFER_LMA))
2627 guest_efer &= ~EFER_LME;
Andy Lutomirski54b98bf2014-11-10 11:19:15 -08002628 if (guest_efer != host_efer)
2629 add_atomic_switch_msr(vmx, MSR_EFER,
2630 guest_efer, host_efer);
Avi Kivity84ad33e2010-04-28 16:42:29 +03002631 return false;
Paolo Bonzini844a5fe2016-03-08 12:13:39 +01002632 } else {
2633 guest_efer &= ~ignore_bits;
2634 guest_efer |= host_efer & ignore_bits;
Avi Kivity84ad33e2010-04-28 16:42:29 +03002635
Paolo Bonzini844a5fe2016-03-08 12:13:39 +01002636 vmx->guest_msrs[efer_offset].data = guest_efer;
2637 vmx->guest_msrs[efer_offset].mask = ~ignore_bits;
2638
2639 return true;
2640 }
Avi Kivity51c6cf62007-08-29 03:48:05 +03002641}
2642
Andy Lutomirskie28baea2017-02-20 08:56:11 -08002643#ifdef CONFIG_X86_32
2644/*
2645 * On 32-bit kernels, VM exits still load the FS and GS bases from the
2646 * VMCS rather than the segment table. KVM uses this helper to figure
2647 * out the current bases to poke them into the VMCS before entry.
2648 */
Gleb Natapov2d49ec72010-02-25 12:43:09 +02002649static unsigned long segment_base(u16 selector)
2650{
Andy Lutomirski8c2e41f2017-02-20 08:56:12 -08002651 struct desc_struct *table;
Gleb Natapov2d49ec72010-02-25 12:43:09 +02002652 unsigned long v;
2653
Andy Lutomirski8c2e41f2017-02-20 08:56:12 -08002654 if (!(selector & ~SEGMENT_RPL_MASK))
Gleb Natapov2d49ec72010-02-25 12:43:09 +02002655 return 0;
2656
Thomas Garnier45fc8752017-03-14 10:05:08 -07002657 table = get_current_gdt_ro();
Gleb Natapov2d49ec72010-02-25 12:43:09 +02002658
Andy Lutomirski8c2e41f2017-02-20 08:56:12 -08002659 if ((selector & SEGMENT_TI_MASK) == SEGMENT_LDT) {
Gleb Natapov2d49ec72010-02-25 12:43:09 +02002660 u16 ldt_selector = kvm_read_ldt();
2661
Andy Lutomirski8c2e41f2017-02-20 08:56:12 -08002662 if (!(ldt_selector & ~SEGMENT_RPL_MASK))
Gleb Natapov2d49ec72010-02-25 12:43:09 +02002663 return 0;
2664
Andy Lutomirski8c2e41f2017-02-20 08:56:12 -08002665 table = (struct desc_struct *)segment_base(ldt_selector);
Gleb Natapov2d49ec72010-02-25 12:43:09 +02002666 }
Andy Lutomirski8c2e41f2017-02-20 08:56:12 -08002667 v = get_desc_base(&table[selector >> 3]);
Gleb Natapov2d49ec72010-02-25 12:43:09 +02002668 return v;
2669}
Andy Lutomirskie28baea2017-02-20 08:56:11 -08002670#endif
Gleb Natapov2d49ec72010-02-25 12:43:09 +02002671
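/*
 * Save the host's segment state and load the guest's MSR values before
 * entering the guest.  Selectors and bases are cached in vmcs_host_state
 * so the VMCS fields are only rewritten when they actually changed since
 * the last VM entry.
 */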
Sean Christopherson6d6095b2018-07-23 12:32:44 -07002672static void vmx_prepare_switch_to_guest(struct kvm_vcpu *vcpu)
Avi Kivity33ed6322007-05-02 16:54:03 +03002673{
Avi Kivity04d2cc72007-09-10 18:10:54 +03002674 struct vcpu_vmx *vmx = to_vmx(vcpu);
Sean Christophersond7ee0392018-07-23 12:32:47 -07002675 struct vmcs_host_state *host_state;
Arnd Bergmann51e8a8c2018-04-04 12:44:14 +02002676#ifdef CONFIG_X86_64
Vitaly Kuznetsov35060ed2018-03-13 18:48:05 +01002677 int cpu = raw_smp_processor_id();
Arnd Bergmann51e8a8c2018-04-04 12:44:14 +02002678#endif
Sean Christophersone368b872018-07-23 12:32:41 -07002679 unsigned long fs_base, gs_base;
2680 u16 fs_sel, gs_sel;
Avi Kivity26bb0982009-09-07 11:14:12 +03002681 int i;
Avi Kivity04d2cc72007-09-10 18:10:54 +03002682
Sean Christophersonbd9966d2018-07-23 12:32:42 -07002683 if (vmx->loaded_cpu_state)
Avi Kivity33ed6322007-05-02 16:54:03 +03002684 return;
2685
Sean Christophersonbd9966d2018-07-23 12:32:42 -07002686 vmx->loaded_cpu_state = vmx->loaded_vmcs;
Sean Christophersond7ee0392018-07-23 12:32:47 -07002687 host_state = &vmx->loaded_cpu_state->host_state;
Sean Christophersonbd9966d2018-07-23 12:32:42 -07002688
Avi Kivity33ed6322007-05-02 16:54:03 +03002689 /*
2690 * Set host fs and gs selectors. Unfortunately, 22.2.3 does not
2691 * allow segment selectors with cpl > 0 or ti == 1.
2692 */
Sean Christophersond7ee0392018-07-23 12:32:47 -07002693 host_state->ldt_sel = kvm_read_ldt();
Vitaly Kuznetsov42b933b2018-03-13 18:48:04 +01002694
2695#ifdef CONFIG_X86_64
Sean Christophersond7ee0392018-07-23 12:32:47 -07002696 savesegment(ds, host_state->ds_sel);
2697 savesegment(es, host_state->es_sel);
Sean Christophersone368b872018-07-23 12:32:41 -07002698
2699 gs_base = cpu_kernelmode_gs_base(cpu);
Vitaly Kuznetsovb062b792018-07-11 19:37:18 +02002700 if (likely(is_64bit_mm(current->mm))) {
2701 save_fsgs_for_kvm();
Sean Christophersone368b872018-07-23 12:32:41 -07002702 fs_sel = current->thread.fsindex;
2703 gs_sel = current->thread.gsindex;
Vitaly Kuznetsovb062b792018-07-11 19:37:18 +02002704 fs_base = current->thread.fsbase;
Sean Christophersone368b872018-07-23 12:32:41 -07002705 vmx->msr_host_kernel_gs_base = current->thread.gsbase;
Vitaly Kuznetsovb062b792018-07-11 19:37:18 +02002706 } else {
Sean Christophersone368b872018-07-23 12:32:41 -07002707 savesegment(fs, fs_sel);
2708 savesegment(gs, gs_sel);
Vitaly Kuznetsovb062b792018-07-11 19:37:18 +02002709 fs_base = read_msr(MSR_FS_BASE);
Sean Christophersone368b872018-07-23 12:32:41 -07002710 vmx->msr_host_kernel_gs_base = read_msr(MSR_KERNEL_GS_BASE);
Vitaly Kuznetsovb062b792018-07-11 19:37:18 +02002711 }
Sean Christophersone368b872018-07-23 12:32:41 -07002712
2713 if (is_long_mode(&vmx->vcpu))
2714 wrmsrl(MSR_KERNEL_GS_BASE, vmx->msr_guest_kernel_gs_base);
2715#else
2716 savesegment(fs, fs_sel);
2717 savesegment(gs, gs_sel);
2718 fs_base = segment_base(fs_sel);
2719 gs_base = segment_base(gs_sel);
Vitaly Kuznetsov42b933b2018-03-13 18:48:04 +01002720#endif
Sean Christophersone368b872018-07-23 12:32:41 -07002721
Sean Christopherson8f21a0b2018-07-23 12:32:49 -07002722 if (unlikely(fs_sel != host_state->fs_sel)) {
2723 if (!(fs_sel & 7))
2724 vmcs_write16(HOST_FS_SELECTOR, fs_sel);
2725 else
2726 vmcs_write16(HOST_FS_SELECTOR, 0);
2727 host_state->fs_sel = fs_sel;
2728 }
2729 if (unlikely(gs_sel != host_state->gs_sel)) {
2730 if (!(gs_sel & 7))
2731 vmcs_write16(HOST_GS_SELECTOR, gs_sel);
2732 else
2733 vmcs_write16(HOST_GS_SELECTOR, 0);
2734 host_state->gs_sel = gs_sel;
2735 }
Sean Christopherson5e079c72018-07-23 12:32:50 -07002736 if (unlikely(fs_base != host_state->fs_base)) {
2737 vmcs_writel(HOST_FS_BASE, fs_base);
2738 host_state->fs_base = fs_base;
2739 }
2740 if (unlikely(gs_base != host_state->gs_base)) {
2741 vmcs_writel(HOST_GS_BASE, gs_base);
2742 host_state->gs_base = gs_base;
2743 }
Avi Kivity707c0872007-05-02 17:33:43 +03002744
Avi Kivity26bb0982009-09-07 11:14:12 +03002745 for (i = 0; i < vmx->save_nmsrs; ++i)
2746 kvm_set_shared_msr(vmx->guest_msrs[i].index,
Avi Kivityd5696722009-12-02 12:28:47 +02002747 vmx->guest_msrs[i].data,
2748 vmx->guest_msrs[i].mask);
Avi Kivity33ed6322007-05-02 16:54:03 +03002749}
2750
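/*
 * Undo vmx_prepare_switch_to_guest(): restore the host's segment
 * selectors, kernel GS base and GDT before running host code again.
 */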
Sean Christopherson6d6095b2018-07-23 12:32:44 -07002751static void vmx_prepare_switch_to_host(struct vcpu_vmx *vmx)
Avi Kivity33ed6322007-05-02 16:54:03 +03002752{
Sean Christophersond7ee0392018-07-23 12:32:47 -07002753 struct vmcs_host_state *host_state;
2754
Sean Christophersonbd9966d2018-07-23 12:32:42 -07002755 if (!vmx->loaded_cpu_state)
Avi Kivity33ed6322007-05-02 16:54:03 +03002756 return;
2757
Sean Christophersonbd9966d2018-07-23 12:32:42 -07002758 WARN_ON_ONCE(vmx->loaded_cpu_state != vmx->loaded_vmcs);
Sean Christophersond7ee0392018-07-23 12:32:47 -07002759 host_state = &vmx->loaded_cpu_state->host_state;
Sean Christophersonbd9966d2018-07-23 12:32:42 -07002760
Avi Kivitye1beb1d2007-11-18 13:50:24 +02002761 ++vmx->vcpu.stat.host_state_reload;
Sean Christophersonbd9966d2018-07-23 12:32:42 -07002762 vmx->loaded_cpu_state = NULL;
2763
Avi Kivityc8770e72010-11-11 12:37:26 +02002764#ifdef CONFIG_X86_64
2765 if (is_long_mode(&vmx->vcpu))
2766 rdmsrl(MSR_KERNEL_GS_BASE, vmx->msr_guest_kernel_gs_base);
2767#endif
Sean Christophersond7ee0392018-07-23 12:32:47 -07002768 if (host_state->ldt_sel || (host_state->gs_sel & 7)) {
2769 kvm_load_ldt(host_state->ldt_sel);
Avi Kivity33ed6322007-05-02 16:54:03 +03002770#ifdef CONFIG_X86_64
Sean Christophersond7ee0392018-07-23 12:32:47 -07002771 load_gs_index(host_state->gs_sel);
Avi Kivity9581d442010-10-19 16:46:55 +02002772#else
Sean Christophersond7ee0392018-07-23 12:32:47 -07002773 loadsegment(gs, host_state->gs_sel);
Avi Kivity33ed6322007-05-02 16:54:03 +03002774#endif
Avi Kivity33ed6322007-05-02 16:54:03 +03002775 }
Sean Christophersond7ee0392018-07-23 12:32:47 -07002776 if (host_state->fs_sel & 7)
2777 loadsegment(fs, host_state->fs_sel);
Avi Kivityb2da15a2012-05-13 19:53:24 +03002778#ifdef CONFIG_X86_64
Sean Christophersond7ee0392018-07-23 12:32:47 -07002779 if (unlikely(host_state->ds_sel | host_state->es_sel)) {
2780 loadsegment(ds, host_state->ds_sel);
2781 loadsegment(es, host_state->es_sel);
Avi Kivityb2da15a2012-05-13 19:53:24 +03002782 }
Avi Kivityb2da15a2012-05-13 19:53:24 +03002783#endif
Andy Lutomirskib7ffc442017-02-20 08:56:14 -08002784 invalidate_tss_limit();
Avi Kivity44ea2b12009-09-06 15:55:37 +03002785#ifdef CONFIG_X86_64
Avi Kivityc8770e72010-11-11 12:37:26 +02002786 wrmsrl(MSR_KERNEL_GS_BASE, vmx->msr_host_kernel_gs_base);
Avi Kivity44ea2b12009-09-06 15:55:37 +03002787#endif
Thomas Garnier45fc8752017-03-14 10:05:08 -07002788 load_fixmap_gdt(raw_smp_processor_id());
Avi Kivity33ed6322007-05-02 16:54:03 +03002789}
2790
Sean Christopherson678e3152018-07-23 12:32:43 -07002791#ifdef CONFIG_X86_64
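/*
 * Accessors for the guest's MSR_KERNEL_GS_BASE.  When the vCPU is in long
 * mode and its state is loaded on the CPU, the current value lives in the
 * hardware MSR and is read or written there (with preemption disabled);
 * otherwise the cached msr_guest_kernel_gs_base copy is used.
 */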
2792static u64 vmx_read_guest_kernel_gs_base(struct vcpu_vmx *vmx)
Avi Kivitya9b21b62008-06-24 11:48:49 +03002793{
Sean Christopherson678e3152018-07-23 12:32:43 -07002794 if (is_long_mode(&vmx->vcpu)) {
2795 preempt_disable();
2796 if (vmx->loaded_cpu_state)
2797 rdmsrl(MSR_KERNEL_GS_BASE,
2798 vmx->msr_guest_kernel_gs_base);
2799 preempt_enable();
2800 }
2801 return vmx->msr_guest_kernel_gs_base;
Avi Kivitya9b21b62008-06-24 11:48:49 +03002802}
2803
Sean Christopherson678e3152018-07-23 12:32:43 -07002804static void vmx_write_guest_kernel_gs_base(struct vcpu_vmx *vmx, u64 data)
2805{
2806 if (is_long_mode(&vmx->vcpu)) {
2807 preempt_disable();
2808 if (vmx->loaded_cpu_state)
2809 wrmsrl(MSR_KERNEL_GS_BASE, data);
2810 preempt_enable();
2811 }
2812 vmx->msr_guest_kernel_gs_base = data;
2813}
2814#endif
2815
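/*
 * Re-target the posted-interrupt descriptor when the vCPU is loaded on a
 * (possibly different) physical CPU: update the notification destination
 * (PI.NDST) and clear the suppress-notification bit (PI.SN) so that
 * interrupts can be posted to the new CPU.
 */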
Feng Wu28b835d2015-09-18 22:29:54 +08002816static void vmx_vcpu_pi_load(struct kvm_vcpu *vcpu, int cpu)
2817{
2818 struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu);
2819 struct pi_desc old, new;
2820 unsigned int dest;
2821
Paolo Bonzini31afb2e2017-06-06 12:57:06 +02002822 /*
2823 * In case of hot-plug or hot-unplug, we may have to undo
2824 * vmx_vcpu_pi_put even if there is no assigned device. And we
2825 * always keep PI.NDST up to date for simplicity: it makes the
2826 * code easier, and CPU migration is not a fast path.
2827 */
2828 if (!pi_test_sn(pi_desc) && vcpu->cpu == cpu)
Feng Wu28b835d2015-09-18 22:29:54 +08002829 return;
2830
Paolo Bonzini31afb2e2017-06-06 12:57:06 +02002831 /*
2832 * First handle the simple case where no cmpxchg is necessary; just
2833 * allow posting non-urgent interrupts.
2834 *
2835 * If the 'nv' field is POSTED_INTR_WAKEUP_VECTOR, do not change
2836 * PI.NDST: pi_post_block will do it for us and the wakeup_handler
2837 * expects the VCPU to be on the blocked_vcpu_list that matches
2838 * PI.NDST.
2839 */
2840 if (pi_desc->nv == POSTED_INTR_WAKEUP_VECTOR ||
2841 vcpu->cpu == cpu) {
2842 pi_clear_sn(pi_desc);
2843 return;
2844 }
2845
2846 /* The full case. */
Feng Wu28b835d2015-09-18 22:29:54 +08002847 do {
2848 old.control = new.control = pi_desc->control;
2849
Paolo Bonzini31afb2e2017-06-06 12:57:06 +02002850 dest = cpu_physical_id(cpu);
Feng Wu28b835d2015-09-18 22:29:54 +08002851
Paolo Bonzini31afb2e2017-06-06 12:57:06 +02002852 if (x2apic_enabled())
2853 new.ndst = dest;
2854 else
2855 new.ndst = (dest << 8) & 0xFF00;
Feng Wu28b835d2015-09-18 22:29:54 +08002856
Feng Wu28b835d2015-09-18 22:29:54 +08002857 new.sn = 0;
Paolo Bonzinic0a16662017-09-28 17:58:41 +02002858 } while (cmpxchg64(&pi_desc->control, old.control,
2859 new.control) != old.control);
Feng Wu28b835d2015-09-18 22:29:54 +08002860}
Xiao Guangrong1be0e612016-03-22 16:51:18 +08002861
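/* Refresh the cached TSC scaling ratio and write it to the VMCS. */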
Peter Feinerc95ba922016-08-17 09:36:47 -07002862static void decache_tsc_multiplier(struct vcpu_vmx *vmx)
2863{
2864 vmx->current_tsc_ratio = vmx->vcpu.arch.tsc_scaling_ratio;
2865 vmcs_write64(TSC_MULTIPLIER, vmx->current_tsc_ratio);
2866}
2867
Avi Kivity6aa8b732006-12-10 02:21:36 -08002868/*
2869 * Switches to specified vcpu, until a matching vcpu_put(), but assumes
2870 * vcpu mutex is already taken.
2871 */
Avi Kivity15ad7142007-07-11 18:17:21 +03002872static void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
Avi Kivity6aa8b732006-12-10 02:21:36 -08002873{
Gregory Haskinsa2fa3e92007-07-27 08:13:10 -04002874 struct vcpu_vmx *vmx = to_vmx(vcpu);
Jim Mattsonb80c76e2016-07-29 18:56:53 -07002875 bool already_loaded = vmx->loaded_vmcs->cpu == cpu;
Avi Kivity6aa8b732006-12-10 02:21:36 -08002876
Jim Mattsonb80c76e2016-07-29 18:56:53 -07002877 if (!already_loaded) {
David Hildenbrandfe0e80b2017-03-10 12:47:13 +01002878 loaded_vmcs_clear(vmx->loaded_vmcs);
Dongxiao Xu92fe13b2010-05-11 18:29:42 +08002879 local_irq_disable();
Zhang Yanfei8f536b72012-12-06 23:43:34 +08002880 crash_disable_local_vmclear(cpu);
Xiao Guangrong5a560f82012-11-28 20:54:14 +08002881
2882 /*
2883 * Read loaded_vmcs->cpu should be before fetching
2884 * loaded_vmcs->loaded_vmcss_on_cpu_link.
2885 * See the comments in __loaded_vmcs_clear().
2886 */
2887 smp_rmb();
2888
Nadav Har'Eld462b812011-05-24 15:26:10 +03002889 list_add(&vmx->loaded_vmcs->loaded_vmcss_on_cpu_link,
2890 &per_cpu(loaded_vmcss_on_cpu, cpu));
Zhang Yanfei8f536b72012-12-06 23:43:34 +08002891 crash_enable_local_vmclear(cpu);
Dongxiao Xu92fe13b2010-05-11 18:29:42 +08002892 local_irq_enable();
Jim Mattsonb80c76e2016-07-29 18:56:53 -07002893 }
2894
2895 if (per_cpu(current_vmcs, cpu) != vmx->loaded_vmcs->vmcs) {
2896 per_cpu(current_vmcs, cpu) = vmx->loaded_vmcs->vmcs;
2897 vmcs_load(vmx->loaded_vmcs->vmcs);
Ashok Raj15d45072018-02-01 22:59:43 +01002898 indirect_branch_prediction_barrier();
Jim Mattsonb80c76e2016-07-29 18:56:53 -07002899 }
2900
2901 if (!already_loaded) {
Andy Lutomirski59c58ceb2017-03-22 14:32:33 -07002902 void *gdt = get_current_gdt_ro();
Jim Mattsonb80c76e2016-07-29 18:56:53 -07002903 unsigned long sysenter_esp;
2904
2905 kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
Dongxiao Xu92fe13b2010-05-11 18:29:42 +08002906
Avi Kivity6aa8b732006-12-10 02:21:36 -08002907 /*
2908 * Linux uses per-cpu TSS and GDT, so set these when switching
Andy Lutomirskie0c23062017-02-20 08:56:10 -08002909 * processors. See 22.2.4.
Avi Kivity6aa8b732006-12-10 02:21:36 -08002910 */
Andy Lutomirskie0c23062017-02-20 08:56:10 -08002911 vmcs_writel(HOST_TR_BASE,
Andy Lutomirski72f5e082017-12-04 15:07:20 +01002912 (unsigned long)&get_cpu_entry_area(cpu)->tss.x86_tss);
Andy Lutomirski59c58ceb2017-03-22 14:32:33 -07002913 vmcs_writel(HOST_GDTR_BASE, (unsigned long)gdt); /* 22.2.4 */
Avi Kivity6aa8b732006-12-10 02:21:36 -08002914
Andy Lutomirskib7ffc442017-02-20 08:56:14 -08002915 /*
 2916		 * A VM exit changes the host TR limit to 0x67. This is okay,
 2917		 * since 0x67 covers everything except the IO bitmap, and we
 2918		 * have code to handle the IO bitmap being lost after a VM
 2919		 * exit.
2920 */
2921 BUILD_BUG_ON(IO_BITMAP_OFFSET - 1 != 0x67);
2922
Avi Kivity6aa8b732006-12-10 02:21:36 -08002923 rdmsrl(MSR_IA32_SYSENTER_ESP, sysenter_esp);
2924 vmcs_writel(HOST_IA32_SYSENTER_ESP, sysenter_esp); /* 22.2.3 */
Haozhong Zhangff2c3a12015-10-20 15:39:10 +08002925
Nadav Har'Eld462b812011-05-24 15:26:10 +03002926 vmx->loaded_vmcs->cpu = cpu;
Avi Kivity6aa8b732006-12-10 02:21:36 -08002927 }
Feng Wu28b835d2015-09-18 22:29:54 +08002928
Owen Hofmann2680d6d2016-03-01 13:36:13 -08002929 /* Setup TSC multiplier */
2930 if (kvm_has_tsc_control &&
Peter Feinerc95ba922016-08-17 09:36:47 -07002931 vmx->current_tsc_ratio != vcpu->arch.tsc_scaling_ratio)
2932 decache_tsc_multiplier(vmx);
Owen Hofmann2680d6d2016-03-01 13:36:13 -08002933
Feng Wu28b835d2015-09-18 22:29:54 +08002934 vmx_vcpu_pi_load(vcpu, cpu);
Xiao Guangrong1be0e612016-03-22 16:51:18 +08002935 vmx->host_pkru = read_pkru();
Wanpeng Li74c55932017-11-29 01:31:20 -08002936 vmx->host_debugctlmsr = get_debugctlmsr();
Feng Wu28b835d2015-09-18 22:29:54 +08002937}
2938
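/*
 * When a vCPU with posted interrupts in use is preempted, set the
 * suppress-notification bit (PI.SN) so that no notification events are
 * sent while it is not running.
 */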
2939static void vmx_vcpu_pi_put(struct kvm_vcpu *vcpu)
2940{
2941 struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu);
2942
2943 if (!kvm_arch_has_assigned_device(vcpu->kvm) ||
Yang Zhanga0052192016-06-13 09:56:56 +08002944 !irq_remapping_cap(IRQ_POSTING_CAP) ||
2945 !kvm_vcpu_apicv_active(vcpu))
Feng Wu28b835d2015-09-18 22:29:54 +08002946 return;
2947
2948 /* Set SN when the vCPU is preempted */
2949 if (vcpu->preempted)
2950 pi_set_sn(pi_desc);
Avi Kivity6aa8b732006-12-10 02:21:36 -08002951}
2952
2953static void vmx_vcpu_put(struct kvm_vcpu *vcpu)
2954{
Feng Wu28b835d2015-09-18 22:29:54 +08002955 vmx_vcpu_pi_put(vcpu);
2956
Sean Christopherson6d6095b2018-07-23 12:32:44 -07002957 vmx_prepare_switch_to_host(to_vmx(vcpu));
Avi Kivity6aa8b732006-12-10 02:21:36 -08002958}
2959
Wanpeng Lif244dee2017-07-20 01:11:54 -07002960static bool emulation_required(struct kvm_vcpu *vcpu)
2961{
2962 return emulate_invalid_guest_state && !guest_state_valid(vcpu);
2963}
2964
Avi Kivityedcafe32009-12-30 18:07:40 +02002965static void vmx_decache_cr0_guest_bits(struct kvm_vcpu *vcpu);
2966
Nadav Har'Elfe3ef052011-05-25 23:10:02 +03002967/*
2968 * Return the cr0 value that a nested guest would read. This is a combination
2969 * of the real cr0 used to run the guest (guest_cr0), and the bits shadowed by
2970 * its hypervisor (cr0_read_shadow).
2971 */
2972static inline unsigned long nested_read_cr0(struct vmcs12 *fields)
2973{
2974 return (fields->guest_cr0 & ~fields->cr0_guest_host_mask) |
2975 (fields->cr0_read_shadow & fields->cr0_guest_host_mask);
2976}
2977static inline unsigned long nested_read_cr4(struct vmcs12 *fields)
2978{
2979 return (fields->guest_cr4 & ~fields->cr4_guest_host_mask) |
2980 (fields->cr4_read_shadow & fields->cr4_guest_host_mask);
2981}
2982
Avi Kivity6aa8b732006-12-10 02:21:36 -08002983static unsigned long vmx_get_rflags(struct kvm_vcpu *vcpu)
2984{
Avi Kivity78ac8b42010-04-08 18:19:35 +03002985 unsigned long rflags, save_rflags;
Avi Kivity345dcaa2009-08-12 15:29:37 +03002986
Avi Kivity6de12732011-03-07 12:51:22 +02002987 if (!test_bit(VCPU_EXREG_RFLAGS, (ulong *)&vcpu->arch.regs_avail)) {
2988 __set_bit(VCPU_EXREG_RFLAGS, (ulong *)&vcpu->arch.regs_avail);
2989 rflags = vmcs_readl(GUEST_RFLAGS);
2990 if (to_vmx(vcpu)->rmode.vm86_active) {
2991 rflags &= RMODE_GUEST_OWNED_EFLAGS_BITS;
2992 save_rflags = to_vmx(vcpu)->rmode.save_rflags;
2993 rflags |= save_rflags & ~RMODE_GUEST_OWNED_EFLAGS_BITS;
2994 }
2995 to_vmx(vcpu)->rflags = rflags;
Avi Kivity78ac8b42010-04-08 18:19:35 +03002996 }
Avi Kivity6de12732011-03-07 12:51:22 +02002997 return to_vmx(vcpu)->rflags;
Avi Kivity6aa8b732006-12-10 02:21:36 -08002998}
2999
3000static void vmx_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags)
3001{
Wanpeng Lif244dee2017-07-20 01:11:54 -07003002 unsigned long old_rflags = vmx_get_rflags(vcpu);
3003
Avi Kivity6de12732011-03-07 12:51:22 +02003004 __set_bit(VCPU_EXREG_RFLAGS, (ulong *)&vcpu->arch.regs_avail);
3005 to_vmx(vcpu)->rflags = rflags;
Avi Kivity78ac8b42010-04-08 18:19:35 +03003006 if (to_vmx(vcpu)->rmode.vm86_active) {
3007 to_vmx(vcpu)->rmode.save_rflags = rflags;
Glauber de Oliveira Costa053de042008-01-30 13:31:27 +01003008 rflags |= X86_EFLAGS_IOPL | X86_EFLAGS_VM;
Avi Kivity78ac8b42010-04-08 18:19:35 +03003009 }
Avi Kivity6aa8b732006-12-10 02:21:36 -08003010 vmcs_writel(GUEST_RFLAGS, rflags);
Wanpeng Lif244dee2017-07-20 01:11:54 -07003011
3012 if ((old_rflags ^ to_vmx(vcpu)->rflags) & X86_EFLAGS_VM)
3013 to_vmx(vcpu)->emulation_required = emulation_required(vcpu);
Avi Kivity6aa8b732006-12-10 02:21:36 -08003014}
3015
Paolo Bonzini37ccdcb2014-05-20 14:29:47 +02003016static u32 vmx_get_interrupt_shadow(struct kvm_vcpu *vcpu)
Glauber Costa2809f5d2009-05-12 16:21:05 -04003017{
3018 u32 interruptibility = vmcs_read32(GUEST_INTERRUPTIBILITY_INFO);
3019 int ret = 0;
3020
3021 if (interruptibility & GUEST_INTR_STATE_STI)
Jan Kiszka48005f62010-02-19 19:38:07 +01003022 ret |= KVM_X86_SHADOW_INT_STI;
Glauber Costa2809f5d2009-05-12 16:21:05 -04003023 if (interruptibility & GUEST_INTR_STATE_MOV_SS)
Jan Kiszka48005f62010-02-19 19:38:07 +01003024 ret |= KVM_X86_SHADOW_INT_MOV_SS;
Glauber Costa2809f5d2009-05-12 16:21:05 -04003025
Paolo Bonzini37ccdcb2014-05-20 14:29:47 +02003026 return ret;
Glauber Costa2809f5d2009-05-12 16:21:05 -04003027}
3028
3029static void vmx_set_interrupt_shadow(struct kvm_vcpu *vcpu, int mask)
3030{
3031 u32 interruptibility_old = vmcs_read32(GUEST_INTERRUPTIBILITY_INFO);
3032 u32 interruptibility = interruptibility_old;
3033
3034 interruptibility &= ~(GUEST_INTR_STATE_STI | GUEST_INTR_STATE_MOV_SS);
3035
Jan Kiszka48005f62010-02-19 19:38:07 +01003036 if (mask & KVM_X86_SHADOW_INT_MOV_SS)
Glauber Costa2809f5d2009-05-12 16:21:05 -04003037 interruptibility |= GUEST_INTR_STATE_MOV_SS;
Jan Kiszka48005f62010-02-19 19:38:07 +01003038 else if (mask & KVM_X86_SHADOW_INT_STI)
Glauber Costa2809f5d2009-05-12 16:21:05 -04003039 interruptibility |= GUEST_INTR_STATE_STI;
3040
3041 if ((interruptibility != interruptibility_old))
3042 vmcs_write32(GUEST_INTERRUPTIBILITY_INFO, interruptibility);
3043}
3044
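/*
 * Advance RIP past the instruction that was just emulated and drop any
 * pending interrupt shadow (STI/MOV SS blocking).
 */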
Avi Kivity6aa8b732006-12-10 02:21:36 -08003045static void skip_emulated_instruction(struct kvm_vcpu *vcpu)
3046{
3047 unsigned long rip;
Avi Kivity6aa8b732006-12-10 02:21:36 -08003048
Marcelo Tosatti5fdbf972008-06-27 14:58:02 -03003049 rip = kvm_rip_read(vcpu);
Avi Kivity6aa8b732006-12-10 02:21:36 -08003050 rip += vmcs_read32(VM_EXIT_INSTRUCTION_LEN);
Marcelo Tosatti5fdbf972008-06-27 14:58:02 -03003051 kvm_rip_write(vcpu, rip);
Avi Kivity6aa8b732006-12-10 02:21:36 -08003052
Glauber Costa2809f5d2009-05-12 16:21:05 -04003053 /* skipping an emulated instruction also counts */
3054 vmx_set_interrupt_shadow(vcpu, 0);
Avi Kivity6aa8b732006-12-10 02:21:36 -08003055}
3056
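/*
 * Build VM-exit interruption information for an exception that should be
 * reflected to L1 and deliver it as an EXIT_REASON_EXCEPTION_NMI exit.
 */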
Paolo Bonzinib96fb432017-07-27 12:29:32 +02003057static void nested_vmx_inject_exception_vmexit(struct kvm_vcpu *vcpu,
3058 unsigned long exit_qual)
3059{
3060 struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
3061 unsigned int nr = vcpu->arch.exception.nr;
3062 u32 intr_info = nr | INTR_INFO_VALID_MASK;
3063
3064 if (vcpu->arch.exception.has_error_code) {
3065 vmcs12->vm_exit_intr_error_code = vcpu->arch.exception.error_code;
3066 intr_info |= INTR_INFO_DELIVER_CODE_MASK;
3067 }
3068
3069 if (kvm_exception_is_soft(nr))
3070 intr_info |= INTR_TYPE_SOFT_EXCEPTION;
3071 else
3072 intr_info |= INTR_TYPE_HARD_EXCEPTION;
3073
3074 if (!(vmcs12->idt_vectoring_info_field & VECTORING_INFO_VALID_MASK) &&
3075 vmx_get_nmi_mask(vcpu))
3076 intr_info |= INTR_INFO_UNBLOCK_NMI;
3077
3078 nested_vmx_vmexit(vcpu, EXIT_REASON_EXCEPTION_NMI, intr_info, exit_qual);
3079}
3080
Nadav Har'El0b6ac342011-05-25 23:13:36 +03003081/*
 3082 * KVM wants to inject exceptions (such as page faults) it intercepted into
 3083 * the guest. For a nested guest, this checks whether they go to L1 or L2.
Nadav Har'El0b6ac342011-05-25 23:13:36 +03003084 */
Wanpeng Libfcf83b2017-08-24 03:35:11 -07003085static int nested_vmx_check_exception(struct kvm_vcpu *vcpu, unsigned long *exit_qual)
Nadav Har'El0b6ac342011-05-25 23:13:36 +03003086{
3087 struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
Wanpeng Liadfe20f2017-07-13 18:30:41 -07003088 unsigned int nr = vcpu->arch.exception.nr;
Nadav Har'El0b6ac342011-05-25 23:13:36 +03003089
Paolo Bonzinib96fb432017-07-27 12:29:32 +02003090 if (nr == PF_VECTOR) {
3091 if (vcpu->arch.exception.nested_apf) {
Wanpeng Libfcf83b2017-08-24 03:35:11 -07003092 *exit_qual = vcpu->arch.apf.nested_apf_token;
Paolo Bonzinib96fb432017-07-27 12:29:32 +02003093 return 1;
3094 }
3095 /*
3096 * FIXME: we must not write CR2 when L1 intercepts an L2 #PF exception.
3097 * The fix is to add the ancillary datum (CR2 or DR6) to structs
3098 * kvm_queued_exception and kvm_vcpu_events, so that CR2 and DR6
3099 * can be written only when inject_pending_event runs. This should be
3100 * conditional on a new capability---if the capability is disabled,
3101 * kvm_multiple_exception would write the ancillary information to
3102 * CR2 or DR6, for backwards ABI-compatibility.
3103 */
3104 if (nested_vmx_is_page_fault_vmexit(vmcs12,
3105 vcpu->arch.exception.error_code)) {
Wanpeng Libfcf83b2017-08-24 03:35:11 -07003106 *exit_qual = vcpu->arch.cr2;
Paolo Bonzinib96fb432017-07-27 12:29:32 +02003107 return 1;
3108 }
3109 } else {
Paolo Bonzinib96fb432017-07-27 12:29:32 +02003110 if (vmcs12->exception_bitmap & (1u << nr)) {
Wanpeng Libfcf83b2017-08-24 03:35:11 -07003111 if (nr == DB_VECTOR)
3112 *exit_qual = vcpu->arch.dr6;
3113 else
3114 *exit_qual = 0;
Paolo Bonzinib96fb432017-07-27 12:29:32 +02003115 return 1;
3116 }
Wanpeng Liadfe20f2017-07-13 18:30:41 -07003117 }
3118
Paolo Bonzinib96fb432017-07-27 12:29:32 +02003119 return 0;
Nadav Har'El0b6ac342011-05-25 23:13:36 +03003120}
3121
Wanpeng Licaa057a2018-03-12 04:53:03 -07003122static void vmx_clear_hlt(struct kvm_vcpu *vcpu)
3123{
3124 /*
3125 * Ensure that we clear the HLT state in the VMCS. We don't need to
3126 * explicitly skip the instruction because if the HLT state is set,
3127 * then the instruction is already executing and RIP has already been
3128 * advanced.
3129 */
3130 if (kvm_hlt_in_guest(vcpu->kvm) &&
3131 vmcs_read32(GUEST_ACTIVITY_STATE) == GUEST_ACTIVITY_HLT)
3132 vmcs_write32(GUEST_ACTIVITY_STATE, GUEST_ACTIVITY_ACTIVE);
3133}
3134
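/*
 * Inject the queued exception into the guest through the VM-entry
 * interruption-information field, or via real-mode interrupt emulation
 * when the vCPU is in vm86 mode.
 */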
Wanpeng Licfcd20e2017-07-13 18:30:39 -07003135static void vmx_queue_exception(struct kvm_vcpu *vcpu)
Avi Kivity298101d2007-11-25 13:41:11 +02003136{
Jan Kiszka77ab6db2008-07-14 12:28:51 +02003137 struct vcpu_vmx *vmx = to_vmx(vcpu);
Wanpeng Licfcd20e2017-07-13 18:30:39 -07003138 unsigned nr = vcpu->arch.exception.nr;
3139 bool has_error_code = vcpu->arch.exception.has_error_code;
Wanpeng Licfcd20e2017-07-13 18:30:39 -07003140 u32 error_code = vcpu->arch.exception.error_code;
Jan Kiszka8ab2d2e2008-12-15 13:52:10 +01003141 u32 intr_info = nr | INTR_INFO_VALID_MASK;
Jan Kiszka77ab6db2008-07-14 12:28:51 +02003142
Jan Kiszka8ab2d2e2008-12-15 13:52:10 +01003143 if (has_error_code) {
Jan Kiszka77ab6db2008-07-14 12:28:51 +02003144 vmcs_write32(VM_ENTRY_EXCEPTION_ERROR_CODE, error_code);
Jan Kiszka8ab2d2e2008-12-15 13:52:10 +01003145 intr_info |= INTR_INFO_DELIVER_CODE_MASK;
3146 }
Jan Kiszka77ab6db2008-07-14 12:28:51 +02003147
Avi Kivity7ffd92c2009-06-09 14:10:45 +03003148 if (vmx->rmode.vm86_active) {
Serge E. Hallyn71f98332011-04-13 09:12:54 -05003149 int inc_eip = 0;
3150 if (kvm_exception_is_soft(nr))
3151 inc_eip = vcpu->arch.event_exit_inst_len;
3152 if (kvm_inject_realmode_interrupt(vcpu, nr, inc_eip) != EMULATE_DONE)
Mohammed Gamala92601b2010-09-19 14:34:07 +02003153 kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu);
Jan Kiszka77ab6db2008-07-14 12:28:51 +02003154 return;
3155 }
3156
Sean Christophersonadd5ff72018-03-23 09:34:00 -07003157 WARN_ON_ONCE(vmx->emulation_required);
3158
Gleb Natapov66fd3f72009-05-11 13:35:50 +03003159 if (kvm_exception_is_soft(nr)) {
3160 vmcs_write32(VM_ENTRY_INSTRUCTION_LEN,
3161 vmx->vcpu.arch.event_exit_inst_len);
Jan Kiszka8ab2d2e2008-12-15 13:52:10 +01003162 intr_info |= INTR_TYPE_SOFT_EXCEPTION;
3163 } else
3164 intr_info |= INTR_TYPE_HARD_EXCEPTION;
3165
3166 vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, intr_info);
Wanpeng Licaa057a2018-03-12 04:53:03 -07003167
3168 vmx_clear_hlt(vcpu);
Avi Kivity298101d2007-11-25 13:41:11 +02003169}
3170
Sheng Yang4e47c7a2009-12-18 16:48:47 +08003171static bool vmx_rdtscp_supported(void)
3172{
3173 return cpu_has_vmx_rdtscp();
3174}
3175
Mao, Junjiead756a12012-07-02 01:18:48 +00003176static bool vmx_invpcid_supported(void)
3177{
Junaid Shahideb4b2482018-06-27 14:59:14 -07003178 return cpu_has_vmx_invpcid();
Mao, Junjiead756a12012-07-02 01:18:48 +00003179}
3180
Avi Kivity6aa8b732006-12-10 02:21:36 -08003181/*
Eddie Donga75beee2007-05-17 18:55:15 +03003182 * Swap two entries within the vmx->guest_msrs[] array.
3183 */
Rusty Russell8b9cf982007-07-30 16:31:43 +10003184static void move_msr_up(struct vcpu_vmx *vmx, int from, int to)
Eddie Donga75beee2007-05-17 18:55:15 +03003185{
Avi Kivity26bb0982009-09-07 11:14:12 +03003186 struct shared_msr_entry tmp;
Gregory Haskinsa2fa3e92007-07-27 08:13:10 -04003187
3188 tmp = vmx->guest_msrs[to];
3189 vmx->guest_msrs[to] = vmx->guest_msrs[from];
3190 vmx->guest_msrs[from] = tmp;
Eddie Donga75beee2007-05-17 18:55:15 +03003191}
3192
3193/*
Avi Kivitye38aea32007-04-19 13:22:48 +03003194 * Set up the vmcs to automatically save and restore system
3195 * msrs. Don't touch the 64-bit msrs if the guest is in legacy
3196 * mode, as fiddling with msrs is very expensive.
3197 */
Rusty Russell8b9cf982007-07-30 16:31:43 +10003198static void setup_msrs(struct vcpu_vmx *vmx)
Avi Kivitye38aea32007-04-19 13:22:48 +03003199{
Avi Kivity26bb0982009-09-07 11:14:12 +03003200 int save_nmsrs, index;
Avi Kivitye38aea32007-04-19 13:22:48 +03003201
Eddie Donga75beee2007-05-17 18:55:15 +03003202 save_nmsrs = 0;
Avi Kivity4d56c8a2007-04-19 14:28:44 +03003203#ifdef CONFIG_X86_64
Rusty Russell8b9cf982007-07-30 16:31:43 +10003204 if (is_long_mode(&vmx->vcpu)) {
Rusty Russell8b9cf982007-07-30 16:31:43 +10003205 index = __find_msr_index(vmx, MSR_SYSCALL_MASK);
Eddie Donga75beee2007-05-17 18:55:15 +03003206 if (index >= 0)
Rusty Russell8b9cf982007-07-30 16:31:43 +10003207 move_msr_up(vmx, index, save_nmsrs++);
3208 index = __find_msr_index(vmx, MSR_LSTAR);
Eddie Donga75beee2007-05-17 18:55:15 +03003209 if (index >= 0)
Rusty Russell8b9cf982007-07-30 16:31:43 +10003210 move_msr_up(vmx, index, save_nmsrs++);
3211 index = __find_msr_index(vmx, MSR_CSTAR);
Eddie Donga75beee2007-05-17 18:55:15 +03003212 if (index >= 0)
Rusty Russell8b9cf982007-07-30 16:31:43 +10003213 move_msr_up(vmx, index, save_nmsrs++);
Sheng Yang4e47c7a2009-12-18 16:48:47 +08003214 index = __find_msr_index(vmx, MSR_TSC_AUX);
Radim Krčmářd6321d42017-08-05 00:12:49 +02003215 if (index >= 0 && guest_cpuid_has(&vmx->vcpu, X86_FEATURE_RDTSCP))
Sheng Yang4e47c7a2009-12-18 16:48:47 +08003216 move_msr_up(vmx, index, save_nmsrs++);
Eddie Donga75beee2007-05-17 18:55:15 +03003217 /*
Brian Gerst8c065852010-07-17 09:03:26 -04003218	 * MSR_STAR is only needed by long-mode guests, and only
Eddie Donga75beee2007-05-17 18:55:15 +03003219	 * if EFER.SCE is enabled.
3220 */
Brian Gerst8c065852010-07-17 09:03:26 -04003221 index = __find_msr_index(vmx, MSR_STAR);
Avi Kivityf6801df2010-01-21 15:31:50 +02003222 if ((index >= 0) && (vmx->vcpu.arch.efer & EFER_SCE))
Rusty Russell8b9cf982007-07-30 16:31:43 +10003223 move_msr_up(vmx, index, save_nmsrs++);
Avi Kivity4d56c8a2007-04-19 14:28:44 +03003224 }
Eddie Donga75beee2007-05-17 18:55:15 +03003225#endif
Avi Kivity92c0d902009-10-29 11:00:16 +02003226 index = __find_msr_index(vmx, MSR_EFER);
3227 if (index >= 0 && update_transition_efer(vmx, index))
Avi Kivity26bb0982009-09-07 11:14:12 +03003228 move_msr_up(vmx, index, save_nmsrs++);
Avi Kivity4d56c8a2007-04-19 14:28:44 +03003229
Avi Kivity26bb0982009-09-07 11:14:12 +03003230 vmx->save_nmsrs = save_nmsrs;
Avi Kivity58972972009-02-24 22:26:47 +02003231
Yang Zhang8d146952013-01-25 10:18:50 +08003232 if (cpu_has_vmx_msr_bitmap())
Paolo Bonzini904e14f2018-01-16 16:51:18 +01003233 vmx_update_msr_bitmap(&vmx->vcpu);
Avi Kivitye38aea32007-04-19 13:22:48 +03003234}
3235
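/*
 * Return the TSC offset as seen by L1.  While L2 runs with TSC offsetting
 * enabled, vcpu->arch.tsc_offset also contains vmcs12->tsc_offset, which
 * is subtracted back out here.
 */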
KarimAllah Ahmede79f2452018-04-14 05:10:52 +02003236static u64 vmx_read_l1_tsc_offset(struct kvm_vcpu *vcpu)
Avi Kivity6aa8b732006-12-10 02:21:36 -08003237{
KarimAllah Ahmede79f2452018-04-14 05:10:52 +02003238 struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
Avi Kivity6aa8b732006-12-10 02:21:36 -08003239
KarimAllah Ahmede79f2452018-04-14 05:10:52 +02003240 if (is_guest_mode(vcpu) &&
3241 (vmcs12->cpu_based_vm_exec_control & CPU_BASED_USE_TSC_OFFSETING))
3242 return vcpu->arch.tsc_offset - vmcs12->tsc_offset;
3243
3244 return vcpu->arch.tsc_offset;
Avi Kivity6aa8b732006-12-10 02:21:36 -08003245}
3246
3247/*
Zachary Amsden99e3e302010-08-19 22:07:17 -10003248 * Writes 'offset' into the guest's timestamp counter offset register.
Avi Kivity6aa8b732006-12-10 02:21:36 -08003249 */
Zachary Amsden99e3e302010-08-19 22:07:17 -10003250static void vmx_write_tsc_offset(struct kvm_vcpu *vcpu, u64 offset)
Avi Kivity6aa8b732006-12-10 02:21:36 -08003251{
Nadav Har'El27fc51b2011-08-02 15:54:52 +03003252 if (is_guest_mode(vcpu)) {
Nadav Har'El79918252011-05-25 23:15:39 +03003253 /*
Nadav Har'El27fc51b2011-08-02 15:54:52 +03003254 * We're here if L1 chose not to trap WRMSR to TSC. According
 3255		 * to the spec, this should set L1's TSC; the offset that L1
3256 * set for L2 remains unchanged, and still needs to be added
3257 * to the newly set TSC to get L2's TSC.
Nadav Har'El79918252011-05-25 23:15:39 +03003258 */
Nadav Har'El27fc51b2011-08-02 15:54:52 +03003259 struct vmcs12 *vmcs12;
Nadav Har'El27fc51b2011-08-02 15:54:52 +03003260 /* recalculate vmcs02.TSC_OFFSET: */
3261 vmcs12 = get_vmcs12(vcpu);
3262 vmcs_write64(TSC_OFFSET, offset +
3263 (nested_cpu_has(vmcs12, CPU_BASED_USE_TSC_OFFSETING) ?
3264 vmcs12->tsc_offset : 0));
3265 } else {
Yoshihiro YUNOMAE489223e2013-06-12 16:43:44 +09003266 trace_kvm_write_tsc_offset(vcpu->vcpu_id,
3267 vmcs_read64(TSC_OFFSET), offset);
Nadav Har'El27fc51b2011-08-02 15:54:52 +03003268 vmcs_write64(TSC_OFFSET, offset);
3269 }
Avi Kivity6aa8b732006-12-10 02:21:36 -08003270}
3271
Nadav Har'El801d3422011-05-25 23:02:23 +03003272/*
3273 * nested_vmx_allowed() checks whether a guest should be allowed to use VMX
3274 * instructions and MSRs (i.e., nested VMX). Nested VMX is disabled for
3275 * all guests if the "nested" module option is off, and can also be disabled
3276 * for a single guest by disabling its VMX cpuid bit.
3277 */
3278static inline bool nested_vmx_allowed(struct kvm_vcpu *vcpu)
3279{
Radim Krčmářd6321d42017-08-05 00:12:49 +02003280 return nested && guest_cpuid_has(vcpu, X86_FEATURE_VMX);
Nadav Har'El801d3422011-05-25 23:02:23 +03003281}
3282
Avi Kivity6aa8b732006-12-10 02:21:36 -08003283/*
Nadav Har'Elb87a51a2011-05-25 23:04:25 +03003284 * nested_vmx_setup_ctls_msrs() sets up variables containing the values to be
3285 * returned for the various VMX controls MSRs when nested VMX is enabled.
3286 * The same values should also be used to verify that vmcs12 control fields are
3287 * valid during nested entry from L1 to L2.
3288 * Each of these control msrs has a low and high 32-bit half: A low bit is on
3289 * if the corresponding bit in the (32-bit) control field *must* be on, and a
3290 * bit in the high half is on if the corresponding bit in the control field
3291 * may be on. See also vmx_control_verify().
Nadav Har'Elb87a51a2011-05-25 23:04:25 +03003292 */
Paolo Bonzini6677f3d2018-02-26 13:40:08 +01003293static void nested_vmx_setup_ctls_msrs(struct nested_vmx_msrs *msrs, bool apicv)
Nadav Har'Elb87a51a2011-05-25 23:04:25 +03003294{
Paolo Bonzini13893092018-02-26 13:40:09 +01003295 if (!nested) {
3296 memset(msrs, 0, sizeof(*msrs));
3297 return;
3298 }
3299
Nadav Har'Elb87a51a2011-05-25 23:04:25 +03003300 /*
3301 * Note that as a general rule, the high half of the MSRs (bits in
3302 * the control fields which may be 1) should be initialized by the
3303 * intersection of the underlying hardware's MSR (i.e., features which
3304 * can be supported) and the list of features we want to expose -
3305 * because they are known to be properly supported in our code.
3306 * Also, usually, the low half of the MSRs (bits which must be 1) can
3307 * be set to 0, meaning that L1 may turn off any of these bits. The
3308 * reason is that if one of these bits is necessary, it will appear
3309 * in vmcs01 and prepare_vmcs02, when it bitwise-or's the control
3310 * fields of vmcs01 and vmcs02, will turn these bits off - and
Paolo Bonzini7313c692017-07-27 10:31:25 +02003311 * nested_vmx_exit_reflected() will not pass related exits to L1.
Nadav Har'Elb87a51a2011-05-25 23:04:25 +03003312 * These rules have exceptions below.
3313 */
3314
3315 /* pin-based controls */
Jan Kiszkaeabeaac2013-03-13 11:30:50 +01003316 rdmsr(MSR_IA32_VMX_PINBASED_CTLS,
Paolo Bonzini6677f3d2018-02-26 13:40:08 +01003317 msrs->pinbased_ctls_low,
3318 msrs->pinbased_ctls_high);
3319 msrs->pinbased_ctls_low |=
Wincy Vanb9c237b2015-02-03 23:56:30 +08003320 PIN_BASED_ALWAYSON_WITHOUT_TRUE_MSR;
Paolo Bonzini6677f3d2018-02-26 13:40:08 +01003321 msrs->pinbased_ctls_high &=
Wincy Vanb9c237b2015-02-03 23:56:30 +08003322 PIN_BASED_EXT_INTR_MASK |
3323 PIN_BASED_NMI_EXITING |
Paolo Bonzini13893092018-02-26 13:40:09 +01003324 PIN_BASED_VIRTUAL_NMIS |
3325 (apicv ? PIN_BASED_POSTED_INTR : 0);
Paolo Bonzini6677f3d2018-02-26 13:40:08 +01003326 msrs->pinbased_ctls_high |=
Wincy Vanb9c237b2015-02-03 23:56:30 +08003327 PIN_BASED_ALWAYSON_WITHOUT_TRUE_MSR |
Jan Kiszka0238ea92013-03-13 11:31:24 +01003328 PIN_BASED_VMX_PREEMPTION_TIMER;
Nadav Har'Elb87a51a2011-05-25 23:04:25 +03003329
Jan Kiszka3dbcd8d2014-06-16 13:59:40 +02003330 /* exit controls */
Arthur Chunqi Lic0dfee52013-08-06 18:41:45 +08003331 rdmsr(MSR_IA32_VMX_EXIT_CTLS,
Paolo Bonzini6677f3d2018-02-26 13:40:08 +01003332 msrs->exit_ctls_low,
3333 msrs->exit_ctls_high);
3334 msrs->exit_ctls_low =
Wincy Vanb9c237b2015-02-03 23:56:30 +08003335 VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR;
Bandan Dase0ba1a62014-04-19 18:17:46 -04003336
Paolo Bonzini6677f3d2018-02-26 13:40:08 +01003337 msrs->exit_ctls_high &=
Nadav Har'Elb87a51a2011-05-25 23:04:25 +03003338#ifdef CONFIG_X86_64
Arthur Chunqi Lic0dfee52013-08-06 18:41:45 +08003339 VM_EXIT_HOST_ADDR_SPACE_SIZE |
Nadav Har'Elb87a51a2011-05-25 23:04:25 +03003340#endif
Jan Kiszkaf41245002014-03-07 20:03:13 +01003341 VM_EXIT_LOAD_IA32_PAT | VM_EXIT_SAVE_IA32_PAT;
Paolo Bonzini6677f3d2018-02-26 13:40:08 +01003342 msrs->exit_ctls_high |=
Wincy Vanb9c237b2015-02-03 23:56:30 +08003343 VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR |
Jan Kiszkaf41245002014-03-07 20:03:13 +01003344 VM_EXIT_LOAD_IA32_EFER | VM_EXIT_SAVE_IA32_EFER |
Bandan Dase0ba1a62014-04-19 18:17:46 -04003345 VM_EXIT_SAVE_VMX_PREEMPTION_TIMER | VM_EXIT_ACK_INTR_ON_EXIT;
3346
Paolo Bonzinia87036a2016-03-08 09:52:13 +01003347 if (kvm_mpx_supported())
Paolo Bonzini6677f3d2018-02-26 13:40:08 +01003348 msrs->exit_ctls_high |= VM_EXIT_CLEAR_BNDCFGS;
Nadav Har'Elb87a51a2011-05-25 23:04:25 +03003349
Jan Kiszka2996fca2014-06-16 13:59:43 +02003350 /* We support free control of debug control saving. */
Paolo Bonzini6677f3d2018-02-26 13:40:08 +01003351 msrs->exit_ctls_low &= ~VM_EXIT_SAVE_DEBUG_CONTROLS;
Jan Kiszka2996fca2014-06-16 13:59:43 +02003352
Nadav Har'Elb87a51a2011-05-25 23:04:25 +03003353 /* entry controls */
3354 rdmsr(MSR_IA32_VMX_ENTRY_CTLS,
Paolo Bonzini6677f3d2018-02-26 13:40:08 +01003355 msrs->entry_ctls_low,
3356 msrs->entry_ctls_high);
3357 msrs->entry_ctls_low =
Wincy Vanb9c237b2015-02-03 23:56:30 +08003358 VM_ENTRY_ALWAYSON_WITHOUT_TRUE_MSR;
Paolo Bonzini6677f3d2018-02-26 13:40:08 +01003359 msrs->entry_ctls_high &=
Jan Kiszka57435342013-08-06 10:39:56 +02003360#ifdef CONFIG_X86_64
3361 VM_ENTRY_IA32E_MODE |
3362#endif
3363 VM_ENTRY_LOAD_IA32_PAT;
Paolo Bonzini6677f3d2018-02-26 13:40:08 +01003364 msrs->entry_ctls_high |=
Wincy Vanb9c237b2015-02-03 23:56:30 +08003365 (VM_ENTRY_ALWAYSON_WITHOUT_TRUE_MSR | VM_ENTRY_LOAD_IA32_EFER);
Paolo Bonzinia87036a2016-03-08 09:52:13 +01003366 if (kvm_mpx_supported())
Paolo Bonzini6677f3d2018-02-26 13:40:08 +01003367 msrs->entry_ctls_high |= VM_ENTRY_LOAD_BNDCFGS;
Jan Kiszka57435342013-08-06 10:39:56 +02003368
Jan Kiszka2996fca2014-06-16 13:59:43 +02003369 /* We support free control of debug control loading. */
Paolo Bonzini6677f3d2018-02-26 13:40:08 +01003370 msrs->entry_ctls_low &= ~VM_ENTRY_LOAD_DEBUG_CONTROLS;
Jan Kiszka2996fca2014-06-16 13:59:43 +02003371
Nadav Har'Elb87a51a2011-05-25 23:04:25 +03003372 /* cpu-based controls */
3373 rdmsr(MSR_IA32_VMX_PROCBASED_CTLS,
Paolo Bonzini6677f3d2018-02-26 13:40:08 +01003374 msrs->procbased_ctls_low,
3375 msrs->procbased_ctls_high);
3376 msrs->procbased_ctls_low =
Wincy Vanb9c237b2015-02-03 23:56:30 +08003377 CPU_BASED_ALWAYSON_WITHOUT_TRUE_MSR;
Paolo Bonzini6677f3d2018-02-26 13:40:08 +01003378 msrs->procbased_ctls_high &=
Jan Kiszkaa294c9b2013-10-23 17:43:09 +01003379 CPU_BASED_VIRTUAL_INTR_PENDING |
3380 CPU_BASED_VIRTUAL_NMI_PENDING | CPU_BASED_USE_TSC_OFFSETING |
Nadav Har'Elb87a51a2011-05-25 23:04:25 +03003381 CPU_BASED_HLT_EXITING | CPU_BASED_INVLPG_EXITING |
3382 CPU_BASED_MWAIT_EXITING | CPU_BASED_CR3_LOAD_EXITING |
3383 CPU_BASED_CR3_STORE_EXITING |
3384#ifdef CONFIG_X86_64
3385 CPU_BASED_CR8_LOAD_EXITING | CPU_BASED_CR8_STORE_EXITING |
3386#endif
3387 CPU_BASED_MOV_DR_EXITING | CPU_BASED_UNCOND_IO_EXITING |
Mihai Donțu5f3d45e2015-07-05 20:08:57 +03003388 CPU_BASED_USE_IO_BITMAPS | CPU_BASED_MONITOR_TRAP_FLAG |
3389 CPU_BASED_MONITOR_EXITING | CPU_BASED_RDPMC_EXITING |
3390 CPU_BASED_RDTSC_EXITING | CPU_BASED_PAUSE_EXITING |
3391 CPU_BASED_TPR_SHADOW | CPU_BASED_ACTIVATE_SECONDARY_CONTROLS;
Nadav Har'Elb87a51a2011-05-25 23:04:25 +03003392 /*
3393 * We can allow some features even when not supported by the
3394 * hardware. For example, L1 can specify an MSR bitmap - and we
3395 * can use it to avoid exits to L1 - even when L0 runs L2
3396 * without MSR bitmaps.
3397 */
Paolo Bonzini6677f3d2018-02-26 13:40:08 +01003398 msrs->procbased_ctls_high |=
Wincy Vanb9c237b2015-02-03 23:56:30 +08003399 CPU_BASED_ALWAYSON_WITHOUT_TRUE_MSR |
Jan Kiszka560b7ee2014-06-16 13:59:42 +02003400 CPU_BASED_USE_MSR_BITMAPS;
Nadav Har'Elb87a51a2011-05-25 23:04:25 +03003401
Jan Kiszka3dcdf3ec2014-06-16 13:59:41 +02003402 /* We support free control of CR3 access interception. */
Paolo Bonzini6677f3d2018-02-26 13:40:08 +01003403 msrs->procbased_ctls_low &=
Jan Kiszka3dcdf3ec2014-06-16 13:59:41 +02003404 ~(CPU_BASED_CR3_LOAD_EXITING | CPU_BASED_CR3_STORE_EXITING);
3405
Paolo Bonzini80154d72017-08-24 13:55:35 +02003406 /*
3407 * secondary cpu-based controls. Do not include those that
3408 * depend on CPUID bits, they are added later by vmx_cpuid_update.
3409 */
Nadav Har'Elb87a51a2011-05-25 23:04:25 +03003410 rdmsr(MSR_IA32_VMX_PROCBASED_CTLS2,
Paolo Bonzini6677f3d2018-02-26 13:40:08 +01003411 msrs->secondary_ctls_low,
3412 msrs->secondary_ctls_high);
3413 msrs->secondary_ctls_low = 0;
3414 msrs->secondary_ctls_high &=
Jan Kiszkad6851fb2013-02-23 22:34:39 +01003415 SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES |
Paolo Bonzini1b073042016-10-25 16:06:30 +02003416 SECONDARY_EXEC_DESC |
Wincy Vanf2b93282015-02-03 23:56:03 +08003417 SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE |
Wincy Van82f0dd42015-02-03 23:57:18 +08003418 SECONDARY_EXEC_APIC_REGISTER_VIRT |
Wincy Van608406e2015-02-03 23:57:51 +08003419 SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY |
Paolo Bonzini3db13482017-08-24 14:48:03 +02003420 SECONDARY_EXEC_WBINVD_EXITING;
Liran Alon32c7acf2018-06-23 02:35:11 +03003421 /*
3422 * We can emulate "VMCS shadowing," even if the hardware
3423 * doesn't support it.
3424 */
3425 msrs->secondary_ctls_high |=
3426 SECONDARY_EXEC_SHADOW_VMCS;
Jan Kiszkac18911a2013-03-13 16:06:41 +01003427
Nadav Har'Elafa61f7522013-08-07 14:59:22 +02003428 if (enable_ept) {
3429 /* nested EPT: emulate EPT also to L1 */
Paolo Bonzini6677f3d2018-02-26 13:40:08 +01003430 msrs->secondary_ctls_high |=
Radim Krčmář0790ec12015-03-17 14:02:32 +01003431 SECONDARY_EXEC_ENABLE_EPT;
Paolo Bonzini6677f3d2018-02-26 13:40:08 +01003432 msrs->ept_caps = VMX_EPT_PAGE_WALK_4_BIT |
Paolo Bonzini7db74262017-03-08 10:49:19 +01003433 VMX_EPTP_WB_BIT | VMX_EPT_INVEPT_BIT;
Bandan Das02120c42016-07-12 18:18:52 -04003434 if (cpu_has_vmx_ept_execute_only())
Paolo Bonzini6677f3d2018-02-26 13:40:08 +01003435 msrs->ept_caps |=
Bandan Das02120c42016-07-12 18:18:52 -04003436 VMX_EPT_EXECUTE_ONLY_BIT;
Paolo Bonzini6677f3d2018-02-26 13:40:08 +01003437 msrs->ept_caps &= vmx_capability.ept;
3438 msrs->ept_caps |= VMX_EPT_EXTENT_GLOBAL_BIT |
Paolo Bonzini7db74262017-03-08 10:49:19 +01003439 VMX_EPT_EXTENT_CONTEXT_BIT | VMX_EPT_2MB_PAGE_BIT |
3440 VMX_EPT_1GB_PAGE_BIT;
Bandan Das03efce62017-05-05 15:25:15 -04003441 if (enable_ept_ad_bits) {
Paolo Bonzini6677f3d2018-02-26 13:40:08 +01003442 msrs->secondary_ctls_high |=
Bandan Das03efce62017-05-05 15:25:15 -04003443 SECONDARY_EXEC_ENABLE_PML;
Paolo Bonzini6677f3d2018-02-26 13:40:08 +01003444 msrs->ept_caps |= VMX_EPT_AD_BIT;
Bandan Das03efce62017-05-05 15:25:15 -04003445 }
David Hildenbrand1c13bff2017-08-24 20:51:33 +02003446 }
Nadav Har'Elafa61f7522013-08-07 14:59:22 +02003447
Bandan Das27c42a12017-08-03 15:54:42 -04003448 if (cpu_has_vmx_vmfunc()) {
Paolo Bonzini6677f3d2018-02-26 13:40:08 +01003449 msrs->secondary_ctls_high |=
Bandan Das27c42a12017-08-03 15:54:42 -04003450 SECONDARY_EXEC_ENABLE_VMFUNC;
Bandan Das41ab9372017-08-03 15:54:43 -04003451 /*
3452 * Advertise EPTP switching unconditionally
3453 * since we emulate it
3454 */
Wanpeng Li575b3a22017-10-19 07:00:34 +08003455 if (enable_ept)
Paolo Bonzini6677f3d2018-02-26 13:40:08 +01003456 msrs->vmfunc_controls =
Wanpeng Li575b3a22017-10-19 07:00:34 +08003457 VMX_VMFUNC_EPTP_SWITCHING;
Bandan Das27c42a12017-08-03 15:54:42 -04003458 }
3459
Paolo Bonzinief697a72016-03-18 16:58:38 +01003460 /*
3461 * Old versions of KVM use the single-context version without
3462 * checking for support, so declare that it is supported even
 3463	 * though it is treated as global context. The alternative would be
 3464	 * to fail the single-context invvpid, which is worse.
3465 */
Wanpeng Li63cb6d52017-03-20 21:18:53 -07003466 if (enable_vpid) {
Paolo Bonzini6677f3d2018-02-26 13:40:08 +01003467 msrs->secondary_ctls_high |=
Wanpeng Li63cb6d52017-03-20 21:18:53 -07003468 SECONDARY_EXEC_ENABLE_VPID;
Paolo Bonzini6677f3d2018-02-26 13:40:08 +01003469 msrs->vpid_caps = VMX_VPID_INVVPID_BIT |
Jan Dakinevichbcdde302016-10-28 07:00:30 +03003470 VMX_VPID_EXTENT_SUPPORTED_MASK;
David Hildenbrand1c13bff2017-08-24 20:51:33 +02003471 }
Wanpeng Li99b83ac2015-10-13 09:12:21 -07003472
Radim Krčmář0790ec12015-03-17 14:02:32 +01003473 if (enable_unrestricted_guest)
Paolo Bonzini6677f3d2018-02-26 13:40:08 +01003474 msrs->secondary_ctls_high |=
Radim Krčmář0790ec12015-03-17 14:02:32 +01003475 SECONDARY_EXEC_UNRESTRICTED_GUEST;
3476
Jan Kiszkac18911a2013-03-13 16:06:41 +01003477 /* miscellaneous data */
Wincy Vanb9c237b2015-02-03 23:56:30 +08003478 rdmsr(MSR_IA32_VMX_MISC,
Paolo Bonzini6677f3d2018-02-26 13:40:08 +01003479 msrs->misc_low,
3480 msrs->misc_high);
3481 msrs->misc_low &= VMX_MISC_SAVE_EFER_LMA;
3482 msrs->misc_low |=
Jim Mattsonf4160e42018-05-29 09:11:33 -07003483 MSR_IA32_VMX_MISC_VMWRITE_SHADOW_RO_FIELDS |
Wincy Vanb9c237b2015-02-03 23:56:30 +08003484 VMX_MISC_EMULATED_PREEMPTION_TIMER_RATE |
Jan Kiszkaf41245002014-03-07 20:03:13 +01003485 VMX_MISC_ACTIVITY_HLT;
Paolo Bonzini6677f3d2018-02-26 13:40:08 +01003486 msrs->misc_high = 0;
David Matlack62cc6b9d2016-11-29 18:14:07 -08003487
3488 /*
3489 * This MSR reports some information about VMX support. We
3490 * should return information about the VMX we emulate for the
3491 * guest, and the VMCS structure we give it - not about the
3492 * VMX support of the underlying hardware.
3493 */
Paolo Bonzini6677f3d2018-02-26 13:40:08 +01003494 msrs->basic =
David Matlack62cc6b9d2016-11-29 18:14:07 -08003495 VMCS12_REVISION |
3496 VMX_BASIC_TRUE_CTLS |
3497 ((u64)VMCS12_SIZE << VMX_BASIC_VMCS_SIZE_SHIFT) |
3498 (VMX_BASIC_MEM_TYPE_WB << VMX_BASIC_MEM_TYPE_SHIFT);
3499
3500 if (cpu_has_vmx_basic_inout())
Paolo Bonzini6677f3d2018-02-26 13:40:08 +01003501 msrs->basic |= VMX_BASIC_INOUT;
David Matlack62cc6b9d2016-11-29 18:14:07 -08003502
3503 /*
David Matlack8322ebb2016-11-29 18:14:09 -08003504 * These MSRs specify bits which the guest must keep fixed on
David Matlack62cc6b9d2016-11-29 18:14:07 -08003505 * while L1 is in VMXON mode (in L1's root mode, or running an L2).
3506 * We picked the standard core2 setting.
3507 */
3508#define VMXON_CR0_ALWAYSON (X86_CR0_PE | X86_CR0_PG | X86_CR0_NE)
3509#define VMXON_CR4_ALWAYSON X86_CR4_VMXE
Paolo Bonzini6677f3d2018-02-26 13:40:08 +01003510 msrs->cr0_fixed0 = VMXON_CR0_ALWAYSON;
3511 msrs->cr4_fixed0 = VMXON_CR4_ALWAYSON;
David Matlack8322ebb2016-11-29 18:14:09 -08003512
3513 /* These MSRs specify bits which the guest must keep fixed off. */
Paolo Bonzini6677f3d2018-02-26 13:40:08 +01003514 rdmsrl(MSR_IA32_VMX_CR0_FIXED1, msrs->cr0_fixed1);
3515 rdmsrl(MSR_IA32_VMX_CR4_FIXED1, msrs->cr4_fixed1);
David Matlack62cc6b9d2016-11-29 18:14:07 -08003516
3517 /* highest index: VMX_PREEMPTION_TIMER_VALUE */
Paolo Bonzini6677f3d2018-02-26 13:40:08 +01003518 msrs->vmcs_enum = VMCS12_MAX_FIELD_INDEX << 1;
Nadav Har'Elb87a51a2011-05-25 23:04:25 +03003519}
3520
David Matlack38991522016-11-29 18:14:08 -08003521/*
3522 * if fixed0[i] == 1: val[i] must be 1
3523 * if fixed1[i] == 0: val[i] must be 0
3524 */
3525static inline bool fixed_bits_valid(u64 val, u64 fixed0, u64 fixed1)
3526{
3527 return ((val & fixed1) | fixed0) == val;
Nadav Har'Elb87a51a2011-05-25 23:04:25 +03003528}
3529
3530static inline bool vmx_control_verify(u32 control, u32 low, u32 high)
3531{
David Matlack38991522016-11-29 18:14:08 -08003532 return fixed_bits_valid(control, low, high);
Nadav Har'Elb87a51a2011-05-25 23:04:25 +03003533}
3534
3535static inline u64 vmx_control_msr(u32 low, u32 high)
3536{
3537 return low | ((u64)high << 32);
3538}
3539
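/*
 * Within 'mask', check that every bit set in 'subset' is also set in
 * 'superset'.
 */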
David Matlack62cc6b9d2016-11-29 18:14:07 -08003540static bool is_bitwise_subset(u64 superset, u64 subset, u64 mask)
3541{
3542 superset &= mask;
3543 subset &= mask;
3544
3545 return (superset | subset) == superset;
3546}
3547
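/*
 * Restore IA32_VMX_BASIC from userspace.  Feature bits may only be
 * cleared, the VMCS revision id must match, and the advertised VMCS size
 * may not shrink below what KVM emulates.
 */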
3548static int vmx_restore_vmx_basic(struct vcpu_vmx *vmx, u64 data)
3549{
3550 const u64 feature_and_reserved =
3551 /* feature (except bit 48; see below) */
3552 BIT_ULL(49) | BIT_ULL(54) | BIT_ULL(55) |
3553 /* reserved */
3554 BIT_ULL(31) | GENMASK_ULL(47, 45) | GENMASK_ULL(63, 56);
Paolo Bonzini6677f3d2018-02-26 13:40:08 +01003555 u64 vmx_basic = vmx->nested.msrs.basic;
David Matlack62cc6b9d2016-11-29 18:14:07 -08003556
3557 if (!is_bitwise_subset(vmx_basic, data, feature_and_reserved))
3558 return -EINVAL;
3559
3560 /*
3561 * KVM does not emulate a version of VMX that constrains physical
3562 * addresses of VMX structures (e.g. VMCS) to 32-bits.
3563 */
3564 if (data & BIT_ULL(48))
3565 return -EINVAL;
3566
3567 if (vmx_basic_vmcs_revision_id(vmx_basic) !=
3568 vmx_basic_vmcs_revision_id(data))
3569 return -EINVAL;
3570
3571 if (vmx_basic_vmcs_size(vmx_basic) > vmx_basic_vmcs_size(data))
3572 return -EINVAL;
3573
Paolo Bonzini6677f3d2018-02-26 13:40:08 +01003574 vmx->nested.msrs.basic = data;
David Matlack62cc6b9d2016-11-29 18:14:07 -08003575 return 0;
3576}
3577
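/*
 * Restore one of the "true" VMX control MSRs from userspace.  The
 * must-be-1 bits advertised by KVM must remain set, and no new may-be-1
 * bits can be added; i.e. userspace can only restrict the controls.
 */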
3578static int
3579vmx_restore_control_msr(struct vcpu_vmx *vmx, u32 msr_index, u64 data)
3580{
3581 u64 supported;
3582 u32 *lowp, *highp;
3583
3584 switch (msr_index) {
3585 case MSR_IA32_VMX_TRUE_PINBASED_CTLS:
Paolo Bonzini6677f3d2018-02-26 13:40:08 +01003586 lowp = &vmx->nested.msrs.pinbased_ctls_low;
3587 highp = &vmx->nested.msrs.pinbased_ctls_high;
David Matlack62cc6b9d2016-11-29 18:14:07 -08003588 break;
3589 case MSR_IA32_VMX_TRUE_PROCBASED_CTLS:
Paolo Bonzini6677f3d2018-02-26 13:40:08 +01003590 lowp = &vmx->nested.msrs.procbased_ctls_low;
3591 highp = &vmx->nested.msrs.procbased_ctls_high;
David Matlack62cc6b9d2016-11-29 18:14:07 -08003592 break;
3593 case MSR_IA32_VMX_TRUE_EXIT_CTLS:
Paolo Bonzini6677f3d2018-02-26 13:40:08 +01003594 lowp = &vmx->nested.msrs.exit_ctls_low;
3595 highp = &vmx->nested.msrs.exit_ctls_high;
David Matlack62cc6b9d2016-11-29 18:14:07 -08003596 break;
3597 case MSR_IA32_VMX_TRUE_ENTRY_CTLS:
Paolo Bonzini6677f3d2018-02-26 13:40:08 +01003598 lowp = &vmx->nested.msrs.entry_ctls_low;
3599 highp = &vmx->nested.msrs.entry_ctls_high;
David Matlack62cc6b9d2016-11-29 18:14:07 -08003600 break;
3601 case MSR_IA32_VMX_PROCBASED_CTLS2:
Paolo Bonzini6677f3d2018-02-26 13:40:08 +01003602 lowp = &vmx->nested.msrs.secondary_ctls_low;
3603 highp = &vmx->nested.msrs.secondary_ctls_high;
David Matlack62cc6b9d2016-11-29 18:14:07 -08003604 break;
3605 default:
3606 BUG();
3607 }
3608
3609 supported = vmx_control_msr(*lowp, *highp);
3610
3611 /* Check must-be-1 bits are still 1. */
3612 if (!is_bitwise_subset(data, supported, GENMASK_ULL(31, 0)))
3613 return -EINVAL;
3614
3615 /* Check must-be-0 bits are still 0. */
3616 if (!is_bitwise_subset(supported, data, GENMASK_ULL(63, 32)))
3617 return -EINVAL;
3618
3619 *lowp = data;
3620 *highp = data >> 32;
3621 return 0;
3622}
3623
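/*
 * Restore IA32_VMX_MISC from userspace.  Feature bits may only be
 * cleared; the preemption timer rate and MSEG revision id must not
 * change, and the CR3-target and MSR-list limits may not grow.
 */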
3624static int vmx_restore_vmx_misc(struct vcpu_vmx *vmx, u64 data)
3625{
3626 const u64 feature_and_reserved_bits =
3627 /* feature */
3628 BIT_ULL(5) | GENMASK_ULL(8, 6) | BIT_ULL(14) | BIT_ULL(15) |
3629 BIT_ULL(28) | BIT_ULL(29) | BIT_ULL(30) |
3630 /* reserved */
3631 GENMASK_ULL(13, 9) | BIT_ULL(31);
3632 u64 vmx_misc;
3633
Paolo Bonzini6677f3d2018-02-26 13:40:08 +01003634 vmx_misc = vmx_control_msr(vmx->nested.msrs.misc_low,
3635 vmx->nested.msrs.misc_high);
David Matlack62cc6b9d2016-11-29 18:14:07 -08003636
3637 if (!is_bitwise_subset(vmx_misc, data, feature_and_reserved_bits))
3638 return -EINVAL;
3639
Paolo Bonzini6677f3d2018-02-26 13:40:08 +01003640 if ((vmx->nested.msrs.pinbased_ctls_high &
David Matlack62cc6b9d2016-11-29 18:14:07 -08003641 PIN_BASED_VMX_PREEMPTION_TIMER) &&
3642 vmx_misc_preemption_timer_rate(data) !=
3643 vmx_misc_preemption_timer_rate(vmx_misc))
3644 return -EINVAL;
3645
3646 if (vmx_misc_cr3_count(data) > vmx_misc_cr3_count(vmx_misc))
3647 return -EINVAL;
3648
3649 if (vmx_misc_max_msr(data) > vmx_misc_max_msr(vmx_misc))
3650 return -EINVAL;
3651
3652 if (vmx_misc_mseg_revid(data) != vmx_misc_mseg_revid(vmx_misc))
3653 return -EINVAL;
3654
Paolo Bonzini6677f3d2018-02-26 13:40:08 +01003655 vmx->nested.msrs.misc_low = data;
3656 vmx->nested.msrs.misc_high = data >> 32;
Jim Mattsonf4160e42018-05-29 09:11:33 -07003657
3658 /*
3659 * If L1 has read-only VM-exit information fields, use the
3660 * less permissive vmx_vmwrite_bitmap to specify write
3661 * permissions for the shadow VMCS.
3662 */
3663 if (enable_shadow_vmcs && !nested_cpu_has_vmwrite_any_field(&vmx->vcpu))
3664 vmcs_write64(VMWRITE_BITMAP, __pa(vmx_vmwrite_bitmap));
3665
David Matlack62cc6b9d2016-11-29 18:14:07 -08003666 return 0;
3667}
3668
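/*
 * Restore IA32_VMX_EPT_VPID_CAP from userspace; capabilities may only be
 * reduced, never extended.
 */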
3669static int vmx_restore_vmx_ept_vpid_cap(struct vcpu_vmx *vmx, u64 data)
3670{
3671 u64 vmx_ept_vpid_cap;
3672
Paolo Bonzini6677f3d2018-02-26 13:40:08 +01003673 vmx_ept_vpid_cap = vmx_control_msr(vmx->nested.msrs.ept_caps,
3674 vmx->nested.msrs.vpid_caps);
David Matlack62cc6b9d2016-11-29 18:14:07 -08003675
3676 /* Every bit is either reserved or a feature bit. */
3677 if (!is_bitwise_subset(vmx_ept_vpid_cap, data, -1ULL))
3678 return -EINVAL;
3679
Paolo Bonzini6677f3d2018-02-26 13:40:08 +01003680 vmx->nested.msrs.ept_caps = data;
3681 vmx->nested.msrs.vpid_caps = data >> 32;
David Matlack62cc6b9d2016-11-29 18:14:07 -08003682 return 0;
3683}
3684
3685static int vmx_restore_fixed0_msr(struct vcpu_vmx *vmx, u32 msr_index, u64 data)
3686{
3687 u64 *msr;
3688
3689 switch (msr_index) {
3690 case MSR_IA32_VMX_CR0_FIXED0:
Paolo Bonzini6677f3d2018-02-26 13:40:08 +01003691 msr = &vmx->nested.msrs.cr0_fixed0;
David Matlack62cc6b9d2016-11-29 18:14:07 -08003692 break;
3693 case MSR_IA32_VMX_CR4_FIXED0:
Paolo Bonzini6677f3d2018-02-26 13:40:08 +01003694 msr = &vmx->nested.msrs.cr4_fixed0;
David Matlack62cc6b9d2016-11-29 18:14:07 -08003695 break;
3696 default:
3697 BUG();
3698 }
3699
3700 /*
 3701	 * 1 bits (which indicate bits that "must be 1" during VMX operation)
3702 * must be 1 in the restored value.
3703 */
3704 if (!is_bitwise_subset(data, *msr, -1ULL))
3705 return -EINVAL;
3706
3707 *msr = data;
3708 return 0;
3709}
3710
3711/*
3712 * Called when userspace is restoring VMX MSRs.
3713 *
3714 * Returns 0 on success, non-0 otherwise.
3715 */
3716static int vmx_set_vmx_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data)
3717{
3718 struct vcpu_vmx *vmx = to_vmx(vcpu);
3719
Jim Mattsona943ac52018-05-29 09:11:32 -07003720 /*
3721 * Don't allow changes to the VMX capability MSRs while the vCPU
3722 * is in VMX operation.
3723 */
3724 if (vmx->nested.vmxon)
3725 return -EBUSY;
3726
David Matlack62cc6b9d2016-11-29 18:14:07 -08003727 switch (msr_index) {
3728 case MSR_IA32_VMX_BASIC:
3729 return vmx_restore_vmx_basic(vmx, data);
3730 case MSR_IA32_VMX_PINBASED_CTLS:
3731 case MSR_IA32_VMX_PROCBASED_CTLS:
3732 case MSR_IA32_VMX_EXIT_CTLS:
3733 case MSR_IA32_VMX_ENTRY_CTLS:
3734 /*
3735 * The "non-true" VMX capability MSRs are generated from the
3736 * "true" MSRs, so we do not support restoring them directly.
3737 *
3738 * If userspace wants to emulate VMX_BASIC[55]=0, userspace
3739 * should restore the "true" MSRs with the must-be-1 bits
3740 * set according to the SDM Vol 3. A.2 "RESERVED CONTROLS AND
3741 * DEFAULT SETTINGS".
3742 */
3743 return -EINVAL;
3744 case MSR_IA32_VMX_TRUE_PINBASED_CTLS:
3745 case MSR_IA32_VMX_TRUE_PROCBASED_CTLS:
3746 case MSR_IA32_VMX_TRUE_EXIT_CTLS:
3747 case MSR_IA32_VMX_TRUE_ENTRY_CTLS:
3748 case MSR_IA32_VMX_PROCBASED_CTLS2:
3749 return vmx_restore_control_msr(vmx, msr_index, data);
3750 case MSR_IA32_VMX_MISC:
3751 return vmx_restore_vmx_misc(vmx, data);
3752 case MSR_IA32_VMX_CR0_FIXED0:
3753 case MSR_IA32_VMX_CR4_FIXED0:
3754 return vmx_restore_fixed0_msr(vmx, msr_index, data);
3755 case MSR_IA32_VMX_CR0_FIXED1:
3756 case MSR_IA32_VMX_CR4_FIXED1:
3757 /*
3758 * These MSRs are generated based on the vCPU's CPUID, so we
3759 * do not support restoring them directly.
3760 */
3761 return -EINVAL;
3762 case MSR_IA32_VMX_EPT_VPID_CAP:
3763 return vmx_restore_vmx_ept_vpid_cap(vmx, data);
3764 case MSR_IA32_VMX_VMCS_ENUM:
Paolo Bonzini6677f3d2018-02-26 13:40:08 +01003765 vmx->nested.msrs.vmcs_enum = data;
David Matlack62cc6b9d2016-11-29 18:14:07 -08003766 return 0;
3767 default:
3768 /*
3769 * The rest of the VMX capability MSRs do not support restore.
3770 */
3771 return -EINVAL;
3772 }
3773}
3774
Jan Kiszkacae50132014-01-04 18:47:22 +01003775/* Returns 0 on success, non-0 otherwise. */
Paolo Bonzini6677f3d2018-02-26 13:40:08 +01003776static int vmx_get_vmx_msr(struct nested_vmx_msrs *msrs, u32 msr_index, u64 *pdata)
Nadav Har'Elb87a51a2011-05-25 23:04:25 +03003777{
Nadav Har'Elb87a51a2011-05-25 23:04:25 +03003778 switch (msr_index) {
Nadav Har'Elb87a51a2011-05-25 23:04:25 +03003779 case MSR_IA32_VMX_BASIC:
Paolo Bonzini6677f3d2018-02-26 13:40:08 +01003780 *pdata = msrs->basic;
Nadav Har'Elb87a51a2011-05-25 23:04:25 +03003781 break;
3782 case MSR_IA32_VMX_TRUE_PINBASED_CTLS:
3783 case MSR_IA32_VMX_PINBASED_CTLS:
Wincy Vanb9c237b2015-02-03 23:56:30 +08003784 *pdata = vmx_control_msr(
Paolo Bonzini6677f3d2018-02-26 13:40:08 +01003785 msrs->pinbased_ctls_low,
3786 msrs->pinbased_ctls_high);
David Matlack0115f9c2016-11-29 18:14:06 -08003787 if (msr_index == MSR_IA32_VMX_PINBASED_CTLS)
3788 *pdata |= PIN_BASED_ALWAYSON_WITHOUT_TRUE_MSR;
Nadav Har'Elb87a51a2011-05-25 23:04:25 +03003789 break;
3790 case MSR_IA32_VMX_TRUE_PROCBASED_CTLS:
3791 case MSR_IA32_VMX_PROCBASED_CTLS:
Wincy Vanb9c237b2015-02-03 23:56:30 +08003792 *pdata = vmx_control_msr(
Paolo Bonzini6677f3d2018-02-26 13:40:08 +01003793 msrs->procbased_ctls_low,
3794 msrs->procbased_ctls_high);
David Matlack0115f9c2016-11-29 18:14:06 -08003795 if (msr_index == MSR_IA32_VMX_PROCBASED_CTLS)
3796 *pdata |= CPU_BASED_ALWAYSON_WITHOUT_TRUE_MSR;
Nadav Har'Elb87a51a2011-05-25 23:04:25 +03003797 break;
3798 case MSR_IA32_VMX_TRUE_EXIT_CTLS:
3799 case MSR_IA32_VMX_EXIT_CTLS:
Wincy Vanb9c237b2015-02-03 23:56:30 +08003800 *pdata = vmx_control_msr(
Paolo Bonzini6677f3d2018-02-26 13:40:08 +01003801 msrs->exit_ctls_low,
3802 msrs->exit_ctls_high);
David Matlack0115f9c2016-11-29 18:14:06 -08003803 if (msr_index == MSR_IA32_VMX_EXIT_CTLS)
3804 *pdata |= VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR;
Nadav Har'Elb87a51a2011-05-25 23:04:25 +03003805 break;
3806 case MSR_IA32_VMX_TRUE_ENTRY_CTLS:
3807 case MSR_IA32_VMX_ENTRY_CTLS:
Wincy Vanb9c237b2015-02-03 23:56:30 +08003808 *pdata = vmx_control_msr(
Paolo Bonzini6677f3d2018-02-26 13:40:08 +01003809 msrs->entry_ctls_low,
3810 msrs->entry_ctls_high);
David Matlack0115f9c2016-11-29 18:14:06 -08003811 if (msr_index == MSR_IA32_VMX_ENTRY_CTLS)
3812 *pdata |= VM_ENTRY_ALWAYSON_WITHOUT_TRUE_MSR;
Nadav Har'Elb87a51a2011-05-25 23:04:25 +03003813 break;
3814 case MSR_IA32_VMX_MISC:
Wincy Vanb9c237b2015-02-03 23:56:30 +08003815 *pdata = vmx_control_msr(
Paolo Bonzini6677f3d2018-02-26 13:40:08 +01003816 msrs->misc_low,
3817 msrs->misc_high);
Nadav Har'Elb87a51a2011-05-25 23:04:25 +03003818 break;
Nadav Har'Elb87a51a2011-05-25 23:04:25 +03003819 case MSR_IA32_VMX_CR0_FIXED0:
Paolo Bonzini6677f3d2018-02-26 13:40:08 +01003820 *pdata = msrs->cr0_fixed0;
Nadav Har'Elb87a51a2011-05-25 23:04:25 +03003821 break;
3822 case MSR_IA32_VMX_CR0_FIXED1:
Paolo Bonzini6677f3d2018-02-26 13:40:08 +01003823 *pdata = msrs->cr0_fixed1;
Nadav Har'Elb87a51a2011-05-25 23:04:25 +03003824 break;
3825 case MSR_IA32_VMX_CR4_FIXED0:
Paolo Bonzini6677f3d2018-02-26 13:40:08 +01003826 *pdata = msrs->cr4_fixed0;
Nadav Har'Elb87a51a2011-05-25 23:04:25 +03003827 break;
3828 case MSR_IA32_VMX_CR4_FIXED1:
Paolo Bonzini6677f3d2018-02-26 13:40:08 +01003829 *pdata = msrs->cr4_fixed1;
Nadav Har'Elb87a51a2011-05-25 23:04:25 +03003830 break;
3831 case MSR_IA32_VMX_VMCS_ENUM:
Paolo Bonzini6677f3d2018-02-26 13:40:08 +01003832 *pdata = msrs->vmcs_enum;
Nadav Har'Elb87a51a2011-05-25 23:04:25 +03003833 break;
3834 case MSR_IA32_VMX_PROCBASED_CTLS2:
Wincy Vanb9c237b2015-02-03 23:56:30 +08003835 *pdata = vmx_control_msr(
Paolo Bonzini6677f3d2018-02-26 13:40:08 +01003836 msrs->secondary_ctls_low,
3837 msrs->secondary_ctls_high);
Nadav Har'Elb87a51a2011-05-25 23:04:25 +03003838 break;
3839 case MSR_IA32_VMX_EPT_VPID_CAP:
Paolo Bonzini6677f3d2018-02-26 13:40:08 +01003840 *pdata = msrs->ept_caps |
3841 ((u64)msrs->vpid_caps << 32);
Nadav Har'Elb87a51a2011-05-25 23:04:25 +03003842 break;
Bandan Das27c42a12017-08-03 15:54:42 -04003843 case MSR_IA32_VMX_VMFUNC:
Paolo Bonzini6677f3d2018-02-26 13:40:08 +01003844 *pdata = msrs->vmfunc_controls;
Bandan Das27c42a12017-08-03 15:54:42 -04003845 break;
Nadav Har'Elb87a51a2011-05-25 23:04:25 +03003846 default:
Nadav Har'Elb87a51a2011-05-25 23:04:25 +03003847 return 1;
Nadav Har'Elb3897a42013-07-08 19:12:35 +08003848 }
3849
Nadav Har'Elb87a51a2011-05-25 23:04:25 +03003850 return 0;
3851}
3852
Haozhong Zhang37e4c992016-06-22 14:59:55 +08003853static inline bool vmx_feature_control_msr_valid(struct kvm_vcpu *vcpu,
3854 uint64_t val)
3855{
3856 uint64_t valid_bits = to_vmx(vcpu)->msr_ia32_feature_control_valid_bits;
3857
3858 return !(val & ~valid_bits);
3859}
3860
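/*
 * Feature MSR read path (no vCPU context): report the host-supported VMX
 * capability MSR values from vmcs_config.nested when nested is enabled;
 * every other index is rejected.
 */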
Tom Lendacky801e4592018-02-21 13:39:51 -06003861static int vmx_get_msr_feature(struct kvm_msr_entry *msr)
3862{
Paolo Bonzini13893092018-02-26 13:40:09 +01003863 switch (msr->index) {
3864 case MSR_IA32_VMX_BASIC ... MSR_IA32_VMX_VMFUNC:
3865 if (!nested)
3866 return 1;
3867 return vmx_get_vmx_msr(&vmcs_config.nested, msr->index, &msr->data);
3868 default:
3869 return 1;
3870 }
3871
3872 return 0;
Tom Lendacky801e4592018-02-21 13:39:51 -06003873}
3874
Nadav Har'Elb87a51a2011-05-25 23:04:25 +03003875/*
Avi Kivity6aa8b732006-12-10 02:21:36 -08003876 * Reads an msr value (of 'msr_index') into 'pdata'.
3877 * Returns 0 on success, non-0 otherwise.
3878 * Assumes vcpu_load() was already called.
3879 */
Paolo Bonzini609e36d2015-04-08 15:30:38 +02003880static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
Avi Kivity6aa8b732006-12-10 02:21:36 -08003881{
Borislav Petkova6cb0992017-12-20 12:50:28 +01003882 struct vcpu_vmx *vmx = to_vmx(vcpu);
Avi Kivity26bb0982009-09-07 11:14:12 +03003883 struct shared_msr_entry *msr;
Avi Kivity6aa8b732006-12-10 02:21:36 -08003884
Paolo Bonzini609e36d2015-04-08 15:30:38 +02003885 switch (msr_info->index) {
Avi Kivity05b3e0c2006-12-13 00:33:45 -08003886#ifdef CONFIG_X86_64
Avi Kivity6aa8b732006-12-10 02:21:36 -08003887 case MSR_FS_BASE:
Paolo Bonzini609e36d2015-04-08 15:30:38 +02003888 msr_info->data = vmcs_readl(GUEST_FS_BASE);
Avi Kivity6aa8b732006-12-10 02:21:36 -08003889 break;
3890 case MSR_GS_BASE:
Paolo Bonzini609e36d2015-04-08 15:30:38 +02003891 msr_info->data = vmcs_readl(GUEST_GS_BASE);
Avi Kivity6aa8b732006-12-10 02:21:36 -08003892 break;
Avi Kivity44ea2b12009-09-06 15:55:37 +03003893 case MSR_KERNEL_GS_BASE:
Sean Christopherson678e3152018-07-23 12:32:43 -07003894 msr_info->data = vmx_read_guest_kernel_gs_base(vmx);
Avi Kivity44ea2b12009-09-06 15:55:37 +03003895 break;
Avi Kivity26bb0982009-09-07 11:14:12 +03003896#endif
Avi Kivity6aa8b732006-12-10 02:21:36 -08003897 case MSR_EFER:
Paolo Bonzini609e36d2015-04-08 15:30:38 +02003898 return kvm_get_msr_common(vcpu, msr_info);
KarimAllah Ahmedd28b3872018-02-01 22:59:45 +01003899 case MSR_IA32_SPEC_CTRL:
3900 if (!msr_info->host_initiated &&
KarimAllah Ahmedd28b3872018-02-01 22:59:45 +01003901 !guest_cpuid_has(vcpu, X86_FEATURE_SPEC_CTRL))
3902 return 1;
3903
3904 msr_info->data = to_vmx(vcpu)->spec_ctrl;
3905 break;
KarimAllah Ahmed28c1c9f2018-02-01 22:59:44 +01003906 case MSR_IA32_ARCH_CAPABILITIES:
3907 if (!msr_info->host_initiated &&
3908 !guest_cpuid_has(vcpu, X86_FEATURE_ARCH_CAPABILITIES))
3909 return 1;
3910 msr_info->data = to_vmx(vcpu)->arch_capabilities;
3911 break;
Avi Kivity6aa8b732006-12-10 02:21:36 -08003912 case MSR_IA32_SYSENTER_CS:
Paolo Bonzini609e36d2015-04-08 15:30:38 +02003913 msr_info->data = vmcs_read32(GUEST_SYSENTER_CS);
Avi Kivity6aa8b732006-12-10 02:21:36 -08003914 break;
3915 case MSR_IA32_SYSENTER_EIP:
Paolo Bonzini609e36d2015-04-08 15:30:38 +02003916 msr_info->data = vmcs_readl(GUEST_SYSENTER_EIP);
Avi Kivity6aa8b732006-12-10 02:21:36 -08003917 break;
3918 case MSR_IA32_SYSENTER_ESP:
Paolo Bonzini609e36d2015-04-08 15:30:38 +02003919 msr_info->data = vmcs_readl(GUEST_SYSENTER_ESP);
Avi Kivity6aa8b732006-12-10 02:21:36 -08003920 break;
Liu, Jinsong0dd376e2014-02-24 10:56:53 +00003921 case MSR_IA32_BNDCFGS:
Haozhong Zhang691bd432017-07-04 10:27:41 +08003922 if (!kvm_mpx_supported() ||
Radim Krčmářd6321d42017-08-05 00:12:49 +02003923 (!msr_info->host_initiated &&
3924 !guest_cpuid_has(vcpu, X86_FEATURE_MPX)))
Paolo Bonzini93c4adc2014-03-05 23:19:52 +01003925 return 1;
Paolo Bonzini609e36d2015-04-08 15:30:38 +02003926 msr_info->data = vmcs_read64(GUEST_BNDCFGS);
Liu, Jinsong0dd376e2014-02-24 10:56:53 +00003927 break;
Ashok Rajc45dcc72016-06-22 14:59:56 +08003928 case MSR_IA32_MCG_EXT_CTL:
3929 if (!msr_info->host_initiated &&
Borislav Petkova6cb0992017-12-20 12:50:28 +01003930 !(vmx->msr_ia32_feature_control &
Ashok Rajc45dcc72016-06-22 14:59:56 +08003931 FEATURE_CONTROL_LMCE))
Jan Kiszkacae50132014-01-04 18:47:22 +01003932 return 1;
Ashok Rajc45dcc72016-06-22 14:59:56 +08003933 msr_info->data = vcpu->arch.mcg_ext_ctl;
3934 break;
Jan Kiszkacae50132014-01-04 18:47:22 +01003935 case MSR_IA32_FEATURE_CONTROL:
Borislav Petkova6cb0992017-12-20 12:50:28 +01003936 msr_info->data = vmx->msr_ia32_feature_control;
Jan Kiszkacae50132014-01-04 18:47:22 +01003937 break;
3938 case MSR_IA32_VMX_BASIC ... MSR_IA32_VMX_VMFUNC:
3939 if (!nested_vmx_allowed(vcpu))
3940 return 1;
Paolo Bonzini6677f3d2018-02-26 13:40:08 +01003941 return vmx_get_vmx_msr(&vmx->nested.msrs, msr_info->index,
3942 &msr_info->data);
Wanpeng Li20300092014-12-02 19:14:59 +08003943 case MSR_IA32_XSS:
3944 if (!vmx_xsaves_supported())
3945 return 1;
Paolo Bonzini609e36d2015-04-08 15:30:38 +02003946 msr_info->data = vcpu->arch.ia32_xss;
Wanpeng Li20300092014-12-02 19:14:59 +08003947 break;
Sheng Yang4e47c7a2009-12-18 16:48:47 +08003948 case MSR_TSC_AUX:
Radim Krčmářd6321d42017-08-05 00:12:49 +02003949 if (!msr_info->host_initiated &&
3950 !guest_cpuid_has(vcpu, X86_FEATURE_RDTSCP))
Sheng Yang4e47c7a2009-12-18 16:48:47 +08003951 return 1;
3952 /* Otherwise falls through */
Avi Kivity6aa8b732006-12-10 02:21:36 -08003953 default:
Borislav Petkova6cb0992017-12-20 12:50:28 +01003954 msr = find_msr_entry(vmx, msr_info->index);
Avi Kivity3bab1f52006-12-29 16:49:48 -08003955 if (msr) {
Paolo Bonzini609e36d2015-04-08 15:30:38 +02003956 msr_info->data = msr->data;
Avi Kivity3bab1f52006-12-29 16:49:48 -08003957 break;
Avi Kivity6aa8b732006-12-10 02:21:36 -08003958 }
Paolo Bonzini609e36d2015-04-08 15:30:38 +02003959 return kvm_get_msr_common(vcpu, msr_info);
Avi Kivity6aa8b732006-12-10 02:21:36 -08003960 }
3961
Avi Kivity6aa8b732006-12-10 02:21:36 -08003962 return 0;
3963}
3964
Jan Kiszkacae50132014-01-04 18:47:22 +01003965static void vmx_leave_nested(struct kvm_vcpu *vcpu);
3966
Avi Kivity6aa8b732006-12-10 02:21:36 -08003967/*
3968 * Writes msr value into the appropriate "register".
3969 * Returns 0 on success, non-0 otherwise.
3970 * Assumes vcpu_load() was already called.
3971 */
Will Auld8fe8ab42012-11-29 12:42:12 -08003972static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
Avi Kivity6aa8b732006-12-10 02:21:36 -08003973{
Gregory Haskinsa2fa3e92007-07-27 08:13:10 -04003974 struct vcpu_vmx *vmx = to_vmx(vcpu);
Avi Kivity26bb0982009-09-07 11:14:12 +03003975 struct shared_msr_entry *msr;
Eddie Dong2cc51562007-05-21 07:28:09 +03003976 int ret = 0;
Will Auld8fe8ab42012-11-29 12:42:12 -08003977 u32 msr_index = msr_info->index;
3978 u64 data = msr_info->data;
Eddie Dong2cc51562007-05-21 07:28:09 +03003979
Avi Kivity6aa8b732006-12-10 02:21:36 -08003980 switch (msr_index) {
Avi Kivity3bab1f52006-12-29 16:49:48 -08003981 case MSR_EFER:
Will Auld8fe8ab42012-11-29 12:42:12 -08003982 ret = kvm_set_msr_common(vcpu, msr_info);
Eddie Dong2cc51562007-05-21 07:28:09 +03003983 break;
Avi Kivity16175a72009-03-23 22:13:44 +02003984#ifdef CONFIG_X86_64
Avi Kivity6aa8b732006-12-10 02:21:36 -08003985 case MSR_FS_BASE:
Avi Kivity2fb92db2011-04-27 19:42:18 +03003986 vmx_segment_cache_clear(vmx);
Avi Kivity6aa8b732006-12-10 02:21:36 -08003987 vmcs_writel(GUEST_FS_BASE, data);
3988 break;
3989 case MSR_GS_BASE:
Avi Kivity2fb92db2011-04-27 19:42:18 +03003990 vmx_segment_cache_clear(vmx);
Avi Kivity6aa8b732006-12-10 02:21:36 -08003991 vmcs_writel(GUEST_GS_BASE, data);
3992 break;
Avi Kivity44ea2b12009-09-06 15:55:37 +03003993 case MSR_KERNEL_GS_BASE:
Sean Christopherson678e3152018-07-23 12:32:43 -07003994 vmx_write_guest_kernel_gs_base(vmx, data);
Avi Kivity44ea2b12009-09-06 15:55:37 +03003995 break;
Avi Kivity6aa8b732006-12-10 02:21:36 -08003996#endif
3997 case MSR_IA32_SYSENTER_CS:
3998 vmcs_write32(GUEST_SYSENTER_CS, data);
3999 break;
4000 case MSR_IA32_SYSENTER_EIP:
Avi Kivityf5b42c32007-03-06 12:05:53 +02004001 vmcs_writel(GUEST_SYSENTER_EIP, data);
Avi Kivity6aa8b732006-12-10 02:21:36 -08004002 break;
4003 case MSR_IA32_SYSENTER_ESP:
Avi Kivityf5b42c32007-03-06 12:05:53 +02004004 vmcs_writel(GUEST_SYSENTER_ESP, data);
Avi Kivity6aa8b732006-12-10 02:21:36 -08004005 break;
Liu, Jinsong0dd376e2014-02-24 10:56:53 +00004006 case MSR_IA32_BNDCFGS:
Haozhong Zhang691bd432017-07-04 10:27:41 +08004007 if (!kvm_mpx_supported() ||
Radim Krčmářd6321d42017-08-05 00:12:49 +02004008 (!msr_info->host_initiated &&
4009 !guest_cpuid_has(vcpu, X86_FEATURE_MPX)))
Paolo Bonzini93c4adc2014-03-05 23:19:52 +01004010 return 1;
Yu Zhangfd8cb432017-08-24 20:27:56 +08004011 if (is_noncanonical_address(data & PAGE_MASK, vcpu) ||
Jim Mattson45316622017-05-23 11:52:54 -07004012 (data & MSR_IA32_BNDCFGS_RSVD))
Avi Kivity6aa8b732006-12-10 02:21:36 -08004013 return 1;
Sheng Yang468d4722008-10-09 16:01:55 +08004014 vmcs_write64(GUEST_BNDCFGS, data);
4015 break;
KarimAllah Ahmedd28b3872018-02-01 22:59:45 +01004016 case MSR_IA32_SPEC_CTRL:
4017 if (!msr_info->host_initiated &&
KarimAllah Ahmedd28b3872018-02-01 22:59:45 +01004018 !guest_cpuid_has(vcpu, X86_FEATURE_SPEC_CTRL))
4019 return 1;
4020
4021 /* The STIBP bit doesn't fault even if it's not advertised */
Konrad Rzeszutek Wilk9f65fb22018-05-09 21:41:38 +02004022 if (data & ~(SPEC_CTRL_IBRS | SPEC_CTRL_STIBP | SPEC_CTRL_SSBD))
KarimAllah Ahmedd28b3872018-02-01 22:59:45 +01004023 return 1;
4024
4025 vmx->spec_ctrl = data;
4026
4027 if (!data)
4028 break;
4029
4030 /*
4031 * For non-nested:
4032 * When it's written (to non-zero) for the first time, pass
4033 * it through.
4034 *
4035 * For nested:
4036 * The handling of the MSR bitmap for L2 guests is done in
4037 * nested_vmx_merge_msr_bitmap. We should not touch the
4038 * vmcs02.msr_bitmap here since it gets completely overwritten
4039 * in the merging. We update the vmcs01 here for L1 as well
4040 * since it will end up touching the MSR anyway now.
4041 */
4042 vmx_disable_intercept_for_msr(vmx->vmcs01.msr_bitmap,
4043 MSR_IA32_SPEC_CTRL,
4044 MSR_TYPE_RW);
4045 break;
Ashok Raj15d45072018-02-01 22:59:43 +01004046 case MSR_IA32_PRED_CMD:
4047 if (!msr_info->host_initiated &&
Ashok Raj15d45072018-02-01 22:59:43 +01004048 !guest_cpuid_has(vcpu, X86_FEATURE_SPEC_CTRL))
4049 return 1;
4050
4051 if (data & ~PRED_CMD_IBPB)
4052 return 1;
4053
4054 if (!data)
4055 break;
4056
4057 wrmsrl(MSR_IA32_PRED_CMD, PRED_CMD_IBPB);
4058
4059 /*
4060 * For non-nested:
4061 * When it's written (to non-zero) for the first time, pass
4062 * it through.
4063 *
4064 * For nested:
4065 * The handling of the MSR bitmap for L2 guests is done in
4066 * nested_vmx_merge_msr_bitmap. We should not touch the
4067 * vmcs02.msr_bitmap here since it gets completely overwritten
4068 * in the merging.
4069 */
4070 vmx_disable_intercept_for_msr(vmx->vmcs01.msr_bitmap, MSR_IA32_PRED_CMD,
4071 MSR_TYPE_W);
4072 break;
KarimAllah Ahmed28c1c9f2018-02-01 22:59:44 +01004073 case MSR_IA32_ARCH_CAPABILITIES:
4074 if (!msr_info->host_initiated)
4075 return 1;
4076 vmx->arch_capabilities = data;
4077 break;
Sheng Yang468d4722008-10-09 16:01:55 +08004078 case MSR_IA32_CR_PAT:
4079 if (vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_IA32_PAT) {
Nadav Amit45666542014-09-18 22:39:44 +03004080 if (!kvm_mtrr_valid(vcpu, MSR_IA32_CR_PAT, data))
4081 return 1;
Sheng Yang468d4722008-10-09 16:01:55 +08004082 vmcs_write64(GUEST_IA32_PAT, data);
4083 vcpu->arch.pat = data;
4084 break;
4085 }
Will Auld8fe8ab42012-11-29 12:42:12 -08004086 ret = kvm_set_msr_common(vcpu, msr_info);
Sheng Yang4e47c7a2009-12-18 16:48:47 +08004087 break;
Will Auldba904632012-11-29 12:42:50 -08004088 case MSR_IA32_TSC_ADJUST:
4089 ret = kvm_set_msr_common(vcpu, msr_info);
Sheng Yang4e47c7a2009-12-18 16:48:47 +08004090 break;
Ashok Rajc45dcc72016-06-22 14:59:56 +08004091 case MSR_IA32_MCG_EXT_CTL:
4092 if ((!msr_info->host_initiated &&
4093 !(to_vmx(vcpu)->msr_ia32_feature_control &
4094 FEATURE_CONTROL_LMCE)) ||
4095 (data & ~MCG_EXT_CTL_LMCE_EN))
4096 return 1;
4097 vcpu->arch.mcg_ext_ctl = data;
4098 break;
Jan Kiszkacae50132014-01-04 18:47:22 +01004099 case MSR_IA32_FEATURE_CONTROL:
Haozhong Zhang37e4c992016-06-22 14:59:55 +08004100 if (!vmx_feature_control_msr_valid(vcpu, data) ||
Haozhong Zhang3b840802016-06-22 14:59:54 +08004101 (to_vmx(vcpu)->msr_ia32_feature_control &
Jan Kiszkacae50132014-01-04 18:47:22 +01004102 FEATURE_CONTROL_LOCKED && !msr_info->host_initiated))
4103 return 1;
Haozhong Zhang3b840802016-06-22 14:59:54 +08004104 vmx->msr_ia32_feature_control = data;
Jan Kiszkacae50132014-01-04 18:47:22 +01004105 if (msr_info->host_initiated && data == 0)
4106 vmx_leave_nested(vcpu);
4107 break;
4108 case MSR_IA32_VMX_BASIC ... MSR_IA32_VMX_VMFUNC:
David Matlack62cc6b9d2016-11-29 18:14:07 -08004109 if (!msr_info->host_initiated)
4110 return 1; /* they are read-only */
4111 if (!nested_vmx_allowed(vcpu))
4112 return 1;
4113 return vmx_set_vmx_msr(vcpu, msr_index, data);
Wanpeng Li20300092014-12-02 19:14:59 +08004114 case MSR_IA32_XSS:
4115 if (!vmx_xsaves_supported())
4116 return 1;
4117 /*
4118 * The only supported bit as of Skylake is bit 8, but
4119	 * it is not supported by KVM.
4120 */
4121 if (data != 0)
4122 return 1;
4123 vcpu->arch.ia32_xss = data;
4124 if (vcpu->arch.ia32_xss != host_xss)
4125 add_atomic_switch_msr(vmx, MSR_IA32_XSS,
4126 vcpu->arch.ia32_xss, host_xss);
4127 else
4128 clear_atomic_switch_msr(vmx, MSR_IA32_XSS);
4129 break;
Sheng Yang4e47c7a2009-12-18 16:48:47 +08004130 case MSR_TSC_AUX:
Radim Krčmářd6321d42017-08-05 00:12:49 +02004131 if (!msr_info->host_initiated &&
4132 !guest_cpuid_has(vcpu, X86_FEATURE_RDTSCP))
Sheng Yang4e47c7a2009-12-18 16:48:47 +08004133 return 1;
4134		/* Check reserved bits; the upper 32 bits must be zero */
4135 if ((data >> 32) != 0)
4136 return 1;
4137 /* Otherwise falls through */
Avi Kivity6aa8b732006-12-10 02:21:36 -08004138 default:
Rusty Russell8b9cf982007-07-30 16:31:43 +10004139 msr = find_msr_entry(vmx, msr_index);
Avi Kivity3bab1f52006-12-29 16:49:48 -08004140 if (msr) {
Andy Honig8b3c3102014-08-27 11:16:44 -07004141 u64 old_msr_data = msr->data;
Avi Kivity3bab1f52006-12-29 16:49:48 -08004142 msr->data = data;
Avi Kivity2225fd52012-04-18 15:03:04 +03004143 if (msr - vmx->guest_msrs < vmx->save_nmsrs) {
4144 preempt_disable();
Andy Honig8b3c3102014-08-27 11:16:44 -07004145 ret = kvm_set_shared_msr(msr->index, msr->data,
4146 msr->mask);
Avi Kivity2225fd52012-04-18 15:03:04 +03004147 preempt_enable();
Andy Honig8b3c3102014-08-27 11:16:44 -07004148 if (ret)
4149 msr->data = old_msr_data;
Avi Kivity2225fd52012-04-18 15:03:04 +03004150 }
Avi Kivity3bab1f52006-12-29 16:49:48 -08004151 break;
Avi Kivity6aa8b732006-12-10 02:21:36 -08004152 }
Will Auld8fe8ab42012-11-29 12:42:12 -08004153 ret = kvm_set_msr_common(vcpu, msr_info);
Avi Kivity6aa8b732006-12-10 02:21:36 -08004154 }
4155
Eddie Dong2cc51562007-05-21 07:28:09 +03004156 return ret;
Avi Kivity6aa8b732006-12-10 02:21:36 -08004157}
4158
Marcelo Tosatti5fdbf972008-06-27 14:58:02 -03004159static void vmx_cache_reg(struct kvm_vcpu *vcpu, enum kvm_reg reg)
Avi Kivity6aa8b732006-12-10 02:21:36 -08004160{
Marcelo Tosatti5fdbf972008-06-27 14:58:02 -03004161 __set_bit(reg, (unsigned long *)&vcpu->arch.regs_avail);
4162 switch (reg) {
4163 case VCPU_REGS_RSP:
4164 vcpu->arch.regs[VCPU_REGS_RSP] = vmcs_readl(GUEST_RSP);
4165 break;
4166 case VCPU_REGS_RIP:
4167 vcpu->arch.regs[VCPU_REGS_RIP] = vmcs_readl(GUEST_RIP);
4168 break;
Avi Kivity6de4f3a2009-05-31 22:58:47 +03004169 case VCPU_EXREG_PDPTR:
4170 if (enable_ept)
4171 ept_save_pdptrs(vcpu);
4172 break;
Marcelo Tosatti5fdbf972008-06-27 14:58:02 -03004173 default:
4174 break;
4175 }
Avi Kivity6aa8b732006-12-10 02:21:36 -08004176}
4177
Avi Kivity6aa8b732006-12-10 02:21:36 -08004178static __init int cpu_has_kvm_support(void)
4179{
Eduardo Habkost6210e372008-11-17 19:03:16 -02004180 return cpu_has_vmx();
Avi Kivity6aa8b732006-12-10 02:21:36 -08004181}
4182
4183static __init int vmx_disabled_by_bios(void)
4184{
4185 u64 msr;
4186
4187 rdmsrl(MSR_IA32_FEATURE_CONTROL, msr);
Shane Wangcafd6652010-04-29 12:09:01 -04004188 if (msr & FEATURE_CONTROL_LOCKED) {
Joseph Cihula23f3e992011-02-08 11:45:56 -08004189 /* launched w/ TXT and VMX disabled */
Shane Wangcafd6652010-04-29 12:09:01 -04004190 if (!(msr & FEATURE_CONTROL_VMXON_ENABLED_INSIDE_SMX)
4191 && tboot_enabled())
4192 return 1;
Joseph Cihula23f3e992011-02-08 11:45:56 -08004193 /* launched w/o TXT and VMX only enabled w/ TXT */
Shane Wangcafd6652010-04-29 12:09:01 -04004194 if (!(msr & FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX)
Joseph Cihula23f3e992011-02-08 11:45:56 -08004195 && (msr & FEATURE_CONTROL_VMXON_ENABLED_INSIDE_SMX)
Shane Wangf9335af2010-11-17 11:40:17 +08004196 && !tboot_enabled()) {
4197 printk(KERN_WARNING "kvm: disable TXT in the BIOS or "
Joseph Cihula23f3e992011-02-08 11:45:56 -08004198 "activate TXT before enabling KVM\n");
Shane Wangcafd6652010-04-29 12:09:01 -04004199 return 1;
Shane Wangf9335af2010-11-17 11:40:17 +08004200 }
Joseph Cihula23f3e992011-02-08 11:45:56 -08004201 /* launched w/o TXT and VMX disabled */
4202 if (!(msr & FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX)
4203 && !tboot_enabled())
4204 return 1;
Shane Wangcafd6652010-04-29 12:09:01 -04004205 }
4206
4207 return 0;
Avi Kivity6aa8b732006-12-10 02:21:36 -08004208}
4209
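/*
 * Enter VMX operation on this CPU: set CR4.VMXE, tell the Intel PT driver
 * that VMX is now in use, and execute VMXON with the physical address of
 * the per-CPU VMXON region.
 */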
Dongxiao Xu7725b892010-05-11 18:29:38 +08004210static void kvm_cpu_vmxon(u64 addr)
4211{
David Hildenbrandfe0e80b2017-03-10 12:47:13 +01004212 cr4_set_bits(X86_CR4_VMXE);
Alexander Shishkin1c5ac212016-03-29 17:43:10 +03004213 intel_pt_handle_vmx(1);
4214
Dongxiao Xu7725b892010-05-11 18:29:38 +08004215 asm volatile (ASM_VMX_VMXON_RAX
4216 : : "a"(&addr), "m"(addr)
4217 : "memory", "cc");
4218}
4219
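/*
 * Per-CPU enable path: fails with -EBUSY if VMX is already on (CR4.VMXE
 * set), initializes the per-CPU loaded-VMCS and blocked-vCPU lists, sets
 * the required IA32_FEATURE_CONTROL bits if the BIOS left the MSR
 * unlocked, and finally turns VMX on via kvm_cpu_vmxon().
 */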
Radim Krčmář13a34e02014-08-28 15:13:03 +02004220static int hardware_enable(void)
Avi Kivity6aa8b732006-12-10 02:21:36 -08004221{
4222 int cpu = raw_smp_processor_id();
4223 u64 phys_addr = __pa(per_cpu(vmxarea, cpu));
Shane Wangcafd6652010-04-29 12:09:01 -04004224 u64 old, test_bits;
Avi Kivity6aa8b732006-12-10 02:21:36 -08004225
Andy Lutomirski1e02ce42014-10-24 15:58:08 -07004226 if (cr4_read_shadow() & X86_CR4_VMXE)
Alexander Graf10474ae2009-09-15 11:37:46 +02004227 return -EBUSY;
4228
Vitaly Kuznetsov773e8a02018-03-20 15:02:11 +01004229 /*
4230 * This can happen if we hot-added a CPU but failed to allocate
4231 * VP assist page for it.
4232 */
4233 if (static_branch_unlikely(&enable_evmcs) &&
4234 !hv_get_vp_assist_page(cpu))
4235 return -EFAULT;
4236
Nadav Har'Eld462b812011-05-24 15:26:10 +03004237 INIT_LIST_HEAD(&per_cpu(loaded_vmcss_on_cpu, cpu));
Feng Wubf9f6ac2015-09-18 22:29:55 +08004238 INIT_LIST_HEAD(&per_cpu(blocked_vcpu_on_cpu, cpu));
4239 spin_lock_init(&per_cpu(blocked_vcpu_on_cpu_lock, cpu));
Zhang Yanfei8f536b72012-12-06 23:43:34 +08004240
4241 /*
4242 * Now we can enable the vmclear operation in kdump
4243 * since the loaded_vmcss_on_cpu list on this cpu
4244 * has been initialized.
4245 *
4246	 * Though the CPU is not in VMX operation now, there
4247	 * is no problem enabling the vmclear operation since
4248	 * the loaded_vmcss_on_cpu list is empty!
4249 */
4250 crash_enable_local_vmclear(cpu);
4251
Avi Kivity6aa8b732006-12-10 02:21:36 -08004252 rdmsrl(MSR_IA32_FEATURE_CONTROL, old);
Shane Wangcafd6652010-04-29 12:09:01 -04004253
4254 test_bits = FEATURE_CONTROL_LOCKED;
4255 test_bits |= FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX;
4256 if (tboot_enabled())
4257 test_bits |= FEATURE_CONTROL_VMXON_ENABLED_INSIDE_SMX;
4258
4259 if ((old & test_bits) != test_bits) {
Avi Kivity6aa8b732006-12-10 02:21:36 -08004260 /* enable and lock */
Shane Wangcafd6652010-04-29 12:09:01 -04004261 wrmsrl(MSR_IA32_FEATURE_CONTROL, old | test_bits);
4262 }
David Hildenbrandfe0e80b2017-03-10 12:47:13 +01004263 kvm_cpu_vmxon(phys_addr);
David Hildenbrandfdf288b2017-08-24 20:51:29 +02004264 if (enable_ept)
4265 ept_sync_global();
Alexander Graf10474ae2009-09-15 11:37:46 +02004266
4267 return 0;
Avi Kivity6aa8b732006-12-10 02:21:36 -08004268}
4269
Nadav Har'Eld462b812011-05-24 15:26:10 +03004270static void vmclear_local_loaded_vmcss(void)
Avi Kivity543e4242008-05-13 16:22:47 +03004271{
4272 int cpu = raw_smp_processor_id();
Nadav Har'Eld462b812011-05-24 15:26:10 +03004273 struct loaded_vmcs *v, *n;
Avi Kivity543e4242008-05-13 16:22:47 +03004274
Nadav Har'Eld462b812011-05-24 15:26:10 +03004275 list_for_each_entry_safe(v, n, &per_cpu(loaded_vmcss_on_cpu, cpu),
4276 loaded_vmcss_on_cpu_link)
4277 __loaded_vmcs_clear(v);
Avi Kivity543e4242008-05-13 16:22:47 +03004278}
4279
Eduardo Habkost710ff4a2008-11-17 19:03:18 -02004280
4281/* Just like cpu_vmxoff(), but with the __kvm_handle_fault_on_reboot()
4282 * tricks.
4283 */
4284static void kvm_cpu_vmxoff(void)
4285{
4286 asm volatile (__ex(ASM_VMX_VMXOFF) : : : "cc");
Alexander Shishkin1c5ac212016-03-29 17:43:10 +03004287
4288 intel_pt_handle_vmx(0);
David Hildenbrandfe0e80b2017-03-10 12:47:13 +01004289 cr4_clear_bits(X86_CR4_VMXE);
Eduardo Habkost710ff4a2008-11-17 19:03:18 -02004290}
4291
Radim Krčmář13a34e02014-08-28 15:13:03 +02004292static void hardware_disable(void)
Avi Kivity6aa8b732006-12-10 02:21:36 -08004293{
David Hildenbrandfe0e80b2017-03-10 12:47:13 +01004294 vmclear_local_loaded_vmcss();
4295 kvm_cpu_vmxoff();
Avi Kivity6aa8b732006-12-10 02:21:36 -08004296}
4297
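/*
 * Fold a required ("min") and an optional ("opt") bit mask into the final
 * value for one VMX control field: optional bits the capability MSR does
 * not allow to be 1 are dropped, bits the MSR forces to 1 are added, and
 * -EIO is returned if a required bit cannot be enabled.  A sketch of the
 * intended use (values illustrative only):
 *
 *	min = CPU_BASED_HLT_EXITING;
 *	opt = CPU_BASED_TPR_SHADOW;
 *	if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_PROCBASED_CTLS, &ctl) < 0)
 *		return -EIO;
 *	// ctl has HLT exiting set; TPR shadow only if the CPU allows it
 */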
Yang, Sheng1c3d14fe2007-07-29 11:07:42 +03004298static __init int adjust_vmx_controls(u32 ctl_min, u32 ctl_opt,
Mike Dayd77c26f2007-10-08 09:02:08 -04004299 u32 msr, u32 *result)
Avi Kivity6aa8b732006-12-10 02:21:36 -08004300{
4301 u32 vmx_msr_low, vmx_msr_high;
Yang, Sheng1c3d14fe2007-07-29 11:07:42 +03004302 u32 ctl = ctl_min | ctl_opt;
4303
4304 rdmsr(msr, vmx_msr_low, vmx_msr_high);
4305
4306 ctl &= vmx_msr_high; /* bit == 0 in high word ==> must be zero */
4307 ctl |= vmx_msr_low; /* bit == 1 in low word ==> must be one */
4308
4309 /* Ensure minimum (required) set of control bits are supported. */
4310 if (ctl_min & ~ctl)
Yang, Sheng002c7f72007-07-31 14:23:01 +03004311 return -EIO;
Yang, Sheng1c3d14fe2007-07-29 11:07:42 +03004312
4313 *result = ctl;
4314 return 0;
4315}
4316
Avi Kivity110312c2010-12-21 12:54:20 +02004317static __init bool allow_1_setting(u32 msr, u32 ctl)
4318{
4319 u32 vmx_msr_low, vmx_msr_high;
4320
4321 rdmsr(msr, vmx_msr_low, vmx_msr_high);
4322 return vmx_msr_high & ctl;
4323}
4324
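/*
 * Probe the VMX capability MSRs once during init and record the negotiated
 * pin-based, primary/secondary processor-based, VM-exit and VM-entry
 * controls, together with the VMCS size and revision id, in *vmcs_conf.
 * Returns -EIO if a required control or the required VMCS memory type
 * (write-back) is not supported.
 */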
Yang, Sheng002c7f72007-07-31 14:23:01 +03004325static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf)
Yang, Sheng1c3d14fe2007-07-29 11:07:42 +03004326{
4327 u32 vmx_msr_low, vmx_msr_high;
Sheng Yangd56f5462008-04-25 10:13:16 +08004328 u32 min, opt, min2, opt2;
Yang, Sheng1c3d14fe2007-07-29 11:07:42 +03004329 u32 _pin_based_exec_control = 0;
4330 u32 _cpu_based_exec_control = 0;
Sheng Yangf78e0e22007-10-29 09:40:42 +08004331 u32 _cpu_based_2nd_exec_control = 0;
Yang, Sheng1c3d14fe2007-07-29 11:07:42 +03004332 u32 _vmexit_control = 0;
4333 u32 _vmentry_control = 0;
4334
Paolo Bonzini13893092018-02-26 13:40:09 +01004335 memset(vmcs_conf, 0, sizeof(*vmcs_conf));
Raghavendra K T10166742012-02-07 23:19:20 +05304336 min = CPU_BASED_HLT_EXITING |
Yang, Sheng1c3d14fe2007-07-29 11:07:42 +03004337#ifdef CONFIG_X86_64
4338 CPU_BASED_CR8_LOAD_EXITING |
4339 CPU_BASED_CR8_STORE_EXITING |
4340#endif
Sheng Yangd56f5462008-04-25 10:13:16 +08004341 CPU_BASED_CR3_LOAD_EXITING |
4342 CPU_BASED_CR3_STORE_EXITING |
Quan Xu8eb73e22017-12-12 16:44:21 +08004343 CPU_BASED_UNCOND_IO_EXITING |
Yang, Sheng1c3d14fe2007-07-29 11:07:42 +03004344 CPU_BASED_MOV_DR_EXITING |
Marcelo Tosattia7052892008-09-23 13:18:35 -03004345 CPU_BASED_USE_TSC_OFFSETING |
Wanpeng Li4d5422c2018-03-12 04:53:02 -07004346 CPU_BASED_MWAIT_EXITING |
4347 CPU_BASED_MONITOR_EXITING |
Avi Kivityfee84b02011-11-10 14:57:25 +02004348 CPU_BASED_INVLPG_EXITING |
4349 CPU_BASED_RDPMC_EXITING;
Anthony Liguori443381a2010-12-06 10:53:38 -06004350
Sheng Yangf78e0e22007-10-29 09:40:42 +08004351 opt = CPU_BASED_TPR_SHADOW |
Sheng Yang25c5f222008-03-28 13:18:56 +08004352 CPU_BASED_USE_MSR_BITMAPS |
Sheng Yangf78e0e22007-10-29 09:40:42 +08004353 CPU_BASED_ACTIVATE_SECONDARY_CONTROLS;
Yang, Sheng1c3d14fe2007-07-29 11:07:42 +03004354 if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_PROCBASED_CTLS,
4355 &_cpu_based_exec_control) < 0)
Yang, Sheng002c7f72007-07-31 14:23:01 +03004356 return -EIO;
Yang, Sheng6e5d8652007-09-12 18:03:11 +08004357#ifdef CONFIG_X86_64
4358 if ((_cpu_based_exec_control & CPU_BASED_TPR_SHADOW))
4359 _cpu_based_exec_control &= ~CPU_BASED_CR8_LOAD_EXITING &
4360 ~CPU_BASED_CR8_STORE_EXITING;
4361#endif
Sheng Yangf78e0e22007-10-29 09:40:42 +08004362 if (_cpu_based_exec_control & CPU_BASED_ACTIVATE_SECONDARY_CONTROLS) {
Sheng Yangd56f5462008-04-25 10:13:16 +08004363 min2 = 0;
4364 opt2 = SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES |
Yang Zhang8d146952013-01-25 10:18:50 +08004365 SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE |
Sheng Yang2384d2b2008-01-17 15:14:33 +08004366 SECONDARY_EXEC_WBINVD_EXITING |
Sheng Yangd56f5462008-04-25 10:13:16 +08004367 SECONDARY_EXEC_ENABLE_VPID |
Nitin A Kamble3a624e22009-06-08 11:34:16 -07004368 SECONDARY_EXEC_ENABLE_EPT |
Zhai, Edwin4b8d54f2009-10-09 18:03:20 +08004369 SECONDARY_EXEC_UNRESTRICTED_GUEST |
Sheng Yang4e47c7a2009-12-18 16:48:47 +08004370 SECONDARY_EXEC_PAUSE_LOOP_EXITING |
Paolo Bonzini0367f202016-07-12 10:44:55 +02004371 SECONDARY_EXEC_DESC |
Mao, Junjiead756a12012-07-02 01:18:48 +00004372 SECONDARY_EXEC_RDTSCP |
Yang Zhang83d4c282013-01-25 10:18:49 +08004373 SECONDARY_EXEC_ENABLE_INVPCID |
Yang Zhangc7c9c562013-01-25 10:18:51 +08004374 SECONDARY_EXEC_APIC_REGISTER_VIRT |
Abel Gordonabc4fc52013-04-18 14:35:25 +03004375 SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY |
Wanpeng Li20300092014-12-02 19:14:59 +08004376 SECONDARY_EXEC_SHADOW_VMCS |
Kai Huang843e4332015-01-28 10:54:28 +08004377 SECONDARY_EXEC_XSAVES |
David Hildenbrand736fdf72017-08-24 20:51:37 +02004378 SECONDARY_EXEC_RDSEED_EXITING |
4379 SECONDARY_EXEC_RDRAND_EXITING |
Xiao Guangrong8b3e34e2015-09-09 14:05:51 +08004380 SECONDARY_EXEC_ENABLE_PML |
Bandan Das2a499e42017-08-03 15:54:41 -04004381 SECONDARY_EXEC_TSC_SCALING |
4382 SECONDARY_EXEC_ENABLE_VMFUNC;
Sheng Yangd56f5462008-04-25 10:13:16 +08004383 if (adjust_vmx_controls(min2, opt2,
4384 MSR_IA32_VMX_PROCBASED_CTLS2,
Sheng Yangf78e0e22007-10-29 09:40:42 +08004385 &_cpu_based_2nd_exec_control) < 0)
4386 return -EIO;
4387 }
4388#ifndef CONFIG_X86_64
4389 if (!(_cpu_based_2nd_exec_control &
4390 SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES))
4391 _cpu_based_exec_control &= ~CPU_BASED_TPR_SHADOW;
4392#endif
Yang Zhang83d4c282013-01-25 10:18:49 +08004393
4394 if (!(_cpu_based_exec_control & CPU_BASED_TPR_SHADOW))
4395 _cpu_based_2nd_exec_control &= ~(
Yang Zhang8d146952013-01-25 10:18:50 +08004396 SECONDARY_EXEC_APIC_REGISTER_VIRT |
Yang Zhangc7c9c562013-01-25 10:18:51 +08004397 SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE |
4398 SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY);
Yang Zhang83d4c282013-01-25 10:18:49 +08004399
Wanpeng Li61f1dd92017-10-18 16:02:19 -07004400 rdmsr_safe(MSR_IA32_VMX_EPT_VPID_CAP,
4401 &vmx_capability.ept, &vmx_capability.vpid);
4402
Sheng Yangd56f5462008-04-25 10:13:16 +08004403 if (_cpu_based_2nd_exec_control & SECONDARY_EXEC_ENABLE_EPT) {
Marcelo Tosattia7052892008-09-23 13:18:35 -03004404 /* CR3 accesses and invlpg don't need to cause VM Exits when EPT
4405		   is enabled */
Gleb Natapov5fff7d22009-08-27 18:41:30 +03004406 _cpu_based_exec_control &= ~(CPU_BASED_CR3_LOAD_EXITING |
4407 CPU_BASED_CR3_STORE_EXITING |
4408 CPU_BASED_INVLPG_EXITING);
Wanpeng Li61f1dd92017-10-18 16:02:19 -07004409 } else if (vmx_capability.ept) {
4410 vmx_capability.ept = 0;
4411		pr_warn_once("EPT CAP should not exist when the 'enable EPT' "
4412			     "VM-execution control cannot be set\n");
4413 }
4414 if (!(_cpu_based_2nd_exec_control & SECONDARY_EXEC_ENABLE_VPID) &&
4415 vmx_capability.vpid) {
4416 vmx_capability.vpid = 0;
4417		pr_warn_once("VPID CAP should not exist when the 'enable VPID' "
4418			     "VM-execution control cannot be set\n");
Sheng Yangd56f5462008-04-25 10:13:16 +08004419 }
Yang, Sheng1c3d14fe2007-07-29 11:07:42 +03004420
Paolo Bonzini91fa0f82016-06-15 20:55:08 +02004421 min = VM_EXIT_SAVE_DEBUG_CONTROLS | VM_EXIT_ACK_INTR_ON_EXIT;
Yang, Sheng1c3d14fe2007-07-29 11:07:42 +03004422#ifdef CONFIG_X86_64
4423 min |= VM_EXIT_HOST_ADDR_SPACE_SIZE;
4424#endif
Yang Zhanga547c6d2013-04-11 19:25:10 +08004425 opt = VM_EXIT_SAVE_IA32_PAT | VM_EXIT_LOAD_IA32_PAT |
Paolo Bonzini91fa0f82016-06-15 20:55:08 +02004426 VM_EXIT_CLEAR_BNDCFGS;
Yang, Sheng1c3d14fe2007-07-29 11:07:42 +03004427 if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_EXIT_CTLS,
4428 &_vmexit_control) < 0)
Yang, Sheng002c7f72007-07-31 14:23:01 +03004429 return -EIO;
Yang, Sheng1c3d14fe2007-07-29 11:07:42 +03004430
Paolo Bonzini8a1b4392017-11-06 13:31:12 +01004431 min = PIN_BASED_EXT_INTR_MASK | PIN_BASED_NMI_EXITING;
4432 opt = PIN_BASED_VIRTUAL_NMIS | PIN_BASED_POSTED_INTR |
4433 PIN_BASED_VMX_PREEMPTION_TIMER;
Yang Zhang01e439b2013-04-11 19:25:12 +08004434 if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_PINBASED_CTLS,
4435 &_pin_based_exec_control) < 0)
4436 return -EIO;
4437
Paolo Bonzini1c17c3e2016-07-08 11:53:38 +02004438 if (cpu_has_broken_vmx_preemption_timer())
4439 _pin_based_exec_control &= ~PIN_BASED_VMX_PREEMPTION_TIMER;
Yang Zhang01e439b2013-04-11 19:25:12 +08004440 if (!(_cpu_based_2nd_exec_control &
Paolo Bonzini91fa0f82016-06-15 20:55:08 +02004441 SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY))
Yang Zhang01e439b2013-04-11 19:25:12 +08004442 _pin_based_exec_control &= ~PIN_BASED_POSTED_INTR;
4443
Paolo Bonzinic845f9c2014-02-21 10:55:44 +01004444 min = VM_ENTRY_LOAD_DEBUG_CONTROLS;
Liu, Jinsongda8999d2014-02-24 10:55:46 +00004445 opt = VM_ENTRY_LOAD_IA32_PAT | VM_ENTRY_LOAD_BNDCFGS;
Yang, Sheng1c3d14fe2007-07-29 11:07:42 +03004446 if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_ENTRY_CTLS,
4447 &_vmentry_control) < 0)
Yang, Sheng002c7f72007-07-31 14:23:01 +03004448 return -EIO;
Avi Kivity6aa8b732006-12-10 02:21:36 -08004449
Nguyen Anh Quynhc68876f2006-12-29 16:49:54 -08004450 rdmsr(MSR_IA32_VMX_BASIC, vmx_msr_low, vmx_msr_high);
Yang, Sheng1c3d14fe2007-07-29 11:07:42 +03004451
4452 /* IA-32 SDM Vol 3B: VMCS size is never greater than 4kB. */
4453 if ((vmx_msr_high & 0x1fff) > PAGE_SIZE)
Yang, Sheng002c7f72007-07-31 14:23:01 +03004454 return -EIO;
Yang, Sheng1c3d14fe2007-07-29 11:07:42 +03004455
4456#ifdef CONFIG_X86_64
4457 /* IA-32 SDM Vol 3B: 64-bit CPUs always have VMX_BASIC_MSR[48]==0. */
4458 if (vmx_msr_high & (1u<<16))
Yang, Sheng002c7f72007-07-31 14:23:01 +03004459 return -EIO;
Yang, Sheng1c3d14fe2007-07-29 11:07:42 +03004460#endif
4461
4462 /* Require Write-Back (WB) memory type for VMCS accesses. */
4463 if (((vmx_msr_high >> 18) & 15) != 6)
Yang, Sheng002c7f72007-07-31 14:23:01 +03004464 return -EIO;
Yang, Sheng1c3d14fe2007-07-29 11:07:42 +03004465
Yang, Sheng002c7f72007-07-31 14:23:01 +03004466 vmcs_conf->size = vmx_msr_high & 0x1fff;
Paolo Bonzini16cb0252016-09-05 15:57:00 +02004467 vmcs_conf->order = get_order(vmcs_conf->size);
Jan Dakinevich9ac7e3e2016-09-04 21:23:15 +03004468 vmcs_conf->basic_cap = vmx_msr_high & ~0x1fff;
Vitaly Kuznetsov773e8a02018-03-20 15:02:11 +01004469
Liran Alon2307af12018-06-29 22:59:04 +03004470 vmcs_conf->revision_id = vmx_msr_low;
Yang, Sheng1c3d14fe2007-07-29 11:07:42 +03004471
Yang, Sheng002c7f72007-07-31 14:23:01 +03004472 vmcs_conf->pin_based_exec_ctrl = _pin_based_exec_control;
4473 vmcs_conf->cpu_based_exec_ctrl = _cpu_based_exec_control;
Sheng Yangf78e0e22007-10-29 09:40:42 +08004474 vmcs_conf->cpu_based_2nd_exec_ctrl = _cpu_based_2nd_exec_control;
Yang, Sheng002c7f72007-07-31 14:23:01 +03004475 vmcs_conf->vmexit_ctrl = _vmexit_control;
4476 vmcs_conf->vmentry_ctrl = _vmentry_control;
Yang, Sheng1c3d14fe2007-07-29 11:07:42 +03004477
Vitaly Kuznetsov773e8a02018-03-20 15:02:11 +01004478 if (static_branch_unlikely(&enable_evmcs))
4479 evmcs_sanitize_exec_ctrls(vmcs_conf);
4480
Avi Kivity110312c2010-12-21 12:54:20 +02004481 cpu_has_load_ia32_efer =
4482 allow_1_setting(MSR_IA32_VMX_ENTRY_CTLS,
4483 VM_ENTRY_LOAD_IA32_EFER)
4484 && allow_1_setting(MSR_IA32_VMX_EXIT_CTLS,
4485 VM_EXIT_LOAD_IA32_EFER);
4486
Gleb Natapov8bf00a52011-10-05 14:01:22 +02004487 cpu_has_load_perf_global_ctrl =
4488 allow_1_setting(MSR_IA32_VMX_ENTRY_CTLS,
4489 VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL)
4490 && allow_1_setting(MSR_IA32_VMX_EXIT_CTLS,
4491 VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL);
4492
4493 /*
4494 * Some cpus support VM_ENTRY_(LOAD|SAVE)_IA32_PERF_GLOBAL_CTRL
Andrea Gelminibb3541f2016-05-21 14:14:44 +02004495	 * but due to the errata below it can't be used. The workaround is to use
Gleb Natapov8bf00a52011-10-05 14:01:22 +02004496	 * the MSR load mechanism to switch IA32_PERF_GLOBAL_CTRL.
4497 *
4498 * VM Exit May Incorrectly Clear IA32_PERF_GLOBAL_CTRL [34:32]
4499 *
4500 * AAK155 (model 26)
4501 * AAP115 (model 30)
4502 * AAT100 (model 37)
4503 * BC86,AAY89,BD102 (model 44)
4504 * BA97 (model 46)
4505 *
4506 */
4507 if (cpu_has_load_perf_global_ctrl && boot_cpu_data.x86 == 0x6) {
4508 switch (boot_cpu_data.x86_model) {
4509 case 26:
4510 case 30:
4511 case 37:
4512 case 44:
4513 case 46:
4514 cpu_has_load_perf_global_ctrl = false;
4515 printk_once(KERN_WARNING"kvm: VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL "
4516 "does not work properly. Using workaround\n");
4517 break;
4518 default:
4519 break;
4520 }
4521 }
4522
Borislav Petkov782511b2016-04-04 22:25:03 +02004523 if (boot_cpu_has(X86_FEATURE_XSAVES))
Wanpeng Li20300092014-12-02 19:14:59 +08004524 rdmsrl(MSR_IA32_XSS, host_xss);
4525
Yang, Sheng1c3d14fe2007-07-29 11:07:42 +03004526 return 0;
Nguyen Anh Quynhc68876f2006-12-29 16:49:54 -08004527}
Avi Kivity6aa8b732006-12-10 02:21:36 -08004528
Liran Alon491a6032018-06-23 02:35:12 +03004529static struct vmcs *alloc_vmcs_cpu(bool shadow, int cpu)
Avi Kivity6aa8b732006-12-10 02:21:36 -08004530{
4531 int node = cpu_to_node(cpu);
4532 struct page *pages;
4533 struct vmcs *vmcs;
4534
Vlastimil Babka96db8002015-09-08 15:03:50 -07004535 pages = __alloc_pages_node(node, GFP_KERNEL, vmcs_config.order);
Avi Kivity6aa8b732006-12-10 02:21:36 -08004536 if (!pages)
4537 return NULL;
4538 vmcs = page_address(pages);
Yang, Sheng1c3d14fe2007-07-29 11:07:42 +03004539 memset(vmcs, 0, vmcs_config.size);
Liran Alon2307af12018-06-29 22:59:04 +03004540
4541 /* KVM supports Enlightened VMCS v1 only */
4542 if (static_branch_unlikely(&enable_evmcs))
Liran Alon392b2f22018-06-23 02:35:01 +03004543 vmcs->hdr.revision_id = KVM_EVMCS_VERSION;
Liran Alon2307af12018-06-29 22:59:04 +03004544 else
Liran Alon392b2f22018-06-23 02:35:01 +03004545 vmcs->hdr.revision_id = vmcs_config.revision_id;
Liran Alon2307af12018-06-29 22:59:04 +03004546
Liran Alon491a6032018-06-23 02:35:12 +03004547 if (shadow)
4548 vmcs->hdr.shadow_vmcs = 1;
Avi Kivity6aa8b732006-12-10 02:21:36 -08004549 return vmcs;
4550}
4551
Avi Kivity6aa8b732006-12-10 02:21:36 -08004552static void free_vmcs(struct vmcs *vmcs)
4553{
Yang, Sheng1c3d14fe2007-07-29 11:07:42 +03004554 free_pages((unsigned long)vmcs, vmcs_config.order);
Avi Kivity6aa8b732006-12-10 02:21:36 -08004555}
4556
Nadav Har'Eld462b812011-05-24 15:26:10 +03004557/*
4558 * Free a VMCS, but before that VMCLEAR it on the CPU where it was last loaded
4559 */
4560static void free_loaded_vmcs(struct loaded_vmcs *loaded_vmcs)
4561{
4562 if (!loaded_vmcs->vmcs)
4563 return;
4564 loaded_vmcs_clear(loaded_vmcs);
4565 free_vmcs(loaded_vmcs->vmcs);
4566 loaded_vmcs->vmcs = NULL;
Paolo Bonzini904e14f2018-01-16 16:51:18 +01004567 if (loaded_vmcs->msr_bitmap)
4568 free_page((unsigned long)loaded_vmcs->msr_bitmap);
Jim Mattson355f4fb2016-10-28 08:29:39 -07004569 WARN_ON(loaded_vmcs->shadow_vmcs != NULL);
Nadav Har'Eld462b812011-05-24 15:26:10 +03004570}
4571
Liran Alon491a6032018-06-23 02:35:12 +03004572static struct vmcs *alloc_vmcs(bool shadow)
Paolo Bonzinif21f1652018-01-11 12:16:15 +01004573{
Liran Alon491a6032018-06-23 02:35:12 +03004574 return alloc_vmcs_cpu(shadow, raw_smp_processor_id());
Paolo Bonzinif21f1652018-01-11 12:16:15 +01004575}
4576
4577static int alloc_loaded_vmcs(struct loaded_vmcs *loaded_vmcs)
4578{
Liran Alon491a6032018-06-23 02:35:12 +03004579 loaded_vmcs->vmcs = alloc_vmcs(false);
Paolo Bonzinif21f1652018-01-11 12:16:15 +01004580 if (!loaded_vmcs->vmcs)
4581 return -ENOMEM;
4582
4583 loaded_vmcs->shadow_vmcs = NULL;
4584 loaded_vmcs_init(loaded_vmcs);
Paolo Bonzini904e14f2018-01-16 16:51:18 +01004585
4586 if (cpu_has_vmx_msr_bitmap()) {
4587 loaded_vmcs->msr_bitmap = (unsigned long *)__get_free_page(GFP_KERNEL);
4588 if (!loaded_vmcs->msr_bitmap)
4589 goto out_vmcs;
4590 memset(loaded_vmcs->msr_bitmap, 0xff, PAGE_SIZE);
Vitaly Kuznetsovceef7d12018-04-16 12:50:33 +02004591
Arnd Bergmann1f008e12018-05-25 17:36:17 +02004592 if (IS_ENABLED(CONFIG_HYPERV) &&
4593 static_branch_unlikely(&enable_evmcs) &&
Vitaly Kuznetsovceef7d12018-04-16 12:50:33 +02004594 (ms_hyperv.nested_features & HV_X64_NESTED_MSR_BITMAP)) {
4595 struct hv_enlightened_vmcs *evmcs =
4596 (struct hv_enlightened_vmcs *)loaded_vmcs->vmcs;
4597
4598 evmcs->hv_enlightenments_control.msr_bitmap = 1;
4599 }
Paolo Bonzini904e14f2018-01-16 16:51:18 +01004600 }
Sean Christophersond7ee0392018-07-23 12:32:47 -07004601
4602 memset(&loaded_vmcs->host_state, 0, sizeof(struct vmcs_host_state));
4603
Paolo Bonzinif21f1652018-01-11 12:16:15 +01004604 return 0;
Paolo Bonzini904e14f2018-01-16 16:51:18 +01004605
4606out_vmcs:
4607 free_loaded_vmcs(loaded_vmcs);
4608 return -ENOMEM;
Paolo Bonzinif21f1652018-01-11 12:16:15 +01004609}
4610
Sam Ravnborg39959582007-06-01 00:47:13 -07004611static void free_kvm_area(void)
Avi Kivity6aa8b732006-12-10 02:21:36 -08004612{
4613 int cpu;
4614
Zachary Amsden3230bb42009-09-29 11:38:37 -10004615 for_each_possible_cpu(cpu) {
Avi Kivity6aa8b732006-12-10 02:21:36 -08004616 free_vmcs(per_cpu(vmxarea, cpu));
Zachary Amsden3230bb42009-09-29 11:38:37 -10004617 per_cpu(vmxarea, cpu) = NULL;
4618 }
Avi Kivity6aa8b732006-12-10 02:21:36 -08004619}
4620
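/*
 * VMCS field encodings pack the access type in bit 0, the field type in
 * bits 11:10 (type 1 being the read-only VM-exit information fields) and
 * the field width in bits 14:13; the helpers below decode those pieces.
 */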
Jim Mattsond37f4262017-12-22 12:12:16 -08004621enum vmcs_field_width {
4622 VMCS_FIELD_WIDTH_U16 = 0,
4623 VMCS_FIELD_WIDTH_U64 = 1,
4624 VMCS_FIELD_WIDTH_U32 = 2,
4625 VMCS_FIELD_WIDTH_NATURAL_WIDTH = 3
Jim Mattson85fd5142017-07-07 12:51:41 -07004626};
4627
Jim Mattsond37f4262017-12-22 12:12:16 -08004628static inline int vmcs_field_width(unsigned long field)
Jim Mattson85fd5142017-07-07 12:51:41 -07004629{
4630 if (0x1 & field) /* the *_HIGH fields are all 32 bit */
Jim Mattsond37f4262017-12-22 12:12:16 -08004631 return VMCS_FIELD_WIDTH_U32;
Jim Mattson85fd5142017-07-07 12:51:41 -07004632 return (field >> 13) & 0x3 ;
4633}
4634
4635static inline int vmcs_field_readonly(unsigned long field)
4636{
4637 return (((field >> 10) & 0x3) == 1);
4638}
4639
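/*
 * Trim shadow_read_only_fields[] and shadow_read_write_fields[] down to
 * the fields this CPU can actually shadow and clear the matching bits in
 * vmx_vmread_bitmap/vmx_vmwrite_bitmap, so that L1 accesses to those
 * fields are satisfied by the shadow VMCS instead of causing VM-exits.
 */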
Bandan Dasfe2b2012014-04-21 15:20:14 -04004640static void init_vmcs_shadow_fields(void)
4641{
4642 int i, j;
4643
Paolo Bonzini44900ba2017-12-13 12:58:02 +01004644 for (i = j = 0; i < max_shadow_read_only_fields; i++) {
4645 u16 field = shadow_read_only_fields[i];
Jim Mattsond37f4262017-12-22 12:12:16 -08004646 if (vmcs_field_width(field) == VMCS_FIELD_WIDTH_U64 &&
Paolo Bonzini44900ba2017-12-13 12:58:02 +01004647 (i + 1 == max_shadow_read_only_fields ||
4648 shadow_read_only_fields[i + 1] != field + 1))
4649 pr_err("Missing field from shadow_read_only_field %x\n",
4650 field + 1);
4651
4652 clear_bit(field, vmx_vmread_bitmap);
4653#ifdef CONFIG_X86_64
4654 if (field & 1)
4655 continue;
4656#endif
4657 if (j < i)
4658 shadow_read_only_fields[j] = field;
4659 j++;
4660 }
4661 max_shadow_read_only_fields = j;
Bandan Dasfe2b2012014-04-21 15:20:14 -04004662
4663 for (i = j = 0; i < max_shadow_read_write_fields; i++) {
Paolo Bonzini44900ba2017-12-13 12:58:02 +01004664 u16 field = shadow_read_write_fields[i];
Jim Mattsond37f4262017-12-22 12:12:16 -08004665 if (vmcs_field_width(field) == VMCS_FIELD_WIDTH_U64 &&
Paolo Bonzini44900ba2017-12-13 12:58:02 +01004666 (i + 1 == max_shadow_read_write_fields ||
4667 shadow_read_write_fields[i + 1] != field + 1))
4668 pr_err("Missing field from shadow_read_write_field %x\n",
4669 field + 1);
4670
Paolo Bonzinic5d167b2017-12-13 11:05:19 +01004671 /*
4672 * PML and the preemption timer can be emulated, but the
4673 * processor cannot vmwrite to fields that don't exist
4674 * on bare metal.
4675 */
Paolo Bonzini44900ba2017-12-13 12:58:02 +01004676 switch (field) {
Paolo Bonzinic5d167b2017-12-13 11:05:19 +01004677 case GUEST_PML_INDEX:
4678 if (!cpu_has_vmx_pml())
4679 continue;
4680 break;
4681 case VMX_PREEMPTION_TIMER_VALUE:
4682 if (!cpu_has_vmx_preemption_timer())
4683 continue;
4684 break;
4685 case GUEST_INTR_STATUS:
4686 if (!cpu_has_vmx_apicv())
Bandan Dasfe2b2012014-04-21 15:20:14 -04004687 continue;
4688 break;
4689 default:
4690 break;
4691 }
4692
Paolo Bonzini44900ba2017-12-13 12:58:02 +01004693 clear_bit(field, vmx_vmwrite_bitmap);
4694 clear_bit(field, vmx_vmread_bitmap);
4695#ifdef CONFIG_X86_64
4696 if (field & 1)
4697 continue;
4698#endif
Bandan Dasfe2b2012014-04-21 15:20:14 -04004699 if (j < i)
Paolo Bonzini44900ba2017-12-13 12:58:02 +01004700 shadow_read_write_fields[j] = field;
Bandan Dasfe2b2012014-04-21 15:20:14 -04004701 j++;
4702 }
4703 max_shadow_read_write_fields = j;
Bandan Dasfe2b2012014-04-21 15:20:14 -04004704}
4705
Avi Kivity6aa8b732006-12-10 02:21:36 -08004706static __init int alloc_kvm_area(void)
4707{
4708 int cpu;
4709
Zachary Amsden3230bb42009-09-29 11:38:37 -10004710 for_each_possible_cpu(cpu) {
Avi Kivity6aa8b732006-12-10 02:21:36 -08004711 struct vmcs *vmcs;
4712
Liran Alon491a6032018-06-23 02:35:12 +03004713 vmcs = alloc_vmcs_cpu(false, cpu);
Avi Kivity6aa8b732006-12-10 02:21:36 -08004714 if (!vmcs) {
4715 free_kvm_area();
4716 return -ENOMEM;
4717 }
4718
Liran Alon2307af12018-06-29 22:59:04 +03004719 /*
4720 * When eVMCS is enabled, alloc_vmcs_cpu() sets
4721 * vmcs->revision_id to KVM_EVMCS_VERSION instead of
4722 * revision_id reported by MSR_IA32_VMX_BASIC.
4723 *
4724		 * However, even though not explicitly documented by the
4725		 * TLFS, the VMXArea passed as the VMXON argument should
4726		 * still be marked with the revision_id reported by the
4727		 * physical CPU.
4728 */
4729 if (static_branch_unlikely(&enable_evmcs))
Liran Alon392b2f22018-06-23 02:35:01 +03004730 vmcs->hdr.revision_id = vmcs_config.revision_id;
Liran Alon2307af12018-06-29 22:59:04 +03004731
Avi Kivity6aa8b732006-12-10 02:21:36 -08004732 per_cpu(vmxarea, cpu) = vmcs;
4733 }
4734 return 0;
4735}
4736
Gleb Natapov91b0aa22013-01-21 15:36:47 +02004737static void fix_pmode_seg(struct kvm_vcpu *vcpu, int seg,
Gleb Natapovd99e4152012-12-20 16:57:45 +02004738 struct kvm_segment *save)
Avi Kivity6aa8b732006-12-10 02:21:36 -08004739{
Gleb Natapovd99e4152012-12-20 16:57:45 +02004740 if (!emulate_invalid_guest_state) {
4741 /*
4742 * CS and SS RPL should be equal during guest entry according
4743 * to VMX spec, but in reality it is not always so. Since vcpu
4744 * is in the middle of the transition from real mode to
4745 * protected mode it is safe to assume that RPL 0 is a good
4746 * default value.
4747 */
4748 if (seg == VCPU_SREG_CS || seg == VCPU_SREG_SS)
Nadav Amitb32a9912015-03-29 16:33:04 +03004749 save->selector &= ~SEGMENT_RPL_MASK;
4750 save->dpl = save->selector & SEGMENT_RPL_MASK;
Gleb Natapovd99e4152012-12-20 16:57:45 +02004751 save->s = 1;
Avi Kivity6aa8b732006-12-10 02:21:36 -08004752 }
Gleb Natapovd99e4152012-12-20 16:57:45 +02004753 vmx_set_segment(vcpu, save, seg);
Avi Kivity6aa8b732006-12-10 02:21:36 -08004754}
4755
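/*
 * Leave emulated real mode: restore the cached protected-mode segment
 * state, drop the VM86-related RFLAGS and CR4.VME bits, and re-install
 * sane selectors/DPLs via fix_pmode_seg().
 */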
4756static void enter_pmode(struct kvm_vcpu *vcpu)
4757{
4758 unsigned long flags;
Mohammed Gamala89a8fb2008-08-17 16:42:16 +03004759 struct vcpu_vmx *vmx = to_vmx(vcpu);
Avi Kivity6aa8b732006-12-10 02:21:36 -08004760
Gleb Natapovd99e4152012-12-20 16:57:45 +02004761 /*
4762	 * Update real mode segment cache. It may not be up-to-date if a segment
4763	 * register was written while the vcpu was in guest mode.
4764 */
4765 vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_ES], VCPU_SREG_ES);
4766 vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_DS], VCPU_SREG_DS);
4767 vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_FS], VCPU_SREG_FS);
4768 vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_GS], VCPU_SREG_GS);
4769 vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_SS], VCPU_SREG_SS);
4770 vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_CS], VCPU_SREG_CS);
4771
Avi Kivity7ffd92c2009-06-09 14:10:45 +03004772 vmx->rmode.vm86_active = 0;
Avi Kivity6aa8b732006-12-10 02:21:36 -08004773
Avi Kivity2fb92db2011-04-27 19:42:18 +03004774 vmx_segment_cache_clear(vmx);
4775
Avi Kivityf5f7b2f2012-08-21 17:07:00 +03004776 vmx_set_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_TR], VCPU_SREG_TR);
Avi Kivity6aa8b732006-12-10 02:21:36 -08004777
4778 flags = vmcs_readl(GUEST_RFLAGS);
Avi Kivity78ac8b42010-04-08 18:19:35 +03004779 flags &= RMODE_GUEST_OWNED_EFLAGS_BITS;
4780 flags |= vmx->rmode.save_rflags & ~RMODE_GUEST_OWNED_EFLAGS_BITS;
Avi Kivity6aa8b732006-12-10 02:21:36 -08004781 vmcs_writel(GUEST_RFLAGS, flags);
4782
Rusty Russell66aee912007-07-17 23:34:16 +10004783 vmcs_writel(GUEST_CR4, (vmcs_readl(GUEST_CR4) & ~X86_CR4_VME) |
4784 (vmcs_readl(CR4_READ_SHADOW) & X86_CR4_VME));
Avi Kivity6aa8b732006-12-10 02:21:36 -08004785
4786 update_exception_bitmap(vcpu);
4787
Gleb Natapov91b0aa22013-01-21 15:36:47 +02004788 fix_pmode_seg(vcpu, VCPU_SREG_CS, &vmx->rmode.segs[VCPU_SREG_CS]);
4789 fix_pmode_seg(vcpu, VCPU_SREG_SS, &vmx->rmode.segs[VCPU_SREG_SS]);
4790 fix_pmode_seg(vcpu, VCPU_SREG_ES, &vmx->rmode.segs[VCPU_SREG_ES]);
4791 fix_pmode_seg(vcpu, VCPU_SREG_DS, &vmx->rmode.segs[VCPU_SREG_DS]);
4792 fix_pmode_seg(vcpu, VCPU_SREG_FS, &vmx->rmode.segs[VCPU_SREG_FS]);
4793 fix_pmode_seg(vcpu, VCPU_SREG_GS, &vmx->rmode.segs[VCPU_SREG_GS]);
Avi Kivity6aa8b732006-12-10 02:21:36 -08004794}
4795
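/*
 * Build a VM86-compatible segment for emulated real mode: derive the
 * selector from the segment base (selector = base >> 4), force a 64K
 * limit and DPL 3, and warn once if the base is not paragraph aligned.
 */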
Avi Kivityf5f7b2f2012-08-21 17:07:00 +03004796static void fix_rmode_seg(int seg, struct kvm_segment *save)
Avi Kivity6aa8b732006-12-10 02:21:36 -08004797{
Mathias Krause772e0312012-08-30 01:30:19 +02004798 const struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg];
Gleb Natapovd99e4152012-12-20 16:57:45 +02004799 struct kvm_segment var = *save;
Avi Kivity6aa8b732006-12-10 02:21:36 -08004800
Gleb Natapovd99e4152012-12-20 16:57:45 +02004801 var.dpl = 0x3;
4802 if (seg == VCPU_SREG_CS)
4803 var.type = 0x3;
4804
4805 if (!emulate_invalid_guest_state) {
4806 var.selector = var.base >> 4;
4807 var.base = var.base & 0xffff0;
4808 var.limit = 0xffff;
4809 var.g = 0;
4810 var.db = 0;
4811 var.present = 1;
4812 var.s = 1;
4813 var.l = 0;
4814 var.unusable = 0;
4815 var.type = 0x3;
4816 var.avl = 0;
4817 if (save->base & 0xf)
4818 printk_once(KERN_WARNING "kvm: segment base is not "
4819 "paragraph aligned when entering "
4820 "protected mode (seg=%d)", seg);
4821 }
4822
4823 vmcs_write16(sf->selector, var.selector);
Chao Peng96794e42017-02-21 03:50:01 -05004824 vmcs_writel(sf->base, var.base);
Gleb Natapovd99e4152012-12-20 16:57:45 +02004825 vmcs_write32(sf->limit, var.limit);
4826 vmcs_write32(sf->ar_bytes, vmx_segment_access_rights(&var));
Avi Kivity6aa8b732006-12-10 02:21:36 -08004827}
4828
4829static void enter_rmode(struct kvm_vcpu *vcpu)
4830{
4831 unsigned long flags;
Mohammed Gamala89a8fb2008-08-17 16:42:16 +03004832 struct vcpu_vmx *vmx = to_vmx(vcpu);
Sean Christopherson40bbb9d2018-03-20 12:17:20 -07004833 struct kvm_vmx *kvm_vmx = to_kvm_vmx(vcpu->kvm);
Avi Kivity6aa8b732006-12-10 02:21:36 -08004834
Avi Kivityf5f7b2f2012-08-21 17:07:00 +03004835 vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_TR], VCPU_SREG_TR);
4836 vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_ES], VCPU_SREG_ES);
4837 vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_DS], VCPU_SREG_DS);
4838 vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_FS], VCPU_SREG_FS);
4839 vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_GS], VCPU_SREG_GS);
Gleb Natapovc6ad11532012-12-12 19:10:51 +02004840 vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_SS], VCPU_SREG_SS);
4841 vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_CS], VCPU_SREG_CS);
Avi Kivityf5f7b2f2012-08-21 17:07:00 +03004842
Avi Kivity7ffd92c2009-06-09 14:10:45 +03004843 vmx->rmode.vm86_active = 1;
Avi Kivity6aa8b732006-12-10 02:21:36 -08004844
Gleb Natapov776e58e2011-03-13 12:34:27 +02004845 /*
4846 * Very old userspace does not call KVM_SET_TSS_ADDR before entering
Jan Kiszka4918c6c2013-03-15 08:38:56 +01004847 * vcpu. Warn the user that an update is overdue.
Gleb Natapov776e58e2011-03-13 12:34:27 +02004848 */
Sean Christopherson40bbb9d2018-03-20 12:17:20 -07004849 if (!kvm_vmx->tss_addr)
Gleb Natapov776e58e2011-03-13 12:34:27 +02004850		printk_once(KERN_WARNING "kvm: KVM_SET_TSS_ADDR needs to be "
4851 "called before entering vcpu\n");
Gleb Natapov776e58e2011-03-13 12:34:27 +02004852
Avi Kivity2fb92db2011-04-27 19:42:18 +03004853 vmx_segment_cache_clear(vmx);
4854
Sean Christopherson40bbb9d2018-03-20 12:17:20 -07004855 vmcs_writel(GUEST_TR_BASE, kvm_vmx->tss_addr);
Avi Kivity6aa8b732006-12-10 02:21:36 -08004856 vmcs_write32(GUEST_TR_LIMIT, RMODE_TSS_SIZE - 1);
Avi Kivity6aa8b732006-12-10 02:21:36 -08004857 vmcs_write32(GUEST_TR_AR_BYTES, 0x008b);
4858
4859 flags = vmcs_readl(GUEST_RFLAGS);
Avi Kivity78ac8b42010-04-08 18:19:35 +03004860 vmx->rmode.save_rflags = flags;
Avi Kivity6aa8b732006-12-10 02:21:36 -08004861
Glauber de Oliveira Costa053de042008-01-30 13:31:27 +01004862 flags |= X86_EFLAGS_IOPL | X86_EFLAGS_VM;
Avi Kivity6aa8b732006-12-10 02:21:36 -08004863
4864 vmcs_writel(GUEST_RFLAGS, flags);
Rusty Russell66aee912007-07-17 23:34:16 +10004865 vmcs_writel(GUEST_CR4, vmcs_readl(GUEST_CR4) | X86_CR4_VME);
Avi Kivity6aa8b732006-12-10 02:21:36 -08004866 update_exception_bitmap(vcpu);
4867
Gleb Natapovd99e4152012-12-20 16:57:45 +02004868 fix_rmode_seg(VCPU_SREG_SS, &vmx->rmode.segs[VCPU_SREG_SS]);
4869 fix_rmode_seg(VCPU_SREG_CS, &vmx->rmode.segs[VCPU_SREG_CS]);
4870 fix_rmode_seg(VCPU_SREG_ES, &vmx->rmode.segs[VCPU_SREG_ES]);
4871 fix_rmode_seg(VCPU_SREG_DS, &vmx->rmode.segs[VCPU_SREG_DS]);
4872 fix_rmode_seg(VCPU_SREG_GS, &vmx->rmode.segs[VCPU_SREG_GS]);
4873 fix_rmode_seg(VCPU_SREG_FS, &vmx->rmode.segs[VCPU_SREG_FS]);
Mohammed Gamala89a8fb2008-08-17 16:42:16 +03004874
Eddie Dong8668a3c2007-10-10 14:26:45 +08004875 kvm_mmu_reset_context(vcpu);
Avi Kivity6aa8b732006-12-10 02:21:36 -08004876}
4877
Amit Shah401d10d2009-02-20 22:53:37 +05304878static void vmx_set_efer(struct kvm_vcpu *vcpu, u64 efer)
4879{
4880 struct vcpu_vmx *vmx = to_vmx(vcpu);
Avi Kivity26bb0982009-09-07 11:14:12 +03004881 struct shared_msr_entry *msr = find_msr_entry(vmx, MSR_EFER);
4882
4883 if (!msr)
4884 return;
Amit Shah401d10d2009-02-20 22:53:37 +05304885
Avi Kivity44ea2b12009-09-06 15:55:37 +03004886 /*
Sean Christopherson678e3152018-07-23 12:32:43 -07004887 * MSR_KERNEL_GS_BASE is not intercepted when the guest is in
4888 * 64-bit mode as a 64-bit kernel may frequently access the
4889 * MSR. This means we need to manually save/restore the MSR
4890 * when switching between guest and host state, but only if
4891 * the guest is in 64-bit mode. Sync our cached value if the
4892 * guest is transitioning to 32-bit mode and the CPU contains
4893 * guest state, i.e. the cache is stale.
Avi Kivity44ea2b12009-09-06 15:55:37 +03004894 */
Sean Christopherson678e3152018-07-23 12:32:43 -07004895#ifdef CONFIG_X86_64
4896 if (!(efer & EFER_LMA))
4897 (void)vmx_read_guest_kernel_gs_base(vmx);
4898#endif
Avi Kivityf6801df2010-01-21 15:31:50 +02004899 vcpu->arch.efer = efer;
Amit Shah401d10d2009-02-20 22:53:37 +05304900 if (efer & EFER_LMA) {
Gleb Natapov2961e8762013-11-25 15:37:13 +02004901 vm_entry_controls_setbit(to_vmx(vcpu), VM_ENTRY_IA32E_MODE);
Amit Shah401d10d2009-02-20 22:53:37 +05304902 msr->data = efer;
4903 } else {
Gleb Natapov2961e8762013-11-25 15:37:13 +02004904 vm_entry_controls_clearbit(to_vmx(vcpu), VM_ENTRY_IA32E_MODE);
Amit Shah401d10d2009-02-20 22:53:37 +05304905
4906 msr->data = efer & ~EFER_LME;
4907 }
4908 setup_msrs(vmx);
4909}
4910
Avi Kivity05b3e0c2006-12-13 00:33:45 -08004911#ifdef CONFIG_X86_64
Avi Kivity6aa8b732006-12-10 02:21:36 -08004912
4913static void enter_lmode(struct kvm_vcpu *vcpu)
4914{
4915 u32 guest_tr_ar;
4916
Avi Kivity2fb92db2011-04-27 19:42:18 +03004917 vmx_segment_cache_clear(to_vmx(vcpu));
4918
Avi Kivity6aa8b732006-12-10 02:21:36 -08004919 guest_tr_ar = vmcs_read32(GUEST_TR_AR_BYTES);
Andy Lutomirski4d283ec2015-08-13 13:18:48 -07004920 if ((guest_tr_ar & VMX_AR_TYPE_MASK) != VMX_AR_TYPE_BUSY_64_TSS) {
Jan Kiszkabd801582011-09-12 11:26:22 +02004921 pr_debug_ratelimited("%s: tss fixup for long mode\n",
4922 __func__);
Avi Kivity6aa8b732006-12-10 02:21:36 -08004923 vmcs_write32(GUEST_TR_AR_BYTES,
Andy Lutomirski4d283ec2015-08-13 13:18:48 -07004924 (guest_tr_ar & ~VMX_AR_TYPE_MASK)
4925 | VMX_AR_TYPE_BUSY_64_TSS);
Avi Kivity6aa8b732006-12-10 02:21:36 -08004926 }
Avi Kivityda38f432010-07-06 11:30:49 +03004927 vmx_set_efer(vcpu, vcpu->arch.efer | EFER_LMA);
Avi Kivity6aa8b732006-12-10 02:21:36 -08004928}
4929
4930static void exit_lmode(struct kvm_vcpu *vcpu)
4931{
Gleb Natapov2961e8762013-11-25 15:37:13 +02004932 vm_entry_controls_clearbit(to_vmx(vcpu), VM_ENTRY_IA32E_MODE);
Avi Kivityda38f432010-07-06 11:30:49 +03004933 vmx_set_efer(vcpu, vcpu->arch.efer & ~EFER_LMA);
Avi Kivity6aa8b732006-12-10 02:21:36 -08004934}
4935
4936#endif
4937
Wanpeng Lic2ba05c2017-12-12 17:33:03 -08004938static inline void __vmx_flush_tlb(struct kvm_vcpu *vcpu, int vpid,
4939 bool invalidate_gpa)
Sheng Yang2384d2b2008-01-17 15:14:33 +08004940{
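	/*
	 * Flush choice: when EPT is enabled and either a guest-physical
	 * mapping changed or VPID is unavailable, flush the whole EPT
	 * context; otherwise a VPID-tagged flush of linear mappings is
	 * sufficient.
	 */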
Wanpeng Lic2ba05c2017-12-12 17:33:03 -08004941 if (enable_ept && (invalidate_gpa || !enable_vpid)) {
Xiao Guangrongdd180b32010-07-03 16:02:42 +08004942 if (!VALID_PAGE(vcpu->arch.mmu.root_hpa))
4943 return;
Peter Feiner995f00a2017-06-30 17:26:32 -07004944 ept_sync_context(construct_eptp(vcpu, vcpu->arch.mmu.root_hpa));
Jim Mattsonf0b98c02017-03-15 07:56:11 -07004945 } else {
4946 vpid_sync_context(vpid);
Xiao Guangrongdd180b32010-07-03 16:02:42 +08004947 }
Sheng Yang2384d2b2008-01-17 15:14:33 +08004948}
4949
Wanpeng Lic2ba05c2017-12-12 17:33:03 -08004950static void vmx_flush_tlb(struct kvm_vcpu *vcpu, bool invalidate_gpa)
Wanpeng Lidd5f5342015-09-23 18:26:57 +08004951{
Wanpeng Lic2ba05c2017-12-12 17:33:03 -08004952 __vmx_flush_tlb(vcpu, to_vmx(vcpu)->vpid, invalidate_gpa);
Wanpeng Lidd5f5342015-09-23 18:26:57 +08004953}
4954
Junaid Shahidfaff8752018-06-29 13:10:05 -07004955static void vmx_flush_tlb_gva(struct kvm_vcpu *vcpu, gva_t addr)
4956{
4957 int vpid = to_vmx(vcpu)->vpid;
4958
4959 if (!vpid_sync_vcpu_addr(vpid, addr))
4960 vpid_sync_context(vpid);
4961
4962 /*
4963 * If VPIDs are not supported or enabled, then the above is a no-op.
4964 * But we don't really need a TLB flush in that case anyway, because
4965 * each VM entry/exit includes an implicit flush when VPID is 0.
4966 */
4967}
4968
Avi Kivitye8467fd2009-12-29 18:43:06 +02004969static void vmx_decache_cr0_guest_bits(struct kvm_vcpu *vcpu)
4970{
4971 ulong cr0_guest_owned_bits = vcpu->arch.cr0_guest_owned_bits;
4972
4973 vcpu->arch.cr0 &= ~cr0_guest_owned_bits;
4974 vcpu->arch.cr0 |= vmcs_readl(GUEST_CR0) & cr0_guest_owned_bits;
4975}
4976
Avi Kivityaff48ba2010-12-05 18:56:11 +02004977static void vmx_decache_cr3(struct kvm_vcpu *vcpu)
4978{
Sean Christophersonb4d18512018-03-05 12:04:40 -08004979 if (enable_unrestricted_guest || (enable_ept && is_paging(vcpu)))
Avi Kivityaff48ba2010-12-05 18:56:11 +02004980 vcpu->arch.cr3 = vmcs_readl(GUEST_CR3);
4981 __set_bit(VCPU_EXREG_CR3, (ulong *)&vcpu->arch.regs_avail);
4982}
4983
Anthony Liguori25c4c272007-04-27 09:29:21 +03004984static void vmx_decache_cr4_guest_bits(struct kvm_vcpu *vcpu)
Avi Kivity399badf2007-01-05 16:36:38 -08004985{
Avi Kivityfc78f512009-12-07 12:16:48 +02004986 ulong cr4_guest_owned_bits = vcpu->arch.cr4_guest_owned_bits;
4987
4988 vcpu->arch.cr4 &= ~cr4_guest_owned_bits;
4989 vcpu->arch.cr4 |= vmcs_readl(GUEST_CR4) & cr4_guest_owned_bits;
Avi Kivity399badf2007-01-05 16:36:38 -08004990}
4991
Sheng Yang14394422008-04-28 12:24:45 +08004992static void ept_load_pdptrs(struct kvm_vcpu *vcpu)
4993{
Gleb Natapovd0d538b2013-10-09 19:13:19 +03004994 struct kvm_mmu *mmu = vcpu->arch.walk_mmu;
4995
Avi Kivity6de4f3a2009-05-31 22:58:47 +03004996 if (!test_bit(VCPU_EXREG_PDPTR,
4997 (unsigned long *)&vcpu->arch.regs_dirty))
4998 return;
4999
Sheng Yang14394422008-04-28 12:24:45 +08005000 if (is_paging(vcpu) && is_pae(vcpu) && !is_long_mode(vcpu)) {
Gleb Natapovd0d538b2013-10-09 19:13:19 +03005001 vmcs_write64(GUEST_PDPTR0, mmu->pdptrs[0]);
5002 vmcs_write64(GUEST_PDPTR1, mmu->pdptrs[1]);
5003 vmcs_write64(GUEST_PDPTR2, mmu->pdptrs[2]);
5004 vmcs_write64(GUEST_PDPTR3, mmu->pdptrs[3]);
Sheng Yang14394422008-04-28 12:24:45 +08005005 }
5006}
5007
Avi Kivity8f5d5492009-05-31 18:41:29 +03005008static void ept_save_pdptrs(struct kvm_vcpu *vcpu)
5009{
Gleb Natapovd0d538b2013-10-09 19:13:19 +03005010 struct kvm_mmu *mmu = vcpu->arch.walk_mmu;
5011
Avi Kivity8f5d5492009-05-31 18:41:29 +03005012 if (is_paging(vcpu) && is_pae(vcpu) && !is_long_mode(vcpu)) {
Gleb Natapovd0d538b2013-10-09 19:13:19 +03005013 mmu->pdptrs[0] = vmcs_read64(GUEST_PDPTR0);
5014 mmu->pdptrs[1] = vmcs_read64(GUEST_PDPTR1);
5015 mmu->pdptrs[2] = vmcs_read64(GUEST_PDPTR2);
5016 mmu->pdptrs[3] = vmcs_read64(GUEST_PDPTR3);
Avi Kivity8f5d5492009-05-31 18:41:29 +03005017 }
Avi Kivity6de4f3a2009-05-31 22:58:47 +03005018
5019 __set_bit(VCPU_EXREG_PDPTR,
5020 (unsigned long *)&vcpu->arch.regs_avail);
5021 __set_bit(VCPU_EXREG_PDPTR,
5022 (unsigned long *)&vcpu->arch.regs_dirty);
Avi Kivity8f5d5492009-05-31 18:41:29 +03005023}
5024
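/*
 * The VMX "fixed" MSRs describe the CR0/CR4 values that are legal while in
 * VMX operation: a bit set in fixed0 must be 1, and a bit clear in fixed1
 * must be 0.  fixed_bits_valid() checks a candidate value against both
 * constraints.
 */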
David Matlack38991522016-11-29 18:14:08 -08005025static bool nested_guest_cr0_valid(struct kvm_vcpu *vcpu, unsigned long val)
5026{
Paolo Bonzini6677f3d2018-02-26 13:40:08 +01005027 u64 fixed0 = to_vmx(vcpu)->nested.msrs.cr0_fixed0;
5028 u64 fixed1 = to_vmx(vcpu)->nested.msrs.cr0_fixed1;
David Matlack38991522016-11-29 18:14:08 -08005029 struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
5030
Paolo Bonzini6677f3d2018-02-26 13:40:08 +01005031 if (to_vmx(vcpu)->nested.msrs.secondary_ctls_high &
David Matlack38991522016-11-29 18:14:08 -08005032 SECONDARY_EXEC_UNRESTRICTED_GUEST &&
5033 nested_cpu_has2(vmcs12, SECONDARY_EXEC_UNRESTRICTED_GUEST))
5034 fixed0 &= ~(X86_CR0_PE | X86_CR0_PG);
5035
5036 return fixed_bits_valid(val, fixed0, fixed1);
5037}
5038
5039static bool nested_host_cr0_valid(struct kvm_vcpu *vcpu, unsigned long val)
5040{
Paolo Bonzini6677f3d2018-02-26 13:40:08 +01005041 u64 fixed0 = to_vmx(vcpu)->nested.msrs.cr0_fixed0;
5042 u64 fixed1 = to_vmx(vcpu)->nested.msrs.cr0_fixed1;
David Matlack38991522016-11-29 18:14:08 -08005043
5044 return fixed_bits_valid(val, fixed0, fixed1);
5045}
5046
5047static bool nested_cr4_valid(struct kvm_vcpu *vcpu, unsigned long val)
5048{
Paolo Bonzini6677f3d2018-02-26 13:40:08 +01005049 u64 fixed0 = to_vmx(vcpu)->nested.msrs.cr4_fixed0;
5050 u64 fixed1 = to_vmx(vcpu)->nested.msrs.cr4_fixed1;
David Matlack38991522016-11-29 18:14:08 -08005051
5052 return fixed_bits_valid(val, fixed0, fixed1);
5053}
5054
5055/* No difference in the restrictions on guest and host CR4 in VMX operation. */
5056#define nested_guest_cr4_valid nested_cr4_valid
5057#define nested_host_cr4_valid nested_cr4_valid
5058
Nadav Har'El5e1746d2011-05-25 23:03:24 +03005059static int vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4);
Sheng Yang14394422008-04-28 12:24:45 +08005060
5061static void ept_update_paging_mode_cr0(unsigned long *hw_cr0,
5062 unsigned long cr0,
5063 struct kvm_vcpu *vcpu)
5064{
Marcelo Tosatti5233dd52011-06-06 14:27:47 -03005065 if (!test_bit(VCPU_EXREG_CR3, (ulong *)&vcpu->arch.regs_avail))
5066 vmx_decache_cr3(vcpu);
Sheng Yang14394422008-04-28 12:24:45 +08005067 if (!(cr0 & X86_CR0_PG)) {
5068 /* From paging/starting to nonpaging */
5069 vmcs_write32(CPU_BASED_VM_EXEC_CONTROL,
Sheng Yang65267ea2008-06-18 14:43:38 +08005070 vmcs_read32(CPU_BASED_VM_EXEC_CONTROL) |
Sheng Yang14394422008-04-28 12:24:45 +08005071 (CPU_BASED_CR3_LOAD_EXITING |
5072 CPU_BASED_CR3_STORE_EXITING));
5073 vcpu->arch.cr0 = cr0;
Avi Kivityfc78f512009-12-07 12:16:48 +02005074 vmx_set_cr4(vcpu, kvm_read_cr4(vcpu));
Sheng Yang14394422008-04-28 12:24:45 +08005075 } else if (!is_paging(vcpu)) {
5076 /* From nonpaging to paging */
5077 vmcs_write32(CPU_BASED_VM_EXEC_CONTROL,
Sheng Yang65267ea2008-06-18 14:43:38 +08005078 vmcs_read32(CPU_BASED_VM_EXEC_CONTROL) &
Sheng Yang14394422008-04-28 12:24:45 +08005079 ~(CPU_BASED_CR3_LOAD_EXITING |
5080 CPU_BASED_CR3_STORE_EXITING));
5081 vcpu->arch.cr0 = cr0;
Avi Kivityfc78f512009-12-07 12:16:48 +02005082 vmx_set_cr4(vcpu, kvm_read_cr4(vcpu));
Sheng Yang14394422008-04-28 12:24:45 +08005083 }
Sheng Yang95eb84a2009-08-19 09:52:18 +08005084
5085 if (!(cr0 & X86_CR0_WP))
5086 *hw_cr0 &= ~X86_CR0_WP;
Sheng Yang14394422008-04-28 12:24:45 +08005087}
5088
Avi Kivity6aa8b732006-12-10 02:21:36 -08005089static void vmx_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
5090{
Avi Kivity7ffd92c2009-06-09 14:10:45 +03005091 struct vcpu_vmx *vmx = to_vmx(vcpu);
Nitin A Kamble3a624e22009-06-08 11:34:16 -07005092 unsigned long hw_cr0;
5093
Gleb Natapov50378782013-02-04 16:00:28 +02005094 hw_cr0 = (cr0 & ~KVM_GUEST_CR0_MASK);
Nitin A Kamble3a624e22009-06-08 11:34:16 -07005095 if (enable_unrestricted_guest)
Gleb Natapov50378782013-02-04 16:00:28 +02005096 hw_cr0 |= KVM_VM_CR0_ALWAYS_ON_UNRESTRICTED_GUEST;
Gleb Natapov218e7632013-01-21 15:36:45 +02005097 else {
Gleb Natapov50378782013-02-04 16:00:28 +02005098 hw_cr0 |= KVM_VM_CR0_ALWAYS_ON;
Sheng Yang14394422008-04-28 12:24:45 +08005099
Gleb Natapov218e7632013-01-21 15:36:45 +02005100 if (vmx->rmode.vm86_active && (cr0 & X86_CR0_PE))
5101 enter_pmode(vcpu);
Avi Kivity6aa8b732006-12-10 02:21:36 -08005102
Gleb Natapov218e7632013-01-21 15:36:45 +02005103 if (!vmx->rmode.vm86_active && !(cr0 & X86_CR0_PE))
5104 enter_rmode(vcpu);
5105 }
Avi Kivity6aa8b732006-12-10 02:21:36 -08005106
Avi Kivity05b3e0c2006-12-13 00:33:45 -08005107#ifdef CONFIG_X86_64
Avi Kivityf6801df2010-01-21 15:31:50 +02005108 if (vcpu->arch.efer & EFER_LME) {
Rusty Russell707d92fa2007-07-17 23:19:08 +10005109 if (!is_paging(vcpu) && (cr0 & X86_CR0_PG))
Avi Kivity6aa8b732006-12-10 02:21:36 -08005110 enter_lmode(vcpu);
Rusty Russell707d92fa2007-07-17 23:19:08 +10005111 if (is_paging(vcpu) && !(cr0 & X86_CR0_PG))
Avi Kivity6aa8b732006-12-10 02:21:36 -08005112 exit_lmode(vcpu);
5113 }
5114#endif
5115
Sean Christophersonb4d18512018-03-05 12:04:40 -08005116 if (enable_ept && !enable_unrestricted_guest)
Sheng Yang14394422008-04-28 12:24:45 +08005117 ept_update_paging_mode_cr0(&hw_cr0, cr0, vcpu);
5118
Avi Kivity6aa8b732006-12-10 02:21:36 -08005119 vmcs_writel(CR0_READ_SHADOW, cr0);
Sheng Yang14394422008-04-28 12:24:45 +08005120 vmcs_writel(GUEST_CR0, hw_cr0);
Zhang Xiantaoad312c72007-12-13 23:50:52 +08005121 vcpu->arch.cr0 = cr0;
Gleb Natapov14168782013-01-21 15:36:49 +02005122
5123 /* depends on vcpu->arch.cr0 to be set to a new value */
5124 vmx->emulation_required = emulation_required(vcpu);
Avi Kivity6aa8b732006-12-10 02:21:36 -08005125}
5126
Yu Zhang855feb62017-08-24 20:27:55 +08005127static int get_ept_level(struct kvm_vcpu *vcpu)
5128{
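	/* Use a 5-level EPT walk once the guest's MAXPHYADDR exceeds 48 bits. */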
5129 if (cpu_has_vmx_ept_5levels() && (cpuid_maxphyaddr(vcpu) > 48))
5130 return 5;
5131 return 4;
5132}
5133
Peter Feiner995f00a2017-06-30 17:26:32 -07005134static u64 construct_eptp(struct kvm_vcpu *vcpu, unsigned long root_hpa)
Sheng Yang14394422008-04-28 12:24:45 +08005135{
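	/*
	 * EPTP layout: bits 2:0 select the memory type (6 = write-back),
	 * bits 5:3 hold the page-walk length minus one, bit 6 enables
	 * accessed/dirty flags, and bits 63:12 carry the PFN of the root
	 * EPT table.  As an illustration (arbitrary address), a 4-level
	 * walk with AD bits over a root at 0x12345000 encodes as 0x1234505e.
	 */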
Yu Zhang855feb62017-08-24 20:27:55 +08005136 u64 eptp = VMX_EPTP_MT_WB;
Sheng Yang14394422008-04-28 12:24:45 +08005137
Yu Zhang855feb62017-08-24 20:27:55 +08005138 eptp |= (get_ept_level(vcpu) == 5) ? VMX_EPTP_PWL_5 : VMX_EPTP_PWL_4;
Sheng Yang14394422008-04-28 12:24:45 +08005139
Peter Feiner995f00a2017-06-30 17:26:32 -07005140 if (enable_ept_ad_bits &&
5141 (!is_guest_mode(vcpu) || nested_ept_ad_enabled(vcpu)))
David Hildenbrandbb97a012017-08-10 23:15:28 +02005142 eptp |= VMX_EPTP_AD_ENABLE_BIT;
Sheng Yang14394422008-04-28 12:24:45 +08005143 eptp |= (root_hpa & PAGE_MASK);
5144
5145 return eptp;
5146}
5147
Avi Kivity6aa8b732006-12-10 02:21:36 -08005148static void vmx_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
5149{
Tianyu Lan877ad952018-07-19 08:40:23 +00005150 struct kvm *kvm = vcpu->kvm;
Sheng Yang14394422008-04-28 12:24:45 +08005151 unsigned long guest_cr3;
5152 u64 eptp;
5153
5154 guest_cr3 = cr3;
Avi Kivity089d0342009-03-23 18:26:32 +02005155 if (enable_ept) {
Peter Feiner995f00a2017-06-30 17:26:32 -07005156 eptp = construct_eptp(vcpu, cr3);
Sheng Yang14394422008-04-28 12:24:45 +08005157 vmcs_write64(EPT_POINTER, eptp);
Tianyu Lan877ad952018-07-19 08:40:23 +00005158
5159 if (kvm_x86_ops->tlb_remote_flush) {
5160 spin_lock(&to_kvm_vmx(kvm)->ept_pointer_lock);
5161 to_vmx(vcpu)->ept_pointer = eptp;
5162 to_kvm_vmx(kvm)->ept_pointers_match
5163 = EPT_POINTERS_CHECK;
5164 spin_unlock(&to_kvm_vmx(kvm)->ept_pointer_lock);
5165 }
5166
Sean Christophersone90008d2018-03-05 12:04:37 -08005167 if (enable_unrestricted_guest || is_paging(vcpu) ||
5168 is_guest_mode(vcpu))
Jan Kiszka59ab5a82013-08-08 16:26:29 +02005169 guest_cr3 = kvm_read_cr3(vcpu);
5170 else
Tianyu Lan877ad952018-07-19 08:40:23 +00005171 guest_cr3 = to_kvm_vmx(kvm)->ept_identity_map_addr;
Marcelo Tosatti7c93be442009-10-26 16:48:33 -02005172 ept_load_pdptrs(vcpu);
Sheng Yang14394422008-04-28 12:24:45 +08005173 }
5174
Sheng Yang14394422008-04-28 12:24:45 +08005175 vmcs_writel(GUEST_CR3, guest_cr3);
Avi Kivity6aa8b732006-12-10 02:21:36 -08005176}
5177
Nadav Har'El5e1746d2011-05-25 23:03:24 +03005178static int vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
Avi Kivity6aa8b732006-12-10 02:21:36 -08005179{
Ben Serebrin085e68e2015-04-16 11:58:05 -07005180 /*
5181 * Pass through host's Machine Check Enable value to hw_cr4, which
5182 * is in force while we are in guest mode. Do not let guests control
5183 * this bit, even if host CR4.MCE == 0.
5184 */
Sean Christopherson5dc1f042018-03-05 12:04:39 -08005185 unsigned long hw_cr4;
5186
5187 hw_cr4 = (cr4_read_shadow() & X86_CR4_MCE) | (cr4 & ~X86_CR4_MCE);
5188 if (enable_unrestricted_guest)
5189 hw_cr4 |= KVM_VM_CR4_ALWAYS_ON_UNRESTRICTED_GUEST;
5190 else if (to_vmx(vcpu)->rmode.vm86_active)
5191 hw_cr4 |= KVM_RMODE_VM_CR4_ALWAYS_ON;
5192 else
5193 hw_cr4 |= KVM_PMODE_VM_CR4_ALWAYS_ON;
Sheng Yang14394422008-04-28 12:24:45 +08005194
Sean Christopherson64f7a112018-04-30 10:01:06 -07005195 if (!boot_cpu_has(X86_FEATURE_UMIP) && vmx_umip_emulated()) {
5196 if (cr4 & X86_CR4_UMIP) {
5197 vmcs_set_bits(SECONDARY_VM_EXEC_CONTROL,
Paolo Bonzini0367f202016-07-12 10:44:55 +02005198 SECONDARY_EXEC_DESC);
Sean Christopherson64f7a112018-04-30 10:01:06 -07005199 hw_cr4 &= ~X86_CR4_UMIP;
5200 } else if (!is_guest_mode(vcpu) ||
5201 !nested_cpu_has2(get_vmcs12(vcpu), SECONDARY_EXEC_DESC))
5202 vmcs_clear_bits(SECONDARY_VM_EXEC_CONTROL,
5203 SECONDARY_EXEC_DESC);
5204 }
Paolo Bonzini0367f202016-07-12 10:44:55 +02005205
Nadav Har'El5e1746d2011-05-25 23:03:24 +03005206 if (cr4 & X86_CR4_VMXE) {
5207 /*
5208 * To use VMXON (and later other VMX instructions), a guest
5209 * must first be able to turn on cr4.VMXE (see handle_vmon()).
5210 * So basically the check on whether to allow nested VMX
5211 * is here.
5212 */
5213 if (!nested_vmx_allowed(vcpu))
5214 return 1;
Jan Kiszka1a0d74e2013-03-07 14:08:07 +01005215 }
David Matlack38991522016-11-29 18:14:08 -08005216
5217 if (to_vmx(vcpu)->nested.vmxon && !nested_cr4_valid(vcpu, cr4))
Nadav Har'El5e1746d2011-05-25 23:03:24 +03005218 return 1;
5219
Zhang Xiantaoad312c72007-12-13 23:50:52 +08005220 vcpu->arch.cr4 = cr4;
Sheng Yang14394422008-04-28 12:24:45 +08005221
Sean Christopherson5dc1f042018-03-05 12:04:39 -08005222 if (!enable_unrestricted_guest) {
5223 if (enable_ept) {
5224 if (!is_paging(vcpu)) {
5225 hw_cr4 &= ~X86_CR4_PAE;
5226 hw_cr4 |= X86_CR4_PSE;
5227 } else if (!(cr4 & X86_CR4_PAE)) {
5228 hw_cr4 &= ~X86_CR4_PAE;
5229 }
5230 }
5231
Radim Krčmář656ec4a2015-11-02 22:20:00 +01005232 /*
Huaitong Handdba2622016-03-22 16:51:15 +08005233 * SMEP/SMAP/PKU are disabled by hardware when the CPU is in
5234 * non-paging mode. To emulate this behavior, SMEP/SMAP/PKU need
5235 * to be manually disabled when the guest switches to non-paging
5236 * mode.
5237 *
5238 * If !enable_unrestricted_guest, the CPU is always running
5239 * with CR0.PG=1 and CR4 needs to be modified.
5240 * If enable_unrestricted_guest, the CPU automatically
5241 * disables SMEP/SMAP/PKU when the guest sets CR0.PG=0.
Radim Krčmář656ec4a2015-11-02 22:20:00 +01005242 */
Sean Christopherson5dc1f042018-03-05 12:04:39 -08005243 if (!is_paging(vcpu))
5244 hw_cr4 &= ~(X86_CR4_SMEP | X86_CR4_SMAP | X86_CR4_PKE);
5245 }
Radim Krčmář656ec4a2015-11-02 22:20:00 +01005246
Sheng Yang14394422008-04-28 12:24:45 +08005247 vmcs_writel(CR4_READ_SHADOW, cr4);
5248 vmcs_writel(GUEST_CR4, hw_cr4);
Nadav Har'El5e1746d2011-05-25 23:03:24 +03005249 return 0;
Avi Kivity6aa8b732006-12-10 02:21:36 -08005250}
5251
Avi Kivity6aa8b732006-12-10 02:21:36 -08005252static void vmx_get_segment(struct kvm_vcpu *vcpu,
5253 struct kvm_segment *var, int seg)
5254{
Avi Kivitya9179492011-01-03 14:28:52 +02005255 struct vcpu_vmx *vmx = to_vmx(vcpu);
Avi Kivity6aa8b732006-12-10 02:21:36 -08005256 u32 ar;
5257
Gleb Natapovc6ad11532012-12-12 19:10:51 +02005258 if (vmx->rmode.vm86_active && seg != VCPU_SREG_LDTR) {
Avi Kivityf5f7b2f2012-08-21 17:07:00 +03005259 *var = vmx->rmode.segs[seg];
Avi Kivitya9179492011-01-03 14:28:52 +02005260 if (seg == VCPU_SREG_TR
Avi Kivity2fb92db2011-04-27 19:42:18 +03005261 || var->selector == vmx_read_guest_seg_selector(vmx, seg))
Avi Kivityf5f7b2f2012-08-21 17:07:00 +03005262 return;
Avi Kivity1390a282012-08-21 17:07:08 +03005263 var->base = vmx_read_guest_seg_base(vmx, seg);
5264 var->selector = vmx_read_guest_seg_selector(vmx, seg);
5265 return;
Avi Kivitya9179492011-01-03 14:28:52 +02005266 }
Avi Kivity2fb92db2011-04-27 19:42:18 +03005267 var->base = vmx_read_guest_seg_base(vmx, seg);
5268 var->limit = vmx_read_guest_seg_limit(vmx, seg);
5269 var->selector = vmx_read_guest_seg_selector(vmx, seg);
5270 ar = vmx_read_guest_seg_ar(vmx, seg);
Gleb Natapov03617c12013-06-28 13:17:18 +03005271 var->unusable = (ar >> 16) & 1;
Avi Kivity6aa8b732006-12-10 02:21:36 -08005272 var->type = ar & 15;
5273 var->s = (ar >> 4) & 1;
5274 var->dpl = (ar >> 5) & 3;
Gleb Natapov03617c12013-06-28 13:17:18 +03005275 /*
5276 * Some userspaces do not preserve the unusable property. Since a usable
5277 * segment has to be present according to the VMX spec, we can use the
5278 * present property to work around the userspace bug by always marking an
5279 * unusable segment as nonpresent. vmx_segment_access_rights() already
5280 * marks a nonpresent segment as unusable.
5281 */
5282 var->present = !var->unusable;
Avi Kivity6aa8b732006-12-10 02:21:36 -08005283 var->avl = (ar >> 12) & 1;
5284 var->l = (ar >> 13) & 1;
5285 var->db = (ar >> 14) & 1;
5286 var->g = (ar >> 15) & 1;
Avi Kivity6aa8b732006-12-10 02:21:36 -08005287}
5288
Avi Kivitya9179492011-01-03 14:28:52 +02005289static u64 vmx_get_segment_base(struct kvm_vcpu *vcpu, int seg)
5290{
Avi Kivitya9179492011-01-03 14:28:52 +02005291 struct kvm_segment s;
5292
5293 if (to_vmx(vcpu)->rmode.vm86_active) {
5294 vmx_get_segment(vcpu, &s, seg);
5295 return s.base;
5296 }
Avi Kivity2fb92db2011-04-27 19:42:18 +03005297 return vmx_read_guest_seg_base(to_vmx(vcpu), seg);
Avi Kivitya9179492011-01-03 14:28:52 +02005298}
5299
Marcelo Tosattib09408d2013-01-07 19:27:06 -02005300static int vmx_get_cpl(struct kvm_vcpu *vcpu)
Izik Eidus2e4d2652008-03-24 19:38:34 +02005301{
Marcelo Tosattib09408d2013-01-07 19:27:06 -02005302 struct vcpu_vmx *vmx = to_vmx(vcpu);
5303
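	/*
	 * Outside of emulated real mode the CPL is simply the DPL of the
	 * stack segment; when real mode is emulated via vm86, report the
	 * real-mode CPL of 0 rather than the vm86 task's CPL 3.
	 */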
Paolo Bonziniae9fedc2014-05-14 09:39:49 +02005304 if (unlikely(vmx->rmode.vm86_active))
Izik Eidus2e4d2652008-03-24 19:38:34 +02005305 return 0;
Paolo Bonziniae9fedc2014-05-14 09:39:49 +02005306 else {
5307 int ar = vmx_read_guest_seg_ar(vmx, VCPU_SREG_SS);
Andy Lutomirski4d283ec2015-08-13 13:18:48 -07005308 return VMX_AR_DPL(ar);
Avi Kivity69c73022011-03-07 15:26:44 +02005309 }
Avi Kivity69c73022011-03-07 15:26:44 +02005310}
5311
Avi Kivity653e3102007-05-07 10:55:37 +03005312static u32 vmx_segment_access_rights(struct kvm_segment *var)
Avi Kivity6aa8b732006-12-10 02:21:36 -08005313{
Avi Kivity6aa8b732006-12-10 02:21:36 -08005314 u32 ar;
5315
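	/*
	 * VMX access-rights encoding: type in bits 3:0, S in bit 4, DPL in
	 * bits 6:5, P in bit 7, AVL in bit 12, L in bit 13, D/B in bit 14,
	 * G in bit 15, and "unusable" in bit 16.  As an illustration, a flat
	 * 64-bit ring-0 code segment (type 0xb, S=1, DPL=0, P=1, L=1, G=1)
	 * encodes as 0xa09b.
	 */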
Avi Kivityf0495f92012-06-07 17:06:10 +03005316 if (var->unusable || !var->present)
Avi Kivity6aa8b732006-12-10 02:21:36 -08005317 ar = 1 << 16;
5318 else {
5319 ar = var->type & 15;
5320 ar |= (var->s & 1) << 4;
5321 ar |= (var->dpl & 3) << 5;
5322 ar |= (var->present & 1) << 7;
5323 ar |= (var->avl & 1) << 12;
5324 ar |= (var->l & 1) << 13;
5325 ar |= (var->db & 1) << 14;
5326 ar |= (var->g & 1) << 15;
5327 }
Avi Kivity653e3102007-05-07 10:55:37 +03005328
5329 return ar;
5330}
5331
5332static void vmx_set_segment(struct kvm_vcpu *vcpu,
5333 struct kvm_segment *var, int seg)
5334{
Avi Kivity7ffd92c2009-06-09 14:10:45 +03005335 struct vcpu_vmx *vmx = to_vmx(vcpu);
Mathias Krause772e0312012-08-30 01:30:19 +02005336 const struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg];
Avi Kivity653e3102007-05-07 10:55:37 +03005337
Avi Kivity2fb92db2011-04-27 19:42:18 +03005338 vmx_segment_cache_clear(vmx);
5339
Gleb Natapov1ecd50a2012-12-12 19:10:54 +02005340 if (vmx->rmode.vm86_active && seg != VCPU_SREG_LDTR) {
5341 vmx->rmode.segs[seg] = *var;
5342 if (seg == VCPU_SREG_TR)
5343 vmcs_write16(sf->selector, var->selector);
5344 else if (var->s)
5345 fix_rmode_seg(seg, &vmx->rmode.segs[seg]);
Gleb Natapovd99e4152012-12-20 16:57:45 +02005346 goto out;
Avi Kivity653e3102007-05-07 10:55:37 +03005347 }
Gleb Natapov1ecd50a2012-12-12 19:10:54 +02005348
Avi Kivity653e3102007-05-07 10:55:37 +03005349 vmcs_writel(sf->base, var->base);
5350 vmcs_write32(sf->limit, var->limit);
5351 vmcs_write16(sf->selector, var->selector);
Nitin A Kamble3a624e22009-06-08 11:34:16 -07005352
5353 /*
5354 * Fix the "Accessed" bit in AR field of segment registers for older
5355 * qemu binaries.
5356 * IA32 arch specifies that at the time of processor reset the
5357 * "Accessed" bit in the AR field of segment registers is 1. And qemu
Guo Chao0fa06072012-06-28 15:16:19 +08005358 * is setting it to 0 in the userland code. This causes invalid guest
Nitin A Kamble3a624e22009-06-08 11:34:16 -07005359 * state vmexit when "unrestricted guest" mode is turned on.
5360 * Fix for this setup issue in cpu_reset is being pushed in the qemu
5361 * tree. Newer qemu binaries with that qemu fix would not need this
5362 * kvm hack.
5363 */
5364 if (enable_unrestricted_guest && (seg != VCPU_SREG_LDTR))
Gleb Natapovf924d662012-12-12 19:10:55 +02005365 var->type |= 0x1; /* Accessed */
Nitin A Kamble3a624e22009-06-08 11:34:16 -07005366
Gleb Natapovf924d662012-12-12 19:10:55 +02005367 vmcs_write32(sf->ar_bytes, vmx_segment_access_rights(var));
Gleb Natapovd99e4152012-12-20 16:57:45 +02005368
5369out:
Paolo Bonzini98eb2f82014-03-27 09:51:52 +01005370 vmx->emulation_required = emulation_required(vcpu);
Avi Kivity6aa8b732006-12-10 02:21:36 -08005371}
5372
Avi Kivity6aa8b732006-12-10 02:21:36 -08005373static void vmx_get_cs_db_l_bits(struct kvm_vcpu *vcpu, int *db, int *l)
5374{
Avi Kivity2fb92db2011-04-27 19:42:18 +03005375 u32 ar = vmx_read_guest_seg_ar(to_vmx(vcpu), VCPU_SREG_CS);
Avi Kivity6aa8b732006-12-10 02:21:36 -08005376
5377 *db = (ar >> 14) & 1;
5378 *l = (ar >> 13) & 1;
5379}
5380
Gleb Natapov89a27f42010-02-16 10:51:48 +02005381static void vmx_get_idt(struct kvm_vcpu *vcpu, struct desc_ptr *dt)
Avi Kivity6aa8b732006-12-10 02:21:36 -08005382{
Gleb Natapov89a27f42010-02-16 10:51:48 +02005383 dt->size = vmcs_read32(GUEST_IDTR_LIMIT);
5384 dt->address = vmcs_readl(GUEST_IDTR_BASE);
Avi Kivity6aa8b732006-12-10 02:21:36 -08005385}
5386
Gleb Natapov89a27f42010-02-16 10:51:48 +02005387static void vmx_set_idt(struct kvm_vcpu *vcpu, struct desc_ptr *dt)
Avi Kivity6aa8b732006-12-10 02:21:36 -08005388{
Gleb Natapov89a27f42010-02-16 10:51:48 +02005389 vmcs_write32(GUEST_IDTR_LIMIT, dt->size);
5390 vmcs_writel(GUEST_IDTR_BASE, dt->address);
Avi Kivity6aa8b732006-12-10 02:21:36 -08005391}
5392
Gleb Natapov89a27f42010-02-16 10:51:48 +02005393static void vmx_get_gdt(struct kvm_vcpu *vcpu, struct desc_ptr *dt)
Avi Kivity6aa8b732006-12-10 02:21:36 -08005394{
Gleb Natapov89a27f42010-02-16 10:51:48 +02005395 dt->size = vmcs_read32(GUEST_GDTR_LIMIT);
5396 dt->address = vmcs_readl(GUEST_GDTR_BASE);
Avi Kivity6aa8b732006-12-10 02:21:36 -08005397}
5398
Gleb Natapov89a27f42010-02-16 10:51:48 +02005399static void vmx_set_gdt(struct kvm_vcpu *vcpu, struct desc_ptr *dt)
Avi Kivity6aa8b732006-12-10 02:21:36 -08005400{
Gleb Natapov89a27f42010-02-16 10:51:48 +02005401 vmcs_write32(GUEST_GDTR_LIMIT, dt->size);
5402 vmcs_writel(GUEST_GDTR_BASE, dt->address);
Avi Kivity6aa8b732006-12-10 02:21:36 -08005403}
5404
Mohammed Gamal648dfaa2008-08-17 16:38:32 +03005405static bool rmode_segment_valid(struct kvm_vcpu *vcpu, int seg)
5406{
5407 struct kvm_segment var;
5408 u32 ar;
5409
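	/*
	 * In virtual-8086 mode every segment must look like a real-mode one:
	 * base equal to selector << 4, a 64 KiB limit, and a present DPL-3
	 * read/write (accessed) type, i.e. access rights 0xf3.
	 */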
5410 vmx_get_segment(vcpu, &var, seg);
Gleb Natapov07f42f52012-12-12 19:10:49 +02005411 var.dpl = 0x3;
Gleb Natapov0647f4a2012-12-12 19:10:50 +02005412 if (seg == VCPU_SREG_CS)
5413 var.type = 0x3;
Mohammed Gamal648dfaa2008-08-17 16:38:32 +03005414 ar = vmx_segment_access_rights(&var);
5415
5416 if (var.base != (var.selector << 4))
5417 return false;
Gleb Natapov89efbed2012-12-20 16:57:44 +02005418 if (var.limit != 0xffff)
Mohammed Gamal648dfaa2008-08-17 16:38:32 +03005419 return false;
Gleb Natapov07f42f52012-12-12 19:10:49 +02005420 if (ar != 0xf3)
Mohammed Gamal648dfaa2008-08-17 16:38:32 +03005421 return false;
5422
5423 return true;
5424}
5425
5426static bool code_segment_valid(struct kvm_vcpu *vcpu)
5427{
5428 struct kvm_segment cs;
5429 unsigned int cs_rpl;
5430
5431 vmx_get_segment(vcpu, &cs, VCPU_SREG_CS);
Nadav Amitb32a9912015-03-29 16:33:04 +03005432 cs_rpl = cs.selector & SEGMENT_RPL_MASK;
Mohammed Gamal648dfaa2008-08-17 16:38:32 +03005433
Avi Kivity1872a3f2009-01-04 23:26:52 +02005434 if (cs.unusable)
5435 return false;
Andy Lutomirski4d283ec2015-08-13 13:18:48 -07005436 if (~cs.type & (VMX_AR_TYPE_CODE_MASK|VMX_AR_TYPE_ACCESSES_MASK))
Mohammed Gamal648dfaa2008-08-17 16:38:32 +03005437 return false;
5438 if (!cs.s)
5439 return false;
Andy Lutomirski4d283ec2015-08-13 13:18:48 -07005440 if (cs.type & VMX_AR_TYPE_WRITEABLE_MASK) {
Mohammed Gamal648dfaa2008-08-17 16:38:32 +03005441 if (cs.dpl > cs_rpl)
5442 return false;
Avi Kivity1872a3f2009-01-04 23:26:52 +02005443 } else {
Mohammed Gamal648dfaa2008-08-17 16:38:32 +03005444 if (cs.dpl != cs_rpl)
5445 return false;
5446 }
5447 if (!cs.present)
5448 return false;
5449
5450 /* TODO: Add Reserved field check, this'll require a new member in the kvm_segment_field structure */
5451 return true;
5452}
5453
5454static bool stack_segment_valid(struct kvm_vcpu *vcpu)
5455{
5456 struct kvm_segment ss;
5457 unsigned int ss_rpl;
5458
5459 vmx_get_segment(vcpu, &ss, VCPU_SREG_SS);
Nadav Amitb32a9912015-03-29 16:33:04 +03005460 ss_rpl = ss.selector & SEGMENT_RPL_MASK;
Mohammed Gamal648dfaa2008-08-17 16:38:32 +03005461
Avi Kivity1872a3f2009-01-04 23:26:52 +02005462 if (ss.unusable)
5463 return true;
5464 if (ss.type != 3 && ss.type != 7)
Mohammed Gamal648dfaa2008-08-17 16:38:32 +03005465 return false;
5466 if (!ss.s)
5467 return false;
5468 if (ss.dpl != ss_rpl) /* DPL != RPL */
5469 return false;
5470 if (!ss.present)
5471 return false;
5472
5473 return true;
5474}
5475
5476static bool data_segment_valid(struct kvm_vcpu *vcpu, int seg)
5477{
5478 struct kvm_segment var;
5479 unsigned int rpl;
5480
5481 vmx_get_segment(vcpu, &var, seg);
Nadav Amitb32a9912015-03-29 16:33:04 +03005482 rpl = var.selector & SEGMENT_RPL_MASK;
Mohammed Gamal648dfaa2008-08-17 16:38:32 +03005483
Avi Kivity1872a3f2009-01-04 23:26:52 +02005484 if (var.unusable)
5485 return true;
Mohammed Gamal648dfaa2008-08-17 16:38:32 +03005486 if (!var.s)
5487 return false;
5488 if (!var.present)
5489 return false;
Andy Lutomirski4d283ec2015-08-13 13:18:48 -07005490 if (~var.type & (VMX_AR_TYPE_CODE_MASK|VMX_AR_TYPE_WRITEABLE_MASK)) {
Mohammed Gamal648dfaa2008-08-17 16:38:32 +03005491 if (var.dpl < rpl) /* DPL < RPL */
5492 return false;
5493 }
5494
5495 /* TODO: Add other members to kvm_segment_field to allow checking for other access
5496 * rights flags
5497 */
5498 return true;
5499}
5500
5501static bool tr_valid(struct kvm_vcpu *vcpu)
5502{
5503 struct kvm_segment tr;
5504
5505 vmx_get_segment(vcpu, &tr, VCPU_SREG_TR);
5506
Avi Kivity1872a3f2009-01-04 23:26:52 +02005507 if (tr.unusable)
5508 return false;
Nadav Amitb32a9912015-03-29 16:33:04 +03005509 if (tr.selector & SEGMENT_TI_MASK) /* TI = 1 */
Mohammed Gamal648dfaa2008-08-17 16:38:32 +03005510 return false;
Avi Kivity1872a3f2009-01-04 23:26:52 +02005511 if (tr.type != 3 && tr.type != 11) /* TODO: Check if guest is in IA32e mode */
Mohammed Gamal648dfaa2008-08-17 16:38:32 +03005512 return false;
5513 if (!tr.present)
5514 return false;
5515
5516 return true;
5517}
5518
5519static bool ldtr_valid(struct kvm_vcpu *vcpu)
5520{
5521 struct kvm_segment ldtr;
5522
5523 vmx_get_segment(vcpu, &ldtr, VCPU_SREG_LDTR);
5524
Avi Kivity1872a3f2009-01-04 23:26:52 +02005525 if (ldtr.unusable)
5526 return true;
Nadav Amitb32a9912015-03-29 16:33:04 +03005527 if (ldtr.selector & SEGMENT_TI_MASK) /* TI = 1 */
Mohammed Gamal648dfaa2008-08-17 16:38:32 +03005528 return false;
5529 if (ldtr.type != 2)
5530 return false;
5531 if (!ldtr.present)
5532 return false;
5533
5534 return true;
5535}
5536
5537static bool cs_ss_rpl_check(struct kvm_vcpu *vcpu)
5538{
5539 struct kvm_segment cs, ss;
5540
5541 vmx_get_segment(vcpu, &cs, VCPU_SREG_CS);
5542 vmx_get_segment(vcpu, &ss, VCPU_SREG_SS);
5543
Nadav Amitb32a9912015-03-29 16:33:04 +03005544 return ((cs.selector & SEGMENT_RPL_MASK) ==
5545 (ss.selector & SEGMENT_RPL_MASK));
Mohammed Gamal648dfaa2008-08-17 16:38:32 +03005546}
5547
5548/*
5549 * Check if the guest state is valid. Returns true if valid, false if
5550 * not.
5551 * We assume that registers are always usable.
5552 */
5553static bool guest_state_valid(struct kvm_vcpu *vcpu)
5554{
Gleb Natapovc5e97c82013-01-21 15:36:43 +02005555 if (enable_unrestricted_guest)
5556 return true;
5557
Mohammed Gamal648dfaa2008-08-17 16:38:32 +03005558 /* real mode guest state checks */
Gleb Natapovf13882d2013-04-14 16:07:37 +03005559 if (!is_protmode(vcpu) || (vmx_get_rflags(vcpu) & X86_EFLAGS_VM)) {
Mohammed Gamal648dfaa2008-08-17 16:38:32 +03005560 if (!rmode_segment_valid(vcpu, VCPU_SREG_CS))
5561 return false;
5562 if (!rmode_segment_valid(vcpu, VCPU_SREG_SS))
5563 return false;
5564 if (!rmode_segment_valid(vcpu, VCPU_SREG_DS))
5565 return false;
5566 if (!rmode_segment_valid(vcpu, VCPU_SREG_ES))
5567 return false;
5568 if (!rmode_segment_valid(vcpu, VCPU_SREG_FS))
5569 return false;
5570 if (!rmode_segment_valid(vcpu, VCPU_SREG_GS))
5571 return false;
5572 } else {
5573 /* protected mode guest state checks */
5574 if (!cs_ss_rpl_check(vcpu))
5575 return false;
5576 if (!code_segment_valid(vcpu))
5577 return false;
5578 if (!stack_segment_valid(vcpu))
5579 return false;
5580 if (!data_segment_valid(vcpu, VCPU_SREG_DS))
5581 return false;
5582 if (!data_segment_valid(vcpu, VCPU_SREG_ES))
5583 return false;
5584 if (!data_segment_valid(vcpu, VCPU_SREG_FS))
5585 return false;
5586 if (!data_segment_valid(vcpu, VCPU_SREG_GS))
5587 return false;
5588 if (!tr_valid(vcpu))
5589 return false;
5590 if (!ldtr_valid(vcpu))
5591 return false;
5592 }
5593 /* TODO:
5594 * - Add checks on RIP
5595 * - Add checks on RFLAGS
5596 */
5597
5598 return true;
5599}
5600
Jim Mattson5fa99cb2017-07-06 16:33:07 -07005601static bool page_address_valid(struct kvm_vcpu *vcpu, gpa_t gpa)
5602{
5603 return PAGE_ALIGNED(gpa) && !(gpa >> cpuid_maxphyaddr(vcpu));
5604}
5605
Mike Dayd77c26f2007-10-08 09:02:08 -04005606static int init_rmode_tss(struct kvm *kvm)
Avi Kivity6aa8b732006-12-10 02:21:36 -08005607{
Xiao Guangrong40dcaa92011-03-09 15:41:04 +08005608 gfn_t fn;
Izik Eidus195aefd2007-10-01 22:14:18 +02005609 u16 data = 0;
Paolo Bonzini1f755a82014-09-16 13:37:40 +02005610 int idx, r;
Avi Kivity6aa8b732006-12-10 02:21:36 -08005611
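	/*
	 * Lay out a minimal real-mode TSS spanning three pages: zero the
	 * pages, point the I/O-bitmap base past the interrupt-redirection
	 * map, and terminate the I/O permission bitmap with an all-ones
	 * byte as the architecture requires.
	 */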
Xiao Guangrong40dcaa92011-03-09 15:41:04 +08005612 idx = srcu_read_lock(&kvm->srcu);
Sean Christopherson40bbb9d2018-03-20 12:17:20 -07005613 fn = to_kvm_vmx(kvm)->tss_addr >> PAGE_SHIFT;
Izik Eidus195aefd2007-10-01 22:14:18 +02005614 r = kvm_clear_guest_page(kvm, fn, 0, PAGE_SIZE);
5615 if (r < 0)
Marcelo Tosatti10589a42007-12-20 19:18:22 -05005616 goto out;
Izik Eidus195aefd2007-10-01 22:14:18 +02005617 data = TSS_BASE_SIZE + TSS_REDIRECTION_SIZE;
Sheng Yang464d17c2008-08-13 14:10:33 +08005618 r = kvm_write_guest_page(kvm, fn++, &data,
5619 TSS_IOPB_BASE_OFFSET, sizeof(u16));
Izik Eidus195aefd2007-10-01 22:14:18 +02005620 if (r < 0)
Marcelo Tosatti10589a42007-12-20 19:18:22 -05005621 goto out;
Izik Eidus195aefd2007-10-01 22:14:18 +02005622 r = kvm_clear_guest_page(kvm, fn++, 0, PAGE_SIZE);
5623 if (r < 0)
Marcelo Tosatti10589a42007-12-20 19:18:22 -05005624 goto out;
Izik Eidus195aefd2007-10-01 22:14:18 +02005625 r = kvm_clear_guest_page(kvm, fn, 0, PAGE_SIZE);
5626 if (r < 0)
Marcelo Tosatti10589a42007-12-20 19:18:22 -05005627 goto out;
Izik Eidus195aefd2007-10-01 22:14:18 +02005628 data = ~0;
Marcelo Tosatti10589a42007-12-20 19:18:22 -05005629 r = kvm_write_guest_page(kvm, fn, &data,
5630 RMODE_TSS_SIZE - 2 * PAGE_SIZE - 1,
5631 sizeof(u8));
Marcelo Tosatti10589a42007-12-20 19:18:22 -05005632out:
Xiao Guangrong40dcaa92011-03-09 15:41:04 +08005633 srcu_read_unlock(&kvm->srcu, idx);
Paolo Bonzini1f755a82014-09-16 13:37:40 +02005634 return r;
Avi Kivity6aa8b732006-12-10 02:21:36 -08005635}
5636
Sheng Yangb7ebfb02008-04-25 21:44:52 +08005637static int init_rmode_identity_map(struct kvm *kvm)
5638{
Sean Christopherson40bbb9d2018-03-20 12:17:20 -07005639 struct kvm_vmx *kvm_vmx = to_kvm_vmx(kvm);
Tang Chenf51770e2014-09-16 18:41:59 +08005640 int i, idx, r = 0;
Dan Williamsba049e92016-01-15 16:56:11 -08005641 kvm_pfn_t identity_map_pfn;
Sheng Yangb7ebfb02008-04-25 21:44:52 +08005642 u32 tmp;
5643
Sean Christopherson40bbb9d2018-03-20 12:17:20 -07005644 /* Protect kvm_vmx->ept_identity_pagetable_done. */
Tang Chena255d472014-09-16 18:41:58 +08005645 mutex_lock(&kvm->slots_lock);
5646
Sean Christopherson40bbb9d2018-03-20 12:17:20 -07005647 if (likely(kvm_vmx->ept_identity_pagetable_done))
Tang Chena255d472014-09-16 18:41:58 +08005648 goto out2;
Tang Chena255d472014-09-16 18:41:58 +08005649
Sean Christopherson40bbb9d2018-03-20 12:17:20 -07005650 if (!kvm_vmx->ept_identity_map_addr)
5651 kvm_vmx->ept_identity_map_addr = VMX_EPT_IDENTITY_PAGETABLE_ADDR;
5652 identity_map_pfn = kvm_vmx->ept_identity_map_addr >> PAGE_SHIFT;
Tang Chena255d472014-09-16 18:41:58 +08005653
David Hildenbrandd8a6e362017-08-24 20:51:34 +02005654 r = __x86_set_memory_region(kvm, IDENTITY_PAGETABLE_PRIVATE_MEMSLOT,
Sean Christopherson40bbb9d2018-03-20 12:17:20 -07005655 kvm_vmx->ept_identity_map_addr, PAGE_SIZE);
Tang Chenf51770e2014-09-16 18:41:59 +08005656 if (r < 0)
Tang Chena255d472014-09-16 18:41:58 +08005657 goto out2;
5658
Xiao Guangrong40dcaa92011-03-09 15:41:04 +08005659 idx = srcu_read_lock(&kvm->srcu);
Sheng Yangb7ebfb02008-04-25 21:44:52 +08005660 r = kvm_clear_guest_page(kvm, identity_map_pfn, 0, PAGE_SIZE);
5661 if (r < 0)
5662 goto out;
5663 /* Set up identity-mapping pagetable for EPT in real mode */
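	/* Each PDE identity-maps a 4 MiB region via a large (PSE) page. */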
5664 for (i = 0; i < PT32_ENT_PER_PAGE; i++) {
5665 tmp = (i << 22) + (_PAGE_PRESENT | _PAGE_RW | _PAGE_USER |
5666 _PAGE_ACCESSED | _PAGE_DIRTY | _PAGE_PSE);
5667 r = kvm_write_guest_page(kvm, identity_map_pfn,
5668 &tmp, i * sizeof(tmp), sizeof(tmp));
5669 if (r < 0)
5670 goto out;
5671 }
Sean Christopherson40bbb9d2018-03-20 12:17:20 -07005672 kvm_vmx->ept_identity_pagetable_done = true;
Tang Chenf51770e2014-09-16 18:41:59 +08005673
Sheng Yangb7ebfb02008-04-25 21:44:52 +08005674out:
Xiao Guangrong40dcaa92011-03-09 15:41:04 +08005675 srcu_read_unlock(&kvm->srcu, idx);
Tang Chena255d472014-09-16 18:41:58 +08005676
5677out2:
5678 mutex_unlock(&kvm->slots_lock);
Tang Chenf51770e2014-09-16 18:41:59 +08005679 return r;
Sheng Yangb7ebfb02008-04-25 21:44:52 +08005680}
5681
Avi Kivity6aa8b732006-12-10 02:21:36 -08005682static void seg_setup(int seg)
5683{
Mathias Krause772e0312012-08-30 01:30:19 +02005684 const struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg];
Nitin A Kamble3a624e22009-06-08 11:34:16 -07005685 unsigned int ar;
Avi Kivity6aa8b732006-12-10 02:21:36 -08005686
5687 vmcs_write16(sf->selector, 0);
5688 vmcs_writel(sf->base, 0);
5689 vmcs_write32(sf->limit, 0xffff);
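	/*
	 * 0x93 = present, S=1, DPL 0, read/write accessed data segment;
	 * adding 0x08 below turns the type into an accessed code segment.
	 */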
Gleb Natapovd54d07b2012-12-20 16:57:46 +02005690 ar = 0x93;
5691 if (seg == VCPU_SREG_CS)
5692 ar |= 0x08; /* code segment */
Nitin A Kamble3a624e22009-06-08 11:34:16 -07005693
5694 vmcs_write32(sf->ar_bytes, ar);
Avi Kivity6aa8b732006-12-10 02:21:36 -08005695}
5696
Sheng Yangf78e0e22007-10-29 09:40:42 +08005697static int alloc_apic_access_page(struct kvm *kvm)
5698{
Xiao Guangrong44841412012-09-07 14:14:20 +08005699 struct page *page;
Sheng Yangf78e0e22007-10-29 09:40:42 +08005700 int r = 0;
5701
Marcelo Tosatti79fac952009-12-23 14:35:26 -02005702 mutex_lock(&kvm->slots_lock);
Tang Chenc24ae0d2014-09-24 15:57:58 +08005703 if (kvm->arch.apic_access_page_done)
Sheng Yangf78e0e22007-10-29 09:40:42 +08005704 goto out;
Paolo Bonzini1d8007b2015-10-12 13:38:32 +02005705 r = __x86_set_memory_region(kvm, APIC_ACCESS_PAGE_PRIVATE_MEMSLOT,
5706 APIC_DEFAULT_PHYS_BASE, PAGE_SIZE);
Sheng Yangf78e0e22007-10-29 09:40:42 +08005707 if (r)
5708 goto out;
Izik Eidus72dc67a2008-02-10 18:04:15 +02005709
Tang Chen73a6d942014-09-11 13:38:00 +08005710 page = gfn_to_page(kvm, APIC_DEFAULT_PHYS_BASE >> PAGE_SHIFT);
Xiao Guangrong44841412012-09-07 14:14:20 +08005711 if (is_error_page(page)) {
5712 r = -EFAULT;
5713 goto out;
5714 }
5715
Tang Chenc24ae0d2014-09-24 15:57:58 +08005716 /*
5717 * Do not pin the page in memory, so that it can be migrated
5718 * when memory is hot-unplugged.
5719 */
5720 put_page(page);
5721 kvm->arch.apic_access_page_done = true;
Sheng Yangf78e0e22007-10-29 09:40:42 +08005722out:
Marcelo Tosatti79fac952009-12-23 14:35:26 -02005723 mutex_unlock(&kvm->slots_lock);
Sheng Yangf78e0e22007-10-29 09:40:42 +08005724 return r;
5725}
5726
Wanpeng Li991e7a02015-09-16 17:30:05 +08005727static int allocate_vpid(void)
Sheng Yang2384d2b2008-01-17 15:14:33 +08005728{
5729 int vpid;
5730
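	/*
	 * VPID 0 is reserved to mean "no VPID" (the host's tag), so both a
	 * disabled feature and an exhausted bitmap fall back to it.
	 */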
Avi Kivity919818a2009-03-23 18:01:29 +02005731 if (!enable_vpid)
Wanpeng Li991e7a02015-09-16 17:30:05 +08005732 return 0;
Sheng Yang2384d2b2008-01-17 15:14:33 +08005733 spin_lock(&vmx_vpid_lock);
5734 vpid = find_first_zero_bit(vmx_vpid_bitmap, VMX_NR_VPIDS);
Wanpeng Li991e7a02015-09-16 17:30:05 +08005735 if (vpid < VMX_NR_VPIDS)
Sheng Yang2384d2b2008-01-17 15:14:33 +08005736 __set_bit(vpid, vmx_vpid_bitmap);
Wanpeng Li991e7a02015-09-16 17:30:05 +08005737 else
5738 vpid = 0;
Sheng Yang2384d2b2008-01-17 15:14:33 +08005739 spin_unlock(&vmx_vpid_lock);
Wanpeng Li991e7a02015-09-16 17:30:05 +08005740 return vpid;
Sheng Yang2384d2b2008-01-17 15:14:33 +08005741}
5742
Wanpeng Li991e7a02015-09-16 17:30:05 +08005743static void free_vpid(int vpid)
Lai Jiangshancdbecfc2010-04-17 16:41:47 +08005744{
Wanpeng Li991e7a02015-09-16 17:30:05 +08005745 if (!enable_vpid || vpid == 0)
Lai Jiangshancdbecfc2010-04-17 16:41:47 +08005746 return;
5747 spin_lock(&vmx_vpid_lock);
Wanpeng Li991e7a02015-09-16 17:30:05 +08005748 __clear_bit(vpid, vmx_vpid_bitmap);
Lai Jiangshancdbecfc2010-04-17 16:41:47 +08005749 spin_unlock(&vmx_vpid_lock);
5750}
5751
Paolo Bonzini904e14f2018-01-16 16:51:18 +01005752static void __always_inline vmx_disable_intercept_for_msr(unsigned long *msr_bitmap,
5753 u32 msr, int type)
Sheng Yang25c5f222008-03-28 13:18:56 +08005754{
Avi Kivity3e7c73e2009-02-24 21:46:19 +02005755 int f = sizeof(unsigned long);
Sheng Yang25c5f222008-03-28 13:18:56 +08005756
5757 if (!cpu_has_vmx_msr_bitmap())
5758 return;
5759
Vitaly Kuznetsovceef7d12018-04-16 12:50:33 +02005760 if (static_branch_unlikely(&enable_evmcs))
5761 evmcs_touch_msr_bitmap();
5762
Sheng Yang25c5f222008-03-28 13:18:56 +08005763 /*
5764 * See Intel PRM Vol. 3, 20.6.9 (MSR-Bitmap Address). Early manuals
5765 * have the write-low and read-high bitmap offsets the wrong way round.
5766 * We can control MSRs 0x00000000-0x00001fff and 0xc0000000-0xc0001fff.
5767 */
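	/*
	 * Bitmap layout within the 4 KiB page:
	 *   0x000  read-low    (MSRs 0x00000000-0x00001fff)
	 *   0x400  read-high   (MSRs 0xc0000000-0xc0001fff)
	 *   0x800  write-low
	 *   0xc00  write-high
	 * A clear bit means the corresponding access does not cause a VM-exit.
	 */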
Sheng Yang25c5f222008-03-28 13:18:56 +08005768 if (msr <= 0x1fff) {
Yang Zhang8d146952013-01-25 10:18:50 +08005769 if (type & MSR_TYPE_R)
5770 /* read-low */
5771 __clear_bit(msr, msr_bitmap + 0x000 / f);
5772
5773 if (type & MSR_TYPE_W)
5774 /* write-low */
5775 __clear_bit(msr, msr_bitmap + 0x800 / f);
5776
Sheng Yang25c5f222008-03-28 13:18:56 +08005777 } else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff)) {
5778 msr &= 0x1fff;
Yang Zhang8d146952013-01-25 10:18:50 +08005779 if (type & MSR_TYPE_R)
5780 /* read-high */
5781 __clear_bit(msr, msr_bitmap + 0x400 / f);
5782
5783 if (type & MSR_TYPE_W)
5784 /* write-high */
5785 __clear_bit(msr, msr_bitmap + 0xc00 / f);
5786
5787 }
5788}
5789
Paolo Bonzini904e14f2018-01-16 16:51:18 +01005790static void __always_inline vmx_enable_intercept_for_msr(unsigned long *msr_bitmap,
5791 u32 msr, int type)
5792{
5793 int f = sizeof(unsigned long);
5794
5795 if (!cpu_has_vmx_msr_bitmap())
5796 return;
5797
Vitaly Kuznetsovceef7d12018-04-16 12:50:33 +02005798 if (static_branch_unlikely(&enable_evmcs))
5799 evmcs_touch_msr_bitmap();
5800
Paolo Bonzini904e14f2018-01-16 16:51:18 +01005801 /*
5802 * See Intel PRM Vol. 3, 20.6.9 (MSR-Bitmap Address). Early manuals
5803 * have the write-low and read-high bitmap offsets the wrong way round.
5804 * We can control MSRs 0x00000000-0x00001fff and 0xc0000000-0xc0001fff.
5805 */
5806 if (msr <= 0x1fff) {
5807 if (type & MSR_TYPE_R)
5808 /* read-low */
5809 __set_bit(msr, msr_bitmap + 0x000 / f);
5810
5811 if (type & MSR_TYPE_W)
5812 /* write-low */
5813 __set_bit(msr, msr_bitmap + 0x800 / f);
5814
5815 } else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff)) {
5816 msr &= 0x1fff;
5817 if (type & MSR_TYPE_R)
5818 /* read-high */
5819 __set_bit(msr, msr_bitmap + 0x400 / f);
5820
5821 if (type & MSR_TYPE_W)
5822 /* write-high */
5823 __set_bit(msr, msr_bitmap + 0xc00 / f);
5824
5825 }
5826}
5827
5828static void __always_inline vmx_set_intercept_for_msr(unsigned long *msr_bitmap,
5829 u32 msr, int type, bool value)
5830{
5831 if (value)
5832 vmx_enable_intercept_for_msr(msr_bitmap, msr, type);
5833 else
5834 vmx_disable_intercept_for_msr(msr_bitmap, msr, type);
5835}
5836
Wincy Vanf2b93282015-02-03 23:56:03 +08005837/*
5838 * If an MSR is allowed by L0, we should check whether it is also allowed by L1.
5839 * The corresponding bit is cleared only if both L0 and L1 allow the access.
5840 */
5841static void nested_vmx_disable_intercept_for_msr(unsigned long *msr_bitmap_l1,
5842 unsigned long *msr_bitmap_nested,
5843 u32 msr, int type)
5844{
5845 int f = sizeof(unsigned long);
5846
Wincy Vanf2b93282015-02-03 23:56:03 +08005847 /*
5848 * See Intel PRM Vol. 3, 20.6.9 (MSR-Bitmap Address). Early manuals
5849 * have the write-low and read-high bitmap offsets the wrong way round.
5850 * We can control MSRs 0x00000000-0x00001fff and 0xc0000000-0xc0001fff.
5851 */
5852 if (msr <= 0x1fff) {
5853 if (type & MSR_TYPE_R &&
5854 !test_bit(msr, msr_bitmap_l1 + 0x000 / f))
5855 /* read-low */
5856 __clear_bit(msr, msr_bitmap_nested + 0x000 / f);
5857
5858 if (type & MSR_TYPE_W &&
5859 !test_bit(msr, msr_bitmap_l1 + 0x800 / f))
5860 /* write-low */
5861 __clear_bit(msr, msr_bitmap_nested + 0x800 / f);
5862
5863 } else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff)) {
5864 msr &= 0x1fff;
5865 if (type & MSR_TYPE_R &&
5866 !test_bit(msr, msr_bitmap_l1 + 0x400 / f))
5867 /* read-high */
5868 __clear_bit(msr, msr_bitmap_nested + 0x400 / f);
5869
5870 if (type & MSR_TYPE_W &&
5871 !test_bit(msr, msr_bitmap_l1 + 0xc00 / f))
5872 /* write-high */
5873 __clear_bit(msr, msr_bitmap_nested + 0xc00 / f);
5874
5875 }
5876}
5877
Paolo Bonzini904e14f2018-01-16 16:51:18 +01005878static u8 vmx_msr_bitmap_mode(struct kvm_vcpu *vcpu)
Avi Kivity58972972009-02-24 22:26:47 +02005879{
Paolo Bonzini904e14f2018-01-16 16:51:18 +01005880 u8 mode = 0;
5881
5882 if (cpu_has_secondary_exec_ctrls() &&
5883 (vmcs_read32(SECONDARY_VM_EXEC_CONTROL) &
5884 SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE)) {
5885 mode |= MSR_BITMAP_MODE_X2APIC;
5886 if (enable_apicv && kvm_vcpu_apicv_active(vcpu))
5887 mode |= MSR_BITMAP_MODE_X2APIC_APICV;
5888 }
5889
5890 if (is_long_mode(vcpu))
5891 mode |= MSR_BITMAP_MODE_LM;
5892
5893 return mode;
Yang Zhang8d146952013-01-25 10:18:50 +08005894}
5895
Paolo Bonzini904e14f2018-01-16 16:51:18 +01005896#define X2APIC_MSR(r) (APIC_BASE_MSR + ((r) >> 4))
5897
5898static void vmx_update_msr_bitmap_x2apic(unsigned long *msr_bitmap,
5899 u8 mode)
Yang Zhang8d146952013-01-25 10:18:50 +08005900{
Paolo Bonzini904e14f2018-01-16 16:51:18 +01005901 int msr;
5902
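	/*
	 * Start from a conservative baseline for the x2APIC MSR range
	 * (0x800-0x8ff): pass through reads only when APICv is active,
	 * and intercept all writes.  Selected registers are opened up
	 * below depending on the mode.
	 */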
5903 for (msr = 0x800; msr <= 0x8ff; msr += BITS_PER_LONG) {
5904 unsigned word = msr / BITS_PER_LONG;
5905 msr_bitmap[word] = (mode & MSR_BITMAP_MODE_X2APIC_APICV) ? 0 : ~0;
5906 msr_bitmap[word + (0x800 / sizeof(long))] = ~0;
Wanpeng Lif6e90f92016-09-22 07:43:25 +08005907 }
Paolo Bonzini904e14f2018-01-16 16:51:18 +01005908
5909 if (mode & MSR_BITMAP_MODE_X2APIC) {
5910 /*
5911 * TPR reads and writes can be virtualized even if virtual interrupt
5912 * delivery is not in use.
5913 */
5914 vmx_disable_intercept_for_msr(msr_bitmap, X2APIC_MSR(APIC_TASKPRI), MSR_TYPE_RW);
5915 if (mode & MSR_BITMAP_MODE_X2APIC_APICV) {
5916 vmx_enable_intercept_for_msr(msr_bitmap, X2APIC_MSR(APIC_TMCCT), MSR_TYPE_R);
5917 vmx_disable_intercept_for_msr(msr_bitmap, X2APIC_MSR(APIC_EOI), MSR_TYPE_W);
5918 vmx_disable_intercept_for_msr(msr_bitmap, X2APIC_MSR(APIC_SELF_IPI), MSR_TYPE_W);
5919 }
5920 }
5921}
5922
5923static void vmx_update_msr_bitmap(struct kvm_vcpu *vcpu)
5924{
5925 struct vcpu_vmx *vmx = to_vmx(vcpu);
5926 unsigned long *msr_bitmap = vmx->vmcs01.msr_bitmap;
5927 u8 mode = vmx_msr_bitmap_mode(vcpu);
5928 u8 changed = mode ^ vmx->msr_bitmap_mode;
5929
5930 if (!changed)
5931 return;
5932
5933 vmx_set_intercept_for_msr(msr_bitmap, MSR_KERNEL_GS_BASE, MSR_TYPE_RW,
5934 !(mode & MSR_BITMAP_MODE_LM));
5935
5936 if (changed & (MSR_BITMAP_MODE_X2APIC | MSR_BITMAP_MODE_X2APIC_APICV))
5937 vmx_update_msr_bitmap_x2apic(msr_bitmap, mode);
5938
5939 vmx->msr_bitmap_mode = mode;
Avi Kivity58972972009-02-24 22:26:47 +02005940}
5941
Suravee Suthikulpanitb2a05fe2017-09-12 10:42:41 -05005942static bool vmx_get_enable_apicv(struct kvm_vcpu *vcpu)
Paolo Bonzinid50ab6c2015-07-29 11:49:59 +02005943{
Andrey Smetanind62caab2015-11-10 15:36:33 +03005944 return enable_apicv;
Paolo Bonzinid50ab6c2015-07-29 11:49:59 +02005945}
5946
David Matlackc9f04402017-08-01 14:00:40 -07005947static void nested_mark_vmcs12_pages_dirty(struct kvm_vcpu *vcpu)
5948{
5949 struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
5950 gfn_t gfn;
5951
5952 /*
5953 * Don't need to mark the APIC access page dirty; it is never
5954 * written to by the CPU during APIC virtualization.
5955 */
5956
5957 if (nested_cpu_has(vmcs12, CPU_BASED_TPR_SHADOW)) {
5958 gfn = vmcs12->virtual_apic_page_addr >> PAGE_SHIFT;
5959 kvm_vcpu_mark_page_dirty(vcpu, gfn);
5960 }
5961
5962 if (nested_cpu_has_posted_intr(vmcs12)) {
5963 gfn = vmcs12->posted_intr_desc_addr >> PAGE_SHIFT;
5964 kvm_vcpu_mark_page_dirty(vcpu, gfn);
5965 }
5966}
5967
5968
David Hildenbrand6342c502017-01-25 11:58:58 +01005969static void vmx_complete_nested_posted_interrupt(struct kvm_vcpu *vcpu)
Wincy Van705699a2015-02-03 23:58:17 +08005970{
5971 struct vcpu_vmx *vmx = to_vmx(vcpu);
5972 int max_irr;
5973 void *vapic_page;
5974 u16 status;
5975
David Matlackc9f04402017-08-01 14:00:40 -07005976 if (!vmx->nested.pi_desc || !vmx->nested.pi_pending)
5977 return;
Wincy Van705699a2015-02-03 23:58:17 +08005978
David Matlackc9f04402017-08-01 14:00:40 -07005979 vmx->nested.pi_pending = false;
5980 if (!pi_test_and_clear_on(vmx->nested.pi_desc))
5981 return;
Wincy Van705699a2015-02-03 23:58:17 +08005982
David Matlackc9f04402017-08-01 14:00:40 -07005983 max_irr = find_last_bit((unsigned long *)vmx->nested.pi_desc->pir, 256);
5984 if (max_irr != 256) {
Wincy Van705699a2015-02-03 23:58:17 +08005985 vapic_page = kmap(vmx->nested.virtual_apic_page);
Liran Alone7387b02017-12-24 18:12:54 +02005986 __kvm_apic_update_irr(vmx->nested.pi_desc->pir,
5987 vapic_page, &max_irr);
Wincy Van705699a2015-02-03 23:58:17 +08005988 kunmap(vmx->nested.virtual_apic_page);
5989
5990 status = vmcs_read16(GUEST_INTR_STATUS);
5991 if ((u8)max_irr > ((u8)status & 0xff)) {
5992 status &= ~0xff;
5993 status |= (u8)max_irr;
5994 vmcs_write16(GUEST_INTR_STATUS, status);
5995 }
5996 }
David Matlackc9f04402017-08-01 14:00:40 -07005997
5998 nested_mark_vmcs12_pages_dirty(vcpu);
Wincy Van705699a2015-02-03 23:58:17 +08005999}
6000
Wincy Van06a55242017-04-28 13:13:59 +08006001static inline bool kvm_vcpu_trigger_posted_interrupt(struct kvm_vcpu *vcpu,
6002 bool nested)
Radim Krčmář21bc8dc2015-02-16 15:36:33 +01006003{
6004#ifdef CONFIG_SMP
Wincy Van06a55242017-04-28 13:13:59 +08006005 int pi_vec = nested ? POSTED_INTR_NESTED_VECTOR : POSTED_INTR_VECTOR;
6006
Radim Krčmář21bc8dc2015-02-16 15:36:33 +01006007 if (vcpu->mode == IN_GUEST_MODE) {
Feng Wu28b835d2015-09-18 22:29:54 +08006008 /*
Haozhong Zhang5753743f2017-09-18 09:56:50 +08006009 * The vector of the interrupt to be delivered to the vcpu has
6010 * already been set in the PIR before this function is called.
Feng Wu28b835d2015-09-18 22:29:54 +08006011 *
Haozhong Zhang5753743f2017-09-18 09:56:50 +08006012 * Following cases will be reached in this block, and
6013 * we always send a notification event in all cases as
6014 * explained below.
6015 *
6016 * Case 1: vcpu keeps in non-root mode. Sending a
6017 * notification event posts the interrupt to vcpu.
6018 *
6019 * Case 2: vcpu exits to root mode and is still
6020 * runnable. PIR will be synced to vIRR before the
6021 * next vcpu entry. Sending a notification event in
6022 * this case has no effect, as the vcpu is no longer in
6023 * non-root mode.
6024 *
6025 * Case 3: vcpu exits to root mode and is blocked.
6026 * vcpu_block() has already synced PIR to vIRR and
6027 * never blocks vcpu if vIRR is not cleared. Therefore,
6028 * a blocked vcpu here does not wait for any requested
6029 * interrupts in PIR, and sending a notification event
6030 * which has no effect is safe here.
Feng Wu28b835d2015-09-18 22:29:54 +08006031 */
Feng Wu28b835d2015-09-18 22:29:54 +08006032
Wincy Van06a55242017-04-28 13:13:59 +08006033 apic->send_IPI_mask(get_cpu_mask(vcpu->cpu), pi_vec);
Radim Krčmář21bc8dc2015-02-16 15:36:33 +01006034 return true;
6035 }
6036#endif
6037 return false;
6038}
6039
Wincy Van705699a2015-02-03 23:58:17 +08006040static int vmx_deliver_nested_posted_interrupt(struct kvm_vcpu *vcpu,
6041 int vector)
6042{
6043 struct vcpu_vmx *vmx = to_vmx(vcpu);
6044
6045 if (is_guest_mode(vcpu) &&
6046 vector == vmx->nested.posted_intr_nv) {
Wincy Van705699a2015-02-03 23:58:17 +08006047 /*
6048 * If a posted intr is not recognized by hardware,
6049 * it will be delivered on the next vmentry.
6050 */
6051 vmx->nested.pi_pending = true;
6052 kvm_make_request(KVM_REQ_EVENT, vcpu);
Liran Alon6b697712017-11-09 20:27:20 +02006053 /* the PIR and ON have been set by L1. */
6054 if (!kvm_vcpu_trigger_posted_interrupt(vcpu, true))
6055 kvm_vcpu_kick(vcpu);
Wincy Van705699a2015-02-03 23:58:17 +08006056 return 0;
6057 }
6058 return -1;
6059}
Avi Kivity6aa8b732006-12-10 02:21:36 -08006060/*
Yang Zhanga20ed542013-04-11 19:25:15 +08006061 * Send an interrupt to a vcpu via the posted-interrupt mechanism.
6062 * 1. If the target vcpu is running (non-root mode), send a posted-interrupt
6063 * notification and the hardware will sync PIR to vIRR atomically.
6064 * 2. If the target vcpu isn't running (root mode), kick it so it picks up
6065 * the interrupt from the PIR on the next vmentry.
6066 */
6067static void vmx_deliver_posted_interrupt(struct kvm_vcpu *vcpu, int vector)
6068{
6069 struct vcpu_vmx *vmx = to_vmx(vcpu);
6070 int r;
6071
Wincy Van705699a2015-02-03 23:58:17 +08006072 r = vmx_deliver_nested_posted_interrupt(vcpu, vector);
6073 if (!r)
6074 return;
6075
Yang Zhanga20ed542013-04-11 19:25:15 +08006076 if (pi_test_and_set_pir(vector, &vmx->pi_desc))
6077 return;
6078
Paolo Bonzinib95234c2016-12-19 13:57:33 +01006079 /* If a previous notification has sent the IPI, nothing to do. */
6080 if (pi_test_and_set_on(&vmx->pi_desc))
6081 return;
6082
Wincy Van06a55242017-04-28 13:13:59 +08006083 if (!kvm_vcpu_trigger_posted_interrupt(vcpu, false))
Yang Zhanga20ed542013-04-11 19:25:15 +08006084 kvm_vcpu_kick(vcpu);
6085}
6086
Avi Kivity6aa8b732006-12-10 02:21:36 -08006087/*
Nadav Har'Ela3a8ff82011-05-25 23:09:01 +03006088 * Set up the vmcs's constant host-state fields, i.e., host-state fields that
6089 * will not change in the lifetime of the guest.
6090 * Note that host-state that does change is set elsewhere. E.g., host-state
6091 * that is set differently for each CPU is set in vmx_vcpu_load(), not here.
6092 */
Yang Zhanga547c6d2013-04-11 19:25:10 +08006093static void vmx_set_constant_host_state(struct vcpu_vmx *vmx)
Nadav Har'Ela3a8ff82011-05-25 23:09:01 +03006094{
6095 u32 low32, high32;
6096 unsigned long tmpl;
6097 struct desc_ptr dt;
Andy Lutomirskid6e41f12017-05-28 10:00:17 -07006098 unsigned long cr0, cr3, cr4;
Nadav Har'Ela3a8ff82011-05-25 23:09:01 +03006099
Andy Lutomirski04ac88a2016-10-31 15:18:45 -07006100 cr0 = read_cr0();
6101 WARN_ON(cr0 & X86_CR0_TS);
6102 vmcs_writel(HOST_CR0, cr0); /* 22.2.3 */
Andy Lutomirskid6e41f12017-05-28 10:00:17 -07006103
6104 /*
6105 * Save the most likely value for this task's CR3 in the VMCS.
6106 * We can't use __get_current_cr3_fast() because we're not atomic.
6107 */
Andy Lutomirski6c690ee2017-06-12 10:26:14 -07006108 cr3 = __read_cr3();
Andy Lutomirskid6e41f12017-05-28 10:00:17 -07006109 vmcs_writel(HOST_CR3, cr3); /* 22.2.3 FIXME: shadow tables */
Sean Christophersond7ee0392018-07-23 12:32:47 -07006110 vmx->loaded_vmcs->host_state.cr3 = cr3;
Nadav Har'Ela3a8ff82011-05-25 23:09:01 +03006111
Andy Lutomirskid974baa2014-10-08 09:02:13 -07006112 /* Save the most likely value for this task's CR4 in the VMCS. */
Andy Lutomirski1e02ce42014-10-24 15:58:08 -07006113 cr4 = cr4_read_shadow();
Andy Lutomirskid974baa2014-10-08 09:02:13 -07006114 vmcs_writel(HOST_CR4, cr4); /* 22.2.3, 22.2.5 */
Sean Christophersond7ee0392018-07-23 12:32:47 -07006115 vmx->loaded_vmcs->host_state.cr4 = cr4;
Andy Lutomirskid974baa2014-10-08 09:02:13 -07006116
Nadav Har'Ela3a8ff82011-05-25 23:09:01 +03006117 vmcs_write16(HOST_CS_SELECTOR, __KERNEL_CS); /* 22.2.4 */
Avi Kivityb2da15a2012-05-13 19:53:24 +03006118#ifdef CONFIG_X86_64
6119 /*
6120 * Load null selectors, so we can avoid reloading them in
Sean Christopherson6d6095b2018-07-23 12:32:44 -07006121 * vmx_prepare_switch_to_host(), in case userspace uses
6122 * the null selectors too (the expected case).
Avi Kivityb2da15a2012-05-13 19:53:24 +03006123 */
6124 vmcs_write16(HOST_DS_SELECTOR, 0);
6125 vmcs_write16(HOST_ES_SELECTOR, 0);
6126#else
Nadav Har'Ela3a8ff82011-05-25 23:09:01 +03006127 vmcs_write16(HOST_DS_SELECTOR, __KERNEL_DS); /* 22.2.4 */
6128 vmcs_write16(HOST_ES_SELECTOR, __KERNEL_DS); /* 22.2.4 */
Avi Kivityb2da15a2012-05-13 19:53:24 +03006129#endif
Nadav Har'Ela3a8ff82011-05-25 23:09:01 +03006130 vmcs_write16(HOST_SS_SELECTOR, __KERNEL_DS); /* 22.2.4 */
6131 vmcs_write16(HOST_TR_SELECTOR, GDT_ENTRY_TSS*8); /* 22.2.4 */
6132
Juergen Gross87930012017-09-04 12:25:27 +02006133 store_idt(&dt);
Nadav Har'Ela3a8ff82011-05-25 23:09:01 +03006134 vmcs_writel(HOST_IDTR_BASE, dt.address); /* 22.2.4 */
Yang Zhanga547c6d2013-04-11 19:25:10 +08006135 vmx->host_idt_base = dt.address;
Nadav Har'Ela3a8ff82011-05-25 23:09:01 +03006136
Avi Kivity83287ea422012-09-16 15:10:57 +03006137 vmcs_writel(HOST_RIP, vmx_return); /* 22.2.5 */
Nadav Har'Ela3a8ff82011-05-25 23:09:01 +03006138
6139 rdmsr(MSR_IA32_SYSENTER_CS, low32, high32);
6140 vmcs_write32(HOST_IA32_SYSENTER_CS, low32);
6141 rdmsrl(MSR_IA32_SYSENTER_EIP, tmpl);
6142 vmcs_writel(HOST_IA32_SYSENTER_EIP, tmpl); /* 22.2.3 */
6143
6144 if (vmcs_config.vmexit_ctrl & VM_EXIT_LOAD_IA32_PAT) {
6145 rdmsr(MSR_IA32_CR_PAT, low32, high32);
6146 vmcs_write64(HOST_IA32_PAT, low32 | ((u64) high32 << 32));
6147 }
6148}
6149
Nadav Har'Elbf8179a2011-05-25 23:09:31 +03006150static void set_cr4_guest_host_mask(struct vcpu_vmx *vmx)
6151{
6152 vmx->vcpu.arch.cr4_guest_owned_bits = KVM_CR4_GUEST_OWNED_BITS;
6153 if (enable_ept)
6154 vmx->vcpu.arch.cr4_guest_owned_bits |= X86_CR4_PGE;
Nadav Har'Elfe3ef052011-05-25 23:10:02 +03006155 if (is_guest_mode(&vmx->vcpu))
6156 vmx->vcpu.arch.cr4_guest_owned_bits &=
6157 ~get_vmcs12(&vmx->vcpu)->cr4_guest_host_mask;
Nadav Har'Elbf8179a2011-05-25 23:09:31 +03006158 vmcs_writel(CR4_GUEST_HOST_MASK, ~vmx->vcpu.arch.cr4_guest_owned_bits);
6159}
6160
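/*
 * Illustrative sketch, not part of the kernel source: a bit set in
 * CR4_GUEST_HOST_MASK is host-owned; the guest reads it from the CR4
 * read shadow, and a write that changes it causes a VM exit. That is why
 * the function above programs the complement of the guest-owned bits.
 * Assuming only the types this file already includes, a hypothetical
 * check of whether a guest MOV to CR4 would trap might look like:
 */
static bool sketch_cr4_write_traps(unsigned long new_cr4,
				   unsigned long cr4_read_shadow,
				   unsigned long guest_owned_bits)
{
	unsigned long guest_host_mask = ~guest_owned_bits;

	/*
	 * A host-owned bit written with a value that differs from the
	 * corresponding bit of the read shadow causes a VM exit.
	 */
	return ((new_cr4 ^ cr4_read_shadow) & guest_host_mask) != 0;
}
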
Yang Zhang01e439b2013-04-11 19:25:12 +08006161static u32 vmx_pin_based_exec_ctrl(struct vcpu_vmx *vmx)
6162{
6163 u32 pin_based_exec_ctrl = vmcs_config.pin_based_exec_ctrl;
6164
Andrey Smetanind62caab2015-11-10 15:36:33 +03006165 if (!kvm_vcpu_apicv_active(&vmx->vcpu))
Yang Zhang01e439b2013-04-11 19:25:12 +08006166 pin_based_exec_ctrl &= ~PIN_BASED_POSTED_INTR;
Paolo Bonzinid02fcf52017-11-06 13:31:13 +01006167
6168 if (!enable_vnmi)
6169 pin_based_exec_ctrl &= ~PIN_BASED_VIRTUAL_NMIS;
6170
Yunhong Jiang64672c92016-06-13 14:19:59 -07006171 /* Enable the preemption timer dynamically */
6172 pin_based_exec_ctrl &= ~PIN_BASED_VMX_PREEMPTION_TIMER;
Yang Zhang01e439b2013-04-11 19:25:12 +08006173 return pin_based_exec_ctrl;
6174}
6175
Andrey Smetanind62caab2015-11-10 15:36:33 +03006176static void vmx_refresh_apicv_exec_ctrl(struct kvm_vcpu *vcpu)
6177{
6178 struct vcpu_vmx *vmx = to_vmx(vcpu);
6179
6180 vmcs_write32(PIN_BASED_VM_EXEC_CONTROL, vmx_pin_based_exec_ctrl(vmx));
Roman Kagan3ce424e2016-05-18 17:48:20 +03006181 if (cpu_has_secondary_exec_ctrls()) {
6182 if (kvm_vcpu_apicv_active(vcpu))
6183 vmcs_set_bits(SECONDARY_VM_EXEC_CONTROL,
6184 SECONDARY_EXEC_APIC_REGISTER_VIRT |
6185 SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY);
6186 else
6187 vmcs_clear_bits(SECONDARY_VM_EXEC_CONTROL,
6188 SECONDARY_EXEC_APIC_REGISTER_VIRT |
6189 SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY);
6190 }
6191
6192 if (cpu_has_vmx_msr_bitmap())
Paolo Bonzini904e14f2018-01-16 16:51:18 +01006193 vmx_update_msr_bitmap(vcpu);
Andrey Smetanind62caab2015-11-10 15:36:33 +03006194}
6195
Nadav Har'Elbf8179a2011-05-25 23:09:31 +03006196static u32 vmx_exec_control(struct vcpu_vmx *vmx)
6197{
6198 u32 exec_control = vmcs_config.cpu_based_exec_ctrl;
Paolo Bonzinid16c2932014-02-21 10:36:37 +01006199
6200 if (vmx->vcpu.arch.switch_db_regs & KVM_DEBUGREG_WONT_EXIT)
6201 exec_control &= ~CPU_BASED_MOV_DR_EXITING;
6202
Paolo Bonzini35754c92015-07-29 12:05:37 +02006203 if (!cpu_need_tpr_shadow(&vmx->vcpu)) {
Nadav Har'Elbf8179a2011-05-25 23:09:31 +03006204 exec_control &= ~CPU_BASED_TPR_SHADOW;
6205#ifdef CONFIG_X86_64
6206 exec_control |= CPU_BASED_CR8_STORE_EXITING |
6207 CPU_BASED_CR8_LOAD_EXITING;
6208#endif
6209 }
6210 if (!enable_ept)
6211 exec_control |= CPU_BASED_CR3_STORE_EXITING |
6212 CPU_BASED_CR3_LOAD_EXITING |
6213 CPU_BASED_INVLPG_EXITING;
Wanpeng Li4d5422c2018-03-12 04:53:02 -07006214 if (kvm_mwait_in_guest(vmx->vcpu.kvm))
6215 exec_control &= ~(CPU_BASED_MWAIT_EXITING |
6216 CPU_BASED_MONITOR_EXITING);
Wanpeng Licaa057a2018-03-12 04:53:03 -07006217 if (kvm_hlt_in_guest(vmx->vcpu.kvm))
6218 exec_control &= ~CPU_BASED_HLT_EXITING;
Nadav Har'Elbf8179a2011-05-25 23:09:31 +03006219 return exec_control;
6220}
6221
Jim Mattson45ec3682017-08-23 16:32:04 -07006222static bool vmx_rdrand_supported(void)
Nadav Har'Elbf8179a2011-05-25 23:09:31 +03006223{
Jim Mattson45ec3682017-08-23 16:32:04 -07006224 return vmcs_config.cpu_based_2nd_exec_ctrl &
David Hildenbrand736fdf72017-08-24 20:51:37 +02006225 SECONDARY_EXEC_RDRAND_EXITING;
Jim Mattson45ec3682017-08-23 16:32:04 -07006226}
6227
Jim Mattson75f4fc82017-08-23 16:32:03 -07006228static bool vmx_rdseed_supported(void)
6229{
6230 return vmcs_config.cpu_based_2nd_exec_ctrl &
David Hildenbrand736fdf72017-08-24 20:51:37 +02006231 SECONDARY_EXEC_RDSEED_EXITING;
Jim Mattson75f4fc82017-08-23 16:32:03 -07006232}
6233
Paolo Bonzini80154d72017-08-24 13:55:35 +02006234static void vmx_compute_secondary_exec_control(struct vcpu_vmx *vmx)
Nadav Har'Elbf8179a2011-05-25 23:09:31 +03006235{
Paolo Bonzini80154d72017-08-24 13:55:35 +02006236 struct kvm_vcpu *vcpu = &vmx->vcpu;
6237
Nadav Har'Elbf8179a2011-05-25 23:09:31 +03006238 u32 exec_control = vmcs_config.cpu_based_2nd_exec_ctrl;
Paolo Bonzini0367f202016-07-12 10:44:55 +02006239
Paolo Bonzini80154d72017-08-24 13:55:35 +02006240 if (!cpu_need_virtualize_apic_accesses(vcpu))
Nadav Har'Elbf8179a2011-05-25 23:09:31 +03006241 exec_control &= ~SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES;
6242 if (vmx->vpid == 0)
6243 exec_control &= ~SECONDARY_EXEC_ENABLE_VPID;
6244 if (!enable_ept) {
6245 exec_control &= ~SECONDARY_EXEC_ENABLE_EPT;
6246 enable_unrestricted_guest = 0;
6247 }
6248 if (!enable_unrestricted_guest)
6249 exec_control &= ~SECONDARY_EXEC_UNRESTRICTED_GUEST;
Wanpeng Lib31c1142018-03-12 04:53:04 -07006250 if (kvm_pause_in_guest(vmx->vcpu.kvm))
Nadav Har'Elbf8179a2011-05-25 23:09:31 +03006251 exec_control &= ~SECONDARY_EXEC_PAUSE_LOOP_EXITING;
Paolo Bonzini80154d72017-08-24 13:55:35 +02006252 if (!kvm_vcpu_apicv_active(vcpu))
Yang Zhangc7c9c562013-01-25 10:18:51 +08006253 exec_control &= ~(SECONDARY_EXEC_APIC_REGISTER_VIRT |
6254 SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY);
Yang Zhang8d146952013-01-25 10:18:50 +08006255 exec_control &= ~SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE;
Paolo Bonzini0367f202016-07-12 10:44:55 +02006256
6257 /* SECONDARY_EXEC_DESC is enabled/disabled on writes to CR4.UMIP,
6258 * in vmx_set_cr4. */
6259 exec_control &= ~SECONDARY_EXEC_DESC;
6260
Abel Gordonabc4fc52013-04-18 14:35:25 +03006261 /* SECONDARY_EXEC_SHADOW_VMCS is enabled when L1 executes VMPTRLD
6262 (handle_vmptrld).
6263	 We cannot enable shadow_vmcs here because we do not yet have
6264	 a current VMCS12.
6265 */
6266 exec_control &= ~SECONDARY_EXEC_SHADOW_VMCS;
Kai Huanga3eaa862015-11-04 13:46:05 +08006267
6268 if (!enable_pml)
6269 exec_control &= ~SECONDARY_EXEC_ENABLE_PML;
Kai Huang843e4332015-01-28 10:54:28 +08006270
Paolo Bonzini3db13482017-08-24 14:48:03 +02006271 if (vmx_xsaves_supported()) {
6272 /* Exposing XSAVES only when XSAVE is exposed */
6273 bool xsaves_enabled =
6274 guest_cpuid_has(vcpu, X86_FEATURE_XSAVE) &&
6275 guest_cpuid_has(vcpu, X86_FEATURE_XSAVES);
6276
6277 if (!xsaves_enabled)
6278 exec_control &= ~SECONDARY_EXEC_XSAVES;
6279
6280 if (nested) {
6281 if (xsaves_enabled)
Paolo Bonzini6677f3d2018-02-26 13:40:08 +01006282 vmx->nested.msrs.secondary_ctls_high |=
Paolo Bonzini3db13482017-08-24 14:48:03 +02006283 SECONDARY_EXEC_XSAVES;
6284 else
Paolo Bonzini6677f3d2018-02-26 13:40:08 +01006285 vmx->nested.msrs.secondary_ctls_high &=
Paolo Bonzini3db13482017-08-24 14:48:03 +02006286 ~SECONDARY_EXEC_XSAVES;
6287 }
6288 }
6289
Paolo Bonzini80154d72017-08-24 13:55:35 +02006290 if (vmx_rdtscp_supported()) {
6291 bool rdtscp_enabled = guest_cpuid_has(vcpu, X86_FEATURE_RDTSCP);
6292 if (!rdtscp_enabled)
6293 exec_control &= ~SECONDARY_EXEC_RDTSCP;
6294
6295 if (nested) {
6296 if (rdtscp_enabled)
Paolo Bonzini6677f3d2018-02-26 13:40:08 +01006297 vmx->nested.msrs.secondary_ctls_high |=
Paolo Bonzini80154d72017-08-24 13:55:35 +02006298 SECONDARY_EXEC_RDTSCP;
6299 else
Paolo Bonzini6677f3d2018-02-26 13:40:08 +01006300 vmx->nested.msrs.secondary_ctls_high &=
Paolo Bonzini80154d72017-08-24 13:55:35 +02006301 ~SECONDARY_EXEC_RDTSCP;
6302 }
6303 }
6304
6305 if (vmx_invpcid_supported()) {
6306 /* Exposing INVPCID only when PCID is exposed */
6307 bool invpcid_enabled =
6308 guest_cpuid_has(vcpu, X86_FEATURE_INVPCID) &&
6309 guest_cpuid_has(vcpu, X86_FEATURE_PCID);
6310
6311 if (!invpcid_enabled) {
6312 exec_control &= ~SECONDARY_EXEC_ENABLE_INVPCID;
6313 guest_cpuid_clear(vcpu, X86_FEATURE_INVPCID);
6314 }
6315
6316 if (nested) {
6317 if (invpcid_enabled)
Paolo Bonzini6677f3d2018-02-26 13:40:08 +01006318 vmx->nested.msrs.secondary_ctls_high |=
Paolo Bonzini80154d72017-08-24 13:55:35 +02006319 SECONDARY_EXEC_ENABLE_INVPCID;
6320 else
Paolo Bonzini6677f3d2018-02-26 13:40:08 +01006321 vmx->nested.msrs.secondary_ctls_high &=
Paolo Bonzini80154d72017-08-24 13:55:35 +02006322 ~SECONDARY_EXEC_ENABLE_INVPCID;
6323 }
6324 }
6325
Jim Mattson45ec3682017-08-23 16:32:04 -07006326 if (vmx_rdrand_supported()) {
6327 bool rdrand_enabled = guest_cpuid_has(vcpu, X86_FEATURE_RDRAND);
6328 if (rdrand_enabled)
David Hildenbrand736fdf72017-08-24 20:51:37 +02006329 exec_control &= ~SECONDARY_EXEC_RDRAND_EXITING;
Jim Mattson45ec3682017-08-23 16:32:04 -07006330
6331 if (nested) {
6332 if (rdrand_enabled)
Paolo Bonzini6677f3d2018-02-26 13:40:08 +01006333 vmx->nested.msrs.secondary_ctls_high |=
David Hildenbrand736fdf72017-08-24 20:51:37 +02006334 SECONDARY_EXEC_RDRAND_EXITING;
Jim Mattson45ec3682017-08-23 16:32:04 -07006335 else
Paolo Bonzini6677f3d2018-02-26 13:40:08 +01006336 vmx->nested.msrs.secondary_ctls_high &=
David Hildenbrand736fdf72017-08-24 20:51:37 +02006337 ~SECONDARY_EXEC_RDRAND_EXITING;
Jim Mattson45ec3682017-08-23 16:32:04 -07006338 }
6339 }
6340
Jim Mattson75f4fc82017-08-23 16:32:03 -07006341 if (vmx_rdseed_supported()) {
6342 bool rdseed_enabled = guest_cpuid_has(vcpu, X86_FEATURE_RDSEED);
6343 if (rdseed_enabled)
David Hildenbrand736fdf72017-08-24 20:51:37 +02006344 exec_control &= ~SECONDARY_EXEC_RDSEED_EXITING;
Jim Mattson75f4fc82017-08-23 16:32:03 -07006345
6346 if (nested) {
6347 if (rdseed_enabled)
Paolo Bonzini6677f3d2018-02-26 13:40:08 +01006348 vmx->nested.msrs.secondary_ctls_high |=
David Hildenbrand736fdf72017-08-24 20:51:37 +02006349 SECONDARY_EXEC_RDSEED_EXITING;
Jim Mattson75f4fc82017-08-23 16:32:03 -07006350 else
Paolo Bonzini6677f3d2018-02-26 13:40:08 +01006351 vmx->nested.msrs.secondary_ctls_high &=
David Hildenbrand736fdf72017-08-24 20:51:37 +02006352 ~SECONDARY_EXEC_RDSEED_EXITING;
Jim Mattson75f4fc82017-08-23 16:32:03 -07006353 }
6354 }
6355
Paolo Bonzini80154d72017-08-24 13:55:35 +02006356 vmx->secondary_exec_control = exec_control;
Nadav Har'Elbf8179a2011-05-25 23:09:31 +03006357}
6358
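/*
 * Illustrative sketch, not part of the kernel source: the XSAVES, RDTSCP,
 * INVPCID, RDRAND and RDSEED blocks above all follow one pattern: adjust
 * the secondary control according to guest CPUID and, when nested, mirror
 * the guest's capability into the nested VMX MSRs.  A condensed,
 * hypothetical helper expressing that pattern (the sketch_* names are
 * made up):
 */
static void sketch_gate_secondary_ctrl(u32 *exec_control, u32 *nested_ctls_high,
				       u32 ctrl_bit, bool guest_has_feature,
				       bool nested, bool exiting_style)
{
	/*
	 * "Exiting" controls (RDRAND/RDSEED) are cleared when the guest
	 * owns the feature; "enable" controls (XSAVES/RDTSCP/INVPCID) are
	 * cleared when it does not.
	 */
	if (guest_has_feature == exiting_style)
		*exec_control &= ~ctrl_bit;

	/* Either way, the nested capability bit tracks guest CPUID. */
	if (nested) {
		if (guest_has_feature)
			*nested_ctls_high |= ctrl_bit;
		else
			*nested_ctls_high &= ~ctrl_bit;
	}
}
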
Xiao Guangrongce88dec2011-07-12 03:33:44 +08006359static void ept_set_mmio_spte_mask(void)
6360{
6361 /*
6362 * EPT Misconfigurations can be generated if the value of bits 2:0
6363 * of an EPT paging-structure entry is 110b (write/execute).
Xiao Guangrongce88dec2011-07-12 03:33:44 +08006364 */
Peter Feinerdcdca5f2017-06-30 17:26:30 -07006365 kvm_mmu_set_mmio_spte_mask(VMX_EPT_RWX_MASK,
6366 VMX_EPT_MISCONFIG_WX_VALUE);
Xiao Guangrongce88dec2011-07-12 03:33:44 +08006367}
6368
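/*
 * Illustrative sketch, not part of the kernel source: in an EPT leaf
 * entry, bit 0 is Read, bit 1 is Write and bit 2 is eXecute; the 110b
 * combination (write+execute without read) is architecturally invalid
 * and raises an EPT misconfiguration, which is exactly the mask/value
 * pair the function above hands to the MMU.  A stand-alone predicate:
 */
static bool sketch_is_mmio_misconfig(u64 spte)
{
	const u64 rwx_mask = 0x7;	/* bits 2:0 of the EPT entry */
	const u64 wx_value = 0x6;	/* 110b: write+execute, no read */

	return (spte & rwx_mask) == wx_value;
}
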
Wanpeng Lif53cd632014-12-02 19:14:58 +08006369#define VMX_XSS_EXIT_BITMAP 0
Nadav Har'Ela3a8ff82011-05-25 23:09:01 +03006370/*
Avi Kivity6aa8b732006-12-10 02:21:36 -08006371 * Sets up the vmcs for emulated real mode.
6372 */
David Hildenbrand12d79912017-08-24 20:51:26 +02006373static void vmx_vcpu_setup(struct vcpu_vmx *vmx)
Avi Kivity6aa8b732006-12-10 02:21:36 -08006374{
Avi Kivity6aa8b732006-12-10 02:21:36 -08006375 int i;
Avi Kivity6aa8b732006-12-10 02:21:36 -08006376
Abel Gordon4607c2d2013-04-18 14:35:55 +03006377 if (enable_shadow_vmcs) {
Jim Mattsonf4160e42018-05-29 09:11:33 -07006378 /*
6379 * At vCPU creation, "VMWRITE to any supported field
6380 * in the VMCS" is supported, so use the more
6381 * permissive vmx_vmread_bitmap to specify both read
6382 * and write permissions for the shadow VMCS.
6383 */
Abel Gordon4607c2d2013-04-18 14:35:55 +03006384 vmcs_write64(VMREAD_BITMAP, __pa(vmx_vmread_bitmap));
Jim Mattsonf4160e42018-05-29 09:11:33 -07006385 vmcs_write64(VMWRITE_BITMAP, __pa(vmx_vmread_bitmap));
Abel Gordon4607c2d2013-04-18 14:35:55 +03006386 }
Sheng Yang25c5f222008-03-28 13:18:56 +08006387 if (cpu_has_vmx_msr_bitmap())
Paolo Bonzini904e14f2018-01-16 16:51:18 +01006388 vmcs_write64(MSR_BITMAP, __pa(vmx->vmcs01.msr_bitmap));
Sheng Yang25c5f222008-03-28 13:18:56 +08006389
Avi Kivity6aa8b732006-12-10 02:21:36 -08006390 vmcs_write64(VMCS_LINK_POINTER, -1ull); /* 22.3.1.5 */
6391
Avi Kivity6aa8b732006-12-10 02:21:36 -08006392 /* Control */
Yang Zhang01e439b2013-04-11 19:25:12 +08006393 vmcs_write32(PIN_BASED_VM_EXEC_CONTROL, vmx_pin_based_exec_ctrl(vmx));
Yunhong Jiang64672c92016-06-13 14:19:59 -07006394 vmx->hv_deadline_tsc = -1;
Yang, Sheng6e5d8652007-09-12 18:03:11 +08006395
Nadav Har'Elbf8179a2011-05-25 23:09:31 +03006396 vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, vmx_exec_control(vmx));
Avi Kivity6aa8b732006-12-10 02:21:36 -08006397
Dan Williamsdfa169b2016-06-02 11:17:24 -07006398 if (cpu_has_secondary_exec_ctrls()) {
Paolo Bonzini80154d72017-08-24 13:55:35 +02006399 vmx_compute_secondary_exec_control(vmx);
Nadav Har'Elbf8179a2011-05-25 23:09:31 +03006400 vmcs_write32(SECONDARY_VM_EXEC_CONTROL,
Paolo Bonzini80154d72017-08-24 13:55:35 +02006401 vmx->secondary_exec_control);
Dan Williamsdfa169b2016-06-02 11:17:24 -07006402 }
Sheng Yangf78e0e22007-10-29 09:40:42 +08006403
Andrey Smetanind62caab2015-11-10 15:36:33 +03006404 if (kvm_vcpu_apicv_active(&vmx->vcpu)) {
Yang Zhangc7c9c562013-01-25 10:18:51 +08006405 vmcs_write64(EOI_EXIT_BITMAP0, 0);
6406 vmcs_write64(EOI_EXIT_BITMAP1, 0);
6407 vmcs_write64(EOI_EXIT_BITMAP2, 0);
6408 vmcs_write64(EOI_EXIT_BITMAP3, 0);
6409
6410 vmcs_write16(GUEST_INTR_STATUS, 0);
Yang Zhang01e439b2013-04-11 19:25:12 +08006411
Li RongQing0bcf2612015-12-03 13:29:34 +08006412 vmcs_write16(POSTED_INTR_NV, POSTED_INTR_VECTOR);
Yang Zhang01e439b2013-04-11 19:25:12 +08006413 vmcs_write64(POSTED_INTR_DESC_ADDR, __pa((&vmx->pi_desc)));
Yang Zhangc7c9c562013-01-25 10:18:51 +08006414 }
6415
Wanpeng Lib31c1142018-03-12 04:53:04 -07006416 if (!kvm_pause_in_guest(vmx->vcpu.kvm)) {
Zhai, Edwin4b8d54f2009-10-09 18:03:20 +08006417 vmcs_write32(PLE_GAP, ple_gap);
Radim Krčmářa7653ec2014-08-21 18:08:07 +02006418 vmx->ple_window = ple_window;
6419 vmx->ple_window_dirty = true;
Zhai, Edwin4b8d54f2009-10-09 18:03:20 +08006420 }
6421
Xiao Guangrongc3707952011-07-12 03:28:04 +08006422 vmcs_write32(PAGE_FAULT_ERROR_CODE_MASK, 0);
6423 vmcs_write32(PAGE_FAULT_ERROR_CODE_MATCH, 0);
Avi Kivity6aa8b732006-12-10 02:21:36 -08006424 vmcs_write32(CR3_TARGET_COUNT, 0); /* 22.2.1 */
6425
Avi Kivity9581d442010-10-19 16:46:55 +02006426 vmcs_write16(HOST_FS_SELECTOR, 0); /* 22.2.4 */
6427 vmcs_write16(HOST_GS_SELECTOR, 0); /* 22.2.4 */
Yang Zhanga547c6d2013-04-11 19:25:10 +08006428 vmx_set_constant_host_state(vmx);
Avi Kivity6aa8b732006-12-10 02:21:36 -08006429 vmcs_writel(HOST_FS_BASE, 0); /* 22.2.4 */
6430 vmcs_writel(HOST_GS_BASE, 0); /* 22.2.4 */
Avi Kivity6aa8b732006-12-10 02:21:36 -08006431
Bandan Das2a499e42017-08-03 15:54:41 -04006432 if (cpu_has_vmx_vmfunc())
6433 vmcs_write64(VM_FUNCTION_CONTROL, 0);
6434
Eddie Dong2cc51562007-05-21 07:28:09 +03006435 vmcs_write32(VM_EXIT_MSR_STORE_COUNT, 0);
6436 vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, 0);
Avi Kivity61d2ef22010-04-28 16:40:38 +03006437 vmcs_write64(VM_EXIT_MSR_LOAD_ADDR, __pa(vmx->msr_autoload.host));
Eddie Dong2cc51562007-05-21 07:28:09 +03006438 vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, 0);
Avi Kivity61d2ef22010-04-28 16:40:38 +03006439 vmcs_write64(VM_ENTRY_MSR_LOAD_ADDR, __pa(vmx->msr_autoload.guest));
Avi Kivity6aa8b732006-12-10 02:21:36 -08006440
Radim Krčmář74545702015-04-27 15:11:25 +02006441 if (vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_IA32_PAT)
6442 vmcs_write64(GUEST_IA32_PAT, vmx->vcpu.arch.pat);
Sheng Yang468d4722008-10-09 16:01:55 +08006443
Paolo Bonzini03916db2014-07-24 14:21:57 +02006444 for (i = 0; i < ARRAY_SIZE(vmx_msr_index); ++i) {
Avi Kivity6aa8b732006-12-10 02:21:36 -08006445 u32 index = vmx_msr_index[i];
6446 u32 data_low, data_high;
Gregory Haskinsa2fa3e92007-07-27 08:13:10 -04006447 int j = vmx->nmsrs;
Avi Kivity6aa8b732006-12-10 02:21:36 -08006448
6449 if (rdmsr_safe(index, &data_low, &data_high) < 0)
6450 continue;
Avi Kivity432bd6c2007-01-31 23:48:13 -08006451 if (wrmsr_safe(index, data_low, data_high) < 0)
6452 continue;
Avi Kivity26bb0982009-09-07 11:14:12 +03006453 vmx->guest_msrs[j].index = i;
6454 vmx->guest_msrs[j].data = 0;
Avi Kivityd5696722009-12-02 12:28:47 +02006455 vmx->guest_msrs[j].mask = -1ull;
Gregory Haskinsa2fa3e92007-07-27 08:13:10 -04006456 ++vmx->nmsrs;
Avi Kivity6aa8b732006-12-10 02:21:36 -08006457 }
Avi Kivity6aa8b732006-12-10 02:21:36 -08006458
KarimAllah Ahmed28c1c9f2018-02-01 22:59:44 +01006459 if (boot_cpu_has(X86_FEATURE_ARCH_CAPABILITIES))
6460 rdmsrl(MSR_IA32_ARCH_CAPABILITIES, vmx->arch_capabilities);
Gleb Natapov2961e8762013-11-25 15:37:13 +02006461
6462 vm_exit_controls_init(vmx, vmcs_config.vmexit_ctrl);
Avi Kivity6aa8b732006-12-10 02:21:36 -08006463
6464 /* 22.2.1, 20.8.1 */
Gleb Natapov2961e8762013-11-25 15:37:13 +02006465 vm_entry_controls_init(vmx, vmcs_config.vmentry_ctrl);
Yang, Sheng1c3d14fe2007-07-29 11:07:42 +03006466
Paolo Bonzinibd7e5b02017-02-03 21:18:52 -08006467 vmx->vcpu.arch.cr0_guest_owned_bits = X86_CR0_TS;
6468 vmcs_writel(CR0_GUEST_HOST_MASK, ~X86_CR0_TS);
6469
Nadav Har'Elbf8179a2011-05-25 23:09:31 +03006470 set_cr4_guest_host_mask(vmx);
Avi Kivitye00c8cf2007-10-21 11:00:39 +02006471
Wanpeng Lif53cd632014-12-02 19:14:58 +08006472 if (vmx_xsaves_supported())
6473 vmcs_write64(XSS_EXIT_BITMAP, VMX_XSS_EXIT_BITMAP);
6474
Peter Feiner4e595162016-07-07 14:49:58 -07006475 if (enable_pml) {
6476 ASSERT(vmx->pml_pg);
6477 vmcs_write64(PML_ADDRESS, page_to_phys(vmx->pml_pg));
6478 vmcs_write16(GUEST_PML_INDEX, PML_ENTITY_NUM - 1);
6479 }
Avi Kivitye00c8cf2007-10-21 11:00:39 +02006480}
6481
Nadav Amitd28bc9d2015-04-13 14:34:08 +03006482static void vmx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
Avi Kivitye00c8cf2007-10-21 11:00:39 +02006483{
6484 struct vcpu_vmx *vmx = to_vmx(vcpu);
Jan Kiszka58cb6282014-01-24 16:48:44 +01006485 struct msr_data apic_base_msr;
Nadav Amitd28bc9d2015-04-13 14:34:08 +03006486 u64 cr0;
Avi Kivitye00c8cf2007-10-21 11:00:39 +02006487
Avi Kivity7ffd92c2009-06-09 14:10:45 +03006488 vmx->rmode.vm86_active = 0;
KarimAllah Ahmedd28b3872018-02-01 22:59:45 +01006489 vmx->spec_ctrl = 0;
Avi Kivitye00c8cf2007-10-21 11:00:39 +02006490
Wanpeng Li518e7b92018-02-28 14:03:31 +08006491 vcpu->arch.microcode_version = 0x100000000ULL;
Zhang Xiantaoad312c72007-12-13 23:50:52 +08006492 vmx->vcpu.arch.regs[VCPU_REGS_RDX] = get_rdx_init_val();
Nadav Amitd28bc9d2015-04-13 14:34:08 +03006493 kvm_set_cr8(vcpu, 0);
6494
6495 if (!init_event) {
6496 apic_base_msr.data = APIC_DEFAULT_PHYS_BASE |
6497 MSR_IA32_APICBASE_ENABLE;
6498 if (kvm_vcpu_is_reset_bsp(vcpu))
6499 apic_base_msr.data |= MSR_IA32_APICBASE_BSP;
6500 apic_base_msr.host_initiated = true;
6501 kvm_set_apic_base(vcpu, &apic_base_msr);
6502 }
Avi Kivitye00c8cf2007-10-21 11:00:39 +02006503
Avi Kivity2fb92db2011-04-27 19:42:18 +03006504 vmx_segment_cache_clear(vmx);
6505
Avi Kivity5706be02008-08-20 15:07:31 +03006506 seg_setup(VCPU_SREG_CS);
Jan Kiszka66450a22013-03-13 12:42:34 +01006507 vmcs_write16(GUEST_CS_SELECTOR, 0xf000);
Paolo Bonzinif3531052015-12-03 15:49:56 +01006508 vmcs_writel(GUEST_CS_BASE, 0xffff0000ul);
Avi Kivitye00c8cf2007-10-21 11:00:39 +02006509
6510 seg_setup(VCPU_SREG_DS);
6511 seg_setup(VCPU_SREG_ES);
6512 seg_setup(VCPU_SREG_FS);
6513 seg_setup(VCPU_SREG_GS);
6514 seg_setup(VCPU_SREG_SS);
6515
6516 vmcs_write16(GUEST_TR_SELECTOR, 0);
6517 vmcs_writel(GUEST_TR_BASE, 0);
6518 vmcs_write32(GUEST_TR_LIMIT, 0xffff);
6519 vmcs_write32(GUEST_TR_AR_BYTES, 0x008b);
6520
6521 vmcs_write16(GUEST_LDTR_SELECTOR, 0);
6522 vmcs_writel(GUEST_LDTR_BASE, 0);
6523 vmcs_write32(GUEST_LDTR_LIMIT, 0xffff);
6524 vmcs_write32(GUEST_LDTR_AR_BYTES, 0x00082);
6525
Nadav Amitd28bc9d2015-04-13 14:34:08 +03006526 if (!init_event) {
6527 vmcs_write32(GUEST_SYSENTER_CS, 0);
6528 vmcs_writel(GUEST_SYSENTER_ESP, 0);
6529 vmcs_writel(GUEST_SYSENTER_EIP, 0);
6530 vmcs_write64(GUEST_IA32_DEBUGCTL, 0);
6531 }
Avi Kivitye00c8cf2007-10-21 11:00:39 +02006532
Wanpeng Lic37c2872017-11-20 14:52:21 -08006533 kvm_set_rflags(vcpu, X86_EFLAGS_FIXED);
Jan Kiszka66450a22013-03-13 12:42:34 +01006534 kvm_rip_write(vcpu, 0xfff0);
Avi Kivitye00c8cf2007-10-21 11:00:39 +02006535
Avi Kivitye00c8cf2007-10-21 11:00:39 +02006536 vmcs_writel(GUEST_GDTR_BASE, 0);
6537 vmcs_write32(GUEST_GDTR_LIMIT, 0xffff);
6538
6539 vmcs_writel(GUEST_IDTR_BASE, 0);
6540 vmcs_write32(GUEST_IDTR_LIMIT, 0xffff);
6541
Anthony Liguori443381a2010-12-06 10:53:38 -06006542 vmcs_write32(GUEST_ACTIVITY_STATE, GUEST_ACTIVITY_ACTIVE);
Avi Kivitye00c8cf2007-10-21 11:00:39 +02006543 vmcs_write32(GUEST_INTERRUPTIBILITY_INFO, 0);
Paolo Bonzinif3531052015-12-03 15:49:56 +01006544 vmcs_writel(GUEST_PENDING_DBG_EXCEPTIONS, 0);
Wanpeng Lia554d202017-10-11 05:10:19 -07006545 if (kvm_mpx_supported())
6546 vmcs_write64(GUEST_BNDCFGS, 0);
Avi Kivitye00c8cf2007-10-21 11:00:39 +02006547
Avi Kivitye00c8cf2007-10-21 11:00:39 +02006548 setup_msrs(vmx);
6549
Avi Kivity6aa8b732006-12-10 02:21:36 -08006550 vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, 0); /* 22.2.1 */
6551
Nadav Amitd28bc9d2015-04-13 14:34:08 +03006552 if (cpu_has_vmx_tpr_shadow() && !init_event) {
Sheng Yangf78e0e22007-10-29 09:40:42 +08006553 vmcs_write64(VIRTUAL_APIC_PAGE_ADDR, 0);
Paolo Bonzini35754c92015-07-29 12:05:37 +02006554 if (cpu_need_tpr_shadow(vcpu))
Sheng Yangf78e0e22007-10-29 09:40:42 +08006555 vmcs_write64(VIRTUAL_APIC_PAGE_ADDR,
Nadav Amitd28bc9d2015-04-13 14:34:08 +03006556 __pa(vcpu->arch.apic->regs));
Sheng Yangf78e0e22007-10-29 09:40:42 +08006557 vmcs_write32(TPR_THRESHOLD, 0);
6558 }
6559
Paolo Bonzinia73896c2014-11-02 07:54:30 +01006560 kvm_make_request(KVM_REQ_APIC_PAGE_RELOAD, vcpu);
Avi Kivity6aa8b732006-12-10 02:21:36 -08006561
Sheng Yang2384d2b2008-01-17 15:14:33 +08006562 if (vmx->vpid != 0)
6563 vmcs_write16(VIRTUAL_PROCESSOR_ID, vmx->vpid);
6564
Nadav Amitd28bc9d2015-04-13 14:34:08 +03006565 cr0 = X86_CR0_NW | X86_CR0_CD | X86_CR0_ET;
Nadav Amitd28bc9d2015-04-13 14:34:08 +03006566 vmx->vcpu.arch.cr0 = cr0;
Bruce Rogersf2463242016-04-28 14:49:21 -06006567 vmx_set_cr0(vcpu, cr0); /* enter rmode */
Nadav Amitd28bc9d2015-04-13 14:34:08 +03006568 vmx_set_cr4(vcpu, 0);
Paolo Bonzini56908912015-10-19 11:30:19 +02006569 vmx_set_efer(vcpu, 0);
Paolo Bonzinibd7e5b02017-02-03 21:18:52 -08006570
Nadav Amitd28bc9d2015-04-13 14:34:08 +03006571 update_exception_bitmap(vcpu);
Avi Kivity6aa8b732006-12-10 02:21:36 -08006572
Wanpeng Lidd5f5342015-09-23 18:26:57 +08006573 vpid_sync_context(vmx->vpid);
Wanpeng Licaa057a2018-03-12 04:53:03 -07006574 if (init_event)
6575 vmx_clear_hlt(vcpu);
Avi Kivity6aa8b732006-12-10 02:21:36 -08006576}
6577
Nadav Har'Elb6f12502011-05-25 23:13:06 +03006578/*
6579 * In nested virtualization, check if L1 asked to exit on external interrupts.
6580 * For most existing hypervisors, this will always return true.
6581 */
6582static bool nested_exit_on_intr(struct kvm_vcpu *vcpu)
6583{
6584 return get_vmcs12(vcpu)->pin_based_vm_exec_control &
6585 PIN_BASED_EXT_INTR_MASK;
6586}
6587
Bandan Das77b0f5d2014-04-19 18:17:45 -04006588/*
6589 * In nested virtualization, check if L1 has set
6590 * VM_EXIT_ACK_INTR_ON_EXIT.
6591 */
6592static bool nested_exit_intr_ack_set(struct kvm_vcpu *vcpu)
6593{
6594 return get_vmcs12(vcpu)->vm_exit_controls &
6595 VM_EXIT_ACK_INTR_ON_EXIT;
6596}
6597
Jan Kiszkaea8ceb82013-04-14 21:04:26 +02006598static bool nested_exit_on_nmi(struct kvm_vcpu *vcpu)
6599{
Krish Sadhukhan0c7f6502018-02-20 21:24:39 -05006600 return nested_cpu_has_nmi_exiting(get_vmcs12(vcpu));
Jan Kiszkaea8ceb82013-04-14 21:04:26 +02006601}
6602
Jan Kiszkac9a79532014-03-07 20:03:15 +01006603static void enable_irq_window(struct kvm_vcpu *vcpu)
Jan Kiszka3b86cd92008-09-26 09:30:57 +02006604{
Paolo Bonzini47c01522016-12-19 11:44:07 +01006605 vmcs_set_bits(CPU_BASED_VM_EXEC_CONTROL,
6606 CPU_BASED_VIRTUAL_INTR_PENDING);
Jan Kiszka3b86cd92008-09-26 09:30:57 +02006607}
6608
Jan Kiszkac9a79532014-03-07 20:03:15 +01006609static void enable_nmi_window(struct kvm_vcpu *vcpu)
Jan Kiszka3b86cd92008-09-26 09:30:57 +02006610{
Paolo Bonzinid02fcf52017-11-06 13:31:13 +01006611 if (!enable_vnmi ||
Paolo Bonzini8a1b4392017-11-06 13:31:12 +01006612 vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & GUEST_INTR_STATE_STI) {
Jan Kiszkac9a79532014-03-07 20:03:15 +01006613 enable_irq_window(vcpu);
6614 return;
6615 }
Jan Kiszka03b28f82013-04-29 16:46:42 +02006616
Paolo Bonzini47c01522016-12-19 11:44:07 +01006617 vmcs_set_bits(CPU_BASED_VM_EXEC_CONTROL,
6618 CPU_BASED_VIRTUAL_NMI_PENDING);
Jan Kiszka3b86cd92008-09-26 09:30:57 +02006619}
6620
Gleb Natapov66fd3f72009-05-11 13:35:50 +03006621static void vmx_inject_irq(struct kvm_vcpu *vcpu)
Eddie Dong85f455f2007-07-06 12:20:49 +03006622{
Avi Kivity9c8cba32007-11-22 11:42:59 +02006623 struct vcpu_vmx *vmx = to_vmx(vcpu);
Gleb Natapov66fd3f72009-05-11 13:35:50 +03006624 uint32_t intr;
6625 int irq = vcpu->arch.interrupt.nr;
Avi Kivity9c8cba32007-11-22 11:42:59 +02006626
Marcelo Tosatti229456f2009-06-17 09:22:14 -03006627 trace_kvm_inj_virq(irq);
Feng (Eric) Liu2714d1d2008-04-10 15:31:10 -04006628
Avi Kivityfa89a812008-09-01 15:57:51 +03006629 ++vcpu->stat.irq_injections;
Avi Kivity7ffd92c2009-06-09 14:10:45 +03006630 if (vmx->rmode.vm86_active) {
Serge E. Hallyn71f98332011-04-13 09:12:54 -05006631 int inc_eip = 0;
6632 if (vcpu->arch.interrupt.soft)
6633 inc_eip = vcpu->arch.event_exit_inst_len;
6634 if (kvm_inject_realmode_interrupt(vcpu, irq, inc_eip) != EMULATE_DONE)
Mohammed Gamala92601b2010-09-19 14:34:07 +02006635 kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu);
Eddie Dong85f455f2007-07-06 12:20:49 +03006636 return;
6637 }
Gleb Natapov66fd3f72009-05-11 13:35:50 +03006638 intr = irq | INTR_INFO_VALID_MASK;
6639 if (vcpu->arch.interrupt.soft) {
6640 intr |= INTR_TYPE_SOFT_INTR;
6641 vmcs_write32(VM_ENTRY_INSTRUCTION_LEN,
6642 vmx->vcpu.arch.event_exit_inst_len);
6643 } else
6644 intr |= INTR_TYPE_EXT_INTR;
6645 vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, intr);
Wanpeng Licaa057a2018-03-12 04:53:03 -07006646
6647 vmx_clear_hlt(vcpu);
Eddie Dong85f455f2007-07-06 12:20:49 +03006648}
6649
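/*
 * Illustrative sketch, not part of the kernel source: the word written to
 * VM_ENTRY_INTR_INFO_FIELD above packs the vector into bits 7:0, the event
 * type into bits 10:8 (0 = external interrupt, 2 = NMI, 4 = software
 * interrupt) and the valid flag into bit 31; that is what the
 * "irq | INTR_TYPE_* | INTR_INFO_VALID_MASK" expressions build.  A
 * hypothetical stand-alone builder:
 */
static u32 sketch_build_intr_info(u8 vector, u32 event_type)
{
	return vector | (event_type << 8) | (1u << 31);	/* bit 31 = valid */
}
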
Sheng Yangf08864b2008-05-15 18:23:25 +08006650static void vmx_inject_nmi(struct kvm_vcpu *vcpu)
6651{
Jan Kiszka66a5a342008-09-26 09:30:51 +02006652 struct vcpu_vmx *vmx = to_vmx(vcpu);
6653
Paolo Bonzinid02fcf52017-11-06 13:31:13 +01006654 if (!enable_vnmi) {
Paolo Bonzini8a1b4392017-11-06 13:31:12 +01006655 /*
6656 * Tracking the NMI-blocked state in software is built upon
6657 * finding the next open IRQ window. This, in turn, depends on
6658 * well-behaving guests: They have to keep IRQs disabled at
6659 * least as long as the NMI handler runs. Otherwise we may
6660 * cause NMI nesting, maybe breaking the guest. But as this is
6661 * highly unlikely, we can live with the residual risk.
6662 */
6663 vmx->loaded_vmcs->soft_vnmi_blocked = 1;
6664 vmx->loaded_vmcs->vnmi_blocked_time = 0;
6665 }
6666
Paolo Bonzini4c4a6f72017-07-14 13:36:11 +02006667 ++vcpu->stat.nmi_injections;
6668 vmx->loaded_vmcs->nmi_known_unmasked = false;
Jan Kiszka3b86cd92008-09-26 09:30:57 +02006669
Avi Kivity7ffd92c2009-06-09 14:10:45 +03006670 if (vmx->rmode.vm86_active) {
Serge E. Hallyn71f98332011-04-13 09:12:54 -05006671 if (kvm_inject_realmode_interrupt(vcpu, NMI_VECTOR, 0) != EMULATE_DONE)
Mohammed Gamala92601b2010-09-19 14:34:07 +02006672 kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu);
Jan Kiszka66a5a342008-09-26 09:30:51 +02006673 return;
6674 }
Wanpeng Lic5a6d5f2016-09-22 17:55:54 +08006675
Sheng Yangf08864b2008-05-15 18:23:25 +08006676 vmcs_write32(VM_ENTRY_INTR_INFO_FIELD,
6677 INTR_TYPE_NMI_INTR | INTR_INFO_VALID_MASK | NMI_VECTOR);
Wanpeng Licaa057a2018-03-12 04:53:03 -07006678
6679 vmx_clear_hlt(vcpu);
Sheng Yangf08864b2008-05-15 18:23:25 +08006680}
6681
Jan Kiszka3cfc3092009-11-12 01:04:25 +01006682static bool vmx_get_nmi_mask(struct kvm_vcpu *vcpu)
6683{
Paolo Bonzini4c4a6f72017-07-14 13:36:11 +02006684 struct vcpu_vmx *vmx = to_vmx(vcpu);
6685 bool masked;
6686
Paolo Bonzinid02fcf52017-11-06 13:31:13 +01006687 if (!enable_vnmi)
Paolo Bonzini8a1b4392017-11-06 13:31:12 +01006688 return vmx->loaded_vmcs->soft_vnmi_blocked;
Paolo Bonzini4c4a6f72017-07-14 13:36:11 +02006689 if (vmx->loaded_vmcs->nmi_known_unmasked)
Avi Kivity9d58b932011-03-07 16:52:07 +02006690 return false;
Paolo Bonzini4c4a6f72017-07-14 13:36:11 +02006691 masked = vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & GUEST_INTR_STATE_NMI;
6692 vmx->loaded_vmcs->nmi_known_unmasked = !masked;
6693 return masked;
Jan Kiszka3cfc3092009-11-12 01:04:25 +01006694}
6695
6696static void vmx_set_nmi_mask(struct kvm_vcpu *vcpu, bool masked)
6697{
6698 struct vcpu_vmx *vmx = to_vmx(vcpu);
6699
Paolo Bonzinid02fcf52017-11-06 13:31:13 +01006700 if (!enable_vnmi) {
Paolo Bonzini8a1b4392017-11-06 13:31:12 +01006701 if (vmx->loaded_vmcs->soft_vnmi_blocked != masked) {
6702 vmx->loaded_vmcs->soft_vnmi_blocked = masked;
6703 vmx->loaded_vmcs->vnmi_blocked_time = 0;
6704 }
6705 } else {
6706 vmx->loaded_vmcs->nmi_known_unmasked = !masked;
6707 if (masked)
6708 vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO,
6709 GUEST_INTR_STATE_NMI);
6710 else
6711 vmcs_clear_bits(GUEST_INTERRUPTIBILITY_INFO,
6712 GUEST_INTR_STATE_NMI);
6713 }
Jan Kiszka3cfc3092009-11-12 01:04:25 +01006714}
6715
Jan Kiszka2505dc92013-04-14 12:12:47 +02006716static int vmx_nmi_allowed(struct kvm_vcpu *vcpu)
6717{
Jan Kiszkab6b8a142014-03-07 20:03:12 +01006718 if (to_vmx(vcpu)->nested.nested_run_pending)
6719 return 0;
Jan Kiszkaea8ceb82013-04-14 21:04:26 +02006720
Paolo Bonzinid02fcf52017-11-06 13:31:13 +01006721 if (!enable_vnmi &&
Paolo Bonzini8a1b4392017-11-06 13:31:12 +01006722 to_vmx(vcpu)->loaded_vmcs->soft_vnmi_blocked)
6723 return 0;
6724
Jan Kiszka2505dc92013-04-14 12:12:47 +02006725 return !(vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) &
6726 (GUEST_INTR_STATE_MOV_SS | GUEST_INTR_STATE_STI
6727 | GUEST_INTR_STATE_NMI));
6728}
6729
Gleb Natapov78646122009-03-23 12:12:11 +02006730static int vmx_interrupt_allowed(struct kvm_vcpu *vcpu)
6731{
Jan Kiszkab6b8a142014-03-07 20:03:12 +01006732 return (!to_vmx(vcpu)->nested.nested_run_pending &&
6733 vmcs_readl(GUEST_RFLAGS) & X86_EFLAGS_IF) &&
Gleb Natapovc4282df2009-04-21 17:45:07 +03006734 !(vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) &
6735 (GUEST_INTR_STATE_STI | GUEST_INTR_STATE_MOV_SS));
Gleb Natapov78646122009-03-23 12:12:11 +02006736}
6737
Izik Eiduscbc94022007-10-25 00:29:55 +02006738static int vmx_set_tss_addr(struct kvm *kvm, unsigned int addr)
6739{
6740 int ret;
Izik Eiduscbc94022007-10-25 00:29:55 +02006741
Sean Christophersonf7eaeb02018-03-05 12:04:36 -08006742 if (enable_unrestricted_guest)
6743 return 0;
6744
Paolo Bonzini1d8007b2015-10-12 13:38:32 +02006745 ret = x86_set_memory_region(kvm, TSS_PRIVATE_MEMSLOT, addr,
6746 PAGE_SIZE * 3);
Izik Eiduscbc94022007-10-25 00:29:55 +02006747 if (ret)
6748 return ret;
Sean Christopherson40bbb9d2018-03-20 12:17:20 -07006749 to_kvm_vmx(kvm)->tss_addr = addr;
Paolo Bonzini1f755a82014-09-16 13:37:40 +02006750 return init_rmode_tss(kvm);
Izik Eiduscbc94022007-10-25 00:29:55 +02006751}
6752
Sean Christopherson2ac52ab2018-03-20 12:17:19 -07006753static int vmx_set_identity_map_addr(struct kvm *kvm, u64 ident_addr)
6754{
Sean Christopherson40bbb9d2018-03-20 12:17:20 -07006755 to_kvm_vmx(kvm)->ept_identity_map_addr = ident_addr;
Sean Christopherson2ac52ab2018-03-20 12:17:19 -07006756 return 0;
6757}
6758
Gleb Natapov0ca1b4f2012-12-20 16:57:47 +02006759static bool rmode_exception(struct kvm_vcpu *vcpu, int vec)
Avi Kivity6aa8b732006-12-10 02:21:36 -08006760{
Jan Kiszka77ab6db2008-07-14 12:28:51 +02006761 switch (vec) {
Jan Kiszka77ab6db2008-07-14 12:28:51 +02006762 case BP_VECTOR:
Jan Kiszkac573cd22010-02-23 17:47:53 +01006763 /*
6764 * Update instruction length as we may reinject the exception
6765 * from user space while in guest debugging mode.
6766 */
6767 to_vmx(vcpu)->vcpu.arch.event_exit_inst_len =
6768 vmcs_read32(VM_EXIT_INSTRUCTION_LEN);
Jan Kiszkad0bfb942008-12-15 13:52:10 +01006769 if (vcpu->guest_debug & KVM_GUESTDBG_USE_SW_BP)
Gleb Natapov0ca1b4f2012-12-20 16:57:47 +02006770 return false;
6771 /* fall through */
6772 case DB_VECTOR:
6773 if (vcpu->guest_debug &
6774 (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP))
6775 return false;
Jan Kiszkad0bfb942008-12-15 13:52:10 +01006776 /* fall through */
6777 case DE_VECTOR:
Jan Kiszka77ab6db2008-07-14 12:28:51 +02006778 case OF_VECTOR:
6779 case BR_VECTOR:
6780 case UD_VECTOR:
6781 case DF_VECTOR:
6782 case SS_VECTOR:
6783 case GP_VECTOR:
6784 case MF_VECTOR:
Gleb Natapov0ca1b4f2012-12-20 16:57:47 +02006785 return true;
6786 break;
Jan Kiszka77ab6db2008-07-14 12:28:51 +02006787 }
Gleb Natapov0ca1b4f2012-12-20 16:57:47 +02006788 return false;
6789}
6790
6791static int handle_rmode_exception(struct kvm_vcpu *vcpu,
6792 int vec, u32 err_code)
6793{
6794 /*
6795	 * An instruction with the address-size override prefix (opcode 0x67)
6796	 * causes the #SS fault with error code 0 in VM86 mode.
6797 */
6798 if (((vec == GP_VECTOR) || (vec == SS_VECTOR)) && err_code == 0) {
6799 if (emulate_instruction(vcpu, 0) == EMULATE_DONE) {
6800 if (vcpu->arch.halt_request) {
6801 vcpu->arch.halt_request = 0;
Joel Schopp5cb56052015-03-02 13:43:31 -06006802 return kvm_vcpu_halt(vcpu);
Gleb Natapov0ca1b4f2012-12-20 16:57:47 +02006803 }
6804 return 1;
6805 }
6806 return 0;
6807 }
6808
6809 /*
6810 * Forward all other exceptions that are valid in real mode.
6811 * FIXME: Breaks guest debugging in real mode, needs to be fixed with
6812 * the required debugging infrastructure rework.
6813 */
6814 kvm_queue_exception(vcpu, vec);
6815 return 1;
Avi Kivity6aa8b732006-12-10 02:21:36 -08006816}
6817
Andi Kleena0861c02009-06-08 17:37:09 +08006818/*
6819 * Trigger machine check on the host. We assume all the MSRs are already set up
6820 * by the CPU and that we still run on the same CPU as the MCE occurred on.
6821 * We pass a fake environment to the machine check handler because we want
6822	 * the guest to always be treated like user space, no matter what context
6823 * it used internally.
6824 */
6825static void kvm_machine_check(void)
6826{
6827#if defined(CONFIG_X86_MCE) && defined(CONFIG_X86_64)
6828 struct pt_regs regs = {
6829 .cs = 3, /* Fake ring 3 no matter what the guest ran on */
6830 .flags = X86_EFLAGS_IF,
6831 };
6832
6833 do_machine_check(&regs, 0);
6834#endif
6835}
6836
Avi Kivity851ba692009-08-24 11:10:17 +03006837static int handle_machine_check(struct kvm_vcpu *vcpu)
Andi Kleena0861c02009-06-08 17:37:09 +08006838{
6839 /* already handled by vcpu_run */
6840 return 1;
6841}
6842
Avi Kivity851ba692009-08-24 11:10:17 +03006843static int handle_exception(struct kvm_vcpu *vcpu)
Avi Kivity6aa8b732006-12-10 02:21:36 -08006844{
Avi Kivity1155f762007-11-22 11:30:47 +02006845 struct vcpu_vmx *vmx = to_vmx(vcpu);
Avi Kivity851ba692009-08-24 11:10:17 +03006846 struct kvm_run *kvm_run = vcpu->run;
Jan Kiszkad0bfb942008-12-15 13:52:10 +01006847 u32 intr_info, ex_no, error_code;
Jan Kiszka42dbaa52008-12-15 13:52:10 +01006848 unsigned long cr2, rip, dr6;
Avi Kivity6aa8b732006-12-10 02:21:36 -08006849 u32 vect_info;
6850 enum emulation_result er;
6851
Avi Kivity1155f762007-11-22 11:30:47 +02006852 vect_info = vmx->idt_vectoring_info;
Avi Kivity88786472011-03-07 17:39:45 +02006853 intr_info = vmx->exit_intr_info;
Avi Kivity6aa8b732006-12-10 02:21:36 -08006854
Andi Kleena0861c02009-06-08 17:37:09 +08006855 if (is_machine_check(intr_info))
Avi Kivity851ba692009-08-24 11:10:17 +03006856 return handle_machine_check(vcpu);
Andi Kleena0861c02009-06-08 17:37:09 +08006857
Jim Mattsonef85b672016-12-12 11:01:37 -08006858 if (is_nmi(intr_info))
Avi Kivity1b6269d2007-10-09 12:12:19 +02006859 return 1; /* already handled by vmx_vcpu_run() */
Anthony Liguori2ab455c2007-04-27 09:29:49 +03006860
Wanpeng Li082d06e2018-04-03 16:28:48 -07006861 if (is_invalid_opcode(intr_info))
6862 return handle_ud(vcpu);
Anthony Liguori7aa81cc2007-09-17 14:57:50 -05006863
Avi Kivity6aa8b732006-12-10 02:21:36 -08006864 error_code = 0;
Ryan Harper2e113842008-02-11 10:26:38 -06006865 if (intr_info & INTR_INFO_DELIVER_CODE_MASK)
Avi Kivity6aa8b732006-12-10 02:21:36 -08006866 error_code = vmcs_read32(VM_EXIT_INTR_ERROR_CODE);
Xiao Guangrongbf4ca232012-10-17 13:48:06 +08006867
Liran Alon9e869482018-03-12 13:12:51 +02006868 if (!vmx->rmode.vm86_active && is_gp_fault(intr_info)) {
6869 WARN_ON_ONCE(!enable_vmware_backdoor);
6870 er = emulate_instruction(vcpu,
6871 EMULTYPE_VMWARE | EMULTYPE_NO_UD_ON_FAIL);
6872 if (er == EMULATE_USER_EXIT)
6873 return 0;
6874 else if (er != EMULATE_DONE)
6875 kvm_queue_exception_e(vcpu, GP_VECTOR, error_code);
6876 return 1;
6877 }
6878
Xiao Guangrongbf4ca232012-10-17 13:48:06 +08006879 /*
6880	 * A #PF with PFEC.RSVD = 1 indicates the guest is accessing
6881	 * MMIO; it is better to report an internal error.
6882 * See the comments in vmx_handle_exit.
6883 */
6884 if ((vect_info & VECTORING_INFO_VALID_MASK) &&
6885 !(is_page_fault(intr_info) && !(error_code & PFERR_RSVD_MASK))) {
6886 vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
6887 vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_SIMUL_EX;
Radim Krčmář80f0e952015-04-02 21:11:05 +02006888 vcpu->run->internal.ndata = 3;
Xiao Guangrongbf4ca232012-10-17 13:48:06 +08006889 vcpu->run->internal.data[0] = vect_info;
6890 vcpu->run->internal.data[1] = intr_info;
Radim Krčmář80f0e952015-04-02 21:11:05 +02006891 vcpu->run->internal.data[2] = error_code;
Xiao Guangrongbf4ca232012-10-17 13:48:06 +08006892 return 0;
6893 }
6894
Avi Kivity6aa8b732006-12-10 02:21:36 -08006895 if (is_page_fault(intr_info)) {
6896 cr2 = vmcs_readl(EXIT_QUALIFICATION);
Wanpeng Li1261bfa2017-07-13 18:30:40 -07006897 /* EPT won't cause page fault directly */
6898 WARN_ON_ONCE(!vcpu->arch.apf.host_apf_reason && enable_ept);
Paolo Bonzinid0006532017-08-11 18:36:43 +02006899 return kvm_handle_page_fault(vcpu, error_code, cr2, NULL, 0);
Avi Kivity6aa8b732006-12-10 02:21:36 -08006900 }
6901
Jan Kiszkad0bfb942008-12-15 13:52:10 +01006902 ex_no = intr_info & INTR_INFO_VECTOR_MASK;
Gleb Natapov0ca1b4f2012-12-20 16:57:47 +02006903
6904 if (vmx->rmode.vm86_active && rmode_exception(vcpu, ex_no))
6905 return handle_rmode_exception(vcpu, ex_no, error_code);
6906
Jan Kiszka42dbaa52008-12-15 13:52:10 +01006907 switch (ex_no) {
Eric Northup54a20552015-11-03 18:03:53 +01006908 case AC_VECTOR:
6909 kvm_queue_exception_e(vcpu, AC_VECTOR, error_code);
6910 return 1;
Jan Kiszka42dbaa52008-12-15 13:52:10 +01006911 case DB_VECTOR:
6912 dr6 = vmcs_readl(EXIT_QUALIFICATION);
6913 if (!(vcpu->guest_debug &
6914 (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP))) {
Jan Kiszka8246bf52014-01-04 18:47:17 +01006915 vcpu->arch.dr6 &= ~15;
Nadav Amit6f43ed02014-07-15 17:37:46 +03006916 vcpu->arch.dr6 |= dr6 | DR6_RTM;
Linus Torvalds32d43cd2018-03-20 12:16:59 -07006917 if (is_icebp(intr_info))
Huw Daviesfd2a4452014-04-16 10:02:51 +01006918 skip_emulated_instruction(vcpu);
6919
Jan Kiszka42dbaa52008-12-15 13:52:10 +01006920 kvm_queue_exception(vcpu, DB_VECTOR);
6921 return 1;
6922 }
6923 kvm_run->debug.arch.dr6 = dr6 | DR6_FIXED_1;
6924 kvm_run->debug.arch.dr7 = vmcs_readl(GUEST_DR7);
6925 /* fall through */
6926 case BP_VECTOR:
Jan Kiszkac573cd22010-02-23 17:47:53 +01006927 /*
6928 * Update instruction length as we may reinject #BP from
6929 * user space while in guest debugging mode. Reading it for
6930	 * #DB as well causes no harm; it is not used in that case.
6931 */
6932 vmx->vcpu.arch.event_exit_inst_len =
6933 vmcs_read32(VM_EXIT_INSTRUCTION_LEN);
Avi Kivity6aa8b732006-12-10 02:21:36 -08006934 kvm_run->exit_reason = KVM_EXIT_DEBUG;
Avi Kivity0a434bb2011-04-28 15:59:33 +03006935 rip = kvm_rip_read(vcpu);
Jan Kiszkad0bfb942008-12-15 13:52:10 +01006936 kvm_run->debug.arch.pc = vmcs_readl(GUEST_CS_BASE) + rip;
6937 kvm_run->debug.arch.exception = ex_no;
Jan Kiszka42dbaa52008-12-15 13:52:10 +01006938 break;
6939 default:
Jan Kiszkad0bfb942008-12-15 13:52:10 +01006940 kvm_run->exit_reason = KVM_EXIT_EXCEPTION;
6941 kvm_run->ex.exception = ex_no;
6942 kvm_run->ex.error_code = error_code;
Jan Kiszka42dbaa52008-12-15 13:52:10 +01006943 break;
Avi Kivity6aa8b732006-12-10 02:21:36 -08006944 }
Avi Kivity6aa8b732006-12-10 02:21:36 -08006945 return 0;
6946}
6947
Avi Kivity851ba692009-08-24 11:10:17 +03006948static int handle_external_interrupt(struct kvm_vcpu *vcpu)
Avi Kivity6aa8b732006-12-10 02:21:36 -08006949{
Avi Kivity1165f5f2007-04-19 17:27:43 +03006950 ++vcpu->stat.irq_exits;
Avi Kivity6aa8b732006-12-10 02:21:36 -08006951 return 1;
6952}
6953
Avi Kivity851ba692009-08-24 11:10:17 +03006954static int handle_triple_fault(struct kvm_vcpu *vcpu)
Avi Kivity988ad742007-02-12 00:54:36 -08006955{
Avi Kivity851ba692009-08-24 11:10:17 +03006956 vcpu->run->exit_reason = KVM_EXIT_SHUTDOWN;
Wanpeng Libbeac282017-08-09 22:33:12 -07006957 vcpu->mmio_needed = 0;
Avi Kivity988ad742007-02-12 00:54:36 -08006958 return 0;
6959}
Avi Kivity6aa8b732006-12-10 02:21:36 -08006960
Avi Kivity851ba692009-08-24 11:10:17 +03006961static int handle_io(struct kvm_vcpu *vcpu)
Avi Kivity6aa8b732006-12-10 02:21:36 -08006962{
He, Qingbfdaab02007-09-12 14:18:28 +08006963 unsigned long exit_qualification;
Sean Christophersondca7f122018-03-08 08:57:27 -08006964 int size, in, string;
Avi Kivity039576c2007-03-20 12:46:50 +02006965 unsigned port;
Avi Kivity6aa8b732006-12-10 02:21:36 -08006966
He, Qingbfdaab02007-09-12 14:18:28 +08006967 exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
Avi Kivity039576c2007-03-20 12:46:50 +02006968 string = (exit_qualification & 16) != 0;
Laurent Viviere70669a2007-08-05 10:36:40 +03006969
Gleb Natapovcf8f70b2010-03-18 15:20:23 +02006970 ++vcpu->stat.io_exits;
6971
Sean Christopherson432baf62018-03-08 08:57:26 -08006972 if (string)
Andre Przywara51d8b662010-12-21 11:12:02 +01006973 return emulate_instruction(vcpu, 0) == EMULATE_DONE;
Gleb Natapovcf8f70b2010-03-18 15:20:23 +02006974
6975 port = exit_qualification >> 16;
6976 size = (exit_qualification & 7) + 1;
Sean Christopherson432baf62018-03-08 08:57:26 -08006977 in = (exit_qualification & 8) != 0;
Gleb Natapovcf8f70b2010-03-18 15:20:23 +02006978
Sean Christophersondca7f122018-03-08 08:57:27 -08006979 return kvm_fast_pio(vcpu, size, port, in);
Avi Kivity6aa8b732006-12-10 02:21:36 -08006980}
6981
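/*
 * Illustrative sketch, not part of the kernel source: the I/O exit
 * qualification read by handle_io() packs the access into one word:
 * bits 2:0 hold size-1, bit 3 the direction (1 = IN), bit 4 the string
 * flag and bits 31:16 the port number.  Decoded into a hypothetical
 * struct for clarity:
 */
struct sketch_io_exit {
	int size;		/* 1, 2 or 4 bytes */
	bool in;		/* true for IN/INS, false for OUT/OUTS */
	bool string;		/* INS/OUTS */
	unsigned int port;
};

static struct sketch_io_exit sketch_decode_io(unsigned long exit_qual)
{
	struct sketch_io_exit io = {
		.size	= (exit_qual & 7) + 1,
		.in	= (exit_qual & 8) != 0,
		.string	= (exit_qual & 16) != 0,
		.port	= exit_qual >> 16,
	};

	return io;
}
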
Ingo Molnar102d8322007-02-19 14:37:47 +02006982static void
6983vmx_patch_hypercall(struct kvm_vcpu *vcpu, unsigned char *hypercall)
6984{
6985 /*
6986 * Patch in the VMCALL instruction:
6987 */
6988 hypercall[0] = 0x0f;
6989 hypercall[1] = 0x01;
6990 hypercall[2] = 0xc1;
Ingo Molnar102d8322007-02-19 14:37:47 +02006991}
6992
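/*
 * Illustrative sketch, not part of the kernel source: 0x0f 0x01 0xc1 is
 * the three-byte encoding of VMCALL.  KVM uses this hook to rewrite a
 * guest's hypercall instruction into the one the current CPU accepts;
 * the SVM counterpart, VMMCALL, encodes as 0x0f 0x01 0xd9.  The two
 * encodings side by side:
 */
static const unsigned char sketch_vmcall_insn[3]  = { 0x0f, 0x01, 0xc1 };
static const unsigned char sketch_vmmcall_insn[3] = { 0x0f, 0x01, 0xd9 };
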
Guo Chao0fa06072012-06-28 15:16:19 +08006993/* called to set cr0 as appropriate for a mov-to-cr0 exit. */
Nadav Har'Eleeadf9e2011-05-25 23:14:38 +03006994static int handle_set_cr0(struct kvm_vcpu *vcpu, unsigned long val)
6995{
Nadav Har'Eleeadf9e2011-05-25 23:14:38 +03006996 if (is_guest_mode(vcpu)) {
Jan Kiszka1a0d74e2013-03-07 14:08:07 +01006997 struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
6998 unsigned long orig_val = val;
6999
Nadav Har'Eleeadf9e2011-05-25 23:14:38 +03007000 /*
7001 * We get here when L2 changed cr0 in a way that did not change
7002 * any of L1's shadowed bits (see nested_vmx_exit_handled_cr),
Jan Kiszka1a0d74e2013-03-07 14:08:07 +01007003 * but did change L0 shadowed bits. So we first calculate the
7004 * effective cr0 value that L1 would like to write into the
7005 * hardware. It consists of the L2-owned bits from the new
7006 * value combined with the L1-owned bits from L1's guest_cr0.
Nadav Har'Eleeadf9e2011-05-25 23:14:38 +03007007 */
Jan Kiszka1a0d74e2013-03-07 14:08:07 +01007008 val = (val & ~vmcs12->cr0_guest_host_mask) |
7009 (vmcs12->guest_cr0 & vmcs12->cr0_guest_host_mask);
7010
David Matlack38991522016-11-29 18:14:08 -08007011 if (!nested_guest_cr0_valid(vcpu, val))
Nadav Har'Eleeadf9e2011-05-25 23:14:38 +03007012 return 1;
Jan Kiszka1a0d74e2013-03-07 14:08:07 +01007013
7014 if (kvm_set_cr0(vcpu, val))
7015 return 1;
7016 vmcs_writel(CR0_READ_SHADOW, orig_val);
Nadav Har'Eleeadf9e2011-05-25 23:14:38 +03007017 return 0;
Jan Kiszka1a0d74e2013-03-07 14:08:07 +01007018 } else {
7019 if (to_vmx(vcpu)->nested.vmxon &&
David Matlack38991522016-11-29 18:14:08 -08007020 !nested_host_cr0_valid(vcpu, val))
Jan Kiszka1a0d74e2013-03-07 14:08:07 +01007021 return 1;
David Matlack38991522016-11-29 18:14:08 -08007022
Nadav Har'Eleeadf9e2011-05-25 23:14:38 +03007023 return kvm_set_cr0(vcpu, val);
Jan Kiszka1a0d74e2013-03-07 14:08:07 +01007024 }
Nadav Har'Eleeadf9e2011-05-25 23:14:38 +03007025}
7026
7027static int handle_set_cr4(struct kvm_vcpu *vcpu, unsigned long val)
7028{
7029 if (is_guest_mode(vcpu)) {
Jan Kiszka1a0d74e2013-03-07 14:08:07 +01007030 struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
7031 unsigned long orig_val = val;
7032
7033 /* analogously to handle_set_cr0 */
7034 val = (val & ~vmcs12->cr4_guest_host_mask) |
7035 (vmcs12->guest_cr4 & vmcs12->cr4_guest_host_mask);
7036 if (kvm_set_cr4(vcpu, val))
Nadav Har'Eleeadf9e2011-05-25 23:14:38 +03007037 return 1;
Jan Kiszka1a0d74e2013-03-07 14:08:07 +01007038 vmcs_writel(CR4_READ_SHADOW, orig_val);
Nadav Har'Eleeadf9e2011-05-25 23:14:38 +03007039 return 0;
7040 } else
7041 return kvm_set_cr4(vcpu, val);
7042}
7043
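/*
 * Illustrative sketch, not part of the kernel source: the nested CR0/CR4
 * handling above merges the bits L2 is allowed to own (guest/host mask
 * bit clear) from the newly written value with the bits L1 owns (mask
 * bit set), taken from L1's guest_cr0/guest_cr4.  As a stand-alone
 * helper:
 */
static unsigned long sketch_effective_guest_cr(unsigned long new_val,
					       unsigned long l1_guest_cr,
					       unsigned long guest_host_mask)
{
	/* mask bit set => L1 owns the bit, so take it from L1's value */
	return (new_val & ~guest_host_mask) |
	       (l1_guest_cr & guest_host_mask);
}
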
Paolo Bonzini0367f202016-07-12 10:44:55 +02007044static int handle_desc(struct kvm_vcpu *vcpu)
7045{
7046 WARN_ON(!(vcpu->arch.cr4 & X86_CR4_UMIP));
7047 return emulate_instruction(vcpu, 0) == EMULATE_DONE;
7048}
7049
Avi Kivity851ba692009-08-24 11:10:17 +03007050static int handle_cr(struct kvm_vcpu *vcpu)
Avi Kivity6aa8b732006-12-10 02:21:36 -08007051{
Marcelo Tosatti229456f2009-06-17 09:22:14 -03007052 unsigned long exit_qualification, val;
Avi Kivity6aa8b732006-12-10 02:21:36 -08007053 int cr;
7054 int reg;
Avi Kivity49a9b072010-06-10 17:02:14 +03007055 int err;
Kyle Huey6affcbe2016-11-29 12:40:40 -08007056 int ret;
Avi Kivity6aa8b732006-12-10 02:21:36 -08007057
He, Qingbfdaab02007-09-12 14:18:28 +08007058 exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
Avi Kivity6aa8b732006-12-10 02:21:36 -08007059 cr = exit_qualification & 15;
7060 reg = (exit_qualification >> 8) & 15;
7061 switch ((exit_qualification >> 4) & 3) {
7062 case 0: /* mov to cr */
Nadav Amit1e32c072014-06-18 17:19:25 +03007063 val = kvm_register_readl(vcpu, reg);
Marcelo Tosatti229456f2009-06-17 09:22:14 -03007064 trace_kvm_cr_write(cr, val);
Avi Kivity6aa8b732006-12-10 02:21:36 -08007065 switch (cr) {
7066 case 0:
Nadav Har'Eleeadf9e2011-05-25 23:14:38 +03007067 err = handle_set_cr0(vcpu, val);
Kyle Huey6affcbe2016-11-29 12:40:40 -08007068 return kvm_complete_insn_gp(vcpu, err);
Avi Kivity6aa8b732006-12-10 02:21:36 -08007069 case 3:
Sean Christophersone1de91c2018-03-05 12:04:41 -08007070 WARN_ON_ONCE(enable_unrestricted_guest);
Avi Kivity23902182010-06-10 17:02:16 +03007071 err = kvm_set_cr3(vcpu, val);
Kyle Huey6affcbe2016-11-29 12:40:40 -08007072 return kvm_complete_insn_gp(vcpu, err);
Avi Kivity6aa8b732006-12-10 02:21:36 -08007073 case 4:
Nadav Har'Eleeadf9e2011-05-25 23:14:38 +03007074 err = handle_set_cr4(vcpu, val);
Kyle Huey6affcbe2016-11-29 12:40:40 -08007075 return kvm_complete_insn_gp(vcpu, err);
Gleb Natapov0a5fff192009-04-21 17:45:06 +03007076 case 8: {
7077 u8 cr8_prev = kvm_get_cr8(vcpu);
Nadav Amit1e32c072014-06-18 17:19:25 +03007078 u8 cr8 = (u8)val;
Andre Przywaraeea1cff2010-12-21 11:12:00 +01007079 err = kvm_set_cr8(vcpu, cr8);
Kyle Huey6affcbe2016-11-29 12:40:40 -08007080 ret = kvm_complete_insn_gp(vcpu, err);
Paolo Bonzini35754c92015-07-29 12:05:37 +02007081 if (lapic_in_kernel(vcpu))
Kyle Huey6affcbe2016-11-29 12:40:40 -08007082 return ret;
Gleb Natapov0a5fff192009-04-21 17:45:06 +03007083 if (cr8_prev <= cr8)
Kyle Huey6affcbe2016-11-29 12:40:40 -08007084 return ret;
7085 /*
7086 * TODO: we might be squashing a
7087 * KVM_GUESTDBG_SINGLESTEP-triggered
7088 * KVM_EXIT_DEBUG here.
7089 */
Avi Kivity851ba692009-08-24 11:10:17 +03007090 vcpu->run->exit_reason = KVM_EXIT_SET_TPR;
Gleb Natapov0a5fff192009-04-21 17:45:06 +03007091 return 0;
7092 }
Peter Senna Tschudin4b8073e2012-09-18 18:36:14 +02007093 }
Avi Kivity6aa8b732006-12-10 02:21:36 -08007094 break;
Anthony Liguori25c4c272007-04-27 09:29:21 +03007095 case 2: /* clts */
Paolo Bonzinibd7e5b02017-02-03 21:18:52 -08007096 WARN_ONCE(1, "Guest should always own CR0.TS");
7097 vmx_set_cr0(vcpu, kvm_read_cr0_bits(vcpu, ~X86_CR0_TS));
Avi Kivity4d4ec082009-12-29 18:07:30 +02007098 trace_kvm_cr_write(0, kvm_read_cr0(vcpu));
Kyle Huey6affcbe2016-11-29 12:40:40 -08007099 return kvm_skip_emulated_instruction(vcpu);
Avi Kivity6aa8b732006-12-10 02:21:36 -08007100 case 1: /*mov from cr*/
7101 switch (cr) {
7102 case 3:
Sean Christophersone1de91c2018-03-05 12:04:41 -08007103 WARN_ON_ONCE(enable_unrestricted_guest);
Avi Kivity9f8fe502010-12-05 17:30:00 +02007104 val = kvm_read_cr3(vcpu);
7105 kvm_register_write(vcpu, reg, val);
7106 trace_kvm_cr_read(cr, val);
Kyle Huey6affcbe2016-11-29 12:40:40 -08007107 return kvm_skip_emulated_instruction(vcpu);
Avi Kivity6aa8b732006-12-10 02:21:36 -08007108 case 8:
Marcelo Tosatti229456f2009-06-17 09:22:14 -03007109 val = kvm_get_cr8(vcpu);
7110 kvm_register_write(vcpu, reg, val);
7111 trace_kvm_cr_read(cr, val);
Kyle Huey6affcbe2016-11-29 12:40:40 -08007112 return kvm_skip_emulated_instruction(vcpu);
Avi Kivity6aa8b732006-12-10 02:21:36 -08007113 }
7114 break;
7115 case 3: /* lmsw */
Avi Kivitya1f83a72009-12-29 17:33:58 +02007116 val = (exit_qualification >> LMSW_SOURCE_DATA_SHIFT) & 0x0f;
Avi Kivity4d4ec082009-12-29 18:07:30 +02007117 trace_kvm_cr_write(0, (kvm_read_cr0(vcpu) & ~0xful) | val);
Avi Kivitya1f83a72009-12-29 17:33:58 +02007118 kvm_lmsw(vcpu, val);
Avi Kivity6aa8b732006-12-10 02:21:36 -08007119
Kyle Huey6affcbe2016-11-29 12:40:40 -08007120 return kvm_skip_emulated_instruction(vcpu);
Avi Kivity6aa8b732006-12-10 02:21:36 -08007121 default:
7122 break;
7123 }
Avi Kivity851ba692009-08-24 11:10:17 +03007124 vcpu->run->exit_reason = 0;
Christoffer Dalla737f252012-06-03 21:17:48 +03007125 vcpu_unimpl(vcpu, "unhandled control register: op %d cr %d\n",
Avi Kivity6aa8b732006-12-10 02:21:36 -08007126 (int)(exit_qualification >> 4) & 3, cr);
7127 return 0;
7128}
7129
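/*
 * Illustrative sketch, not part of the kernel source: handle_cr() decodes
 * the control-register exit qualification as bits 3:0 = CR number,
 * bits 5:4 = access type (0 mov-to-CR, 1 mov-from-CR, 2 CLTS, 3 LMSW) and
 * bits 11:8 = the general-purpose register involved.  A hypothetical
 * decoder making those fields explicit:
 */
struct sketch_cr_exit {
	int cr;
	int access_type;
	int reg;
};

static struct sketch_cr_exit sketch_decode_cr(unsigned long exit_qual)
{
	struct sketch_cr_exit d = {
		.cr		= exit_qual & 15,
		.access_type	= (exit_qual >> 4) & 3,
		.reg		= (exit_qual >> 8) & 15,
	};

	return d;
}
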
Avi Kivity851ba692009-08-24 11:10:17 +03007130static int handle_dr(struct kvm_vcpu *vcpu)
Avi Kivity6aa8b732006-12-10 02:21:36 -08007131{
He, Qingbfdaab02007-09-12 14:18:28 +08007132 unsigned long exit_qualification;
Nadav Amit16f8a6f2014-10-03 01:10:05 +03007133 int dr, dr7, reg;
7134
7135 exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
7136 dr = exit_qualification & DEBUG_REG_ACCESS_NUM;
7137
7138 /* First, if DR does not exist, trigger UD */
7139 if (!kvm_require_dr(vcpu, dr))
7140 return 1;
Avi Kivity6aa8b732006-12-10 02:21:36 -08007141
Jan Kiszkaf2483412010-01-20 18:20:20 +01007142 /* Do not handle if the CPL > 0, will trigger GP on re-entry */
Avi Kivity0a79b002009-09-01 12:03:25 +03007143 if (!kvm_require_cpl(vcpu, 0))
7144 return 1;
Nadav Amit16f8a6f2014-10-03 01:10:05 +03007145 dr7 = vmcs_readl(GUEST_DR7);
7146 if (dr7 & DR7_GD) {
Jan Kiszka42dbaa52008-12-15 13:52:10 +01007147 /*
7148 * As the vm-exit takes precedence over the debug trap, we
7149 * need to emulate the latter, either for the host or the
7150 * guest debugging itself.
7151 */
7152 if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP) {
Avi Kivity851ba692009-08-24 11:10:17 +03007153 vcpu->run->debug.arch.dr6 = vcpu->arch.dr6;
Nadav Amit16f8a6f2014-10-03 01:10:05 +03007154 vcpu->run->debug.arch.dr7 = dr7;
Nadav Amit82b32772014-11-02 11:54:45 +02007155 vcpu->run->debug.arch.pc = kvm_get_linear_rip(vcpu);
Avi Kivity851ba692009-08-24 11:10:17 +03007156 vcpu->run->debug.arch.exception = DB_VECTOR;
7157 vcpu->run->exit_reason = KVM_EXIT_DEBUG;
Jan Kiszka42dbaa52008-12-15 13:52:10 +01007158 return 0;
7159 } else {
Nadav Amit7305eb52014-11-02 11:54:44 +02007160 vcpu->arch.dr6 &= ~15;
Nadav Amit6f43ed02014-07-15 17:37:46 +03007161 vcpu->arch.dr6 |= DR6_BD | DR6_RTM;
Jan Kiszka42dbaa52008-12-15 13:52:10 +01007162 kvm_queue_exception(vcpu, DB_VECTOR);
7163 return 1;
7164 }
7165 }
7166
Paolo Bonzini81908bf2014-02-21 10:32:27 +01007167 if (vcpu->guest_debug == 0) {
Paolo Bonzini8f223722016-02-26 12:09:49 +01007168 vmcs_clear_bits(CPU_BASED_VM_EXEC_CONTROL,
7169 CPU_BASED_MOV_DR_EXITING);
Paolo Bonzini81908bf2014-02-21 10:32:27 +01007170
7171 /*
7172 * No more DR vmexits; force a reload of the debug registers
7173 * and reenter on this instruction. The next vmexit will
7174 * retrieve the full state of the debug registers.
7175 */
7176 vcpu->arch.switch_db_regs |= KVM_DEBUGREG_WONT_EXIT;
7177 return 1;
7178 }
7179
Jan Kiszka42dbaa52008-12-15 13:52:10 +01007180 reg = DEBUG_REG_ACCESS_REG(exit_qualification);
7181 if (exit_qualification & TYPE_MOV_FROM_DR) {
Gleb Natapov020df072010-04-13 10:05:23 +03007182 unsigned long val;
Jan Kiszka4c4d5632013-12-18 19:16:24 +01007183
7184 if (kvm_get_dr(vcpu, dr, &val))
7185 return 1;
7186 kvm_register_write(vcpu, reg, val);
Gleb Natapov020df072010-04-13 10:05:23 +03007187 } else
Nadav Amit57773922014-06-18 17:19:23 +03007188 if (kvm_set_dr(vcpu, dr, kvm_register_readl(vcpu, reg)))
Jan Kiszka4c4d5632013-12-18 19:16:24 +01007189 return 1;
7190
Kyle Huey6affcbe2016-11-29 12:40:40 -08007191 return kvm_skip_emulated_instruction(vcpu);
Avi Kivity6aa8b732006-12-10 02:21:36 -08007192}
7193
Jan Kiszka73aaf249e2014-01-04 18:47:16 +01007194static u64 vmx_get_dr6(struct kvm_vcpu *vcpu)
7195{
7196 return vcpu->arch.dr6;
7197}
7198
7199static void vmx_set_dr6(struct kvm_vcpu *vcpu, unsigned long val)
7200{
7201}
7202
Paolo Bonzini81908bf2014-02-21 10:32:27 +01007203static void vmx_sync_dirty_debug_regs(struct kvm_vcpu *vcpu)
7204{
Paolo Bonzini81908bf2014-02-21 10:32:27 +01007205 get_debugreg(vcpu->arch.db[0], 0);
7206 get_debugreg(vcpu->arch.db[1], 1);
7207 get_debugreg(vcpu->arch.db[2], 2);
7208 get_debugreg(vcpu->arch.db[3], 3);
7209 get_debugreg(vcpu->arch.dr6, 6);
7210 vcpu->arch.dr7 = vmcs_readl(GUEST_DR7);
7211
7212 vcpu->arch.switch_db_regs &= ~KVM_DEBUGREG_WONT_EXIT;
Paolo Bonzini8f223722016-02-26 12:09:49 +01007213 vmcs_set_bits(CPU_BASED_VM_EXEC_CONTROL, CPU_BASED_MOV_DR_EXITING);
Paolo Bonzini81908bf2014-02-21 10:32:27 +01007214}
7215
Gleb Natapov020df072010-04-13 10:05:23 +03007216static void vmx_set_dr7(struct kvm_vcpu *vcpu, unsigned long val)
7217{
7218 vmcs_writel(GUEST_DR7, val);
7219}
7220
Avi Kivity851ba692009-08-24 11:10:17 +03007221static int handle_cpuid(struct kvm_vcpu *vcpu)
Avi Kivity6aa8b732006-12-10 02:21:36 -08007222{
Kyle Huey6a908b62016-11-29 12:40:37 -08007223 return kvm_emulate_cpuid(vcpu);
Avi Kivity6aa8b732006-12-10 02:21:36 -08007224}
7225
Avi Kivity851ba692009-08-24 11:10:17 +03007226static int handle_rdmsr(struct kvm_vcpu *vcpu)
Avi Kivity6aa8b732006-12-10 02:21:36 -08007227{
Zhang Xiantaoad312c72007-12-13 23:50:52 +08007228 u32 ecx = vcpu->arch.regs[VCPU_REGS_RCX];
Paolo Bonzini609e36d2015-04-08 15:30:38 +02007229 struct msr_data msr_info;
Avi Kivity6aa8b732006-12-10 02:21:36 -08007230
Paolo Bonzini609e36d2015-04-08 15:30:38 +02007231 msr_info.index = ecx;
7232 msr_info.host_initiated = false;
7233 if (vmx_get_msr(vcpu, &msr_info)) {
Avi Kivity59200272010-01-25 19:47:02 +02007234 trace_kvm_msr_read_ex(ecx);
Avi Kivityc1a5d4f2007-11-25 14:12:03 +02007235 kvm_inject_gp(vcpu, 0);
Avi Kivity6aa8b732006-12-10 02:21:36 -08007236 return 1;
7237 }
7238
Paolo Bonzini609e36d2015-04-08 15:30:38 +02007239 trace_kvm_msr_read(ecx, msr_info.data);
Feng (Eric) Liu2714d1d2008-04-10 15:31:10 -04007240
Avi Kivity6aa8b732006-12-10 02:21:36 -08007241 /* FIXME: handling of bits 32:63 of rax, rdx */
Paolo Bonzini609e36d2015-04-08 15:30:38 +02007242 vcpu->arch.regs[VCPU_REGS_RAX] = msr_info.data & -1u;
7243 vcpu->arch.regs[VCPU_REGS_RDX] = (msr_info.data >> 32) & -1u;
Kyle Huey6affcbe2016-11-29 12:40:40 -08007244 return kvm_skip_emulated_instruction(vcpu);
Avi Kivity6aa8b732006-12-10 02:21:36 -08007245}
7246
Avi Kivity851ba692009-08-24 11:10:17 +03007247static int handle_wrmsr(struct kvm_vcpu *vcpu)
Avi Kivity6aa8b732006-12-10 02:21:36 -08007248{
Will Auld8fe8ab42012-11-29 12:42:12 -08007249 struct msr_data msr;
Zhang Xiantaoad312c72007-12-13 23:50:52 +08007250 u32 ecx = vcpu->arch.regs[VCPU_REGS_RCX];
7251 u64 data = (vcpu->arch.regs[VCPU_REGS_RAX] & -1u)
7252 | ((u64)(vcpu->arch.regs[VCPU_REGS_RDX] & -1u) << 32);
Avi Kivity6aa8b732006-12-10 02:21:36 -08007253
Will Auld8fe8ab42012-11-29 12:42:12 -08007254 msr.data = data;
7255 msr.index = ecx;
7256 msr.host_initiated = false;
Nadav Amit854e8bb2014-09-16 03:24:05 +03007257 if (kvm_set_msr(vcpu, &msr) != 0) {
Avi Kivity59200272010-01-25 19:47:02 +02007258 trace_kvm_msr_write_ex(ecx, data);
Avi Kivityc1a5d4f2007-11-25 14:12:03 +02007259 kvm_inject_gp(vcpu, 0);
Avi Kivity6aa8b732006-12-10 02:21:36 -08007260 return 1;
7261 }
7262
Avi Kivity59200272010-01-25 19:47:02 +02007263 trace_kvm_msr_write(ecx, data);
Kyle Huey6affcbe2016-11-29 12:40:40 -08007264 return kvm_skip_emulated_instruction(vcpu);
Avi Kivity6aa8b732006-12-10 02:21:36 -08007265}
7266
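/*
 * Illustrative sketch, not part of the kernel source: RDMSR returns the
 * 64-bit MSR value split across EDX:EAX and WRMSR consumes it the same
 * way, which is why the handlers above mask each register with -1u and
 * shift the high half by 32.  Hypothetical stand-alone helpers:
 */
static void sketch_split_msr(u64 data, u32 *eax, u32 *edx)
{
	*eax = data & -1u;		/* low 32 bits */
	*edx = (data >> 32) & -1u;	/* high 32 bits */
}

static u64 sketch_join_msr(u32 eax, u32 edx)
{
	return (u64)eax | ((u64)edx << 32);
}
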
Avi Kivity851ba692009-08-24 11:10:17 +03007267static int handle_tpr_below_threshold(struct kvm_vcpu *vcpu)
Yang, Sheng6e5d8652007-09-12 18:03:11 +08007268{
Paolo Bonzinieb90f342016-12-18 14:02:21 +01007269 kvm_apic_update_ppr(vcpu);
Yang, Sheng6e5d8652007-09-12 18:03:11 +08007270 return 1;
7271}
7272
Avi Kivity851ba692009-08-24 11:10:17 +03007273static int handle_interrupt_window(struct kvm_vcpu *vcpu)
Avi Kivity6aa8b732006-12-10 02:21:36 -08007274{
Paolo Bonzini47c01522016-12-19 11:44:07 +01007275 vmcs_clear_bits(CPU_BASED_VM_EXEC_CONTROL,
7276 CPU_BASED_VIRTUAL_INTR_PENDING);
Feng (Eric) Liu2714d1d2008-04-10 15:31:10 -04007277
Avi Kivity3842d132010-07-27 12:30:24 +03007278 kvm_make_request(KVM_REQ_EVENT, vcpu);
7279
Jan Kiszkaa26bf122008-09-26 09:30:45 +02007280 ++vcpu->stat.irq_window_exits;
Avi Kivity6aa8b732006-12-10 02:21:36 -08007281 return 1;
7282}
7283
Avi Kivity851ba692009-08-24 11:10:17 +03007284static int handle_halt(struct kvm_vcpu *vcpu)
Avi Kivity6aa8b732006-12-10 02:21:36 -08007285{
Avi Kivityd3bef152007-06-05 15:53:05 +03007286 return kvm_emulate_halt(vcpu);
Avi Kivity6aa8b732006-12-10 02:21:36 -08007287}
7288
Avi Kivity851ba692009-08-24 11:10:17 +03007289static int handle_vmcall(struct kvm_vcpu *vcpu)
Ingo Molnarc21415e2007-02-19 14:37:47 +02007290{
Andrey Smetanin0d9c0552016-02-11 16:44:59 +03007291 return kvm_emulate_hypercall(vcpu);
Ingo Molnarc21415e2007-02-19 14:37:47 +02007292}
7293
Gleb Natapovec25d5e2010-11-01 15:35:01 +02007294static int handle_invd(struct kvm_vcpu *vcpu)
7295{
Andre Przywara51d8b662010-12-21 11:12:02 +01007296 return emulate_instruction(vcpu, 0) == EMULATE_DONE;
Gleb Natapovec25d5e2010-11-01 15:35:01 +02007297}
7298
Avi Kivity851ba692009-08-24 11:10:17 +03007299static int handle_invlpg(struct kvm_vcpu *vcpu)
Marcelo Tosattia7052892008-09-23 13:18:35 -03007300{
Sheng Yangf9c617f2009-03-25 10:08:52 +08007301 unsigned long exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
Marcelo Tosattia7052892008-09-23 13:18:35 -03007302
7303 kvm_mmu_invlpg(vcpu, exit_qualification);
Kyle Huey6affcbe2016-11-29 12:40:40 -08007304 return kvm_skip_emulated_instruction(vcpu);
Marcelo Tosattia7052892008-09-23 13:18:35 -03007305}
7306
Avi Kivityfee84b02011-11-10 14:57:25 +02007307static int handle_rdpmc(struct kvm_vcpu *vcpu)
7308{
7309 int err;
7310
7311 err = kvm_rdpmc(vcpu);
Kyle Huey6affcbe2016-11-29 12:40:40 -08007312 return kvm_complete_insn_gp(vcpu, err);
Avi Kivityfee84b02011-11-10 14:57:25 +02007313}
7314
Avi Kivity851ba692009-08-24 11:10:17 +03007315static int handle_wbinvd(struct kvm_vcpu *vcpu)
Eddie Donge5edaa02007-11-11 12:28:35 +02007316{
Kyle Huey6affcbe2016-11-29 12:40:40 -08007317 return kvm_emulate_wbinvd(vcpu);
Eddie Donge5edaa02007-11-11 12:28:35 +02007318}
7319
Dexuan Cui2acf9232010-06-10 11:27:12 +08007320static int handle_xsetbv(struct kvm_vcpu *vcpu)
7321{
7322 u64 new_bv = kvm_read_edx_eax(vcpu);
7323 u32 index = kvm_register_read(vcpu, VCPU_REGS_RCX);
7324
7325 if (kvm_set_xcr(vcpu, index, new_bv) == 0)
Kyle Huey6affcbe2016-11-29 12:40:40 -08007326 return kvm_skip_emulated_instruction(vcpu);
Dexuan Cui2acf9232010-06-10 11:27:12 +08007327 return 1;
7328}
7329
Wanpeng Lif53cd632014-12-02 19:14:58 +08007330static int handle_xsaves(struct kvm_vcpu *vcpu)
7331{
Kyle Huey6affcbe2016-11-29 12:40:40 -08007332 kvm_skip_emulated_instruction(vcpu);
Wanpeng Lif53cd632014-12-02 19:14:58 +08007333 WARN(1, "this should never happen\n");
7334 return 1;
7335}
7336
7337static int handle_xrstors(struct kvm_vcpu *vcpu)
7338{
Kyle Huey6affcbe2016-11-29 12:40:40 -08007339 kvm_skip_emulated_instruction(vcpu);
Wanpeng Lif53cd632014-12-02 19:14:58 +08007340 WARN(1, "this should never happen\n");
7341 return 1;
7342}
7343
Avi Kivity851ba692009-08-24 11:10:17 +03007344static int handle_apic_access(struct kvm_vcpu *vcpu)
Sheng Yangf78e0e22007-10-29 09:40:42 +08007345{
Kevin Tian58fbbf22011-08-30 13:56:17 +03007346 if (likely(fasteoi)) {
7347 unsigned long exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
7348 int access_type, offset;
7349
7350 access_type = exit_qualification & APIC_ACCESS_TYPE;
7351 offset = exit_qualification & APIC_ACCESS_OFFSET;
7352 /*
7353	 * A sane guest uses MOV to write the EOI register and the
7354	 * written value is ignored, so short-circuit here to avoid
7355	 * heavy instruction emulation.
7356 */
7357 if ((access_type == TYPE_LINEAR_APIC_INST_WRITE) &&
7358 (offset == APIC_EOI)) {
7359 kvm_lapic_set_eoi(vcpu);
Kyle Huey6affcbe2016-11-29 12:40:40 -08007360 return kvm_skip_emulated_instruction(vcpu);
Kevin Tian58fbbf22011-08-30 13:56:17 +03007361 }
7362 }
Andre Przywara51d8b662010-12-21 11:12:02 +01007363 return emulate_instruction(vcpu, 0) == EMULATE_DONE;
Sheng Yangf78e0e22007-10-29 09:40:42 +08007364}
7365
Yang Zhangc7c9c562013-01-25 10:18:51 +08007366static int handle_apic_eoi_induced(struct kvm_vcpu *vcpu)
7367{
7368 unsigned long exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
7369 int vector = exit_qualification & 0xff;
7370
7371 /* EOI-induced VM exit is trap-like and thus no need to adjust IP */
7372 kvm_apic_set_eoi_accelerated(vcpu, vector);
7373 return 1;
7374}
7375
Yang Zhang83d4c282013-01-25 10:18:49 +08007376static int handle_apic_write(struct kvm_vcpu *vcpu)
7377{
7378 unsigned long exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
7379 u32 offset = exit_qualification & 0xfff;
7380
7381 /* APIC-write VM exit is trap-like and thus no need to adjust IP */
7382 kvm_apic_write_nodecode(vcpu, offset);
7383 return 1;
7384}
7385
Avi Kivity851ba692009-08-24 11:10:17 +03007386static int handle_task_switch(struct kvm_vcpu *vcpu)
Izik Eidus37817f22008-03-24 23:14:53 +02007387{
Jan Kiszka60637aa2008-09-26 09:30:47 +02007388 struct vcpu_vmx *vmx = to_vmx(vcpu);
Izik Eidus37817f22008-03-24 23:14:53 +02007389 unsigned long exit_qualification;
Jan Kiszkae269fb22010-04-14 15:51:09 +02007390 bool has_error_code = false;
7391 u32 error_code = 0;
Izik Eidus37817f22008-03-24 23:14:53 +02007392 u16 tss_selector;
Kevin Wolf7f3d35f2012-02-08 14:34:38 +01007393 int reason, type, idt_v, idt_index;
Gleb Natapov64a7ec02009-03-30 16:03:29 +03007394
7395 idt_v = (vmx->idt_vectoring_info & VECTORING_INFO_VALID_MASK);
Kevin Wolf7f3d35f2012-02-08 14:34:38 +01007396 idt_index = (vmx->idt_vectoring_info & VECTORING_INFO_VECTOR_MASK);
Gleb Natapov64a7ec02009-03-30 16:03:29 +03007397 type = (vmx->idt_vectoring_info & VECTORING_INFO_TYPE_MASK);
Izik Eidus37817f22008-03-24 23:14:53 +02007398
7399 exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
7400
7401 reason = (u32)exit_qualification >> 30;
Gleb Natapov64a7ec02009-03-30 16:03:29 +03007402 if (reason == TASK_SWITCH_GATE && idt_v) {
7403 switch (type) {
7404 case INTR_TYPE_NMI_INTR:
7405 vcpu->arch.nmi_injected = false;
Avi Kivity654f06f2011-03-23 15:02:47 +02007406 vmx_set_nmi_mask(vcpu, true);
Gleb Natapov64a7ec02009-03-30 16:03:29 +03007407 break;
7408 case INTR_TYPE_EXT_INTR:
Gleb Natapov66fd3f72009-05-11 13:35:50 +03007409 case INTR_TYPE_SOFT_INTR:
Gleb Natapov64a7ec02009-03-30 16:03:29 +03007410 kvm_clear_interrupt_queue(vcpu);
7411 break;
7412 case INTR_TYPE_HARD_EXCEPTION:
Jan Kiszkae269fb22010-04-14 15:51:09 +02007413 if (vmx->idt_vectoring_info &
7414 VECTORING_INFO_DELIVER_CODE_MASK) {
7415 has_error_code = true;
7416 error_code =
7417 vmcs_read32(IDT_VECTORING_ERROR_CODE);
7418 }
7419 /* fall through */
Gleb Natapov64a7ec02009-03-30 16:03:29 +03007420 case INTR_TYPE_SOFT_EXCEPTION:
7421 kvm_clear_exception_queue(vcpu);
7422 break;
7423 default:
7424 break;
7425 }
Jan Kiszka60637aa2008-09-26 09:30:47 +02007426 }
Izik Eidus37817f22008-03-24 23:14:53 +02007427 tss_selector = exit_qualification;
7428
Gleb Natapov64a7ec02009-03-30 16:03:29 +03007429 if (!idt_v || (type != INTR_TYPE_HARD_EXCEPTION &&
7430 type != INTR_TYPE_EXT_INTR &&
7431 type != INTR_TYPE_NMI_INTR))
7432 skip_emulated_instruction(vcpu);
7433
Kevin Wolf7f3d35f2012-02-08 14:34:38 +01007434 if (kvm_task_switch(vcpu, tss_selector,
7435 type == INTR_TYPE_SOFT_INTR ? idt_index : -1, reason,
7436 has_error_code, error_code) == EMULATE_FAIL) {
Gleb Natapovacb54512010-04-15 21:03:50 +03007437 vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
7438 vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION;
7439 vcpu->run->internal.ndata = 0;
Jan Kiszka42dbaa52008-12-15 13:52:10 +01007440 return 0;
Gleb Natapovacb54512010-04-15 21:03:50 +03007441 }
Jan Kiszka42dbaa52008-12-15 13:52:10 +01007442
Jan Kiszka42dbaa52008-12-15 13:52:10 +01007443 /*
7444 * TODO: What about debug traps on tss switch?
7445 * Are we supposed to inject them and update dr6?
7446 */
7447
7448 return 1;
Izik Eidus37817f22008-03-24 23:14:53 +02007449}
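/*
 * Note on the IDT-vectoring handling above: if the task switch is itself
 * the delivery of an event through a task gate (idt_v), that event must not
 * stay queued for re-injection once the switch is emulated.  NMIs are
 * re-blocked, queued interrupts are dropped, and a hard exception's error
 * code is captured so kvm_task_switch() can deliver it with the switch.
 */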
7450
Avi Kivity851ba692009-08-24 11:10:17 +03007451static int handle_ept_violation(struct kvm_vcpu *vcpu)
Sheng Yang14394422008-04-28 12:24:45 +08007452{
Sheng Yangf9c617f2009-03-25 10:08:52 +08007453 unsigned long exit_qualification;
Sheng Yang14394422008-04-28 12:24:45 +08007454 gpa_t gpa;
Paolo Bonzinieebed242016-11-28 14:39:58 +01007455 u64 error_code;
Sheng Yang14394422008-04-28 12:24:45 +08007456
Sheng Yangf9c617f2009-03-25 10:08:52 +08007457 exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
Sheng Yang14394422008-04-28 12:24:45 +08007458
Gleb Natapov0be9c7a2013-09-15 11:07:23 +03007459 /*
7460	 * If the EPT violation happened while executing iret from an NMI,
7461	 * the "blocked by NMI" bit has to be set before the next VM entry.
7462 * There are errata that may cause this bit to not be set:
7463 * AAK134, BY25.
7464 */
Gleb Natapovbcd1c292013-09-25 10:58:22 +03007465 if (!(to_vmx(vcpu)->idt_vectoring_info & VECTORING_INFO_VALID_MASK) &&
Paolo Bonzinid02fcf52017-11-06 13:31:13 +01007466 enable_vnmi &&
Gleb Natapovbcd1c292013-09-25 10:58:22 +03007467 (exit_qualification & INTR_INFO_UNBLOCK_NMI))
Gleb Natapov0be9c7a2013-09-15 11:07:23 +03007468 vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO, GUEST_INTR_STATE_NMI);
7469
Sheng Yang14394422008-04-28 12:24:45 +08007470 gpa = vmcs_read64(GUEST_PHYSICAL_ADDRESS);
Marcelo Tosatti229456f2009-06-17 09:22:14 -03007471 trace_kvm_page_fault(gpa, exit_qualification);
Xiao Guangrong4f5982a2012-06-20 15:58:04 +08007472
Junaid Shahid27959a42016-12-06 16:46:10 -08007473 /* Is it a read fault? */
Junaid Shahidab22a472016-12-21 20:29:28 -08007474 error_code = (exit_qualification & EPT_VIOLATION_ACC_READ)
Junaid Shahid27959a42016-12-06 16:46:10 -08007475 ? PFERR_USER_MASK : 0;
7476 /* Is it a write fault? */
Junaid Shahidab22a472016-12-21 20:29:28 -08007477 error_code |= (exit_qualification & EPT_VIOLATION_ACC_WRITE)
Junaid Shahid27959a42016-12-06 16:46:10 -08007478 ? PFERR_WRITE_MASK : 0;
7479 /* Is it a fetch fault? */
Junaid Shahidab22a472016-12-21 20:29:28 -08007480 error_code |= (exit_qualification & EPT_VIOLATION_ACC_INSTR)
Junaid Shahid27959a42016-12-06 16:46:10 -08007481 ? PFERR_FETCH_MASK : 0;
7482 /* ept page table entry is present? */
7483 error_code |= (exit_qualification &
7484 (EPT_VIOLATION_READABLE | EPT_VIOLATION_WRITABLE |
7485 EPT_VIOLATION_EXECUTABLE))
7486 ? PFERR_PRESENT_MASK : 0;
Xiao Guangrong4f5982a2012-06-20 15:58:04 +08007487
Paolo Bonzinieebed242016-11-28 14:39:58 +01007488 error_code |= (exit_qualification & 0x100) != 0 ?
7489 PFERR_GUEST_FINAL_MASK : PFERR_GUEST_PAGE_MASK;
Yang Zhang25d92082013-08-06 12:00:32 +03007490
Xiao Guangrong4f5982a2012-06-20 15:58:04 +08007491 vcpu->arch.exit_qualification = exit_qualification;
Xiao Guangrong4f5982a2012-06-20 15:58:04 +08007492 return kvm_mmu_page_fault(vcpu, gpa, error_code, NULL, 0);
Sheng Yang14394422008-04-28 12:24:45 +08007493}
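/*
 * Rough map from the EPT-violation exit qualification to the synthesized
 * page-fault error code above: bits 0-2 describe the attempted access
 * (read/write/fetch -> PFERR_USER/PFERR_WRITE/PFERR_FETCH), bits 3-5 report
 * the permissions the translation already had (any of them -> PFERR_PRESENT),
 * and bit 8 distinguishes a fault on the final translation
 * (PFERR_GUEST_FINAL) from one taken while walking the guest's own page
 * tables (PFERR_GUEST_PAGE).
 */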
7494
Avi Kivity851ba692009-08-24 11:10:17 +03007495static int handle_ept_misconfig(struct kvm_vcpu *vcpu)
Marcelo Tosatti68f89402009-06-11 12:07:43 -03007496{
Marcelo Tosatti68f89402009-06-11 12:07:43 -03007497 gpa_t gpa;
7498
Paolo Bonzini9034e6e2017-08-17 18:36:58 +02007499 /*
7500 * A nested guest cannot optimize MMIO vmexits, because we have an
7501 * nGPA here instead of the required GPA.
7502 */
Marcelo Tosatti68f89402009-06-11 12:07:43 -03007503 gpa = vmcs_read64(GUEST_PHYSICAL_ADDRESS);
Paolo Bonzini9034e6e2017-08-17 18:36:58 +02007504 if (!is_guest_mode(vcpu) &&
7505 !kvm_io_bus_write(vcpu, KVM_FAST_MMIO_BUS, gpa, 0, NULL)) {
Jason Wang931c33b2015-09-15 14:41:58 +08007506 trace_kvm_fast_mmio(gpa);
Vitaly Kuznetsovd391f122018-01-25 16:37:07 +01007507 /*
7508	 * Doing kvm_skip_emulated_instruction() relies on undefined
7509	 * behavior: Intel's manual doesn't mandate that
7510	 * VM_EXIT_INSTRUCTION_LEN be set in the VMCS when an EPT
7511	 * MISCONFIG occurs.  While real hardware was observed to set it,
7512	 * other hypervisors (namely Hyper-V) don't, so we would end up
7513	 * advancing the IP by some random value.  Disable fast mmio when
7514	 * running nested and keep it for real hardware, in the hope that
7515	 * VM_EXIT_INSTRUCTION_LEN will always be set correctly.
7516 */
7517 if (!static_cpu_has(X86_FEATURE_HYPERVISOR))
7518 return kvm_skip_emulated_instruction(vcpu);
7519 else
7520 return x86_emulate_instruction(vcpu, gpa, EMULTYPE_SKIP,
7521 NULL, 0) == EMULATE_DONE;
Michael S. Tsirkin68c3b4d2014-03-31 21:50:44 +03007522 }
Marcelo Tosatti68f89402009-06-11 12:07:43 -03007523
Sean Christophersonc75d0edc2018-03-29 14:48:31 -07007524 return kvm_mmu_page_fault(vcpu, gpa, PFERR_RSVD_MASK, NULL, 0);
Marcelo Tosatti68f89402009-06-11 12:07:43 -03007525}
7526
Avi Kivity851ba692009-08-24 11:10:17 +03007527static int handle_nmi_window(struct kvm_vcpu *vcpu)
Sheng Yangf08864b2008-05-15 18:23:25 +08007528{
Paolo Bonzinid02fcf52017-11-06 13:31:13 +01007529 WARN_ON_ONCE(!enable_vnmi);
Paolo Bonzini47c01522016-12-19 11:44:07 +01007530 vmcs_clear_bits(CPU_BASED_VM_EXEC_CONTROL,
7531 CPU_BASED_VIRTUAL_NMI_PENDING);
Sheng Yangf08864b2008-05-15 18:23:25 +08007532 ++vcpu->stat.nmi_window_exits;
Avi Kivity3842d132010-07-27 12:30:24 +03007533 kvm_make_request(KVM_REQ_EVENT, vcpu);
Sheng Yangf08864b2008-05-15 18:23:25 +08007534
7535 return 1;
7536}
7537
Mohammed Gamal80ced182009-09-01 12:48:18 +02007538static int handle_invalid_guest_state(struct kvm_vcpu *vcpu)
Mohammed Gamalea953ef2008-08-17 16:47:05 +03007539{
Avi Kivity8b3079a2009-01-05 12:10:54 +02007540 struct vcpu_vmx *vmx = to_vmx(vcpu);
7541 enum emulation_result err = EMULATE_DONE;
Mohammed Gamal80ced182009-09-01 12:48:18 +02007542 int ret = 1;
Avi Kivity49e9d552010-09-19 14:34:08 +02007543 u32 cpu_exec_ctrl;
7544 bool intr_window_requested;
Avi Kivityb8405c12012-06-07 17:08:48 +03007545 unsigned count = 130;
Avi Kivity49e9d552010-09-19 14:34:08 +02007546
Sean Christopherson2bb8caf2018-03-12 10:56:13 -07007547 /*
7548 * We should never reach the point where we are emulating L2
7549 * due to invalid guest state as that means we incorrectly
7550 * allowed a nested VMEntry with an invalid vmcs12.
7551 */
7552 WARN_ON_ONCE(vmx->emulation_required && vmx->nested.nested_run_pending);
7553
Avi Kivity49e9d552010-09-19 14:34:08 +02007554 cpu_exec_ctrl = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL);
7555 intr_window_requested = cpu_exec_ctrl & CPU_BASED_VIRTUAL_INTR_PENDING;
Mohammed Gamalea953ef2008-08-17 16:47:05 +03007556
Paolo Bonzini98eb2f82014-03-27 09:51:52 +01007557 while (vmx->emulation_required && count-- != 0) {
Avi Kivitybdea48e2012-06-10 18:07:57 +03007558 if (intr_window_requested && vmx_interrupt_allowed(vcpu))
Avi Kivity49e9d552010-09-19 14:34:08 +02007559 return handle_interrupt_window(&vmx->vcpu);
7560
Radim Krčmář72875d82017-04-26 22:32:19 +02007561 if (kvm_test_request(KVM_REQ_EVENT, vcpu))
Avi Kivityde87dcdd2012-06-12 20:21:38 +03007562 return 1;
7563
Liran Alon9b8ae632017-11-05 16:56:34 +02007564 err = emulate_instruction(vcpu, 0);
Mohammed Gamalea953ef2008-08-17 16:47:05 +03007565
Paolo Bonziniac0a48c2013-06-25 18:24:41 +02007566 if (err == EMULATE_USER_EXIT) {
Paolo Bonzini94452b92013-08-27 15:41:42 +02007567 ++vcpu->stat.mmio_exits;
Mohammed Gamal80ced182009-09-01 12:48:18 +02007568 ret = 0;
7569 goto out;
7570 }
Guillaume Thouvenin1d5a4d92008-10-29 09:39:42 +01007571
Sean Christophersonadd5ff72018-03-23 09:34:00 -07007572 if (err != EMULATE_DONE)
7573 goto emulation_error;
7574
7575 if (vmx->emulation_required && !vmx->rmode.vm86_active &&
7576 vcpu->arch.exception.pending)
7577 goto emulation_error;
Mohammed Gamalea953ef2008-08-17 16:47:05 +03007578
Gleb Natapov8d76c492013-05-08 18:38:44 +03007579 if (vcpu->arch.halt_request) {
7580 vcpu->arch.halt_request = 0;
Joel Schopp5cb56052015-03-02 13:43:31 -06007581 ret = kvm_vcpu_halt(vcpu);
Gleb Natapov8d76c492013-05-08 18:38:44 +03007582 goto out;
7583 }
7584
Mohammed Gamalea953ef2008-08-17 16:47:05 +03007585 if (signal_pending(current))
Mohammed Gamal80ced182009-09-01 12:48:18 +02007586 goto out;
Mohammed Gamalea953ef2008-08-17 16:47:05 +03007587 if (need_resched())
7588 schedule();
7589 }
7590
Mohammed Gamal80ced182009-09-01 12:48:18 +02007591out:
7592 return ret;
Mohammed Gamalea953ef2008-08-17 16:47:05 +03007593
Sean Christophersonadd5ff72018-03-23 09:34:00 -07007594emulation_error:
7595 vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
7596 vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION;
7597 vcpu->run->internal.ndata = 0;
7598 return 0;
Radim Krčmářb4a2d312014-08-21 18:08:08 +02007599}
7600
7601static void grow_ple_window(struct kvm_vcpu *vcpu)
7602{
7603 struct vcpu_vmx *vmx = to_vmx(vcpu);
7604 int old = vmx->ple_window;
7605
Babu Mogerc8e88712018-03-16 16:37:24 -04007606 vmx->ple_window = __grow_ple_window(old, ple_window,
7607 ple_window_grow,
7608 ple_window_max);
Radim Krčmářb4a2d312014-08-21 18:08:08 +02007609
7610 if (vmx->ple_window != old)
7611 vmx->ple_window_dirty = true;
Radim Krčmář7b462682014-08-21 18:08:09 +02007612
7613 trace_kvm_ple_window_grow(vcpu->vcpu_id, vmx->ple_window, old);
Radim Krčmářb4a2d312014-08-21 18:08:08 +02007614}
7615
7616static void shrink_ple_window(struct kvm_vcpu *vcpu)
7617{
7618 struct vcpu_vmx *vmx = to_vmx(vcpu);
7619 int old = vmx->ple_window;
7620
Babu Mogerc8e88712018-03-16 16:37:24 -04007621 vmx->ple_window = __shrink_ple_window(old, ple_window,
7622 ple_window_shrink,
7623 ple_window);
Radim Krčmářb4a2d312014-08-21 18:08:08 +02007624
7625 if (vmx->ple_window != old)
7626 vmx->ple_window_dirty = true;
Radim Krčmář7b462682014-08-21 18:08:09 +02007627
7628 trace_kvm_ple_window_shrink(vcpu->vcpu_id, vmx->ple_window, old);
Radim Krčmářb4a2d312014-08-21 18:08:08 +02007629}
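/*
 * The PLE window is adapted at run time: handle_pause() below grows it on
 * every PAUSE-loop exit so an over-eager spinner exits less often, while
 * shrink_ple_window() (whose caller is outside this excerpt) brings it back
 * down so that short spins are detected again.  ple_window_grow,
 * ple_window_shrink and ple_window_max bound the adjustments.
 */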
7630
7631/*
Feng Wubf9f6ac2015-09-18 22:29:55 +08007632 * Handler for POSTED_INTERRUPT_WAKEUP_VECTOR.
7633 */
7634static void wakeup_handler(void)
7635{
7636 struct kvm_vcpu *vcpu;
7637 int cpu = smp_processor_id();
7638
7639 spin_lock(&per_cpu(blocked_vcpu_on_cpu_lock, cpu));
7640 list_for_each_entry(vcpu, &per_cpu(blocked_vcpu_on_cpu, cpu),
7641 blocked_vcpu_list) {
7642 struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu);
7643
7644 if (pi_test_on(pi_desc) == 1)
7645 kvm_vcpu_kick(vcpu);
7646 }
7647 spin_unlock(&per_cpu(blocked_vcpu_on_cpu_lock, cpu));
7648}
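/*
 * wakeup_handler() runs on the CPU that receives a posted interrupt whose
 * notification vector was switched (elsewhere in this file) to the wakeup
 * vector while its target vCPU was blocked.  It scans the per-CPU list of
 * blocked vCPUs and kicks any whose posted-interrupt descriptor has the ON
 * bit set, so the pending interrupt is delivered once the vCPU resumes.
 */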
7649
Peng Haoe01bca22018-04-07 05:47:32 +08007650static void vmx_enable_tdp(void)
Junaid Shahidf160c7b2016-12-06 16:46:16 -08007651{
7652 kvm_mmu_set_mask_ptes(VMX_EPT_READABLE_MASK,
7653 enable_ept_ad_bits ? VMX_EPT_ACCESS_BIT : 0ull,
7654 enable_ept_ad_bits ? VMX_EPT_DIRTY_BIT : 0ull,
7655 0ull, VMX_EPT_EXECUTABLE_MASK,
7656 cpu_has_vmx_ept_execute_only() ? 0ull : VMX_EPT_READABLE_MASK,
Tom Lendackyd0ec49d2017-07-17 16:10:27 -05007657 VMX_EPT_RWX_MASK, 0ull);
Junaid Shahidf160c7b2016-12-06 16:46:16 -08007658
7659 ept_set_mmio_spte_mask();
7660 kvm_enable_tdp();
7661}
7662
Tiejun Chenf2c76482014-10-28 10:14:47 +08007663static __init int hardware_setup(void)
7664{
Sean Christophersoncf81a7e2018-07-11 09:54:30 -07007665 unsigned long host_bndcfgs;
Paolo Bonzini904e14f2018-01-16 16:51:18 +01007666 int r = -ENOMEM, i;
Tiejun Chen34a1cd62014-10-28 10:14:48 +08007667
7668 rdmsrl_safe(MSR_EFER, &host_efer);
7669
7670 for (i = 0; i < ARRAY_SIZE(vmx_msr_index); ++i)
7671 kvm_define_shared_msr(i, vmx_msr_index[i]);
7672
Radim Krčmář23611332016-09-29 22:41:33 +02007673 for (i = 0; i < VMX_BITMAP_NR; i++) {
7674 vmx_bitmap[i] = (unsigned long *)__get_free_page(GFP_KERNEL);
7675 if (!vmx_bitmap[i])
7676 goto out;
7677 }
Tiejun Chen34a1cd62014-10-28 10:14:48 +08007678
Tiejun Chen34a1cd62014-10-28 10:14:48 +08007679 memset(vmx_vmread_bitmap, 0xff, PAGE_SIZE);
7680 memset(vmx_vmwrite_bitmap, 0xff, PAGE_SIZE);
7681
Tiejun Chen34a1cd62014-10-28 10:14:48 +08007682 if (setup_vmcs_config(&vmcs_config) < 0) {
7683 r = -EIO;
Radim Krčmář23611332016-09-29 22:41:33 +02007684 goto out;
Tiejun Chenbaa03522014-12-23 16:21:11 +08007685 }
Tiejun Chenf2c76482014-10-28 10:14:47 +08007686
7687 if (boot_cpu_has(X86_FEATURE_NX))
7688 kvm_enable_efer_bits(EFER_NX);
7689
Sean Christophersoncf81a7e2018-07-11 09:54:30 -07007690 if (boot_cpu_has(X86_FEATURE_MPX)) {
7691 rdmsrl(MSR_IA32_BNDCFGS, host_bndcfgs);
7692 WARN_ONCE(host_bndcfgs, "KVM: BNDCFGS in host will be lost");
7693 }
7694
Wanpeng Li08d839c2017-03-23 05:30:08 -07007695 if (!cpu_has_vmx_vpid() || !cpu_has_vmx_invvpid() ||
7696 !(cpu_has_vmx_invvpid_single() || cpu_has_vmx_invvpid_global()))
Tiejun Chenf2c76482014-10-28 10:14:47 +08007697 enable_vpid = 0;
Wanpeng Li08d839c2017-03-23 05:30:08 -07007698
Tiejun Chenf2c76482014-10-28 10:14:47 +08007699 if (!cpu_has_vmx_ept() ||
David Hildenbrand42aa53b2017-08-10 23:15:29 +02007700 !cpu_has_vmx_ept_4levels() ||
David Hildenbrandf5f51582017-08-24 20:51:30 +02007701 !cpu_has_vmx_ept_mt_wb() ||
Wanpeng Li8ad81822017-10-09 15:51:53 -07007702 !cpu_has_vmx_invept_global())
Tiejun Chenf2c76482014-10-28 10:14:47 +08007703 enable_ept = 0;
Tiejun Chenf2c76482014-10-28 10:14:47 +08007704
Wanpeng Lifce6ac42017-05-11 02:58:56 -07007705 if (!cpu_has_vmx_ept_ad_bits() || !enable_ept)
Tiejun Chenf2c76482014-10-28 10:14:47 +08007706 enable_ept_ad_bits = 0;
7707
Wanpeng Li8ad81822017-10-09 15:51:53 -07007708 if (!cpu_has_vmx_unrestricted_guest() || !enable_ept)
Tiejun Chenf2c76482014-10-28 10:14:47 +08007709 enable_unrestricted_guest = 0;
7710
Paolo Bonziniad15a292015-01-30 16:18:49 +01007711 if (!cpu_has_vmx_flexpriority())
Tiejun Chenf2c76482014-10-28 10:14:47 +08007712 flexpriority_enabled = 0;
7713
Paolo Bonzinid02fcf52017-11-06 13:31:13 +01007714 if (!cpu_has_virtual_nmis())
7715 enable_vnmi = 0;
7716
Paolo Bonziniad15a292015-01-30 16:18:49 +01007717 /*
7718 * set_apic_access_page_addr() is used to reload apic access
7719 * page upon invalidation. No need to do anything if not
7720 * using the APIC_ACCESS_ADDR VMCS field.
7721 */
7722 if (!flexpriority_enabled)
Tiejun Chenf2c76482014-10-28 10:14:47 +08007723 kvm_x86_ops->set_apic_access_page_addr = NULL;
Tiejun Chenf2c76482014-10-28 10:14:47 +08007724
7725 if (!cpu_has_vmx_tpr_shadow())
7726 kvm_x86_ops->update_cr8_intercept = NULL;
7727
7728 if (enable_ept && !cpu_has_vmx_ept_2m_page())
7729 kvm_disable_largepages();
7730
Tianyu Lan877ad952018-07-19 08:40:23 +00007731#if IS_ENABLED(CONFIG_HYPERV)
7732 if (ms_hyperv.nested_features & HV_X64_NESTED_GUEST_MAPPING_FLUSH
7733 && enable_ept)
7734 kvm_x86_ops->tlb_remote_flush = vmx_hv_remote_flush_tlb;
7735#endif
7736
Wanpeng Li0f107682017-09-28 18:06:24 -07007737 if (!cpu_has_vmx_ple()) {
Tiejun Chenf2c76482014-10-28 10:14:47 +08007738 ple_gap = 0;
Wanpeng Li0f107682017-09-28 18:06:24 -07007739 ple_window = 0;
7740 ple_window_grow = 0;
7741 ple_window_max = 0;
7742 ple_window_shrink = 0;
7743 }
Tiejun Chenf2c76482014-10-28 10:14:47 +08007744
Paolo Bonzini76dfafd52016-12-19 17:17:11 +01007745 if (!cpu_has_vmx_apicv()) {
Tiejun Chenf2c76482014-10-28 10:14:47 +08007746 enable_apicv = 0;
Paolo Bonzini76dfafd52016-12-19 17:17:11 +01007747 kvm_x86_ops->sync_pir_to_irr = NULL;
7748 }
Tiejun Chenf2c76482014-10-28 10:14:47 +08007749
Haozhong Zhang64903d62015-10-20 15:39:09 +08007750 if (cpu_has_vmx_tsc_scaling()) {
7751 kvm_has_tsc_control = true;
7752 kvm_max_tsc_scaling_ratio = KVM_VMX_TSC_MULTIPLIER_MAX;
7753 kvm_tsc_scaling_ratio_frac_bits = 48;
7754 }
7755
Wanpeng Li04bb92e2015-09-16 19:31:11 +08007756 set_bit(0, vmx_vpid_bitmap); /* 0 is reserved for host */
7757
Junaid Shahidf160c7b2016-12-06 16:46:16 -08007758 if (enable_ept)
7759 vmx_enable_tdp();
7760 else
Tiejun Chenbaa03522014-12-23 16:21:11 +08007761 kvm_disable_tdp();
7762
Jim Mattson8fcc4b52018-07-10 11:27:20 +02007763 if (!nested) {
7764 kvm_x86_ops->get_nested_state = NULL;
7765 kvm_x86_ops->set_nested_state = NULL;
7766 }
7767
Kai Huang843e4332015-01-28 10:54:28 +08007768 /*
7769 * Only enable PML when hardware supports PML feature, and both EPT
7770 * and EPT A/D bit features are enabled -- PML depends on them to work.
7771 */
7772 if (!enable_ept || !enable_ept_ad_bits || !cpu_has_vmx_pml())
7773 enable_pml = 0;
7774
7775 if (!enable_pml) {
7776 kvm_x86_ops->slot_enable_log_dirty = NULL;
7777 kvm_x86_ops->slot_disable_log_dirty = NULL;
7778 kvm_x86_ops->flush_log_dirty = NULL;
7779 kvm_x86_ops->enable_log_dirty_pt_masked = NULL;
7780 }
7781
Yunhong Jiang64672c92016-06-13 14:19:59 -07007782 if (cpu_has_vmx_preemption_timer() && enable_preemption_timer) {
7783 u64 vmx_msr;
7784
7785 rdmsrl(MSR_IA32_VMX_MISC, vmx_msr);
7786 cpu_preemption_timer_multi =
7787 vmx_msr & VMX_MISC_PREEMPTION_TIMER_RATE_MASK;
7788 } else {
7789 kvm_x86_ops->set_hv_timer = NULL;
7790 kvm_x86_ops->cancel_hv_timer = NULL;
7791 }
7792
Paolo Bonzinic5d167b2017-12-13 11:05:19 +01007793 if (!cpu_has_vmx_shadow_vmcs())
7794 enable_shadow_vmcs = 0;
7795 if (enable_shadow_vmcs)
7796 init_vmcs_shadow_fields();
7797
Feng Wubf9f6ac2015-09-18 22:29:55 +08007798 kvm_set_posted_intr_wakeup_handler(wakeup_handler);
Paolo Bonzini13893092018-02-26 13:40:09 +01007799 nested_vmx_setup_ctls_msrs(&vmcs_config.nested, enable_apicv);
Feng Wubf9f6ac2015-09-18 22:29:55 +08007800
Ashok Rajc45dcc72016-06-22 14:59:56 +08007801 kvm_mce_cap_supported |= MCG_LMCE_P;
7802
Tiejun Chenf2c76482014-10-28 10:14:47 +08007803 return alloc_kvm_area();
Tiejun Chen34a1cd62014-10-28 10:14:48 +08007804
Tiejun Chen34a1cd62014-10-28 10:14:48 +08007805out:
Radim Krčmář23611332016-09-29 22:41:33 +02007806 for (i = 0; i < VMX_BITMAP_NR; i++)
7807 free_page((unsigned long)vmx_bitmap[i]);
Tiejun Chen34a1cd62014-10-28 10:14:48 +08007808
7809 return r;
Tiejun Chenf2c76482014-10-28 10:14:47 +08007810}
7811
7812static __exit void hardware_unsetup(void)
7813{
Radim Krčmář23611332016-09-29 22:41:33 +02007814 int i;
7815
7816 for (i = 0; i < VMX_BITMAP_NR; i++)
7817 free_page((unsigned long)vmx_bitmap[i]);
Tiejun Chen34a1cd62014-10-28 10:14:48 +08007818
Tiejun Chenf2c76482014-10-28 10:14:47 +08007819 free_kvm_area();
7820}
7821
Avi Kivity6aa8b732006-12-10 02:21:36 -08007822/*
Zhai, Edwin4b8d54f2009-10-09 18:03:20 +08007823 * Indicate a busy-waiting vcpu in spinlock. We do not enable the PAUSE
7824 * exiting, so only get here on cpu with PAUSE-Loop-Exiting.
7825 */
Marcelo Tosatti9fb41ba2009-10-12 19:37:31 -03007826static int handle_pause(struct kvm_vcpu *vcpu)
Zhai, Edwin4b8d54f2009-10-09 18:03:20 +08007827{
Wanpeng Lib31c1142018-03-12 04:53:04 -07007828 if (!kvm_pause_in_guest(vcpu->kvm))
Radim Krčmářb4a2d312014-08-21 18:08:08 +02007829 grow_ple_window(vcpu);
7830
Longpeng(Mike)de63ad42017-08-08 12:05:33 +08007831 /*
7832 * Intel sdm vol3 ch-25.1.3 says: The "PAUSE-loop exiting"
7833 * VM-execution control is ignored if CPL > 0. OTOH, KVM
7834	 * never sets PAUSE_EXITING and only sets PLE if supported,
7835 * so the vcpu must be CPL=0 if it gets a PAUSE exit.
7836 */
7837 kvm_vcpu_on_spin(vcpu, true);
Kyle Huey6affcbe2016-11-29 12:40:40 -08007838 return kvm_skip_emulated_instruction(vcpu);
Zhai, Edwin4b8d54f2009-10-09 18:03:20 +08007839}
7840
Gabriel L. Somlo87c00572014-05-07 16:52:13 -04007841static int handle_nop(struct kvm_vcpu *vcpu)
Sheng Yang59708672009-12-15 13:29:54 +08007842{
Kyle Huey6affcbe2016-11-29 12:40:40 -08007843 return kvm_skip_emulated_instruction(vcpu);
Sheng Yang59708672009-12-15 13:29:54 +08007844}
7845
Gabriel L. Somlo87c00572014-05-07 16:52:13 -04007846static int handle_mwait(struct kvm_vcpu *vcpu)
7847{
7848 printk_once(KERN_WARNING "kvm: MWAIT instruction emulated as NOP!\n");
7849 return handle_nop(vcpu);
7850}
7851
Jim Mattson45ec3682017-08-23 16:32:04 -07007852static int handle_invalid_op(struct kvm_vcpu *vcpu)
7853{
7854 kvm_queue_exception(vcpu, UD_VECTOR);
7855 return 1;
7856}
7857
Mihai Donțu5f3d45e2015-07-05 20:08:57 +03007858static int handle_monitor_trap(struct kvm_vcpu *vcpu)
7859{
7860 return 1;
7861}
7862
Gabriel L. Somlo87c00572014-05-07 16:52:13 -04007863static int handle_monitor(struct kvm_vcpu *vcpu)
7864{
7865 printk_once(KERN_WARNING "kvm: MONITOR instruction emulated as NOP!\n");
7866 return handle_nop(vcpu);
7867}
7868
Zhai, Edwin4b8d54f2009-10-09 18:03:20 +08007869/*
Arthur Chunqi Li0658fba2013-07-04 15:03:32 +08007870 * The following 3 functions, nested_vmx_succeed()/failValid()/failInvalid(),
7871 * set the success or error code of an emulated VMX instruction, as specified
7872 * by Vol 2B, VMX Instruction Reference, "Conventions".
7873 */
7874static void nested_vmx_succeed(struct kvm_vcpu *vcpu)
7875{
7876 vmx_set_rflags(vcpu, vmx_get_rflags(vcpu)
7877 & ~(X86_EFLAGS_CF | X86_EFLAGS_PF | X86_EFLAGS_AF |
7878 X86_EFLAGS_ZF | X86_EFLAGS_SF | X86_EFLAGS_OF));
7879}
7880
7881static void nested_vmx_failInvalid(struct kvm_vcpu *vcpu)
7882{
7883 vmx_set_rflags(vcpu, (vmx_get_rflags(vcpu)
7884 & ~(X86_EFLAGS_PF | X86_EFLAGS_AF | X86_EFLAGS_ZF |
7885 X86_EFLAGS_SF | X86_EFLAGS_OF))
7886 | X86_EFLAGS_CF);
7887}
7888
Abel Gordon145c28d2013-04-18 14:36:55 +03007889static void nested_vmx_failValid(struct kvm_vcpu *vcpu,
Arthur Chunqi Li0658fba2013-07-04 15:03:32 +08007890 u32 vm_instruction_error)
7891{
7892 if (to_vmx(vcpu)->nested.current_vmptr == -1ull) {
7893 /*
7894 * failValid writes the error number to the current VMCS, which
7895	 * can't be done if there isn't a current VMCS.
7896 */
7897 nested_vmx_failInvalid(vcpu);
7898 return;
7899 }
7900 vmx_set_rflags(vcpu, (vmx_get_rflags(vcpu)
7901 & ~(X86_EFLAGS_CF | X86_EFLAGS_PF | X86_EFLAGS_AF |
7902 X86_EFLAGS_SF | X86_EFLAGS_OF))
7903 | X86_EFLAGS_ZF);
7904 get_vmcs12(vcpu)->vm_instruction_error = vm_instruction_error;
7905 /*
7906 * We don't need to force a shadow sync because
7907 * VM_INSTRUCTION_ERROR is not shadowed
7908 */
7909}
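/*
 * Summary of the "Conventions" appendix these three helpers implement:
 * VMsucceed clears CF, PF, AF, ZF, SF and OF; VMfailInvalid sets only CF
 * (there is no current VMCS to record an error in); VMfailValid sets only
 * ZF and stores the error number in the current VMCS's VM-instruction
 * error field.
 */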
Abel Gordon145c28d2013-04-18 14:36:55 +03007910
Wincy Vanff651cb2014-12-11 08:52:58 +03007911static void nested_vmx_abort(struct kvm_vcpu *vcpu, u32 indicator)
7912{
7913 /* TODO: not to reset guest simply here. */
7914 kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu);
Paolo Bonzinibbe41b92016-08-19 17:51:20 +02007915 pr_debug_ratelimited("kvm: nested vmx abort, indicator %d\n", indicator);
Wincy Vanff651cb2014-12-11 08:52:58 +03007916}
7917
Jan Kiszkaf41245002014-03-07 20:03:13 +01007918static enum hrtimer_restart vmx_preemption_timer_fn(struct hrtimer *timer)
7919{
7920 struct vcpu_vmx *vmx =
7921 container_of(timer, struct vcpu_vmx, nested.preemption_timer);
7922
7923 vmx->nested.preemption_timer_expired = true;
7924 kvm_make_request(KVM_REQ_EVENT, &vmx->vcpu);
7925 kvm_vcpu_kick(&vmx->vcpu);
7926
7927 return HRTIMER_NORESTART;
7928}
7929
Nadav Har'Elff2f6fe2011-05-25 23:05:27 +03007930/*
Bandan Das19677e32014-05-06 02:19:15 -04007931 * Decode the memory-address operand of a vmx instruction, as recorded on an
7932 * exit caused by such an instruction (run by a guest hypervisor).
7933 * On success, returns 0. When the operand is invalid, returns 1 and throws
7934 * #UD or #GP.
7935 */
7936static int get_vmx_mem_address(struct kvm_vcpu *vcpu,
7937 unsigned long exit_qualification,
Eugene Korenevskyf9eb4af2015-04-17 02:22:21 +00007938 u32 vmx_instruction_info, bool wr, gva_t *ret)
Bandan Das19677e32014-05-06 02:19:15 -04007939{
Eugene Korenevskyf9eb4af2015-04-17 02:22:21 +00007940 gva_t off;
7941 bool exn;
7942 struct kvm_segment s;
7943
Bandan Das19677e32014-05-06 02:19:15 -04007944 /*
7945 * According to Vol. 3B, "Information for VM Exits Due to Instruction
7946 * Execution", on an exit, vmx_instruction_info holds most of the
7947 * addressing components of the operand. Only the displacement part
7948 * is put in exit_qualification (see 3B, "Basic VM-Exit Information").
7949 * For how an actual address is calculated from all these components,
7950 * refer to Vol. 1, "Operand Addressing".
7951 */
7952 int scaling = vmx_instruction_info & 3;
7953 int addr_size = (vmx_instruction_info >> 7) & 7;
7954 bool is_reg = vmx_instruction_info & (1u << 10);
7955 int seg_reg = (vmx_instruction_info >> 15) & 7;
7956 int index_reg = (vmx_instruction_info >> 18) & 0xf;
7957 bool index_is_valid = !(vmx_instruction_info & (1u << 22));
7958 int base_reg = (vmx_instruction_info >> 23) & 0xf;
7959 bool base_is_valid = !(vmx_instruction_info & (1u << 27));
7960
7961 if (is_reg) {
7962 kvm_queue_exception(vcpu, UD_VECTOR);
7963 return 1;
7964 }
7965
7966 /* Addr = segment_base + offset */
7967 /* offset = base + [index * scale] + displacement */
Eugene Korenevskyf9eb4af2015-04-17 02:22:21 +00007968 off = exit_qualification; /* holds the displacement */
Bandan Das19677e32014-05-06 02:19:15 -04007969 if (base_is_valid)
Eugene Korenevskyf9eb4af2015-04-17 02:22:21 +00007970 off += kvm_register_read(vcpu, base_reg);
Bandan Das19677e32014-05-06 02:19:15 -04007971 if (index_is_valid)
Eugene Korenevskyf9eb4af2015-04-17 02:22:21 +00007972 off += kvm_register_read(vcpu, index_reg)<<scaling;
7973 vmx_get_segment(vcpu, &s, seg_reg);
7974 *ret = s.base + off;
Bandan Das19677e32014-05-06 02:19:15 -04007975
7976 if (addr_size == 1) /* 32 bit */
7977 *ret &= 0xffffffff;
7978
Eugene Korenevskyf9eb4af2015-04-17 02:22:21 +00007979 /* Checks for #GP/#SS exceptions. */
7980 exn = false;
Quentin Casasnovasff30ef42016-06-18 11:01:05 +02007981 if (is_long_mode(vcpu)) {
7982 /* Long mode: #GP(0)/#SS(0) if the memory address is in a
7983 * non-canonical form. This is the only check on the memory
7984 * destination for long mode!
7985 */
Yu Zhangfd8cb432017-08-24 20:27:56 +08007986 exn = is_noncanonical_address(*ret, vcpu);
Quentin Casasnovasff30ef42016-06-18 11:01:05 +02007987 } else if (is_protmode(vcpu)) {
Eugene Korenevskyf9eb4af2015-04-17 02:22:21 +00007988 /* Protected mode: apply checks for segment validity in the
7989 * following order:
7990 * - segment type check (#GP(0) may be thrown)
7991 * - usability check (#GP(0)/#SS(0))
7992 * - limit check (#GP(0)/#SS(0))
7993 */
7994 if (wr)
7995 /* #GP(0) if the destination operand is located in a
7996 * read-only data segment or any code segment.
7997 */
7998 exn = ((s.type & 0xa) == 0 || (s.type & 8));
7999 else
8000 /* #GP(0) if the source operand is located in an
8001 * execute-only code segment
8002 */
8003 exn = ((s.type & 0xa) == 8);
Quentin Casasnovasff30ef42016-06-18 11:01:05 +02008004 if (exn) {
8005 kvm_queue_exception_e(vcpu, GP_VECTOR, 0);
8006 return 1;
8007 }
Eugene Korenevskyf9eb4af2015-04-17 02:22:21 +00008008 /* Protected mode: #GP(0)/#SS(0) if the segment is unusable.
8009 */
8010 exn = (s.unusable != 0);
8011 /* Protected mode: #GP(0)/#SS(0) if the memory
8012 * operand is outside the segment limit.
8013 */
8014 exn = exn || (off + sizeof(u64) > s.limit);
8015 }
8016 if (exn) {
8017 kvm_queue_exception_e(vcpu,
8018 seg_reg == VCPU_SREG_SS ?
8019 SS_VECTOR : GP_VECTOR,
8020 0);
8021 return 1;
8022 }
8023
Bandan Das19677e32014-05-06 02:19:15 -04008024 return 0;
8025}
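/*
 * Illustrative example (not taken from the SDM text itself): for a memory
 * operand such as ds:[rax + rbx*4 + 0x10], the exit would report
 *   exit_qualification   = 0x10   (the displacement)
 *   vmx_instruction_info: scaling = 2 (bits 1:0), addr size = 2/64-bit
 *   (bits 9:7), Mem/Reg = 0 (bit 10), seg_reg = 3/DS (bits 17:15),
 *   index_reg = 3/RBX with bit 22 clear, base_reg = 0/RAX with bit 27 clear,
 * and the code above computes *ret = ds.base + rax + (rbx << 2) + 0x10
 * before applying the canonical/segment checks.
 */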
8026
Radim Krčmářcbf71272017-05-19 15:48:51 +02008027static int nested_vmx_get_vmptr(struct kvm_vcpu *vcpu, gpa_t *vmpointer)
Bandan Das3573e222014-05-06 02:19:16 -04008028{
8029 gva_t gva;
Bandan Das3573e222014-05-06 02:19:16 -04008030 struct x86_exception e;
Bandan Das3573e222014-05-06 02:19:16 -04008031
8032 if (get_vmx_mem_address(vcpu, vmcs_readl(EXIT_QUALIFICATION),
Eugene Korenevskyf9eb4af2015-04-17 02:22:21 +00008033 vmcs_read32(VMX_INSTRUCTION_INFO), false, &gva))
Bandan Das3573e222014-05-06 02:19:16 -04008034 return 1;
8035
Paolo Bonzinice14e868a2018-06-06 17:37:49 +02008036 if (kvm_read_guest_virt(vcpu, gva, vmpointer, sizeof(*vmpointer), &e)) {
Bandan Das3573e222014-05-06 02:19:16 -04008037 kvm_inject_page_fault(vcpu, &e);
8038 return 1;
8039 }
8040
Bandan Das3573e222014-05-06 02:19:16 -04008041 return 0;
8042}
8043
Liran Alonabfc52c2018-06-23 02:35:13 +03008044/*
8045 * Allocate a shadow VMCS and associate it with the currently loaded
8046 * VMCS, unless such a shadow VMCS already exists. The newly allocated
8047 * VMCS is also VMCLEARed, so that it is ready for use.
8048 */
8049static struct vmcs *alloc_shadow_vmcs(struct kvm_vcpu *vcpu)
8050{
8051 struct vcpu_vmx *vmx = to_vmx(vcpu);
8052 struct loaded_vmcs *loaded_vmcs = vmx->loaded_vmcs;
8053
8054 /*
8055 * We should allocate a shadow vmcs for vmcs01 only when L1
8056 * executes VMXON and free it when L1 executes VMXOFF.
8057 * As it is invalid to execute VMXON twice, we shouldn't reach
8058	 * here when vmcs01 already has an allocated shadow vmcs.
8059 */
8060 WARN_ON(loaded_vmcs == &vmx->vmcs01 && loaded_vmcs->shadow_vmcs);
8061
8062 if (!loaded_vmcs->shadow_vmcs) {
8063 loaded_vmcs->shadow_vmcs = alloc_vmcs(true);
8064 if (loaded_vmcs->shadow_vmcs)
8065 vmcs_clear(loaded_vmcs->shadow_vmcs);
8066 }
8067 return loaded_vmcs->shadow_vmcs;
8068}
8069
Jim Mattsone29acc52016-11-30 12:03:43 -08008070static int enter_vmx_operation(struct kvm_vcpu *vcpu)
8071{
8072 struct vcpu_vmx *vmx = to_vmx(vcpu);
Paolo Bonzinif21f1652018-01-11 12:16:15 +01008073 int r;
Jim Mattsone29acc52016-11-30 12:03:43 -08008074
Paolo Bonzinif21f1652018-01-11 12:16:15 +01008075 r = alloc_loaded_vmcs(&vmx->nested.vmcs02);
8076 if (r < 0)
Jim Mattsonde3a0022017-11-27 17:22:25 -06008077 goto out_vmcs02;
Jim Mattsone29acc52016-11-30 12:03:43 -08008078
8079 vmx->nested.cached_vmcs12 = kmalloc(VMCS12_SIZE, GFP_KERNEL);
8080 if (!vmx->nested.cached_vmcs12)
8081 goto out_cached_vmcs12;
8082
Liran Alon61ada742018-06-23 02:35:08 +03008083 vmx->nested.cached_shadow_vmcs12 = kmalloc(VMCS12_SIZE, GFP_KERNEL);
8084 if (!vmx->nested.cached_shadow_vmcs12)
8085 goto out_cached_shadow_vmcs12;
8086
Liran Alonabfc52c2018-06-23 02:35:13 +03008087 if (enable_shadow_vmcs && !alloc_shadow_vmcs(vcpu))
8088 goto out_shadow_vmcs;
Jim Mattsone29acc52016-11-30 12:03:43 -08008089
Jim Mattsone29acc52016-11-30 12:03:43 -08008090 hrtimer_init(&vmx->nested.preemption_timer, CLOCK_MONOTONIC,
8091 HRTIMER_MODE_REL_PINNED);
8092 vmx->nested.preemption_timer.function = vmx_preemption_timer_fn;
8093
8094 vmx->nested.vmxon = true;
8095 return 0;
8096
8097out_shadow_vmcs:
Liran Alon61ada742018-06-23 02:35:08 +03008098 kfree(vmx->nested.cached_shadow_vmcs12);
8099
8100out_cached_shadow_vmcs12:
Jim Mattsone29acc52016-11-30 12:03:43 -08008101 kfree(vmx->nested.cached_vmcs12);
8102
8103out_cached_vmcs12:
Jim Mattsonde3a0022017-11-27 17:22:25 -06008104 free_loaded_vmcs(&vmx->nested.vmcs02);
Jim Mattsone29acc52016-11-30 12:03:43 -08008105
Jim Mattsonde3a0022017-11-27 17:22:25 -06008106out_vmcs02:
Jim Mattsone29acc52016-11-30 12:03:43 -08008107 return -ENOMEM;
8108}
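/*
 * enter_vmx_operation() sets up everything VMXON needs beyond the checks in
 * handle_vmon(): the vmcs02 that will actually run L2, kernel-side caches of
 * the vmcs12 and shadow vmcs12 so guest memory is not re-read while in use,
 * an optional shadow VMCS for vmcs01, and the hrtimer used to emulate the
 * VMX-preemption timer for L1.
 */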
8109
Bandan Das3573e222014-05-06 02:19:16 -04008110/*
Nadav Har'Elec378ae2011-05-25 23:02:54 +03008111 * Emulate the VMXON instruction.
8112 * Currently, we just remember that VMX is active, and do not save or even
8113 * inspect the argument to VMXON (the so-called "VMXON pointer") because we
8114 * do not currently need to store anything in that guest-allocated memory
8115	 * region. Consequently, VMCLEAR and VMPTRLD also do not verify that their
8116 * argument is different from the VMXON pointer (which the spec says they do).
8117 */
8118static int handle_vmon(struct kvm_vcpu *vcpu)
8119{
Jim Mattsone29acc52016-11-30 12:03:43 -08008120 int ret;
Radim Krčmářcbf71272017-05-19 15:48:51 +02008121 gpa_t vmptr;
8122 struct page *page;
Nadav Har'Elec378ae2011-05-25 23:02:54 +03008123 struct vcpu_vmx *vmx = to_vmx(vcpu);
Nadav Har'Elb3897a42013-07-08 19:12:35 +08008124 const u64 VMXON_NEEDED_FEATURES = FEATURE_CONTROL_LOCKED
8125 | FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX;
Nadav Har'Elec378ae2011-05-25 23:02:54 +03008126
Jim Mattson70f3aac2017-04-26 08:53:46 -07008127 /*
8128 * The Intel VMX Instruction Reference lists a bunch of bits that are
8129 * prerequisite to running VMXON, most notably cr4.VMXE must be set to
8130 * 1 (see vmx_set_cr4() for when we allow the guest to set this).
8131 * Otherwise, we should fail with #UD. But most faulting conditions
8132 * have already been checked by hardware, prior to the VM-exit for
8133 * VMXON. We do test guest cr4.VMXE because processor CR4 always has
8134 * that bit set to 1 in non-root mode.
Nadav Har'Elec378ae2011-05-25 23:02:54 +03008135 */
Jim Mattson70f3aac2017-04-26 08:53:46 -07008136 if (!kvm_read_cr4_bits(vcpu, X86_CR4_VMXE)) {
Nadav Har'Elec378ae2011-05-25 23:02:54 +03008137 kvm_queue_exception(vcpu, UD_VECTOR);
8138 return 1;
8139 }
8140
Felix Wilhelm727ba742018-06-11 09:43:44 +02008141 /* CPL=0 must be checked manually. */
8142 if (vmx_get_cpl(vcpu)) {
Jim Mattson36090bf2018-07-27 09:18:50 -07008143 kvm_inject_gp(vcpu, 0);
Felix Wilhelm727ba742018-06-11 09:43:44 +02008144 return 1;
8145 }
8146
Abel Gordon145c28d2013-04-18 14:36:55 +03008147 if (vmx->nested.vmxon) {
8148 nested_vmx_failValid(vcpu, VMXERR_VMXON_IN_VMX_ROOT_OPERATION);
Kyle Huey6affcbe2016-11-29 12:40:40 -08008149 return kvm_skip_emulated_instruction(vcpu);
Abel Gordon145c28d2013-04-18 14:36:55 +03008150 }
Nadav Har'Elb3897a42013-07-08 19:12:35 +08008151
Haozhong Zhang3b840802016-06-22 14:59:54 +08008152 if ((vmx->msr_ia32_feature_control & VMXON_NEEDED_FEATURES)
Nadav Har'Elb3897a42013-07-08 19:12:35 +08008153 != VMXON_NEEDED_FEATURES) {
8154 kvm_inject_gp(vcpu, 0);
8155 return 1;
8156 }
8157
Radim Krčmářcbf71272017-05-19 15:48:51 +02008158 if (nested_vmx_get_vmptr(vcpu, &vmptr))
Jim Mattson21e7fbe2016-12-22 15:49:55 -08008159 return 1;
Radim Krčmářcbf71272017-05-19 15:48:51 +02008160
8161 /*
8162 * SDM 3: 24.11.5
8163	 * The first 4 bytes of the VMXON region contain the supported
8164	 * VMCS revision identifier.
8165	 *
8166	 * Note - IA32_VMX_BASIC[48] will never be 1 for the nested case,
8167	 * which would replace the physical address width with 32.
8168 */
8169 if (!PAGE_ALIGNED(vmptr) || (vmptr >> cpuid_maxphyaddr(vcpu))) {
8170 nested_vmx_failInvalid(vcpu);
8171 return kvm_skip_emulated_instruction(vcpu);
8172 }
8173
David Hildenbrand5e2f30b2017-08-03 18:11:04 +02008174 page = kvm_vcpu_gpa_to_page(vcpu, vmptr);
8175 if (is_error_page(page)) {
Radim Krčmářcbf71272017-05-19 15:48:51 +02008176 nested_vmx_failInvalid(vcpu);
8177 return kvm_skip_emulated_instruction(vcpu);
8178 }
8179 if (*(u32 *)kmap(page) != VMCS12_REVISION) {
8180 kunmap(page);
David Hildenbrand53a70da2017-08-03 18:11:05 +02008181 kvm_release_page_clean(page);
Radim Krčmářcbf71272017-05-19 15:48:51 +02008182 nested_vmx_failInvalid(vcpu);
8183 return kvm_skip_emulated_instruction(vcpu);
8184 }
8185 kunmap(page);
David Hildenbrand53a70da2017-08-03 18:11:05 +02008186 kvm_release_page_clean(page);
Radim Krčmářcbf71272017-05-19 15:48:51 +02008187
8188 vmx->nested.vmxon_ptr = vmptr;
Jim Mattsone29acc52016-11-30 12:03:43 -08008189 ret = enter_vmx_operation(vcpu);
8190 if (ret)
8191 return ret;
Nadav Har'Elec378ae2011-05-25 23:02:54 +03008192
Arthur Chunqi Lia25eb112013-07-04 15:03:33 +08008193 nested_vmx_succeed(vcpu);
Kyle Huey6affcbe2016-11-29 12:40:40 -08008194 return kvm_skip_emulated_instruction(vcpu);
Nadav Har'Elec378ae2011-05-25 23:02:54 +03008195}
8196
8197/*
8198 * Intel's VMX Instruction Reference specifies a common set of prerequisites
8199 * for running VMX instructions (except VMXON, whose prerequisites are
8200 * slightly different). It also specifies what exception to inject otherwise.
Jim Mattson70f3aac2017-04-26 08:53:46 -07008201 * Note that many of these exceptions have priority over VM exits, so they
8202 * don't have to be checked again here.
Nadav Har'Elec378ae2011-05-25 23:02:54 +03008203 */
8204static int nested_vmx_check_permission(struct kvm_vcpu *vcpu)
8205{
Jim Mattsone49fcb82018-07-27 13:44:45 -07008206 if (!to_vmx(vcpu)->nested.vmxon) {
8207 kvm_queue_exception(vcpu, UD_VECTOR);
8208 return 0;
8209 }
8210
Felix Wilhelm727ba742018-06-11 09:43:44 +02008211 if (vmx_get_cpl(vcpu)) {
Jim Mattson36090bf2018-07-27 09:18:50 -07008212 kvm_inject_gp(vcpu, 0);
Felix Wilhelm727ba742018-06-11 09:43:44 +02008213 return 0;
8214 }
8215
Nadav Har'Elec378ae2011-05-25 23:02:54 +03008216 return 1;
8217}
8218
David Matlack8ca44e82017-08-01 14:00:39 -07008219static void vmx_disable_shadow_vmcs(struct vcpu_vmx *vmx)
8220{
8221 vmcs_clear_bits(SECONDARY_VM_EXEC_CONTROL, SECONDARY_EXEC_SHADOW_VMCS);
8222 vmcs_write64(VMCS_LINK_POINTER, -1ull);
8223}
8224
Abel Gordone7953d72013-04-18 14:37:55 +03008225static inline void nested_release_vmcs12(struct vcpu_vmx *vmx)
8226{
Paolo Bonzini9a2a05b2014-07-17 11:55:46 +02008227 if (vmx->nested.current_vmptr == -1ull)
8228 return;
8229
Abel Gordon012f83c2013-04-18 14:39:25 +03008230 if (enable_shadow_vmcs) {
Paolo Bonzini9a2a05b2014-07-17 11:55:46 +02008231 /* copy to memory all shadowed fields in case
8232 they were modified */
8233 copy_shadow_to_vmcs12(vmx);
8234 vmx->nested.sync_shadow_vmcs = false;
David Matlack8ca44e82017-08-01 14:00:39 -07008235 vmx_disable_shadow_vmcs(vmx);
Abel Gordon012f83c2013-04-18 14:39:25 +03008236 }
Wincy Van705699a2015-02-03 23:58:17 +08008237 vmx->nested.posted_intr_nv = -1;
David Matlack4f2777b2016-07-13 17:16:37 -07008238
8239 /* Flush VMCS12 to guest memory */
Paolo Bonzini9f744c52017-07-27 15:54:46 +02008240 kvm_vcpu_write_guest_page(&vmx->vcpu,
8241 vmx->nested.current_vmptr >> PAGE_SHIFT,
8242 vmx->nested.cached_vmcs12, 0, VMCS12_SIZE);
David Matlack4f2777b2016-07-13 17:16:37 -07008243
Paolo Bonzini9a2a05b2014-07-17 11:55:46 +02008244 vmx->nested.current_vmptr = -1ull;
Abel Gordone7953d72013-04-18 14:37:55 +03008245}
8246
Nadav Har'Elec378ae2011-05-25 23:02:54 +03008247/*
8248 * Free whatever needs to be freed from vmx->nested when L1 goes down, or
8249 * just stops using VMX.
8250 */
8251static void free_nested(struct vcpu_vmx *vmx)
8252{
Wanpeng Lib7455822017-11-22 14:04:00 -08008253 if (!vmx->nested.vmxon && !vmx->nested.smm.vmxon)
Nadav Har'Elec378ae2011-05-25 23:02:54 +03008254 return;
Paolo Bonzini9a2a05b2014-07-17 11:55:46 +02008255
Nadav Har'Elec378ae2011-05-25 23:02:54 +03008256 vmx->nested.vmxon = false;
Wanpeng Lib7455822017-11-22 14:04:00 -08008257 vmx->nested.smm.vmxon = false;
Wanpeng Li5c614b32015-10-13 09:18:36 -07008258 free_vpid(vmx->nested.vpid02);
David Matlack8ca44e82017-08-01 14:00:39 -07008259 vmx->nested.posted_intr_nv = -1;
8260 vmx->nested.current_vmptr = -1ull;
Jim Mattson355f4fb2016-10-28 08:29:39 -07008261 if (enable_shadow_vmcs) {
David Matlack8ca44e82017-08-01 14:00:39 -07008262 vmx_disable_shadow_vmcs(vmx);
Jim Mattson355f4fb2016-10-28 08:29:39 -07008263 vmcs_clear(vmx->vmcs01.shadow_vmcs);
8264 free_vmcs(vmx->vmcs01.shadow_vmcs);
8265 vmx->vmcs01.shadow_vmcs = NULL;
8266 }
David Matlack4f2777b2016-07-13 17:16:37 -07008267 kfree(vmx->nested.cached_vmcs12);
Liran Alon61ada742018-06-23 02:35:08 +03008268 kfree(vmx->nested.cached_shadow_vmcs12);
Jim Mattsonde3a0022017-11-27 17:22:25 -06008269 /* Unpin physical memory we referred to in the vmcs02 */
Nadav Har'Elfe3ef052011-05-25 23:10:02 +03008270 if (vmx->nested.apic_access_page) {
David Hildenbrand53a70da2017-08-03 18:11:05 +02008271 kvm_release_page_dirty(vmx->nested.apic_access_page);
Paolo Bonzini48d89b92014-08-26 13:27:46 +02008272 vmx->nested.apic_access_page = NULL;
Nadav Har'Elfe3ef052011-05-25 23:10:02 +03008273 }
Wanpeng Lia7c0b072014-08-21 19:46:50 +08008274 if (vmx->nested.virtual_apic_page) {
David Hildenbrand53a70da2017-08-03 18:11:05 +02008275 kvm_release_page_dirty(vmx->nested.virtual_apic_page);
Paolo Bonzini48d89b92014-08-26 13:27:46 +02008276 vmx->nested.virtual_apic_page = NULL;
Wanpeng Lia7c0b072014-08-21 19:46:50 +08008277 }
Wincy Van705699a2015-02-03 23:58:17 +08008278 if (vmx->nested.pi_desc_page) {
8279 kunmap(vmx->nested.pi_desc_page);
David Hildenbrand53a70da2017-08-03 18:11:05 +02008280 kvm_release_page_dirty(vmx->nested.pi_desc_page);
Wincy Van705699a2015-02-03 23:58:17 +08008281 vmx->nested.pi_desc_page = NULL;
8282 vmx->nested.pi_desc = NULL;
8283 }
Nadav Har'Elff2f6fe2011-05-25 23:05:27 +03008284
Jim Mattsonde3a0022017-11-27 17:22:25 -06008285 free_loaded_vmcs(&vmx->nested.vmcs02);
Nadav Har'Elec378ae2011-05-25 23:02:54 +03008286}
8287
8288/* Emulate the VMXOFF instruction */
8289static int handle_vmoff(struct kvm_vcpu *vcpu)
8290{
8291 if (!nested_vmx_check_permission(vcpu))
8292 return 1;
8293 free_nested(to_vmx(vcpu));
Arthur Chunqi Lia25eb112013-07-04 15:03:33 +08008294 nested_vmx_succeed(vcpu);
Kyle Huey6affcbe2016-11-29 12:40:40 -08008295 return kvm_skip_emulated_instruction(vcpu);
Nadav Har'Elec378ae2011-05-25 23:02:54 +03008296}
8297
Nadav Har'El27d6c862011-05-25 23:06:59 +03008298/* Emulate the VMCLEAR instruction */
8299static int handle_vmclear(struct kvm_vcpu *vcpu)
8300{
8301 struct vcpu_vmx *vmx = to_vmx(vcpu);
Jim Mattson587d7e722017-03-02 12:41:48 -08008302 u32 zero = 0;
Nadav Har'El27d6c862011-05-25 23:06:59 +03008303 gpa_t vmptr;
Nadav Har'El27d6c862011-05-25 23:06:59 +03008304
8305 if (!nested_vmx_check_permission(vcpu))
8306 return 1;
8307
Radim Krčmářcbf71272017-05-19 15:48:51 +02008308 if (nested_vmx_get_vmptr(vcpu, &vmptr))
Nadav Har'El27d6c862011-05-25 23:06:59 +03008309 return 1;
8310
Radim Krčmářcbf71272017-05-19 15:48:51 +02008311 if (!PAGE_ALIGNED(vmptr) || (vmptr >> cpuid_maxphyaddr(vcpu))) {
8312 nested_vmx_failValid(vcpu, VMXERR_VMCLEAR_INVALID_ADDRESS);
8313 return kvm_skip_emulated_instruction(vcpu);
8314 }
8315
8316 if (vmptr == vmx->nested.vmxon_ptr) {
8317 nested_vmx_failValid(vcpu, VMXERR_VMCLEAR_VMXON_POINTER);
8318 return kvm_skip_emulated_instruction(vcpu);
8319 }
8320
Paolo Bonzini9a2a05b2014-07-17 11:55:46 +02008321 if (vmptr == vmx->nested.current_vmptr)
Abel Gordone7953d72013-04-18 14:37:55 +03008322 nested_release_vmcs12(vmx);
Nadav Har'El27d6c862011-05-25 23:06:59 +03008323
Jim Mattson587d7e722017-03-02 12:41:48 -08008324 kvm_vcpu_write_guest(vcpu,
8325 vmptr + offsetof(struct vmcs12, launch_state),
8326 &zero, sizeof(zero));
Nadav Har'El27d6c862011-05-25 23:06:59 +03008327
Nadav Har'El27d6c862011-05-25 23:06:59 +03008328 nested_vmx_succeed(vcpu);
Kyle Huey6affcbe2016-11-29 12:40:40 -08008329 return kvm_skip_emulated_instruction(vcpu);
Nadav Har'El27d6c862011-05-25 23:06:59 +03008330}
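/*
 * Only the launch_state field needs to be written back here: KVM keeps its
 * working copy of the VMCS12 in nested.cached_vmcs12, and if the cleared
 * pointer was the current one, nested_release_vmcs12() above has already
 * flushed that copy to guest memory, so zeroing launch_state is enough for
 * a subsequent VMLAUNCH to see a "clear" controlling VMCS.
 */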
8331
Nadav Har'Elcd232ad2011-05-25 23:10:33 +03008332static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch);
8333
8334/* Emulate the VMLAUNCH instruction */
8335static int handle_vmlaunch(struct kvm_vcpu *vcpu)
8336{
8337 return nested_vmx_run(vcpu, true);
8338}
8339
8340/* Emulate the VMRESUME instruction */
8341static int handle_vmresume(struct kvm_vcpu *vcpu)
8342{
8343
8344 return nested_vmx_run(vcpu, false);
8345}
8346
Nadav Har'El49f705c2011-05-25 23:08:30 +03008347/*
8348 * Read a vmcs12 field. Since these can have varying lengths and we return
8349 * one type, we chose the biggest type (u64) and zero-extend the return value
8350 * to that size. Note that the caller, handle_vmread, might need to use only
8351 * some of the bits we return here (e.g., on 32-bit guests, only 32 bits of
8352 * 64-bit fields are to be returned).
8353 */
Liran Alone2536742018-06-23 02:35:02 +03008354static inline int vmcs12_read_any(struct vmcs12 *vmcs12,
Paolo Bonzinia2ae9df2014-11-04 18:31:19 +01008355 unsigned long field, u64 *ret)
Nadav Har'El49f705c2011-05-25 23:08:30 +03008356{
8357 short offset = vmcs_field_to_offset(field);
8358 char *p;
8359
8360 if (offset < 0)
Paolo Bonzinia2ae9df2014-11-04 18:31:19 +01008361 return offset;
Nadav Har'El49f705c2011-05-25 23:08:30 +03008362
Liran Alone2536742018-06-23 02:35:02 +03008363 p = (char *)vmcs12 + offset;
Nadav Har'El49f705c2011-05-25 23:08:30 +03008364
Jim Mattsond37f4262017-12-22 12:12:16 -08008365 switch (vmcs_field_width(field)) {
8366 case VMCS_FIELD_WIDTH_NATURAL_WIDTH:
Nadav Har'El49f705c2011-05-25 23:08:30 +03008367 *ret = *((natural_width *)p);
Paolo Bonzinia2ae9df2014-11-04 18:31:19 +01008368 return 0;
Jim Mattsond37f4262017-12-22 12:12:16 -08008369 case VMCS_FIELD_WIDTH_U16:
Nadav Har'El49f705c2011-05-25 23:08:30 +03008370 *ret = *((u16 *)p);
Paolo Bonzinia2ae9df2014-11-04 18:31:19 +01008371 return 0;
Jim Mattsond37f4262017-12-22 12:12:16 -08008372 case VMCS_FIELD_WIDTH_U32:
Nadav Har'El49f705c2011-05-25 23:08:30 +03008373 *ret = *((u32 *)p);
Paolo Bonzinia2ae9df2014-11-04 18:31:19 +01008374 return 0;
Jim Mattsond37f4262017-12-22 12:12:16 -08008375 case VMCS_FIELD_WIDTH_U64:
Nadav Har'El49f705c2011-05-25 23:08:30 +03008376 *ret = *((u64 *)p);
Paolo Bonzinia2ae9df2014-11-04 18:31:19 +01008377 return 0;
Nadav Har'El49f705c2011-05-25 23:08:30 +03008378 default:
Paolo Bonzinia2ae9df2014-11-04 18:31:19 +01008379 WARN_ON(1);
8380 return -ENOENT;
Nadav Har'El49f705c2011-05-25 23:08:30 +03008381 }
8382}
8383
Abel Gordon20b97fe2013-04-18 14:36:25 +03008384
Liran Alone2536742018-06-23 02:35:02 +03008385static inline int vmcs12_write_any(struct vmcs12 *vmcs12,
Paolo Bonzinia2ae9df2014-11-04 18:31:19 +01008386 unsigned long field, u64 field_value){
Abel Gordon20b97fe2013-04-18 14:36:25 +03008387 short offset = vmcs_field_to_offset(field);
Liran Alone2536742018-06-23 02:35:02 +03008388 char *p = (char *)vmcs12 + offset;
Abel Gordon20b97fe2013-04-18 14:36:25 +03008389 if (offset < 0)
Paolo Bonzinia2ae9df2014-11-04 18:31:19 +01008390 return offset;
Abel Gordon20b97fe2013-04-18 14:36:25 +03008391
Jim Mattsond37f4262017-12-22 12:12:16 -08008392 switch (vmcs_field_width(field)) {
8393 case VMCS_FIELD_WIDTH_U16:
Abel Gordon20b97fe2013-04-18 14:36:25 +03008394 *(u16 *)p = field_value;
Paolo Bonzinia2ae9df2014-11-04 18:31:19 +01008395 return 0;
Jim Mattsond37f4262017-12-22 12:12:16 -08008396 case VMCS_FIELD_WIDTH_U32:
Abel Gordon20b97fe2013-04-18 14:36:25 +03008397 *(u32 *)p = field_value;
Paolo Bonzinia2ae9df2014-11-04 18:31:19 +01008398 return 0;
Jim Mattsond37f4262017-12-22 12:12:16 -08008399 case VMCS_FIELD_WIDTH_U64:
Abel Gordon20b97fe2013-04-18 14:36:25 +03008400 *(u64 *)p = field_value;
Paolo Bonzinia2ae9df2014-11-04 18:31:19 +01008401 return 0;
Jim Mattsond37f4262017-12-22 12:12:16 -08008402 case VMCS_FIELD_WIDTH_NATURAL_WIDTH:
Abel Gordon20b97fe2013-04-18 14:36:25 +03008403 *(natural_width *)p = field_value;
Paolo Bonzinia2ae9df2014-11-04 18:31:19 +01008404 return 0;
Abel Gordon20b97fe2013-04-18 14:36:25 +03008405 default:
Paolo Bonzinia2ae9df2014-11-04 18:31:19 +01008406 WARN_ON(1);
8407 return -ENOENT;
Abel Gordon20b97fe2013-04-18 14:36:25 +03008408 }
8409
8410}
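/*
 * The width dispatched on above is encoded in the VMCS field number itself:
 * bits 14:13 of the encoding select 16-bit, 64-bit, 32-bit or natural-width,
 * which is what vmcs_field_width() (defined earlier in this file) extracts,
 * while vmcs_field_to_offset() maps the encoding to the field's offset
 * inside struct vmcs12.
 */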
8411
Jim Mattsonf4160e42018-05-29 09:11:33 -07008412/*
8413 * Copy the writable VMCS shadow fields back to the VMCS12, in case
8414 * they have been modified by the L1 guest. Note that the "read-only"
8415 * VM-exit information fields are actually writable if the vCPU is
8416 * configured to support "VMWRITE to any supported field in the VMCS."
8417 */
Abel Gordon16f5b902013-04-18 14:38:25 +03008418static void copy_shadow_to_vmcs12(struct vcpu_vmx *vmx)
8419{
Jim Mattsonf4160e42018-05-29 09:11:33 -07008420 const u16 *fields[] = {
8421 shadow_read_write_fields,
8422 shadow_read_only_fields
8423 };
8424 const int max_fields[] = {
8425 max_shadow_read_write_fields,
8426 max_shadow_read_only_fields
8427 };
8428 int i, q;
Abel Gordon16f5b902013-04-18 14:38:25 +03008429 unsigned long field;
8430 u64 field_value;
Jim Mattson355f4fb2016-10-28 08:29:39 -07008431 struct vmcs *shadow_vmcs = vmx->vmcs01.shadow_vmcs;
Abel Gordon16f5b902013-04-18 14:38:25 +03008432
Jan Kiszka282da872014-10-08 18:05:39 +02008433 preempt_disable();
8434
Abel Gordon16f5b902013-04-18 14:38:25 +03008435 vmcs_load(shadow_vmcs);
8436
Jim Mattsonf4160e42018-05-29 09:11:33 -07008437 for (q = 0; q < ARRAY_SIZE(fields); q++) {
8438 for (i = 0; i < max_fields[q]; i++) {
8439 field = fields[q][i];
8440 field_value = __vmcs_readl(field);
Liran Alone2536742018-06-23 02:35:02 +03008441 vmcs12_write_any(get_vmcs12(&vmx->vcpu), field, field_value);
Jim Mattsonf4160e42018-05-29 09:11:33 -07008442 }
8443 /*
8444 * Skip the VM-exit information fields if they are read-only.
8445 */
8446 if (!nested_cpu_has_vmwrite_any_field(&vmx->vcpu))
8447 break;
Abel Gordon16f5b902013-04-18 14:38:25 +03008448 }
8449
8450 vmcs_clear(shadow_vmcs);
8451 vmcs_load(vmx->loaded_vmcs->vmcs);
Jan Kiszka282da872014-10-08 18:05:39 +02008452
8453 preempt_enable();
Abel Gordon16f5b902013-04-18 14:38:25 +03008454}
8455
Abel Gordonc3114422013-04-18 14:38:55 +03008456static void copy_vmcs12_to_shadow(struct vcpu_vmx *vmx)
8457{
Paolo Bonzini44900ba2017-12-13 12:58:02 +01008458 const u16 *fields[] = {
Mathias Krausec2bae892013-06-26 20:36:21 +02008459 shadow_read_write_fields,
8460 shadow_read_only_fields
Abel Gordonc3114422013-04-18 14:38:55 +03008461 };
Mathias Krausec2bae892013-06-26 20:36:21 +02008462 const int max_fields[] = {
Abel Gordonc3114422013-04-18 14:38:55 +03008463 max_shadow_read_write_fields,
8464 max_shadow_read_only_fields
8465 };
8466 int i, q;
8467 unsigned long field;
8468 u64 field_value = 0;
Jim Mattson355f4fb2016-10-28 08:29:39 -07008469 struct vmcs *shadow_vmcs = vmx->vmcs01.shadow_vmcs;
Abel Gordonc3114422013-04-18 14:38:55 +03008470
8471 vmcs_load(shadow_vmcs);
8472
Mathias Krausec2bae892013-06-26 20:36:21 +02008473 for (q = 0; q < ARRAY_SIZE(fields); q++) {
Abel Gordonc3114422013-04-18 14:38:55 +03008474 for (i = 0; i < max_fields[q]; i++) {
8475 field = fields[q][i];
Liran Alone2536742018-06-23 02:35:02 +03008476 vmcs12_read_any(get_vmcs12(&vmx->vcpu), field, &field_value);
Paolo Bonzini44900ba2017-12-13 12:58:02 +01008477 __vmcs_writel(field, field_value);
Abel Gordonc3114422013-04-18 14:38:55 +03008478 }
8479 }
8480
8481 vmcs_clear(shadow_vmcs);
8482 vmcs_load(vmx->loaded_vmcs->vmcs);
8483}
8484
Nadav Har'El49f705c2011-05-25 23:08:30 +03008485/*
8486 * VMX instructions which assume a current vmcs12 (i.e., that VMPTRLD was
8487 * used before) all generate the same failure when it is missing.
8488 */
8489static int nested_vmx_check_vmcs12(struct kvm_vcpu *vcpu)
8490{
8491 struct vcpu_vmx *vmx = to_vmx(vcpu);
8492 if (vmx->nested.current_vmptr == -1ull) {
8493 nested_vmx_failInvalid(vcpu);
Nadav Har'El49f705c2011-05-25 23:08:30 +03008494 return 0;
8495 }
8496 return 1;
8497}
8498
8499static int handle_vmread(struct kvm_vcpu *vcpu)
8500{
8501 unsigned long field;
8502 u64 field_value;
8503 unsigned long exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
8504 u32 vmx_instruction_info = vmcs_read32(VMX_INSTRUCTION_INFO);
8505 gva_t gva = 0;
Liran Alon6d894f42018-06-23 02:35:09 +03008506 struct vmcs12 *vmcs12;
Nadav Har'El49f705c2011-05-25 23:08:30 +03008507
Kyle Hueyeb277562016-11-29 12:40:39 -08008508 if (!nested_vmx_check_permission(vcpu))
Nadav Har'El49f705c2011-05-25 23:08:30 +03008509 return 1;
8510
Kyle Huey6affcbe2016-11-29 12:40:40 -08008511 if (!nested_vmx_check_vmcs12(vcpu))
8512 return kvm_skip_emulated_instruction(vcpu);
Kyle Hueyeb277562016-11-29 12:40:39 -08008513
Liran Alon6d894f42018-06-23 02:35:09 +03008514 if (!is_guest_mode(vcpu))
8515 vmcs12 = get_vmcs12(vcpu);
8516 else {
8517 /*
8518		 * When vmcs12->vmcs_link_pointer is -1ull, any VMREAD
8519		 * of a shadowed field sets the ALU flags for VMfailInvalid.
8520 */
8521 if (get_vmcs12(vcpu)->vmcs_link_pointer == -1ull) {
8522 nested_vmx_failInvalid(vcpu);
8523 return kvm_skip_emulated_instruction(vcpu);
8524 }
8525 vmcs12 = get_shadow_vmcs12(vcpu);
8526 }
8527
Nadav Har'El49f705c2011-05-25 23:08:30 +03008528 /* Decode instruction info and find the field to read */
Nadav Amit27e6fb52014-06-18 17:19:26 +03008529 field = kvm_register_readl(vcpu, (((vmx_instruction_info) >> 28) & 0xf));
Nadav Har'El49f705c2011-05-25 23:08:30 +03008530 /* Read the field, zero-extended to a u64 field_value */
Liran Alon6d894f42018-06-23 02:35:09 +03008531 if (vmcs12_read_any(vmcs12, field, &field_value) < 0) {
Nadav Har'El49f705c2011-05-25 23:08:30 +03008532 nested_vmx_failValid(vcpu, VMXERR_UNSUPPORTED_VMCS_COMPONENT);
Kyle Huey6affcbe2016-11-29 12:40:40 -08008533 return kvm_skip_emulated_instruction(vcpu);
Nadav Har'El49f705c2011-05-25 23:08:30 +03008534 }
8535 /*
8536 * Now copy part of this value to register or memory, as requested.
8537 * Note that the number of bits actually copied is 32 or 64 depending
8538 * on the guest's mode (32 or 64 bit), not on the given field's length.
8539 */
8540 if (vmx_instruction_info & (1u << 10)) {
Nadav Amit27e6fb52014-06-18 17:19:26 +03008541 kvm_register_writel(vcpu, (((vmx_instruction_info) >> 3) & 0xf),
Nadav Har'El49f705c2011-05-25 23:08:30 +03008542 field_value);
8543 } else {
8544 if (get_vmx_mem_address(vcpu, exit_qualification,
Eugene Korenevskyf9eb4af2015-04-17 02:22:21 +00008545 vmx_instruction_info, true, &gva))
Nadav Har'El49f705c2011-05-25 23:08:30 +03008546 return 1;
Felix Wilhelm727ba742018-06-11 09:43:44 +02008547 /* _system ok, nested_vmx_check_permission has verified cpl=0 */
Paolo Bonzinice14e868a2018-06-06 17:37:49 +02008548 kvm_write_guest_virt_system(vcpu, gva, &field_value,
8549 (is_long_mode(vcpu) ? 8 : 4), NULL);
Nadav Har'El49f705c2011-05-25 23:08:30 +03008550 }
8551
8552 nested_vmx_succeed(vcpu);
Kyle Huey6affcbe2016-11-29 12:40:40 -08008553 return kvm_skip_emulated_instruction(vcpu);
Nadav Har'El49f705c2011-05-25 23:08:30 +03008554}
8555
8556
8557static int handle_vmwrite(struct kvm_vcpu *vcpu)
8558{
8559 unsigned long field;
8560 gva_t gva;
Paolo Bonzini74a497f2017-12-20 13:55:39 +01008561 struct vcpu_vmx *vmx = to_vmx(vcpu);
Nadav Har'El49f705c2011-05-25 23:08:30 +03008562 unsigned long exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
8563 u32 vmx_instruction_info = vmcs_read32(VMX_INSTRUCTION_INFO);
Paolo Bonzini74a497f2017-12-20 13:55:39 +01008564
Nadav Har'El49f705c2011-05-25 23:08:30 +03008565 /* The value to write might be 32 or 64 bits, depending on L1's long
8566 * mode, and eventually we need to write that into a field of several
8567 * possible lengths. The code below first zero-extends the value to 64
Adam Buchbinder6a6256f2016-02-23 15:34:30 -08008568	 * bits (field_value), and then copies only the appropriate number of
Nadav Har'El49f705c2011-05-25 23:08:30 +03008569 * bits into the vmcs12 field.
8570 */
8571 u64 field_value = 0;
8572 struct x86_exception e;
Liran Alon6d894f42018-06-23 02:35:09 +03008573 struct vmcs12 *vmcs12;
Nadav Har'El49f705c2011-05-25 23:08:30 +03008574
Kyle Hueyeb277562016-11-29 12:40:39 -08008575 if (!nested_vmx_check_permission(vcpu))
Nadav Har'El49f705c2011-05-25 23:08:30 +03008576 return 1;
8577
Kyle Huey6affcbe2016-11-29 12:40:40 -08008578 if (!nested_vmx_check_vmcs12(vcpu))
8579 return kvm_skip_emulated_instruction(vcpu);
Kyle Hueyeb277562016-11-29 12:40:39 -08008580
Nadav Har'El49f705c2011-05-25 23:08:30 +03008581 if (vmx_instruction_info & (1u << 10))
Nadav Amit27e6fb52014-06-18 17:19:26 +03008582 field_value = kvm_register_readl(vcpu,
Nadav Har'El49f705c2011-05-25 23:08:30 +03008583 (((vmx_instruction_info) >> 3) & 0xf));
8584 else {
8585 if (get_vmx_mem_address(vcpu, exit_qualification,
Eugene Korenevskyf9eb4af2015-04-17 02:22:21 +00008586 vmx_instruction_info, false, &gva))
Nadav Har'El49f705c2011-05-25 23:08:30 +03008587 return 1;
Paolo Bonzinice14e868a2018-06-06 17:37:49 +02008588 if (kvm_read_guest_virt(vcpu, gva, &field_value,
8589 (is_64_bit_mode(vcpu) ? 8 : 4), &e)) {
Nadav Har'El49f705c2011-05-25 23:08:30 +03008590 kvm_inject_page_fault(vcpu, &e);
8591 return 1;
8592 }
8593 }
8594
8595
Nadav Amit27e6fb52014-06-18 17:19:26 +03008596 field = kvm_register_readl(vcpu, (((vmx_instruction_info) >> 28) & 0xf));
Jim Mattsonf4160e42018-05-29 09:11:33 -07008597 /*
8598 * If the vCPU supports "VMWRITE to any supported field in the
8599 * VMCS," then the "read-only" fields are actually read/write.
8600 */
8601 if (vmcs_field_readonly(field) &&
8602 !nested_cpu_has_vmwrite_any_field(vcpu)) {
Nadav Har'El49f705c2011-05-25 23:08:30 +03008603 nested_vmx_failValid(vcpu,
8604 VMXERR_VMWRITE_READ_ONLY_VMCS_COMPONENT);
Kyle Huey6affcbe2016-11-29 12:40:40 -08008605 return kvm_skip_emulated_instruction(vcpu);
Nadav Har'El49f705c2011-05-25 23:08:30 +03008606 }
8607
Liran Alon6d894f42018-06-23 02:35:09 +03008608 if (!is_guest_mode(vcpu))
8609 vmcs12 = get_vmcs12(vcpu);
8610 else {
8611 /*
8612		 * When vmcs12->vmcs_link_pointer is -1ull, any VMWRITE
8613		 * to a shadowed field sets the ALU flags for VMfailInvalid.
8614 */
8615 if (get_vmcs12(vcpu)->vmcs_link_pointer == -1ull) {
8616 nested_vmx_failInvalid(vcpu);
8617 return kvm_skip_emulated_instruction(vcpu);
8618 }
8619 vmcs12 = get_shadow_vmcs12(vcpu);
8620
8621 }
8622
8623 if (vmcs12_write_any(vmcs12, field, field_value) < 0) {
Nadav Har'El49f705c2011-05-25 23:08:30 +03008624 nested_vmx_failValid(vcpu, VMXERR_UNSUPPORTED_VMCS_COMPONENT);
Kyle Huey6affcbe2016-11-29 12:40:40 -08008625 return kvm_skip_emulated_instruction(vcpu);
Nadav Har'El49f705c2011-05-25 23:08:30 +03008626 }
8627
Liran Alon6d894f42018-06-23 02:35:09 +03008628 /*
8629	 * Do not track vmcs12 dirty state when in guest mode,
8630	 * since we actually dirty the shadow vmcs12 instead of vmcs12.
8631 */
8632 if (!is_guest_mode(vcpu)) {
8633 switch (field) {
Paolo Bonzini74a497f2017-12-20 13:55:39 +01008634#define SHADOW_FIELD_RW(x) case x:
8635#include "vmx_shadow_fields.h"
Liran Alon6d894f42018-06-23 02:35:09 +03008636 /*
8637 * The fields that can be updated by L1 without a vmexit are
8638		 * always updated in the vmcs02; the others go down the slow
8639 * path of prepare_vmcs02.
8640 */
8641 break;
8642 default:
8643 vmx->nested.dirty_vmcs12 = true;
8644 break;
8645 }
Paolo Bonzini74a497f2017-12-20 13:55:39 +01008646 }
8647
Nadav Har'El49f705c2011-05-25 23:08:30 +03008648 nested_vmx_succeed(vcpu);
Kyle Huey6affcbe2016-11-29 12:40:40 -08008649 return kvm_skip_emulated_instruction(vcpu);
Nadav Har'El49f705c2011-05-25 23:08:30 +03008650}
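
/*
 * Illustrative sketch, not part of the original file: how handle_vmread()
 * and handle_vmwrite() above carve up the VMX instruction-information
 * field. Only the bits those two handlers actually consume are decoded,
 * and the struct and field names here are made up for the example.
 */
struct vmx_insn_operands {
	bool reg_operand;	/* bit 10: operand is a register, not memory */
	u8 data_reg;		/* bits 3..6: VMREAD destination / VMWRITE source */
	u8 field_reg;		/* bits 28..31: register holding the VMCS field encoding */
};

static inline struct vmx_insn_operands
decode_vmx_instruction_info(u32 vmx_instruction_info)
{
	struct vmx_insn_operands op = {
		.reg_operand	= vmx_instruction_info & (1u << 10),
		.data_reg	= (vmx_instruction_info >> 3) & 0xf,
		.field_reg	= (vmx_instruction_info >> 28) & 0xf,
	};

	return op;
}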
8651
Jim Mattsona8bc2842016-11-30 12:03:44 -08008652static void set_current_vmptr(struct vcpu_vmx *vmx, gpa_t vmptr)
8653{
8654 vmx->nested.current_vmptr = vmptr;
8655 if (enable_shadow_vmcs) {
8656 vmcs_set_bits(SECONDARY_VM_EXEC_CONTROL,
8657 SECONDARY_EXEC_SHADOW_VMCS);
8658 vmcs_write64(VMCS_LINK_POINTER,
8659 __pa(vmx->vmcs01.shadow_vmcs));
8660 vmx->nested.sync_shadow_vmcs = true;
8661 }
Paolo Bonzini74a497f2017-12-20 13:55:39 +01008662 vmx->nested.dirty_vmcs12 = true;
Jim Mattsona8bc2842016-11-30 12:03:44 -08008663}
8664
Nadav Har'El63846662011-05-25 23:07:29 +03008665/* Emulate the VMPTRLD instruction */
8666static int handle_vmptrld(struct kvm_vcpu *vcpu)
8667{
8668 struct vcpu_vmx *vmx = to_vmx(vcpu);
Nadav Har'El63846662011-05-25 23:07:29 +03008669 gpa_t vmptr;
Nadav Har'El63846662011-05-25 23:07:29 +03008670
8671 if (!nested_vmx_check_permission(vcpu))
8672 return 1;
8673
Radim Krčmářcbf71272017-05-19 15:48:51 +02008674 if (nested_vmx_get_vmptr(vcpu, &vmptr))
Nadav Har'El63846662011-05-25 23:07:29 +03008675 return 1;
8676
Radim Krčmářcbf71272017-05-19 15:48:51 +02008677 if (!PAGE_ALIGNED(vmptr) || (vmptr >> cpuid_maxphyaddr(vcpu))) {
8678 nested_vmx_failValid(vcpu, VMXERR_VMPTRLD_INVALID_ADDRESS);
8679 return kvm_skip_emulated_instruction(vcpu);
8680 }
8681
8682 if (vmptr == vmx->nested.vmxon_ptr) {
8683 nested_vmx_failValid(vcpu, VMXERR_VMPTRLD_VMXON_POINTER);
8684 return kvm_skip_emulated_instruction(vcpu);
8685 }
8686
Nadav Har'El63846662011-05-25 23:07:29 +03008687 if (vmx->nested.current_vmptr != vmptr) {
8688 struct vmcs12 *new_vmcs12;
8689 struct page *page;
David Hildenbrand5e2f30b2017-08-03 18:11:04 +02008690 page = kvm_vcpu_gpa_to_page(vcpu, vmptr);
8691 if (is_error_page(page)) {
Nadav Har'El63846662011-05-25 23:07:29 +03008692 nested_vmx_failInvalid(vcpu);
Kyle Huey6affcbe2016-11-29 12:40:40 -08008693 return kvm_skip_emulated_instruction(vcpu);
Nadav Har'El63846662011-05-25 23:07:29 +03008694 }
8695 new_vmcs12 = kmap(page);
Liran Alon392b2f22018-06-23 02:35:01 +03008696 if (new_vmcs12->hdr.revision_id != VMCS12_REVISION ||
Liran Alonfa97d7d2018-07-18 14:07:59 +02008697 (new_vmcs12->hdr.shadow_vmcs &&
8698 !nested_cpu_has_vmx_shadow_vmcs(vcpu))) {
Nadav Har'El63846662011-05-25 23:07:29 +03008699 kunmap(page);
David Hildenbrand53a70da2017-08-03 18:11:05 +02008700 kvm_release_page_clean(page);
Nadav Har'El63846662011-05-25 23:07:29 +03008701 nested_vmx_failValid(vcpu,
8702 VMXERR_VMPTRLD_INCORRECT_VMCS_REVISION_ID);
Kyle Huey6affcbe2016-11-29 12:40:40 -08008703 return kvm_skip_emulated_instruction(vcpu);
Nadav Har'El63846662011-05-25 23:07:29 +03008704 }
Nadav Har'El63846662011-05-25 23:07:29 +03008705
Paolo Bonzini9a2a05b2014-07-17 11:55:46 +02008706 nested_release_vmcs12(vmx);
David Matlack4f2777b2016-07-13 17:16:37 -07008707 /*
8708 * Load VMCS12 from guest memory since it is not already
8709 * cached.
8710 */
Paolo Bonzini9f744c52017-07-27 15:54:46 +02008711 memcpy(vmx->nested.cached_vmcs12, new_vmcs12, VMCS12_SIZE);
8712 kunmap(page);
David Hildenbrand53a70da2017-08-03 18:11:05 +02008713 kvm_release_page_clean(page);
Paolo Bonzini9f744c52017-07-27 15:54:46 +02008714
Jim Mattsona8bc2842016-11-30 12:03:44 -08008715 set_current_vmptr(vmx, vmptr);
Nadav Har'El63846662011-05-25 23:07:29 +03008716 }
8717
8718 nested_vmx_succeed(vcpu);
Kyle Huey6affcbe2016-11-29 12:40:40 -08008719 return kvm_skip_emulated_instruction(vcpu);
Nadav Har'El63846662011-05-25 23:07:29 +03008720}
8721
Nadav Har'El6a4d7552011-05-25 23:08:00 +03008722/* Emulate the VMPTRST instruction */
8723static int handle_vmptrst(struct kvm_vcpu *vcpu)
8724{
8725 unsigned long exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
8726 u32 vmx_instruction_info = vmcs_read32(VMX_INSTRUCTION_INFO);
8727 gva_t vmcs_gva;
8728 struct x86_exception e;
8729
8730 if (!nested_vmx_check_permission(vcpu))
8731 return 1;
8732
8733 if (get_vmx_mem_address(vcpu, exit_qualification,
Eugene Korenevskyf9eb4af2015-04-17 02:22:21 +00008734 vmx_instruction_info, true, &vmcs_gva))
Nadav Har'El6a4d7552011-05-25 23:08:00 +03008735 return 1;
Felix Wilhelm727ba742018-06-11 09:43:44 +02008736 /* *_system ok, nested_vmx_check_permission has verified cpl=0 */
Paolo Bonzinice14e868a2018-06-06 17:37:49 +02008737 if (kvm_write_guest_virt_system(vcpu, vmcs_gva,
8738 (void *)&to_vmx(vcpu)->nested.current_vmptr,
8739 sizeof(u64), &e)) {
Nadav Har'El6a4d7552011-05-25 23:08:00 +03008740 kvm_inject_page_fault(vcpu, &e);
8741 return 1;
8742 }
8743 nested_vmx_succeed(vcpu);
Kyle Huey6affcbe2016-11-29 12:40:40 -08008744 return kvm_skip_emulated_instruction(vcpu);
Nadav Har'El6a4d7552011-05-25 23:08:00 +03008745}
8746
Nadav Har'Elbfd0a562013-08-05 11:07:17 +03008747/* Emulate the INVEPT instruction */
8748static int handle_invept(struct kvm_vcpu *vcpu)
8749{
Wincy Vanb9c237b2015-02-03 23:56:30 +08008750 struct vcpu_vmx *vmx = to_vmx(vcpu);
Nadav Har'Elbfd0a562013-08-05 11:07:17 +03008751 u32 vmx_instruction_info, types;
8752 unsigned long type;
8753 gva_t gva;
8754 struct x86_exception e;
8755 struct {
8756 u64 eptp, gpa;
8757 } operand;
Nadav Har'Elbfd0a562013-08-05 11:07:17 +03008758
Paolo Bonzini6677f3d2018-02-26 13:40:08 +01008759 if (!(vmx->nested.msrs.secondary_ctls_high &
Wincy Vanb9c237b2015-02-03 23:56:30 +08008760 SECONDARY_EXEC_ENABLE_EPT) ||
Paolo Bonzini6677f3d2018-02-26 13:40:08 +01008761 !(vmx->nested.msrs.ept_caps & VMX_EPT_INVEPT_BIT)) {
Nadav Har'Elbfd0a562013-08-05 11:07:17 +03008762 kvm_queue_exception(vcpu, UD_VECTOR);
8763 return 1;
8764 }
8765
8766 if (!nested_vmx_check_permission(vcpu))
8767 return 1;
8768
Nadav Har'Elbfd0a562013-08-05 11:07:17 +03008769 vmx_instruction_info = vmcs_read32(VMX_INSTRUCTION_INFO);
Nadav Amit27e6fb52014-06-18 17:19:26 +03008770 type = kvm_register_readl(vcpu, (vmx_instruction_info >> 28) & 0xf);
Nadav Har'Elbfd0a562013-08-05 11:07:17 +03008771
Paolo Bonzini6677f3d2018-02-26 13:40:08 +01008772 types = (vmx->nested.msrs.ept_caps >> VMX_EPT_EXTENT_SHIFT) & 6;
Nadav Har'Elbfd0a562013-08-05 11:07:17 +03008773
Jim Mattson85c856b2016-10-26 08:38:38 -07008774 if (type >= 32 || !(types & (1 << type))) {
Nadav Har'Elbfd0a562013-08-05 11:07:17 +03008775 nested_vmx_failValid(vcpu,
8776 VMXERR_INVALID_OPERAND_TO_INVEPT_INVVPID);
Kyle Huey6affcbe2016-11-29 12:40:40 -08008777 return kvm_skip_emulated_instruction(vcpu);
Nadav Har'Elbfd0a562013-08-05 11:07:17 +03008778 }
8779
8780 /* According to the Intel VMX instruction reference, the memory
8781 * operand is read even if it isn't needed (e.g., for type==global)
8782 */
8783 if (get_vmx_mem_address(vcpu, vmcs_readl(EXIT_QUALIFICATION),
Eugene Korenevskyf9eb4af2015-04-17 02:22:21 +00008784 vmx_instruction_info, false, &gva))
Nadav Har'Elbfd0a562013-08-05 11:07:17 +03008785 return 1;
Paolo Bonzinice14e868a2018-06-06 17:37:49 +02008786 if (kvm_read_guest_virt(vcpu, gva, &operand, sizeof(operand), &e)) {
Nadav Har'Elbfd0a562013-08-05 11:07:17 +03008787 kvm_inject_page_fault(vcpu, &e);
8788 return 1;
8789 }
8790
8791 switch (type) {
Nadav Har'Elbfd0a562013-08-05 11:07:17 +03008792 case VMX_EPT_EXTENT_GLOBAL:
Bandan Das45e11812016-08-02 16:32:36 -04008793 /*
8794 * TODO: track mappings and invalidate
8795 * single context requests appropriately
8796 */
8797 case VMX_EPT_EXTENT_CONTEXT:
Nadav Har'Elbfd0a562013-08-05 11:07:17 +03008798 kvm_mmu_sync_roots(vcpu);
Liang Chen77c39132014-09-18 12:38:37 -04008799 kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
Nadav Har'Elbfd0a562013-08-05 11:07:17 +03008800 nested_vmx_succeed(vcpu);
8801 break;
8802 default:
8803 BUG_ON(1);
8804 break;
8805 }
8806
Kyle Huey6affcbe2016-11-29 12:40:40 -08008807 return kvm_skip_emulated_instruction(vcpu);
Nadav Har'Elbfd0a562013-08-05 11:07:17 +03008808}
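
/*
 * Illustrative sketch, not part of the original file: the INVEPT type check
 * performed by handle_invept() above. ept_caps is the EPT/VPID capability
 * value KVM exposes to L1; masking with 6 keeps only the single-context
 * (type 1) and global (type 2) extents.
 */
static inline bool nested_invept_type_supported(u64 ept_caps, unsigned long type)
{
	u32 types = (ept_caps >> VMX_EPT_EXTENT_SHIFT) & 6;

	return type < 32 && (types & (1 << type));
}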
8809
Petr Matouseka642fc32014-09-23 20:22:30 +02008810static int handle_invvpid(struct kvm_vcpu *vcpu)
8811{
Wanpeng Li99b83ac2015-10-13 09:12:21 -07008812 struct vcpu_vmx *vmx = to_vmx(vcpu);
8813 u32 vmx_instruction_info;
8814 unsigned long type, types;
8815 gva_t gva;
8816 struct x86_exception e;
Jim Mattson40352602017-06-28 09:37:37 -07008817 struct {
8818 u64 vpid;
8819 u64 gla;
8820 } operand;
Wanpeng Li99b83ac2015-10-13 09:12:21 -07008821
Paolo Bonzini6677f3d2018-02-26 13:40:08 +01008822 if (!(vmx->nested.msrs.secondary_ctls_high &
Wanpeng Li99b83ac2015-10-13 09:12:21 -07008823 SECONDARY_EXEC_ENABLE_VPID) ||
Paolo Bonzini6677f3d2018-02-26 13:40:08 +01008824 !(vmx->nested.msrs.vpid_caps & VMX_VPID_INVVPID_BIT)) {
Wanpeng Li99b83ac2015-10-13 09:12:21 -07008825 kvm_queue_exception(vcpu, UD_VECTOR);
8826 return 1;
8827 }
8828
8829 if (!nested_vmx_check_permission(vcpu))
8830 return 1;
8831
8832 vmx_instruction_info = vmcs_read32(VMX_INSTRUCTION_INFO);
8833 type = kvm_register_readl(vcpu, (vmx_instruction_info >> 28) & 0xf);
8834
Paolo Bonzini6677f3d2018-02-26 13:40:08 +01008835 types = (vmx->nested.msrs.vpid_caps &
Jan Dakinevichbcdde302016-10-28 07:00:30 +03008836 VMX_VPID_EXTENT_SUPPORTED_MASK) >> 8;
Wanpeng Li99b83ac2015-10-13 09:12:21 -07008837
Jim Mattson85c856b2016-10-26 08:38:38 -07008838 if (type >= 32 || !(types & (1 << type))) {
Wanpeng Li99b83ac2015-10-13 09:12:21 -07008839 nested_vmx_failValid(vcpu,
8840 VMXERR_INVALID_OPERAND_TO_INVEPT_INVVPID);
Kyle Huey6affcbe2016-11-29 12:40:40 -08008841 return kvm_skip_emulated_instruction(vcpu);
Wanpeng Li99b83ac2015-10-13 09:12:21 -07008842 }
8843
8844	/* According to the Intel VMX instruction reference, the memory
8845 * operand is read even if it isn't needed (e.g., for type==global)
8846 */
8847 if (get_vmx_mem_address(vcpu, vmcs_readl(EXIT_QUALIFICATION),
8848 vmx_instruction_info, false, &gva))
8849 return 1;
Paolo Bonzinice14e868a2018-06-06 17:37:49 +02008850 if (kvm_read_guest_virt(vcpu, gva, &operand, sizeof(operand), &e)) {
Wanpeng Li99b83ac2015-10-13 09:12:21 -07008851 kvm_inject_page_fault(vcpu, &e);
8852 return 1;
8853 }
Jim Mattson40352602017-06-28 09:37:37 -07008854 if (operand.vpid >> 16) {
8855 nested_vmx_failValid(vcpu,
8856 VMXERR_INVALID_OPERAND_TO_INVEPT_INVVPID);
8857 return kvm_skip_emulated_instruction(vcpu);
8858 }
Wanpeng Li99b83ac2015-10-13 09:12:21 -07008859
8860 switch (type) {
Jan Dakinevichbcdde302016-10-28 07:00:30 +03008861 case VMX_VPID_EXTENT_INDIVIDUAL_ADDR:
Liran Aloncd9a4912018-05-22 17:16:15 +03008862 if (!operand.vpid ||
8863 is_noncanonical_address(operand.gla, vcpu)) {
Jim Mattson40352602017-06-28 09:37:37 -07008864 nested_vmx_failValid(vcpu,
8865 VMXERR_INVALID_OPERAND_TO_INVEPT_INVVPID);
8866 return kvm_skip_emulated_instruction(vcpu);
8867 }
Liran Aloncd9a4912018-05-22 17:16:15 +03008868 if (cpu_has_vmx_invvpid_individual_addr() &&
8869 vmx->nested.vpid02) {
8870 __invvpid(VMX_VPID_EXTENT_INDIVIDUAL_ADDR,
8871 vmx->nested.vpid02, operand.gla);
8872 } else
8873 __vmx_flush_tlb(vcpu, vmx->nested.vpid02, true);
8874 break;
Paolo Bonzinief697a72016-03-18 16:58:38 +01008875 case VMX_VPID_EXTENT_SINGLE_CONTEXT:
Jan Dakinevichbcdde302016-10-28 07:00:30 +03008876 case VMX_VPID_EXTENT_SINGLE_NON_GLOBAL:
Jim Mattson40352602017-06-28 09:37:37 -07008877 if (!operand.vpid) {
Jan Dakinevichbcdde302016-10-28 07:00:30 +03008878 nested_vmx_failValid(vcpu,
8879 VMXERR_INVALID_OPERAND_TO_INVEPT_INVVPID);
Kyle Huey6affcbe2016-11-29 12:40:40 -08008880 return kvm_skip_emulated_instruction(vcpu);
Jan Dakinevichbcdde302016-10-28 07:00:30 +03008881 }
Liran Aloncd9a4912018-05-22 17:16:15 +03008882 __vmx_flush_tlb(vcpu, vmx->nested.vpid02, true);
Jan Dakinevichbcdde302016-10-28 07:00:30 +03008883 break;
Wanpeng Li99b83ac2015-10-13 09:12:21 -07008884 case VMX_VPID_EXTENT_ALL_CONTEXT:
Liran Aloncd9a4912018-05-22 17:16:15 +03008885 __vmx_flush_tlb(vcpu, vmx->nested.vpid02, true);
Wanpeng Li99b83ac2015-10-13 09:12:21 -07008886 break;
8887 default:
Jan Dakinevichbcdde302016-10-28 07:00:30 +03008888 WARN_ON_ONCE(1);
Kyle Huey6affcbe2016-11-29 12:40:40 -08008889 return kvm_skip_emulated_instruction(vcpu);
Wanpeng Li99b83ac2015-10-13 09:12:21 -07008890 }
8891
Jan Dakinevichbcdde302016-10-28 07:00:30 +03008892 nested_vmx_succeed(vcpu);
8893
Kyle Huey6affcbe2016-11-29 12:40:40 -08008894 return kvm_skip_emulated_instruction(vcpu);
Petr Matouseka642fc32014-09-23 20:22:30 +02008895}
8896
Junaid Shahideb4b2482018-06-27 14:59:14 -07008897static int handle_invpcid(struct kvm_vcpu *vcpu)
8898{
8899 u32 vmx_instruction_info;
8900 unsigned long type;
8901 bool pcid_enabled;
8902 gva_t gva;
8903 struct x86_exception e;
Junaid Shahidb94742c2018-06-27 14:59:20 -07008904 unsigned i;
8905 unsigned long roots_to_free = 0;
Junaid Shahideb4b2482018-06-27 14:59:14 -07008906 struct {
8907 u64 pcid;
8908 u64 gla;
8909 } operand;
8910
8911 if (!guest_cpuid_has(vcpu, X86_FEATURE_INVPCID)) {
8912 kvm_queue_exception(vcpu, UD_VECTOR);
8913 return 1;
8914 }
8915
8916 vmx_instruction_info = vmcs_read32(VMX_INSTRUCTION_INFO);
8917 type = kvm_register_readl(vcpu, (vmx_instruction_info >> 28) & 0xf);
8918
8919 if (type > 3) {
8920 kvm_inject_gp(vcpu, 0);
8921 return 1;
8922 }
8923
8924 /* According to the Intel instruction reference, the memory operand
8925 * is read even if it isn't needed (e.g., for type==all)
8926 */
8927 if (get_vmx_mem_address(vcpu, vmcs_readl(EXIT_QUALIFICATION),
8928 vmx_instruction_info, false, &gva))
8929 return 1;
8930
8931 if (kvm_read_guest_virt(vcpu, gva, &operand, sizeof(operand), &e)) {
8932 kvm_inject_page_fault(vcpu, &e);
8933 return 1;
8934 }
8935
8936 if (operand.pcid >> 12 != 0) {
8937 kvm_inject_gp(vcpu, 0);
8938 return 1;
8939 }
8940
8941 pcid_enabled = kvm_read_cr4_bits(vcpu, X86_CR4_PCIDE);
8942
8943 switch (type) {
8944 case INVPCID_TYPE_INDIV_ADDR:
8945 if ((!pcid_enabled && (operand.pcid != 0)) ||
8946 is_noncanonical_address(operand.gla, vcpu)) {
8947 kvm_inject_gp(vcpu, 0);
8948 return 1;
8949 }
8950 kvm_mmu_invpcid_gva(vcpu, operand.gla, operand.pcid);
8951 return kvm_skip_emulated_instruction(vcpu);
8952
8953 case INVPCID_TYPE_SINGLE_CTXT:
8954 if (!pcid_enabled && (operand.pcid != 0)) {
8955 kvm_inject_gp(vcpu, 0);
8956 return 1;
8957 }
8958
8959 if (kvm_get_active_pcid(vcpu) == operand.pcid) {
8960 kvm_mmu_sync_roots(vcpu);
8961 kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
8962 }
8963
Junaid Shahidb94742c2018-06-27 14:59:20 -07008964 for (i = 0; i < KVM_MMU_NUM_PREV_ROOTS; i++)
8965 if (kvm_get_pcid(vcpu, vcpu->arch.mmu.prev_roots[i].cr3)
8966 == operand.pcid)
8967 roots_to_free |= KVM_MMU_ROOT_PREVIOUS(i);
Junaid Shahidade61e22018-06-27 14:59:15 -07008968
Junaid Shahidb94742c2018-06-27 14:59:20 -07008969 kvm_mmu_free_roots(vcpu, roots_to_free);
Junaid Shahideb4b2482018-06-27 14:59:14 -07008970 /*
Junaid Shahidb94742c2018-06-27 14:59:20 -07008971 * If neither the current cr3 nor any of the prev_roots use the
Junaid Shahidade61e22018-06-27 14:59:15 -07008972 * given PCID, then nothing needs to be done here because a
8973 * resync will happen anyway before switching to any other CR3.
Junaid Shahideb4b2482018-06-27 14:59:14 -07008974 */
8975
8976 return kvm_skip_emulated_instruction(vcpu);
8977
8978 case INVPCID_TYPE_ALL_NON_GLOBAL:
8979 /*
8980 * Currently, KVM doesn't mark global entries in the shadow
8981 * page tables, so a non-global flush just degenerates to a
8982 * global flush. If needed, we could optimize this later by
8983 * keeping track of global entries in shadow page tables.
8984 */
8985
8986 /* fall-through */
8987 case INVPCID_TYPE_ALL_INCL_GLOBAL:
8988 kvm_mmu_unload(vcpu);
8989 return kvm_skip_emulated_instruction(vcpu);
8990
8991 default:
8992 BUG(); /* We have already checked above that type <= 3 */
8993 }
8994}
8995
Kai Huang843e4332015-01-28 10:54:28 +08008996static int handle_pml_full(struct kvm_vcpu *vcpu)
8997{
8998 unsigned long exit_qualification;
8999
9000 trace_kvm_pml_full(vcpu->vcpu_id);
9001
9002 exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
9003
9004 /*
9005	 * If the PML-buffer-full exit happened while executing IRET from an
9006	 * NMI, the "blocked by NMI" bit has to be set before the next VM entry.
9007 */
9008 if (!(to_vmx(vcpu)->idt_vectoring_info & VECTORING_INFO_VALID_MASK) &&
Paolo Bonzinid02fcf52017-11-06 13:31:13 +01009009 enable_vnmi &&
Kai Huang843e4332015-01-28 10:54:28 +08009010 (exit_qualification & INTR_INFO_UNBLOCK_NMI))
9011 vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO,
9012 GUEST_INTR_STATE_NMI);
9013
9014 /*
9015	 * The PML buffer was already flushed at the beginning of the VMEXIT.
9016	 * Nothing more to do here; no userspace involvement is needed for PML.
9017 */
9018 return 1;
9019}
9020
Yunhong Jiang64672c92016-06-13 14:19:59 -07009021static int handle_preemption_timer(struct kvm_vcpu *vcpu)
9022{
9023 kvm_lapic_expired_hv_timer(vcpu);
9024 return 1;
9025}
9026
Bandan Das41ab9372017-08-03 15:54:43 -04009027static bool valid_ept_address(struct kvm_vcpu *vcpu, u64 address)
9028{
9029 struct vcpu_vmx *vmx = to_vmx(vcpu);
Bandan Das41ab9372017-08-03 15:54:43 -04009030 int maxphyaddr = cpuid_maxphyaddr(vcpu);
9031
9032 /* Check for memory type validity */
David Hildenbrandbb97a012017-08-10 23:15:28 +02009033 switch (address & VMX_EPTP_MT_MASK) {
9034 case VMX_EPTP_MT_UC:
Paolo Bonzini6677f3d2018-02-26 13:40:08 +01009035 if (!(vmx->nested.msrs.ept_caps & VMX_EPTP_UC_BIT))
Bandan Das41ab9372017-08-03 15:54:43 -04009036 return false;
9037 break;
David Hildenbrandbb97a012017-08-10 23:15:28 +02009038 case VMX_EPTP_MT_WB:
Paolo Bonzini6677f3d2018-02-26 13:40:08 +01009039 if (!(vmx->nested.msrs.ept_caps & VMX_EPTP_WB_BIT))
Bandan Das41ab9372017-08-03 15:54:43 -04009040 return false;
9041 break;
9042 default:
9043 return false;
9044 }
9045
David Hildenbrandbb97a012017-08-10 23:15:28 +02009046	/* Only a 4-level page-walk length is valid */
9047 if ((address & VMX_EPTP_PWL_MASK) != VMX_EPTP_PWL_4)
Bandan Das41ab9372017-08-03 15:54:43 -04009048 return false;
9049
9050 /* Reserved bits should not be set */
9051 if (address >> maxphyaddr || ((address >> 7) & 0x1f))
9052 return false;
9053
9054 /* AD, if set, should be supported */
David Hildenbrandbb97a012017-08-10 23:15:28 +02009055 if (address & VMX_EPTP_AD_ENABLE_BIT) {
Paolo Bonzini6677f3d2018-02-26 13:40:08 +01009056 if (!(vmx->nested.msrs.ept_caps & VMX_EPT_AD_BIT))
Bandan Das41ab9372017-08-03 15:54:43 -04009057 return false;
9058 }
9059
9060 return true;
9061}
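
/*
 * Illustrative sketch, not part of the original file: compose an EPTP value
 * that satisfies every check in valid_ept_address() above: write-back
 * memory type, a 4-level page walk, optional accessed/dirty tracking and a
 * page-aligned root HPA with no reserved bits set.
 */
static inline u64 example_valid_eptp(u64 root_hpa, bool enable_ad)
{
	u64 eptp = VMX_EPTP_MT_WB | VMX_EPTP_PWL_4;

	if (enable_ad)
		eptp |= VMX_EPTP_AD_ENABLE_BIT;

	return eptp | (root_hpa & PAGE_MASK);
}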
9062
9063static int nested_vmx_eptp_switching(struct kvm_vcpu *vcpu,
9064 struct vmcs12 *vmcs12)
9065{
9066 u32 index = vcpu->arch.regs[VCPU_REGS_RCX];
9067 u64 address;
9068 bool accessed_dirty;
9069 struct kvm_mmu *mmu = vcpu->arch.walk_mmu;
9070
9071 if (!nested_cpu_has_eptp_switching(vmcs12) ||
9072 !nested_cpu_has_ept(vmcs12))
9073 return 1;
9074
9075 if (index >= VMFUNC_EPTP_ENTRIES)
9076 return 1;
9077
9078
9079 if (kvm_vcpu_read_guest_page(vcpu, vmcs12->eptp_list_address >> PAGE_SHIFT,
9080 &address, index * 8, 8))
9081 return 1;
9082
David Hildenbrandbb97a012017-08-10 23:15:28 +02009083 accessed_dirty = !!(address & VMX_EPTP_AD_ENABLE_BIT);
Bandan Das41ab9372017-08-03 15:54:43 -04009084
9085 /*
9086	 * If the (L2) guest does a vmfunc to the currently active EPT
9087	 * pointer, we don't have to do anything else.
9088 */
9089 if (vmcs12->ept_pointer != address) {
9090 if (!valid_ept_address(vcpu, address))
9091 return 1;
9092
9093 kvm_mmu_unload(vcpu);
9094 mmu->ept_ad = accessed_dirty;
9095 mmu->base_role.ad_disabled = !accessed_dirty;
9096 vmcs12->ept_pointer = address;
9097 /*
9098		 * TODO: Check what the correct approach is if the MMU
9099		 * reload fails. Currently, we just let the next reload
9100		 * potentially fail.
9101 */
9102 kvm_mmu_reload(vcpu);
9103 }
9104
9105 return 0;
9106}
9107
Bandan Das2a499e42017-08-03 15:54:41 -04009108static int handle_vmfunc(struct kvm_vcpu *vcpu)
9109{
Bandan Das27c42a12017-08-03 15:54:42 -04009110 struct vcpu_vmx *vmx = to_vmx(vcpu);
9111 struct vmcs12 *vmcs12;
9112 u32 function = vcpu->arch.regs[VCPU_REGS_RAX];
9113
9114 /*
9115 * VMFUNC is only supported for nested guests, but we always enable the
9116 * secondary control for simplicity; for non-nested mode, fake that we
9117 * didn't by injecting #UD.
9118 */
9119 if (!is_guest_mode(vcpu)) {
9120 kvm_queue_exception(vcpu, UD_VECTOR);
9121 return 1;
9122 }
9123
9124 vmcs12 = get_vmcs12(vcpu);
9125 if ((vmcs12->vm_function_control & (1 << function)) == 0)
9126 goto fail;
Bandan Das41ab9372017-08-03 15:54:43 -04009127
9128 switch (function) {
9129 case 0:
9130 if (nested_vmx_eptp_switching(vcpu, vmcs12))
9131 goto fail;
9132 break;
9133 default:
9134 goto fail;
9135 }
9136 return kvm_skip_emulated_instruction(vcpu);
Bandan Das27c42a12017-08-03 15:54:42 -04009137
9138fail:
9139 nested_vmx_vmexit(vcpu, vmx->exit_reason,
9140 vmcs_read32(VM_EXIT_INTR_INFO),
9141 vmcs_readl(EXIT_QUALIFICATION));
Bandan Das2a499e42017-08-03 15:54:41 -04009142 return 1;
9143}
9144
Nadav Har'El0140cae2011-05-25 23:06:28 +03009145/*
Avi Kivity6aa8b732006-12-10 02:21:36 -08009146 * The exit handlers return 1 if the exit was handled fully and guest execution
9147 * may resume. Otherwise they set the kvm_run parameter to indicate what needs
9148 * to be done to userspace and return 0.
9149 */
Mathias Krause772e0312012-08-30 01:30:19 +02009150static int (*const kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu) = {
Avi Kivity6aa8b732006-12-10 02:21:36 -08009151 [EXIT_REASON_EXCEPTION_NMI] = handle_exception,
9152 [EXIT_REASON_EXTERNAL_INTERRUPT] = handle_external_interrupt,
Avi Kivity988ad742007-02-12 00:54:36 -08009153 [EXIT_REASON_TRIPLE_FAULT] = handle_triple_fault,
Sheng Yangf08864b2008-05-15 18:23:25 +08009154 [EXIT_REASON_NMI_WINDOW] = handle_nmi_window,
Avi Kivity6aa8b732006-12-10 02:21:36 -08009155 [EXIT_REASON_IO_INSTRUCTION] = handle_io,
Avi Kivity6aa8b732006-12-10 02:21:36 -08009156 [EXIT_REASON_CR_ACCESS] = handle_cr,
9157 [EXIT_REASON_DR_ACCESS] = handle_dr,
9158 [EXIT_REASON_CPUID] = handle_cpuid,
9159 [EXIT_REASON_MSR_READ] = handle_rdmsr,
9160 [EXIT_REASON_MSR_WRITE] = handle_wrmsr,
9161 [EXIT_REASON_PENDING_INTERRUPT] = handle_interrupt_window,
9162 [EXIT_REASON_HLT] = handle_halt,
Gleb Natapovec25d5e2010-11-01 15:35:01 +02009163 [EXIT_REASON_INVD] = handle_invd,
Marcelo Tosattia7052892008-09-23 13:18:35 -03009164 [EXIT_REASON_INVLPG] = handle_invlpg,
Avi Kivityfee84b02011-11-10 14:57:25 +02009165 [EXIT_REASON_RDPMC] = handle_rdpmc,
Ingo Molnarc21415e2007-02-19 14:37:47 +02009166 [EXIT_REASON_VMCALL] = handle_vmcall,
Nadav Har'El27d6c862011-05-25 23:06:59 +03009167 [EXIT_REASON_VMCLEAR] = handle_vmclear,
Nadav Har'Elcd232ad2011-05-25 23:10:33 +03009168 [EXIT_REASON_VMLAUNCH] = handle_vmlaunch,
Nadav Har'El63846662011-05-25 23:07:29 +03009169 [EXIT_REASON_VMPTRLD] = handle_vmptrld,
Nadav Har'El6a4d7552011-05-25 23:08:00 +03009170 [EXIT_REASON_VMPTRST] = handle_vmptrst,
Nadav Har'El49f705c2011-05-25 23:08:30 +03009171 [EXIT_REASON_VMREAD] = handle_vmread,
Nadav Har'Elcd232ad2011-05-25 23:10:33 +03009172 [EXIT_REASON_VMRESUME] = handle_vmresume,
Nadav Har'El49f705c2011-05-25 23:08:30 +03009173 [EXIT_REASON_VMWRITE] = handle_vmwrite,
Nadav Har'Elec378ae2011-05-25 23:02:54 +03009174 [EXIT_REASON_VMOFF] = handle_vmoff,
9175 [EXIT_REASON_VMON] = handle_vmon,
Sheng Yangf78e0e22007-10-29 09:40:42 +08009176 [EXIT_REASON_TPR_BELOW_THRESHOLD] = handle_tpr_below_threshold,
9177 [EXIT_REASON_APIC_ACCESS] = handle_apic_access,
Yang Zhang83d4c282013-01-25 10:18:49 +08009178 [EXIT_REASON_APIC_WRITE] = handle_apic_write,
Yang Zhangc7c9c562013-01-25 10:18:51 +08009179 [EXIT_REASON_EOI_INDUCED] = handle_apic_eoi_induced,
Eddie Donge5edaa02007-11-11 12:28:35 +02009180 [EXIT_REASON_WBINVD] = handle_wbinvd,
Dexuan Cui2acf9232010-06-10 11:27:12 +08009181 [EXIT_REASON_XSETBV] = handle_xsetbv,
Izik Eidus37817f22008-03-24 23:14:53 +02009182 [EXIT_REASON_TASK_SWITCH] = handle_task_switch,
Andi Kleena0861c02009-06-08 17:37:09 +08009183 [EXIT_REASON_MCE_DURING_VMENTRY] = handle_machine_check,
Paolo Bonzini0367f202016-07-12 10:44:55 +02009184 [EXIT_REASON_GDTR_IDTR] = handle_desc,
9185 [EXIT_REASON_LDTR_TR] = handle_desc,
Marcelo Tosatti68f89402009-06-11 12:07:43 -03009186 [EXIT_REASON_EPT_VIOLATION] = handle_ept_violation,
9187 [EXIT_REASON_EPT_MISCONFIG] = handle_ept_misconfig,
Zhai, Edwin4b8d54f2009-10-09 18:03:20 +08009188 [EXIT_REASON_PAUSE_INSTRUCTION] = handle_pause,
Gabriel L. Somlo87c00572014-05-07 16:52:13 -04009189 [EXIT_REASON_MWAIT_INSTRUCTION] = handle_mwait,
Mihai Donțu5f3d45e2015-07-05 20:08:57 +03009190 [EXIT_REASON_MONITOR_TRAP_FLAG] = handle_monitor_trap,
Gabriel L. Somlo87c00572014-05-07 16:52:13 -04009191 [EXIT_REASON_MONITOR_INSTRUCTION] = handle_monitor,
Nadav Har'Elbfd0a562013-08-05 11:07:17 +03009192 [EXIT_REASON_INVEPT] = handle_invept,
Petr Matouseka642fc32014-09-23 20:22:30 +02009193 [EXIT_REASON_INVVPID] = handle_invvpid,
Jim Mattson45ec3682017-08-23 16:32:04 -07009194 [EXIT_REASON_RDRAND] = handle_invalid_op,
Jim Mattson75f4fc82017-08-23 16:32:03 -07009195 [EXIT_REASON_RDSEED] = handle_invalid_op,
Wanpeng Lif53cd632014-12-02 19:14:58 +08009196 [EXIT_REASON_XSAVES] = handle_xsaves,
9197 [EXIT_REASON_XRSTORS] = handle_xrstors,
Kai Huang843e4332015-01-28 10:54:28 +08009198 [EXIT_REASON_PML_FULL] = handle_pml_full,
Junaid Shahideb4b2482018-06-27 14:59:14 -07009199 [EXIT_REASON_INVPCID] = handle_invpcid,
Bandan Das2a499e42017-08-03 15:54:41 -04009200 [EXIT_REASON_VMFUNC] = handle_vmfunc,
Yunhong Jiang64672c92016-06-13 14:19:59 -07009201 [EXIT_REASON_PREEMPTION_TIMER] = handle_preemption_timer,
Avi Kivity6aa8b732006-12-10 02:21:36 -08009202};
9203
9204static const int kvm_vmx_max_exit_handlers =
Robert P. J. Day50a34852007-06-03 13:35:29 -04009205 ARRAY_SIZE(kvm_vmx_exit_handlers);
Avi Kivity6aa8b732006-12-10 02:21:36 -08009206
Jan Kiszka908a7bd2013-02-18 11:21:16 +01009207static bool nested_vmx_exit_handled_io(struct kvm_vcpu *vcpu,
9208 struct vmcs12 *vmcs12)
9209{
9210 unsigned long exit_qualification;
9211 gpa_t bitmap, last_bitmap;
9212 unsigned int port;
9213 int size;
9214 u8 b;
9215
Jan Kiszka908a7bd2013-02-18 11:21:16 +01009216 if (!nested_cpu_has(vmcs12, CPU_BASED_USE_IO_BITMAPS))
Zhihui Zhang2f0a6392013-12-30 15:56:29 -05009217 return nested_cpu_has(vmcs12, CPU_BASED_UNCOND_IO_EXITING);
Jan Kiszka908a7bd2013-02-18 11:21:16 +01009218
9219 exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
9220
9221 port = exit_qualification >> 16;
9222 size = (exit_qualification & 7) + 1;
9223
9224 last_bitmap = (gpa_t)-1;
9225 b = -1;
9226
9227 while (size > 0) {
9228 if (port < 0x8000)
9229 bitmap = vmcs12->io_bitmap_a;
9230 else if (port < 0x10000)
9231 bitmap = vmcs12->io_bitmap_b;
9232 else
Joe Perches1d804d02015-03-30 16:46:09 -07009233 return true;
Jan Kiszka908a7bd2013-02-18 11:21:16 +01009234 bitmap += (port & 0x7fff) / 8;
9235
9236 if (last_bitmap != bitmap)
Paolo Bonzini54bf36a2015-04-08 15:39:23 +02009237 if (kvm_vcpu_read_guest(vcpu, bitmap, &b, 1))
Joe Perches1d804d02015-03-30 16:46:09 -07009238 return true;
Jan Kiszka908a7bd2013-02-18 11:21:16 +01009239 if (b & (1 << (port & 7)))
Joe Perches1d804d02015-03-30 16:46:09 -07009240 return true;
Jan Kiszka908a7bd2013-02-18 11:21:16 +01009241
9242 port++;
9243 size--;
9244 last_bitmap = bitmap;
9245 }
9246
Joe Perches1d804d02015-03-30 16:46:09 -07009247 return false;
Jan Kiszka908a7bd2013-02-18 11:21:16 +01009248}
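
/*
 * Illustrative sketch, not part of the original file: locate the bit for a
 * single I/O port in the vmcs12 I/O bitmaps, mirroring the loop above.
 * Bitmap A covers ports 0x0000-0x7fff and bitmap B covers 0x8000-0xffff,
 * one bit per port.
 */
static inline gpa_t nested_io_bitmap_byte(struct vmcs12 *vmcs12,
					  unsigned int port, u8 *bit)
{
	gpa_t bitmap = port < 0x8000 ? vmcs12->io_bitmap_a :
				       vmcs12->io_bitmap_b;

	*bit = port & 7;
	return bitmap + (port & 0x7fff) / 8;
}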
9249
Nadav Har'El644d7112011-05-25 23:12:35 +03009250/*
9251 * Return 1 if we should exit from L2 to L1 to handle an MSR access,
9252 * rather than handle it ourselves in L0. I.e., check whether L1 wants to
9253 * intercept the current event (a read or write of a specific MSR) via its
9254 * MSR bitmap. This may be the case even when L0 doesn't use MSR bitmaps.
9255 */
9256static bool nested_vmx_exit_handled_msr(struct kvm_vcpu *vcpu,
9257 struct vmcs12 *vmcs12, u32 exit_reason)
9258{
9259 u32 msr_index = vcpu->arch.regs[VCPU_REGS_RCX];
9260 gpa_t bitmap;
9261
Jan Kiszkacbd29cb2013-02-11 12:19:28 +01009262 if (!nested_cpu_has(vmcs12, CPU_BASED_USE_MSR_BITMAPS))
Joe Perches1d804d02015-03-30 16:46:09 -07009263 return true;
Nadav Har'El644d7112011-05-25 23:12:35 +03009264
9265 /*
9266 * The MSR_BITMAP page is divided into four 1024-byte bitmaps,
9267 * for the four combinations of read/write and low/high MSR numbers.
9268 * First we need to figure out which of the four to use:
9269 */
9270 bitmap = vmcs12->msr_bitmap;
9271 if (exit_reason == EXIT_REASON_MSR_WRITE)
9272 bitmap += 2048;
9273 if (msr_index >= 0xc0000000) {
9274 msr_index -= 0xc0000000;
9275 bitmap += 1024;
9276 }
9277
9278 /* Then read the msr_index'th bit from this bitmap: */
9279 if (msr_index < 1024*8) {
9280 unsigned char b;
Paolo Bonzini54bf36a2015-04-08 15:39:23 +02009281 if (kvm_vcpu_read_guest(vcpu, bitmap + msr_index/8, &b, 1))
Joe Perches1d804d02015-03-30 16:46:09 -07009282 return true;
Nadav Har'El644d7112011-05-25 23:12:35 +03009283 return 1 & (b >> (msr_index & 7));
9284 } else
Joe Perches1d804d02015-03-30 16:46:09 -07009285 return true; /* let L1 handle the wrong parameter */
Nadav Har'El644d7112011-05-25 23:12:35 +03009286}
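
/*
 * Illustrative sketch, not part of the original file: the bitmap arithmetic
 * described in the comment above. It picks one of the four 1024-byte
 * bitmaps (read/write x low/high) and returns the byte offset of the MSR
 * within the 4K MSR-bitmap page; out-of-range MSRs are the caller's
 * problem, exactly as in nested_vmx_exit_handled_msr().
 */
static inline u32 nested_msr_bitmap_offset(u32 msr_index, bool write, u8 *bit)
{
	u32 offset = write ? 2048 : 0;

	if (msr_index >= 0xc0000000) {
		msr_index -= 0xc0000000;
		offset += 1024;
	}

	*bit = msr_index & 7;
	return offset + msr_index / 8;
}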
9287
9288/*
9289 * Return 1 if we should exit from L2 to L1 to handle a CR access exit,
9290 * rather than handle it ourselves in L0. I.e., check if L1 wanted to
9291 * intercept (via guest_host_mask etc.) the current event.
9292 */
9293static bool nested_vmx_exit_handled_cr(struct kvm_vcpu *vcpu,
9294 struct vmcs12 *vmcs12)
9295{
9296 unsigned long exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
9297 int cr = exit_qualification & 15;
Jan H. Schönherre1d39b12017-05-20 13:22:56 +02009298 int reg;
9299 unsigned long val;
Nadav Har'El644d7112011-05-25 23:12:35 +03009300
9301 switch ((exit_qualification >> 4) & 3) {
9302 case 0: /* mov to cr */
Jan H. Schönherre1d39b12017-05-20 13:22:56 +02009303 reg = (exit_qualification >> 8) & 15;
9304 val = kvm_register_readl(vcpu, reg);
Nadav Har'El644d7112011-05-25 23:12:35 +03009305 switch (cr) {
9306 case 0:
9307 if (vmcs12->cr0_guest_host_mask &
9308 (val ^ vmcs12->cr0_read_shadow))
Joe Perches1d804d02015-03-30 16:46:09 -07009309 return true;
Nadav Har'El644d7112011-05-25 23:12:35 +03009310 break;
9311 case 3:
9312 if ((vmcs12->cr3_target_count >= 1 &&
9313 vmcs12->cr3_target_value0 == val) ||
9314 (vmcs12->cr3_target_count >= 2 &&
9315 vmcs12->cr3_target_value1 == val) ||
9316 (vmcs12->cr3_target_count >= 3 &&
9317 vmcs12->cr3_target_value2 == val) ||
9318 (vmcs12->cr3_target_count >= 4 &&
9319 vmcs12->cr3_target_value3 == val))
Joe Perches1d804d02015-03-30 16:46:09 -07009320 return false;
Nadav Har'El644d7112011-05-25 23:12:35 +03009321 if (nested_cpu_has(vmcs12, CPU_BASED_CR3_LOAD_EXITING))
Joe Perches1d804d02015-03-30 16:46:09 -07009322 return true;
Nadav Har'El644d7112011-05-25 23:12:35 +03009323 break;
9324 case 4:
9325 if (vmcs12->cr4_guest_host_mask &
9326 (vmcs12->cr4_read_shadow ^ val))
Joe Perches1d804d02015-03-30 16:46:09 -07009327 return true;
Nadav Har'El644d7112011-05-25 23:12:35 +03009328 break;
9329 case 8:
9330 if (nested_cpu_has(vmcs12, CPU_BASED_CR8_LOAD_EXITING))
Joe Perches1d804d02015-03-30 16:46:09 -07009331 return true;
Nadav Har'El644d7112011-05-25 23:12:35 +03009332 break;
9333 }
9334 break;
9335 case 2: /* clts */
9336 if ((vmcs12->cr0_guest_host_mask & X86_CR0_TS) &&
9337 (vmcs12->cr0_read_shadow & X86_CR0_TS))
Joe Perches1d804d02015-03-30 16:46:09 -07009338 return true;
Nadav Har'El644d7112011-05-25 23:12:35 +03009339 break;
9340 case 1: /* mov from cr */
9341 switch (cr) {
9342 case 3:
9343 if (vmcs12->cpu_based_vm_exec_control &
9344 CPU_BASED_CR3_STORE_EXITING)
Joe Perches1d804d02015-03-30 16:46:09 -07009345 return true;
Nadav Har'El644d7112011-05-25 23:12:35 +03009346 break;
9347 case 8:
9348 if (vmcs12->cpu_based_vm_exec_control &
9349 CPU_BASED_CR8_STORE_EXITING)
Joe Perches1d804d02015-03-30 16:46:09 -07009350 return true;
Nadav Har'El644d7112011-05-25 23:12:35 +03009351 break;
9352 }
9353 break;
9354 case 3: /* lmsw */
9355 /*
9356 * lmsw can change bits 1..3 of cr0, and only set bit 0 of
9357 * cr0. Other attempted changes are ignored, with no exit.
9358 */
Jan H. Schönherre1d39b12017-05-20 13:22:56 +02009359 val = (exit_qualification >> LMSW_SOURCE_DATA_SHIFT) & 0x0f;
Nadav Har'El644d7112011-05-25 23:12:35 +03009360 if (vmcs12->cr0_guest_host_mask & 0xe &
9361 (val ^ vmcs12->cr0_read_shadow))
Joe Perches1d804d02015-03-30 16:46:09 -07009362 return true;
Nadav Har'El644d7112011-05-25 23:12:35 +03009363 if ((vmcs12->cr0_guest_host_mask & 0x1) &&
9364 !(vmcs12->cr0_read_shadow & 0x1) &&
9365 (val & 0x1))
Joe Perches1d804d02015-03-30 16:46:09 -07009366 return true;
Nadav Har'El644d7112011-05-25 23:12:35 +03009367 break;
9368 }
Joe Perches1d804d02015-03-30 16:46:09 -07009369 return false;
Nadav Har'El644d7112011-05-25 23:12:35 +03009370}
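
/*
 * Illustrative sketch, not part of the original file: the exit-qualification
 * layout assumed by nested_vmx_exit_handled_cr() above. The struct and its
 * names are made up for the example.
 */
struct cr_access_info {
	int cr;			/* bits 0..3: control register number */
	int access_type;	/* bits 4..5: 0 mov-to-cr, 1 mov-from-cr, 2 clts, 3 lmsw */
	int reg;		/* bits 8..11: general-purpose register */
	unsigned long lmsw_data; /* low four bits of the LMSW source operand */
};

static inline struct cr_access_info
decode_cr_exit_qualification(unsigned long exit_qualification)
{
	struct cr_access_info info = {
		.cr		= exit_qualification & 15,
		.access_type	= (exit_qualification >> 4) & 3,
		.reg		= (exit_qualification >> 8) & 15,
		.lmsw_data	= (exit_qualification >> LMSW_SOURCE_DATA_SHIFT) & 0x0f,
	};

	return info;
}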
9371
Liran Alona7cde482018-06-23 02:35:10 +03009372static bool nested_vmx_exit_handled_vmcs_access(struct kvm_vcpu *vcpu,
9373 struct vmcs12 *vmcs12, gpa_t bitmap)
9374{
9375 u32 vmx_instruction_info;
9376 unsigned long field;
9377 u8 b;
9378
9379 if (!nested_cpu_has_shadow_vmcs(vmcs12))
9380 return true;
9381
9382 /* Decode instruction info and find the field to access */
9383 vmx_instruction_info = vmcs_read32(VMX_INSTRUCTION_INFO);
9384 field = kvm_register_read(vcpu, (((vmx_instruction_info) >> 28) & 0xf));
9385
9386 /* Out-of-range fields always cause a VM exit from L2 to L1 */
9387 if (field >> 15)
9388 return true;
9389
9390 if (kvm_vcpu_read_guest(vcpu, bitmap + field/8, &b, 1))
9391 return true;
9392
9393 return 1 & (b >> (field & 7));
9394}
9395
Nadav Har'El644d7112011-05-25 23:12:35 +03009396/*
9397 * Return 1 if we should exit from L2 to L1 to handle an exit, or 0 if we
9398 * should handle it ourselves in L0 (and then continue L2). Only call this
9399 * when in is_guest_mode (L2).
9400 */
Paolo Bonzini7313c692017-07-27 10:31:25 +02009401static bool nested_vmx_exit_reflected(struct kvm_vcpu *vcpu, u32 exit_reason)
Nadav Har'El644d7112011-05-25 23:12:35 +03009402{
Nadav Har'El644d7112011-05-25 23:12:35 +03009403 u32 intr_info = vmcs_read32(VM_EXIT_INTR_INFO);
9404 struct vcpu_vmx *vmx = to_vmx(vcpu);
9405 struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
9406
Jim Mattson4f350c62017-09-14 16:31:44 -07009407 if (vmx->nested.nested_run_pending)
9408 return false;
9409
9410 if (unlikely(vmx->fail)) {
9411 pr_info_ratelimited("%s failed vm entry %x\n", __func__,
9412 vmcs_read32(VM_INSTRUCTION_ERROR));
9413 return true;
9414 }
Jan Kiszka542060e2014-01-04 18:47:21 +01009415
David Matlackc9f04402017-08-01 14:00:40 -07009416 /*
9417 * The host physical addresses of some pages of guest memory
Jim Mattsonde3a0022017-11-27 17:22:25 -06009418 * are loaded into the vmcs02 (e.g. vmcs12's Virtual APIC
9419 * Page). The CPU may write to these pages via their host
9420 * physical address while L2 is running, bypassing any
9421 * address-translation-based dirty tracking (e.g. EPT write
9422 * protection).
David Matlackc9f04402017-08-01 14:00:40 -07009423 *
9424 * Mark them dirty on every exit from L2 to prevent them from
9425 * getting out of sync with dirty tracking.
9426 */
9427 nested_mark_vmcs12_pages_dirty(vcpu);
9428
Jim Mattson4f350c62017-09-14 16:31:44 -07009429 trace_kvm_nested_vmexit(kvm_rip_read(vcpu), exit_reason,
9430 vmcs_readl(EXIT_QUALIFICATION),
9431 vmx->idt_vectoring_info,
9432 intr_info,
9433 vmcs_read32(VM_EXIT_INTR_ERROR_CODE),
9434 KVM_ISA_VMX);
Nadav Har'El644d7112011-05-25 23:12:35 +03009435
9436 switch (exit_reason) {
9437 case EXIT_REASON_EXCEPTION_NMI:
Jim Mattsonef85b672016-12-12 11:01:37 -08009438 if (is_nmi(intr_info))
Joe Perches1d804d02015-03-30 16:46:09 -07009439 return false;
Nadav Har'El644d7112011-05-25 23:12:35 +03009440 else if (is_page_fault(intr_info))
Wanpeng Li52a5c152017-07-13 18:30:42 -07009441 return !vmx->vcpu.arch.apf.host_apf_reason && enable_ept;
Anthoine Bourgeoise504c902013-11-13 11:45:37 +01009442 else if (is_no_device(intr_info) &&
Paolo Bonziniccf98442014-02-27 22:54:11 +01009443 !(vmcs12->guest_cr0 & X86_CR0_TS))
Joe Perches1d804d02015-03-30 16:46:09 -07009444 return false;
Jan Kiszka6f054852016-02-09 20:15:18 +01009445 else if (is_debug(intr_info) &&
9446 vcpu->guest_debug &
9447 (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP))
9448 return false;
9449 else if (is_breakpoint(intr_info) &&
9450 vcpu->guest_debug & KVM_GUESTDBG_USE_SW_BP)
9451 return false;
Nadav Har'El644d7112011-05-25 23:12:35 +03009452 return vmcs12->exception_bitmap &
9453 (1u << (intr_info & INTR_INFO_VECTOR_MASK));
9454 case EXIT_REASON_EXTERNAL_INTERRUPT:
Joe Perches1d804d02015-03-30 16:46:09 -07009455 return false;
Nadav Har'El644d7112011-05-25 23:12:35 +03009456 case EXIT_REASON_TRIPLE_FAULT:
Joe Perches1d804d02015-03-30 16:46:09 -07009457 return true;
Nadav Har'El644d7112011-05-25 23:12:35 +03009458 case EXIT_REASON_PENDING_INTERRUPT:
Jan Kiszka3b656cf2013-04-14 12:12:45 +02009459 return nested_cpu_has(vmcs12, CPU_BASED_VIRTUAL_INTR_PENDING);
Nadav Har'El644d7112011-05-25 23:12:35 +03009460 case EXIT_REASON_NMI_WINDOW:
Jan Kiszka3b656cf2013-04-14 12:12:45 +02009461 return nested_cpu_has(vmcs12, CPU_BASED_VIRTUAL_NMI_PENDING);
Nadav Har'El644d7112011-05-25 23:12:35 +03009462 case EXIT_REASON_TASK_SWITCH:
Joe Perches1d804d02015-03-30 16:46:09 -07009463 return true;
Nadav Har'El644d7112011-05-25 23:12:35 +03009464 case EXIT_REASON_CPUID:
Joe Perches1d804d02015-03-30 16:46:09 -07009465 return true;
Nadav Har'El644d7112011-05-25 23:12:35 +03009466 case EXIT_REASON_HLT:
9467 return nested_cpu_has(vmcs12, CPU_BASED_HLT_EXITING);
9468 case EXIT_REASON_INVD:
Joe Perches1d804d02015-03-30 16:46:09 -07009469 return true;
Nadav Har'El644d7112011-05-25 23:12:35 +03009470 case EXIT_REASON_INVLPG:
9471 return nested_cpu_has(vmcs12, CPU_BASED_INVLPG_EXITING);
9472 case EXIT_REASON_RDPMC:
9473 return nested_cpu_has(vmcs12, CPU_BASED_RDPMC_EXITING);
Paolo Bonzinia5f46452017-03-30 11:55:32 +02009474 case EXIT_REASON_RDRAND:
David Hildenbrand736fdf72017-08-24 20:51:37 +02009475 return nested_cpu_has2(vmcs12, SECONDARY_EXEC_RDRAND_EXITING);
Paolo Bonzinia5f46452017-03-30 11:55:32 +02009476 case EXIT_REASON_RDSEED:
David Hildenbrand736fdf72017-08-24 20:51:37 +02009477 return nested_cpu_has2(vmcs12, SECONDARY_EXEC_RDSEED_EXITING);
Jan Kiszkab3a2a902015-03-23 19:27:19 +01009478 case EXIT_REASON_RDTSC: case EXIT_REASON_RDTSCP:
Nadav Har'El644d7112011-05-25 23:12:35 +03009479 return nested_cpu_has(vmcs12, CPU_BASED_RDTSC_EXITING);
Liran Alona7cde482018-06-23 02:35:10 +03009480 case EXIT_REASON_VMREAD:
9481 return nested_vmx_exit_handled_vmcs_access(vcpu, vmcs12,
9482 vmcs12->vmread_bitmap);
9483 case EXIT_REASON_VMWRITE:
9484 return nested_vmx_exit_handled_vmcs_access(vcpu, vmcs12,
9485 vmcs12->vmwrite_bitmap);
Nadav Har'El644d7112011-05-25 23:12:35 +03009486 case EXIT_REASON_VMCALL: case EXIT_REASON_VMCLEAR:
9487 case EXIT_REASON_VMLAUNCH: case EXIT_REASON_VMPTRLD:
Liran Alona7cde482018-06-23 02:35:10 +03009488 case EXIT_REASON_VMPTRST: case EXIT_REASON_VMRESUME:
Nadav Har'El644d7112011-05-25 23:12:35 +03009489 case EXIT_REASON_VMOFF: case EXIT_REASON_VMON:
Petr Matouseka642fc32014-09-23 20:22:30 +02009490 case EXIT_REASON_INVEPT: case EXIT_REASON_INVVPID:
Nadav Har'El644d7112011-05-25 23:12:35 +03009491 /*
9492 * VMX instructions trap unconditionally. This allows L1 to
9493 * emulate them for its L2 guest, i.e., allows 3-level nesting!
9494 */
Joe Perches1d804d02015-03-30 16:46:09 -07009495 return true;
Nadav Har'El644d7112011-05-25 23:12:35 +03009496 case EXIT_REASON_CR_ACCESS:
9497 return nested_vmx_exit_handled_cr(vcpu, vmcs12);
9498 case EXIT_REASON_DR_ACCESS:
9499 return nested_cpu_has(vmcs12, CPU_BASED_MOV_DR_EXITING);
9500 case EXIT_REASON_IO_INSTRUCTION:
Jan Kiszka908a7bd2013-02-18 11:21:16 +01009501 return nested_vmx_exit_handled_io(vcpu, vmcs12);
Paolo Bonzini1b073042016-10-25 16:06:30 +02009502 case EXIT_REASON_GDTR_IDTR: case EXIT_REASON_LDTR_TR:
9503 return nested_cpu_has2(vmcs12, SECONDARY_EXEC_DESC);
Nadav Har'El644d7112011-05-25 23:12:35 +03009504 case EXIT_REASON_MSR_READ:
9505 case EXIT_REASON_MSR_WRITE:
9506 return nested_vmx_exit_handled_msr(vcpu, vmcs12, exit_reason);
9507 case EXIT_REASON_INVALID_STATE:
Joe Perches1d804d02015-03-30 16:46:09 -07009508 return true;
Nadav Har'El644d7112011-05-25 23:12:35 +03009509 case EXIT_REASON_MWAIT_INSTRUCTION:
9510 return nested_cpu_has(vmcs12, CPU_BASED_MWAIT_EXITING);
Mihai Donțu5f3d45e2015-07-05 20:08:57 +03009511 case EXIT_REASON_MONITOR_TRAP_FLAG:
9512 return nested_cpu_has(vmcs12, CPU_BASED_MONITOR_TRAP_FLAG);
Nadav Har'El644d7112011-05-25 23:12:35 +03009513 case EXIT_REASON_MONITOR_INSTRUCTION:
9514 return nested_cpu_has(vmcs12, CPU_BASED_MONITOR_EXITING);
9515 case EXIT_REASON_PAUSE_INSTRUCTION:
9516 return nested_cpu_has(vmcs12, CPU_BASED_PAUSE_EXITING) ||
9517 nested_cpu_has2(vmcs12,
9518 SECONDARY_EXEC_PAUSE_LOOP_EXITING);
9519 case EXIT_REASON_MCE_DURING_VMENTRY:
Joe Perches1d804d02015-03-30 16:46:09 -07009520 return false;
Nadav Har'El644d7112011-05-25 23:12:35 +03009521 case EXIT_REASON_TPR_BELOW_THRESHOLD:
Wanpeng Lia7c0b072014-08-21 19:46:50 +08009522 return nested_cpu_has(vmcs12, CPU_BASED_TPR_SHADOW);
Nadav Har'El644d7112011-05-25 23:12:35 +03009523 case EXIT_REASON_APIC_ACCESS:
Wincy Van82f0dd42015-02-03 23:57:18 +08009524 case EXIT_REASON_APIC_WRITE:
Wincy Van608406e2015-02-03 23:57:51 +08009525 case EXIT_REASON_EOI_INDUCED:
Jim Mattsonab5df312018-05-09 17:02:03 -04009526 /*
9527 * The controls for "virtualize APIC accesses," "APIC-
9528 * register virtualization," and "virtual-interrupt
9529 * delivery" only come from vmcs12.
9530 */
Joe Perches1d804d02015-03-30 16:46:09 -07009531 return true;
Nadav Har'El644d7112011-05-25 23:12:35 +03009532 case EXIT_REASON_EPT_VIOLATION:
Nadav Har'El2b1be672013-08-05 11:07:19 +03009533 /*
9534 * L0 always deals with the EPT violation. If nested EPT is
9535 * used, and the nested mmu code discovers that the address is
9536 * missing in the guest EPT table (EPT12), the EPT violation
9537 * will be injected with nested_ept_inject_page_fault()
9538 */
Joe Perches1d804d02015-03-30 16:46:09 -07009539 return false;
Nadav Har'El644d7112011-05-25 23:12:35 +03009540 case EXIT_REASON_EPT_MISCONFIG:
Nadav Har'El2b1be672013-08-05 11:07:19 +03009541 /*
9542		 * L2 never directly uses L1's EPT, but rather L0's own EPT
9543		 * table (shadow on EPT) or a merged EPT table that L0 built
9544		 * (EPT on EPT). So any problems with the structure of the
9545		 * table are L0's fault.
9546 */
Joe Perches1d804d02015-03-30 16:46:09 -07009547 return false;
Paolo Bonzini90a2db62017-07-27 13:22:13 +02009548 case EXIT_REASON_INVPCID:
9549 return
9550 nested_cpu_has2(vmcs12, SECONDARY_EXEC_ENABLE_INVPCID) &&
9551 nested_cpu_has(vmcs12, CPU_BASED_INVLPG_EXITING);
Nadav Har'El644d7112011-05-25 23:12:35 +03009552 case EXIT_REASON_WBINVD:
9553 return nested_cpu_has2(vmcs12, SECONDARY_EXEC_WBINVD_EXITING);
9554 case EXIT_REASON_XSETBV:
Joe Perches1d804d02015-03-30 16:46:09 -07009555 return true;
Wanpeng Li81dc01f2014-12-04 19:11:07 +08009556 case EXIT_REASON_XSAVES: case EXIT_REASON_XRSTORS:
9557 /*
9558 * This should never happen, since it is not possible to
9559 * set XSS to a non-zero value---neither in L1 nor in L2.
9560 * If if it were, XSS would have to be checked against
9561 * the XSS exit bitmap in vmcs12.
9562 */
9563 return nested_cpu_has2(vmcs12, SECONDARY_EXEC_XSAVES);
Wanpeng Li55123e32016-07-06 18:29:58 +08009564 case EXIT_REASON_PREEMPTION_TIMER:
9565 return false;
Ladi Prosekab007cc2017-03-31 10:19:26 +02009566 case EXIT_REASON_PML_FULL:
Bandan Das03efce62017-05-05 15:25:15 -04009567 /* We emulate PML support to L1. */
Ladi Prosekab007cc2017-03-31 10:19:26 +02009568 return false;
Bandan Das2a499e42017-08-03 15:54:41 -04009569 case EXIT_REASON_VMFUNC:
9570 /* VM functions are emulated through L2->L0 vmexits. */
9571 return false;
Nadav Har'El644d7112011-05-25 23:12:35 +03009572 default:
Joe Perches1d804d02015-03-30 16:46:09 -07009573 return true;
Nadav Har'El644d7112011-05-25 23:12:35 +03009574 }
9575}
9576
Paolo Bonzini7313c692017-07-27 10:31:25 +02009577static int nested_vmx_reflect_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason)
9578{
9579 u32 exit_intr_info = vmcs_read32(VM_EXIT_INTR_INFO);
9580
9581 /*
9582 * At this point, the exit interruption info in exit_intr_info
9583 * is only valid for EXCEPTION_NMI exits. For EXTERNAL_INTERRUPT
9584 * we need to query the in-kernel LAPIC.
9585 */
9586 WARN_ON(exit_reason == EXIT_REASON_EXTERNAL_INTERRUPT);
9587 if ((exit_intr_info &
9588 (INTR_INFO_VALID_MASK | INTR_INFO_DELIVER_CODE_MASK)) ==
9589 (INTR_INFO_VALID_MASK | INTR_INFO_DELIVER_CODE_MASK)) {
9590 struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
9591 vmcs12->vm_exit_intr_error_code =
9592 vmcs_read32(VM_EXIT_INTR_ERROR_CODE);
9593 }
9594
9595 nested_vmx_vmexit(vcpu, exit_reason, exit_intr_info,
9596 vmcs_readl(EXIT_QUALIFICATION));
9597 return 1;
9598}
9599
Avi Kivity586f9602010-11-18 13:09:54 +02009600static void vmx_get_exit_info(struct kvm_vcpu *vcpu, u64 *info1, u64 *info2)
9601{
9602 *info1 = vmcs_readl(EXIT_QUALIFICATION);
9603 *info2 = vmcs_read32(VM_EXIT_INTR_INFO);
9604}
9605
Kai Huanga3eaa862015-11-04 13:46:05 +08009606static void vmx_destroy_pml_buffer(struct vcpu_vmx *vmx)
Kai Huang843e4332015-01-28 10:54:28 +08009607{
Kai Huanga3eaa862015-11-04 13:46:05 +08009608 if (vmx->pml_pg) {
9609 __free_page(vmx->pml_pg);
9610 vmx->pml_pg = NULL;
9611 }
Kai Huang843e4332015-01-28 10:54:28 +08009612}
9613
Paolo Bonzini54bf36a2015-04-08 15:39:23 +02009614static void vmx_flush_pml_buffer(struct kvm_vcpu *vcpu)
Kai Huang843e4332015-01-28 10:54:28 +08009615{
Paolo Bonzini54bf36a2015-04-08 15:39:23 +02009616 struct vcpu_vmx *vmx = to_vmx(vcpu);
Kai Huang843e4332015-01-28 10:54:28 +08009617 u64 *pml_buf;
9618 u16 pml_idx;
9619
9620 pml_idx = vmcs_read16(GUEST_PML_INDEX);
9621
9622 /* Do nothing if PML buffer is empty */
9623 if (pml_idx == (PML_ENTITY_NUM - 1))
9624 return;
9625
9626 /* PML index always points to next available PML buffer entity */
9627 if (pml_idx >= PML_ENTITY_NUM)
9628 pml_idx = 0;
9629 else
9630 pml_idx++;
9631
9632 pml_buf = page_address(vmx->pml_pg);
9633 for (; pml_idx < PML_ENTITY_NUM; pml_idx++) {
9634 u64 gpa;
9635
9636 gpa = pml_buf[pml_idx];
9637 WARN_ON(gpa & (PAGE_SIZE - 1));
Paolo Bonzini54bf36a2015-04-08 15:39:23 +02009638 kvm_vcpu_mark_page_dirty(vcpu, gpa >> PAGE_SHIFT);
Kai Huang843e4332015-01-28 10:54:28 +08009639 }
9640
9641 /* reset PML index */
9642 vmcs_write16(GUEST_PML_INDEX, PML_ENTITY_NUM - 1);
9643}
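
/*
 * Illustrative sketch, not part of the original file: how many entries
 * vmx_flush_pml_buffer() above will walk for a given hardware PML index.
 * The CPU fills the buffer from entry PML_ENTITY_NUM - 1 downward, so the
 * function treats an index of PML_ENTITY_NUM - 1 as an empty buffer and an
 * out-of-range index as a completely full one.
 */
static inline u16 pml_valid_entries(u16 pml_idx)
{
	if (pml_idx == PML_ENTITY_NUM - 1)	/* buffer empty */
		return 0;
	if (pml_idx >= PML_ENTITY_NUM)		/* buffer completely full */
		return PML_ENTITY_NUM;

	return PML_ENTITY_NUM - (pml_idx + 1);
}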
9644
9645/*
9646 * Flush all vcpus' PML buffer and update logged GPAs to dirty_bitmap.
9647 * Called before reporting dirty_bitmap to userspace.
9648 */
9649static void kvm_flush_pml_buffers(struct kvm *kvm)
9650{
9651 int i;
9652 struct kvm_vcpu *vcpu;
9653 /*
9654	 * We only need to kick the vcpus out of guest mode here, as the PML
9655	 * buffer is flushed at the beginning of every VMEXIT, so only vcpus
9656	 * running in guest mode can have unflushed GPAs in their PML
9657	 * buffers.
9658 */
9659 kvm_for_each_vcpu(i, vcpu, kvm)
9660 kvm_vcpu_kick(vcpu);
9661}
9662
Paolo Bonzini4eb64dc2015-04-30 12:57:28 +02009663static void vmx_dump_sel(char *name, uint32_t sel)
9664{
9665 pr_err("%s sel=0x%04x, attr=0x%05x, limit=0x%08x, base=0x%016lx\n",
Chao Peng96794e42017-02-21 03:50:01 -05009666 name, vmcs_read16(sel),
Paolo Bonzini4eb64dc2015-04-30 12:57:28 +02009667 vmcs_read32(sel + GUEST_ES_AR_BYTES - GUEST_ES_SELECTOR),
9668 vmcs_read32(sel + GUEST_ES_LIMIT - GUEST_ES_SELECTOR),
9669 vmcs_readl(sel + GUEST_ES_BASE - GUEST_ES_SELECTOR));
9670}
9671
9672static void vmx_dump_dtsel(char *name, uint32_t limit)
9673{
9674 pr_err("%s limit=0x%08x, base=0x%016lx\n",
9675 name, vmcs_read32(limit),
9676 vmcs_readl(limit + GUEST_GDTR_BASE - GUEST_GDTR_LIMIT));
9677}
9678
9679static void dump_vmcs(void)
9680{
9681 u32 vmentry_ctl = vmcs_read32(VM_ENTRY_CONTROLS);
9682 u32 vmexit_ctl = vmcs_read32(VM_EXIT_CONTROLS);
9683 u32 cpu_based_exec_ctrl = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL);
9684 u32 pin_based_exec_ctrl = vmcs_read32(PIN_BASED_VM_EXEC_CONTROL);
9685 u32 secondary_exec_control = 0;
9686 unsigned long cr4 = vmcs_readl(GUEST_CR4);
Paolo Bonzinif3531052015-12-03 15:49:56 +01009687 u64 efer = vmcs_read64(GUEST_IA32_EFER);
Paolo Bonzini4eb64dc2015-04-30 12:57:28 +02009688 int i, n;
9689
9690 if (cpu_has_secondary_exec_ctrls())
9691 secondary_exec_control = vmcs_read32(SECONDARY_VM_EXEC_CONTROL);
9692
9693 pr_err("*** Guest State ***\n");
9694 pr_err("CR0: actual=0x%016lx, shadow=0x%016lx, gh_mask=%016lx\n",
9695 vmcs_readl(GUEST_CR0), vmcs_readl(CR0_READ_SHADOW),
9696 vmcs_readl(CR0_GUEST_HOST_MASK));
9697 pr_err("CR4: actual=0x%016lx, shadow=0x%016lx, gh_mask=%016lx\n",
9698 cr4, vmcs_readl(CR4_READ_SHADOW), vmcs_readl(CR4_GUEST_HOST_MASK));
9699 pr_err("CR3 = 0x%016lx\n", vmcs_readl(GUEST_CR3));
9700 if ((secondary_exec_control & SECONDARY_EXEC_ENABLE_EPT) &&
9701 (cr4 & X86_CR4_PAE) && !(efer & EFER_LMA))
9702 {
Paolo Bonzini845c5b402015-12-03 15:51:00 +01009703 pr_err("PDPTR0 = 0x%016llx PDPTR1 = 0x%016llx\n",
9704 vmcs_read64(GUEST_PDPTR0), vmcs_read64(GUEST_PDPTR1));
9705 pr_err("PDPTR2 = 0x%016llx PDPTR3 = 0x%016llx\n",
9706 vmcs_read64(GUEST_PDPTR2), vmcs_read64(GUEST_PDPTR3));
Paolo Bonzini4eb64dc2015-04-30 12:57:28 +02009707 }
9708 pr_err("RSP = 0x%016lx RIP = 0x%016lx\n",
9709 vmcs_readl(GUEST_RSP), vmcs_readl(GUEST_RIP));
9710 pr_err("RFLAGS=0x%08lx DR7 = 0x%016lx\n",
9711 vmcs_readl(GUEST_RFLAGS), vmcs_readl(GUEST_DR7));
9712 pr_err("Sysenter RSP=%016lx CS:RIP=%04x:%016lx\n",
9713 vmcs_readl(GUEST_SYSENTER_ESP),
9714 vmcs_read32(GUEST_SYSENTER_CS), vmcs_readl(GUEST_SYSENTER_EIP));
9715 vmx_dump_sel("CS: ", GUEST_CS_SELECTOR);
9716 vmx_dump_sel("DS: ", GUEST_DS_SELECTOR);
9717 vmx_dump_sel("SS: ", GUEST_SS_SELECTOR);
9718 vmx_dump_sel("ES: ", GUEST_ES_SELECTOR);
9719 vmx_dump_sel("FS: ", GUEST_FS_SELECTOR);
9720 vmx_dump_sel("GS: ", GUEST_GS_SELECTOR);
9721 vmx_dump_dtsel("GDTR:", GUEST_GDTR_LIMIT);
9722 vmx_dump_sel("LDTR:", GUEST_LDTR_SELECTOR);
9723 vmx_dump_dtsel("IDTR:", GUEST_IDTR_LIMIT);
9724 vmx_dump_sel("TR: ", GUEST_TR_SELECTOR);
9725 if ((vmexit_ctl & (VM_EXIT_SAVE_IA32_PAT | VM_EXIT_SAVE_IA32_EFER)) ||
9726 (vmentry_ctl & (VM_ENTRY_LOAD_IA32_PAT | VM_ENTRY_LOAD_IA32_EFER)))
Paolo Bonzini845c5b402015-12-03 15:51:00 +01009727 pr_err("EFER = 0x%016llx PAT = 0x%016llx\n",
9728 efer, vmcs_read64(GUEST_IA32_PAT));
9729 pr_err("DebugCtl = 0x%016llx DebugExceptions = 0x%016lx\n",
9730 vmcs_read64(GUEST_IA32_DEBUGCTL),
Paolo Bonzini4eb64dc2015-04-30 12:57:28 +02009731 vmcs_readl(GUEST_PENDING_DBG_EXCEPTIONS));
Vitaly Kuznetsov773e8a02018-03-20 15:02:11 +01009732 if (cpu_has_load_perf_global_ctrl &&
9733 vmentry_ctl & VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL)
Paolo Bonzini845c5b402015-12-03 15:51:00 +01009734 pr_err("PerfGlobCtl = 0x%016llx\n",
9735 vmcs_read64(GUEST_IA32_PERF_GLOBAL_CTRL));
Paolo Bonzini4eb64dc2015-04-30 12:57:28 +02009736 if (vmentry_ctl & VM_ENTRY_LOAD_BNDCFGS)
Paolo Bonzini845c5b402015-12-03 15:51:00 +01009737 pr_err("BndCfgS = 0x%016llx\n", vmcs_read64(GUEST_BNDCFGS));
Paolo Bonzini4eb64dc2015-04-30 12:57:28 +02009738 pr_err("Interruptibility = %08x ActivityState = %08x\n",
9739 vmcs_read32(GUEST_INTERRUPTIBILITY_INFO),
9740 vmcs_read32(GUEST_ACTIVITY_STATE));
9741 if (secondary_exec_control & SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY)
9742 pr_err("InterruptStatus = %04x\n",
9743 vmcs_read16(GUEST_INTR_STATUS));
9744
9745 pr_err("*** Host State ***\n");
9746 pr_err("RIP = 0x%016lx RSP = 0x%016lx\n",
9747 vmcs_readl(HOST_RIP), vmcs_readl(HOST_RSP));
9748 pr_err("CS=%04x SS=%04x DS=%04x ES=%04x FS=%04x GS=%04x TR=%04x\n",
9749 vmcs_read16(HOST_CS_SELECTOR), vmcs_read16(HOST_SS_SELECTOR),
9750 vmcs_read16(HOST_DS_SELECTOR), vmcs_read16(HOST_ES_SELECTOR),
9751 vmcs_read16(HOST_FS_SELECTOR), vmcs_read16(HOST_GS_SELECTOR),
9752 vmcs_read16(HOST_TR_SELECTOR));
9753 pr_err("FSBase=%016lx GSBase=%016lx TRBase=%016lx\n",
9754 vmcs_readl(HOST_FS_BASE), vmcs_readl(HOST_GS_BASE),
9755 vmcs_readl(HOST_TR_BASE));
9756 pr_err("GDTBase=%016lx IDTBase=%016lx\n",
9757 vmcs_readl(HOST_GDTR_BASE), vmcs_readl(HOST_IDTR_BASE));
9758 pr_err("CR0=%016lx CR3=%016lx CR4=%016lx\n",
9759 vmcs_readl(HOST_CR0), vmcs_readl(HOST_CR3),
9760 vmcs_readl(HOST_CR4));
9761 pr_err("Sysenter RSP=%016lx CS:RIP=%04x:%016lx\n",
9762 vmcs_readl(HOST_IA32_SYSENTER_ESP),
9763 vmcs_read32(HOST_IA32_SYSENTER_CS),
9764 vmcs_readl(HOST_IA32_SYSENTER_EIP));
9765 if (vmexit_ctl & (VM_EXIT_LOAD_IA32_PAT | VM_EXIT_LOAD_IA32_EFER))
Paolo Bonzini845c5b402015-12-03 15:51:00 +01009766 pr_err("EFER = 0x%016llx PAT = 0x%016llx\n",
9767 vmcs_read64(HOST_IA32_EFER),
9768 vmcs_read64(HOST_IA32_PAT));
Vitaly Kuznetsov773e8a02018-03-20 15:02:11 +01009769 if (cpu_has_load_perf_global_ctrl &&
9770 vmexit_ctl & VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL)
Paolo Bonzini845c5b402015-12-03 15:51:00 +01009771 pr_err("PerfGlobCtl = 0x%016llx\n",
9772 vmcs_read64(HOST_IA32_PERF_GLOBAL_CTRL));
Paolo Bonzini4eb64dc2015-04-30 12:57:28 +02009773
9774 pr_err("*** Control State ***\n");
9775 pr_err("PinBased=%08x CPUBased=%08x SecondaryExec=%08x\n",
9776 pin_based_exec_ctrl, cpu_based_exec_ctrl, secondary_exec_control);
9777 pr_err("EntryControls=%08x ExitControls=%08x\n", vmentry_ctl, vmexit_ctl);
9778 pr_err("ExceptionBitmap=%08x PFECmask=%08x PFECmatch=%08x\n",
9779 vmcs_read32(EXCEPTION_BITMAP),
9780 vmcs_read32(PAGE_FAULT_ERROR_CODE_MASK),
9781 vmcs_read32(PAGE_FAULT_ERROR_CODE_MATCH));
9782 pr_err("VMEntry: intr_info=%08x errcode=%08x ilen=%08x\n",
9783 vmcs_read32(VM_ENTRY_INTR_INFO_FIELD),
9784 vmcs_read32(VM_ENTRY_EXCEPTION_ERROR_CODE),
9785 vmcs_read32(VM_ENTRY_INSTRUCTION_LEN));
9786 pr_err("VMExit: intr_info=%08x errcode=%08x ilen=%08x\n",
9787 vmcs_read32(VM_EXIT_INTR_INFO),
9788 vmcs_read32(VM_EXIT_INTR_ERROR_CODE),
9789 vmcs_read32(VM_EXIT_INSTRUCTION_LEN));
9790 pr_err(" reason=%08x qualification=%016lx\n",
9791 vmcs_read32(VM_EXIT_REASON), vmcs_readl(EXIT_QUALIFICATION));
9792 pr_err("IDTVectoring: info=%08x errcode=%08x\n",
9793 vmcs_read32(IDT_VECTORING_INFO_FIELD),
9794 vmcs_read32(IDT_VECTORING_ERROR_CODE));
Paolo Bonzini845c5b402015-12-03 15:51:00 +01009795 pr_err("TSC Offset = 0x%016llx\n", vmcs_read64(TSC_OFFSET));
Haozhong Zhang8cfe9862015-10-20 15:39:12 +08009796 if (secondary_exec_control & SECONDARY_EXEC_TSC_SCALING)
Paolo Bonzini845c5b402015-12-03 15:51:00 +01009797 pr_err("TSC Multiplier = 0x%016llx\n",
9798 vmcs_read64(TSC_MULTIPLIER));
Paolo Bonzini4eb64dc2015-04-30 12:57:28 +02009799 if (cpu_based_exec_ctrl & CPU_BASED_TPR_SHADOW)
9800 pr_err("TPR Threshold = 0x%02x\n", vmcs_read32(TPR_THRESHOLD));
9801 if (pin_based_exec_ctrl & PIN_BASED_POSTED_INTR)
9802 pr_err("PostedIntrVec = 0x%02x\n", vmcs_read16(POSTED_INTR_NV));
9803 if ((secondary_exec_control & SECONDARY_EXEC_ENABLE_EPT))
Paolo Bonzini845c5b402015-12-03 15:51:00 +01009804 pr_err("EPT pointer = 0x%016llx\n", vmcs_read64(EPT_POINTER));
Paolo Bonzini4eb64dc2015-04-30 12:57:28 +02009805 n = vmcs_read32(CR3_TARGET_COUNT);
9806 for (i = 0; i + 1 < n; i += 4)
9807 pr_err("CR3 target%u=%016lx target%u=%016lx\n",
9808 i, vmcs_readl(CR3_TARGET_VALUE0 + i * 2),
9809 i + 1, vmcs_readl(CR3_TARGET_VALUE0 + i * 2 + 2));
9810 if (i < n)
9811 pr_err("CR3 target%u=%016lx\n",
9812 i, vmcs_readl(CR3_TARGET_VALUE0 + i * 2));
9813 if (secondary_exec_control & SECONDARY_EXEC_PAUSE_LOOP_EXITING)
9814 pr_err("PLE Gap=%08x Window=%08x\n",
9815 vmcs_read32(PLE_GAP), vmcs_read32(PLE_WINDOW));
9816 if (secondary_exec_control & SECONDARY_EXEC_ENABLE_VPID)
9817 pr_err("Virtual processor ID = 0x%04x\n",
9818 vmcs_read16(VIRTUAL_PROCESSOR_ID));
9819}
9820
Avi Kivity6aa8b732006-12-10 02:21:36 -08009821/*
9822 * The guest has exited. See if we can fix it or if we need userspace
9823 * assistance.
9824 */
Avi Kivity851ba692009-08-24 11:10:17 +03009825static int vmx_handle_exit(struct kvm_vcpu *vcpu)
Avi Kivity6aa8b732006-12-10 02:21:36 -08009826{
Avi Kivity29bd8a72007-09-10 17:27:03 +03009827 struct vcpu_vmx *vmx = to_vmx(vcpu);
Andi Kleena0861c02009-06-08 17:37:09 +08009828 u32 exit_reason = vmx->exit_reason;
Avi Kivity1155f762007-11-22 11:30:47 +02009829 u32 vectoring_info = vmx->idt_vectoring_info;
Avi Kivity29bd8a72007-09-10 17:27:03 +03009830
Paolo Bonzini8b89fe12015-12-10 18:37:32 +01009831 trace_kvm_exit(exit_reason, vcpu, KVM_ISA_VMX);
9832
Kai Huang843e4332015-01-28 10:54:28 +08009833 /*
 9834 	 * Flush the PML buffer of logged GPAs so that dirty_bitmap is kept
 9835 	 * up to date.  A further benefit: kvm_vm_ioctl_get_dirty_log only has
 9836 	 * to kick all vcpus out of guest mode before querying dirty_bitmap,
 9837 	 * because once a vcpu is back in root mode its PML buffer has
 9838 	 * already been flushed.
9839 */
9840 if (enable_pml)
Paolo Bonzini54bf36a2015-04-08 15:39:23 +02009841 vmx_flush_pml_buffer(vcpu);
Kai Huang843e4332015-01-28 10:54:28 +08009842
Mohammed Gamal80ced182009-09-01 12:48:18 +02009843 /* If guest state is invalid, start emulating */
Gleb Natapov14168782013-01-21 15:36:49 +02009844 if (vmx->emulation_required)
Mohammed Gamal80ced182009-09-01 12:48:18 +02009845 return handle_invalid_guest_state(vcpu);
Guillaume Thouvenin1d5a4d92008-10-29 09:39:42 +01009846
Paolo Bonzini7313c692017-07-27 10:31:25 +02009847 if (is_guest_mode(vcpu) && nested_vmx_exit_reflected(vcpu, exit_reason))
9848 return nested_vmx_reflect_vmexit(vcpu, exit_reason);
Nadav Har'El644d7112011-05-25 23:12:35 +03009849
Mohammed Gamal51207022010-05-31 22:40:54 +03009850 if (exit_reason & VMX_EXIT_REASONS_FAILED_VMENTRY) {
Paolo Bonzini4eb64dc2015-04-30 12:57:28 +02009851 dump_vmcs();
Mohammed Gamal51207022010-05-31 22:40:54 +03009852 vcpu->run->exit_reason = KVM_EXIT_FAIL_ENTRY;
9853 vcpu->run->fail_entry.hardware_entry_failure_reason
9854 = exit_reason;
9855 return 0;
9856 }
9857
Avi Kivity29bd8a72007-09-10 17:27:03 +03009858 if (unlikely(vmx->fail)) {
Avi Kivity851ba692009-08-24 11:10:17 +03009859 vcpu->run->exit_reason = KVM_EXIT_FAIL_ENTRY;
9860 vcpu->run->fail_entry.hardware_entry_failure_reason
Avi Kivity29bd8a72007-09-10 17:27:03 +03009861 = vmcs_read32(VM_INSTRUCTION_ERROR);
9862 return 0;
9863 }
Avi Kivity6aa8b732006-12-10 02:21:36 -08009864
Xiao Guangrongb9bf6882012-10-17 13:46:52 +08009865 /*
9866 * Note:
9867 * Do not try to fix EXIT_REASON_EPT_MISCONFIG if it caused by
9868 * delivery event since it indicates guest is accessing MMIO.
9869 * The vm-exit can be triggered again after return to guest that
9870 * will cause infinite loop.
9871 */
Mike Dayd77c26f2007-10-08 09:02:08 -04009872 if ((vectoring_info & VECTORING_INFO_VALID_MASK) &&
Sheng Yang14394422008-04-28 12:24:45 +08009873 (exit_reason != EXIT_REASON_EXCEPTION_NMI &&
Jan Kiszka60637aa2008-09-26 09:30:47 +02009874 exit_reason != EXIT_REASON_EPT_VIOLATION &&
Cao, Leib244c9f2016-07-15 13:54:04 +00009875 exit_reason != EXIT_REASON_PML_FULL &&
Xiao Guangrongb9bf6882012-10-17 13:46:52 +08009876 exit_reason != EXIT_REASON_TASK_SWITCH)) {
9877 vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
9878 vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_DELIVERY_EV;
Paolo Bonzini70bcd702017-07-05 12:38:06 +02009879 vcpu->run->internal.ndata = 3;
Xiao Guangrongb9bf6882012-10-17 13:46:52 +08009880 vcpu->run->internal.data[0] = vectoring_info;
9881 vcpu->run->internal.data[1] = exit_reason;
Paolo Bonzini70bcd702017-07-05 12:38:06 +02009882 vcpu->run->internal.data[2] = vcpu->arch.exit_qualification;
9883 if (exit_reason == EXIT_REASON_EPT_MISCONFIG) {
9884 vcpu->run->internal.ndata++;
9885 vcpu->run->internal.data[3] =
9886 vmcs_read64(GUEST_PHYSICAL_ADDRESS);
9887 }
Xiao Guangrongb9bf6882012-10-17 13:46:52 +08009888 return 0;
9889 }
Jan Kiszka3b86cd92008-09-26 09:30:57 +02009890
Paolo Bonzinid02fcf52017-11-06 13:31:13 +01009891 if (unlikely(!enable_vnmi &&
Paolo Bonzini8a1b4392017-11-06 13:31:12 +01009892 vmx->loaded_vmcs->soft_vnmi_blocked)) {
9893 if (vmx_interrupt_allowed(vcpu)) {
9894 vmx->loaded_vmcs->soft_vnmi_blocked = 0;
9895 } else if (vmx->loaded_vmcs->vnmi_blocked_time > 1000000000LL &&
9896 vcpu->arch.nmi_pending) {
9897 /*
 9898 			 * This CPU doesn't let us detect the end of an
 9899 			 * NMI-blocked window if the guest runs with IRQs
 9900 			 * disabled. So we pull the trigger after 1 s of
 9901 			 * futile waiting, but inform the user about it.
9902 */
9903 printk(KERN_WARNING "%s: Breaking out of NMI-blocked "
9904 "state on VCPU %d after 1 s timeout\n",
9905 __func__, vcpu->vcpu_id);
9906 vmx->loaded_vmcs->soft_vnmi_blocked = 0;
9907 }
9908 }
9909
Avi Kivity6aa8b732006-12-10 02:21:36 -08009910 if (exit_reason < kvm_vmx_max_exit_handlers
9911 && kvm_vmx_exit_handlers[exit_reason])
Avi Kivity851ba692009-08-24 11:10:17 +03009912 return kvm_vmx_exit_handlers[exit_reason](vcpu);
Avi Kivity6aa8b732006-12-10 02:21:36 -08009913 else {
Radim Krčmář6c6c5e02017-01-13 18:59:04 +01009914 vcpu_unimpl(vcpu, "vmx: unexpected exit reason 0x%x\n",
9915 exit_reason);
Michael S. Tsirkin2bc19dc2014-09-18 16:21:16 +03009916 kvm_queue_exception(vcpu, UD_VECTOR);
9917 return 1;
Avi Kivity6aa8b732006-12-10 02:21:36 -08009918 }
Avi Kivity6aa8b732006-12-10 02:21:36 -08009919}
9920
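/*
 * Program the TPR threshold used with the TPR shadow.  If no interrupt is
 * pending (irr == -1), or the pending interrupt already outranks the current
 * TPR and can be delivered, the threshold is cleared.  Otherwise it is set to
 * the pending priority so that a TPR-below-threshold exit fires as soon as
 * the guest lowers its TPR far enough for the interrupt to be injected.
 */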
Gleb Natapov95ba8273132009-04-21 17:45:08 +03009921static void update_cr8_intercept(struct kvm_vcpu *vcpu, int tpr, int irr)
Yang, Sheng6e5d8652007-09-12 18:03:11 +08009922{
Wanpeng Lia7c0b072014-08-21 19:46:50 +08009923 struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
9924
9925 if (is_guest_mode(vcpu) &&
9926 nested_cpu_has(vmcs12, CPU_BASED_TPR_SHADOW))
9927 return;
9928
Gleb Natapov95ba8273132009-04-21 17:45:08 +03009929 if (irr == -1 || tpr < irr) {
Yang, Sheng6e5d8652007-09-12 18:03:11 +08009930 vmcs_write32(TPR_THRESHOLD, 0);
9931 return;
9932 }
9933
Gleb Natapov95ba8273132009-04-21 17:45:08 +03009934 vmcs_write32(TPR_THRESHOLD, irr);
Yang, Sheng6e5d8652007-09-12 18:03:11 +08009935}
9936
Jim Mattson8d860bb2018-05-09 16:56:05 -04009937static void vmx_set_virtual_apic_mode(struct kvm_vcpu *vcpu)
Yang Zhang8d146952013-01-25 10:18:50 +08009938{
9939 u32 sec_exec_control;
9940
Jim Mattson8d860bb2018-05-09 16:56:05 -04009941 if (!lapic_in_kernel(vcpu))
9942 return;
9943
Radim Krčmářdccbfcf2016-08-08 20:16:23 +02009944 /* Postpone execution until vmcs01 is the current VMCS. */
9945 if (is_guest_mode(vcpu)) {
Jim Mattson8d860bb2018-05-09 16:56:05 -04009946 to_vmx(vcpu)->nested.change_vmcs01_virtual_apic_mode = true;
Radim Krčmářdccbfcf2016-08-08 20:16:23 +02009947 return;
9948 }
9949
Paolo Bonzini35754c92015-07-29 12:05:37 +02009950 if (!cpu_need_tpr_shadow(vcpu))
Yang Zhang8d146952013-01-25 10:18:50 +08009951 return;
9952
9953 sec_exec_control = vmcs_read32(SECONDARY_VM_EXEC_CONTROL);
Jim Mattson8d860bb2018-05-09 16:56:05 -04009954 sec_exec_control &= ~(SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES |
9955 SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE);
Yang Zhang8d146952013-01-25 10:18:50 +08009956
Jim Mattson8d860bb2018-05-09 16:56:05 -04009957 switch (kvm_get_apic_mode(vcpu)) {
9958 case LAPIC_MODE_INVALID:
9959 WARN_ONCE(true, "Invalid local APIC state");
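		/* fall through */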
9960 case LAPIC_MODE_DISABLED:
9961 break;
9962 case LAPIC_MODE_XAPIC:
9963 if (flexpriority_enabled) {
9964 sec_exec_control |=
9965 SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES;
9966 vmx_flush_tlb(vcpu, true);
9967 }
9968 break;
9969 case LAPIC_MODE_X2APIC:
9970 if (cpu_has_vmx_virtualize_x2apic_mode())
9971 sec_exec_control |=
9972 SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE;
9973 break;
Yang Zhang8d146952013-01-25 10:18:50 +08009974 }
9975 vmcs_write32(SECONDARY_VM_EXEC_CONTROL, sec_exec_control);
9976
Paolo Bonzini904e14f2018-01-16 16:51:18 +01009977 vmx_update_msr_bitmap(vcpu);
Yang Zhang8d146952013-01-25 10:18:50 +08009978}
9979
Tang Chen38b99172014-09-24 15:57:54 +08009980static void vmx_set_apic_access_page_addr(struct kvm_vcpu *vcpu, hpa_t hpa)
9981{
Jim Mattsonab5df312018-05-09 17:02:03 -04009982 if (!is_guest_mode(vcpu)) {
Tang Chen38b99172014-09-24 15:57:54 +08009983 vmcs_write64(APIC_ACCESS_ADDR, hpa);
Junaid Shahida468f2d2018-04-26 13:09:50 -07009984 vmx_flush_tlb(vcpu, true);
Jim Mattsonfb6c8192017-03-16 13:53:59 -07009985 }
Tang Chen38b99172014-09-24 15:57:54 +08009986}
9987
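/*
 * GUEST_INTR_STATUS is a 16-bit field: the low byte holds RVI (the highest
 * requesting interrupt vector) and the high byte holds SVI (the highest
 * in-service vector).  This helper and vmx_set_rvi() below each update one
 * byte while preserving the other.
 */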
Paolo Bonzini67c9ddd2016-05-10 17:01:23 +02009988static void vmx_hwapic_isr_update(struct kvm_vcpu *vcpu, int max_isr)
Yang Zhangc7c9c562013-01-25 10:18:51 +08009989{
9990 u16 status;
9991 u8 old;
9992
Paolo Bonzini67c9ddd2016-05-10 17:01:23 +02009993 if (max_isr == -1)
9994 max_isr = 0;
Yang Zhangc7c9c562013-01-25 10:18:51 +08009995
9996 status = vmcs_read16(GUEST_INTR_STATUS);
9997 old = status >> 8;
Paolo Bonzini67c9ddd2016-05-10 17:01:23 +02009998 if (max_isr != old) {
Yang Zhangc7c9c562013-01-25 10:18:51 +08009999 status &= 0xff;
Paolo Bonzini67c9ddd2016-05-10 17:01:23 +020010000 status |= max_isr << 8;
Yang Zhangc7c9c562013-01-25 10:18:51 +080010001 vmcs_write16(GUEST_INTR_STATUS, status);
10002 }
10003}
10004
10005static void vmx_set_rvi(int vector)
10006{
10007 u16 status;
10008 u8 old;
10009
Wei Wang4114c272014-11-05 10:53:43 +080010010 if (vector == -1)
10011 vector = 0;
10012
Yang Zhangc7c9c562013-01-25 10:18:51 +080010013 status = vmcs_read16(GUEST_INTR_STATUS);
10014 old = (u8)status & 0xff;
10015 if ((u8)vector != old) {
10016 status &= ~0xff;
10017 status |= (u8)vector;
10018 vmcs_write16(GUEST_INTR_STATUS, status);
10019 }
10020}
10021
10022static void vmx_hwapic_irr_update(struct kvm_vcpu *vcpu, int max_irr)
10023{
Liran Alon851c1a182017-12-24 18:12:56 +020010024 /*
10025 	 * When running L2, updating RVI is only relevant if
10026 	 * vmcs12 has virtual-interrupt-delivery enabled.
10027 	 * However, that can be enabled only when L1 also
10028 	 * intercepts external interrupts, and in that case
10029 	 * we should not update vmcs02's RVI but intercept the
10030 	 * interrupt instead. Therefore, do nothing when running L2.
10031 */
10032 if (!is_guest_mode(vcpu))
Wanpeng Li963fee12014-07-17 19:03:00 +080010033 vmx_set_rvi(max_irr);
Yang Zhangc7c9c562013-01-25 10:18:51 +080010034}
10035
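/*
 * Fold the posted-interrupt descriptor into the vAPIC.  Other CPUs and the
 * IOMMU post interrupts by setting bits in PIR and then setting ON; here ON
 * is cleared, PIR is merged into the IRR and the highest pending vector is
 * recomputed so it can be reflected in RVI or, for a nested guest, used to
 * decide whether to exit to L1.
 */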
Paolo Bonzini76dfafd52016-12-19 17:17:11 +010010036static int vmx_sync_pir_to_irr(struct kvm_vcpu *vcpu)
Paolo Bonzini810e6de2016-12-19 13:05:46 +010010037{
10038 struct vcpu_vmx *vmx = to_vmx(vcpu);
Paolo Bonzini76dfafd52016-12-19 17:17:11 +010010039 int max_irr;
Liran Alonf27a85c2017-12-24 18:12:55 +020010040 bool max_irr_updated;
Paolo Bonzini810e6de2016-12-19 13:05:46 +010010041
Paolo Bonzini76dfafd52016-12-19 17:17:11 +010010042 WARN_ON(!vcpu->arch.apicv_active);
10043 if (pi_test_on(&vmx->pi_desc)) {
10044 pi_clear_on(&vmx->pi_desc);
10045 /*
10046 * IOMMU can write to PIR.ON, so the barrier matters even on UP.
10047 * But on x86 this is just a compiler barrier anyway.
10048 */
10049 smp_mb__after_atomic();
Liran Alonf27a85c2017-12-24 18:12:55 +020010050 max_irr_updated =
10051 kvm_apic_update_irr(vcpu, vmx->pi_desc.pir, &max_irr);
10052
10053 /*
10054 * If we are running L2 and L1 has a new pending interrupt
10055 * which can be injected, we should re-evaluate
10056 * what should be done with this new L1 interrupt.
Liran Alon851c1a182017-12-24 18:12:56 +020010057 * If L1 intercepts external-interrupts, we should
10058 * exit from L2 to L1. Otherwise, interrupt should be
10059 * delivered directly to L2.
Liran Alonf27a85c2017-12-24 18:12:55 +020010060 */
Liran Alon851c1a182017-12-24 18:12:56 +020010061 if (is_guest_mode(vcpu) && max_irr_updated) {
10062 if (nested_exit_on_intr(vcpu))
10063 kvm_vcpu_exiting_guest_mode(vcpu);
10064 else
10065 kvm_make_request(KVM_REQ_EVENT, vcpu);
10066 }
Paolo Bonzini76dfafd52016-12-19 17:17:11 +010010067 } else {
10068 max_irr = kvm_lapic_find_highest_irr(vcpu);
10069 }
10070 vmx_hwapic_irr_update(vcpu, max_irr);
10071 return max_irr;
Paolo Bonzini810e6de2016-12-19 13:05:46 +010010072}
10073
Andrey Smetanin63086302015-11-10 15:36:32 +030010074static void vmx_load_eoi_exitmap(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap)
Yang Zhangc7c9c562013-01-25 10:18:51 +080010075{
Andrey Smetanind62caab2015-11-10 15:36:33 +030010076 if (!kvm_vcpu_apicv_active(vcpu))
Yang Zhang3d81bc72013-04-11 19:25:13 +080010077 return;
10078
Yang Zhangc7c9c562013-01-25 10:18:51 +080010079 vmcs_write64(EOI_EXIT_BITMAP0, eoi_exit_bitmap[0]);
10080 vmcs_write64(EOI_EXIT_BITMAP1, eoi_exit_bitmap[1]);
10081 vmcs_write64(EOI_EXIT_BITMAP2, eoi_exit_bitmap[2]);
10082 vmcs_write64(EOI_EXIT_BITMAP3, eoi_exit_bitmap[3]);
10083}
10084
Paolo Bonzini967235d2016-12-19 14:03:45 +010010085static void vmx_apicv_post_state_restore(struct kvm_vcpu *vcpu)
10086{
10087 struct vcpu_vmx *vmx = to_vmx(vcpu);
10088
10089 pi_clear_on(&vmx->pi_desc);
10090 memset(vmx->pi_desc.pir, 0, sizeof(vmx->pi_desc.pir));
10091}
10092
Avi Kivity51aa01d2010-07-20 14:31:20 +030010093static void vmx_complete_atomic_exit(struct vcpu_vmx *vmx)
Avi Kivitycf393f72008-07-01 16:20:21 +030010094{
Jim Mattson48ae0fb2017-05-22 09:48:33 -070010095 u32 exit_intr_info = 0;
10096 u16 basic_exit_reason = (u16)vmx->exit_reason;
Avi Kivity00eba012011-03-07 17:24:54 +020010097
Jim Mattson48ae0fb2017-05-22 09:48:33 -070010098 if (!(basic_exit_reason == EXIT_REASON_MCE_DURING_VMENTRY
10099 || basic_exit_reason == EXIT_REASON_EXCEPTION_NMI))
Avi Kivity00eba012011-03-07 17:24:54 +020010100 return;
10101
Jim Mattson48ae0fb2017-05-22 09:48:33 -070010102 if (!(vmx->exit_reason & VMX_EXIT_REASONS_FAILED_VMENTRY))
10103 exit_intr_info = vmcs_read32(VM_EXIT_INTR_INFO);
10104 vmx->exit_intr_info = exit_intr_info;
Andi Kleena0861c02009-06-08 17:37:09 +080010105
Wanpeng Li1261bfa2017-07-13 18:30:40 -070010106 /* if exit due to PF check for async PF */
10107 if (is_page_fault(exit_intr_info))
10108 vmx->vcpu.arch.apf.host_apf_reason = kvm_read_and_reset_pf_reason();
10109
Andi Kleena0861c02009-06-08 17:37:09 +080010110 /* Handle machine checks before interrupts are enabled */
Jim Mattson48ae0fb2017-05-22 09:48:33 -070010111 if (basic_exit_reason == EXIT_REASON_MCE_DURING_VMENTRY ||
10112 is_machine_check(exit_intr_info))
Andi Kleena0861c02009-06-08 17:37:09 +080010113 kvm_machine_check();
10114
Gleb Natapov20f65982009-05-11 13:35:55 +030010115 /* We need to handle NMIs before interrupts are enabled */
Jim Mattsonef85b672016-12-12 11:01:37 -080010116 if (is_nmi(exit_intr_info)) {
Andi Kleendd60d212017-07-25 17:20:32 -070010117 kvm_before_interrupt(&vmx->vcpu);
Gleb Natapov20f65982009-05-11 13:35:55 +030010118 asm("int $2");
Andi Kleendd60d212017-07-25 17:20:32 -070010119 kvm_after_interrupt(&vmx->vcpu);
Zhang, Yanminff9d07a2010-04-19 13:32:45 +080010120 }
Avi Kivity51aa01d2010-07-20 14:31:20 +030010121}
Gleb Natapov20f65982009-05-11 13:35:55 +030010122
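/*
 * With "acknowledge interrupt on exit" in use, the CPU acks an external
 * interrupt at VM exit and latches its vector in VM_EXIT_INTR_INFO instead of
 * delivering it through the IDT.  Dispatch it by hand: build the stack frame
 * an interrupt gate would have pushed (SS:RSP on 64-bit, RFLAGS, CS) and call
 * the host IDT entry for that vector.
 */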
Yang Zhanga547c6d2013-04-11 19:25:10 +080010123static void vmx_handle_external_intr(struct kvm_vcpu *vcpu)
10124{
10125 u32 exit_intr_info = vmcs_read32(VM_EXIT_INTR_INFO);
10126
Yang Zhanga547c6d2013-04-11 19:25:10 +080010127 if ((exit_intr_info & (INTR_INFO_VALID_MASK | INTR_INFO_INTR_TYPE_MASK))
10128 == (INTR_INFO_VALID_MASK | INTR_TYPE_EXT_INTR)) {
10129 unsigned int vector;
10130 unsigned long entry;
10131 gate_desc *desc;
10132 struct vcpu_vmx *vmx = to_vmx(vcpu);
10133#ifdef CONFIG_X86_64
10134 unsigned long tmp;
10135#endif
10136
10137 vector = exit_intr_info & INTR_INFO_VECTOR_MASK;
10138 desc = (gate_desc *)vmx->host_idt_base + vector;
Thomas Gleixner64b163f2017-08-28 08:47:37 +020010139 entry = gate_offset(desc);
Yang Zhanga547c6d2013-04-11 19:25:10 +080010140 asm volatile(
10141#ifdef CONFIG_X86_64
10142 "mov %%" _ASM_SP ", %[sp]\n\t"
10143 "and $0xfffffffffffffff0, %%" _ASM_SP "\n\t"
10144 "push $%c[ss]\n\t"
10145 "push %[sp]\n\t"
10146#endif
10147 "pushf\n\t"
Yang Zhanga547c6d2013-04-11 19:25:10 +080010148 __ASM_SIZE(push) " $%c[cs]\n\t"
Peter Zijlstrac940a3f2018-01-25 10:58:14 +010010149 CALL_NOSPEC
Yang Zhanga547c6d2013-04-11 19:25:10 +080010150 :
10151#ifdef CONFIG_X86_64
Chris J Arges3f62de52016-01-22 15:44:38 -060010152 [sp]"=&r"(tmp),
Yang Zhanga547c6d2013-04-11 19:25:10 +080010153#endif
Josh Poimboeuff5caf622017-09-20 16:24:33 -050010154 ASM_CALL_CONSTRAINT
Yang Zhanga547c6d2013-04-11 19:25:10 +080010155 :
Peter Zijlstrac940a3f2018-01-25 10:58:14 +010010156 THUNK_TARGET(entry),
Yang Zhanga547c6d2013-04-11 19:25:10 +080010157 [ss]"i"(__KERNEL_DS),
10158 [cs]"i"(__KERNEL_CS)
10159 );
Paolo Bonzinif2485b32016-06-15 15:23:11 +020010160 }
Yang Zhanga547c6d2013-04-11 19:25:10 +080010161}
Josh Poimboeufc207aee2017-06-28 10:11:06 -050010162STACK_FRAME_NON_STANDARD(vmx_handle_external_intr);
Yang Zhanga547c6d2013-04-11 19:25:10 +080010163
Tom Lendackybc226f02018-05-10 22:06:39 +020010164static bool vmx_has_emulated_msr(int index)
Paolo Bonzini6d396b52015-04-01 14:25:33 +020010165{
Tom Lendackybc226f02018-05-10 22:06:39 +020010166 switch (index) {
10167 case MSR_IA32_SMBASE:
10168 /*
10169 * We cannot do SMM unless we can run the guest in big
10170 * real mode.
10171 */
10172 return enable_unrestricted_guest || emulate_invalid_guest_state;
10173 case MSR_AMD64_VIRT_SPEC_CTRL:
10174 /* This is AMD only. */
10175 return false;
10176 default:
10177 return true;
10178 }
Paolo Bonzini6d396b52015-04-01 14:25:33 +020010179}
10180
Liu, Jinsongda8999d2014-02-24 10:55:46 +000010181static bool vmx_mpx_supported(void)
10182{
10183 return (vmcs_config.vmexit_ctrl & VM_EXIT_CLEAR_BNDCFGS) &&
10184 (vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_BNDCFGS);
10185}
10186
Wanpeng Li55412b22014-12-02 19:21:30 +080010187static bool vmx_xsaves_supported(void)
10188{
10189 return vmcs_config.cpu_based_2nd_exec_ctrl &
10190 SECONDARY_EXEC_XSAVES;
10191}
10192
Avi Kivity51aa01d2010-07-20 14:31:20 +030010193static void vmx_recover_nmi_blocking(struct vcpu_vmx *vmx)
10194{
Avi Kivityc5ca8e52011-03-07 17:37:37 +020010195 u32 exit_intr_info;
Avi Kivity51aa01d2010-07-20 14:31:20 +030010196 bool unblock_nmi;
10197 u8 vector;
10198 bool idtv_info_valid;
10199
10200 idtv_info_valid = vmx->idt_vectoring_info & VECTORING_INFO_VALID_MASK;
Gleb Natapov20f65982009-05-11 13:35:55 +030010201
Paolo Bonzinid02fcf52017-11-06 13:31:13 +010010202 if (enable_vnmi) {
Paolo Bonzini8a1b4392017-11-06 13:31:12 +010010203 if (vmx->loaded_vmcs->nmi_known_unmasked)
10204 return;
10205 /*
10206 * Can't use vmx->exit_intr_info since we're not sure what
10207 * the exit reason is.
10208 */
10209 exit_intr_info = vmcs_read32(VM_EXIT_INTR_INFO);
10210 unblock_nmi = (exit_intr_info & INTR_INFO_UNBLOCK_NMI) != 0;
10211 vector = exit_intr_info & INTR_INFO_VECTOR_MASK;
10212 /*
10213 * SDM 3: 27.7.1.2 (September 2008)
10214 * Re-set bit "block by NMI" before VM entry if vmexit caused by
10215 * a guest IRET fault.
10216 * SDM 3: 23.2.2 (September 2008)
10217 * Bit 12 is undefined in any of the following cases:
10218 * If the VM exit sets the valid bit in the IDT-vectoring
10219 * information field.
10220 * If the VM exit is due to a double fault.
10221 */
10222 if ((exit_intr_info & INTR_INFO_VALID_MASK) && unblock_nmi &&
10223 vector != DF_VECTOR && !idtv_info_valid)
10224 vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO,
10225 GUEST_INTR_STATE_NMI);
10226 else
10227 vmx->loaded_vmcs->nmi_known_unmasked =
10228 !(vmcs_read32(GUEST_INTERRUPTIBILITY_INFO)
10229 & GUEST_INTR_STATE_NMI);
10230 } else if (unlikely(vmx->loaded_vmcs->soft_vnmi_blocked))
10231 vmx->loaded_vmcs->vnmi_blocked_time +=
10232 ktime_to_ns(ktime_sub(ktime_get(),
10233 vmx->loaded_vmcs->entry_time));
Avi Kivity51aa01d2010-07-20 14:31:20 +030010234}
10235
Jan Kiszka3ab66e82013-02-20 14:03:24 +010010236static void __vmx_complete_interrupts(struct kvm_vcpu *vcpu,
Avi Kivity83422e12010-07-20 14:43:23 +030010237 u32 idt_vectoring_info,
10238 int instr_len_field,
10239 int error_code_field)
Avi Kivity51aa01d2010-07-20 14:31:20 +030010240{
Avi Kivity51aa01d2010-07-20 14:31:20 +030010241 u8 vector;
10242 int type;
10243 bool idtv_info_valid;
10244
10245 idtv_info_valid = idt_vectoring_info & VECTORING_INFO_VALID_MASK;
Avi Kivity668f6122008-07-02 09:28:55 +030010246
Jan Kiszka3ab66e82013-02-20 14:03:24 +010010247 vcpu->arch.nmi_injected = false;
10248 kvm_clear_exception_queue(vcpu);
10249 kvm_clear_interrupt_queue(vcpu);
Gleb Natapov37b96e92009-03-30 16:03:13 +030010250
10251 if (!idtv_info_valid)
10252 return;
10253
Jan Kiszka3ab66e82013-02-20 14:03:24 +010010254 kvm_make_request(KVM_REQ_EVENT, vcpu);
Avi Kivity3842d132010-07-27 12:30:24 +030010255
Avi Kivity668f6122008-07-02 09:28:55 +030010256 vector = idt_vectoring_info & VECTORING_INFO_VECTOR_MASK;
10257 type = idt_vectoring_info & VECTORING_INFO_TYPE_MASK;
Gleb Natapov37b96e92009-03-30 16:03:13 +030010258
Gleb Natapov64a7ec02009-03-30 16:03:29 +030010259 switch (type) {
Gleb Natapov37b96e92009-03-30 16:03:13 +030010260 case INTR_TYPE_NMI_INTR:
Jan Kiszka3ab66e82013-02-20 14:03:24 +010010261 vcpu->arch.nmi_injected = true;
Avi Kivity668f6122008-07-02 09:28:55 +030010262 /*
Gleb Natapov7b4a25c2009-03-30 16:03:08 +030010263 * SDM 3: 27.7.1.2 (September 2008)
Gleb Natapov37b96e92009-03-30 16:03:13 +030010264 		 * Clear bit "block by NMI" before VM entry if an NMI
10265 * delivery faulted.
Avi Kivity668f6122008-07-02 09:28:55 +030010266 */
Jan Kiszka3ab66e82013-02-20 14:03:24 +010010267 vmx_set_nmi_mask(vcpu, false);
Gleb Natapov37b96e92009-03-30 16:03:13 +030010268 break;
Gleb Natapov37b96e92009-03-30 16:03:13 +030010269 case INTR_TYPE_SOFT_EXCEPTION:
Jan Kiszka3ab66e82013-02-20 14:03:24 +010010270 vcpu->arch.event_exit_inst_len = vmcs_read32(instr_len_field);
Gleb Natapov66fd3f72009-05-11 13:35:50 +030010271 /* fall through */
10272 case INTR_TYPE_HARD_EXCEPTION:
Avi Kivity35920a32008-07-03 14:50:12 +030010273 if (idt_vectoring_info & VECTORING_INFO_DELIVER_CODE_MASK) {
Avi Kivity83422e12010-07-20 14:43:23 +030010274 u32 err = vmcs_read32(error_code_field);
Gleb Natapov851eb6672013-09-25 12:51:34 +030010275 kvm_requeue_exception_e(vcpu, vector, err);
Avi Kivity35920a32008-07-03 14:50:12 +030010276 } else
Gleb Natapov851eb6672013-09-25 12:51:34 +030010277 kvm_requeue_exception(vcpu, vector);
Gleb Natapov37b96e92009-03-30 16:03:13 +030010278 break;
Gleb Natapov66fd3f72009-05-11 13:35:50 +030010279 case INTR_TYPE_SOFT_INTR:
Jan Kiszka3ab66e82013-02-20 14:03:24 +010010280 vcpu->arch.event_exit_inst_len = vmcs_read32(instr_len_field);
Gleb Natapov66fd3f72009-05-11 13:35:50 +030010281 /* fall through */
Gleb Natapov37b96e92009-03-30 16:03:13 +030010282 case INTR_TYPE_EXT_INTR:
Jan Kiszka3ab66e82013-02-20 14:03:24 +010010283 kvm_queue_interrupt(vcpu, vector, type == INTR_TYPE_SOFT_INTR);
Gleb Natapov37b96e92009-03-30 16:03:13 +030010284 break;
10285 default:
10286 break;
Avi Kivityf7d92382008-07-03 16:14:28 +030010287 }
Avi Kivitycf393f72008-07-01 16:20:21 +030010288}
10289
Avi Kivity83422e12010-07-20 14:43:23 +030010290static void vmx_complete_interrupts(struct vcpu_vmx *vmx)
10291{
Jan Kiszka3ab66e82013-02-20 14:03:24 +010010292 __vmx_complete_interrupts(&vmx->vcpu, vmx->idt_vectoring_info,
Avi Kivity83422e12010-07-20 14:43:23 +030010293 VM_EXIT_INSTRUCTION_LEN,
10294 IDT_VECTORING_ERROR_CODE);
10295}
10296
Avi Kivityb463a6f2010-07-20 15:06:17 +030010297static void vmx_cancel_injection(struct kvm_vcpu *vcpu)
10298{
Jan Kiszka3ab66e82013-02-20 14:03:24 +010010299 __vmx_complete_interrupts(vcpu,
Avi Kivityb463a6f2010-07-20 15:06:17 +030010300 vmcs_read32(VM_ENTRY_INTR_INFO_FIELD),
10301 VM_ENTRY_INSTRUCTION_LEN,
10302 VM_ENTRY_EXCEPTION_ERROR_CODE);
10303
10304 vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, 0);
10305}
10306
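/*
 * Ask perf which PMU MSRs (e.g. IA32_PERF_GLOBAL_CTRL) need different host
 * and guest values, and keep the VMCS atomic MSR switch lists minimal by
 * dropping any MSR whose host and guest values already match.
 */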
Gleb Natapovd7cd9792011-10-05 14:01:23 +020010307static void atomic_switch_perf_msrs(struct vcpu_vmx *vmx)
10308{
10309 int i, nr_msrs;
10310 struct perf_guest_switch_msr *msrs;
10311
10312 msrs = perf_guest_get_msrs(&nr_msrs);
10313
10314 if (!msrs)
10315 return;
10316
10317 for (i = 0; i < nr_msrs; i++)
10318 if (msrs[i].host == msrs[i].guest)
10319 clear_atomic_switch_msr(vmx, msrs[i].msr);
10320 else
10321 add_atomic_switch_msr(vmx, msrs[i].msr, msrs[i].guest,
10322 msrs[i].host);
10323}
10324
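/*
 * Convert the absolute TSC deadline in vmx->hv_deadline_tsc into a VMX
 * preemption timer value.  The timer counts down at the TSC rate divided by
 * 2^cpu_preemption_timer_multi (a ratio reported by the IA32_VMX_MISC MSR),
 * hence the shift; a deadline already in the past is armed as 0 so the timer
 * fires immediately after VM entry.
 */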
Jiang Biao33365e72016-11-03 15:03:37 +080010325static void vmx_arm_hv_timer(struct kvm_vcpu *vcpu)
Yunhong Jiang64672c92016-06-13 14:19:59 -070010326{
10327 struct vcpu_vmx *vmx = to_vmx(vcpu);
10328 u64 tscl;
10329 u32 delta_tsc;
10330
10331 if (vmx->hv_deadline_tsc == -1)
10332 return;
10333
10334 tscl = rdtsc();
10335 if (vmx->hv_deadline_tsc > tscl)
10336 		/* guaranteed to fit in 32 bits because it was checked in set_hv_timer */
10337 delta_tsc = (u32)((vmx->hv_deadline_tsc - tscl) >>
10338 cpu_preemption_timer_multi);
10339 else
10340 delta_tsc = 0;
10341
10342 vmcs_write32(VMX_PREEMPTION_TIMER_VALUE, delta_tsc);
10343}
10344
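/*
 * The VM entry path, in outline: flush pending shadow-VMCS changes, write
 * dirty RSP/RIP into the VMCS, reload HOST_CR3/HOST_CR4 only if they changed,
 * switch PKRU and SPEC_CTRL to the guest's values where needed, run the guest
 * via the VMLAUNCH/VMRESUME inline asm below, then save the guest registers,
 * clear the rest to limit speculation, restore the host SPEC_CTRL, refill the
 * RSB and record the exit reason for vmx_handle_exit().
 */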
Lai Jiangshana3b5ba42011-02-11 14:29:40 +080010345static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
Avi Kivity6aa8b732006-12-10 02:21:36 -080010346{
Gregory Haskinsa2fa3e92007-07-27 08:13:10 -040010347 struct vcpu_vmx *vmx = to_vmx(vcpu);
Vitaly Kuznetsov773e8a02018-03-20 15:02:11 +010010348 unsigned long cr3, cr4, evmcs_rsp;
Avi Kivity104f2262010-11-18 13:12:52 +020010349
Paolo Bonzini8a1b4392017-11-06 13:31:12 +010010350 /* Record the guest's net vcpu time for enforced NMI injections. */
Paolo Bonzinid02fcf52017-11-06 13:31:13 +010010351 if (unlikely(!enable_vnmi &&
Paolo Bonzini8a1b4392017-11-06 13:31:12 +010010352 vmx->loaded_vmcs->soft_vnmi_blocked))
10353 vmx->loaded_vmcs->entry_time = ktime_get();
10354
Avi Kivity104f2262010-11-18 13:12:52 +020010355 	/* Don't enter VMX if guest state is invalid; let the exit handler
10356 	   start emulation until we arrive back at a valid state */
Gleb Natapov14168782013-01-21 15:36:49 +020010357 if (vmx->emulation_required)
Avi Kivity104f2262010-11-18 13:12:52 +020010358 return;
10359
Radim Krčmářa7653ec2014-08-21 18:08:07 +020010360 if (vmx->ple_window_dirty) {
10361 vmx->ple_window_dirty = false;
10362 vmcs_write32(PLE_WINDOW, vmx->ple_window);
10363 }
10364
Abel Gordon012f83c2013-04-18 14:39:25 +030010365 if (vmx->nested.sync_shadow_vmcs) {
10366 copy_vmcs12_to_shadow(vmx);
10367 vmx->nested.sync_shadow_vmcs = false;
10368 }
10369
Avi Kivity104f2262010-11-18 13:12:52 +020010370 if (test_bit(VCPU_REGS_RSP, (unsigned long *)&vcpu->arch.regs_dirty))
10371 vmcs_writel(GUEST_RSP, vcpu->arch.regs[VCPU_REGS_RSP]);
10372 if (test_bit(VCPU_REGS_RIP, (unsigned long *)&vcpu->arch.regs_dirty))
10373 vmcs_writel(GUEST_RIP, vcpu->arch.regs[VCPU_REGS_RIP]);
10374
Andy Lutomirskid6e41f12017-05-28 10:00:17 -070010375 cr3 = __get_current_cr3_fast();
Sean Christophersond7ee0392018-07-23 12:32:47 -070010376 if (unlikely(cr3 != vmx->loaded_vmcs->host_state.cr3)) {
Andy Lutomirskid6e41f12017-05-28 10:00:17 -070010377 vmcs_writel(HOST_CR3, cr3);
Sean Christophersond7ee0392018-07-23 12:32:47 -070010378 vmx->loaded_vmcs->host_state.cr3 = cr3;
Andy Lutomirskid6e41f12017-05-28 10:00:17 -070010379 }
10380
Andy Lutomirski1e02ce42014-10-24 15:58:08 -070010381 cr4 = cr4_read_shadow();
Sean Christophersond7ee0392018-07-23 12:32:47 -070010382 if (unlikely(cr4 != vmx->loaded_vmcs->host_state.cr4)) {
Andy Lutomirskid974baa2014-10-08 09:02:13 -070010383 vmcs_writel(HOST_CR4, cr4);
Sean Christophersond7ee0392018-07-23 12:32:47 -070010384 vmx->loaded_vmcs->host_state.cr4 = cr4;
Andy Lutomirskid974baa2014-10-08 09:02:13 -070010385 }
10386
Avi Kivity104f2262010-11-18 13:12:52 +020010387 /* When single-stepping over STI and MOV SS, we must clear the
10388 * corresponding interruptibility bits in the guest state. Otherwise
10389 * vmentry fails as it then expects bit 14 (BS) in pending debug
10390 * exceptions being set, but that's not correct for the guest debugging
10391 * case. */
10392 if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP)
10393 vmx_set_interrupt_shadow(vcpu, 0);
10394
Paolo Bonzinib9dd21e2017-08-23 23:14:38 +020010395 if (static_cpu_has(X86_FEATURE_PKU) &&
10396 kvm_read_cr4_bits(vcpu, X86_CR4_PKE) &&
10397 vcpu->arch.pkru != vmx->host_pkru)
10398 __write_pkru(vcpu->arch.pkru);
Xiao Guangrong1be0e612016-03-22 16:51:18 +080010399
Gleb Natapovd7cd9792011-10-05 14:01:23 +020010400 atomic_switch_perf_msrs(vmx);
10401
Yunhong Jiang64672c92016-06-13 14:19:59 -070010402 vmx_arm_hv_timer(vcpu);
10403
KarimAllah Ahmedd28b3872018-02-01 22:59:45 +010010404 /*
10405 * If this vCPU has touched SPEC_CTRL, restore the guest's value if
10406 * it's non-zero. Since vmentry is serialising on affected CPUs, there
10407 * is no need to worry about the conditional branch over the wrmsr
10408 * being speculatively taken.
10409 */
Thomas Gleixnerccbcd262018-05-09 23:01:01 +020010410 x86_spec_ctrl_set_guest(vmx->spec_ctrl, 0);
KarimAllah Ahmedd28b3872018-02-01 22:59:45 +010010411
Nadav Har'Eld462b812011-05-24 15:26:10 +030010412 vmx->__launched = vmx->loaded_vmcs->launched;
Vitaly Kuznetsov773e8a02018-03-20 15:02:11 +010010413
10414 evmcs_rsp = static_branch_unlikely(&enable_evmcs) ?
10415 (unsigned long)&current_evmcs->host_rsp : 0;
10416
Avi Kivity104f2262010-11-18 13:12:52 +020010417 asm(
Avi Kivity6aa8b732006-12-10 02:21:36 -080010418 /* Store host registers */
Avi Kivityb188c81f2012-09-16 15:10:58 +030010419 "push %%" _ASM_DX "; push %%" _ASM_BP ";"
10420 "push %%" _ASM_CX " \n\t" /* placeholder for guest rcx */
10421 "push %%" _ASM_CX " \n\t"
10422 "cmp %%" _ASM_SP ", %c[host_rsp](%0) \n\t"
Avi Kivity313dbd492008-07-17 18:04:30 +030010423 "je 1f \n\t"
Avi Kivityb188c81f2012-09-16 15:10:58 +030010424 "mov %%" _ASM_SP ", %c[host_rsp](%0) \n\t"
Vitaly Kuznetsov773e8a02018-03-20 15:02:11 +010010425 /* Avoid VMWRITE when Enlightened VMCS is in use */
10426 "test %%" _ASM_SI ", %%" _ASM_SI " \n\t"
10427 "jz 2f \n\t"
10428 "mov %%" _ASM_SP ", (%%" _ASM_SI ") \n\t"
10429 "jmp 1f \n\t"
10430 "2: \n\t"
Avi Kivity4ecac3f2008-05-13 13:23:38 +030010431 __ex(ASM_VMX_VMWRITE_RSP_RDX) "\n\t"
Avi Kivity313dbd492008-07-17 18:04:30 +030010432 "1: \n\t"
Avi Kivityd3edefc2009-06-16 12:33:56 +030010433 /* Reload cr2 if changed */
Avi Kivityb188c81f2012-09-16 15:10:58 +030010434 "mov %c[cr2](%0), %%" _ASM_AX " \n\t"
10435 "mov %%cr2, %%" _ASM_DX " \n\t"
10436 "cmp %%" _ASM_AX ", %%" _ASM_DX " \n\t"
Vitaly Kuznetsov773e8a02018-03-20 15:02:11 +010010437 "je 3f \n\t"
Avi Kivityb188c81f2012-09-16 15:10:58 +030010438 "mov %%" _ASM_AX", %%cr2 \n\t"
Vitaly Kuznetsov773e8a02018-03-20 15:02:11 +010010439 "3: \n\t"
Avi Kivity6aa8b732006-12-10 02:21:36 -080010440 		/* Check if vmlaunch or vmresume is needed */
Avi Kivitye08aa782007-11-15 18:06:18 +020010441 "cmpl $0, %c[launched](%0) \n\t"
Avi Kivity6aa8b732006-12-10 02:21:36 -080010442 /* Load guest registers. Don't clobber flags. */
Avi Kivityb188c81f2012-09-16 15:10:58 +030010443 "mov %c[rax](%0), %%" _ASM_AX " \n\t"
10444 "mov %c[rbx](%0), %%" _ASM_BX " \n\t"
10445 "mov %c[rdx](%0), %%" _ASM_DX " \n\t"
10446 "mov %c[rsi](%0), %%" _ASM_SI " \n\t"
10447 "mov %c[rdi](%0), %%" _ASM_DI " \n\t"
10448 "mov %c[rbp](%0), %%" _ASM_BP " \n\t"
Avi Kivity05b3e0c2006-12-13 00:33:45 -080010449#ifdef CONFIG_X86_64
Avi Kivitye08aa782007-11-15 18:06:18 +020010450 "mov %c[r8](%0), %%r8 \n\t"
10451 "mov %c[r9](%0), %%r9 \n\t"
10452 "mov %c[r10](%0), %%r10 \n\t"
10453 "mov %c[r11](%0), %%r11 \n\t"
10454 "mov %c[r12](%0), %%r12 \n\t"
10455 "mov %c[r13](%0), %%r13 \n\t"
10456 "mov %c[r14](%0), %%r14 \n\t"
10457 "mov %c[r15](%0), %%r15 \n\t"
Avi Kivity6aa8b732006-12-10 02:21:36 -080010458#endif
Avi Kivityb188c81f2012-09-16 15:10:58 +030010459 "mov %c[rcx](%0), %%" _ASM_CX " \n\t" /* kills %0 (ecx) */
Avi Kivityc8019492008-07-14 14:44:59 +030010460
Avi Kivity6aa8b732006-12-10 02:21:36 -080010461 /* Enter guest mode */
Avi Kivity83287ea422012-09-16 15:10:57 +030010462 "jne 1f \n\t"
Avi Kivity4ecac3f2008-05-13 13:23:38 +030010463 __ex(ASM_VMX_VMLAUNCH) "\n\t"
Avi Kivity83287ea422012-09-16 15:10:57 +030010464 "jmp 2f \n\t"
10465 "1: " __ex(ASM_VMX_VMRESUME) "\n\t"
10466 "2: "
Avi Kivity6aa8b732006-12-10 02:21:36 -080010467 /* Save guest registers, load host registers, keep flags */
Avi Kivityb188c81f2012-09-16 15:10:58 +030010468 "mov %0, %c[wordsize](%%" _ASM_SP ") \n\t"
Avi Kivity40712fa2011-01-06 18:09:12 +020010469 "pop %0 \n\t"
Jim Mattson0cb5b302018-01-03 14:31:38 -080010470 "setbe %c[fail](%0)\n\t"
Avi Kivityb188c81f2012-09-16 15:10:58 +030010471 "mov %%" _ASM_AX ", %c[rax](%0) \n\t"
10472 "mov %%" _ASM_BX ", %c[rbx](%0) \n\t"
10473 __ASM_SIZE(pop) " %c[rcx](%0) \n\t"
10474 "mov %%" _ASM_DX ", %c[rdx](%0) \n\t"
10475 "mov %%" _ASM_SI ", %c[rsi](%0) \n\t"
10476 "mov %%" _ASM_DI ", %c[rdi](%0) \n\t"
10477 "mov %%" _ASM_BP ", %c[rbp](%0) \n\t"
Avi Kivity05b3e0c2006-12-13 00:33:45 -080010478#ifdef CONFIG_X86_64
Avi Kivitye08aa782007-11-15 18:06:18 +020010479 "mov %%r8, %c[r8](%0) \n\t"
10480 "mov %%r9, %c[r9](%0) \n\t"
10481 "mov %%r10, %c[r10](%0) \n\t"
10482 "mov %%r11, %c[r11](%0) \n\t"
10483 "mov %%r12, %c[r12](%0) \n\t"
10484 "mov %%r13, %c[r13](%0) \n\t"
10485 "mov %%r14, %c[r14](%0) \n\t"
10486 "mov %%r15, %c[r15](%0) \n\t"
Jim Mattson0cb5b302018-01-03 14:31:38 -080010487 "xor %%r8d, %%r8d \n\t"
10488 "xor %%r9d, %%r9d \n\t"
10489 "xor %%r10d, %%r10d \n\t"
10490 "xor %%r11d, %%r11d \n\t"
10491 "xor %%r12d, %%r12d \n\t"
10492 "xor %%r13d, %%r13d \n\t"
10493 "xor %%r14d, %%r14d \n\t"
10494 "xor %%r15d, %%r15d \n\t"
Avi Kivity6aa8b732006-12-10 02:21:36 -080010495#endif
Avi Kivityb188c81f2012-09-16 15:10:58 +030010496 "mov %%cr2, %%" _ASM_AX " \n\t"
10497 "mov %%" _ASM_AX ", %c[cr2](%0) \n\t"
Avi Kivityc8019492008-07-14 14:44:59 +030010498
Jim Mattson0cb5b302018-01-03 14:31:38 -080010499 "xor %%eax, %%eax \n\t"
10500 "xor %%ebx, %%ebx \n\t"
10501 "xor %%esi, %%esi \n\t"
10502 "xor %%edi, %%edi \n\t"
Avi Kivityb188c81f2012-09-16 15:10:58 +030010503 "pop %%" _ASM_BP "; pop %%" _ASM_DX " \n\t"
Avi Kivity83287ea422012-09-16 15:10:57 +030010504 ".pushsection .rodata \n\t"
10505 ".global vmx_return \n\t"
10506 "vmx_return: " _ASM_PTR " 2b \n\t"
10507 ".popsection"
Vitaly Kuznetsov773e8a02018-03-20 15:02:11 +010010508 : : "c"(vmx), "d"((unsigned long)HOST_RSP), "S"(evmcs_rsp),
Nadav Har'Eld462b812011-05-24 15:26:10 +030010509 [launched]"i"(offsetof(struct vcpu_vmx, __launched)),
Avi Kivitye08aa782007-11-15 18:06:18 +020010510 [fail]"i"(offsetof(struct vcpu_vmx, fail)),
Avi Kivity313dbd492008-07-17 18:04:30 +030010511 [host_rsp]"i"(offsetof(struct vcpu_vmx, host_rsp)),
Zhang Xiantaoad312c72007-12-13 23:50:52 +080010512 [rax]"i"(offsetof(struct vcpu_vmx, vcpu.arch.regs[VCPU_REGS_RAX])),
10513 [rbx]"i"(offsetof(struct vcpu_vmx, vcpu.arch.regs[VCPU_REGS_RBX])),
10514 [rcx]"i"(offsetof(struct vcpu_vmx, vcpu.arch.regs[VCPU_REGS_RCX])),
10515 [rdx]"i"(offsetof(struct vcpu_vmx, vcpu.arch.regs[VCPU_REGS_RDX])),
10516 [rsi]"i"(offsetof(struct vcpu_vmx, vcpu.arch.regs[VCPU_REGS_RSI])),
10517 [rdi]"i"(offsetof(struct vcpu_vmx, vcpu.arch.regs[VCPU_REGS_RDI])),
10518 [rbp]"i"(offsetof(struct vcpu_vmx, vcpu.arch.regs[VCPU_REGS_RBP])),
Avi Kivity05b3e0c2006-12-13 00:33:45 -080010519#ifdef CONFIG_X86_64
Zhang Xiantaoad312c72007-12-13 23:50:52 +080010520 [r8]"i"(offsetof(struct vcpu_vmx, vcpu.arch.regs[VCPU_REGS_R8])),
10521 [r9]"i"(offsetof(struct vcpu_vmx, vcpu.arch.regs[VCPU_REGS_R9])),
10522 [r10]"i"(offsetof(struct vcpu_vmx, vcpu.arch.regs[VCPU_REGS_R10])),
10523 [r11]"i"(offsetof(struct vcpu_vmx, vcpu.arch.regs[VCPU_REGS_R11])),
10524 [r12]"i"(offsetof(struct vcpu_vmx, vcpu.arch.regs[VCPU_REGS_R12])),
10525 [r13]"i"(offsetof(struct vcpu_vmx, vcpu.arch.regs[VCPU_REGS_R13])),
10526 [r14]"i"(offsetof(struct vcpu_vmx, vcpu.arch.regs[VCPU_REGS_R14])),
10527 [r15]"i"(offsetof(struct vcpu_vmx, vcpu.arch.regs[VCPU_REGS_R15])),
Avi Kivity6aa8b732006-12-10 02:21:36 -080010528#endif
Avi Kivity40712fa2011-01-06 18:09:12 +020010529 [cr2]"i"(offsetof(struct vcpu_vmx, vcpu.arch.cr2)),
10530 [wordsize]"i"(sizeof(ulong))
Laurent Vivierc2036302007-10-25 14:18:52 +020010531 : "cc", "memory"
10532#ifdef CONFIG_X86_64
Vitaly Kuznetsov773e8a02018-03-20 15:02:11 +010010533 , "rax", "rbx", "rdi"
Laurent Vivierc2036302007-10-25 14:18:52 +020010534 , "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15"
Avi Kivityb188c81f2012-09-16 15:10:58 +030010535#else
Vitaly Kuznetsov773e8a02018-03-20 15:02:11 +010010536 , "eax", "ebx", "edi"
Laurent Vivierc2036302007-10-25 14:18:52 +020010537#endif
10538 );
Avi Kivity6aa8b732006-12-10 02:21:36 -080010539
KarimAllah Ahmedd28b3872018-02-01 22:59:45 +010010540 /*
10541 * We do not use IBRS in the kernel. If this vCPU has used the
10542 * SPEC_CTRL MSR it may have left it on; save the value and
10543 * turn it off. This is much more efficient than blindly adding
10544 * it to the atomic save/restore list. Especially as the former
10545 * (Saving guest MSRs on vmexit) doesn't even exist in KVM.
10546 *
10547 * For non-nested case:
10548 * If the L01 MSR bitmap does not intercept the MSR, then we need to
10549 * save it.
10550 *
10551 * For nested case:
10552 * If the L02 MSR bitmap does not intercept the MSR, then we need to
10553 * save it.
10554 */
Paolo Bonzini946fbbc2018-02-22 16:43:18 +010010555 if (unlikely(!msr_write_intercepted(vcpu, MSR_IA32_SPEC_CTRL)))
Paolo Bonziniecb586b2018-02-22 16:43:17 +010010556 vmx->spec_ctrl = native_read_msr(MSR_IA32_SPEC_CTRL);
KarimAllah Ahmedd28b3872018-02-01 22:59:45 +010010557
Thomas Gleixnerccbcd262018-05-09 23:01:01 +020010558 x86_spec_ctrl_restore_host(vmx->spec_ctrl, 0);
KarimAllah Ahmedd28b3872018-02-01 22:59:45 +010010559
David Woodhouse117cc7a2018-01-12 11:11:27 +000010560 /* Eliminate branch target predictions from guest mode */
10561 vmexit_fill_RSB();
10562
Vitaly Kuznetsov773e8a02018-03-20 15:02:11 +010010563 /* All fields are clean at this point */
10564 if (static_branch_unlikely(&enable_evmcs))
10565 current_evmcs->hv_clean_fields |=
10566 HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL;
10567
Gleb Natapov2a7921b2012-08-12 16:12:29 +030010568 /* MSR_IA32_DEBUGCTLMSR is zeroed on vmexit. Restore it if needed */
Wanpeng Li74c55932017-11-29 01:31:20 -080010569 if (vmx->host_debugctlmsr)
10570 update_debugctlmsr(vmx->host_debugctlmsr);
Gleb Natapov2a7921b2012-08-12 16:12:29 +030010571
Avi Kivityaa67f602012-08-01 16:48:03 +030010572#ifndef CONFIG_X86_64
10573 /*
10574 * The sysexit path does not restore ds/es, so we must set them to
10575 * a reasonable value ourselves.
10576 *
Sean Christopherson6d6095b2018-07-23 12:32:44 -070010577 * We can't defer this to vmx_prepare_switch_to_host() since that
10578 * function may be executed in interrupt context, which saves and
10579 	 * restores segments around it, nullifying its effect.
Avi Kivityaa67f602012-08-01 16:48:03 +030010580 */
10581 loadsegment(ds, __USER_DS);
10582 loadsegment(es, __USER_DS);
10583#endif
10584
Avi Kivity6de4f3a2009-05-31 22:58:47 +030010585 vcpu->arch.regs_avail = ~((1 << VCPU_REGS_RIP) | (1 << VCPU_REGS_RSP)
Avi Kivity6de12732011-03-07 12:51:22 +020010586 | (1 << VCPU_EXREG_RFLAGS)
Avi Kivityaff48ba2010-12-05 18:56:11 +020010587 | (1 << VCPU_EXREG_PDPTR)
Avi Kivity2fb92db2011-04-27 19:42:18 +030010588 | (1 << VCPU_EXREG_SEGMENTS)
Avi Kivityaff48ba2010-12-05 18:56:11 +020010589 | (1 << VCPU_EXREG_CR3));
Marcelo Tosatti5fdbf972008-06-27 14:58:02 -030010590 vcpu->arch.regs_dirty = 0;
10591
Gleb Natapove0b890d2013-09-25 12:51:33 +030010592 /*
Xiao Guangrong1be0e612016-03-22 16:51:18 +080010593 	 * Eager FPU is enabled when PKEYs are supported, and the host CR4
10594 	 * has been switched back by now, so it is safe to read the guest
10595 	 * PKRU from the current XSAVE area.
10596 */
Paolo Bonzinib9dd21e2017-08-23 23:14:38 +020010597 if (static_cpu_has(X86_FEATURE_PKU) &&
10598 kvm_read_cr4_bits(vcpu, X86_CR4_PKE)) {
10599 vcpu->arch.pkru = __read_pkru();
10600 if (vcpu->arch.pkru != vmx->host_pkru)
Xiao Guangrong1be0e612016-03-22 16:51:18 +080010601 __write_pkru(vmx->host_pkru);
Xiao Guangrong1be0e612016-03-22 16:51:18 +080010602 }
10603
Gleb Natapove0b890d2013-09-25 12:51:33 +030010604 vmx->nested.nested_run_pending = 0;
Jim Mattsonb060ca32017-09-14 16:31:42 -070010605 vmx->idt_vectoring_info = 0;
10606
10607 vmx->exit_reason = vmx->fail ? 0xdead : vmcs_read32(VM_EXIT_REASON);
10608 if (vmx->fail || (vmx->exit_reason & VMX_EXIT_REASONS_FAILED_VMENTRY))
10609 return;
10610
10611 vmx->loaded_vmcs->launched = 1;
10612 vmx->idt_vectoring_info = vmcs_read32(IDT_VECTORING_INFO_FIELD);
Gleb Natapove0b890d2013-09-25 12:51:33 +030010613
Avi Kivity51aa01d2010-07-20 14:31:20 +030010614 vmx_complete_atomic_exit(vmx);
10615 vmx_recover_nmi_blocking(vmx);
Avi Kivitycf393f72008-07-01 16:20:21 +030010616 vmx_complete_interrupts(vmx);
Avi Kivity6aa8b732006-12-10 02:21:36 -080010617}
Josh Poimboeufc207aee2017-06-28 10:11:06 -050010618STACK_FRAME_NON_STANDARD(vmx_vcpu_run);
Avi Kivity6aa8b732006-12-10 02:21:36 -080010619
Sean Christopherson434a1e92018-03-20 12:17:18 -070010620static struct kvm *vmx_vm_alloc(void)
10621{
Marc Orrd1e5b0e2018-05-15 04:37:37 -070010622 struct kvm_vmx *kvm_vmx = vzalloc(sizeof(struct kvm_vmx));
Sean Christopherson40bbb9d2018-03-20 12:17:20 -070010623 return &kvm_vmx->kvm;
Sean Christopherson434a1e92018-03-20 12:17:18 -070010624}
10625
10626static void vmx_vm_free(struct kvm *kvm)
10627{
Marc Orrd1e5b0e2018-05-15 04:37:37 -070010628 vfree(to_kvm_vmx(kvm));
Sean Christopherson434a1e92018-03-20 12:17:18 -070010629}
10630
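/*
 * Switch which loaded_vmcs backs this vcpu.  A full vmx_vcpu_put()/
 * vmx_vcpu_load() cycle is used so the new VMCS is loaded with VMPTRLD and
 * the per-cpu bookkeeping and host state are re-established on this CPU.
 */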
David Hildenbrand1279a6b12017-03-20 10:00:08 +010010631static void vmx_switch_vmcs(struct kvm_vcpu *vcpu, struct loaded_vmcs *vmcs)
Paolo Bonzini4fa77342014-07-17 12:25:16 +020010632{
10633 struct vcpu_vmx *vmx = to_vmx(vcpu);
10634 int cpu;
10635
David Hildenbrand1279a6b12017-03-20 10:00:08 +010010636 if (vmx->loaded_vmcs == vmcs)
Paolo Bonzini4fa77342014-07-17 12:25:16 +020010637 return;
10638
10639 cpu = get_cpu();
Paolo Bonzini4fa77342014-07-17 12:25:16 +020010640 vmx_vcpu_put(vcpu);
Sean Christophersonbd9966d2018-07-23 12:32:42 -070010641 vmx->loaded_vmcs = vmcs;
Paolo Bonzini4fa77342014-07-17 12:25:16 +020010642 vmx_vcpu_load(vcpu, cpu);
Paolo Bonzini4fa77342014-07-17 12:25:16 +020010643 put_cpu();
10644}
10645
Jim Mattson2f1fe812016-07-08 15:36:06 -070010646/*
10647 * Ensure that the current vmcs of the logical processor is the
10648 * vmcs01 of the vcpu before calling free_nested().
10649 */
10650static void vmx_free_vcpu_nested(struct kvm_vcpu *vcpu)
10651{
10652 struct vcpu_vmx *vmx = to_vmx(vcpu);
Jim Mattson2f1fe812016-07-08 15:36:06 -070010653
Christoffer Dallec7660c2017-12-04 21:35:23 +010010654 vcpu_load(vcpu);
David Hildenbrand1279a6b12017-03-20 10:00:08 +010010655 vmx_switch_vmcs(vcpu, &vmx->vmcs01);
Jim Mattson2f1fe812016-07-08 15:36:06 -070010656 free_nested(vmx);
10657 vcpu_put(vcpu);
10658}
10659
Avi Kivity6aa8b732006-12-10 02:21:36 -080010660static void vmx_free_vcpu(struct kvm_vcpu *vcpu)
10661{
Rusty Russellfb3f0f52007-07-27 17:16:56 +100010662 struct vcpu_vmx *vmx = to_vmx(vcpu);
10663
Kai Huang843e4332015-01-28 10:54:28 +080010664 if (enable_pml)
Kai Huanga3eaa862015-11-04 13:46:05 +080010665 vmx_destroy_pml_buffer(vmx);
Wanpeng Li991e7a02015-09-16 17:30:05 +080010666 free_vpid(vmx->vpid);
Paolo Bonzini4fa77342014-07-17 12:25:16 +020010667 leave_guest_mode(vcpu);
Jim Mattson2f1fe812016-07-08 15:36:06 -070010668 vmx_free_vcpu_nested(vcpu);
Paolo Bonzini4fa77342014-07-17 12:25:16 +020010669 free_loaded_vmcs(vmx->loaded_vmcs);
Rusty Russellfb3f0f52007-07-27 17:16:56 +100010670 kfree(vmx->guest_msrs);
10671 kvm_vcpu_uninit(vcpu);
Rusty Russella4770342007-08-01 14:46:11 +100010672 kmem_cache_free(kvm_vcpu_cache, vmx);
Avi Kivity6aa8b732006-12-10 02:21:36 -080010673}
10674
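/*
 * Allocate and initialize a VMX vcpu: a VPID, the optional PML page, the
 * guest MSR save area, and vmcs01 with its MSR bitmap (a handful of MSRs such
 * as FS/GS base are passed through from the start).  The vcpu is then loaded
 * on a CPU once so vmx_vcpu_setup() can program the VMCS, and the per-VM APIC
 * access page and EPT identity map are set up if they are needed and do not
 * exist yet.
 */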
Rusty Russellfb3f0f52007-07-27 17:16:56 +100010675static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id)
Avi Kivity6aa8b732006-12-10 02:21:36 -080010676{
Rusty Russellfb3f0f52007-07-27 17:16:56 +100010677 int err;
Rusty Russellc16f8622007-07-30 21:12:19 +100010678 struct vcpu_vmx *vmx = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
Paolo Bonzini904e14f2018-01-16 16:51:18 +010010679 unsigned long *msr_bitmap;
Avi Kivity15ad7142007-07-11 18:17:21 +030010680 int cpu;
Avi Kivity6aa8b732006-12-10 02:21:36 -080010681
Gregory Haskinsa2fa3e92007-07-27 08:13:10 -040010682 if (!vmx)
Rusty Russellfb3f0f52007-07-27 17:16:56 +100010683 return ERR_PTR(-ENOMEM);
10684
Wanpeng Li991e7a02015-09-16 17:30:05 +080010685 vmx->vpid = allocate_vpid();
Sheng Yang2384d2b2008-01-17 15:14:33 +080010686
Rusty Russellfb3f0f52007-07-27 17:16:56 +100010687 err = kvm_vcpu_init(&vmx->vcpu, kvm, id);
10688 if (err)
10689 goto free_vcpu;
Ingo Molnar965b58a2007-01-05 16:36:23 -080010690
Peter Feiner4e595162016-07-07 14:49:58 -070010691 err = -ENOMEM;
10692
10693 /*
10694 	 * If PML is turned on, a failure to enable PML simply fails creation
10695 	 * of the vcpu. This keeps the PML logic simple: we never have to deal
10696 	 * with cases such as PML being enabled on only some of the guest's
10697 	 * vcpus, etc.
10698 */
10699 if (enable_pml) {
10700 vmx->pml_pg = alloc_page(GFP_KERNEL | __GFP_ZERO);
10701 if (!vmx->pml_pg)
10702 goto uninit_vcpu;
10703 }
10704
Gregory Haskinsa2fa3e92007-07-27 08:13:10 -040010705 vmx->guest_msrs = kmalloc(PAGE_SIZE, GFP_KERNEL);
Paolo Bonzini03916db2014-07-24 14:21:57 +020010706 BUILD_BUG_ON(ARRAY_SIZE(vmx_msr_index) * sizeof(vmx->guest_msrs[0])
10707 > PAGE_SIZE);
Nadav Amit0123be42014-07-24 15:06:56 +030010708
Peter Feiner4e595162016-07-07 14:49:58 -070010709 if (!vmx->guest_msrs)
10710 goto free_pml;
Ingo Molnar965b58a2007-01-05 16:36:23 -080010711
Paolo Bonzinif21f1652018-01-11 12:16:15 +010010712 err = alloc_loaded_vmcs(&vmx->vmcs01);
10713 if (err < 0)
Rusty Russellfb3f0f52007-07-27 17:16:56 +100010714 goto free_msrs;
Gregory Haskinsa2fa3e92007-07-27 08:13:10 -040010715
Paolo Bonzini904e14f2018-01-16 16:51:18 +010010716 msr_bitmap = vmx->vmcs01.msr_bitmap;
10717 vmx_disable_intercept_for_msr(msr_bitmap, MSR_FS_BASE, MSR_TYPE_RW);
10718 vmx_disable_intercept_for_msr(msr_bitmap, MSR_GS_BASE, MSR_TYPE_RW);
10719 vmx_disable_intercept_for_msr(msr_bitmap, MSR_KERNEL_GS_BASE, MSR_TYPE_RW);
10720 vmx_disable_intercept_for_msr(msr_bitmap, MSR_IA32_SYSENTER_CS, MSR_TYPE_RW);
10721 vmx_disable_intercept_for_msr(msr_bitmap, MSR_IA32_SYSENTER_ESP, MSR_TYPE_RW);
10722 vmx_disable_intercept_for_msr(msr_bitmap, MSR_IA32_SYSENTER_EIP, MSR_TYPE_RW);
10723 vmx->msr_bitmap_mode = 0;
10724
Paolo Bonzinif21f1652018-01-11 12:16:15 +010010725 vmx->loaded_vmcs = &vmx->vmcs01;
Avi Kivity15ad7142007-07-11 18:17:21 +030010726 cpu = get_cpu();
10727 vmx_vcpu_load(&vmx->vcpu, cpu);
Zachary Amsdene48672f2010-08-19 22:07:23 -100010728 vmx->vcpu.cpu = cpu;
David Hildenbrand12d79912017-08-24 20:51:26 +020010729 vmx_vcpu_setup(vmx);
Rusty Russellfb3f0f52007-07-27 17:16:56 +100010730 vmx_vcpu_put(&vmx->vcpu);
Avi Kivity15ad7142007-07-11 18:17:21 +030010731 put_cpu();
Paolo Bonzini35754c92015-07-29 12:05:37 +020010732 if (cpu_need_virtualize_apic_accesses(&vmx->vcpu)) {
Jan Kiszkabe6d05c2011-04-13 01:27:55 +020010733 err = alloc_apic_access_page(kvm);
10734 if (err)
Marcelo Tosatti5e4a0b32008-02-14 21:21:43 -020010735 goto free_vmcs;
Jan Kiszkaa63cb562013-04-08 11:07:46 +020010736 }
Ingo Molnar965b58a2007-01-05 16:36:23 -080010737
Sean Christophersone90008d2018-03-05 12:04:37 -080010738 if (enable_ept && !enable_unrestricted_guest) {
Tang Chenf51770e2014-09-16 18:41:59 +080010739 err = init_rmode_identity_map(kvm);
10740 if (err)
Gleb Natapov93ea5382011-02-21 12:07:59 +020010741 goto free_vmcs;
Sheng Yangb927a3c2009-07-21 10:42:48 +080010742 }
Sheng Yangb7ebfb02008-04-25 21:44:52 +080010743
Wanpeng Li5c614b32015-10-13 09:18:36 -070010744 if (nested) {
Paolo Bonzini6677f3d2018-02-26 13:40:08 +010010745 nested_vmx_setup_ctls_msrs(&vmx->nested.msrs,
10746 kvm_vcpu_apicv_active(&vmx->vcpu));
Wanpeng Li5c614b32015-10-13 09:18:36 -070010747 vmx->nested.vpid02 = allocate_vpid();
10748 }
Wincy Vanb9c237b2015-02-03 23:56:30 +080010749
Wincy Van705699a2015-02-03 23:58:17 +080010750 vmx->nested.posted_intr_nv = -1;
Nadav Har'Ela9d30f32011-05-25 23:03:55 +030010751 vmx->nested.current_vmptr = -1ull;
Nadav Har'Ela9d30f32011-05-25 23:03:55 +030010752
Haozhong Zhang37e4c992016-06-22 14:59:55 +080010753 vmx->msr_ia32_feature_control_valid_bits = FEATURE_CONTROL_LOCKED;
10754
Paolo Bonzini31afb2e2017-06-06 12:57:06 +020010755 /*
10756 * Enforce invariant: pi_desc.nv is always either POSTED_INTR_VECTOR
10757 * or POSTED_INTR_WAKEUP_VECTOR.
10758 */
10759 vmx->pi_desc.nv = POSTED_INTR_VECTOR;
10760 vmx->pi_desc.sn = 1;
10761
Rusty Russellfb3f0f52007-07-27 17:16:56 +100010762 return &vmx->vcpu;
Ingo Molnar965b58a2007-01-05 16:36:23 -080010763
Rusty Russellfb3f0f52007-07-27 17:16:56 +100010764free_vmcs:
Wanpeng Li5c614b32015-10-13 09:18:36 -070010765 free_vpid(vmx->nested.vpid02);
Xiao Guangrong5f3fbc32012-05-14 14:58:58 +080010766 free_loaded_vmcs(vmx->loaded_vmcs);
Rusty Russellfb3f0f52007-07-27 17:16:56 +100010767free_msrs:
Rusty Russellfb3f0f52007-07-27 17:16:56 +100010768 kfree(vmx->guest_msrs);
Peter Feiner4e595162016-07-07 14:49:58 -070010769free_pml:
10770 vmx_destroy_pml_buffer(vmx);
Rusty Russellfb3f0f52007-07-27 17:16:56 +100010771uninit_vcpu:
10772 kvm_vcpu_uninit(&vmx->vcpu);
10773free_vcpu:
Wanpeng Li991e7a02015-09-16 17:30:05 +080010774 free_vpid(vmx->vpid);
Rusty Russella4770342007-08-01 14:46:11 +100010775 kmem_cache_free(kvm_vcpu_cache, vmx);
Rusty Russellfb3f0f52007-07-27 17:16:56 +100010776 return ERR_PTR(err);
Avi Kivity6aa8b732006-12-10 02:21:36 -080010777}
10778
Wanpeng Lib31c1142018-03-12 04:53:04 -070010779static int vmx_vm_init(struct kvm *kvm)
10780{
Tianyu Lan877ad952018-07-19 08:40:23 +000010781 spin_lock_init(&to_kvm_vmx(kvm)->ept_pointer_lock);
10782
Wanpeng Lib31c1142018-03-12 04:53:04 -070010783 if (!ple_gap)
10784 kvm->arch.pause_in_guest = true;
10785 return 0;
10786}
10787
Yang, Sheng002c7f72007-07-31 14:23:01 +030010788static void __init vmx_check_processor_compat(void *rtn)
10789{
10790 struct vmcs_config vmcs_conf;
10791
10792 *(int *)rtn = 0;
10793 if (setup_vmcs_config(&vmcs_conf) < 0)
10794 *(int *)rtn = -EIO;
Paolo Bonzini13893092018-02-26 13:40:09 +010010795 nested_vmx_setup_ctls_msrs(&vmcs_conf.nested, enable_apicv);
Yang, Sheng002c7f72007-07-31 14:23:01 +030010796 if (memcmp(&vmcs_config, &vmcs_conf, sizeof(struct vmcs_config)) != 0) {
10797 printk(KERN_ERR "kvm: CPU %d feature inconsistency!\n",
10798 smp_processor_id());
10799 *(int *)rtn = -EIO;
10800 }
10801}
10802
Sheng Yang4b12f0d2009-04-27 20:35:42 +080010803static u64 vmx_get_mt_mask(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio)
Sheng Yang64d4d522008-10-09 16:01:57 +080010804{
Xiao Guangrongb18d5432015-06-15 16:55:21 +080010805 u8 cache;
10806 u64 ipat = 0;
Sheng Yang4b12f0d2009-04-27 20:35:42 +080010807
Sheng Yang522c68c2009-04-27 20:35:43 +080010808 /* For VT-d and EPT combination
Paolo Bonzini606decd2015-10-01 13:12:47 +020010809 * 1. MMIO: always map as UC
Sheng Yang522c68c2009-04-27 20:35:43 +080010810 * 2. EPT with VT-d:
10811	 * a. VT-d without snooping control feature: the cache attribute
Paolo Bonzini606decd2015-10-01 13:12:47 +020010812	 * cannot be guaranteed; fall back to trusting the guest's type.
Sheng Yang522c68c2009-04-27 20:35:43 +080010813	 * b. VT-d with snooping control feature: the snooping control
10814	 * feature of the VT-d engine guarantees cache correctness; set it
10815	 * to WB to stay consistent with the host, same as item 3.
Sheng Yanga19a6d12010-02-09 16:41:53 +080010816 * 3. EPT without VT-d: always map as WB and set IPAT=1 to keep
Sheng Yang522c68c2009-04-27 20:35:43 +080010817 * consistent with host MTRR
10818 */
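	/*
	 * Illustrative examples of the rules above: plain RAM with no
	 * non-coherent DMA ends up as WB with VMX_EPT_IPAT_BIT set (guest
	 * PAT ignored), while MMIO ends up as UC with IPAT clear.
	 */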
Paolo Bonzini606decd2015-10-01 13:12:47 +020010819 if (is_mmio) {
10820 cache = MTRR_TYPE_UNCACHABLE;
10821 goto exit;
10822 }
10823
10824 if (!kvm_arch_has_noncoherent_dma(vcpu->kvm)) {
Xiao Guangrongb18d5432015-06-15 16:55:21 +080010825 ipat = VMX_EPT_IPAT_BIT;
10826 cache = MTRR_TYPE_WRBACK;
10827 goto exit;
10828 }
10829
10830 if (kvm_read_cr0(vcpu) & X86_CR0_CD) {
10831 ipat = VMX_EPT_IPAT_BIT;
Paolo Bonzini0da029e2015-07-23 08:24:42 +020010832 if (kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_CD_NW_CLEARED))
Xiao Guangrongfb2799502015-07-16 03:25:56 +080010833 cache = MTRR_TYPE_WRBACK;
10834 else
10835 cache = MTRR_TYPE_UNCACHABLE;
Xiao Guangrongb18d5432015-06-15 16:55:21 +080010836 goto exit;
10837 }
10838
Xiao Guangrongff536042015-06-15 16:55:22 +080010839 cache = kvm_mtrr_get_guest_memory_type(vcpu, gfn);
Xiao Guangrongb18d5432015-06-15 16:55:21 +080010840
10841exit:
10842 return (cache << VMX_EPT_MT_EPTE_SHIFT) | ipat;
Sheng Yang64d4d522008-10-09 16:01:57 +080010843}
10844
Sheng Yang17cc3932010-01-05 19:02:27 +080010845static int vmx_get_lpage_level(void)
Joerg Roedel344f4142009-07-27 16:30:48 +020010846{
Sheng Yang878403b2010-01-05 19:02:29 +080010847 if (enable_ept && !cpu_has_vmx_ept_1g_page())
10848 return PT_DIRECTORY_LEVEL;
10849 else
10850		/* Shadow paging, and EPT with 1GB page support, can use 1GB pages */
10851 return PT_PDPE_LEVEL;
Joerg Roedel344f4142009-07-27 16:30:48 +020010852}
10853
Xiao Guangrongfeda8052015-09-09 14:05:55 +080010854static void vmcs_set_secondary_exec_control(u32 new_ctl)
10855{
10856 /*
10857 * These bits in the secondary execution controls field
10858 * are dynamic, the others are mostly based on the hypervisor
10859 * architecture and the guest's CPUID. Do not touch the
10860 * dynamic bits.
10861 */
10862 u32 mask =
10863 SECONDARY_EXEC_SHADOW_VMCS |
10864 SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE |
Paolo Bonzini0367f202016-07-12 10:44:55 +020010865 SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES |
10866 SECONDARY_EXEC_DESC;
Xiao Guangrongfeda8052015-09-09 14:05:55 +080010867
10868 u32 cur_ctl = vmcs_read32(SECONDARY_VM_EXEC_CONTROL);
10869
10870 vmcs_write32(SECONDARY_VM_EXEC_CONTROL,
10871 (new_ctl & ~mask) | (cur_ctl & mask));
10872}
10873
David Matlack8322ebb2016-11-29 18:14:09 -080010874/*
10875 * Generate MSR_IA32_VMX_CR{0,4}_FIXED1 according to CPUID. Only set bits
10876 * (indicating "allowed-1") if they are supported in the guest's CPUID.
10877 */
10878static void nested_vmx_cr_fixed1_bits_update(struct kvm_vcpu *vcpu)
10879{
10880 struct vcpu_vmx *vmx = to_vmx(vcpu);
10881 struct kvm_cpuid_entry2 *entry;
10882
Paolo Bonzini6677f3d2018-02-26 13:40:08 +010010883 vmx->nested.msrs.cr0_fixed1 = 0xffffffff;
10884 vmx->nested.msrs.cr4_fixed1 = X86_CR4_PCE;
David Matlack8322ebb2016-11-29 18:14:09 -080010885
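	/*
	 * Helper: mark a CR4 bit as allowed-1 in CR4_FIXED1 only when the
	 * matching CPUID feature bit is set in the given register of "entry".
	 */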
10886#define cr4_fixed1_update(_cr4_mask, _reg, _cpuid_mask) do { \
10887 if (entry && (entry->_reg & (_cpuid_mask))) \
Paolo Bonzini6677f3d2018-02-26 13:40:08 +010010888 vmx->nested.msrs.cr4_fixed1 |= (_cr4_mask); \
David Matlack8322ebb2016-11-29 18:14:09 -080010889} while (0)
10890
10891 entry = kvm_find_cpuid_entry(vcpu, 0x1, 0);
10892 cr4_fixed1_update(X86_CR4_VME, edx, bit(X86_FEATURE_VME));
10893 cr4_fixed1_update(X86_CR4_PVI, edx, bit(X86_FEATURE_VME));
10894 cr4_fixed1_update(X86_CR4_TSD, edx, bit(X86_FEATURE_TSC));
10895 cr4_fixed1_update(X86_CR4_DE, edx, bit(X86_FEATURE_DE));
10896 cr4_fixed1_update(X86_CR4_PSE, edx, bit(X86_FEATURE_PSE));
10897 cr4_fixed1_update(X86_CR4_PAE, edx, bit(X86_FEATURE_PAE));
10898 cr4_fixed1_update(X86_CR4_MCE, edx, bit(X86_FEATURE_MCE));
10899 cr4_fixed1_update(X86_CR4_PGE, edx, bit(X86_FEATURE_PGE));
10900 cr4_fixed1_update(X86_CR4_OSFXSR, edx, bit(X86_FEATURE_FXSR));
10901 cr4_fixed1_update(X86_CR4_OSXMMEXCPT, edx, bit(X86_FEATURE_XMM));
10902 cr4_fixed1_update(X86_CR4_VMXE, ecx, bit(X86_FEATURE_VMX));
10903 cr4_fixed1_update(X86_CR4_SMXE, ecx, bit(X86_FEATURE_SMX));
10904 cr4_fixed1_update(X86_CR4_PCIDE, ecx, bit(X86_FEATURE_PCID));
10905 cr4_fixed1_update(X86_CR4_OSXSAVE, ecx, bit(X86_FEATURE_XSAVE));
10906
10907 entry = kvm_find_cpuid_entry(vcpu, 0x7, 0);
10908 cr4_fixed1_update(X86_CR4_FSGSBASE, ebx, bit(X86_FEATURE_FSGSBASE));
10909 cr4_fixed1_update(X86_CR4_SMEP, ebx, bit(X86_FEATURE_SMEP));
10910 cr4_fixed1_update(X86_CR4_SMAP, ebx, bit(X86_FEATURE_SMAP));
10911 cr4_fixed1_update(X86_CR4_PKE, ecx, bit(X86_FEATURE_PKU));
Paolo Bonzinic4ad77e2017-11-13 14:23:59 +010010912 cr4_fixed1_update(X86_CR4_UMIP, ecx, bit(X86_FEATURE_UMIP));
David Matlack8322ebb2016-11-29 18:14:09 -080010913
10914#undef cr4_fixed1_update
10915}
10916
Sheng Yang0e851882009-12-18 16:48:46 +080010917static void vmx_cpuid_update(struct kvm_vcpu *vcpu)
10918{
Sheng Yang4e47c7a2009-12-18 16:48:47 +080010919 struct vcpu_vmx *vmx = to_vmx(vcpu);
Sheng Yang4e47c7a2009-12-18 16:48:47 +080010920
Paolo Bonzini80154d72017-08-24 13:55:35 +020010921 if (cpu_has_secondary_exec_ctrls()) {
10922 vmx_compute_secondary_exec_control(vmx);
10923 vmcs_set_secondary_exec_control(vmx->secondary_exec_control);
Sheng Yang4e47c7a2009-12-18 16:48:47 +080010924 }
Mao, Junjiead756a12012-07-02 01:18:48 +000010925
Haozhong Zhang37e4c992016-06-22 14:59:55 +080010926 if (nested_vmx_allowed(vcpu))
10927 to_vmx(vcpu)->msr_ia32_feature_control_valid_bits |=
10928 FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX;
10929 else
10930 to_vmx(vcpu)->msr_ia32_feature_control_valid_bits &=
10931 ~FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX;
David Matlack8322ebb2016-11-29 18:14:09 -080010932
10933 if (nested_vmx_allowed(vcpu))
10934 nested_vmx_cr_fixed1_bits_update(vcpu);
Sheng Yang0e851882009-12-18 16:48:46 +080010935}
10936
Joerg Roedeld4330ef2010-04-22 12:33:11 +020010937static void vmx_set_supported_cpuid(u32 func, struct kvm_cpuid_entry2 *entry)
10938{
Nadav Har'El7b8050f2011-05-25 23:16:10 +030010939 if (func == 1 && nested)
10940 entry->ecx |= bit(X86_FEATURE_VMX);
Joerg Roedeld4330ef2010-04-22 12:33:11 +020010941}
10942
Yang Zhang25d92082013-08-06 12:00:32 +030010943static void nested_ept_inject_page_fault(struct kvm_vcpu *vcpu,
10944 struct x86_exception *fault)
10945{
Jan Kiszka533558b2014-01-04 18:47:20 +010010946 struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
Bandan Dasc5f983f2017-05-05 15:25:14 -040010947 struct vcpu_vmx *vmx = to_vmx(vcpu);
Jan Kiszka533558b2014-01-04 18:47:20 +010010948 u32 exit_reason;
Bandan Dasc5f983f2017-05-05 15:25:14 -040010949 unsigned long exit_qualification = vcpu->arch.exit_qualification;
Yang Zhang25d92082013-08-06 12:00:32 +030010950
Bandan Dasc5f983f2017-05-05 15:25:14 -040010951 if (vmx->nested.pml_full) {
10952 exit_reason = EXIT_REASON_PML_FULL;
10953 vmx->nested.pml_full = false;
10954 exit_qualification &= INTR_INFO_UNBLOCK_NMI;
10955 } else if (fault->error_code & PFERR_RSVD_MASK)
Jan Kiszka533558b2014-01-04 18:47:20 +010010956 exit_reason = EXIT_REASON_EPT_MISCONFIG;
Yang Zhang25d92082013-08-06 12:00:32 +030010957 else
Jan Kiszka533558b2014-01-04 18:47:20 +010010958 exit_reason = EXIT_REASON_EPT_VIOLATION;
Bandan Dasc5f983f2017-05-05 15:25:14 -040010959
10960 nested_vmx_vmexit(vcpu, exit_reason, 0, exit_qualification);
Yang Zhang25d92082013-08-06 12:00:32 +030010961 vmcs12->guest_physical_address = fault->address;
10962}
10963
Peter Feiner995f00a2017-06-30 17:26:32 -070010964static bool nested_ept_ad_enabled(struct kvm_vcpu *vcpu)
10965{
David Hildenbrandbb97a012017-08-10 23:15:28 +020010966 return nested_ept_get_cr3(vcpu) & VMX_EPTP_AD_ENABLE_BIT;
Peter Feiner995f00a2017-06-30 17:26:32 -070010967}
10968
Nadav Har'El155a97a2013-08-05 11:07:16 +030010969/* Callbacks for nested_ept_init_mmu_context: */
10970
10971static unsigned long nested_ept_get_cr3(struct kvm_vcpu *vcpu)
10972{
10973 /* return the page table to be shadowed - in our case, EPT12 */
10974 return get_vmcs12(vcpu)->ept_pointer;
10975}
10976
Paolo Bonziniae1e2d12017-03-30 11:55:30 +020010977static int nested_ept_init_mmu_context(struct kvm_vcpu *vcpu)
Nadav Har'El155a97a2013-08-05 11:07:16 +030010978{
Paolo Bonziniad896af2013-10-02 16:56:14 +020010979 WARN_ON(mmu_is_nested(vcpu));
David Hildenbranda057e0e2017-08-10 23:36:54 +020010980 if (!valid_ept_address(vcpu, nested_ept_get_cr3(vcpu)))
Paolo Bonziniae1e2d12017-03-30 11:55:30 +020010981 return 1;
10982
Paolo Bonziniad896af2013-10-02 16:56:14 +020010983 kvm_init_shadow_ept_mmu(vcpu,
Paolo Bonzini6677f3d2018-02-26 13:40:08 +010010984 to_vmx(vcpu)->nested.msrs.ept_caps &
Paolo Bonziniae1e2d12017-03-30 11:55:30 +020010985 VMX_EPT_EXECUTE_ONLY_BIT,
Junaid Shahid50c28f22018-06-27 14:59:11 -070010986 nested_ept_ad_enabled(vcpu),
10987 nested_ept_get_cr3(vcpu));
Nadav Har'El155a97a2013-08-05 11:07:16 +030010988 vcpu->arch.mmu.set_cr3 = vmx_set_cr3;
10989 vcpu->arch.mmu.get_cr3 = nested_ept_get_cr3;
10990 vcpu->arch.mmu.inject_page_fault = nested_ept_inject_page_fault;
10991
10992 vcpu->arch.walk_mmu = &vcpu->arch.nested_mmu;
Paolo Bonziniae1e2d12017-03-30 11:55:30 +020010993 return 0;
Nadav Har'El155a97a2013-08-05 11:07:16 +030010994}
10995
10996static void nested_ept_uninit_mmu_context(struct kvm_vcpu *vcpu)
10997{
10998 vcpu->arch.walk_mmu = &vcpu->arch.mmu;
10999}
11000
Eugene Korenevsky19d5f102014-12-16 22:35:53 +030011001static bool nested_vmx_is_page_fault_vmexit(struct vmcs12 *vmcs12,
11002 u16 error_code)
11003{
11004 bool inequality, bit;
11005
11006 bit = (vmcs12->exception_bitmap & (1u << PF_VECTOR)) != 0;
11007 inequality =
11008 (error_code & vmcs12->page_fault_error_code_mask) !=
11009 vmcs12->page_fault_error_code_match;
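	/*
	 * Per the SDM, a #PF is reflected to L1 when EB.PF is 1 and the
	 * masked error code equals PFEC_MATCH, or when EB.PF is 0 and it
	 * does not; that condition is exactly "inequality XOR bit".
	 */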
11010 return inequality ^ bit;
11011}
11012
Gleb Natapovfeaf0c7d2013-09-25 12:51:36 +030011013static void vmx_inject_page_fault_nested(struct kvm_vcpu *vcpu,
11014 struct x86_exception *fault)
11015{
11016 struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
11017
11018 WARN_ON(!is_guest_mode(vcpu));
11019
Wanpeng Li305d0ab2017-09-28 18:16:44 -070011020 if (nested_vmx_is_page_fault_vmexit(vmcs12, fault->error_code) &&
11021 !to_vmx(vcpu)->nested.nested_run_pending) {
Paolo Bonzinib96fb432017-07-27 12:29:32 +020011022 vmcs12->vm_exit_intr_error_code = fault->error_code;
11023 nested_vmx_vmexit(vcpu, EXIT_REASON_EXCEPTION_NMI,
11024 PF_VECTOR | INTR_TYPE_HARD_EXCEPTION |
11025 INTR_INFO_DELIVER_CODE_MASK | INTR_INFO_VALID_MASK,
11026 fault->address);
Paolo Bonzini7313c692017-07-27 10:31:25 +020011027 } else {
Gleb Natapovfeaf0c7d2013-09-25 12:51:36 +030011028 kvm_inject_page_fault(vcpu, fault);
Paolo Bonzini7313c692017-07-27 10:31:25 +020011029 }
Gleb Natapovfeaf0c7d2013-09-25 12:51:36 +030011030}
11031
Paolo Bonzinic9923842017-12-13 14:16:30 +010011032static inline bool nested_vmx_prepare_msr_bitmap(struct kvm_vcpu *vcpu,
11033 struct vmcs12 *vmcs12);
Jim Mattson6beb7bd2016-11-30 12:03:45 -080011034
Paolo Bonzini7f7f1ba2018-07-18 18:49:01 +020011035static void nested_get_vmcs12_pages(struct kvm_vcpu *vcpu)
Wanpeng Lia2bcba52014-08-21 19:46:49 +080011036{
Paolo Bonzini7f7f1ba2018-07-18 18:49:01 +020011037 struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
Wanpeng Lia2bcba52014-08-21 19:46:49 +080011038 struct vcpu_vmx *vmx = to_vmx(vcpu);
David Hildenbrand5e2f30b2017-08-03 18:11:04 +020011039 struct page *page;
Jim Mattson6beb7bd2016-11-30 12:03:45 -080011040 u64 hpa;
Wanpeng Lia2bcba52014-08-21 19:46:49 +080011041
11042 if (nested_cpu_has2(vmcs12, SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES)) {
Wanpeng Lia2bcba52014-08-21 19:46:49 +080011043 /*
11044 * Translate L1 physical address to host physical
11045 * address for vmcs02. Keep the page pinned, so this
11046 * physical address remains valid. We keep a reference
11047 * to it so we can release it later.
11048 */
David Hildenbrand5e2f30b2017-08-03 18:11:04 +020011049 if (vmx->nested.apic_access_page) { /* shouldn't happen */
David Hildenbrand53a70da2017-08-03 18:11:05 +020011050 kvm_release_page_dirty(vmx->nested.apic_access_page);
David Hildenbrand5e2f30b2017-08-03 18:11:04 +020011051 vmx->nested.apic_access_page = NULL;
11052 }
11053 page = kvm_vcpu_gpa_to_page(vcpu, vmcs12->apic_access_addr);
Jim Mattson6beb7bd2016-11-30 12:03:45 -080011054 /*
11055 * If translation failed, no matter: This feature asks
11056 * to exit when accessing the given address, and if it
11057 * can never be accessed, this feature won't do
11058 * anything anyway.
11059 */
David Hildenbrand5e2f30b2017-08-03 18:11:04 +020011060 if (!is_error_page(page)) {
11061 vmx->nested.apic_access_page = page;
Jim Mattson6beb7bd2016-11-30 12:03:45 -080011062 hpa = page_to_phys(vmx->nested.apic_access_page);
11063 vmcs_write64(APIC_ACCESS_ADDR, hpa);
11064 } else {
11065 vmcs_clear_bits(SECONDARY_VM_EXEC_CONTROL,
11066 SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES);
11067 }
Wanpeng Lia2bcba52014-08-21 19:46:49 +080011068 }
Wanpeng Lia7c0b072014-08-21 19:46:50 +080011069
11070 if (nested_cpu_has(vmcs12, CPU_BASED_TPR_SHADOW)) {
David Hildenbrand5e2f30b2017-08-03 18:11:04 +020011071 if (vmx->nested.virtual_apic_page) { /* shouldn't happen */
David Hildenbrand53a70da2017-08-03 18:11:05 +020011072 kvm_release_page_dirty(vmx->nested.virtual_apic_page);
David Hildenbrand5e2f30b2017-08-03 18:11:04 +020011073 vmx->nested.virtual_apic_page = NULL;
11074 }
11075 page = kvm_vcpu_gpa_to_page(vcpu, vmcs12->virtual_apic_page_addr);
Wanpeng Lia7c0b072014-08-21 19:46:50 +080011076
11077 /*
Jim Mattson6beb7bd2016-11-30 12:03:45 -080011078 * If translation failed, VM entry will fail because
11079 * prepare_vmcs02 set VIRTUAL_APIC_PAGE_ADDR to -1ull.
11080 * Failing the vm entry is _not_ what the processor
11081 * does but it's basically the only possibility we
11082 * have. We could still enter the guest if CR8 load
11083 * exits are enabled, CR8 store exits are enabled, and
11084 * virtualize APIC access is disabled; in this case
11085 * the processor would never use the TPR shadow and we
11086 * could simply clear the bit from the execution
11087 * control. But such a configuration is useless, so
11088 * let's keep the code simple.
Wanpeng Lia7c0b072014-08-21 19:46:50 +080011089 */
David Hildenbrand5e2f30b2017-08-03 18:11:04 +020011090 if (!is_error_page(page)) {
11091 vmx->nested.virtual_apic_page = page;
Jim Mattson6beb7bd2016-11-30 12:03:45 -080011092 hpa = page_to_phys(vmx->nested.virtual_apic_page);
11093 vmcs_write64(VIRTUAL_APIC_PAGE_ADDR, hpa);
11094 }
Wanpeng Lia7c0b072014-08-21 19:46:50 +080011095 }
11096
Wincy Van705699a2015-02-03 23:58:17 +080011097 if (nested_cpu_has_posted_intr(vmcs12)) {
Wincy Van705699a2015-02-03 23:58:17 +080011098 if (vmx->nested.pi_desc_page) { /* shouldn't happen */
11099 kunmap(vmx->nested.pi_desc_page);
David Hildenbrand53a70da2017-08-03 18:11:05 +020011100 kvm_release_page_dirty(vmx->nested.pi_desc_page);
David Hildenbrand5e2f30b2017-08-03 18:11:04 +020011101 vmx->nested.pi_desc_page = NULL;
Wincy Van705699a2015-02-03 23:58:17 +080011102 }
David Hildenbrand5e2f30b2017-08-03 18:11:04 +020011103 page = kvm_vcpu_gpa_to_page(vcpu, vmcs12->posted_intr_desc_addr);
11104 if (is_error_page(page))
Jim Mattson6beb7bd2016-11-30 12:03:45 -080011105 return;
David Hildenbrand5e2f30b2017-08-03 18:11:04 +020011106 vmx->nested.pi_desc_page = page;
11107 vmx->nested.pi_desc = kmap(vmx->nested.pi_desc_page);
Wincy Van705699a2015-02-03 23:58:17 +080011108 vmx->nested.pi_desc =
11109 (struct pi_desc *)((void *)vmx->nested.pi_desc +
11110 (unsigned long)(vmcs12->posted_intr_desc_addr &
11111 (PAGE_SIZE - 1)));
Jim Mattson6beb7bd2016-11-30 12:03:45 -080011112 vmcs_write64(POSTED_INTR_DESC_ADDR,
11113 page_to_phys(vmx->nested.pi_desc_page) +
11114 (unsigned long)(vmcs12->posted_intr_desc_addr &
11115 (PAGE_SIZE - 1)));
Wincy Van705699a2015-02-03 23:58:17 +080011116 }
Linus Torvaldsd4667ca2018-02-14 17:02:15 -080011117 if (nested_vmx_prepare_msr_bitmap(vcpu, vmcs12))
KarimAllah Ahmed3712caeb2018-02-10 23:39:26 +000011118 vmcs_set_bits(CPU_BASED_VM_EXEC_CONTROL,
11119 CPU_BASED_USE_MSR_BITMAPS);
Jim Mattson6beb7bd2016-11-30 12:03:45 -080011120 else
11121 vmcs_clear_bits(CPU_BASED_VM_EXEC_CONTROL,
11122 CPU_BASED_USE_MSR_BITMAPS);
Wanpeng Lia2bcba52014-08-21 19:46:49 +080011123}
11124
Jan Kiszkaf41245002014-03-07 20:03:13 +010011125static void vmx_start_preemption_timer(struct kvm_vcpu *vcpu)
11126{
11127 u64 preemption_timeout = get_vmcs12(vcpu)->vmx_preemption_timer_value;
11128 struct vcpu_vmx *vmx = to_vmx(vcpu);
11129
11130 if (vcpu->arch.virtual_tsc_khz == 0)
11131 return;
11132
11133 /* Make sure short timeouts reliably trigger an immediate vmexit.
11134 * hrtimer_start does not guarantee this. */
11135 if (preemption_timeout <= 1) {
11136 vmx_preemption_timer_fn(&vmx->nested.preemption_timer);
11137 return;
11138 }
11139
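	/*
	 * The emulated VMX preemption timer ticks every 2^rate TSC cycles,
	 * with rate = VMX_MISC_EMULATED_PREEMPTION_TIMER_RATE; convert the
	 * vmcs12 value to nanoseconds for the hrtimer.  For example, assuming
	 * rate == 5 and a 2 GHz guest TSC (virtual_tsc_khz == 2000000), a
	 * value of 1000 becomes (1000 << 5) * 1000000 / 2000000 = 16000 ns.
	 */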
11140 preemption_timeout <<= VMX_MISC_EMULATED_PREEMPTION_TIMER_RATE;
11141 preemption_timeout *= 1000000;
11142 do_div(preemption_timeout, vcpu->arch.virtual_tsc_khz);
11143 hrtimer_start(&vmx->nested.preemption_timer,
11144 ns_to_ktime(preemption_timeout), HRTIMER_MODE_REL);
11145}
11146
Jim Mattson56a20512017-07-06 16:33:06 -070011147static int nested_vmx_check_io_bitmap_controls(struct kvm_vcpu *vcpu,
11148 struct vmcs12 *vmcs12)
11149{
11150 if (!nested_cpu_has(vmcs12, CPU_BASED_USE_IO_BITMAPS))
11151 return 0;
11152
11153 if (!page_address_valid(vcpu, vmcs12->io_bitmap_a) ||
11154 !page_address_valid(vcpu, vmcs12->io_bitmap_b))
11155 return -EINVAL;
11156
11157 return 0;
11158}
11159
Wincy Van3af18d92015-02-03 23:49:31 +080011160static int nested_vmx_check_msr_bitmap_controls(struct kvm_vcpu *vcpu,
11161 struct vmcs12 *vmcs12)
11162{
Wincy Van3af18d92015-02-03 23:49:31 +080011163 if (!nested_cpu_has(vmcs12, CPU_BASED_USE_MSR_BITMAPS))
11164 return 0;
11165
Jim Mattson5fa99cb2017-07-06 16:33:07 -070011166 if (!page_address_valid(vcpu, vmcs12->msr_bitmap))
Wincy Van3af18d92015-02-03 23:49:31 +080011167 return -EINVAL;
11168
11169 return 0;
11170}
11171
Jim Mattson712b12d2017-08-24 13:24:47 -070011172static int nested_vmx_check_tpr_shadow_controls(struct kvm_vcpu *vcpu,
11173 struct vmcs12 *vmcs12)
11174{
11175 if (!nested_cpu_has(vmcs12, CPU_BASED_TPR_SHADOW))
11176 return 0;
11177
11178 if (!page_address_valid(vcpu, vmcs12->virtual_apic_page_addr))
11179 return -EINVAL;
11180
11181 return 0;
11182}
11183
Wincy Van3af18d92015-02-03 23:49:31 +080011184/*
11185 * Merge L0's and L1's MSR bitmaps; return false to indicate that
11186 * the hardware MSR bitmap should not be used for L2 at all.
11187 */
Paolo Bonzinic9923842017-12-13 14:16:30 +010011188static inline bool nested_vmx_prepare_msr_bitmap(struct kvm_vcpu *vcpu,
11189 struct vmcs12 *vmcs12)
Wincy Van3af18d92015-02-03 23:49:31 +080011190{
Wincy Van82f0dd42015-02-03 23:57:18 +080011191 int msr;
Wincy Vanf2b93282015-02-03 23:56:03 +080011192 struct page *page;
Radim Krčmářd048c092016-08-08 20:16:22 +020011193 unsigned long *msr_bitmap_l1;
Paolo Bonzini904e14f2018-01-16 16:51:18 +010011194 unsigned long *msr_bitmap_l0 = to_vmx(vcpu)->nested.vmcs02.msr_bitmap;
Ashok Raj15d45072018-02-01 22:59:43 +010011195 /*
KarimAllah Ahmedd28b3872018-02-01 22:59:45 +010011196 * pred_cmd & spec_ctrl are trying to verify two things:
Ashok Raj15d45072018-02-01 22:59:43 +010011197 *
11198	 * 1. L0 gave L1 permission to actually pass the MSR through. This
11199 * ensures that we do not accidentally generate an L02 MSR bitmap
11200 * from the L12 MSR bitmap that is too permissive.
11201 * 2. That L1 or L2s have actually used the MSR. This avoids
11202	 *    unnecessary merging of the bitmap if the MSR is unused. This
11203 * works properly because we only update the L01 MSR bitmap lazily.
11204 * So even if L0 should pass L1 these MSRs, the L01 bitmap is only
11205 * updated to reflect this when L1 (or its L2s) actually write to
11206 * the MSR.
11207 */
KarimAllah Ahmed206587a2018-02-10 23:39:25 +000011208 bool pred_cmd = !msr_write_intercepted_l01(vcpu, MSR_IA32_PRED_CMD);
11209 bool spec_ctrl = !msr_write_intercepted_l01(vcpu, MSR_IA32_SPEC_CTRL);
Wincy Vanf2b93282015-02-03 23:56:03 +080011210
Paolo Bonzinic9923842017-12-13 14:16:30 +010011211 /* Nothing to do if the MSR bitmap is not in use. */
11212 if (!cpu_has_vmx_msr_bitmap() ||
11213 !nested_cpu_has(vmcs12, CPU_BASED_USE_MSR_BITMAPS))
11214 return false;
11215
Ashok Raj15d45072018-02-01 22:59:43 +010011216 if (!nested_cpu_has_virt_x2apic_mode(vmcs12) &&
KarimAllah Ahmedd28b3872018-02-01 22:59:45 +010011217 !pred_cmd && !spec_ctrl)
Wincy Vanf2b93282015-02-03 23:56:03 +080011218 return false;
11219
David Hildenbrand5e2f30b2017-08-03 18:11:04 +020011220 page = kvm_vcpu_gpa_to_page(vcpu, vmcs12->msr_bitmap);
11221 if (is_error_page(page))
Wincy Vanf2b93282015-02-03 23:56:03 +080011222 return false;
Paolo Bonzinic9923842017-12-13 14:16:30 +010011223
Radim Krčmářd048c092016-08-08 20:16:22 +020011224 msr_bitmap_l1 = (unsigned long *)kmap(page);
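	/*
	 * The MSR bitmap page consists of four 1KB regions: read bitmap for
	 * low MSRs, read bitmap for high MSRs, write bitmap for low MSRs and
	 * write bitmap for high MSRs.  The x2APIC MSRs 0x800-0x8ff are in the
	 * "low" range, so word N of the read-low region pairs with word
	 * N + 0x800 / sizeof(long) of the write-low region below.
	 */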
Paolo Bonzinic9923842017-12-13 14:16:30 +010011225 if (nested_cpu_has_apic_reg_virt(vmcs12)) {
11226 /*
11227		 * L0 need not intercept reads for MSRs between 0x800 and 0x8ff; it
11228 * just lets the processor take the value from the virtual-APIC page;
11229 * take those 256 bits directly from the L1 bitmap.
11230 */
11231 for (msr = 0x800; msr <= 0x8ff; msr += BITS_PER_LONG) {
11232 unsigned word = msr / BITS_PER_LONG;
11233 msr_bitmap_l0[word] = msr_bitmap_l1[word];
11234 msr_bitmap_l0[word + (0x800 / sizeof(long))] = ~0;
Wincy Van608406e2015-02-03 23:57:51 +080011235 }
Paolo Bonzinic9923842017-12-13 14:16:30 +010011236 } else {
11237 for (msr = 0x800; msr <= 0x8ff; msr += BITS_PER_LONG) {
11238 unsigned word = msr / BITS_PER_LONG;
11239 msr_bitmap_l0[word] = ~0;
11240 msr_bitmap_l0[word + (0x800 / sizeof(long))] = ~0;
11241 }
11242 }
11243
11244 nested_vmx_disable_intercept_for_msr(
11245 msr_bitmap_l1, msr_bitmap_l0,
Paolo Bonzinid7231e72017-12-21 00:47:55 +010011246 X2APIC_MSR(APIC_TASKPRI),
Paolo Bonzinic9923842017-12-13 14:16:30 +010011247 MSR_TYPE_W);
11248
11249 if (nested_cpu_has_vid(vmcs12)) {
11250 nested_vmx_disable_intercept_for_msr(
11251 msr_bitmap_l1, msr_bitmap_l0,
Paolo Bonzinid7231e72017-12-21 00:47:55 +010011252 X2APIC_MSR(APIC_EOI),
Paolo Bonzinic9923842017-12-13 14:16:30 +010011253 MSR_TYPE_W);
11254 nested_vmx_disable_intercept_for_msr(
11255 msr_bitmap_l1, msr_bitmap_l0,
Paolo Bonzinid7231e72017-12-21 00:47:55 +010011256 X2APIC_MSR(APIC_SELF_IPI),
Paolo Bonzinic9923842017-12-13 14:16:30 +010011257 MSR_TYPE_W);
Wincy Van82f0dd42015-02-03 23:57:18 +080011258 }
Ashok Raj15d45072018-02-01 22:59:43 +010011259
KarimAllah Ahmedd28b3872018-02-01 22:59:45 +010011260 if (spec_ctrl)
11261 nested_vmx_disable_intercept_for_msr(
11262 msr_bitmap_l1, msr_bitmap_l0,
11263 MSR_IA32_SPEC_CTRL,
11264 MSR_TYPE_R | MSR_TYPE_W);
11265
Ashok Raj15d45072018-02-01 22:59:43 +010011266 if (pred_cmd)
11267 nested_vmx_disable_intercept_for_msr(
11268 msr_bitmap_l1, msr_bitmap_l0,
11269 MSR_IA32_PRED_CMD,
11270 MSR_TYPE_W);
11271
Wincy Vanf2b93282015-02-03 23:56:03 +080011272 kunmap(page);
David Hildenbrand53a70da2017-08-03 18:11:05 +020011273 kvm_release_page_clean(page);
Wincy Vanf2b93282015-02-03 23:56:03 +080011274
11275 return true;
11276}
11277
Liran Alon61ada742018-06-23 02:35:08 +030011278static void nested_cache_shadow_vmcs12(struct kvm_vcpu *vcpu,
11279 struct vmcs12 *vmcs12)
11280{
11281 struct vmcs12 *shadow;
11282 struct page *page;
11283
11284 if (!nested_cpu_has_shadow_vmcs(vmcs12) ||
11285 vmcs12->vmcs_link_pointer == -1ull)
11286 return;
11287
11288 shadow = get_shadow_vmcs12(vcpu);
11289 page = kvm_vcpu_gpa_to_page(vcpu, vmcs12->vmcs_link_pointer);
11290
11291 memcpy(shadow, kmap(page), VMCS12_SIZE);
11292
11293 kunmap(page);
11294 kvm_release_page_clean(page);
11295}
11296
11297static void nested_flush_cached_shadow_vmcs12(struct kvm_vcpu *vcpu,
11298 struct vmcs12 *vmcs12)
11299{
11300 struct vcpu_vmx *vmx = to_vmx(vcpu);
11301
11302 if (!nested_cpu_has_shadow_vmcs(vmcs12) ||
11303 vmcs12->vmcs_link_pointer == -1ull)
11304 return;
11305
11306 kvm_write_guest(vmx->vcpu.kvm, vmcs12->vmcs_link_pointer,
11307 get_shadow_vmcs12(vcpu), VMCS12_SIZE);
11308}
11309
Krish Sadhukhanf0f4cf52018-04-11 01:10:16 -040011310static int nested_vmx_check_apic_access_controls(struct kvm_vcpu *vcpu,
11311 struct vmcs12 *vmcs12)
11312{
11313 if (nested_cpu_has2(vmcs12, SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES) &&
11314 !page_address_valid(vcpu, vmcs12->apic_access_addr))
11315 return -EINVAL;
11316 else
11317 return 0;
11318}
11319
Wincy Vanf2b93282015-02-03 23:56:03 +080011320static int nested_vmx_check_apicv_controls(struct kvm_vcpu *vcpu,
11321 struct vmcs12 *vmcs12)
11322{
Wincy Van82f0dd42015-02-03 23:57:18 +080011323 if (!nested_cpu_has_virt_x2apic_mode(vmcs12) &&
Wincy Van608406e2015-02-03 23:57:51 +080011324 !nested_cpu_has_apic_reg_virt(vmcs12) &&
Wincy Van705699a2015-02-03 23:58:17 +080011325 !nested_cpu_has_vid(vmcs12) &&
11326 !nested_cpu_has_posted_intr(vmcs12))
Wincy Vanf2b93282015-02-03 23:56:03 +080011327 return 0;
11328
11329 /*
11330 * If virtualize x2apic mode is enabled,
11331 * virtualize apic access must be disabled.
11332 */
Wincy Van82f0dd42015-02-03 23:57:18 +080011333 if (nested_cpu_has_virt_x2apic_mode(vmcs12) &&
11334 nested_cpu_has2(vmcs12, SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES))
Wincy Vanf2b93282015-02-03 23:56:03 +080011335 return -EINVAL;
11336
Wincy Van608406e2015-02-03 23:57:51 +080011337 /*
11338 * If virtual interrupt delivery is enabled,
11339 * we must exit on external interrupts.
11340 */
11341 if (nested_cpu_has_vid(vmcs12) &&
11342 !nested_exit_on_intr(vcpu))
11343 return -EINVAL;
11344
Wincy Van705699a2015-02-03 23:58:17 +080011345 /*
11346 * bits 15:8 should be zero in posted_intr_nv,
11347	 * the descriptor address has already been checked
11348 * in nested_get_vmcs12_pages.
11349 */
11350 if (nested_cpu_has_posted_intr(vmcs12) &&
11351 (!nested_cpu_has_vid(vmcs12) ||
11352 !nested_exit_intr_ack_set(vcpu) ||
11353 vmcs12->posted_intr_nv & 0xff00))
11354 return -EINVAL;
11355
Wincy Vanf2b93282015-02-03 23:56:03 +080011356 /* tpr shadow is needed by all apicv features. */
11357 if (!nested_cpu_has(vmcs12, CPU_BASED_TPR_SHADOW))
11358 return -EINVAL;
11359
11360 return 0;
Wincy Van3af18d92015-02-03 23:49:31 +080011361}
11362
Eugene Korenevskye9ac0332014-12-11 08:53:27 +030011363static int nested_vmx_check_msr_switch(struct kvm_vcpu *vcpu,
11364 unsigned long count_field,
Eugene Korenevsky92d71bc2015-03-29 23:56:44 +030011365 unsigned long addr_field)
Wincy Vanff651cb2014-12-11 08:52:58 +030011366{
Liran Alone2536742018-06-23 02:35:02 +030011367 struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
Eugene Korenevsky92d71bc2015-03-29 23:56:44 +030011368 int maxphyaddr;
Eugene Korenevskye9ac0332014-12-11 08:53:27 +030011369 u64 count, addr;
11370
Liran Alone2536742018-06-23 02:35:02 +030011371 if (vmcs12_read_any(vmcs12, count_field, &count) ||
11372 vmcs12_read_any(vmcs12, addr_field, &addr)) {
Eugene Korenevskye9ac0332014-12-11 08:53:27 +030011373 WARN_ON(1);
11374 return -EINVAL;
11375 }
11376 if (count == 0)
11377 return 0;
Eugene Korenevsky92d71bc2015-03-29 23:56:44 +030011378 maxphyaddr = cpuid_maxphyaddr(vcpu);
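	/*
	 * Each vmx_msr_entry is 16 bytes, so the list must be 16-byte aligned
	 * and both its first and last byte must lie below the guest's
	 * MAXPHYADDR; the check below verifies exactly that.
	 */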
Eugene Korenevskye9ac0332014-12-11 08:53:27 +030011379 if (!IS_ALIGNED(addr, 16) || addr >> maxphyaddr ||
11380 (addr + count * sizeof(struct vmx_msr_entry) - 1) >> maxphyaddr) {
Paolo Bonzinibbe41b92016-08-19 17:51:20 +020011381 pr_debug_ratelimited(
Eugene Korenevskye9ac0332014-12-11 08:53:27 +030011382 "nVMX: invalid MSR switch (0x%lx, %d, %llu, 0x%08llx)",
11383 addr_field, maxphyaddr, count, addr);
11384 return -EINVAL;
11385 }
11386 return 0;
11387}
11388
11389static int nested_vmx_check_msr_switch_controls(struct kvm_vcpu *vcpu,
11390 struct vmcs12 *vmcs12)
11391{
Eugene Korenevskye9ac0332014-12-11 08:53:27 +030011392 if (vmcs12->vm_exit_msr_load_count == 0 &&
11393 vmcs12->vm_exit_msr_store_count == 0 &&
11394 vmcs12->vm_entry_msr_load_count == 0)
11395 return 0; /* Fast path */
Eugene Korenevskye9ac0332014-12-11 08:53:27 +030011396 if (nested_vmx_check_msr_switch(vcpu, VM_EXIT_MSR_LOAD_COUNT,
Eugene Korenevsky92d71bc2015-03-29 23:56:44 +030011397 VM_EXIT_MSR_LOAD_ADDR) ||
Eugene Korenevskye9ac0332014-12-11 08:53:27 +030011398 nested_vmx_check_msr_switch(vcpu, VM_EXIT_MSR_STORE_COUNT,
Eugene Korenevsky92d71bc2015-03-29 23:56:44 +030011399 VM_EXIT_MSR_STORE_ADDR) ||
Eugene Korenevskye9ac0332014-12-11 08:53:27 +030011400 nested_vmx_check_msr_switch(vcpu, VM_ENTRY_MSR_LOAD_COUNT,
Eugene Korenevsky92d71bc2015-03-29 23:56:44 +030011401 VM_ENTRY_MSR_LOAD_ADDR))
Wincy Vanff651cb2014-12-11 08:52:58 +030011402 return -EINVAL;
11403 return 0;
11404}
11405
Bandan Dasc5f983f2017-05-05 15:25:14 -040011406static int nested_vmx_check_pml_controls(struct kvm_vcpu *vcpu,
11407 struct vmcs12 *vmcs12)
11408{
11409 u64 address = vmcs12->pml_address;
11410 int maxphyaddr = cpuid_maxphyaddr(vcpu);
11411
11412 if (nested_cpu_has2(vmcs12, SECONDARY_EXEC_ENABLE_PML)) {
11413 if (!nested_cpu_has_ept(vmcs12) ||
11414 !IS_ALIGNED(address, 4096) ||
11415 address >> maxphyaddr)
11416 return -EINVAL;
11417 }
11418
11419 return 0;
11420}
11421
Liran Alona8a7c022018-06-23 02:35:06 +030011422static int nested_vmx_check_shadow_vmcs_controls(struct kvm_vcpu *vcpu,
11423 struct vmcs12 *vmcs12)
11424{
11425 if (!nested_cpu_has_shadow_vmcs(vmcs12))
11426 return 0;
11427
11428 if (!page_address_valid(vcpu, vmcs12->vmread_bitmap) ||
11429 !page_address_valid(vcpu, vmcs12->vmwrite_bitmap))
11430 return -EINVAL;
11431
11432 return 0;
11433}
11434
Eugene Korenevskye9ac0332014-12-11 08:53:27 +030011435static int nested_vmx_msr_check_common(struct kvm_vcpu *vcpu,
11436 struct vmx_msr_entry *e)
11437{
11438 /* x2APIC MSR accesses are not allowed */
Jan Kiszka8a9781f2015-05-04 08:32:32 +020011439 if (vcpu->arch.apic_base & X2APIC_ENABLE && e->index >> 8 == 0x8)
Eugene Korenevskye9ac0332014-12-11 08:53:27 +030011440 return -EINVAL;
11441 if (e->index == MSR_IA32_UCODE_WRITE || /* SDM Table 35-2 */
11442 e->index == MSR_IA32_UCODE_REV)
11443 return -EINVAL;
11444 if (e->reserved != 0)
11445 return -EINVAL;
11446 return 0;
11447}
11448
11449static int nested_vmx_load_msr_check(struct kvm_vcpu *vcpu,
11450 struct vmx_msr_entry *e)
Wincy Vanff651cb2014-12-11 08:52:58 +030011451{
11452 if (e->index == MSR_FS_BASE ||
11453 e->index == MSR_GS_BASE ||
Eugene Korenevskye9ac0332014-12-11 08:53:27 +030011454 e->index == MSR_IA32_SMM_MONITOR_CTL || /* SMM is not supported */
11455 nested_vmx_msr_check_common(vcpu, e))
11456 return -EINVAL;
11457 return 0;
11458}
11459
11460static int nested_vmx_store_msr_check(struct kvm_vcpu *vcpu,
11461 struct vmx_msr_entry *e)
11462{
11463 if (e->index == MSR_IA32_SMBASE || /* SMM is not supported */
11464 nested_vmx_msr_check_common(vcpu, e))
Wincy Vanff651cb2014-12-11 08:52:58 +030011465 return -EINVAL;
11466 return 0;
11467}
11468
11469/*
11470 * Load the guest's or host's MSRs at nested entry/exit.
11471 * Returns 0 on success, or the 1-based index of the failing entry.
11472 */
11473static u32 nested_vmx_load_msr(struct kvm_vcpu *vcpu, u64 gpa, u32 count)
11474{
11475 u32 i;
11476 struct vmx_msr_entry e;
11477 struct msr_data msr;
11478
11479 msr.host_initiated = false;
11480 for (i = 0; i < count; i++) {
Paolo Bonzini54bf36a2015-04-08 15:39:23 +020011481 if (kvm_vcpu_read_guest(vcpu, gpa + i * sizeof(e),
11482 &e, sizeof(e))) {
Paolo Bonzinibbe41b92016-08-19 17:51:20 +020011483 pr_debug_ratelimited(
Eugene Korenevskye9ac0332014-12-11 08:53:27 +030011484 "%s cannot read MSR entry (%u, 0x%08llx)\n",
11485 __func__, i, gpa + i * sizeof(e));
Wincy Vanff651cb2014-12-11 08:52:58 +030011486 goto fail;
Eugene Korenevskye9ac0332014-12-11 08:53:27 +030011487 }
11488 if (nested_vmx_load_msr_check(vcpu, &e)) {
Paolo Bonzinibbe41b92016-08-19 17:51:20 +020011489 pr_debug_ratelimited(
Eugene Korenevskye9ac0332014-12-11 08:53:27 +030011490 "%s check failed (%u, 0x%x, 0x%x)\n",
11491 __func__, i, e.index, e.reserved);
11492 goto fail;
11493 }
Wincy Vanff651cb2014-12-11 08:52:58 +030011494 msr.index = e.index;
11495 msr.data = e.value;
Eugene Korenevskye9ac0332014-12-11 08:53:27 +030011496 if (kvm_set_msr(vcpu, &msr)) {
Paolo Bonzinibbe41b92016-08-19 17:51:20 +020011497 pr_debug_ratelimited(
Eugene Korenevskye9ac0332014-12-11 08:53:27 +030011498 "%s cannot write MSR (%u, 0x%x, 0x%llx)\n",
11499 __func__, i, e.index, e.value);
Wincy Vanff651cb2014-12-11 08:52:58 +030011500 goto fail;
Eugene Korenevskye9ac0332014-12-11 08:53:27 +030011501 }
Wincy Vanff651cb2014-12-11 08:52:58 +030011502 }
11503 return 0;
11504fail:
11505 return i + 1;
11506}
11507
11508static int nested_vmx_store_msr(struct kvm_vcpu *vcpu, u64 gpa, u32 count)
11509{
11510 u32 i;
11511 struct vmx_msr_entry e;
11512
11513 for (i = 0; i < count; i++) {
Paolo Bonzini609e36d2015-04-08 15:30:38 +020011514 struct msr_data msr_info;
Paolo Bonzini54bf36a2015-04-08 15:39:23 +020011515 if (kvm_vcpu_read_guest(vcpu,
11516 gpa + i * sizeof(e),
11517 &e, 2 * sizeof(u32))) {
Paolo Bonzinibbe41b92016-08-19 17:51:20 +020011518 pr_debug_ratelimited(
Eugene Korenevskye9ac0332014-12-11 08:53:27 +030011519 "%s cannot read MSR entry (%u, 0x%08llx)\n",
11520 __func__, i, gpa + i * sizeof(e));
Wincy Vanff651cb2014-12-11 08:52:58 +030011521 return -EINVAL;
Eugene Korenevskye9ac0332014-12-11 08:53:27 +030011522 }
11523 if (nested_vmx_store_msr_check(vcpu, &e)) {
Paolo Bonzinibbe41b92016-08-19 17:51:20 +020011524 pr_debug_ratelimited(
Eugene Korenevskye9ac0332014-12-11 08:53:27 +030011525 "%s check failed (%u, 0x%x, 0x%x)\n",
11526 __func__, i, e.index, e.reserved);
Wincy Vanff651cb2014-12-11 08:52:58 +030011527 return -EINVAL;
Eugene Korenevskye9ac0332014-12-11 08:53:27 +030011528 }
Paolo Bonzini609e36d2015-04-08 15:30:38 +020011529 msr_info.host_initiated = false;
11530 msr_info.index = e.index;
11531 if (kvm_get_msr(vcpu, &msr_info)) {
Paolo Bonzinibbe41b92016-08-19 17:51:20 +020011532 pr_debug_ratelimited(
Eugene Korenevskye9ac0332014-12-11 08:53:27 +030011533 "%s cannot read MSR (%u, 0x%x)\n",
11534 __func__, i, e.index);
11535 return -EINVAL;
11536 }
Paolo Bonzini54bf36a2015-04-08 15:39:23 +020011537 if (kvm_vcpu_write_guest(vcpu,
11538 gpa + i * sizeof(e) +
11539 offsetof(struct vmx_msr_entry, value),
11540 &msr_info.data, sizeof(msr_info.data))) {
Paolo Bonzinibbe41b92016-08-19 17:51:20 +020011541 pr_debug_ratelimited(
Eugene Korenevskye9ac0332014-12-11 08:53:27 +030011542 "%s cannot write MSR (%u, 0x%x, 0x%llx)\n",
Paolo Bonzini609e36d2015-04-08 15:30:38 +020011543 __func__, i, e.index, msr_info.data);
Eugene Korenevskye9ac0332014-12-11 08:53:27 +030011544 return -EINVAL;
11545 }
Wincy Vanff651cb2014-12-11 08:52:58 +030011546 }
11547 return 0;
11548}
11549
Ladi Prosek1dc35da2016-11-30 16:03:11 +010011550static bool nested_cr3_valid(struct kvm_vcpu *vcpu, unsigned long val)
11551{
11552 unsigned long invalid_mask;
11553
11554 invalid_mask = (~0ULL) << cpuid_maxphyaddr(vcpu);
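	/* A CR3 value is valid only if it sets no bits above MAXPHYADDR. */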
11555 return (val & invalid_mask) == 0;
11556}
11557
Nadav Har'Elfe3ef052011-05-25 23:10:02 +030011558/*
Ladi Prosek9ed38ffa2016-11-30 16:03:10 +010011559 * Load guest's/host's cr3 at nested entry/exit. nested_ept is true if we are
11560 * emulating VM entry into a guest with EPT enabled.
11561 * Returns 0 on success, 1 on failure. Invalid state exit qualification code
11562 * is assigned to entry_failure_code on failure.
11563 */
11564static int nested_vmx_load_cr3(struct kvm_vcpu *vcpu, unsigned long cr3, bool nested_ept,
Jim Mattsonca0bde22016-11-30 12:03:46 -080011565 u32 *entry_failure_code)
Ladi Prosek9ed38ffa2016-11-30 16:03:10 +010011566{
Ladi Prosek9ed38ffa2016-11-30 16:03:10 +010011567 if (cr3 != kvm_read_cr3(vcpu) || (!nested_ept && pdptrs_changed(vcpu))) {
Ladi Prosek1dc35da2016-11-30 16:03:11 +010011568 if (!nested_cr3_valid(vcpu, cr3)) {
Ladi Prosek9ed38ffa2016-11-30 16:03:10 +010011569 *entry_failure_code = ENTRY_FAIL_DEFAULT;
11570 return 1;
11571 }
11572
11573 /*
11574 * If PAE paging and EPT are both on, CR3 is not used by the CPU and
11575 * must not be dereferenced.
11576 */
11577 if (!is_long_mode(vcpu) && is_pae(vcpu) && is_paging(vcpu) &&
11578 !nested_ept) {
11579 if (!load_pdptrs(vcpu, vcpu->arch.walk_mmu, cr3)) {
11580 *entry_failure_code = ENTRY_FAIL_PDPTE;
11581 return 1;
11582 }
11583 }
Ladi Prosek9ed38ffa2016-11-30 16:03:10 +010011584 }
11585
Junaid Shahid50c28f22018-06-27 14:59:11 -070011586 if (!nested_ept)
Junaid Shahidade61e22018-06-27 14:59:15 -070011587 kvm_mmu_new_cr3(vcpu, cr3, false);
Junaid Shahid50c28f22018-06-27 14:59:11 -070011588
11589 vcpu->arch.cr3 = cr3;
11590 __set_bit(VCPU_EXREG_CR3, (ulong *)&vcpu->arch.regs_avail);
11591
11592 kvm_init_mmu(vcpu, false);
11593
Ladi Prosek9ed38ffa2016-11-30 16:03:10 +010011594 return 0;
11595}
11596
Jim Mattson6514dc32018-04-26 16:09:12 -070011597static void prepare_vmcs02_full(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
Paolo Bonzini74a497f2017-12-20 13:55:39 +010011598{
Paolo Bonzini8665c3f2017-12-20 13:56:53 +010011599 struct vcpu_vmx *vmx = to_vmx(vcpu);
11600
11601 vmcs_write16(GUEST_ES_SELECTOR, vmcs12->guest_es_selector);
11602 vmcs_write16(GUEST_SS_SELECTOR, vmcs12->guest_ss_selector);
11603 vmcs_write16(GUEST_DS_SELECTOR, vmcs12->guest_ds_selector);
11604 vmcs_write16(GUEST_FS_SELECTOR, vmcs12->guest_fs_selector);
11605 vmcs_write16(GUEST_GS_SELECTOR, vmcs12->guest_gs_selector);
11606 vmcs_write16(GUEST_LDTR_SELECTOR, vmcs12->guest_ldtr_selector);
11607 vmcs_write16(GUEST_TR_SELECTOR, vmcs12->guest_tr_selector);
11608 vmcs_write32(GUEST_ES_LIMIT, vmcs12->guest_es_limit);
11609 vmcs_write32(GUEST_SS_LIMIT, vmcs12->guest_ss_limit);
11610 vmcs_write32(GUEST_DS_LIMIT, vmcs12->guest_ds_limit);
11611 vmcs_write32(GUEST_FS_LIMIT, vmcs12->guest_fs_limit);
11612 vmcs_write32(GUEST_GS_LIMIT, vmcs12->guest_gs_limit);
11613 vmcs_write32(GUEST_LDTR_LIMIT, vmcs12->guest_ldtr_limit);
11614 vmcs_write32(GUEST_TR_LIMIT, vmcs12->guest_tr_limit);
11615 vmcs_write32(GUEST_GDTR_LIMIT, vmcs12->guest_gdtr_limit);
11616 vmcs_write32(GUEST_IDTR_LIMIT, vmcs12->guest_idtr_limit);
11617 vmcs_write32(GUEST_ES_AR_BYTES, vmcs12->guest_es_ar_bytes);
11618 vmcs_write32(GUEST_SS_AR_BYTES, vmcs12->guest_ss_ar_bytes);
11619 vmcs_write32(GUEST_DS_AR_BYTES, vmcs12->guest_ds_ar_bytes);
11620 vmcs_write32(GUEST_FS_AR_BYTES, vmcs12->guest_fs_ar_bytes);
11621 vmcs_write32(GUEST_GS_AR_BYTES, vmcs12->guest_gs_ar_bytes);
11622 vmcs_write32(GUEST_LDTR_AR_BYTES, vmcs12->guest_ldtr_ar_bytes);
11623 vmcs_write32(GUEST_TR_AR_BYTES, vmcs12->guest_tr_ar_bytes);
11624 vmcs_writel(GUEST_SS_BASE, vmcs12->guest_ss_base);
11625 vmcs_writel(GUEST_DS_BASE, vmcs12->guest_ds_base);
11626 vmcs_writel(GUEST_FS_BASE, vmcs12->guest_fs_base);
11627 vmcs_writel(GUEST_GS_BASE, vmcs12->guest_gs_base);
11628 vmcs_writel(GUEST_LDTR_BASE, vmcs12->guest_ldtr_base);
11629 vmcs_writel(GUEST_TR_BASE, vmcs12->guest_tr_base);
11630 vmcs_writel(GUEST_GDTR_BASE, vmcs12->guest_gdtr_base);
11631 vmcs_writel(GUEST_IDTR_BASE, vmcs12->guest_idtr_base);
Paolo Bonzini25a2e4f2017-12-20 14:05:21 +010011632
11633 vmcs_write32(GUEST_SYSENTER_CS, vmcs12->guest_sysenter_cs);
11634 vmcs_writel(GUEST_PENDING_DBG_EXCEPTIONS,
11635 vmcs12->guest_pending_dbg_exceptions);
11636 vmcs_writel(GUEST_SYSENTER_ESP, vmcs12->guest_sysenter_esp);
11637 vmcs_writel(GUEST_SYSENTER_EIP, vmcs12->guest_sysenter_eip);
11638
11639 if (nested_cpu_has_xsaves(vmcs12))
11640 vmcs_write64(XSS_EXIT_BITMAP, vmcs12->xss_exit_bitmap);
11641 vmcs_write64(VMCS_LINK_POINTER, -1ull);
11642
11643 if (cpu_has_vmx_posted_intr())
11644 vmcs_write16(POSTED_INTR_NV, POSTED_INTR_NESTED_VECTOR);
11645
11646 /*
11647 * Whether page-faults are trapped is determined by a combination of
11648 * 3 settings: PFEC_MASK, PFEC_MATCH and EXCEPTION_BITMAP.PF.
11649 * If enable_ept, L0 doesn't care about page faults and we should
11650 * set all of these to L1's desires. However, if !enable_ept, L0 does
11651 * care about (at least some) page faults, and because it is not easy
11652 * (if at all possible?) to merge L0 and L1's desires, we simply ask
11653 * to exit on each and every L2 page fault. This is done by setting
11654 * MASK=MATCH=0 and (see below) EB.PF=1.
11655 * Note that below we don't need special code to set EB.PF beyond the
11656 * "or"ing of the EB of vmcs01 and vmcs12, because when enable_ept,
11657 * vmcs01's EB.PF is 0 so the "or" will take vmcs12's value, and when
11658 * !enable_ept, EB.PF is 1, so the "or" will always be 1.
11659 */
11660 vmcs_write32(PAGE_FAULT_ERROR_CODE_MASK,
11661 enable_ept ? vmcs12->page_fault_error_code_mask : 0);
11662 vmcs_write32(PAGE_FAULT_ERROR_CODE_MATCH,
11663 enable_ept ? vmcs12->page_fault_error_code_match : 0);
11664
11665 /* All VMFUNCs are currently emulated through L0 vmexits. */
11666 if (cpu_has_vmx_vmfunc())
11667 vmcs_write64(VM_FUNCTION_CONTROL, 0);
11668
11669 if (cpu_has_vmx_apicv()) {
11670 vmcs_write64(EOI_EXIT_BITMAP0, vmcs12->eoi_exit_bitmap0);
11671 vmcs_write64(EOI_EXIT_BITMAP1, vmcs12->eoi_exit_bitmap1);
11672 vmcs_write64(EOI_EXIT_BITMAP2, vmcs12->eoi_exit_bitmap2);
11673 vmcs_write64(EOI_EXIT_BITMAP3, vmcs12->eoi_exit_bitmap3);
11674 }
11675
11676 /*
11677 * Set host-state according to L0's settings (vmcs12 is irrelevant here)
11678 * Some constant fields are set here by vmx_set_constant_host_state().
11679 * Other fields are different per CPU, and will be set later when
Sean Christopherson6d6095b2018-07-23 12:32:44 -070011680 * vmx_vcpu_load() is called, and when vmx_prepare_switch_to_guest()
11681 * is called.
Paolo Bonzini25a2e4f2017-12-20 14:05:21 +010011682 */
11683 vmx_set_constant_host_state(vmx);
11684
11685 /*
11686 * Set the MSR load/store lists to match L0's settings.
11687 */
11688 vmcs_write32(VM_EXIT_MSR_STORE_COUNT, 0);
11689 vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, vmx->msr_autoload.nr);
11690 vmcs_write64(VM_EXIT_MSR_LOAD_ADDR, __pa(vmx->msr_autoload.host));
11691 vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, vmx->msr_autoload.nr);
11692 vmcs_write64(VM_ENTRY_MSR_LOAD_ADDR, __pa(vmx->msr_autoload.guest));
11693
11694 set_cr4_guest_host_mask(vmx);
11695
11696 if (vmx_mpx_supported())
11697 vmcs_write64(GUEST_BNDCFGS, vmcs12->guest_bndcfgs);
11698
11699 if (enable_vpid) {
11700 if (nested_cpu_has_vpid(vmcs12) && vmx->nested.vpid02)
11701 vmcs_write16(VIRTUAL_PROCESSOR_ID, vmx->nested.vpid02);
11702 else
11703 vmcs_write16(VIRTUAL_PROCESSOR_ID, vmx->vpid);
11704 }
11705
11706 /*
11707 * L1 may access the L2's PDPTR, so save them to construct vmcs12
11708 */
11709 if (enable_ept) {
11710 vmcs_write64(GUEST_PDPTR0, vmcs12->guest_pdptr0);
11711 vmcs_write64(GUEST_PDPTR1, vmcs12->guest_pdptr1);
11712 vmcs_write64(GUEST_PDPTR2, vmcs12->guest_pdptr2);
11713 vmcs_write64(GUEST_PDPTR3, vmcs12->guest_pdptr3);
11714 }
Radim Krčmář80132f42018-02-02 18:26:58 +010011715
11716 if (cpu_has_vmx_msr_bitmap())
11717 vmcs_write64(MSR_BITMAP, __pa(vmx->nested.vmcs02.msr_bitmap));
Paolo Bonzini74a497f2017-12-20 13:55:39 +010011718}
11719
Nadav Har'Elfe3ef052011-05-25 23:10:02 +030011720/*
11721 * prepare_vmcs02 is called when the L1 guest hypervisor runs its nested
11722 * L2 guest. L1 has a vmcs for L2 (vmcs12), and this function "merges" it
Tiejun Chenb4619662014-09-22 10:31:38 +080011723 * with L0's requirements for its guest (a.k.a. vmcs01), so we can run the L2
Nadav Har'Elfe3ef052011-05-25 23:10:02 +030011724 * guest in a way that will both be appropriate to L1's requests, and our
11725 * needs. In addition to modifying the active vmcs (which is vmcs02), this
11726 * function also has necessary side effects, such as setting various
11727 * vcpu->arch fields.
Ladi Prosekee146c12016-11-30 16:03:09 +010011728 * Returns 0 on success, 1 on failure. Invalid state exit qualification code
11729 * is assigned to entry_failure_code on failure.
Nadav Har'Elfe3ef052011-05-25 23:10:02 +030011730 */
Ladi Prosekee146c12016-11-30 16:03:09 +010011731static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
Jim Mattson6514dc32018-04-26 16:09:12 -070011732 u32 *entry_failure_code)
Nadav Har'Elfe3ef052011-05-25 23:10:02 +030011733{
11734 struct vcpu_vmx *vmx = to_vmx(vcpu);
Bandan Das03efce62017-05-05 15:25:15 -040011735 u32 exec_control, vmcs12_exec_ctrl;
Nadav Har'Elfe3ef052011-05-25 23:10:02 +030011736
Sean Christopherson9d1887e2018-03-05 09:33:27 -080011737 if (vmx->nested.dirty_vmcs12) {
Jim Mattson6514dc32018-04-26 16:09:12 -070011738 prepare_vmcs02_full(vcpu, vmcs12);
Sean Christopherson9d1887e2018-03-05 09:33:27 -080011739 vmx->nested.dirty_vmcs12 = false;
11740 }
11741
Paolo Bonzini8665c3f2017-12-20 13:56:53 +010011742 /*
11743 * First, the fields that are shadowed. This must be kept in sync
11744 * with vmx_shadow_fields.h.
11745 */
11746
Nadav Har'Elfe3ef052011-05-25 23:10:02 +030011747 vmcs_write16(GUEST_CS_SELECTOR, vmcs12->guest_cs_selector);
Nadav Har'Elfe3ef052011-05-25 23:10:02 +030011748 vmcs_write32(GUEST_CS_LIMIT, vmcs12->guest_cs_limit);
Nadav Har'Elfe3ef052011-05-25 23:10:02 +030011749 vmcs_write32(GUEST_CS_AR_BYTES, vmcs12->guest_cs_ar_bytes);
Nadav Har'Elfe3ef052011-05-25 23:10:02 +030011750 vmcs_writel(GUEST_ES_BASE, vmcs12->guest_es_base);
11751 vmcs_writel(GUEST_CS_BASE, vmcs12->guest_cs_base);
Paolo Bonzini8665c3f2017-12-20 13:56:53 +010011752
Jim Mattson6514dc32018-04-26 16:09:12 -070011753 if (vmx->nested.nested_run_pending &&
Jim Mattsoncf8b84f2016-11-30 12:03:42 -080011754 (vmcs12->vm_entry_controls & VM_ENTRY_LOAD_DEBUG_CONTROLS)) {
Jan Kiszka2996fca2014-06-16 13:59:43 +020011755 kvm_set_dr(vcpu, 7, vmcs12->guest_dr7);
11756 vmcs_write64(GUEST_IA32_DEBUGCTL, vmcs12->guest_ia32_debugctl);
11757 } else {
11758 kvm_set_dr(vcpu, 7, vcpu->arch.dr7);
11759 vmcs_write64(GUEST_IA32_DEBUGCTL, vmx->nested.vmcs01_debugctl);
11760 }
Jim Mattson6514dc32018-04-26 16:09:12 -070011761 if (vmx->nested.nested_run_pending) {
Jim Mattsoncf8b84f2016-11-30 12:03:42 -080011762 vmcs_write32(VM_ENTRY_INTR_INFO_FIELD,
11763 vmcs12->vm_entry_intr_info_field);
11764 vmcs_write32(VM_ENTRY_EXCEPTION_ERROR_CODE,
11765 vmcs12->vm_entry_exception_error_code);
11766 vmcs_write32(VM_ENTRY_INSTRUCTION_LEN,
11767 vmcs12->vm_entry_instruction_len);
11768 vmcs_write32(GUEST_INTERRUPTIBILITY_INFO,
11769 vmcs12->guest_interruptibility_info);
Wanpeng Li2d6144e2017-07-25 03:40:46 -070011770 vmx->loaded_vmcs->nmi_known_unmasked =
11771 !(vmcs12->guest_interruptibility_info & GUEST_INTR_STATE_NMI);
Jim Mattsoncf8b84f2016-11-30 12:03:42 -080011772 } else {
11773 vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, 0);
11774 }
Gleb Natapov63fbf592013-07-28 18:31:06 +030011775 vmx_set_rflags(vcpu, vmcs12->guest_rflags);
Nadav Har'Elfe3ef052011-05-25 23:10:02 +030011776
Jan Kiszkaf41245002014-03-07 20:03:13 +010011777 exec_control = vmcs12->pin_based_vm_exec_control;
Wincy Van705699a2015-02-03 23:58:17 +080011778
Paolo Bonzini9314006db2016-07-06 13:23:51 +020011779 /* Preemption timer setting is only taken from vmcs01. */
11780 exec_control &= ~PIN_BASED_VMX_PREEMPTION_TIMER;
11781 exec_control |= vmcs_config.pin_based_exec_ctrl;
11782 if (vmx->hv_deadline_tsc == -1)
11783 exec_control &= ~PIN_BASED_VMX_PREEMPTION_TIMER;
11784
11785 /* Posted interrupts setting is only taken from vmcs12. */
Wincy Van705699a2015-02-03 23:58:17 +080011786 if (nested_cpu_has_posted_intr(vmcs12)) {
Wincy Van705699a2015-02-03 23:58:17 +080011787 vmx->nested.posted_intr_nv = vmcs12->posted_intr_nv;
11788 vmx->nested.pi_pending = false;
Jim Mattson6beb7bd2016-11-30 12:03:45 -080011789 } else {
Wincy Van705699a2015-02-03 23:58:17 +080011790 exec_control &= ~PIN_BASED_POSTED_INTR;
Jim Mattson6beb7bd2016-11-30 12:03:45 -080011791 }
Wincy Van705699a2015-02-03 23:58:17 +080011792
Jan Kiszkaf41245002014-03-07 20:03:13 +010011793 vmcs_write32(PIN_BASED_VM_EXEC_CONTROL, exec_control);
Nadav Har'Elfe3ef052011-05-25 23:10:02 +030011794
Jan Kiszkaf41245002014-03-07 20:03:13 +010011795 vmx->nested.preemption_timer_expired = false;
11796 if (nested_cpu_has_preemption_timer(vmcs12))
11797 vmx_start_preemption_timer(vcpu);
Jan Kiszka0238ea92013-03-13 11:31:24 +010011798
Nadav Har'Elfe3ef052011-05-25 23:10:02 +030011799 if (cpu_has_secondary_exec_ctrls()) {
Paolo Bonzini80154d72017-08-24 13:55:35 +020011800 exec_control = vmx->secondary_exec_control;
Xiao Guangronge2821622015-09-09 14:05:52 +080011801
Nadav Har'Elfe3ef052011-05-25 23:10:02 +030011802 /* Take the following fields only from vmcs12 */
Paolo Bonzini696dfd92014-05-07 11:20:54 +020011803 exec_control &= ~(SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES |
Paolo Bonzini90a2db62017-07-27 13:22:13 +020011804 SECONDARY_EXEC_ENABLE_INVPCID |
Jan Kiszkab3a2a902015-03-23 19:27:19 +010011805 SECONDARY_EXEC_RDTSCP |
Paolo Bonzini3db13482017-08-24 14:48:03 +020011806 SECONDARY_EXEC_XSAVES |
Paolo Bonzini696dfd92014-05-07 11:20:54 +020011807 SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY |
Bandan Das27c42a12017-08-03 15:54:42 -040011808 SECONDARY_EXEC_APIC_REGISTER_VIRT |
11809 SECONDARY_EXEC_ENABLE_VMFUNC);
Nadav Har'Elfe3ef052011-05-25 23:10:02 +030011810 if (nested_cpu_has(vmcs12,
Bandan Das03efce62017-05-05 15:25:15 -040011811 CPU_BASED_ACTIVATE_SECONDARY_CONTROLS)) {
11812 vmcs12_exec_ctrl = vmcs12->secondary_vm_exec_control &
11813 ~SECONDARY_EXEC_ENABLE_PML;
11814 exec_control |= vmcs12_exec_ctrl;
11815 }
Nadav Har'Elfe3ef052011-05-25 23:10:02 +030011816
Liran Alon32c7acf2018-06-23 02:35:11 +030011817 /* VMCS shadowing for L2 is emulated for now */
11818 exec_control &= ~SECONDARY_EXEC_SHADOW_VMCS;
11819
Paolo Bonzini25a2e4f2017-12-20 14:05:21 +010011820 if (exec_control & SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY)
Wincy Van608406e2015-02-03 23:57:51 +080011821 vmcs_write16(GUEST_INTR_STATUS,
11822 vmcs12->guest_intr_status);
Wincy Van608406e2015-02-03 23:57:51 +080011823
Jim Mattson6beb7bd2016-11-30 12:03:45 -080011824 /*
11825 * Write an illegal value to APIC_ACCESS_ADDR. Later,
11826 * nested_get_vmcs12_pages will either fix it up or
11827 * remove the VM execution control.
11828 */
11829 if (exec_control & SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES)
11830 vmcs_write64(APIC_ACCESS_ADDR, -1ull);
11831
Nadav Har'Elfe3ef052011-05-25 23:10:02 +030011832 vmcs_write32(SECONDARY_VM_EXEC_CONTROL, exec_control);
11833 }
11834
Jim Mattson83bafef2016-10-04 10:48:38 -070011835 /*
Nadav Har'Elfe3ef052011-05-25 23:10:02 +030011836 * HOST_RSP is normally set correctly in vmx_vcpu_run() just before
11837 * entry, but only if the current (host) sp changed from the value
11838 * we wrote last (vmx->host_rsp). This cache is no longer relevant
11839 * if we switch vmcs, and rather than hold a separate cache per vmcs,
11840 * here we just force the write to happen on entry.
11841 */
11842 vmx->host_rsp = 0;
11843
11844 exec_control = vmx_exec_control(vmx); /* L0's desires */
11845 exec_control &= ~CPU_BASED_VIRTUAL_INTR_PENDING;
11846 exec_control &= ~CPU_BASED_VIRTUAL_NMI_PENDING;
11847 exec_control &= ~CPU_BASED_TPR_SHADOW;
11848 exec_control |= vmcs12->cpu_based_vm_exec_control;
Wanpeng Lia7c0b072014-08-21 19:46:50 +080011849
Jim Mattson6beb7bd2016-11-30 12:03:45 -080011850 /*
11851 * Write an illegal value to VIRTUAL_APIC_PAGE_ADDR. Later, if
11852 * nested_get_vmcs12_pages can't fix it up, the illegal value
11853 * will result in a VM entry failure.
11854 */
Wanpeng Lia7c0b072014-08-21 19:46:50 +080011855 if (exec_control & CPU_BASED_TPR_SHADOW) {
Jim Mattson6beb7bd2016-11-30 12:03:45 -080011856 vmcs_write64(VIRTUAL_APIC_PAGE_ADDR, -1ull);
Wanpeng Lia7c0b072014-08-21 19:46:50 +080011857 vmcs_write32(TPR_THRESHOLD, vmcs12->tpr_threshold);
Jim Mattson51aa68e2017-09-12 13:02:54 -070011858 } else {
11859#ifdef CONFIG_X86_64
11860 exec_control |= CPU_BASED_CR8_LOAD_EXITING |
11861 CPU_BASED_CR8_STORE_EXITING;
11862#endif
Wanpeng Lia7c0b072014-08-21 19:46:50 +080011863 }
11864
Nadav Har'Elfe3ef052011-05-25 23:10:02 +030011865 /*
Quan Xu8eb73e22017-12-12 16:44:21 +080011866 * A vmexit (to either L1 hypervisor or L0 userspace) is always needed
11867 * for I/O port accesses.
Nadav Har'Elfe3ef052011-05-25 23:10:02 +030011868 */
Nadav Har'Elfe3ef052011-05-25 23:10:02 +030011869 exec_control &= ~CPU_BASED_USE_IO_BITMAPS;
11870 exec_control |= CPU_BASED_UNCOND_IO_EXITING;
11871
11872 vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, exec_control);
11873
11874 /* EXCEPTION_BITMAP and CR0_GUEST_HOST_MASK should basically be the
11875 * bitwise-or of what L1 wants to trap for L2, and what we want to
11876 * trap. Note that CR0.TS also needs updating - we do this later.
11877 */
11878 update_exception_bitmap(vcpu);
11879 vcpu->arch.cr0_guest_owned_bits &= ~vmcs12->cr0_guest_host_mask;
11880 vmcs_writel(CR0_GUEST_HOST_MASK, ~vcpu->arch.cr0_guest_owned_bits);
11881
Nadav Har'El8049d652013-08-05 11:07:06 +030011882 /* L2->L1 exit controls are emulated - the hardware exit is to L0 so
11883 * we should use its exit controls. Note that VM_EXIT_LOAD_IA32_EFER
11884 * bits are further modified by vmx_set_efer() below.
11885 */
Jan Kiszkaf41245002014-03-07 20:03:13 +010011886 vmcs_write32(VM_EXIT_CONTROLS, vmcs_config.vmexit_ctrl);
Nadav Har'El8049d652013-08-05 11:07:06 +030011887
11888 /* vmcs12's VM_ENTRY_LOAD_IA32_EFER and VM_ENTRY_IA32E_MODE are
11889 * emulated by vmx_set_efer(), below.
11890 */
Gleb Natapov2961e8762013-11-25 15:37:13 +020011891 vm_entry_controls_init(vmx,
Nadav Har'El8049d652013-08-05 11:07:06 +030011892 (vmcs12->vm_entry_controls & ~VM_ENTRY_LOAD_IA32_EFER &
11893 ~VM_ENTRY_IA32E_MODE) |
Nadav Har'Elfe3ef052011-05-25 23:10:02 +030011894 (vmcs_config.vmentry_ctrl & ~VM_ENTRY_IA32E_MODE));
11895
Jim Mattson6514dc32018-04-26 16:09:12 -070011896 if (vmx->nested.nested_run_pending &&
Jim Mattsoncf8b84f2016-11-30 12:03:42 -080011897 (vmcs12->vm_entry_controls & VM_ENTRY_LOAD_IA32_PAT)) {
Nadav Har'Elfe3ef052011-05-25 23:10:02 +030011898 vmcs_write64(GUEST_IA32_PAT, vmcs12->guest_ia32_pat);
Jan Kiszka44811c02013-08-04 17:17:27 +020011899 vcpu->arch.pat = vmcs12->guest_ia32_pat;
Jim Mattsoncf8b84f2016-11-30 12:03:42 -080011900 } else if (vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_IA32_PAT) {
Nadav Har'Elfe3ef052011-05-25 23:10:02 +030011901 vmcs_write64(GUEST_IA32_PAT, vmx->vcpu.arch.pat);
Jim Mattsoncf8b84f2016-11-30 12:03:42 -080011902 }
Nadav Har'Elfe3ef052011-05-25 23:10:02 +030011903
KarimAllah Ahmede79f2452018-04-14 05:10:52 +020011904 vmcs_write64(TSC_OFFSET, vcpu->arch.tsc_offset);
11905
Peter Feinerc95ba922016-08-17 09:36:47 -070011906 if (kvm_has_tsc_control)
11907 decache_tsc_multiplier(vmx);
Nadav Har'Elfe3ef052011-05-25 23:10:02 +030011908
11909 if (enable_vpid) {
11910 /*
Wanpeng Li5c614b32015-10-13 09:18:36 -070011911		 * There is no direct mapping between vpid02 and vpid12; vpid02
11912		 * is per-vCPU for L0 and is reused, while vpid12 is changed with
11913		 * a single INVVPID during nested vmentry.  vpid12 is allocated
11914		 * by L1 for L2, so it does not influence the global bitmap
11915		 * (used for vpid01 and vpid02 allocation) even if L1 spawns
11916		 * a lot of nested vCPUs.
Nadav Har'Elfe3ef052011-05-25 23:10:02 +030011917 */
Wanpeng Li5c614b32015-10-13 09:18:36 -070011918 if (nested_cpu_has_vpid(vmcs12) && vmx->nested.vpid02) {
Wanpeng Li5c614b32015-10-13 09:18:36 -070011919 if (vmcs12->virtual_processor_id != vmx->nested.last_vpid) {
11920 vmx->nested.last_vpid = vmcs12->virtual_processor_id;
Liran Alon6bce30c2018-05-22 17:16:12 +030011921 __vmx_flush_tlb(vcpu, vmx->nested.vpid02, true);
Wanpeng Li5c614b32015-10-13 09:18:36 -070011922 }
11923 } else {
Wanpeng Lic2ba05c2017-12-12 17:33:03 -080011924 vmx_flush_tlb(vcpu, true);
Wanpeng Li5c614b32015-10-13 09:18:36 -070011925 }
Nadav Har'Elfe3ef052011-05-25 23:10:02 +030011926 }
11927
Ladi Prosek1fb883b2017-04-04 14:18:53 +020011928 if (enable_pml) {
11929 /*
11930 * Conceptually we want to copy the PML address and index from
11931 * vmcs01 here, and then back to vmcs01 on nested vmexit. But,
11932 * since we always flush the log on each vmexit, this happens
11933 * to be equivalent to simply resetting the fields in vmcs02.
11934 */
11935 ASSERT(vmx->pml_pg);
11936 vmcs_write64(PML_ADDRESS, page_to_phys(vmx->pml_pg));
11937 vmcs_write16(GUEST_PML_INDEX, PML_ENTITY_NUM - 1);
11938 }
11939
Nadav Har'El155a97a2013-08-05 11:07:16 +030011940 if (nested_cpu_has_ept(vmcs12)) {
Paolo Bonziniae1e2d12017-03-30 11:55:30 +020011941 if (nested_ept_init_mmu_context(vcpu)) {
11942 *entry_failure_code = ENTRY_FAIL_DEFAULT;
11943 return 1;
11944 }
Jim Mattsonfb6c8192017-03-16 13:53:59 -070011945 } else if (nested_cpu_has2(vmcs12,
11946 SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES)) {
Junaid Shahida468f2d2018-04-26 13:09:50 -070011947 vmx_flush_tlb(vcpu, true);
Nadav Har'El155a97a2013-08-05 11:07:16 +030011948 }
11949
Nadav Har'Elfe3ef052011-05-25 23:10:02 +030011950 /*
Paolo Bonzinibd7e5b02017-02-03 21:18:52 -080011951 * This sets GUEST_CR0 to vmcs12->guest_cr0, possibly modifying those
11952 * bits which we consider mandatory enabled.
Nadav Har'Elfe3ef052011-05-25 23:10:02 +030011953 * The CR0_READ_SHADOW is what L2 should have expected to read given
 * the specifications by L1; it's not enough to take
 * vmcs12->cr0_read_shadow because our cr0_guest_host_mask can
 * have more bits set than L1 expected.
11957 */
11958 vmx_set_cr0(vcpu, vmcs12->guest_cr0);
11959 vmcs_writel(CR0_READ_SHADOW, nested_read_cr0(vmcs12));
11960
11961 vmx_set_cr4(vcpu, vmcs12->guest_cr4);
11962 vmcs_writel(CR4_READ_SHADOW, nested_read_cr4(vmcs12));
11963
Jim Mattson6514dc32018-04-26 16:09:12 -070011964 if (vmx->nested.nested_run_pending &&
Jim Mattsoncf8b84f2016-11-30 12:03:42 -080011965 (vmcs12->vm_entry_controls & VM_ENTRY_LOAD_IA32_EFER))
David Matlack5a6a9742016-11-29 18:14:10 -080011966 vcpu->arch.efer = vmcs12->guest_ia32_efer;
11967 else if (vmcs12->vm_entry_controls & VM_ENTRY_IA32E_MODE)
11968 vcpu->arch.efer |= (EFER_LMA | EFER_LME);
11969 else
11970 vcpu->arch.efer &= ~(EFER_LMA | EFER_LME);
11971 /* Note: modifies VM_ENTRY/EXIT_CONTROLS and GUEST/HOST_IA32_EFER */
11972 vmx_set_efer(vcpu, vcpu->arch.efer);
11973
Sean Christopherson2bb8caf2018-03-12 10:56:13 -070011974 /*
11975 * Guest state is invalid and unrestricted guest is disabled,
11976 * which means L1 attempted VMEntry to L2 with invalid state.
11977 * Fail the VMEntry.
11978 */
Paolo Bonzini3184a992018-03-21 14:20:18 +010011979 if (vmx->emulation_required) {
11980 *entry_failure_code = ENTRY_FAIL_DEFAULT;
Sean Christopherson2bb8caf2018-03-12 10:56:13 -070011981 return 1;
Paolo Bonzini3184a992018-03-21 14:20:18 +010011982 }
Sean Christopherson2bb8caf2018-03-12 10:56:13 -070011983
	/* Load the guest CR3, which is shadowed by either EPT or shadow page tables. */
Ladi Prosek7ad658b2017-03-23 07:18:08 +010011985 if (nested_vmx_load_cr3(vcpu, vmcs12->guest_cr3, nested_cpu_has_ept(vmcs12),
Ladi Prosek9ed38ffa2016-11-30 16:03:10 +010011986 entry_failure_code))
11987 return 1;
Ladi Prosek7ca29de2016-11-30 16:03:08 +010011988
Gleb Natapovfeaf0c7d2013-09-25 12:51:36 +030011989 if (!enable_ept)
11990 vcpu->arch.walk_mmu->inject_page_fault = vmx_inject_page_fault_nested;
11991
Nadav Har'Elfe3ef052011-05-25 23:10:02 +030011992 kvm_register_write(vcpu, VCPU_REGS_RSP, vmcs12->guest_rsp);
11993 kvm_register_write(vcpu, VCPU_REGS_RIP, vmcs12->guest_rip);
Ladi Prosekee146c12016-11-30 16:03:09 +010011994 return 0;
Nadav Har'Elfe3ef052011-05-25 23:10:02 +030011995}
11996
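/*
 * Consistency checks on the NMI controls, as required by the SDM:
 * "virtual NMIs" may be set only if "NMI exiting" is set, and
 * "NMI-window exiting" (CPU_BASED_VIRTUAL_NMI_PENDING) may be set
 * only if "virtual NMIs" is set.
 */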
Krish Sadhukhan0c7f6502018-02-20 21:24:39 -050011997static int nested_vmx_check_nmi_controls(struct vmcs12 *vmcs12)
11998{
11999 if (!nested_cpu_has_nmi_exiting(vmcs12) &&
12000 nested_cpu_has_virtual_nmis(vmcs12))
12001 return -EINVAL;
12002
12003 if (!nested_cpu_has_virtual_nmis(vmcs12) &&
12004 nested_cpu_has(vmcs12, CPU_BASED_VIRTUAL_NMI_PENDING))
12005 return -EINVAL;
12006
12007 return 0;
12008}
12009
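/*
 * Validate the control and host-state fields of vmcs12 before entering L2.
 * A non-zero return value is a VM-instruction error number; the caller
 * signals it to L1 via VMfailValid instead of attempting the VM-entry.
 */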
Jim Mattsonca0bde22016-11-30 12:03:46 -080012010static int check_vmentry_prereqs(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
12011{
12012 struct vcpu_vmx *vmx = to_vmx(vcpu);
12013
12014 if (vmcs12->guest_activity_state != GUEST_ACTIVITY_ACTIVE &&
12015 vmcs12->guest_activity_state != GUEST_ACTIVITY_HLT)
12016 return VMXERR_ENTRY_INVALID_CONTROL_FIELD;
12017
Jim Mattson56a20512017-07-06 16:33:06 -070012018 if (nested_vmx_check_io_bitmap_controls(vcpu, vmcs12))
12019 return VMXERR_ENTRY_INVALID_CONTROL_FIELD;
12020
Jim Mattsonca0bde22016-11-30 12:03:46 -080012021 if (nested_vmx_check_msr_bitmap_controls(vcpu, vmcs12))
12022 return VMXERR_ENTRY_INVALID_CONTROL_FIELD;
12023
Krish Sadhukhanf0f4cf52018-04-11 01:10:16 -040012024 if (nested_vmx_check_apic_access_controls(vcpu, vmcs12))
12025 return VMXERR_ENTRY_INVALID_CONTROL_FIELD;
12026
Jim Mattson712b12d2017-08-24 13:24:47 -070012027 if (nested_vmx_check_tpr_shadow_controls(vcpu, vmcs12))
12028 return VMXERR_ENTRY_INVALID_CONTROL_FIELD;
12029
Jim Mattsonca0bde22016-11-30 12:03:46 -080012030 if (nested_vmx_check_apicv_controls(vcpu, vmcs12))
12031 return VMXERR_ENTRY_INVALID_CONTROL_FIELD;
12032
12033 if (nested_vmx_check_msr_switch_controls(vcpu, vmcs12))
12034 return VMXERR_ENTRY_INVALID_CONTROL_FIELD;
12035
Bandan Dasc5f983f2017-05-05 15:25:14 -040012036 if (nested_vmx_check_pml_controls(vcpu, vmcs12))
12037 return VMXERR_ENTRY_INVALID_CONTROL_FIELD;
12038
Liran Alona8a7c022018-06-23 02:35:06 +030012039 if (nested_vmx_check_shadow_vmcs_controls(vcpu, vmcs12))
12040 return VMXERR_ENTRY_INVALID_CONTROL_FIELD;
12041
Jim Mattsonca0bde22016-11-30 12:03:46 -080012042 if (!vmx_control_verify(vmcs12->cpu_based_vm_exec_control,
Paolo Bonzini6677f3d2018-02-26 13:40:08 +010012043 vmx->nested.msrs.procbased_ctls_low,
12044 vmx->nested.msrs.procbased_ctls_high) ||
Jim Mattson2e5b0bd2017-05-04 11:51:58 -070012045 (nested_cpu_has(vmcs12, CPU_BASED_ACTIVATE_SECONDARY_CONTROLS) &&
12046 !vmx_control_verify(vmcs12->secondary_vm_exec_control,
Paolo Bonzini6677f3d2018-02-26 13:40:08 +010012047 vmx->nested.msrs.secondary_ctls_low,
12048 vmx->nested.msrs.secondary_ctls_high)) ||
Jim Mattsonca0bde22016-11-30 12:03:46 -080012049 !vmx_control_verify(vmcs12->pin_based_vm_exec_control,
Paolo Bonzini6677f3d2018-02-26 13:40:08 +010012050 vmx->nested.msrs.pinbased_ctls_low,
12051 vmx->nested.msrs.pinbased_ctls_high) ||
Jim Mattsonca0bde22016-11-30 12:03:46 -080012052 !vmx_control_verify(vmcs12->vm_exit_controls,
Paolo Bonzini6677f3d2018-02-26 13:40:08 +010012053 vmx->nested.msrs.exit_ctls_low,
12054 vmx->nested.msrs.exit_ctls_high) ||
Jim Mattsonca0bde22016-11-30 12:03:46 -080012055 !vmx_control_verify(vmcs12->vm_entry_controls,
Paolo Bonzini6677f3d2018-02-26 13:40:08 +010012056 vmx->nested.msrs.entry_ctls_low,
12057 vmx->nested.msrs.entry_ctls_high))
Jim Mattsonca0bde22016-11-30 12:03:46 -080012058 return VMXERR_ENTRY_INVALID_CONTROL_FIELD;
12059
Krish Sadhukhan0c7f6502018-02-20 21:24:39 -050012060 if (nested_vmx_check_nmi_controls(vmcs12))
Jim Mattsonca0bde22016-11-30 12:03:46 -080012061 return VMXERR_ENTRY_INVALID_CONTROL_FIELD;
12062
Bandan Das41ab9372017-08-03 15:54:43 -040012063 if (nested_cpu_has_vmfunc(vmcs12)) {
12064 if (vmcs12->vm_function_control &
Paolo Bonzini6677f3d2018-02-26 13:40:08 +010012065 ~vmx->nested.msrs.vmfunc_controls)
Bandan Das41ab9372017-08-03 15:54:43 -040012066 return VMXERR_ENTRY_INVALID_CONTROL_FIELD;
12067
12068 if (nested_cpu_has_eptp_switching(vmcs12)) {
12069 if (!nested_cpu_has_ept(vmcs12) ||
12070 !page_address_valid(vcpu, vmcs12->eptp_list_address))
12071 return VMXERR_ENTRY_INVALID_CONTROL_FIELD;
12072 }
12073 }
Bandan Das27c42a12017-08-03 15:54:42 -040012074
Jim Mattsonc7c2c7092017-05-05 11:28:09 -070012075 if (vmcs12->cr3_target_count > nested_cpu_vmx_misc_cr3_count(vcpu))
12076 return VMXERR_ENTRY_INVALID_CONTROL_FIELD;
12077
Jim Mattsonca0bde22016-11-30 12:03:46 -080012078 if (!nested_host_cr0_valid(vcpu, vmcs12->host_cr0) ||
12079 !nested_host_cr4_valid(vcpu, vmcs12->host_cr4) ||
12080 !nested_cr3_valid(vcpu, vmcs12->host_cr3))
12081 return VMXERR_ENTRY_INVALID_HOST_STATE_FIELD;
12082
Marc Orr04473782018-06-20 17:21:29 -070012083 /*
12084 * From the Intel SDM, volume 3:
12085 * Fields relevant to VM-entry event injection must be set properly.
12086 * These fields are the VM-entry interruption-information field, the
12087 * VM-entry exception error code, and the VM-entry instruction length.
12088 */
12089 if (vmcs12->vm_entry_intr_info_field & INTR_INFO_VALID_MASK) {
12090 u32 intr_info = vmcs12->vm_entry_intr_info_field;
12091 u8 vector = intr_info & INTR_INFO_VECTOR_MASK;
12092 u32 intr_type = intr_info & INTR_INFO_INTR_TYPE_MASK;
12093 bool has_error_code = intr_info & INTR_INFO_DELIVER_CODE_MASK;
12094 bool should_have_error_code;
12095 bool urg = nested_cpu_has2(vmcs12,
12096 SECONDARY_EXEC_UNRESTRICTED_GUEST);
12097 bool prot_mode = !urg || vmcs12->guest_cr0 & X86_CR0_PE;
12098
12099 /* VM-entry interruption-info field: interruption type */
12100 if (intr_type == INTR_TYPE_RESERVED ||
12101 (intr_type == INTR_TYPE_OTHER_EVENT &&
12102 !nested_cpu_supports_monitor_trap_flag(vcpu)))
12103 return VMXERR_ENTRY_INVALID_CONTROL_FIELD;
12104
12105 /* VM-entry interruption-info field: vector */
12106 if ((intr_type == INTR_TYPE_NMI_INTR && vector != NMI_VECTOR) ||
12107 (intr_type == INTR_TYPE_HARD_EXCEPTION && vector > 31) ||
12108 (intr_type == INTR_TYPE_OTHER_EVENT && vector != 0))
12109 return VMXERR_ENTRY_INVALID_CONTROL_FIELD;
12110
12111 /* VM-entry interruption-info field: deliver error code */
12112 should_have_error_code =
12113 intr_type == INTR_TYPE_HARD_EXCEPTION && prot_mode &&
12114 x86_exception_has_error_code(vector);
12115 if (has_error_code != should_have_error_code)
12116 return VMXERR_ENTRY_INVALID_CONTROL_FIELD;
12117
12118 /* VM-entry exception error code */
12119 if (has_error_code &&
12120 vmcs12->vm_entry_exception_error_code & GENMASK(31, 15))
12121 return VMXERR_ENTRY_INVALID_CONTROL_FIELD;
12122
12123 /* VM-entry interruption-info field: reserved bits */
12124 if (intr_info & INTR_INFO_RESVD_BITS_MASK)
12125 return VMXERR_ENTRY_INVALID_CONTROL_FIELD;
12126
12127 /* VM-entry instruction length */
12128 switch (intr_type) {
12129 case INTR_TYPE_SOFT_EXCEPTION:
12130 case INTR_TYPE_SOFT_INTR:
12131 case INTR_TYPE_PRIV_SW_EXCEPTION:
12132 if ((vmcs12->vm_entry_instruction_len > 15) ||
12133 (vmcs12->vm_entry_instruction_len == 0 &&
12134 !nested_cpu_has_zero_length_injection(vcpu)))
12135 return VMXERR_ENTRY_INVALID_CONTROL_FIELD;
12136 }
12137 }
12138
Jim Mattsonca0bde22016-11-30 12:03:46 -080012139 return 0;
12140}
12141
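/*
 * Validate the VMCS link pointer: unless it is all-ones, it must reference
 * a legal guest page whose revision id matches VMCS12_REVISION and whose
 * shadow-VMCS indicator matches the "VMCS shadowing" setting in vmcs12.
 */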
Liran Alonf145d902018-06-23 02:35:07 +030012142static int nested_vmx_check_vmcs_link_ptr(struct kvm_vcpu *vcpu,
12143 struct vmcs12 *vmcs12)
12144{
12145 int r;
12146 struct page *page;
12147 struct vmcs12 *shadow;
12148
12149 if (vmcs12->vmcs_link_pointer == -1ull)
12150 return 0;
12151
12152 if (!page_address_valid(vcpu, vmcs12->vmcs_link_pointer))
12153 return -EINVAL;
12154
12155 page = kvm_vcpu_gpa_to_page(vcpu, vmcs12->vmcs_link_pointer);
12156 if (is_error_page(page))
12157 return -EINVAL;
12158
12159 r = 0;
12160 shadow = kmap(page);
12161 if (shadow->hdr.revision_id != VMCS12_REVISION ||
12162 shadow->hdr.shadow_vmcs != nested_cpu_has_shadow_vmcs(vmcs12))
12163 r = -EINVAL;
12164 kunmap(page);
12165 kvm_release_page_clean(page);
12166 return r;
12167}
12168
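/*
 * Check the guest-state fields of vmcs12 that KVM validates in software
 * (CR0/CR4, the VMCS link pointer, IA32_EFER consistency for guest and
 * host, and BNDCFGS). A failure here is reported to L1 as a failed
 * VM-entry with EXIT_REASON_INVALID_STATE and the qualification in
 * *exit_qual.
 */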
Jim Mattsonca0bde22016-11-30 12:03:46 -080012169static int check_vmentry_postreqs(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
12170 u32 *exit_qual)
12171{
12172 bool ia32e;
12173
12174 *exit_qual = ENTRY_FAIL_DEFAULT;
12175
12176 if (!nested_guest_cr0_valid(vcpu, vmcs12->guest_cr0) ||
12177 !nested_guest_cr4_valid(vcpu, vmcs12->guest_cr4))
12178 return 1;
12179
Liran Alonf145d902018-06-23 02:35:07 +030012180 if (nested_vmx_check_vmcs_link_ptr(vcpu, vmcs12)) {
Jim Mattsonca0bde22016-11-30 12:03:46 -080012181 *exit_qual = ENTRY_FAIL_VMCS_LINK_PTR;
12182 return 1;
12183 }
12184
12185 /*
12186 * If the load IA32_EFER VM-entry control is 1, the following checks
12187 * are performed on the field for the IA32_EFER MSR:
12188 * - Bits reserved in the IA32_EFER MSR must be 0.
12189 * - Bit 10 (corresponding to IA32_EFER.LMA) must equal the value of
12190 * the IA-32e mode guest VM-exit control. It must also be identical
12191 * to bit 8 (LME) if bit 31 in the CR0 field (corresponding to
12192 * CR0.PG) is 1.
12193 */
12194 if (to_vmx(vcpu)->nested.nested_run_pending &&
12195 (vmcs12->vm_entry_controls & VM_ENTRY_LOAD_IA32_EFER)) {
12196 ia32e = (vmcs12->vm_entry_controls & VM_ENTRY_IA32E_MODE) != 0;
12197 if (!kvm_valid_efer(vcpu, vmcs12->guest_ia32_efer) ||
12198 ia32e != !!(vmcs12->guest_ia32_efer & EFER_LMA) ||
12199 ((vmcs12->guest_cr0 & X86_CR0_PG) &&
12200 ia32e != !!(vmcs12->guest_ia32_efer & EFER_LME)))
12201 return 1;
12202 }
12203
12204 /*
12205 * If the load IA32_EFER VM-exit control is 1, bits reserved in the
12206 * IA32_EFER MSR must be 0 in the field for that register. In addition,
12207 * the values of the LMA and LME bits in the field must each be that of
12208 * the host address-space size VM-exit control.
12209 */
12210 if (vmcs12->vm_exit_controls & VM_EXIT_LOAD_IA32_EFER) {
12211 ia32e = (vmcs12->vm_exit_controls &
12212 VM_EXIT_HOST_ADDR_SPACE_SIZE) != 0;
12213 if (!kvm_valid_efer(vcpu, vmcs12->host_ia32_efer) ||
12214 ia32e != !!(vmcs12->host_ia32_efer & EFER_LMA) ||
12215 ia32e != !!(vmcs12->host_ia32_efer & EFER_LME))
12216 return 1;
12217 }
12218
Wanpeng Lif1b026a2017-11-05 16:54:48 -080012219 if ((vmcs12->vm_entry_controls & VM_ENTRY_LOAD_BNDCFGS) &&
12220 (is_noncanonical_address(vmcs12->guest_bndcfgs & PAGE_MASK, vcpu) ||
12221 (vmcs12->guest_bndcfgs & MSR_IA32_BNDCFGS_RSVD)))
12222 return 1;
12223
Jim Mattsonca0bde22016-11-30 12:03:46 -080012224 return 0;
12225}
12226
Paolo Bonzini7f7f1ba2018-07-18 18:49:01 +020012227/*
Jim Mattson8fcc4b52018-07-10 11:27:20 +020012228 * If exit_qual is NULL, this is being called from state restore (either RSM
12229 * or KVM_SET_NESTED_STATE). Otherwise it's called from vmlaunch/vmresume.
Paolo Bonzini7f7f1ba2018-07-18 18:49:01 +020012230 */
12231static int enter_vmx_non_root_mode(struct kvm_vcpu *vcpu, u32 *exit_qual)
Jim Mattson858e25c2016-11-30 12:03:47 -080012232{
12233 struct vcpu_vmx *vmx = to_vmx(vcpu);
12234 struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
Paolo Bonzini7f7f1ba2018-07-18 18:49:01 +020012235 bool from_vmentry = !!exit_qual;
12236 u32 dummy_exit_qual;
12237 int r = 0;
Jim Mattson858e25c2016-11-30 12:03:47 -080012238
Jim Mattson858e25c2016-11-30 12:03:47 -080012239 enter_guest_mode(vcpu);
12240
12241 if (!(vmcs12->vm_entry_controls & VM_ENTRY_LOAD_DEBUG_CONTROLS))
12242 vmx->nested.vmcs01_debugctl = vmcs_read64(GUEST_IA32_DEBUGCTL);
12243
Jim Mattsonde3a0022017-11-27 17:22:25 -060012244 vmx_switch_vmcs(vcpu, &vmx->nested.vmcs02);
Jim Mattson858e25c2016-11-30 12:03:47 -080012245 vmx_segment_cache_clear(vmx);
12246
KarimAllah Ahmede79f2452018-04-14 05:10:52 +020012247 if (vmcs12->cpu_based_vm_exec_control & CPU_BASED_USE_TSC_OFFSETING)
12248 vcpu->arch.tsc_offset += vmcs12->tsc_offset;
12249
12250 r = EXIT_REASON_INVALID_STATE;
Paolo Bonzini7f7f1ba2018-07-18 18:49:01 +020012251 if (prepare_vmcs02(vcpu, vmcs12, from_vmentry ? exit_qual : &dummy_exit_qual))
KarimAllah Ahmede79f2452018-04-14 05:10:52 +020012252 goto fail;
Jim Mattson858e25c2016-11-30 12:03:47 -080012253
Paolo Bonzini7f7f1ba2018-07-18 18:49:01 +020012254 if (from_vmentry) {
12255 nested_get_vmcs12_pages(vcpu);
Jim Mattson858e25c2016-11-30 12:03:47 -080012256
Paolo Bonzini7f7f1ba2018-07-18 18:49:01 +020012257 r = EXIT_REASON_MSR_LOAD_FAIL;
12258 *exit_qual = nested_vmx_load_msr(vcpu,
12259 vmcs12->vm_entry_msr_load_addr,
12260 vmcs12->vm_entry_msr_load_count);
12261 if (*exit_qual)
12262 goto fail;
12263 } else {
12264 /*
12265 * The MMU is not initialized to point at the right entities yet and
12266 * "get pages" would need to read data from the guest (i.e. we will
12267 * need to perform gpa to hpa translation). Request a call
12268 * to nested_get_vmcs12_pages before the next VM-entry. The MSRs
12269 * have already been set at vmentry time and should not be reset.
12270 */
12271 kvm_make_request(KVM_REQ_GET_VMCS12_PAGES, vcpu);
12272 }
Jim Mattson858e25c2016-11-30 12:03:47 -080012273
Jim Mattson858e25c2016-11-30 12:03:47 -080012274 /*
12275 * Note no nested_vmx_succeed or nested_vmx_fail here. At this point
12276 * we are no longer running L1, and VMLAUNCH/VMRESUME has not yet
12277 * returned as far as L1 is concerned. It will only return (and set
12278 * the success flag) when L2 exits (see nested_vmx_vmexit()).
12279 */
12280 return 0;
KarimAllah Ahmede79f2452018-04-14 05:10:52 +020012281
12282fail:
12283 if (vmcs12->cpu_based_vm_exec_control & CPU_BASED_USE_TSC_OFFSETING)
12284 vcpu->arch.tsc_offset -= vmcs12->tsc_offset;
12285 leave_guest_mode(vcpu);
12286 vmx_switch_vmcs(vcpu, &vmx->vmcs01);
Paolo Bonzini7f7f1ba2018-07-18 18:49:01 +020012287 return r;
Jim Mattson858e25c2016-11-30 12:03:47 -080012288}
12289
Nadav Har'Elcd232ad2011-05-25 23:10:33 +030012290/*
12291 * nested_vmx_run() handles a nested entry, i.e., a VMLAUNCH or VMRESUME on L1
12292 * for running an L2 nested guest.
12293 */
12294static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch)
12295{
12296 struct vmcs12 *vmcs12;
12297 struct vcpu_vmx *vmx = to_vmx(vcpu);
Jim Mattsonb3f1dfb2017-07-17 12:00:34 -070012298 u32 interrupt_shadow = vmx_get_interrupt_shadow(vcpu);
Jim Mattsonca0bde22016-11-30 12:03:46 -080012299 u32 exit_qual;
12300 int ret;
Nadav Har'Elcd232ad2011-05-25 23:10:33 +030012301
Kyle Hueyeb277562016-11-29 12:40:39 -080012302 if (!nested_vmx_check_permission(vcpu))
Nadav Har'Elcd232ad2011-05-25 23:10:33 +030012303 return 1;
12304
Kyle Hueyeb277562016-11-29 12:40:39 -080012305 if (!nested_vmx_check_vmcs12(vcpu))
12306 goto out;
12307
Nadav Har'Elcd232ad2011-05-25 23:10:33 +030012308 vmcs12 = get_vmcs12(vcpu);
12309
Liran Alona6192d42018-06-23 02:35:04 +030012310 /*
12311 * Can't VMLAUNCH or VMRESUME a shadow VMCS. Despite the fact
12312 * that there *is* a valid VMCS pointer, RFLAGS.CF is set
12313 * rather than RFLAGS.ZF, and no error number is stored to the
12314 * VM-instruction error field.
12315 */
12316 if (vmcs12->hdr.shadow_vmcs) {
12317 nested_vmx_failInvalid(vcpu);
12318 goto out;
12319 }
12320
Abel Gordon012f83c2013-04-18 14:39:25 +030012321 if (enable_shadow_vmcs)
12322 copy_shadow_to_vmcs12(vmx);
12323
Nadav Har'El7c177932011-05-25 23:12:04 +030012324 /*
12325 * The nested entry process starts with enforcing various prerequisites
 * on vmcs12 as required by the Intel SDM, and acting appropriately when
12327 * they fail: As the SDM explains, some conditions should cause the
12328 * instruction to fail, while others will cause the instruction to seem
12329 * to succeed, but return an EXIT_REASON_INVALID_STATE.
12330 * To speed up the normal (success) code path, we should avoid checking
12331 * for misconfigurations which will anyway be caught by the processor
12332 * when using the merged vmcs02.
12333 */
Jim Mattsonb3f1dfb2017-07-17 12:00:34 -070012334 if (interrupt_shadow & KVM_X86_SHADOW_INT_MOV_SS) {
12335 nested_vmx_failValid(vcpu,
12336 VMXERR_ENTRY_EVENTS_BLOCKED_BY_MOV_SS);
12337 goto out;
12338 }
12339
Nadav Har'El7c177932011-05-25 23:12:04 +030012340 if (vmcs12->launch_state == launch) {
12341 nested_vmx_failValid(vcpu,
12342 launch ? VMXERR_VMLAUNCH_NONCLEAR_VMCS
12343 : VMXERR_VMRESUME_NONLAUNCHED_VMCS);
Kyle Hueyeb277562016-11-29 12:40:39 -080012344 goto out;
Nadav Har'El7c177932011-05-25 23:12:04 +030012345 }
12346
Jim Mattsonca0bde22016-11-30 12:03:46 -080012347 ret = check_vmentry_prereqs(vcpu, vmcs12);
12348 if (ret) {
12349 nested_vmx_failValid(vcpu, ret);
Kyle Hueyeb277562016-11-29 12:40:39 -080012350 goto out;
Paolo Bonzini26539bd2013-04-15 15:00:27 +020012351 }
12352
Nadav Har'El7c177932011-05-25 23:12:04 +030012353 /*
Jim Mattsonca0bde22016-11-30 12:03:46 -080012354 * After this point, the trap flag no longer triggers a singlestep trap
12355 * on the vm entry instructions; don't call kvm_skip_emulated_instruction.
12356 * This is not 100% correct; for performance reasons, we delegate most
12357 * of the checks on host state to the processor. If those fail,
12358 * the singlestep trap is missed.
Jan Kiszka384bb782013-04-20 10:52:36 +020012359 */
Jim Mattsonca0bde22016-11-30 12:03:46 -080012360 skip_emulated_instruction(vcpu);
Jan Kiszka384bb782013-04-20 10:52:36 +020012361
Jim Mattsonca0bde22016-11-30 12:03:46 -080012362 ret = check_vmentry_postreqs(vcpu, vmcs12, &exit_qual);
12363 if (ret) {
12364 nested_vmx_entry_failure(vcpu, vmcs12,
12365 EXIT_REASON_INVALID_STATE, exit_qual);
12366 return 1;
Jan Kiszka384bb782013-04-20 10:52:36 +020012367 }
12368
12369 /*
Nadav Har'El7c177932011-05-25 23:12:04 +030012370 * We're finally done with prerequisite checking, and can start with
12371 * the nested entry.
12372 */
12373
Jim Mattson6514dc32018-04-26 16:09:12 -070012374 vmx->nested.nested_run_pending = 1;
Paolo Bonzini7f7f1ba2018-07-18 18:49:01 +020012375 ret = enter_vmx_non_root_mode(vcpu, &exit_qual);
Jim Mattson6514dc32018-04-26 16:09:12 -070012376 if (ret) {
Paolo Bonzini7f7f1ba2018-07-18 18:49:01 +020012377 nested_vmx_entry_failure(vcpu, vmcs12, ret, exit_qual);
Jim Mattson6514dc32018-04-26 16:09:12 -070012378 vmx->nested.nested_run_pending = 0;
Paolo Bonzini7f7f1ba2018-07-18 18:49:01 +020012379 return 1;
Jim Mattson6514dc32018-04-26 16:09:12 -070012380 }
Wincy Vanff651cb2014-12-11 08:52:58 +030012381
Chao Gao135a06c2018-02-11 10:06:30 +080012382 /*
Liran Alon61ada742018-06-23 02:35:08 +030012383 * Must happen outside of enter_vmx_non_root_mode() as it will
12384 * also be used as part of restoring nVMX state for
12385 * snapshot restore (migration).
12386 *
 * In this flow, it is assumed that the vmcs12 cache was
 * transferred as part of the captured nVMX state and should
 * therefore not be read from guest memory (which may not
 * exist on the destination host yet).
12391 */
12392 nested_cache_shadow_vmcs12(vcpu, vmcs12);
12393
12394 /*
Chao Gao135a06c2018-02-11 10:06:30 +080012395 * If we're entering a halted L2 vcpu and the L2 vcpu won't be woken
12396 * by event injection, halt vcpu.
12397 */
12398 if ((vmcs12->guest_activity_state == GUEST_ACTIVITY_HLT) &&
Jim Mattson6514dc32018-04-26 16:09:12 -070012399 !(vmcs12->vm_entry_intr_info_field & INTR_INFO_VALID_MASK)) {
12400 vmx->nested.nested_run_pending = 0;
Joel Schopp5cb56052015-03-02 13:43:31 -060012401 return kvm_vcpu_halt(vcpu);
Jim Mattson6514dc32018-04-26 16:09:12 -070012402 }
Nadav Har'Elcd232ad2011-05-25 23:10:33 +030012403 return 1;
Kyle Hueyeb277562016-11-29 12:40:39 -080012404
12405out:
Kyle Huey6affcbe2016-11-29 12:40:40 -080012406 return kvm_skip_emulated_instruction(vcpu);
Nadav Har'Elcd232ad2011-05-25 23:10:33 +030012407}
12408
Nadav Har'El4704d0b2011-05-25 23:11:34 +030012409/*
12410 * On a nested exit from L2 to L1, vmcs12.guest_cr0 might not be up-to-date
 * because L2 may have changed some cr0 bits directly (CR0_GUEST_HOST_MASK).
12412 * This function returns the new value we should put in vmcs12.guest_cr0.
12413 * It's not enough to just return the vmcs02 GUEST_CR0. Rather,
12414 * 1. Bits that neither L0 nor L1 trapped, were set directly by L2 and are now
12415 * available in vmcs02 GUEST_CR0. (Note: It's enough to check that L0
12416 * didn't trap the bit, because if L1 did, so would L0).
12417 * 2. Bits that L1 asked to trap (and therefore L0 also did) could not have
12418 * been modified by L2, and L1 knows it. So just leave the old value of
12419 * the bit from vmcs12.guest_cr0. Note that the bit from vmcs02 GUEST_CR0
12420 * isn't relevant, because if L0 traps this bit it can set it to anything.
12421 * 3. Bits that L1 didn't trap, but L0 did. L1 believes the guest could have
12422 * changed these bits, and therefore they need to be updated, but L0
12423 * didn't necessarily allow them to be changed in GUEST_CR0 - and rather
12424 * put them in vmcs02 CR0_READ_SHADOW. So take these bits from there.
12425 */
12426static inline unsigned long
12427vmcs12_guest_cr0(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
12428{
12429 return
12430 /*1*/ (vmcs_readl(GUEST_CR0) & vcpu->arch.cr0_guest_owned_bits) |
12431 /*2*/ (vmcs12->guest_cr0 & vmcs12->cr0_guest_host_mask) |
12432 /*3*/ (vmcs_readl(CR0_READ_SHADOW) & ~(vmcs12->cr0_guest_host_mask |
12433 vcpu->arch.cr0_guest_owned_bits));
12434}
12435
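/*
 * vmcs12_guest_cr4() below applies the same three-case merge as
 * vmcs12_guest_cr0() above, only with the CR4 guest/host masks, i.e.
 * (illustrative summary, not from the original source):
 *
 *   guest_crX = (vmcs02 GUEST_CRX       & guest-owned bits)    |
 *               (vmcs12->guest_crX      & crX_guest_host_mask) |
 *               (vmcs02 CRX_READ_SHADOW & the remaining bits)
 */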
12436static inline unsigned long
12437vmcs12_guest_cr4(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
12438{
12439 return
12440 /*1*/ (vmcs_readl(GUEST_CR4) & vcpu->arch.cr4_guest_owned_bits) |
12441 /*2*/ (vmcs12->guest_cr4 & vmcs12->cr4_guest_host_mask) |
12442 /*3*/ (vmcs_readl(CR4_READ_SHADOW) & ~(vmcs12->cr4_guest_host_mask |
12443 vcpu->arch.cr4_guest_owned_bits));
12444}
12445
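/*
 * Record an event that was queued for injection into L2 (exception, NMI
 * or interrupt) into vmcs12->idt_vectoring_info_field, so that L1 sees it
 * as the IDT-vectoring information of the nested VM-exit.
 */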
Jan Kiszka5f3d5792013-04-14 12:12:46 +020012446static void vmcs12_save_pending_event(struct kvm_vcpu *vcpu,
12447 struct vmcs12 *vmcs12)
12448{
12449 u32 idt_vectoring;
12450 unsigned int nr;
12451
Wanpeng Li664f8e22017-08-24 03:35:09 -070012452 if (vcpu->arch.exception.injected) {
Jan Kiszka5f3d5792013-04-14 12:12:46 +020012453 nr = vcpu->arch.exception.nr;
12454 idt_vectoring = nr | VECTORING_INFO_VALID_MASK;
12455
12456 if (kvm_exception_is_soft(nr)) {
12457 vmcs12->vm_exit_instruction_len =
12458 vcpu->arch.event_exit_inst_len;
12459 idt_vectoring |= INTR_TYPE_SOFT_EXCEPTION;
12460 } else
12461 idt_vectoring |= INTR_TYPE_HARD_EXCEPTION;
12462
12463 if (vcpu->arch.exception.has_error_code) {
12464 idt_vectoring |= VECTORING_INFO_DELIVER_CODE_MASK;
12465 vmcs12->idt_vectoring_error_code =
12466 vcpu->arch.exception.error_code;
12467 }
12468
12469 vmcs12->idt_vectoring_info_field = idt_vectoring;
Jan Kiszkacd2633c2013-10-23 17:42:15 +010012470 } else if (vcpu->arch.nmi_injected) {
Jan Kiszka5f3d5792013-04-14 12:12:46 +020012471 vmcs12->idt_vectoring_info_field =
12472 INTR_TYPE_NMI_INTR | INTR_INFO_VALID_MASK | NMI_VECTOR;
Liran Alon04140b42018-03-23 03:01:31 +030012473 } else if (vcpu->arch.interrupt.injected) {
Jan Kiszka5f3d5792013-04-14 12:12:46 +020012474 nr = vcpu->arch.interrupt.nr;
12475 idt_vectoring = nr | VECTORING_INFO_VALID_MASK;
12476
12477 if (vcpu->arch.interrupt.soft) {
12478 idt_vectoring |= INTR_TYPE_SOFT_INTR;
12479 vmcs12->vm_entry_instruction_len =
12480 vcpu->arch.event_exit_inst_len;
12481 } else
12482 idt_vectoring |= INTR_TYPE_EXT_INTR;
12483
12484 vmcs12->idt_vectoring_info_field = idt_vectoring;
12485 }
12486}
12487
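/*
 * Decide whether a pending event (exception, expired preemption timer,
 * NMI or external interrupt) must cause a VM-exit from L2 to L1.
 * Returns -EBUSY when the event has to wait because a nested VM-entry
 * is still pending or a previous injection must be re-delivered first.
 */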
Jan Kiszkab6b8a142014-03-07 20:03:12 +010012488static int vmx_check_nested_events(struct kvm_vcpu *vcpu, bool external_intr)
12489{
12490 struct vcpu_vmx *vmx = to_vmx(vcpu);
Wanpeng Libfcf83b2017-08-24 03:35:11 -070012491 unsigned long exit_qual;
Liran Alon917dc602017-11-05 16:07:43 +020012492 bool block_nested_events =
12493 vmx->nested.nested_run_pending || kvm_event_needs_reinjection(vcpu);
Wanpeng Liacc9ab62017-02-27 04:24:39 -080012494
Wanpeng Libfcf83b2017-08-24 03:35:11 -070012495 if (vcpu->arch.exception.pending &&
12496 nested_vmx_check_exception(vcpu, &exit_qual)) {
Liran Alon917dc602017-11-05 16:07:43 +020012497 if (block_nested_events)
Wanpeng Libfcf83b2017-08-24 03:35:11 -070012498 return -EBUSY;
12499 nested_vmx_inject_exception_vmexit(vcpu, exit_qual);
Wanpeng Libfcf83b2017-08-24 03:35:11 -070012500 return 0;
12501 }
12502
Jan Kiszkaf41245002014-03-07 20:03:13 +010012503 if (nested_cpu_has_preemption_timer(get_vmcs12(vcpu)) &&
12504 vmx->nested.preemption_timer_expired) {
Liran Alon917dc602017-11-05 16:07:43 +020012505 if (block_nested_events)
Jan Kiszkaf41245002014-03-07 20:03:13 +010012506 return -EBUSY;
12507 nested_vmx_vmexit(vcpu, EXIT_REASON_PREEMPTION_TIMER, 0, 0);
12508 return 0;
12509 }
12510
Jan Kiszkab6b8a142014-03-07 20:03:12 +010012511 if (vcpu->arch.nmi_pending && nested_exit_on_nmi(vcpu)) {
Liran Alon917dc602017-11-05 16:07:43 +020012512 if (block_nested_events)
Jan Kiszkab6b8a142014-03-07 20:03:12 +010012513 return -EBUSY;
12514 nested_vmx_vmexit(vcpu, EXIT_REASON_EXCEPTION_NMI,
12515 NMI_VECTOR | INTR_TYPE_NMI_INTR |
12516 INTR_INFO_VALID_MASK, 0);
12517 /*
12518 * The NMI-triggered VM exit counts as injection:
12519 * clear this one and block further NMIs.
12520 */
12521 vcpu->arch.nmi_pending = 0;
12522 vmx_set_nmi_mask(vcpu, true);
12523 return 0;
12524 }
12525
12526 if ((kvm_cpu_has_interrupt(vcpu) || external_intr) &&
12527 nested_exit_on_intr(vcpu)) {
Liran Alon917dc602017-11-05 16:07:43 +020012528 if (block_nested_events)
Jan Kiszkab6b8a142014-03-07 20:03:12 +010012529 return -EBUSY;
12530 nested_vmx_vmexit(vcpu, EXIT_REASON_EXTERNAL_INTERRUPT, 0, 0);
Wincy Van705699a2015-02-03 23:58:17 +080012531 return 0;
Jan Kiszkab6b8a142014-03-07 20:03:12 +010012532 }
12533
David Hildenbrand6342c502017-01-25 11:58:58 +010012534 vmx_complete_nested_posted_interrupt(vcpu);
12535 return 0;
Jan Kiszkab6b8a142014-03-07 20:03:12 +010012536}
12537
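/*
 * Convert the time left on the emulated hrtimer back into VMX-preemption
 * timer units: guest TSC cycles shifted right by the rate advertised in
 * the IA32_VMX_MISC MSR (VMX_MISC_EMULATED_PREEMPTION_TIMER_RATE).
 */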
Jan Kiszkaf41245002014-03-07 20:03:13 +010012538static u32 vmx_get_preemption_timer_value(struct kvm_vcpu *vcpu)
12539{
12540 ktime_t remaining =
12541 hrtimer_get_remaining(&to_vmx(vcpu)->nested.preemption_timer);
12542 u64 value;
12543
12544 if (ktime_to_ns(remaining) <= 0)
12545 return 0;
12546
12547 value = ktime_to_ns(remaining) * vcpu->arch.virtual_tsc_khz;
12548 do_div(value, 1000000);
12549 return value >> VMX_MISC_EMULATED_PREEMPTION_TIMER_RATE;
12550}
12551
Nadav Har'El4704d0b2011-05-25 23:11:34 +030012552/*
Jim Mattsoncf8b84f2016-11-30 12:03:42 -080012553 * Update the guest state fields of vmcs12 to reflect changes that
12554 * occurred while L2 was running. (The "IA-32e mode guest" bit of the
12555 * VM-entry controls is also updated, since this is really a guest
12556 * state bit.)
Nadav Har'El4704d0b2011-05-25 23:11:34 +030012557 */
Jim Mattsoncf8b84f2016-11-30 12:03:42 -080012558static void sync_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
Nadav Har'El4704d0b2011-05-25 23:11:34 +030012559{
Nadav Har'El4704d0b2011-05-25 23:11:34 +030012560 vmcs12->guest_cr0 = vmcs12_guest_cr0(vcpu, vmcs12);
12561 vmcs12->guest_cr4 = vmcs12_guest_cr4(vcpu, vmcs12);
12562
Nadav Har'El4704d0b2011-05-25 23:11:34 +030012563 vmcs12->guest_rsp = kvm_register_read(vcpu, VCPU_REGS_RSP);
12564 vmcs12->guest_rip = kvm_register_read(vcpu, VCPU_REGS_RIP);
12565 vmcs12->guest_rflags = vmcs_readl(GUEST_RFLAGS);
12566
12567 vmcs12->guest_es_selector = vmcs_read16(GUEST_ES_SELECTOR);
12568 vmcs12->guest_cs_selector = vmcs_read16(GUEST_CS_SELECTOR);
12569 vmcs12->guest_ss_selector = vmcs_read16(GUEST_SS_SELECTOR);
12570 vmcs12->guest_ds_selector = vmcs_read16(GUEST_DS_SELECTOR);
12571 vmcs12->guest_fs_selector = vmcs_read16(GUEST_FS_SELECTOR);
12572 vmcs12->guest_gs_selector = vmcs_read16(GUEST_GS_SELECTOR);
12573 vmcs12->guest_ldtr_selector = vmcs_read16(GUEST_LDTR_SELECTOR);
12574 vmcs12->guest_tr_selector = vmcs_read16(GUEST_TR_SELECTOR);
12575 vmcs12->guest_es_limit = vmcs_read32(GUEST_ES_LIMIT);
12576 vmcs12->guest_cs_limit = vmcs_read32(GUEST_CS_LIMIT);
12577 vmcs12->guest_ss_limit = vmcs_read32(GUEST_SS_LIMIT);
12578 vmcs12->guest_ds_limit = vmcs_read32(GUEST_DS_LIMIT);
12579 vmcs12->guest_fs_limit = vmcs_read32(GUEST_FS_LIMIT);
12580 vmcs12->guest_gs_limit = vmcs_read32(GUEST_GS_LIMIT);
12581 vmcs12->guest_ldtr_limit = vmcs_read32(GUEST_LDTR_LIMIT);
12582 vmcs12->guest_tr_limit = vmcs_read32(GUEST_TR_LIMIT);
12583 vmcs12->guest_gdtr_limit = vmcs_read32(GUEST_GDTR_LIMIT);
12584 vmcs12->guest_idtr_limit = vmcs_read32(GUEST_IDTR_LIMIT);
12585 vmcs12->guest_es_ar_bytes = vmcs_read32(GUEST_ES_AR_BYTES);
12586 vmcs12->guest_cs_ar_bytes = vmcs_read32(GUEST_CS_AR_BYTES);
12587 vmcs12->guest_ss_ar_bytes = vmcs_read32(GUEST_SS_AR_BYTES);
12588 vmcs12->guest_ds_ar_bytes = vmcs_read32(GUEST_DS_AR_BYTES);
12589 vmcs12->guest_fs_ar_bytes = vmcs_read32(GUEST_FS_AR_BYTES);
12590 vmcs12->guest_gs_ar_bytes = vmcs_read32(GUEST_GS_AR_BYTES);
12591 vmcs12->guest_ldtr_ar_bytes = vmcs_read32(GUEST_LDTR_AR_BYTES);
12592 vmcs12->guest_tr_ar_bytes = vmcs_read32(GUEST_TR_AR_BYTES);
12593 vmcs12->guest_es_base = vmcs_readl(GUEST_ES_BASE);
12594 vmcs12->guest_cs_base = vmcs_readl(GUEST_CS_BASE);
12595 vmcs12->guest_ss_base = vmcs_readl(GUEST_SS_BASE);
12596 vmcs12->guest_ds_base = vmcs_readl(GUEST_DS_BASE);
12597 vmcs12->guest_fs_base = vmcs_readl(GUEST_FS_BASE);
12598 vmcs12->guest_gs_base = vmcs_readl(GUEST_GS_BASE);
12599 vmcs12->guest_ldtr_base = vmcs_readl(GUEST_LDTR_BASE);
12600 vmcs12->guest_tr_base = vmcs_readl(GUEST_TR_BASE);
12601 vmcs12->guest_gdtr_base = vmcs_readl(GUEST_GDTR_BASE);
12602 vmcs12->guest_idtr_base = vmcs_readl(GUEST_IDTR_BASE);
12603
Nadav Har'El4704d0b2011-05-25 23:11:34 +030012604 vmcs12->guest_interruptibility_info =
12605 vmcs_read32(GUEST_INTERRUPTIBILITY_INFO);
12606 vmcs12->guest_pending_dbg_exceptions =
12607 vmcs_readl(GUEST_PENDING_DBG_EXCEPTIONS);
Jan Kiszka3edf1e62014-01-04 18:47:24 +010012608 if (vcpu->arch.mp_state == KVM_MP_STATE_HALTED)
12609 vmcs12->guest_activity_state = GUEST_ACTIVITY_HLT;
12610 else
12611 vmcs12->guest_activity_state = GUEST_ACTIVITY_ACTIVE;
Nadav Har'El4704d0b2011-05-25 23:11:34 +030012612
Jan Kiszkaf41245002014-03-07 20:03:13 +010012613 if (nested_cpu_has_preemption_timer(vmcs12)) {
12614 if (vmcs12->vm_exit_controls &
12615 VM_EXIT_SAVE_VMX_PREEMPTION_TIMER)
12616 vmcs12->vmx_preemption_timer_value =
12617 vmx_get_preemption_timer_value(vcpu);
12618 hrtimer_cancel(&to_vmx(vcpu)->nested.preemption_timer);
12619 }
Arthur Chunqi Li7854cbc2013-09-16 16:11:44 +080012620
Nadav Har'El3633cfc2013-08-05 11:07:07 +030012621 /*
12622 * In some cases (usually, nested EPT), L2 is allowed to change its
12623 * own CR3 without exiting. If it has changed it, we must keep it.
12624 * Of course, if L0 is using shadow page tables, GUEST_CR3 was defined
12625 * by L0, not L1 or L2, so we mustn't unconditionally copy it to vmcs12.
12626 *
12627 * Additionally, restore L2's PDPTR to vmcs12.
12628 */
12629 if (enable_ept) {
Paolo Bonzinif3531052015-12-03 15:49:56 +010012630 vmcs12->guest_cr3 = vmcs_readl(GUEST_CR3);
Nadav Har'El3633cfc2013-08-05 11:07:07 +030012631 vmcs12->guest_pdptr0 = vmcs_read64(GUEST_PDPTR0);
12632 vmcs12->guest_pdptr1 = vmcs_read64(GUEST_PDPTR1);
12633 vmcs12->guest_pdptr2 = vmcs_read64(GUEST_PDPTR2);
12634 vmcs12->guest_pdptr3 = vmcs_read64(GUEST_PDPTR3);
12635 }
12636
Jim Mattsond281e132017-06-01 12:44:46 -070012637 vmcs12->guest_linear_address = vmcs_readl(GUEST_LINEAR_ADDRESS);
Jan Dakinevich119a9c02016-09-04 21:22:47 +030012638
Wincy Van608406e2015-02-03 23:57:51 +080012639 if (nested_cpu_has_vid(vmcs12))
12640 vmcs12->guest_intr_status = vmcs_read16(GUEST_INTR_STATUS);
12641
Jan Kiszkac18911a2013-03-13 16:06:41 +010012642 vmcs12->vm_entry_controls =
12643 (vmcs12->vm_entry_controls & ~VM_ENTRY_IA32E_MODE) |
Gleb Natapov2961e8762013-11-25 15:37:13 +020012644 (vm_entry_controls_get(to_vmx(vcpu)) & VM_ENTRY_IA32E_MODE);
Jan Kiszkac18911a2013-03-13 16:06:41 +010012645
Jan Kiszka2996fca2014-06-16 13:59:43 +020012646 if (vmcs12->vm_exit_controls & VM_EXIT_SAVE_DEBUG_CONTROLS) {
12647 kvm_get_dr(vcpu, 7, (unsigned long *)&vmcs12->guest_dr7);
12648 vmcs12->guest_ia32_debugctl = vmcs_read64(GUEST_IA32_DEBUGCTL);
12649 }
12650
Nadav Har'El4704d0b2011-05-25 23:11:34 +030012651 /* TODO: These cannot have changed unless we have MSR bitmaps and
12652 * the relevant bit asks not to trap the change */
Jan Kiszkab8c07d52013-04-06 13:51:21 +020012653 if (vmcs12->vm_exit_controls & VM_EXIT_SAVE_IA32_PAT)
Nadav Har'El4704d0b2011-05-25 23:11:34 +030012654 vmcs12->guest_ia32_pat = vmcs_read64(GUEST_IA32_PAT);
Jan Kiszka10ba54a2013-08-08 16:26:31 +020012655 if (vmcs12->vm_exit_controls & VM_EXIT_SAVE_IA32_EFER)
12656 vmcs12->guest_ia32_efer = vcpu->arch.efer;
Nadav Har'El4704d0b2011-05-25 23:11:34 +030012657 vmcs12->guest_sysenter_cs = vmcs_read32(GUEST_SYSENTER_CS);
12658 vmcs12->guest_sysenter_esp = vmcs_readl(GUEST_SYSENTER_ESP);
12659 vmcs12->guest_sysenter_eip = vmcs_readl(GUEST_SYSENTER_EIP);
Paolo Bonzinia87036a2016-03-08 09:52:13 +010012660 if (kvm_mpx_supported())
Paolo Bonzini36be0b92014-02-24 12:30:04 +010012661 vmcs12->guest_bndcfgs = vmcs_read64(GUEST_BNDCFGS);
Jim Mattsoncf8b84f2016-11-30 12:03:42 -080012662}
12663
12664/*
12665 * prepare_vmcs12 is part of what we need to do when the nested L2 guest exits
12666 * and we want to prepare to run its L1 parent. L1 keeps a vmcs for L2 (vmcs12),
12667 * and this function updates it to reflect the changes to the guest state while
12668 * L2 was running (and perhaps made some exits which were handled directly by L0
12669 * without going back to L1), and to reflect the exit reason.
12670 * Note that we do not have to copy here all VMCS fields, just those that
12671 * could have changed by the L2 guest or the exit - i.e., the guest-state and
12672 * exit-information fields only. Other fields are modified by L1 with VMWRITE,
12673 * which already writes to vmcs12 directly.
12674 */
12675static void prepare_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
12676 u32 exit_reason, u32 exit_intr_info,
12677 unsigned long exit_qualification)
12678{
12679 /* update guest state fields: */
12680 sync_vmcs12(vcpu, vmcs12);
Nadav Har'El4704d0b2011-05-25 23:11:34 +030012681
12682 /* update exit information fields: */
12683
Jan Kiszka533558b2014-01-04 18:47:20 +010012684 vmcs12->vm_exit_reason = exit_reason;
12685 vmcs12->exit_qualification = exit_qualification;
Jan Kiszka533558b2014-01-04 18:47:20 +010012686 vmcs12->vm_exit_intr_info = exit_intr_info;
Paolo Bonzini7313c692017-07-27 10:31:25 +020012687
Jan Kiszka5f3d5792013-04-14 12:12:46 +020012688 vmcs12->idt_vectoring_info_field = 0;
Nadav Har'El4704d0b2011-05-25 23:11:34 +030012689 vmcs12->vm_exit_instruction_len = vmcs_read32(VM_EXIT_INSTRUCTION_LEN);
12690 vmcs12->vmx_instruction_info = vmcs_read32(VMX_INSTRUCTION_INFO);
12691
Jan Kiszka5f3d5792013-04-14 12:12:46 +020012692 if (!(vmcs12->vm_exit_reason & VMX_EXIT_REASONS_FAILED_VMENTRY)) {
Jim Mattson7cdc2d62017-07-06 16:33:05 -070012693 vmcs12->launch_state = 1;
12694
Jan Kiszka5f3d5792013-04-14 12:12:46 +020012695 /* vm_entry_intr_info_field is cleared on exit. Emulate this
12696 * instead of reading the real value. */
Nadav Har'El4704d0b2011-05-25 23:11:34 +030012697 vmcs12->vm_entry_intr_info_field &= ~INTR_INFO_VALID_MASK;
Jan Kiszka5f3d5792013-04-14 12:12:46 +020012698
12699 /*
 * Transfer the event that L0 or L1 may have wanted to inject into
 * L2 to IDT_VECTORING_INFO_FIELD.
12702 */
12703 vmcs12_save_pending_event(vcpu, vmcs12);
12704 }
12705
12706 /*
12707 * Drop what we picked up for L2 via vmx_complete_interrupts. It is
12708 * preserved above and would only end up incorrectly in L1.
12709 */
12710 vcpu->arch.nmi_injected = false;
12711 kvm_clear_exception_queue(vcpu);
12712 kvm_clear_interrupt_queue(vcpu);
Nadav Har'El4704d0b2011-05-25 23:11:34 +030012713}
12714
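/*
 * Restore L1's MMU context on a nested VM-exit (also used on a failed
 * VM-entry): drop the nested EPT context, reload host CR3 from vmcs12,
 * and restore the normal page-fault injection callback when shadow
 * paging is in use.
 */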
Wanpeng Li5af41572017-11-05 16:54:49 -080012715static void load_vmcs12_mmu_host_state(struct kvm_vcpu *vcpu,
12716 struct vmcs12 *vmcs12)
12717{
12718 u32 entry_failure_code;
12719
12720 nested_ept_uninit_mmu_context(vcpu);
12721
12722 /*
12723 * Only PDPTE load can fail as the value of cr3 was checked on entry and
12724 * couldn't have changed.
12725 */
12726 if (nested_vmx_load_cr3(vcpu, vmcs12->host_cr3, false, &entry_failure_code))
12727 nested_vmx_abort(vcpu, VMX_ABORT_LOAD_HOST_PDPTE_FAIL);
12728
12729 if (!enable_ept)
12730 vcpu->arch.walk_mmu->inject_page_fault = kvm_inject_page_fault;
12731}
12732
Nadav Har'El4704d0b2011-05-25 23:11:34 +030012733/*
 * A part of what we need to do when the nested L2 guest exits and we want to
12735 * run its L1 parent, is to reset L1's guest state to the host state specified
12736 * in vmcs12.
12737 * This function is to be called not only on normal nested exit, but also on
12738 * a nested entry failure, as explained in Intel's spec, 3B.23.7 ("VM-Entry
12739 * Failures During or After Loading Guest State").
12740 * This function should be called when the active VMCS is L1's (vmcs01).
12741 */
Jan Kiszka733568f2013-02-23 15:07:47 +010012742static void load_vmcs12_host_state(struct kvm_vcpu *vcpu,
12743 struct vmcs12 *vmcs12)
Nadav Har'El4704d0b2011-05-25 23:11:34 +030012744{
Arthur Chunqi Li21feb4e2013-07-15 16:04:08 +080012745 struct kvm_segment seg;
12746
Nadav Har'El4704d0b2011-05-25 23:11:34 +030012747 if (vmcs12->vm_exit_controls & VM_EXIT_LOAD_IA32_EFER)
12748 vcpu->arch.efer = vmcs12->host_ia32_efer;
Jan Kiszkad1fa0352013-04-14 12:44:54 +020012749 else if (vmcs12->vm_exit_controls & VM_EXIT_HOST_ADDR_SPACE_SIZE)
Nadav Har'El4704d0b2011-05-25 23:11:34 +030012750 vcpu->arch.efer |= (EFER_LMA | EFER_LME);
12751 else
12752 vcpu->arch.efer &= ~(EFER_LMA | EFER_LME);
12753 vmx_set_efer(vcpu, vcpu->arch.efer);
12754
12755 kvm_register_write(vcpu, VCPU_REGS_RSP, vmcs12->host_rsp);
12756 kvm_register_write(vcpu, VCPU_REGS_RIP, vmcs12->host_rip);
H. Peter Anvin1adfa762013-04-27 16:10:11 -070012757 vmx_set_rflags(vcpu, X86_EFLAGS_FIXED);
Nadav Har'El4704d0b2011-05-25 23:11:34 +030012758 /*
12759 * Note that calling vmx_set_cr0 is important, even if cr0 hasn't
Paolo Bonzinibd7e5b02017-02-03 21:18:52 -080012760 * actually changed, because vmx_set_cr0 refers to efer set above.
12761 *
12762 * CR0_GUEST_HOST_MASK is already set in the original vmcs01
12763 * (KVM doesn't change it);
Nadav Har'El4704d0b2011-05-25 23:11:34 +030012764 */
Paolo Bonzinibd7e5b02017-02-03 21:18:52 -080012765 vcpu->arch.cr0_guest_owned_bits = X86_CR0_TS;
Jan Kiszka9e3e4db2013-09-03 21:11:45 +020012766 vmx_set_cr0(vcpu, vmcs12->host_cr0);
Nadav Har'El4704d0b2011-05-25 23:11:34 +030012767
Paolo Bonzinibd7e5b02017-02-03 21:18:52 -080012768 /* Same as above - no reason to call set_cr4_guest_host_mask(). */
Nadav Har'El4704d0b2011-05-25 23:11:34 +030012769 vcpu->arch.cr4_guest_owned_bits = ~vmcs_readl(CR4_GUEST_HOST_MASK);
Haozhong Zhang8eb3f872017-10-10 15:01:22 +080012770 vmx_set_cr4(vcpu, vmcs12->host_cr4);
Nadav Har'El4704d0b2011-05-25 23:11:34 +030012771
Wanpeng Li5af41572017-11-05 16:54:49 -080012772 load_vmcs12_mmu_host_state(vcpu, vmcs12);
Gleb Natapovfeaf0c7d2013-09-25 12:51:36 +030012773
Liran Alon6f1e03b2018-05-22 17:16:14 +030012774 /*
 * If vmcs01 doesn't use VPID, the CPU flushes the TLB on every
 * VMEntry/VMExit. Thus, there is no need to flush the TLB.
 *
 * If vmcs12 uses VPID, TLB entries populated by L2 are tagged with
 * vmx->nested.vpid02 while L1 entries are tagged with vmx->vpid.
 * Thus, there is no need to flush the TLB.
 *
 * Therefore, flush the TLB only when vmcs01 uses VPID and vmcs12
 * does not, since in that case L1 and L2 TLB entries are both
 * tagged with vmx->vpid.
12785 */
12786 if (enable_vpid &&
12787 !(nested_cpu_has_vpid(vmcs12) && to_vmx(vcpu)->nested.vpid02)) {
Wanpeng Lic2ba05c2017-12-12 17:33:03 -080012788 vmx_flush_tlb(vcpu, true);
Nadav Har'El4704d0b2011-05-25 23:11:34 +030012789 }
Nadav Har'El4704d0b2011-05-25 23:11:34 +030012790
12791 vmcs_write32(GUEST_SYSENTER_CS, vmcs12->host_ia32_sysenter_cs);
12792 vmcs_writel(GUEST_SYSENTER_ESP, vmcs12->host_ia32_sysenter_esp);
12793 vmcs_writel(GUEST_SYSENTER_EIP, vmcs12->host_ia32_sysenter_eip);
12794 vmcs_writel(GUEST_IDTR_BASE, vmcs12->host_idtr_base);
12795 vmcs_writel(GUEST_GDTR_BASE, vmcs12->host_gdtr_base);
Ladi Prosek21f2d5512017-10-11 16:54:42 +020012796 vmcs_write32(GUEST_IDTR_LIMIT, 0xFFFF);
12797 vmcs_write32(GUEST_GDTR_LIMIT, 0xFFFF);
Nadav Har'El4704d0b2011-05-25 23:11:34 +030012798
Paolo Bonzini36be0b92014-02-24 12:30:04 +010012799 /* If not VM_EXIT_CLEAR_BNDCFGS, the L2 value propagates to L1. */
12800 if (vmcs12->vm_exit_controls & VM_EXIT_CLEAR_BNDCFGS)
12801 vmcs_write64(GUEST_BNDCFGS, 0);
12802
Jan Kiszka44811c02013-08-04 17:17:27 +020012803 if (vmcs12->vm_exit_controls & VM_EXIT_LOAD_IA32_PAT) {
Nadav Har'El4704d0b2011-05-25 23:11:34 +030012804 vmcs_write64(GUEST_IA32_PAT, vmcs12->host_ia32_pat);
Jan Kiszka44811c02013-08-04 17:17:27 +020012805 vcpu->arch.pat = vmcs12->host_ia32_pat;
12806 }
Nadav Har'El4704d0b2011-05-25 23:11:34 +030012807 if (vmcs12->vm_exit_controls & VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL)
12808 vmcs_write64(GUEST_IA32_PERF_GLOBAL_CTRL,
12809 vmcs12->host_ia32_perf_global_ctrl);
Jan Kiszka503cd0c2013-03-03 13:05:44 +010012810
Arthur Chunqi Li21feb4e2013-07-15 16:04:08 +080012811 /* Set L1 segment info according to Intel SDM
12812 27.5.2 Loading Host Segment and Descriptor-Table Registers */
12813 seg = (struct kvm_segment) {
12814 .base = 0,
12815 .limit = 0xFFFFFFFF,
12816 .selector = vmcs12->host_cs_selector,
12817 .type = 11,
12818 .present = 1,
12819 .s = 1,
12820 .g = 1
12821 };
12822 if (vmcs12->vm_exit_controls & VM_EXIT_HOST_ADDR_SPACE_SIZE)
12823 seg.l = 1;
12824 else
12825 seg.db = 1;
12826 vmx_set_segment(vcpu, &seg, VCPU_SREG_CS);
12827 seg = (struct kvm_segment) {
12828 .base = 0,
12829 .limit = 0xFFFFFFFF,
12830 .type = 3,
12831 .present = 1,
12832 .s = 1,
12833 .db = 1,
12834 .g = 1
12835 };
12836 seg.selector = vmcs12->host_ds_selector;
12837 vmx_set_segment(vcpu, &seg, VCPU_SREG_DS);
12838 seg.selector = vmcs12->host_es_selector;
12839 vmx_set_segment(vcpu, &seg, VCPU_SREG_ES);
12840 seg.selector = vmcs12->host_ss_selector;
12841 vmx_set_segment(vcpu, &seg, VCPU_SREG_SS);
12842 seg.selector = vmcs12->host_fs_selector;
12843 seg.base = vmcs12->host_fs_base;
12844 vmx_set_segment(vcpu, &seg, VCPU_SREG_FS);
12845 seg.selector = vmcs12->host_gs_selector;
12846 seg.base = vmcs12->host_gs_base;
12847 vmx_set_segment(vcpu, &seg, VCPU_SREG_GS);
12848 seg = (struct kvm_segment) {
Gleb Natapov205befd2013-08-04 15:08:06 +030012849 .base = vmcs12->host_tr_base,
Arthur Chunqi Li21feb4e2013-07-15 16:04:08 +080012850 .limit = 0x67,
12851 .selector = vmcs12->host_tr_selector,
12852 .type = 11,
12853 .present = 1
12854 };
12855 vmx_set_segment(vcpu, &seg, VCPU_SREG_TR);
12856
Jan Kiszka503cd0c2013-03-03 13:05:44 +010012857 kvm_set_dr(vcpu, 7, 0x400);
12858 vmcs_write64(GUEST_IA32_DEBUGCTL, 0);
Wincy Vanff651cb2014-12-11 08:52:58 +030012859
Wincy Van3af18d92015-02-03 23:49:31 +080012860 if (cpu_has_vmx_msr_bitmap())
Paolo Bonzini904e14f2018-01-16 16:51:18 +010012861 vmx_update_msr_bitmap(vcpu);
Wincy Van3af18d92015-02-03 23:49:31 +080012862
Wincy Vanff651cb2014-12-11 08:52:58 +030012863 if (nested_vmx_load_msr(vcpu, vmcs12->vm_exit_msr_load_addr,
12864 vmcs12->vm_exit_msr_load_count))
12865 nested_vmx_abort(vcpu, VMX_ABORT_LOAD_HOST_MSR_FAIL);
Nadav Har'El4704d0b2011-05-25 23:11:34 +030012866}
12867
12868/*
12869 * Emulate an exit from nested guest (L2) to L1, i.e., prepare to run L1
12870 * and modify vmcs12 to make it see what it would expect to see there if
12871 * L2 was its real guest. Must only be called when in L2 (is_guest_mode())
12872 */
Jan Kiszka533558b2014-01-04 18:47:20 +010012873static void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason,
12874 u32 exit_intr_info,
12875 unsigned long exit_qualification)
Nadav Har'El4704d0b2011-05-25 23:11:34 +030012876{
12877 struct vcpu_vmx *vmx = to_vmx(vcpu);
Nadav Har'El4704d0b2011-05-25 23:11:34 +030012878 struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
12879
Jan Kiszka5f3d5792013-04-14 12:12:46 +020012880 /* trying to cancel vmlaunch/vmresume is a bug */
12881 WARN_ON_ONCE(vmx->nested.nested_run_pending);
12882
Wanpeng Li6550c4d2017-07-31 19:25:27 -070012883 /*
Jim Mattson4f350c62017-09-14 16:31:44 -070012884 * The only expected VM-instruction error is "VM entry with
12885 * invalid control field(s)." Anything else indicates a
12886 * problem with L0.
Wanpeng Li6550c4d2017-07-31 19:25:27 -070012887 */
Jim Mattson4f350c62017-09-14 16:31:44 -070012888 WARN_ON_ONCE(vmx->fail && (vmcs_read32(VM_INSTRUCTION_ERROR) !=
12889 VMXERR_ENTRY_INVALID_CONTROL_FIELD));
12890
12891 leave_guest_mode(vcpu);
12892
KarimAllah Ahmede79f2452018-04-14 05:10:52 +020012893 if (vmcs12->cpu_based_vm_exec_control & CPU_BASED_USE_TSC_OFFSETING)
12894 vcpu->arch.tsc_offset -= vmcs12->tsc_offset;
12895
Jim Mattson4f350c62017-09-14 16:31:44 -070012896 if (likely(!vmx->fail)) {
Ladi Prosek72e9cbd2017-10-11 16:54:43 +020012897 if (exit_reason == -1)
12898 sync_vmcs12(vcpu, vmcs12);
12899 else
12900 prepare_vmcs12(vcpu, vmcs12, exit_reason, exit_intr_info,
12901 exit_qualification);
Jim Mattson4f350c62017-09-14 16:31:44 -070012902
Liran Alon61ada742018-06-23 02:35:08 +030012903 /*
12904 * Must happen outside of sync_vmcs12() as it will
12905 * also be used to capture vmcs12 cache as part of
12906 * capturing nVMX state for snapshot (migration).
12907 *
12908 * Otherwise, this flush will dirty guest memory at a
 * point where it is already assumed by user-space to be
12910 * immutable.
12911 */
12912 nested_flush_cached_shadow_vmcs12(vcpu, vmcs12);
12913
Jim Mattson4f350c62017-09-14 16:31:44 -070012914 if (nested_vmx_store_msr(vcpu, vmcs12->vm_exit_msr_store_addr,
12915 vmcs12->vm_exit_msr_store_count))
12916 nested_vmx_abort(vcpu, VMX_ABORT_SAVE_GUEST_MSR_FAIL);
Bandan Das77b0f5d2014-04-19 18:17:45 -040012917 }
12918
Jim Mattson4f350c62017-09-14 16:31:44 -070012919 vmx_switch_vmcs(vcpu, &vmx->vmcs01);
Paolo Bonzini8391ce42016-07-07 14:58:33 +020012920 vm_entry_controls_reset_shadow(vmx);
12921 vm_exit_controls_reset_shadow(vmx);
Jan Kiszka36c3cc42013-02-23 22:35:37 +010012922 vmx_segment_cache_clear(vmx);
12923
Paolo Bonzini9314006db2016-07-06 13:23:51 +020012924 /* Update any VMCS fields that might have changed while L2 ran */
Jim Mattson83bafef2016-10-04 10:48:38 -070012925 vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, vmx->msr_autoload.nr);
12926 vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, vmx->msr_autoload.nr);
Paolo Bonziniea26e4e2016-11-01 00:39:48 +010012927 vmcs_write64(TSC_OFFSET, vcpu->arch.tsc_offset);
Paolo Bonzini9314006db2016-07-06 13:23:51 +020012928 if (vmx->hv_deadline_tsc == -1)
12929 vmcs_clear_bits(PIN_BASED_VM_EXEC_CONTROL,
12930 PIN_BASED_VMX_PREEMPTION_TIMER);
12931 else
12932 vmcs_set_bits(PIN_BASED_VM_EXEC_CONTROL,
12933 PIN_BASED_VMX_PREEMPTION_TIMER);
Peter Feinerc95ba922016-08-17 09:36:47 -070012934 if (kvm_has_tsc_control)
12935 decache_tsc_multiplier(vmx);
Nadav Har'El4704d0b2011-05-25 23:11:34 +030012936
Jim Mattson8d860bb2018-05-09 16:56:05 -040012937 if (vmx->nested.change_vmcs01_virtual_apic_mode) {
12938 vmx->nested.change_vmcs01_virtual_apic_mode = false;
12939 vmx_set_virtual_apic_mode(vcpu);
Jim Mattsonfb6c8192017-03-16 13:53:59 -070012940 } else if (!nested_cpu_has_ept(vmcs12) &&
12941 nested_cpu_has2(vmcs12,
12942 SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES)) {
Junaid Shahida468f2d2018-04-26 13:09:50 -070012943 vmx_flush_tlb(vcpu, true);
Radim Krčmářdccbfcf2016-08-08 20:16:23 +020012944 }
Nadav Har'El4704d0b2011-05-25 23:11:34 +030012945
12946 /* This is needed for same reason as it was needed in prepare_vmcs02 */
12947 vmx->host_rsp = 0;
12948
12949 /* Unpin physical memory we referred to in vmcs02 */
12950 if (vmx->nested.apic_access_page) {
David Hildenbrand53a70da2017-08-03 18:11:05 +020012951 kvm_release_page_dirty(vmx->nested.apic_access_page);
Paolo Bonzini48d89b92014-08-26 13:27:46 +020012952 vmx->nested.apic_access_page = NULL;
Nadav Har'El4704d0b2011-05-25 23:11:34 +030012953 }
Wanpeng Lia7c0b072014-08-21 19:46:50 +080012954 if (vmx->nested.virtual_apic_page) {
David Hildenbrand53a70da2017-08-03 18:11:05 +020012955 kvm_release_page_dirty(vmx->nested.virtual_apic_page);
Paolo Bonzini48d89b92014-08-26 13:27:46 +020012956 vmx->nested.virtual_apic_page = NULL;
Wanpeng Lia7c0b072014-08-21 19:46:50 +080012957 }
Wincy Van705699a2015-02-03 23:58:17 +080012958 if (vmx->nested.pi_desc_page) {
12959 kunmap(vmx->nested.pi_desc_page);
David Hildenbrand53a70da2017-08-03 18:11:05 +020012960 kvm_release_page_dirty(vmx->nested.pi_desc_page);
Wincy Van705699a2015-02-03 23:58:17 +080012961 vmx->nested.pi_desc_page = NULL;
12962 vmx->nested.pi_desc = NULL;
12963 }
Nadav Har'El4704d0b2011-05-25 23:11:34 +030012964
12965 /*
 * While L2 was running, the mmu_notifier may have forced the APIC
 * access page's hpa to be reloaded for the L2 vmcs; it needs to be
 * reloaded for L1 as well before entering L1.
12968 */
Wanpeng Lic83b6d12016-09-06 17:20:33 +080012969 kvm_make_request(KVM_REQ_APIC_PAGE_RELOAD, vcpu);
Tang Chen38b99172014-09-24 15:57:54 +080012970
Ladi Prosek72e9cbd2017-10-11 16:54:43 +020012971 if (enable_shadow_vmcs && exit_reason != -1)
Abel Gordon012f83c2013-04-18 14:39:25 +030012972 vmx->nested.sync_shadow_vmcs = true;
Jan Kiszkab6b8a142014-03-07 20:03:12 +010012973
12974 /* in case we halted in L2 */
12975 vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
Jim Mattson4f350c62017-09-14 16:31:44 -070012976
12977 if (likely(!vmx->fail)) {
12978 /*
12979 * TODO: SDM says that with acknowledge interrupt on
12980 * exit, bit 31 of the VM-exit interrupt information
12981 * (valid interrupt) is always set to 1 on
12982 * EXIT_REASON_EXTERNAL_INTERRUPT, so we shouldn't
12983 * need kvm_cpu_has_interrupt(). See the commit
12984 * message for details.
12985 */
12986 if (nested_exit_intr_ack_set(vcpu) &&
12987 exit_reason == EXIT_REASON_EXTERNAL_INTERRUPT &&
12988 kvm_cpu_has_interrupt(vcpu)) {
12989 int irq = kvm_cpu_get_interrupt(vcpu);
12990 WARN_ON(irq < 0);
12991 vmcs12->vm_exit_intr_info = irq |
12992 INTR_INFO_VALID_MASK | INTR_TYPE_EXT_INTR;
12993 }
12994
Ladi Prosek72e9cbd2017-10-11 16:54:43 +020012995 if (exit_reason != -1)
12996 trace_kvm_nested_vmexit_inject(vmcs12->vm_exit_reason,
12997 vmcs12->exit_qualification,
12998 vmcs12->idt_vectoring_info_field,
12999 vmcs12->vm_exit_intr_info,
13000 vmcs12->vm_exit_intr_error_code,
13001 KVM_ISA_VMX);
Jim Mattson4f350c62017-09-14 16:31:44 -070013002
13003 load_vmcs12_host_state(vcpu, vmcs12);
13004
13005 return;
13006 }
13007
13008 /*
13009 * After an early L2 VM-entry failure, we're now back
13010 * in L1 which thinks it just finished a VMLAUNCH or
13011 * VMRESUME instruction, so we need to set the failure
13012 * flag and the VM-instruction error field of the VMCS
13013 * accordingly.
13014 */
13015 nested_vmx_failValid(vcpu, VMXERR_ENTRY_INVALID_CONTROL_FIELD);
Wanpeng Li5af41572017-11-05 16:54:49 -080013016
13017 load_vmcs12_mmu_host_state(vcpu, vmcs12);
13018
Jim Mattson4f350c62017-09-14 16:31:44 -070013019 /*
13020 * The emulated instruction was already skipped in
13021 * nested_vmx_run, but the updated RIP was never
13022 * written back to the vmcs01.
13023 */
13024 skip_emulated_instruction(vcpu);
13025 vmx->fail = 0;
Nadav Har'El4704d0b2011-05-25 23:11:34 +030013026}
13027
Nadav Har'El7c177932011-05-25 23:12:04 +030013028/*
Jan Kiszka42124922014-01-04 18:47:19 +010013029 * Forcibly leave nested mode in order to be able to reset the VCPU later on.
13030 */
13031static void vmx_leave_nested(struct kvm_vcpu *vcpu)
13032{
Wanpeng Li2f707d92017-03-06 04:03:28 -080013033 if (is_guest_mode(vcpu)) {
13034 to_vmx(vcpu)->nested.nested_run_pending = 0;
Jan Kiszka533558b2014-01-04 18:47:20 +010013035 nested_vmx_vmexit(vcpu, -1, 0, 0);
Wanpeng Li2f707d92017-03-06 04:03:28 -080013036 }
Jan Kiszka42124922014-01-04 18:47:19 +010013037 free_nested(to_vmx(vcpu));
13038}
13039
13040/*
Nadav Har'El7c177932011-05-25 23:12:04 +030013041 * L1's failure to enter L2 is a subset of a normal exit, as explained in
13042 * 23.7 "VM-entry failures during or after loading guest state" (this also
13043 * lists the acceptable exit-reason and exit-qualification parameters).
13044 * It should only be called before L2 actually succeeded to run, and when
 * vmcs01 is current (it doesn't leave_guest_mode() or switch VMCSs).
13046 */
13047static void nested_vmx_entry_failure(struct kvm_vcpu *vcpu,
13048 struct vmcs12 *vmcs12,
13049 u32 reason, unsigned long qualification)
13050{
13051 load_vmcs12_host_state(vcpu, vmcs12);
13052 vmcs12->vm_exit_reason = reason | VMX_EXIT_REASONS_FAILED_VMENTRY;
13053 vmcs12->exit_qualification = qualification;
13054 nested_vmx_succeed(vcpu);
Abel Gordon012f83c2013-04-18 14:39:25 +030013055 if (enable_shadow_vmcs)
13056 to_vmx(vcpu)->nested.sync_shadow_vmcs = true;
Nadav Har'El7c177932011-05-25 23:12:04 +030013057}
13058
Joerg Roedel8a76d7f2011-04-04 12:39:27 +020013059static int vmx_check_intercept(struct kvm_vcpu *vcpu,
13060 struct x86_instruction_info *info,
13061 enum x86_intercept_stage stage)
13062{
Paolo Bonzinifb6d4d32016-07-12 11:04:26 +020013063 struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
13064 struct x86_emulate_ctxt *ctxt = &vcpu->arch.emulate_ctxt;
13065
13066 /*
13067 * RDPID causes #UD if disabled through secondary execution controls.
13068 * Because it is marked as EmulateOnUD, we need to intercept it here.
13069 */
13070 if (info->intercept == x86_intercept_rdtscp &&
13071 !nested_cpu_has2(vmcs12, SECONDARY_EXEC_RDTSCP)) {
13072 ctxt->exception.vector = UD_VECTOR;
13073 ctxt->exception.error_code_valid = false;
13074 return X86EMUL_PROPAGATE_FAULT;
13075 }
13076
13077 /* TODO: check more intercepts... */
Joerg Roedel8a76d7f2011-04-04 12:39:27 +020013078 return X86EMUL_CONTINUE;
13079}
13080
Yunhong Jiang64672c92016-06-13 14:19:59 -070013081#ifdef CONFIG_X86_64
13082/* (a << shift) / divisor, return 1 if overflow otherwise 0 */
13083static inline int u64_shl_div_u64(u64 a, unsigned int shift,
13084 u64 divisor, u64 *result)
13085{
13086 u64 low = a << shift, high = a >> (64 - shift);
13087
13088 /* To avoid the overflow on divq */
13089 if (high >= divisor)
13090 return 1;
13091
	/* low holds the result, high holds the remainder, which is discarded */
13093 asm("divq %2\n\t" : "=a" (low), "=d" (high) :
13094 "rm" (divisor), "0" (low), "1" (high));
13095 *result = low;
13096
13097 return 0;
13098}
13099
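/*
 * Program the VMX-preemption timer to fire at the guest's TSC deadline:
 * compute the delta from the current guest TSC, subtract the APIC-timer
 * advance fudge, rescale to host TSC units if TSC scaling is active, and
 * bail out with -ERANGE if the result does not fit the timer's 32-bit
 * field (the caller then falls back to the hrtimer).
 */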
13100static int vmx_set_hv_timer(struct kvm_vcpu *vcpu, u64 guest_deadline_tsc)
13101{
KarimAllah Ahmed386c6dd2018-04-10 14:15:46 +020013102 struct vcpu_vmx *vmx;
Wanpeng Lic5ce8232018-05-29 14:53:17 +080013103 u64 tscl, guest_tscl, delta_tsc, lapic_timer_advance_cycles;
KarimAllah Ahmed386c6dd2018-04-10 14:15:46 +020013104
13105 if (kvm_mwait_in_guest(vcpu->kvm))
13106 return -EOPNOTSUPP;
13107
13108 vmx = to_vmx(vcpu);
13109 tscl = rdtsc();
13110 guest_tscl = kvm_read_l1_tsc(vcpu, tscl);
13111 delta_tsc = max(guest_deadline_tsc, guest_tscl) - guest_tscl;
Wanpeng Lic5ce8232018-05-29 14:53:17 +080013112 lapic_timer_advance_cycles = nsec_to_cycles(vcpu, lapic_timer_advance_ns);
13113
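/*
 * lapic_timer_advance_ns (converted to guest TSC cycles) is subtracted so
 * the hardware timer fires early by the configured advance.
 */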
13114 if (delta_tsc > lapic_timer_advance_cycles)
13115 delta_tsc -= lapic_timer_advance_cycles;
13116 else
13117 delta_tsc = 0;
Yunhong Jiang64672c92016-06-13 14:19:59 -070013118
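/*
 * With TSC scaling, guest_tsc = (host_tsc * ratio) >> kvm_tsc_scaling_ratio_frac_bits,
 * so the host delta is (guest delta << frac_bits) / ratio, which is what
 * u64_shl_div_u64() computes below.
 */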
13119 /* Convert to host delta tsc if tsc scaling is enabled */
13120 if (vcpu->arch.tsc_scaling_ratio != kvm_default_tsc_scaling_ratio &&
13121 u64_shl_div_u64(delta_tsc,
13122 kvm_tsc_scaling_ratio_frac_bits,
13123 vcpu->arch.tsc_scaling_ratio,
13124 &delta_tsc))
13125 return -ERANGE;
13126
13127 /*
13128 * If the delta tsc doesn't fit in 32 bits after shifting right by
13129 * cpu_preemption_timer_multi, we can't use the preemption timer.
13130 * It might fit on later vmentries, but checking on every vmentry
13131 * is costly, so we just fall back to an hrtimer.
13132 */
13133 if (delta_tsc >> (cpu_preemption_timer_multi + 32))
13134 return -ERANGE;
13135
13136 vmx->hv_deadline_tsc = tscl + delta_tsc;
13137 vmcs_set_bits(PIN_BASED_VM_EXEC_CONTROL,
13138 PIN_BASED_VMX_PREEMPTION_TIMER);
Wanpeng Lic8533542017-06-29 06:28:09 -070013139
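/* A return value of 1 (delta_tsc == 0) means the deadline is already due. */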
13140 return delta_tsc == 0;
Yunhong Jiang64672c92016-06-13 14:19:59 -070013141}
13142
13143static void vmx_cancel_hv_timer(struct kvm_vcpu *vcpu)
13144{
13145 struct vcpu_vmx *vmx = to_vmx(vcpu);
13146 vmx->hv_deadline_tsc = -1;
13147 vmcs_clear_bits(PIN_BASED_VM_EXEC_CONTROL,
13148 PIN_BASED_VMX_PREEMPTION_TIMER);
13149}
13150#endif
13151
Paolo Bonzini48d89b92014-08-26 13:27:46 +020013152static void vmx_sched_in(struct kvm_vcpu *vcpu, int cpu)
Radim Krčmářae97a3b2014-08-21 18:08:06 +020013153{
Wanpeng Lib31c1142018-03-12 04:53:04 -070013154 if (!kvm_pause_in_guest(vcpu->kvm))
Radim Krčmářb4a2d312014-08-21 18:08:08 +020013155 shrink_ple_window(vcpu);
Radim Krčmářae97a3b2014-08-21 18:08:06 +020013156}
13157
Kai Huang843e4332015-01-28 10:54:28 +080013158static void vmx_slot_enable_log_dirty(struct kvm *kvm,
13159 struct kvm_memory_slot *slot)
13160{
13161 kvm_mmu_slot_leaf_clear_dirty(kvm, slot);
13162 kvm_mmu_slot_largepage_remove_write_access(kvm, slot);
13163}
13164
13165static void vmx_slot_disable_log_dirty(struct kvm *kvm,
13166 struct kvm_memory_slot *slot)
13167{
13168 kvm_mmu_slot_set_dirty(kvm, slot);
13169}
13170
13171static void vmx_flush_log_dirty(struct kvm *kvm)
13172{
13173 kvm_flush_pml_buffers(kvm);
13174}
13175
Bandan Dasc5f983f2017-05-05 15:25:14 -040013176static int vmx_write_pml_buffer(struct kvm_vcpu *vcpu)
13177{
13178 struct vmcs12 *vmcs12;
13179 struct vcpu_vmx *vmx = to_vmx(vcpu);
13180 gpa_t gpa;
13181 struct page *page = NULL;
13182 u64 *pml_address;
13183
13184 if (is_guest_mode(vcpu)) {
13185 WARN_ON_ONCE(vmx->nested.pml_full);
13186
13187 /*
13188 * Check if PML is enabled for the nested guest.
13189 * Whether EPTP bit 6 (enable accessed/dirty flags) is set has
13190 * already been checked as part of A/D emulation.
13191 */
13192 vmcs12 = get_vmcs12(vcpu);
13193 if (!nested_cpu_has_pml(vmcs12))
13194 return 0;
13195
Dan Carpenter47698862017-05-10 22:43:17 +030013196 if (vmcs12->guest_pml_index >= PML_ENTITY_NUM) {
Bandan Dasc5f983f2017-05-05 15:25:14 -040013197 vmx->nested.pml_full = true;
13198 return 1;
13199 }
13200
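/* PML entries hold 4KiB-aligned guest-physical addresses. */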
13201 gpa = vmcs_read64(GUEST_PHYSICAL_ADDRESS) & ~0xFFFull;
13202
David Hildenbrand5e2f30b2017-08-03 18:11:04 +020013203 page = kvm_vcpu_gpa_to_page(vcpu, vmcs12->pml_address);
13204 if (is_error_page(page))
Bandan Dasc5f983f2017-05-05 15:25:14 -040013205 return 0;
13206
13207 pml_address = kmap(page);
13208 pml_address[vmcs12->guest_pml_index--] = gpa;
13209 kunmap(page);
David Hildenbrand53a70da2017-08-03 18:11:05 +020013210 kvm_release_page_clean(page);
Bandan Dasc5f983f2017-05-05 15:25:14 -040013211 }
13212
13213 return 0;
13214}
13215
Kai Huang843e4332015-01-28 10:54:28 +080013216static void vmx_enable_log_dirty_pt_masked(struct kvm *kvm,
13217 struct kvm_memory_slot *memslot,
13218 gfn_t offset, unsigned long mask)
13219{
13220 kvm_mmu_clear_dirty_pt_masked(kvm, memslot, offset, mask);
13221}
13222
Paolo Bonzinicd39e112017-06-06 12:57:04 +020013223static void __pi_post_block(struct kvm_vcpu *vcpu)
13224{
13225 struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu);
13226 struct pi_desc old, new;
13227 unsigned int dest;
Paolo Bonzinicd39e112017-06-06 12:57:04 +020013228
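/*
 * Undo the pre-block state: point 'NDST' back at the CPU the vCPU now runs
 * on, restore the normal notification vector and take the vCPU off the
 * per-CPU wakeup list.
 */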
13229 do {
13230 old.control = new.control = pi_desc->control;
Paolo Bonzini8b306e22017-06-06 12:57:05 +020013231 WARN(old.nv != POSTED_INTR_WAKEUP_VECTOR,
13232 "Wakeup handler not enabled while the VCPU is blocked\n");
Paolo Bonzinicd39e112017-06-06 12:57:04 +020013233
13234 dest = cpu_physical_id(vcpu->cpu);
13235
13236 if (x2apic_enabled())
13237 new.ndst = dest;
13238 else
13239 new.ndst = (dest << 8) & 0xFF00;
13240
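/*
 * In xAPIC mode the destination APIC ID occupies bits 15:8 of 'NDST';
 * in x2APIC mode 'NDST' holds the full 32-bit APIC ID.
 */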
Paolo Bonzinicd39e112017-06-06 12:57:04 +020013241 /* set 'NV' to 'notification vector' */
13242 new.nv = POSTED_INTR_VECTOR;
Paolo Bonzinic0a16662017-09-28 17:58:41 +020013243 } while (cmpxchg64(&pi_desc->control, old.control,
13244 new.control) != old.control);
Paolo Bonzinicd39e112017-06-06 12:57:04 +020013245
Paolo Bonzini8b306e22017-06-06 12:57:05 +020013246 if (!WARN_ON_ONCE(vcpu->pre_pcpu == -1)) {
13247 spin_lock(&per_cpu(blocked_vcpu_on_cpu_lock, vcpu->pre_pcpu));
Paolo Bonzinicd39e112017-06-06 12:57:04 +020013248 list_del(&vcpu->blocked_vcpu_list);
Paolo Bonzini8b306e22017-06-06 12:57:05 +020013249 spin_unlock(&per_cpu(blocked_vcpu_on_cpu_lock, vcpu->pre_pcpu));
Paolo Bonzinicd39e112017-06-06 12:57:04 +020013250 vcpu->pre_pcpu = -1;
13251 }
13252}
13253
Feng Wuefc64402015-09-18 22:29:51 +080013254/*
Feng Wubf9f6ac2015-09-18 22:29:55 +080013255 * This routine does the following for a vCPU that is about to block
13256 * when VT-d posted interrupts (PI) are enabled:
13257 * - Add the vCPU to the per-CPU wakeup list, so that when an interrupt
13258 * arrives we can find the right vCPU to wake up.
13259 * - Update the posted-interrupt descriptor as follows:
13260 * 'NDST' <-- vcpu->pre_pcpu
13261 * 'NV' <-- POSTED_INTR_WAKEUP_VECTOR
13262 * - If 'ON' is set during this process, meaning at least one interrupt
13263 * has been posted for this vCPU, it cannot block; in that case
13264 * return 1, otherwise return 0.
13265 *
13266 */
Yunhong Jiangbc225122016-06-13 14:19:58 -070013267static int pi_pre_block(struct kvm_vcpu *vcpu)
Feng Wubf9f6ac2015-09-18 22:29:55 +080013268{
Feng Wubf9f6ac2015-09-18 22:29:55 +080013269 unsigned int dest;
13270 struct pi_desc old, new;
13271 struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu);
13272
13273 if (!kvm_arch_has_assigned_device(vcpu->kvm) ||
Yang Zhanga0052192016-06-13 09:56:56 +080013274 !irq_remapping_cap(IRQ_POSTING_CAP) ||
13275 !kvm_vcpu_apicv_active(vcpu))
Feng Wubf9f6ac2015-09-18 22:29:55 +080013276 return 0;
13277
Paolo Bonzini8b306e22017-06-06 12:57:05 +020013278 WARN_ON(irqs_disabled());
13279 local_irq_disable();
13280 if (!WARN_ON_ONCE(vcpu->pre_pcpu != -1)) {
13281 vcpu->pre_pcpu = vcpu->cpu;
13282 spin_lock(&per_cpu(blocked_vcpu_on_cpu_lock, vcpu->pre_pcpu));
13283 list_add_tail(&vcpu->blocked_vcpu_list,
13284 &per_cpu(blocked_vcpu_on_cpu,
13285 vcpu->pre_pcpu));
13286 spin_unlock(&per_cpu(blocked_vcpu_on_cpu_lock, vcpu->pre_pcpu));
13287 }
Feng Wubf9f6ac2015-09-18 22:29:55 +080013288
13289 do {
13290 old.control = new.control = pi_desc->control;
13291
Feng Wubf9f6ac2015-09-18 22:29:55 +080013292 WARN((pi_desc->sn == 1),
13293 "Warning: SN field of posted-interrupts "
13294 "is set before blocking\n");
13295
13296 /*
13297 * Since the vCPU can be preempted during this process,
13298 * vcpu->cpu may differ from pre_pcpu, so we must use
13299 * pre_pcpu as the destination of the wakeup
13300 * notification event; the wakeup handler can then find
13301 * the right vCPU to wake up if an interrupt arrives
13302 * while the vCPU is blocked.
13303 */
13304 dest = cpu_physical_id(vcpu->pre_pcpu);
13305
13306 if (x2apic_enabled())
13307 new.ndst = dest;
13308 else
13309 new.ndst = (dest << 8) & 0xFF00;
13310
13311 /* set 'NV' to 'wakeup vector' */
13312 new.nv = POSTED_INTR_WAKEUP_VECTOR;
Paolo Bonzinic0a16662017-09-28 17:58:41 +020013313 } while (cmpxchg64(&pi_desc->control, old.control,
13314 new.control) != old.control);
Feng Wubf9f6ac2015-09-18 22:29:55 +080013315
Paolo Bonzini8b306e22017-06-06 12:57:05 +020013316 /* We should not block the vCPU if an interrupt is posted for it. */
13317 if (pi_test_on(pi_desc) == 1)
13318 __pi_post_block(vcpu);
13319
13320 local_irq_enable();
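/* pre_pcpu == -1 here means __pi_post_block() undid the blocking setup. */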
13321 return (vcpu->pre_pcpu == -1);
Feng Wubf9f6ac2015-09-18 22:29:55 +080013322}
13323
Yunhong Jiangbc225122016-06-13 14:19:58 -070013324static int vmx_pre_block(struct kvm_vcpu *vcpu)
13325{
13326 if (pi_pre_block(vcpu))
13327 return 1;
13328
Yunhong Jiang64672c92016-06-13 14:19:59 -070013329 if (kvm_lapic_hv_timer_in_use(vcpu))
13330 kvm_lapic_switch_to_sw_timer(vcpu);
13331
Yunhong Jiangbc225122016-06-13 14:19:58 -070013332 return 0;
13333}
13334
13335static void pi_post_block(struct kvm_vcpu *vcpu)
Feng Wubf9f6ac2015-09-18 22:29:55 +080013336{
Paolo Bonzini8b306e22017-06-06 12:57:05 +020013337 if (vcpu->pre_pcpu == -1)
Feng Wubf9f6ac2015-09-18 22:29:55 +080013338 return;
13339
Paolo Bonzini8b306e22017-06-06 12:57:05 +020013340 WARN_ON(irqs_disabled());
13341 local_irq_disable();
Paolo Bonzinicd39e112017-06-06 12:57:04 +020013342 __pi_post_block(vcpu);
Paolo Bonzini8b306e22017-06-06 12:57:05 +020013343 local_irq_enable();
Feng Wubf9f6ac2015-09-18 22:29:55 +080013344}
13345
Yunhong Jiangbc225122016-06-13 14:19:58 -070013346static void vmx_post_block(struct kvm_vcpu *vcpu)
13347{
Yunhong Jiang64672c92016-06-13 14:19:59 -070013348 if (kvm_x86_ops->set_hv_timer)
13349 kvm_lapic_switch_to_hv_timer(vcpu);
13350
Yunhong Jiangbc225122016-06-13 14:19:58 -070013351 pi_post_block(vcpu);
13352}
13353
Feng Wubf9f6ac2015-09-18 22:29:55 +080013354/*
Feng Wuefc64402015-09-18 22:29:51 +080013355 * vmx_update_pi_irte - set IRTE for Posted-Interrupts
13356 *
13357 * @kvm: kvm
13358 * @host_irq: host irq of the interrupt
13359 * @guest_irq: gsi of the interrupt
13360 * @set: set or unset PI
13361 * returns 0 on success, < 0 on failure
13362 */
13363static int vmx_update_pi_irte(struct kvm *kvm, unsigned int host_irq,
13364 uint32_t guest_irq, bool set)
13365{
13366 struct kvm_kernel_irq_routing_entry *e;
13367 struct kvm_irq_routing_table *irq_rt;
13368 struct kvm_lapic_irq irq;
13369 struct kvm_vcpu *vcpu;
13370 struct vcpu_data vcpu_info;
Jan H. Schönherr3a8b0672017-09-07 19:02:30 +010013371 int idx, ret = 0;
Feng Wuefc64402015-09-18 22:29:51 +080013372
13373 if (!kvm_arch_has_assigned_device(kvm) ||
Yang Zhanga0052192016-06-13 09:56:56 +080013374 !irq_remapping_cap(IRQ_POSTING_CAP) ||
13375 !kvm_vcpu_apicv_active(kvm->vcpus[0]))
Feng Wuefc64402015-09-18 22:29:51 +080013376 return 0;
13377
13378 idx = srcu_read_lock(&kvm->irq_srcu);
13379 irq_rt = srcu_dereference(kvm->irq_routing, &kvm->irq_srcu);
Jan H. Schönherr3a8b0672017-09-07 19:02:30 +010013380 if (guest_irq >= irq_rt->nr_rt_entries ||
13381 hlist_empty(&irq_rt->map[guest_irq])) {
13382 pr_warn_once("no route for guest_irq %u/%u (broken user space?)\n",
13383 guest_irq, irq_rt->nr_rt_entries);
13384 goto out;
13385 }
Feng Wuefc64402015-09-18 22:29:51 +080013386
13387 hlist_for_each_entry(e, &irq_rt->map[guest_irq], link) {
13388 if (e->type != KVM_IRQ_ROUTING_MSI)
13389 continue;
13390 /*
13391 * VT-d PI cannot post multicast/broadcast
13392 * interrupts to a vCPU, so we still use interrupt remapping
13393 * for those kinds of interrupts.
13394 *
13395 * For lowest-priority interrupts, we only support
13396 * those with a single CPU as the destination, e.g. the user
13397 * configures the interrupt via /proc/irq or uses
13398 * irqbalance to make the interrupt single-CPU.
13399 *
13400 * Full lowest-priority interrupt delivery may be supported later.
13401 */
13402
Radim Krčmář371313132016-07-12 22:09:27 +020013403 kvm_set_msi_irq(kvm, e, &irq);
Feng Wu23a1c252016-01-25 16:53:32 +080013404 if (!kvm_intr_is_single_vcpu(kvm, &irq, &vcpu)) {
13405 /*
13406 * Make sure the IRTE is in remapped mode if
13407 * we don't handle it in posted mode.
13408 */
13409 ret = irq_set_vcpu_affinity(host_irq, NULL);
13410 if (ret < 0) {
13411 printk(KERN_INFO
13412 "failed to back to remapped mode, irq: %u\n",
13413 host_irq);
13414 goto out;
13415 }
13416
Feng Wuefc64402015-09-18 22:29:51 +080013417 continue;
Feng Wu23a1c252016-01-25 16:53:32 +080013418 }
Feng Wuefc64402015-09-18 22:29:51 +080013419
13420 vcpu_info.pi_desc_addr = __pa(vcpu_to_pi_desc(vcpu));
13421 vcpu_info.vector = irq.vector;
13422
hu huajun2698d822018-04-11 15:16:40 +080013423 trace_kvm_pi_irte_update(host_irq, vcpu->vcpu_id, e->gsi,
Feng Wuefc64402015-09-18 22:29:51 +080013424 vcpu_info.vector, vcpu_info.pi_desc_addr, set);
13425
13426 if (set)
13427 ret = irq_set_vcpu_affinity(host_irq, &vcpu_info);
Haozhong Zhangdc91f2eb2017-09-18 09:56:49 +080013428 else
Feng Wuefc64402015-09-18 22:29:51 +080013429 ret = irq_set_vcpu_affinity(host_irq, NULL);
Feng Wuefc64402015-09-18 22:29:51 +080013430
13431 if (ret < 0) {
13432 printk(KERN_INFO "%s: failed to update PI IRTE\n",
13433 __func__);
13434 goto out;
13435 }
13436 }
13437
13438 ret = 0;
13439out:
13440 srcu_read_unlock(&kvm->irq_srcu, idx);
13441 return ret;
13442}
13443
Ashok Rajc45dcc72016-06-22 14:59:56 +080013444static void vmx_setup_mce(struct kvm_vcpu *vcpu)
13445{
13446 if (vcpu->arch.mcg_cap & MCG_LMCE_P)
13447 to_vmx(vcpu)->msr_ia32_feature_control_valid_bits |=
13448 FEATURE_CONTROL_LMCE;
13449 else
13450 to_vmx(vcpu)->msr_ia32_feature_control_valid_bits &=
13451 ~FEATURE_CONTROL_LMCE;
13452}
13453
Ladi Prosek72d7b372017-10-11 16:54:41 +020013454static int vmx_smi_allowed(struct kvm_vcpu *vcpu)
13455{
Ladi Prosek72e9cbd2017-10-11 16:54:43 +020013456 /* We need a nested vmexit to enter SMM; postpone if a nested run is pending. */
13457 if (to_vmx(vcpu)->nested.nested_run_pending)
13458 return 0;
Ladi Prosek72d7b372017-10-11 16:54:41 +020013459 return 1;
13460}
13461
Ladi Prosek0234bf82017-10-11 16:54:40 +020013462static int vmx_pre_enter_smm(struct kvm_vcpu *vcpu, char *smstate)
13463{
Ladi Prosek72e9cbd2017-10-11 16:54:43 +020013464 struct vcpu_vmx *vmx = to_vmx(vcpu);
13465
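/*
 * Leaving guest mode (via a forced vmexit) and clearing vmxon mirrors how
 * SMM suspends VMX operation; vmx_pre_leave_smm() restores both on RSM.
 */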
13466 vmx->nested.smm.guest_mode = is_guest_mode(vcpu);
13467 if (vmx->nested.smm.guest_mode)
13468 nested_vmx_vmexit(vcpu, -1, 0, 0);
13469
13470 vmx->nested.smm.vmxon = vmx->nested.vmxon;
13471 vmx->nested.vmxon = false;
Wanpeng Licaa057a2018-03-12 04:53:03 -070013472 vmx_clear_hlt(vcpu);
Ladi Prosek0234bf82017-10-11 16:54:40 +020013473 return 0;
13474}
13475
13476static int vmx_pre_leave_smm(struct kvm_vcpu *vcpu, u64 smbase)
13477{
Ladi Prosek72e9cbd2017-10-11 16:54:43 +020013478 struct vcpu_vmx *vmx = to_vmx(vcpu);
13479 int ret;
13480
13481 if (vmx->nested.smm.vmxon) {
13482 vmx->nested.vmxon = true;
13483 vmx->nested.smm.vmxon = false;
13484 }
13485
13486 if (vmx->nested.smm.guest_mode) {
13487 vcpu->arch.hflags &= ~HF_SMM_MASK;
Paolo Bonzini7f7f1ba2018-07-18 18:49:01 +020013488 ret = enter_vmx_non_root_mode(vcpu, NULL);
Ladi Prosek72e9cbd2017-10-11 16:54:43 +020013489 vcpu->arch.hflags |= HF_SMM_MASK;
13490 if (ret)
13491 return ret;
13492
13493 vmx->nested.smm.guest_mode = false;
13494 }
Ladi Prosek0234bf82017-10-11 16:54:40 +020013495 return 0;
13496}
13497
Ladi Prosekcc3d9672017-10-17 16:02:39 +020013498static int enable_smi_window(struct kvm_vcpu *vcpu)
13499{
13500 return 0;
13501}
13502
Jim Mattson8fcc4b52018-07-10 11:27:20 +020013503static int vmx_get_nested_state(struct kvm_vcpu *vcpu,
13504 struct kvm_nested_state __user *user_kvm_nested_state,
13505 u32 user_data_size)
13506{
13507 struct vcpu_vmx *vmx;
13508 struct vmcs12 *vmcs12;
13509 struct kvm_nested_state kvm_state = {
13510 .flags = 0,
13511 .format = 0,
13512 .size = sizeof(kvm_state),
13513 .vmx.vmxon_pa = -1ull,
13514 .vmx.vmcs_pa = -1ull,
13515 };
13516
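/*
 * A NULL vcpu requests only the maximum state size: the header plus room
 * for a vmcs12 and a shadow vmcs12.
 */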
13517 if (!vcpu)
13518 return kvm_state.size + 2 * VMCS12_SIZE;
13519
13520 vmx = to_vmx(vcpu);
13521 vmcs12 = get_vmcs12(vcpu);
13522 if (nested_vmx_allowed(vcpu) &&
13523 (vmx->nested.vmxon || vmx->nested.smm.vmxon)) {
13524 kvm_state.vmx.vmxon_pa = vmx->nested.vmxon_ptr;
13525 kvm_state.vmx.vmcs_pa = vmx->nested.current_vmptr;
13526
Paolo Bonzinifa58a9f2018-07-18 19:45:51 +020013527 if (vmx->nested.current_vmptr != -1ull) {
Jim Mattson8fcc4b52018-07-10 11:27:20 +020013528 kvm_state.size += VMCS12_SIZE;
13529
Paolo Bonzinifa58a9f2018-07-18 19:45:51 +020013530 if (is_guest_mode(vcpu) &&
13531 nested_cpu_has_shadow_vmcs(vmcs12) &&
13532 vmcs12->vmcs_link_pointer != -1ull)
13533 kvm_state.size += VMCS12_SIZE;
13534 }
13535
Jim Mattson8fcc4b52018-07-10 11:27:20 +020013536 if (vmx->nested.smm.vmxon)
13537 kvm_state.vmx.smm.flags |= KVM_STATE_NESTED_SMM_VMXON;
13538
13539 if (vmx->nested.smm.guest_mode)
13540 kvm_state.vmx.smm.flags |= KVM_STATE_NESTED_SMM_GUEST_MODE;
13541
13542 if (is_guest_mode(vcpu)) {
13543 kvm_state.flags |= KVM_STATE_NESTED_GUEST_MODE;
13544
13545 if (vmx->nested.nested_run_pending)
13546 kvm_state.flags |= KVM_STATE_NESTED_RUN_PENDING;
13547 }
13548 }
13549
13550 if (user_data_size < kvm_state.size)
13551 goto out;
13552
13553 if (copy_to_user(user_kvm_nested_state, &kvm_state, sizeof(kvm_state)))
13554 return -EFAULT;
13555
13556 if (vmx->nested.current_vmptr == -1ull)
13557 goto out;
13558
13559 /*
13560 * When running L2, the authoritative vmcs12 state is in the
13561 * vmcs02. When running L1, the authoritative vmcs12 state is
13562 * in the shadow vmcs linked to vmcs01, unless
13563 * sync_shadow_vmcs is set, in which case, the authoritative
13564 * vmcs12 state is in the vmcs12 already.
13565 */
13566 if (is_guest_mode(vcpu))
13567 sync_vmcs12(vcpu, vmcs12);
13568 else if (enable_shadow_vmcs && !vmx->nested.sync_shadow_vmcs)
13569 copy_shadow_to_vmcs12(vmx);
13570
13571 if (copy_to_user(user_kvm_nested_state->data, vmcs12, sizeof(*vmcs12)))
13572 return -EFAULT;
13573
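/* The shadow vmcs12, if any, follows the vmcs12 in the user data blob. */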
Paolo Bonzinifa58a9f2018-07-18 19:45:51 +020013574 if (nested_cpu_has_shadow_vmcs(vmcs12) &&
13575 vmcs12->vmcs_link_pointer != -1ull) {
13576 if (copy_to_user(user_kvm_nested_state->data + VMCS12_SIZE,
13577 get_shadow_vmcs12(vcpu), sizeof(*vmcs12)))
13578 return -EFAULT;
13579 }
13580
Jim Mattson8fcc4b52018-07-10 11:27:20 +020013581out:
13582 return kvm_state.size;
13583}
13584
13585static int vmx_set_nested_state(struct kvm_vcpu *vcpu,
13586 struct kvm_nested_state __user *user_kvm_nested_state,
13587 struct kvm_nested_state *kvm_state)
13588{
13589 struct vcpu_vmx *vmx = to_vmx(vcpu);
13590 struct vmcs12 *vmcs12;
13591 u32 exit_qual;
13592 int ret;
13593
13594 if (kvm_state->format != 0)
13595 return -EINVAL;
13596
13597 if (!nested_vmx_allowed(vcpu))
13598 return kvm_state->vmx.vmxon_pa == -1ull ? 0 : -EINVAL;
13599
13600 if (kvm_state->vmx.vmxon_pa == -1ull) {
13601 if (kvm_state->vmx.smm.flags)
13602 return -EINVAL;
13603
13604 if (kvm_state->vmx.vmcs_pa != -1ull)
13605 return -EINVAL;
13606
13607 vmx_leave_nested(vcpu);
13608 return 0;
13609 }
13610
13611 if (!page_address_valid(vcpu, kvm_state->vmx.vmxon_pa))
13612 return -EINVAL;
13613
13614 if (kvm_state->size < sizeof(kvm_state) + sizeof(*vmcs12))
13615 return -EINVAL;
13616
13617 if (kvm_state->vmx.vmcs_pa == kvm_state->vmx.vmxon_pa ||
13618 !page_address_valid(vcpu, kvm_state->vmx.vmcs_pa))
13619 return -EINVAL;
13620
13621 if ((kvm_state->vmx.smm.flags & KVM_STATE_NESTED_SMM_GUEST_MODE) &&
13622 (kvm_state->flags & KVM_STATE_NESTED_GUEST_MODE))
13623 return -EINVAL;
13624
13625 if (kvm_state->vmx.smm.flags &
13626 ~(KVM_STATE_NESTED_SMM_GUEST_MODE | KVM_STATE_NESTED_SMM_VMXON))
13627 return -EINVAL;
13628
13629 if ((kvm_state->vmx.smm.flags & KVM_STATE_NESTED_SMM_GUEST_MODE) &&
13630 !(kvm_state->vmx.smm.flags & KVM_STATE_NESTED_SMM_VMXON))
13631 return -EINVAL;
13632
13633 vmx_leave_nested(vcpu);
13634 if (kvm_state->vmx.vmxon_pa == -1ull)
13635 return 0;
13636
13637 vmx->nested.vmxon_ptr = kvm_state->vmx.vmxon_pa;
13638 ret = enter_vmx_operation(vcpu);
13639 if (ret)
13640 return ret;
13641
13642 set_current_vmptr(vmx, kvm_state->vmx.vmcs_pa);
13643
13644 if (kvm_state->vmx.smm.flags & KVM_STATE_NESTED_SMM_VMXON) {
13645 vmx->nested.smm.vmxon = true;
13646 vmx->nested.vmxon = false;
13647
13648 if (kvm_state->vmx.smm.flags & KVM_STATE_NESTED_SMM_GUEST_MODE)
13649 vmx->nested.smm.guest_mode = true;
13650 }
13651
13652 vmcs12 = get_vmcs12(vcpu);
13653 if (copy_from_user(vmcs12, user_kvm_nested_state->data, sizeof(*vmcs12)))
13654 return -EFAULT;
13655
Liran Alon392b2f22018-06-23 02:35:01 +030013656 if (vmcs12->hdr.revision_id != VMCS12_REVISION)
Jim Mattson8fcc4b52018-07-10 11:27:20 +020013657 return -EINVAL;
13658
13659 if (!(kvm_state->flags & KVM_STATE_NESTED_GUEST_MODE))
13660 return 0;
13661
13662 vmx->nested.nested_run_pending =
13663 !!(kvm_state->flags & KVM_STATE_NESTED_RUN_PENDING);
13664
Paolo Bonzinifa58a9f2018-07-18 19:45:51 +020013665 if (nested_cpu_has_shadow_vmcs(vmcs12) &&
13666 vmcs12->vmcs_link_pointer != -1ull) {
13667 struct vmcs12 *shadow_vmcs12 = get_shadow_vmcs12(vcpu);
13668 if (kvm_state->size < sizeof(kvm_state) + 2 * sizeof(*vmcs12))
13669 return -EINVAL;
13670
13671 if (copy_from_user(shadow_vmcs12,
13672 user_kvm_nested_state->data + VMCS12_SIZE,
13673 sizeof(*vmcs12)))
13674 return -EFAULT;
13675
13676 if (shadow_vmcs12->hdr.revision_id != VMCS12_REVISION ||
13677 !shadow_vmcs12->hdr.shadow_vmcs)
13678 return -EINVAL;
13679 }
13680
Jim Mattson8fcc4b52018-07-10 11:27:20 +020013681 if (check_vmentry_prereqs(vcpu, vmcs12) ||
13682 check_vmentry_postreqs(vcpu, vmcs12, &exit_qual))
13683 return -EINVAL;
13684
13685 if (kvm_state->flags & KVM_STATE_NESTED_RUN_PENDING)
13686 vmx->nested.nested_run_pending = 1;
13687
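/*
 * dirty_vmcs12 forces vmcs02 to be rebuilt from the vmcs12 that was just
 * copied in before re-entering non-root mode.
 */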
13688 vmx->nested.dirty_vmcs12 = true;
13689 ret = enter_vmx_non_root_mode(vcpu, NULL);
13690 if (ret)
13691 return -EINVAL;
13692
13693 return 0;
13694}
13695
Kees Cook404f6aa2016-08-08 16:29:06 -070013696static struct kvm_x86_ops vmx_x86_ops __ro_after_init = {
Avi Kivity6aa8b732006-12-10 02:21:36 -080013697 .cpu_has_kvm_support = cpu_has_kvm_support,
13698 .disabled_by_bios = vmx_disabled_by_bios,
13699 .hardware_setup = hardware_setup,
13700 .hardware_unsetup = hardware_unsetup,
Yang, Sheng002c7f72007-07-31 14:23:01 +030013701 .check_processor_compatibility = vmx_check_processor_compat,
Avi Kivity6aa8b732006-12-10 02:21:36 -080013702 .hardware_enable = hardware_enable,
13703 .hardware_disable = hardware_disable,
Sheng Yang04547152009-04-01 15:52:31 +080013704 .cpu_has_accelerated_tpr = report_flexpriority,
Tom Lendackybc226f02018-05-10 22:06:39 +020013705 .has_emulated_msr = vmx_has_emulated_msr,
Avi Kivity6aa8b732006-12-10 02:21:36 -080013706
Wanpeng Lib31c1142018-03-12 04:53:04 -070013707 .vm_init = vmx_vm_init,
Sean Christopherson434a1e92018-03-20 12:17:18 -070013708 .vm_alloc = vmx_vm_alloc,
13709 .vm_free = vmx_vm_free,
Wanpeng Lib31c1142018-03-12 04:53:04 -070013710
Avi Kivity6aa8b732006-12-10 02:21:36 -080013711 .vcpu_create = vmx_create_vcpu,
13712 .vcpu_free = vmx_free_vcpu,
Avi Kivity04d2cc72007-09-10 18:10:54 +030013713 .vcpu_reset = vmx_vcpu_reset,
Avi Kivity6aa8b732006-12-10 02:21:36 -080013714
Sean Christopherson6d6095b2018-07-23 12:32:44 -070013715 .prepare_guest_switch = vmx_prepare_switch_to_guest,
Avi Kivity6aa8b732006-12-10 02:21:36 -080013716 .vcpu_load = vmx_vcpu_load,
13717 .vcpu_put = vmx_vcpu_put,
13718
Paolo Bonzinia96036b2015-11-10 11:55:36 +010013719 .update_bp_intercept = update_exception_bitmap,
Tom Lendacky801e4592018-02-21 13:39:51 -060013720 .get_msr_feature = vmx_get_msr_feature,
Avi Kivity6aa8b732006-12-10 02:21:36 -080013721 .get_msr = vmx_get_msr,
13722 .set_msr = vmx_set_msr,
13723 .get_segment_base = vmx_get_segment_base,
13724 .get_segment = vmx_get_segment,
13725 .set_segment = vmx_set_segment,
Izik Eidus2e4d2652008-03-24 19:38:34 +020013726 .get_cpl = vmx_get_cpl,
Avi Kivity6aa8b732006-12-10 02:21:36 -080013727 .get_cs_db_l_bits = vmx_get_cs_db_l_bits,
Avi Kivitye8467fd2009-12-29 18:43:06 +020013728 .decache_cr0_guest_bits = vmx_decache_cr0_guest_bits,
Avi Kivityaff48ba2010-12-05 18:56:11 +020013729 .decache_cr3 = vmx_decache_cr3,
Anthony Liguori25c4c272007-04-27 09:29:21 +030013730 .decache_cr4_guest_bits = vmx_decache_cr4_guest_bits,
Avi Kivity6aa8b732006-12-10 02:21:36 -080013731 .set_cr0 = vmx_set_cr0,
Avi Kivity6aa8b732006-12-10 02:21:36 -080013732 .set_cr3 = vmx_set_cr3,
13733 .set_cr4 = vmx_set_cr4,
Avi Kivity6aa8b732006-12-10 02:21:36 -080013734 .set_efer = vmx_set_efer,
Avi Kivity6aa8b732006-12-10 02:21:36 -080013735 .get_idt = vmx_get_idt,
13736 .set_idt = vmx_set_idt,
13737 .get_gdt = vmx_get_gdt,
13738 .set_gdt = vmx_set_gdt,
Jan Kiszka73aaf249e2014-01-04 18:47:16 +010013739 .get_dr6 = vmx_get_dr6,
13740 .set_dr6 = vmx_set_dr6,
Gleb Natapov020df072010-04-13 10:05:23 +030013741 .set_dr7 = vmx_set_dr7,
Paolo Bonzini81908bf2014-02-21 10:32:27 +010013742 .sync_dirty_debug_regs = vmx_sync_dirty_debug_regs,
Marcelo Tosatti5fdbf972008-06-27 14:58:02 -030013743 .cache_reg = vmx_cache_reg,
Avi Kivity6aa8b732006-12-10 02:21:36 -080013744 .get_rflags = vmx_get_rflags,
13745 .set_rflags = vmx_set_rflags,
Huaitong Hanbe94f6b2016-03-22 16:51:20 +080013746
Avi Kivity6aa8b732006-12-10 02:21:36 -080013747 .tlb_flush = vmx_flush_tlb,
Junaid Shahidfaff8752018-06-29 13:10:05 -070013748 .tlb_flush_gva = vmx_flush_tlb_gva,
Avi Kivity6aa8b732006-12-10 02:21:36 -080013749
Avi Kivity6aa8b732006-12-10 02:21:36 -080013750 .run = vmx_vcpu_run,
Avi Kivity6062d012009-03-23 17:35:17 +020013751 .handle_exit = vmx_handle_exit,
Avi Kivity6aa8b732006-12-10 02:21:36 -080013752 .skip_emulated_instruction = skip_emulated_instruction,
Glauber Costa2809f5d2009-05-12 16:21:05 -040013753 .set_interrupt_shadow = vmx_set_interrupt_shadow,
13754 .get_interrupt_shadow = vmx_get_interrupt_shadow,
Ingo Molnar102d8322007-02-19 14:37:47 +020013755 .patch_hypercall = vmx_patch_hypercall,
Eddie Dong2a8067f2007-08-06 16:29:07 +030013756 .set_irq = vmx_inject_irq,
Gleb Natapov95ba8273132009-04-21 17:45:08 +030013757 .set_nmi = vmx_inject_nmi,
Avi Kivity298101d2007-11-25 13:41:11 +020013758 .queue_exception = vmx_queue_exception,
Avi Kivityb463a6f2010-07-20 15:06:17 +030013759 .cancel_injection = vmx_cancel_injection,
Gleb Natapov78646122009-03-23 12:12:11 +020013760 .interrupt_allowed = vmx_interrupt_allowed,
Gleb Natapov95ba8273132009-04-21 17:45:08 +030013761 .nmi_allowed = vmx_nmi_allowed,
Jan Kiszka3cfc3092009-11-12 01:04:25 +010013762 .get_nmi_mask = vmx_get_nmi_mask,
13763 .set_nmi_mask = vmx_set_nmi_mask,
Gleb Natapov95ba8273132009-04-21 17:45:08 +030013764 .enable_nmi_window = enable_nmi_window,
13765 .enable_irq_window = enable_irq_window,
13766 .update_cr8_intercept = update_cr8_intercept,
Jim Mattson8d860bb2018-05-09 16:56:05 -040013767 .set_virtual_apic_mode = vmx_set_virtual_apic_mode,
Tang Chen38b99172014-09-24 15:57:54 +080013768 .set_apic_access_page_addr = vmx_set_apic_access_page_addr,
Andrey Smetanind62caab2015-11-10 15:36:33 +030013769 .get_enable_apicv = vmx_get_enable_apicv,
13770 .refresh_apicv_exec_ctrl = vmx_refresh_apicv_exec_ctrl,
Yang Zhangc7c9c562013-01-25 10:18:51 +080013771 .load_eoi_exitmap = vmx_load_eoi_exitmap,
Paolo Bonzini967235d2016-12-19 14:03:45 +010013772 .apicv_post_state_restore = vmx_apicv_post_state_restore,
Yang Zhangc7c9c562013-01-25 10:18:51 +080013773 .hwapic_irr_update = vmx_hwapic_irr_update,
13774 .hwapic_isr_update = vmx_hwapic_isr_update,
Yang Zhanga20ed542013-04-11 19:25:15 +080013775 .sync_pir_to_irr = vmx_sync_pir_to_irr,
13776 .deliver_posted_interrupt = vmx_deliver_posted_interrupt,
Gleb Natapov95ba8273132009-04-21 17:45:08 +030013777
Izik Eiduscbc94022007-10-25 00:29:55 +020013778 .set_tss_addr = vmx_set_tss_addr,
Sean Christopherson2ac52ab2018-03-20 12:17:19 -070013779 .set_identity_map_addr = vmx_set_identity_map_addr,
Sheng Yang67253af2008-04-25 10:20:22 +080013780 .get_tdp_level = get_ept_level,
Sheng Yang4b12f0d2009-04-27 20:35:42 +080013781 .get_mt_mask = vmx_get_mt_mask,
Marcelo Tosatti229456f2009-06-17 09:22:14 -030013782
Avi Kivity586f9602010-11-18 13:09:54 +020013783 .get_exit_info = vmx_get_exit_info,
Avi Kivity586f9602010-11-18 13:09:54 +020013784
Sheng Yang17cc3932010-01-05 19:02:27 +080013785 .get_lpage_level = vmx_get_lpage_level,
Sheng Yang0e851882009-12-18 16:48:46 +080013786
13787 .cpuid_update = vmx_cpuid_update,
Sheng Yang4e47c7a2009-12-18 16:48:47 +080013788
13789 .rdtscp_supported = vmx_rdtscp_supported,
Mao, Junjiead756a12012-07-02 01:18:48 +000013790 .invpcid_supported = vmx_invpcid_supported,
Joerg Roedeld4330ef2010-04-22 12:33:11 +020013791
13792 .set_supported_cpuid = vmx_set_supported_cpuid,
Sheng Yangf5f48ee2010-06-30 12:25:15 +080013793
13794 .has_wbinvd_exit = cpu_has_vmx_wbinvd_exit,
Zachary Amsden99e3e302010-08-19 22:07:17 -100013795
KarimAllah Ahmede79f2452018-04-14 05:10:52 +020013796 .read_l1_tsc_offset = vmx_read_l1_tsc_offset,
Zachary Amsden99e3e302010-08-19 22:07:17 -100013797 .write_tsc_offset = vmx_write_tsc_offset,
Joerg Roedel1c97f0a2010-09-10 17:30:41 +020013798
13799 .set_tdp_cr3 = vmx_set_cr3,
Joerg Roedel8a76d7f2011-04-04 12:39:27 +020013800
13801 .check_intercept = vmx_check_intercept,
Yang Zhanga547c6d2013-04-11 19:25:10 +080013802 .handle_external_intr = vmx_handle_external_intr,
Liu, Jinsongda8999d2014-02-24 10:55:46 +000013803 .mpx_supported = vmx_mpx_supported,
Wanpeng Li55412b22014-12-02 19:21:30 +080013804 .xsaves_supported = vmx_xsaves_supported,
Paolo Bonzini66336ca2016-07-12 10:36:41 +020013805 .umip_emulated = vmx_umip_emulated,
Jan Kiszkab6b8a142014-03-07 20:03:12 +010013806
13807 .check_nested_events = vmx_check_nested_events,
Radim Krčmářae97a3b2014-08-21 18:08:06 +020013808
13809 .sched_in = vmx_sched_in,
Kai Huang843e4332015-01-28 10:54:28 +080013810
13811 .slot_enable_log_dirty = vmx_slot_enable_log_dirty,
13812 .slot_disable_log_dirty = vmx_slot_disable_log_dirty,
13813 .flush_log_dirty = vmx_flush_log_dirty,
13814 .enable_log_dirty_pt_masked = vmx_enable_log_dirty_pt_masked,
Bandan Dasc5f983f2017-05-05 15:25:14 -040013815 .write_log_dirty = vmx_write_pml_buffer,
Wei Huang25462f72015-06-19 15:45:05 +020013816
Feng Wubf9f6ac2015-09-18 22:29:55 +080013817 .pre_block = vmx_pre_block,
13818 .post_block = vmx_post_block,
13819
Wei Huang25462f72015-06-19 15:45:05 +020013820 .pmu_ops = &intel_pmu_ops,
Feng Wuefc64402015-09-18 22:29:51 +080013821
13822 .update_pi_irte = vmx_update_pi_irte,
Yunhong Jiang64672c92016-06-13 14:19:59 -070013823
13824#ifdef CONFIG_X86_64
13825 .set_hv_timer = vmx_set_hv_timer,
13826 .cancel_hv_timer = vmx_cancel_hv_timer,
13827#endif
Ashok Rajc45dcc72016-06-22 14:59:56 +080013828
13829 .setup_mce = vmx_setup_mce,
Ladi Prosek0234bf82017-10-11 16:54:40 +020013830
Jim Mattson8fcc4b52018-07-10 11:27:20 +020013831 .get_nested_state = vmx_get_nested_state,
13832 .set_nested_state = vmx_set_nested_state,
Paolo Bonzini7f7f1ba2018-07-18 18:49:01 +020013833 .get_vmcs12_pages = nested_get_vmcs12_pages,
13834
Ladi Prosek72d7b372017-10-11 16:54:41 +020013835 .smi_allowed = vmx_smi_allowed,
Ladi Prosek0234bf82017-10-11 16:54:40 +020013836 .pre_enter_smm = vmx_pre_enter_smm,
13837 .pre_leave_smm = vmx_pre_leave_smm,
Ladi Prosekcc3d9672017-10-17 16:02:39 +020013838 .enable_smi_window = enable_smi_window,
Avi Kivity6aa8b732006-12-10 02:21:36 -080013839};
13840
13841static int __init vmx_init(void)
13842{
Vitaly Kuznetsov773e8a02018-03-20 15:02:11 +010013843 int r;
13844
13845#if IS_ENABLED(CONFIG_HYPERV)
13846 /*
13847 * Enlightened VMCS is used only when the hypervisor recommends it and
13848 * the host supports eVMCS v1 or above. eVMCS support can also be
13849 * disabled with the module parameter.
13850 */
13851 if (enlightened_vmcs &&
13852 ms_hyperv.hints & HV_X64_ENLIGHTENED_VMCS_RECOMMENDED &&
13853 (ms_hyperv.nested_features & HV_X64_ENLIGHTENED_VMCS_VERSION) >=
13854 KVM_EVMCS_VERSION) {
13855 int cpu;
13856
13857 /* Check that we have assist pages on all online CPUs */
13858 for_each_online_cpu(cpu) {
13859 if (!hv_get_vp_assist_page(cpu)) {
13860 enlightened_vmcs = false;
13861 break;
13862 }
13863 }
13864
13865 if (enlightened_vmcs) {
13866 pr_info("KVM: vmx: using Hyper-V Enlightened VMCS\n");
13867 static_branch_enable(&enable_evmcs);
13868 }
13869 } else {
13870 enlightened_vmcs = false;
13871 }
13872#endif
13873
13874 r = kvm_init(&vmx_x86_ops, sizeof(struct vcpu_vmx),
Tiejun Chen34a1cd62014-10-28 10:14:48 +080013875 __alignof__(struct vcpu_vmx), THIS_MODULE);
He, Qingfdef3ad2007-04-30 09:45:24 +030013876 if (r)
Tiejun Chen34a1cd62014-10-28 10:14:48 +080013877 return r;
Sheng Yang25c5f222008-03-28 13:18:56 +080013878
Dave Young2965faa2015-09-09 15:38:55 -070013879#ifdef CONFIG_KEXEC_CORE
Zhang Yanfei8f536b72012-12-06 23:43:34 +080013880 rcu_assign_pointer(crash_vmclear_loaded_vmcss,
13881 crash_vmclear_local_loaded_vmcss);
13882#endif
Jim Mattson21ebf532018-05-01 15:40:28 -070013883 vmx_check_vmcs12_offsets();
Zhang Yanfei8f536b72012-12-06 23:43:34 +080013884
He, Qingfdef3ad2007-04-30 09:45:24 +030013885 return 0;
Avi Kivity6aa8b732006-12-10 02:21:36 -080013886}
13887
13888static void __exit vmx_exit(void)
13889{
Dave Young2965faa2015-09-09 15:38:55 -070013890#ifdef CONFIG_KEXEC_CORE
Monam Agarwal3b63a432014-03-22 12:28:10 +053013891 RCU_INIT_POINTER(crash_vmclear_loaded_vmcss, NULL);
Zhang Yanfei8f536b72012-12-06 23:43:34 +080013892 synchronize_rcu();
13893#endif
13894
Zhang Xiantaocb498ea2007-11-14 20:39:31 +080013895 kvm_exit();
Vitaly Kuznetsov773e8a02018-03-20 15:02:11 +010013896
13897#if IS_ENABLED(CONFIG_HYPERV)
13898 if (static_branch_unlikely(&enable_evmcs)) {
13899 int cpu;
13900 struct hv_vp_assist_page *vp_ap;
13901 /*
13902 * Reset everything to support using non-enlightened VMCS
13903 * access later (e.g. when we reload the module with
13904 * enlightened_vmcs=0)
13905 */
13906 for_each_online_cpu(cpu) {
13907 vp_ap = hv_get_vp_assist_page(cpu);
13908
13909 if (!vp_ap)
13910 continue;
13911
13912 vp_ap->current_nested_vmcs = 0;
13913 vp_ap->enlighten_vmentry = 0;
13914 }
13915
13916 static_branch_disable(&enable_evmcs);
13917 }
13918#endif
Avi Kivity6aa8b732006-12-10 02:21:36 -080013919}
13920
13921module_init(vmx_init)
13922module_exit(vmx_exit)