/*
 * Kernel-based Virtual Machine driver for Linux
 *
 * This module enables machines with Intel VT-x extensions to run virtual
 * machines without emulation or binary translation.
 *
 * Copyright (C) 2006 Qumranet, Inc.
 * Copyright 2010 Red Hat, Inc. and/or its affiliates.
 *
 * Authors:
 *   Avi Kivity   <avi@qumranet.com>
 *   Yaniv Kamay  <yaniv@qumranet.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2.  See
 * the COPYING file in the top-level directory.
 *
 */

#include "irq.h"
#include "mmu.h"
#include "cpuid.h"
#include "lapic.h"

#include <linux/kvm_host.h>
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/highmem.h>
#include <linux/sched.h>
#include <linux/moduleparam.h>
#include <linux/mod_devicetable.h>
#include <linux/trace_events.h>
#include <linux/slab.h>
#include <linux/tboot.h>
#include <linux/hrtimer.h>
#include <linux/frame.h>
#include "kvm_cache_regs.h"
#include "x86.h"

#include <asm/cpu.h>
#include <asm/io.h>
#include <asm/desc.h>
#include <asm/vmx.h>
#include <asm/virtext.h>
#include <asm/mce.h>
#include <asm/fpu/internal.h>
#include <asm/perf_event.h>
#include <asm/debugreg.h>
#include <asm/kexec.h>
#include <asm/apic.h>
#include <asm/irq_remapping.h>
#include <asm/mmu_context.h>

#include "trace.h"
#include "pmu.h"

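/*
 * The two wrappers below route a fault taken by a VMX instruction (e.g.
 * when VMX has already been turned off by an emergency reboot) to KVM's
 * fault-on-reboot fixup path instead of oopsing; __ex_clear additionally
 * zeroes the named register on the fault path, so a failed VMREAD is seen
 * as reading 0.
 */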
#define __ex(x) __kvm_handle_fault_on_reboot(x)
#define __ex_clear(x, reg) \
	____kvm_handle_fault_on_reboot(x, "xor " reg " , " reg)

MODULE_AUTHOR("Qumranet");
MODULE_LICENSE("GPL");

static const struct x86_cpu_id vmx_cpu_id[] = {
	X86_FEATURE_MATCH(X86_FEATURE_VMX),
	{}
};
MODULE_DEVICE_TABLE(x86cpu, vmx_cpu_id);

static bool __read_mostly enable_vpid = 1;
module_param_named(vpid, enable_vpid, bool, 0444);

static bool __read_mostly enable_vnmi = 1;
module_param_named(vnmi, enable_vnmi, bool, S_IRUGO);

static bool __read_mostly flexpriority_enabled = 1;
module_param_named(flexpriority, flexpriority_enabled, bool, S_IRUGO);

static bool __read_mostly enable_ept = 1;
module_param_named(ept, enable_ept, bool, S_IRUGO);

static bool __read_mostly enable_unrestricted_guest = 1;
module_param_named(unrestricted_guest,
			enable_unrestricted_guest, bool, S_IRUGO);

static bool __read_mostly enable_ept_ad_bits = 1;
module_param_named(eptad, enable_ept_ad_bits, bool, S_IRUGO);

static bool __read_mostly emulate_invalid_guest_state = true;
module_param(emulate_invalid_guest_state, bool, S_IRUGO);

static bool __read_mostly fasteoi = 1;
module_param(fasteoi, bool, S_IRUGO);

static bool __read_mostly enable_apicv = 1;
module_param(enable_apicv, bool, S_IRUGO);

static bool __read_mostly enable_shadow_vmcs = 1;
module_param_named(enable_shadow_vmcs, enable_shadow_vmcs, bool, S_IRUGO);
/*
 * If nested=1, nested virtualization is supported, i.e., guests may use
 * VMX and be a hypervisor for their own guests. If nested=0, guests may not
 * use VMX instructions.
 */
static bool __read_mostly nested = 0;
module_param(nested, bool, S_IRUGO);

static u64 __read_mostly host_xss;

static bool __read_mostly enable_pml = 1;
module_param_named(pml, enable_pml, bool, S_IRUGO);

#define KVM_VMX_TSC_MULTIPLIER_MAX 0xffffffffffffffffULL

/* Guest_tsc -> host_tsc conversion requires 64-bit division. */
static int __read_mostly cpu_preemption_timer_multi;
static bool __read_mostly enable_preemption_timer = 1;
#ifdef CONFIG_X86_64
module_param_named(preemption_timer, enable_preemption_timer, bool, S_IRUGO);
#endif

#define KVM_GUEST_CR0_MASK (X86_CR0_NW | X86_CR0_CD)
#define KVM_VM_CR0_ALWAYS_ON_UNRESTRICTED_GUEST (X86_CR0_WP | X86_CR0_NE)
#define KVM_VM_CR0_ALWAYS_ON						\
	(KVM_VM_CR0_ALWAYS_ON_UNRESTRICTED_GUEST | X86_CR0_PG | X86_CR0_PE)
#define KVM_CR4_GUEST_OWNED_BITS				\
	(X86_CR4_PVI | X86_CR4_DE | X86_CR4_PCE | X86_CR4_OSFXSR	\
	 | X86_CR4_OSXMMEXCPT | X86_CR4_LA57 | X86_CR4_TSD)

#define KVM_PMODE_VM_CR4_ALWAYS_ON (X86_CR4_PAE | X86_CR4_VMXE)
#define KVM_RMODE_VM_CR4_ALWAYS_ON (X86_CR4_VME | X86_CR4_PAE | X86_CR4_VMXE)

#define RMODE_GUEST_OWNED_EFLAGS_BITS (~(X86_EFLAGS_IOPL | X86_EFLAGS_VM))

#define VMX_MISC_EMULATED_PREEMPTION_TIMER_RATE 5

/*
 * Hyper-V requires all of these, so mark them as supported even though
 * they are just treated the same as all-context.
 */
#define VMX_VPID_EXTENT_SUPPORTED_MASK		\
	(VMX_VPID_EXTENT_INDIVIDUAL_ADDR_BIT |	\
	VMX_VPID_EXTENT_SINGLE_CONTEXT_BIT |	\
	VMX_VPID_EXTENT_GLOBAL_CONTEXT_BIT |	\
	VMX_VPID_EXTENT_SINGLE_NON_GLOBAL_BIT)

/*
 * These 2 parameters are used to config the controls for Pause-Loop Exiting:
 * ple_gap:    upper bound on the amount of time between two successive
 *             executions of PAUSE in a loop. Also indicates whether PLE is
 *             enabled.
 *             According to tests, this time is usually smaller than 128 cycles.
 * ple_window: upper bound on the amount of time a guest is allowed to execute
 *             in a PAUSE loop. Tests indicate that most spinlocks are held for
 *             less than 2^12 cycles.
 * Time is measured based on a counter that runs at the same rate as the TSC;
 * refer to SDM volume 3b, sections 21.6.13 & 22.1.3.
 */
#define KVM_VMX_DEFAULT_PLE_GAP           128
#define KVM_VMX_DEFAULT_PLE_WINDOW        4096
#define KVM_VMX_DEFAULT_PLE_WINDOW_GROW   2
#define KVM_VMX_DEFAULT_PLE_WINDOW_SHRINK 0
#define KVM_VMX_DEFAULT_PLE_WINDOW_MAX    \
		INT_MAX / KVM_VMX_DEFAULT_PLE_WINDOW_GROW

static int ple_gap = KVM_VMX_DEFAULT_PLE_GAP;
module_param(ple_gap, int, S_IRUGO);

static int ple_window = KVM_VMX_DEFAULT_PLE_WINDOW;
module_param(ple_window, int, S_IRUGO);

/* Default doubles per-vcpu window every exit. */
static int ple_window_grow = KVM_VMX_DEFAULT_PLE_WINDOW_GROW;
module_param(ple_window_grow, int, S_IRUGO);

/* Default resets per-vcpu window every exit to ple_window. */
static int ple_window_shrink = KVM_VMX_DEFAULT_PLE_WINDOW_SHRINK;
module_param(ple_window_shrink, int, S_IRUGO);

/* Default is to compute the maximum so we can never overflow. */
static int ple_window_actual_max = KVM_VMX_DEFAULT_PLE_WINDOW_MAX;
static int ple_window_max        = KVM_VMX_DEFAULT_PLE_WINDOW_MAX;
module_param(ple_window_max, int, S_IRUGO);
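/*
 * Worked example with the defaults above: each grow step doubles the
 * per-vcpu window (4096 -> 8192 -> 16384 -> ...) and the result is capped
 * by ple_window_max so the multiplication can never overflow, while a
 * shrink value of 0 resets the window straight back to ple_window instead
 * of scaling it down.
 */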

extern const ulong vmx_return;

#define NR_AUTOLOAD_MSRS 8

struct vmcs {
	u32 revision_id;
	u32 abort;
	char data[0];
};

/*
 * Track a VMCS that may be loaded on a certain CPU. If it is (cpu!=-1), also
 * remember whether it was VMLAUNCHed, and maintain a linked list of all VMCSs
 * loaded on this CPU (so we can clear them if the CPU goes down).
 */
struct loaded_vmcs {
	struct vmcs *vmcs;
	struct vmcs *shadow_vmcs;
	int cpu;
	bool launched;
	bool nmi_known_unmasked;
	unsigned long vmcs_host_cr3;	/* May not match real cr3 */
	unsigned long vmcs_host_cr4;	/* May not match real cr4 */
	/* Support for vnmi-less CPUs */
	int soft_vnmi_blocked;
	ktime_t entry_time;
	s64 vnmi_blocked_time;
	struct list_head loaded_vmcss_on_cpu_link;
};

struct shared_msr_entry {
	unsigned index;
	u64 data;
	u64 mask;
};

/*
 * struct vmcs12 describes the state that our guest hypervisor (L1) keeps for a
 * single nested guest (L2), hence the name vmcs12. Any VMX implementation has
 * a VMCS structure, and vmcs12 is our emulated VMX's VMCS. This structure is
 * stored in guest memory specified by VMPTRLD, but is opaque to the guest,
 * which must access it using VMREAD/VMWRITE/VMCLEAR instructions.
 * More than one of these structures may exist, if L1 runs multiple L2 guests.
 * nested_vmx_run() will use the data here to build the vmcs02: a VMCS for the
 * underlying hardware which will be used to run L2.
 * This structure is packed to ensure that its layout is identical across
 * machines (necessary for live migration).
 * If there are changes in this struct, VMCS12_REVISION must be changed.
 */
typedef u64 natural_width;
struct __packed vmcs12 {
	/* According to the Intel spec, a VMCS region must start with the
	 * following two fields. Then follow implementation-specific data.
	 */
	u32 revision_id;
	u32 abort;

	u32 launch_state; /* set to 0 by VMCLEAR, to 1 by VMLAUNCH */
	u32 padding[7]; /* room for future expansion */

	u64 io_bitmap_a;
	u64 io_bitmap_b;
	u64 msr_bitmap;
	u64 vm_exit_msr_store_addr;
	u64 vm_exit_msr_load_addr;
	u64 vm_entry_msr_load_addr;
	u64 tsc_offset;
	u64 virtual_apic_page_addr;
	u64 apic_access_addr;
	u64 posted_intr_desc_addr;
	u64 vm_function_control;
	u64 ept_pointer;
	u64 eoi_exit_bitmap0;
	u64 eoi_exit_bitmap1;
	u64 eoi_exit_bitmap2;
	u64 eoi_exit_bitmap3;
	u64 eptp_list_address;
	u64 xss_exit_bitmap;
	u64 guest_physical_address;
	u64 vmcs_link_pointer;
	u64 pml_address;
	u64 guest_ia32_debugctl;
	u64 guest_ia32_pat;
	u64 guest_ia32_efer;
	u64 guest_ia32_perf_global_ctrl;
	u64 guest_pdptr0;
	u64 guest_pdptr1;
	u64 guest_pdptr2;
	u64 guest_pdptr3;
	u64 guest_bndcfgs;
	u64 host_ia32_pat;
	u64 host_ia32_efer;
	u64 host_ia32_perf_global_ctrl;
	u64 padding64[8]; /* room for future expansion */
	/*
	 * To allow migration of L1 (complete with its L2 guests) between
	 * machines of different natural widths (32 or 64 bit), we cannot have
	 * unsigned long fields with no explicit size. We use u64 (aliased
	 * natural_width) instead. Luckily, x86 is little-endian.
	 */
	natural_width cr0_guest_host_mask;
	natural_width cr4_guest_host_mask;
	natural_width cr0_read_shadow;
	natural_width cr4_read_shadow;
	natural_width cr3_target_value0;
	natural_width cr3_target_value1;
	natural_width cr3_target_value2;
	natural_width cr3_target_value3;
	natural_width exit_qualification;
	natural_width guest_linear_address;
	natural_width guest_cr0;
	natural_width guest_cr3;
	natural_width guest_cr4;
	natural_width guest_es_base;
	natural_width guest_cs_base;
	natural_width guest_ss_base;
	natural_width guest_ds_base;
	natural_width guest_fs_base;
	natural_width guest_gs_base;
	natural_width guest_ldtr_base;
	natural_width guest_tr_base;
	natural_width guest_gdtr_base;
	natural_width guest_idtr_base;
	natural_width guest_dr7;
	natural_width guest_rsp;
	natural_width guest_rip;
	natural_width guest_rflags;
	natural_width guest_pending_dbg_exceptions;
	natural_width guest_sysenter_esp;
	natural_width guest_sysenter_eip;
	natural_width host_cr0;
	natural_width host_cr3;
	natural_width host_cr4;
	natural_width host_fs_base;
	natural_width host_gs_base;
	natural_width host_tr_base;
	natural_width host_gdtr_base;
	natural_width host_idtr_base;
	natural_width host_ia32_sysenter_esp;
	natural_width host_ia32_sysenter_eip;
	natural_width host_rsp;
	natural_width host_rip;
	natural_width paddingl[8]; /* room for future expansion */
	u32 pin_based_vm_exec_control;
	u32 cpu_based_vm_exec_control;
	u32 exception_bitmap;
	u32 page_fault_error_code_mask;
	u32 page_fault_error_code_match;
	u32 cr3_target_count;
	u32 vm_exit_controls;
	u32 vm_exit_msr_store_count;
	u32 vm_exit_msr_load_count;
	u32 vm_entry_controls;
	u32 vm_entry_msr_load_count;
	u32 vm_entry_intr_info_field;
	u32 vm_entry_exception_error_code;
	u32 vm_entry_instruction_len;
	u32 tpr_threshold;
	u32 secondary_vm_exec_control;
	u32 vm_instruction_error;
	u32 vm_exit_reason;
	u32 vm_exit_intr_info;
	u32 vm_exit_intr_error_code;
	u32 idt_vectoring_info_field;
	u32 idt_vectoring_error_code;
	u32 vm_exit_instruction_len;
	u32 vmx_instruction_info;
	u32 guest_es_limit;
	u32 guest_cs_limit;
	u32 guest_ss_limit;
	u32 guest_ds_limit;
	u32 guest_fs_limit;
	u32 guest_gs_limit;
	u32 guest_ldtr_limit;
	u32 guest_tr_limit;
	u32 guest_gdtr_limit;
	u32 guest_idtr_limit;
	u32 guest_es_ar_bytes;
	u32 guest_cs_ar_bytes;
	u32 guest_ss_ar_bytes;
	u32 guest_ds_ar_bytes;
	u32 guest_fs_ar_bytes;
	u32 guest_gs_ar_bytes;
	u32 guest_ldtr_ar_bytes;
	u32 guest_tr_ar_bytes;
	u32 guest_interruptibility_info;
	u32 guest_activity_state;
	u32 guest_sysenter_cs;
	u32 host_ia32_sysenter_cs;
	u32 vmx_preemption_timer_value;
	u32 padding32[7]; /* room for future expansion */
	u16 virtual_processor_id;
	u16 posted_intr_nv;
	u16 guest_es_selector;
	u16 guest_cs_selector;
	u16 guest_ss_selector;
	u16 guest_ds_selector;
	u16 guest_fs_selector;
	u16 guest_gs_selector;
	u16 guest_ldtr_selector;
	u16 guest_tr_selector;
	u16 guest_intr_status;
	u16 guest_pml_index;
	u16 host_es_selector;
	u16 host_cs_selector;
	u16 host_ss_selector;
	u16 host_ds_selector;
	u16 host_fs_selector;
	u16 host_gs_selector;
	u16 host_tr_selector;
};

/*
 * VMCS12_REVISION is an arbitrary id that should be changed if the content or
 * layout of struct vmcs12 is changed. MSR_IA32_VMX_BASIC returns this id, and
 * VMPTRLD verifies that the VMCS region that L1 is loading contains this id.
 */
#define VMCS12_REVISION 0x11e57ed0

/*
 * VMCS12_SIZE is the number of bytes L1 should allocate for the VMXON region
 * and any VMCS region. Although only sizeof(struct vmcs12) bytes are used by
 * the current implementation, 4K are reserved to avoid future complications.
 */
#define VMCS12_SIZE 0x1000

/*
 * The nested_vmx structure is part of vcpu_vmx, and holds information we need
 * for correct emulation of VMX (i.e., nested VMX) on this vcpu.
 */
struct nested_vmx {
	/* Has the level1 guest done vmxon? */
	bool vmxon;
	gpa_t vmxon_ptr;
	bool pml_full;

	/* The guest-physical address of the current VMCS L1 keeps for L2 */
	gpa_t current_vmptr;
	/*
	 * Cache of the guest's VMCS, existing outside of guest memory.
	 * Loaded from guest memory during VMPTRLD. Flushed to guest
	 * memory during VMCLEAR and VMPTRLD.
	 */
	struct vmcs12 *cached_vmcs12;
	/*
	 * Indicates if the shadow vmcs must be updated with the
	 * data held by vmcs12
	 */
	bool sync_shadow_vmcs;

	bool change_vmcs01_virtual_x2apic_mode;
	/* L2 must run next, and mustn't decide to exit to L1. */
	bool nested_run_pending;

	struct loaded_vmcs vmcs02;

	/*
	 * Guest pages referred to in the vmcs02 with host-physical
	 * pointers, so we must keep them pinned while L2 runs.
	 */
	struct page *apic_access_page;
	struct page *virtual_apic_page;
	struct page *pi_desc_page;
	struct pi_desc *pi_desc;
	bool pi_pending;
	u16 posted_intr_nv;

	unsigned long *msr_bitmap;

	struct hrtimer preemption_timer;
	bool preemption_timer_expired;

	/* to migrate it to L2 if VM_ENTRY_LOAD_DEBUG_CONTROLS is off */
	u64 vmcs01_debugctl;

	u16 vpid02;
	u16 last_vpid;

	/*
	 * We only store the "true" versions of the VMX capability MSRs. We
	 * generate the "non-true" versions by setting the must-be-1 bits
	 * according to the SDM.
	 */
	u32 nested_vmx_procbased_ctls_low;
	u32 nested_vmx_procbased_ctls_high;
	u32 nested_vmx_secondary_ctls_low;
	u32 nested_vmx_secondary_ctls_high;
	u32 nested_vmx_pinbased_ctls_low;
	u32 nested_vmx_pinbased_ctls_high;
	u32 nested_vmx_exit_ctls_low;
	u32 nested_vmx_exit_ctls_high;
	u32 nested_vmx_entry_ctls_low;
	u32 nested_vmx_entry_ctls_high;
	u32 nested_vmx_misc_low;
	u32 nested_vmx_misc_high;
	u32 nested_vmx_ept_caps;
	u32 nested_vmx_vpid_caps;
	u64 nested_vmx_basic;
	u64 nested_vmx_cr0_fixed0;
	u64 nested_vmx_cr0_fixed1;
	u64 nested_vmx_cr4_fixed0;
	u64 nested_vmx_cr4_fixed1;
	u64 nested_vmx_vmcs_enum;
	u64 nested_vmx_vmfunc_controls;

	/* SMM related state */
	struct {
		/* in VMX operation on SMM entry? */
		bool vmxon;
		/* in guest mode on SMM entry? */
		bool guest_mode;
	} smm;
};

#define POSTED_INTR_ON  0
#define POSTED_INTR_SN  1

/* Posted-Interrupt Descriptor */
struct pi_desc {
	u32 pir[8];	/* Posted interrupt requested */
	union {
		struct {
			/* bit 256 - Outstanding Notification */
			u16	on : 1,
			/* bit 257 - Suppress Notification */
				sn : 1,
			/* bit 271:258 - Reserved */
				rsvd_1 : 14;
			/* bit 279:272 - Notification Vector */
			u8 nv;
			/* bit 287:280 - Reserved */
			u8 rsvd_2;
			/* bit 319:288 - Notification Destination */
			u32 ndst;
		};
		u64 control;
	};
	u32 rsvd[6];
} __aligned(64);

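/*
 * The ON and SN bits declared above alias bits 0 and 1 of the 64-bit
 * 'control' word, so the helpers below can set, clear and test them with
 * ordinary atomic bitops on a single cache-line-aligned quadword.
 */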
static bool pi_test_and_set_on(struct pi_desc *pi_desc)
{
	return test_and_set_bit(POSTED_INTR_ON,
			(unsigned long *)&pi_desc->control);
}

static bool pi_test_and_clear_on(struct pi_desc *pi_desc)
{
	return test_and_clear_bit(POSTED_INTR_ON,
			(unsigned long *)&pi_desc->control);
}

static int pi_test_and_set_pir(int vector, struct pi_desc *pi_desc)
{
	return test_and_set_bit(vector, (unsigned long *)pi_desc->pir);
}

static inline void pi_clear_sn(struct pi_desc *pi_desc)
{
	return clear_bit(POSTED_INTR_SN,
			(unsigned long *)&pi_desc->control);
}

static inline void pi_set_sn(struct pi_desc *pi_desc)
{
	return set_bit(POSTED_INTR_SN,
			(unsigned long *)&pi_desc->control);
}

static inline void pi_clear_on(struct pi_desc *pi_desc)
{
	clear_bit(POSTED_INTR_ON,
		  (unsigned long *)&pi_desc->control);
}

static inline int pi_test_on(struct pi_desc *pi_desc)
{
	return test_bit(POSTED_INTR_ON,
			(unsigned long *)&pi_desc->control);
}

static inline int pi_test_sn(struct pi_desc *pi_desc)
{
	return test_bit(POSTED_INTR_SN,
			(unsigned long *)&pi_desc->control);
}

struct vcpu_vmx {
	struct kvm_vcpu vcpu;
	unsigned long host_rsp;
	u8 fail;
	u32 exit_intr_info;
	u32 idt_vectoring_info;
	ulong rflags;
	struct shared_msr_entry *guest_msrs;
	int nmsrs;
	int save_nmsrs;
	unsigned long host_idt_base;
#ifdef CONFIG_X86_64
	u64 msr_host_kernel_gs_base;
	u64 msr_guest_kernel_gs_base;
#endif
	u32 vm_entry_controls_shadow;
	u32 vm_exit_controls_shadow;
	u32 secondary_exec_control;

	/*
	 * loaded_vmcs points to the VMCS currently used in this vcpu. For a
	 * non-nested (L1) guest, it always points to vmcs01. For a nested
	 * guest (L2), it points to a different VMCS.
	 */
	struct loaded_vmcs vmcs01;
	struct loaded_vmcs *loaded_vmcs;
	bool __launched; /* temporary, used in vmx_vcpu_run */
	struct msr_autoload {
		unsigned nr;
		struct vmx_msr_entry guest[NR_AUTOLOAD_MSRS];
		struct vmx_msr_entry host[NR_AUTOLOAD_MSRS];
	} msr_autoload;
	struct {
		int loaded;
		u16 fs_sel, gs_sel, ldt_sel;
#ifdef CONFIG_X86_64
		u16 ds_sel, es_sel;
#endif
		int gs_ldt_reload_needed;
		int fs_reload_needed;
		u64 msr_host_bndcfgs;
	} host_state;
	struct {
		int vm86_active;
		ulong save_rflags;
		struct kvm_segment segs[8];
	} rmode;
	struct {
		u32 bitmask; /* 4 bits per segment (1 bit per field) */
		struct kvm_save_segment {
			u16 selector;
			unsigned long base;
			u32 limit;
			u32 ar;
		} seg[8];
	} segment_cache;
	int vpid;
	bool emulation_required;

	u32 exit_reason;

	/* Posted interrupt descriptor */
	struct pi_desc pi_desc;

	/* Support for a guest hypervisor (nested VMX) */
	struct nested_vmx nested;

	/* Dynamic PLE window. */
	int ple_window;
	bool ple_window_dirty;

	/* Support for PML */
#define PML_ENTITY_NUM 512
	struct page *pml_pg;

	/* apic deadline value in host tsc */
	u64 hv_deadline_tsc;

	u64 current_tsc_ratio;

	u32 host_pkru;

	unsigned long host_debugctlmsr;

	/*
	 * Only bits masked by msr_ia32_feature_control_valid_bits can be set in
	 * msr_ia32_feature_control. FEATURE_CONTROL_LOCKED is always included
	 * in msr_ia32_feature_control_valid_bits.
	 */
	u64 msr_ia32_feature_control;
	u64 msr_ia32_feature_control_valid_bits;
};

enum segment_cache_field {
	SEG_FIELD_SEL = 0,
	SEG_FIELD_BASE = 1,
	SEG_FIELD_LIMIT = 2,
	SEG_FIELD_AR = 3,

	SEG_FIELD_NR = 4
};

static inline struct vcpu_vmx *to_vmx(struct kvm_vcpu *vcpu)
{
	return container_of(vcpu, struct vcpu_vmx, vcpu);
}

static struct pi_desc *vcpu_to_pi_desc(struct kvm_vcpu *vcpu)
{
	return &(to_vmx(vcpu)->pi_desc);
}

#define VMCS12_OFFSET(x) offsetof(struct vmcs12, x)
#define FIELD(number, name)	[number] = VMCS12_OFFSET(name)
#define FIELD64(number, name)	[number] = VMCS12_OFFSET(name), \
				[number##_HIGH] = VMCS12_OFFSET(name)+4

static unsigned long shadow_read_only_fields[] = {
	/*
	 * We do NOT shadow fields that are modified when L0
	 * traps and emulates any vmx instruction (e.g. VMPTRLD,
	 * VMXON...) executed by L1.
	 * For example, VM_INSTRUCTION_ERROR is read
	 * by L1 if a vmx instruction fails (part of the error path).
	 * Note the code assumes this logic. If for some reason
	 * we start shadowing these fields then we need to
	 * force a shadow sync when L0 emulates vmx instructions
	 * (e.g. force a sync if VM_INSTRUCTION_ERROR is modified
	 * by nested_vmx_failValid)
	 */
	VM_EXIT_REASON,
	VM_EXIT_INTR_INFO,
	VM_EXIT_INSTRUCTION_LEN,
	IDT_VECTORING_INFO_FIELD,
	IDT_VECTORING_ERROR_CODE,
	VM_EXIT_INTR_ERROR_CODE,
	EXIT_QUALIFICATION,
	GUEST_LINEAR_ADDRESS,
	GUEST_PHYSICAL_ADDRESS
};
static int max_shadow_read_only_fields =
	ARRAY_SIZE(shadow_read_only_fields);

static unsigned long shadow_read_write_fields[] = {
	TPR_THRESHOLD,
	GUEST_RIP,
	GUEST_RSP,
	GUEST_CR0,
	GUEST_CR3,
	GUEST_CR4,
	GUEST_INTERRUPTIBILITY_INFO,
	GUEST_RFLAGS,
	GUEST_CS_SELECTOR,
	GUEST_CS_AR_BYTES,
	GUEST_CS_LIMIT,
	GUEST_CS_BASE,
	GUEST_ES_BASE,
	GUEST_BNDCFGS,
	CR0_GUEST_HOST_MASK,
	CR0_READ_SHADOW,
	CR4_READ_SHADOW,
	TSC_OFFSET,
	EXCEPTION_BITMAP,
	CPU_BASED_VM_EXEC_CONTROL,
	VM_ENTRY_EXCEPTION_ERROR_CODE,
	VM_ENTRY_INTR_INFO_FIELD,
	VM_ENTRY_INSTRUCTION_LEN,
	VM_ENTRY_EXCEPTION_ERROR_CODE,
	HOST_FS_BASE,
	HOST_GS_BASE,
	HOST_FS_SELECTOR,
	HOST_GS_SELECTOR
};
static int max_shadow_read_write_fields =
	ARRAY_SIZE(shadow_read_write_fields);

static const unsigned short vmcs_field_to_offset_table[] = {
	FIELD(VIRTUAL_PROCESSOR_ID, virtual_processor_id),
	FIELD(POSTED_INTR_NV, posted_intr_nv),
	FIELD(GUEST_ES_SELECTOR, guest_es_selector),
	FIELD(GUEST_CS_SELECTOR, guest_cs_selector),
	FIELD(GUEST_SS_SELECTOR, guest_ss_selector),
	FIELD(GUEST_DS_SELECTOR, guest_ds_selector),
	FIELD(GUEST_FS_SELECTOR, guest_fs_selector),
	FIELD(GUEST_GS_SELECTOR, guest_gs_selector),
	FIELD(GUEST_LDTR_SELECTOR, guest_ldtr_selector),
	FIELD(GUEST_TR_SELECTOR, guest_tr_selector),
	FIELD(GUEST_INTR_STATUS, guest_intr_status),
	FIELD(GUEST_PML_INDEX, guest_pml_index),
	FIELD(HOST_ES_SELECTOR, host_es_selector),
	FIELD(HOST_CS_SELECTOR, host_cs_selector),
	FIELD(HOST_SS_SELECTOR, host_ss_selector),
	FIELD(HOST_DS_SELECTOR, host_ds_selector),
	FIELD(HOST_FS_SELECTOR, host_fs_selector),
	FIELD(HOST_GS_SELECTOR, host_gs_selector),
	FIELD(HOST_TR_SELECTOR, host_tr_selector),
	FIELD64(IO_BITMAP_A, io_bitmap_a),
	FIELD64(IO_BITMAP_B, io_bitmap_b),
	FIELD64(MSR_BITMAP, msr_bitmap),
	FIELD64(VM_EXIT_MSR_STORE_ADDR, vm_exit_msr_store_addr),
	FIELD64(VM_EXIT_MSR_LOAD_ADDR, vm_exit_msr_load_addr),
	FIELD64(VM_ENTRY_MSR_LOAD_ADDR, vm_entry_msr_load_addr),
	FIELD64(TSC_OFFSET, tsc_offset),
	FIELD64(VIRTUAL_APIC_PAGE_ADDR, virtual_apic_page_addr),
	FIELD64(APIC_ACCESS_ADDR, apic_access_addr),
	FIELD64(POSTED_INTR_DESC_ADDR, posted_intr_desc_addr),
	FIELD64(VM_FUNCTION_CONTROL, vm_function_control),
	FIELD64(EPT_POINTER, ept_pointer),
	FIELD64(EOI_EXIT_BITMAP0, eoi_exit_bitmap0),
	FIELD64(EOI_EXIT_BITMAP1, eoi_exit_bitmap1),
	FIELD64(EOI_EXIT_BITMAP2, eoi_exit_bitmap2),
	FIELD64(EOI_EXIT_BITMAP3, eoi_exit_bitmap3),
	FIELD64(EPTP_LIST_ADDRESS, eptp_list_address),
	FIELD64(XSS_EXIT_BITMAP, xss_exit_bitmap),
	FIELD64(GUEST_PHYSICAL_ADDRESS, guest_physical_address),
	FIELD64(VMCS_LINK_POINTER, vmcs_link_pointer),
	FIELD64(PML_ADDRESS, pml_address),
	FIELD64(GUEST_IA32_DEBUGCTL, guest_ia32_debugctl),
	FIELD64(GUEST_IA32_PAT, guest_ia32_pat),
	FIELD64(GUEST_IA32_EFER, guest_ia32_efer),
	FIELD64(GUEST_IA32_PERF_GLOBAL_CTRL, guest_ia32_perf_global_ctrl),
	FIELD64(GUEST_PDPTR0, guest_pdptr0),
	FIELD64(GUEST_PDPTR1, guest_pdptr1),
	FIELD64(GUEST_PDPTR2, guest_pdptr2),
	FIELD64(GUEST_PDPTR3, guest_pdptr3),
	FIELD64(GUEST_BNDCFGS, guest_bndcfgs),
	FIELD64(HOST_IA32_PAT, host_ia32_pat),
	FIELD64(HOST_IA32_EFER, host_ia32_efer),
	FIELD64(HOST_IA32_PERF_GLOBAL_CTRL, host_ia32_perf_global_ctrl),
	FIELD(PIN_BASED_VM_EXEC_CONTROL, pin_based_vm_exec_control),
	FIELD(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control),
	FIELD(EXCEPTION_BITMAP, exception_bitmap),
	FIELD(PAGE_FAULT_ERROR_CODE_MASK, page_fault_error_code_mask),
	FIELD(PAGE_FAULT_ERROR_CODE_MATCH, page_fault_error_code_match),
	FIELD(CR3_TARGET_COUNT, cr3_target_count),
	FIELD(VM_EXIT_CONTROLS, vm_exit_controls),
	FIELD(VM_EXIT_MSR_STORE_COUNT, vm_exit_msr_store_count),
	FIELD(VM_EXIT_MSR_LOAD_COUNT, vm_exit_msr_load_count),
	FIELD(VM_ENTRY_CONTROLS, vm_entry_controls),
	FIELD(VM_ENTRY_MSR_LOAD_COUNT, vm_entry_msr_load_count),
	FIELD(VM_ENTRY_INTR_INFO_FIELD, vm_entry_intr_info_field),
	FIELD(VM_ENTRY_EXCEPTION_ERROR_CODE, vm_entry_exception_error_code),
	FIELD(VM_ENTRY_INSTRUCTION_LEN, vm_entry_instruction_len),
	FIELD(TPR_THRESHOLD, tpr_threshold),
	FIELD(SECONDARY_VM_EXEC_CONTROL, secondary_vm_exec_control),
	FIELD(VM_INSTRUCTION_ERROR, vm_instruction_error),
	FIELD(VM_EXIT_REASON, vm_exit_reason),
	FIELD(VM_EXIT_INTR_INFO, vm_exit_intr_info),
	FIELD(VM_EXIT_INTR_ERROR_CODE, vm_exit_intr_error_code),
	FIELD(IDT_VECTORING_INFO_FIELD, idt_vectoring_info_field),
	FIELD(IDT_VECTORING_ERROR_CODE, idt_vectoring_error_code),
	FIELD(VM_EXIT_INSTRUCTION_LEN, vm_exit_instruction_len),
	FIELD(VMX_INSTRUCTION_INFO, vmx_instruction_info),
	FIELD(GUEST_ES_LIMIT, guest_es_limit),
	FIELD(GUEST_CS_LIMIT, guest_cs_limit),
	FIELD(GUEST_SS_LIMIT, guest_ss_limit),
	FIELD(GUEST_DS_LIMIT, guest_ds_limit),
	FIELD(GUEST_FS_LIMIT, guest_fs_limit),
	FIELD(GUEST_GS_LIMIT, guest_gs_limit),
	FIELD(GUEST_LDTR_LIMIT, guest_ldtr_limit),
	FIELD(GUEST_TR_LIMIT, guest_tr_limit),
	FIELD(GUEST_GDTR_LIMIT, guest_gdtr_limit),
	FIELD(GUEST_IDTR_LIMIT, guest_idtr_limit),
	FIELD(GUEST_ES_AR_BYTES, guest_es_ar_bytes),
	FIELD(GUEST_CS_AR_BYTES, guest_cs_ar_bytes),
	FIELD(GUEST_SS_AR_BYTES, guest_ss_ar_bytes),
	FIELD(GUEST_DS_AR_BYTES, guest_ds_ar_bytes),
	FIELD(GUEST_FS_AR_BYTES, guest_fs_ar_bytes),
	FIELD(GUEST_GS_AR_BYTES, guest_gs_ar_bytes),
	FIELD(GUEST_LDTR_AR_BYTES, guest_ldtr_ar_bytes),
	FIELD(GUEST_TR_AR_BYTES, guest_tr_ar_bytes),
	FIELD(GUEST_INTERRUPTIBILITY_INFO, guest_interruptibility_info),
	FIELD(GUEST_ACTIVITY_STATE, guest_activity_state),
	FIELD(GUEST_SYSENTER_CS, guest_sysenter_cs),
	FIELD(HOST_IA32_SYSENTER_CS, host_ia32_sysenter_cs),
	FIELD(VMX_PREEMPTION_TIMER_VALUE, vmx_preemption_timer_value),
	FIELD(CR0_GUEST_HOST_MASK, cr0_guest_host_mask),
	FIELD(CR4_GUEST_HOST_MASK, cr4_guest_host_mask),
	FIELD(CR0_READ_SHADOW, cr0_read_shadow),
	FIELD(CR4_READ_SHADOW, cr4_read_shadow),
	FIELD(CR3_TARGET_VALUE0, cr3_target_value0),
	FIELD(CR3_TARGET_VALUE1, cr3_target_value1),
	FIELD(CR3_TARGET_VALUE2, cr3_target_value2),
	FIELD(CR3_TARGET_VALUE3, cr3_target_value3),
	FIELD(EXIT_QUALIFICATION, exit_qualification),
	FIELD(GUEST_LINEAR_ADDRESS, guest_linear_address),
	FIELD(GUEST_CR0, guest_cr0),
	FIELD(GUEST_CR3, guest_cr3),
	FIELD(GUEST_CR4, guest_cr4),
	FIELD(GUEST_ES_BASE, guest_es_base),
	FIELD(GUEST_CS_BASE, guest_cs_base),
	FIELD(GUEST_SS_BASE, guest_ss_base),
	FIELD(GUEST_DS_BASE, guest_ds_base),
	FIELD(GUEST_FS_BASE, guest_fs_base),
	FIELD(GUEST_GS_BASE, guest_gs_base),
	FIELD(GUEST_LDTR_BASE, guest_ldtr_base),
	FIELD(GUEST_TR_BASE, guest_tr_base),
	FIELD(GUEST_GDTR_BASE, guest_gdtr_base),
	FIELD(GUEST_IDTR_BASE, guest_idtr_base),
	FIELD(GUEST_DR7, guest_dr7),
	FIELD(GUEST_RSP, guest_rsp),
	FIELD(GUEST_RIP, guest_rip),
	FIELD(GUEST_RFLAGS, guest_rflags),
	FIELD(GUEST_PENDING_DBG_EXCEPTIONS, guest_pending_dbg_exceptions),
	FIELD(GUEST_SYSENTER_ESP, guest_sysenter_esp),
	FIELD(GUEST_SYSENTER_EIP, guest_sysenter_eip),
	FIELD(HOST_CR0, host_cr0),
	FIELD(HOST_CR3, host_cr3),
	FIELD(HOST_CR4, host_cr4),
	FIELD(HOST_FS_BASE, host_fs_base),
	FIELD(HOST_GS_BASE, host_gs_base),
	FIELD(HOST_TR_BASE, host_tr_base),
	FIELD(HOST_GDTR_BASE, host_gdtr_base),
	FIELD(HOST_IDTR_BASE, host_idtr_base),
	FIELD(HOST_IA32_SYSENTER_ESP, host_ia32_sysenter_esp),
	FIELD(HOST_IA32_SYSENTER_EIP, host_ia32_sysenter_eip),
	FIELD(HOST_RSP, host_rsp),
	FIELD(HOST_RIP, host_rip),
};

static inline short vmcs_field_to_offset(unsigned long field)
{
	BUILD_BUG_ON(ARRAY_SIZE(vmcs_field_to_offset_table) > SHRT_MAX);

	if (field >= ARRAY_SIZE(vmcs_field_to_offset_table) ||
	    vmcs_field_to_offset_table[field] == 0)
		return -ENOENT;

	return vmcs_field_to_offset_table[field];
}

static inline struct vmcs12 *get_vmcs12(struct kvm_vcpu *vcpu)
{
	return to_vmx(vcpu)->nested.cached_vmcs12;
}

static bool nested_ept_ad_enabled(struct kvm_vcpu *vcpu);
static unsigned long nested_ept_get_cr3(struct kvm_vcpu *vcpu);
static u64 construct_eptp(struct kvm_vcpu *vcpu, unsigned long root_hpa);
static bool vmx_xsaves_supported(void);
static void vmx_set_segment(struct kvm_vcpu *vcpu,
			    struct kvm_segment *var, int seg);
static void vmx_get_segment(struct kvm_vcpu *vcpu,
			    struct kvm_segment *var, int seg);
static bool guest_state_valid(struct kvm_vcpu *vcpu);
static u32 vmx_segment_access_rights(struct kvm_segment *var);
static void copy_shadow_to_vmcs12(struct vcpu_vmx *vmx);
static bool vmx_get_nmi_mask(struct kvm_vcpu *vcpu);
static void vmx_set_nmi_mask(struct kvm_vcpu *vcpu, bool masked);
static bool nested_vmx_is_page_fault_vmexit(struct vmcs12 *vmcs12,
					    u16 error_code);

static DEFINE_PER_CPU(struct vmcs *, vmxarea);
static DEFINE_PER_CPU(struct vmcs *, current_vmcs);
/*
 * We maintain a per-CPU linked list of VMCSs loaded on that CPU. This is
 * needed when a CPU is brought down, and we need to VMCLEAR all VMCSs
 * loaded on it.
 */
static DEFINE_PER_CPU(struct list_head, loaded_vmcss_on_cpu);

/*
 * We maintain a per-CPU linked list of vCPUs, so in wakeup_handler() we
 * can find which vCPU should be woken up.
 */
static DEFINE_PER_CPU(struct list_head, blocked_vcpu_on_cpu);
static DEFINE_PER_CPU(spinlock_t, blocked_vcpu_on_cpu_lock);

enum {
	VMX_MSR_BITMAP_LEGACY,
	VMX_MSR_BITMAP_LONGMODE,
	VMX_MSR_BITMAP_LEGACY_X2APIC_APICV,
	VMX_MSR_BITMAP_LONGMODE_X2APIC_APICV,
	VMX_MSR_BITMAP_LEGACY_X2APIC,
	VMX_MSR_BITMAP_LONGMODE_X2APIC,
	VMX_VMREAD_BITMAP,
	VMX_VMWRITE_BITMAP,
	VMX_BITMAP_NR
};

static unsigned long *vmx_bitmap[VMX_BITMAP_NR];

#define vmx_msr_bitmap_legacy                (vmx_bitmap[VMX_MSR_BITMAP_LEGACY])
#define vmx_msr_bitmap_longmode              (vmx_bitmap[VMX_MSR_BITMAP_LONGMODE])
#define vmx_msr_bitmap_legacy_x2apic_apicv   (vmx_bitmap[VMX_MSR_BITMAP_LEGACY_X2APIC_APICV])
#define vmx_msr_bitmap_longmode_x2apic_apicv (vmx_bitmap[VMX_MSR_BITMAP_LONGMODE_X2APIC_APICV])
#define vmx_msr_bitmap_legacy_x2apic         (vmx_bitmap[VMX_MSR_BITMAP_LEGACY_X2APIC])
#define vmx_msr_bitmap_longmode_x2apic       (vmx_bitmap[VMX_MSR_BITMAP_LONGMODE_X2APIC])
#define vmx_vmread_bitmap                    (vmx_bitmap[VMX_VMREAD_BITMAP])
#define vmx_vmwrite_bitmap                   (vmx_bitmap[VMX_VMWRITE_BITMAP])

static bool cpu_has_load_ia32_efer;
static bool cpu_has_load_perf_global_ctrl;

static DECLARE_BITMAP(vmx_vpid_bitmap, VMX_NR_VPIDS);
static DEFINE_SPINLOCK(vmx_vpid_lock);

static struct vmcs_config {
	int size;
	int order;
	u32 basic_cap;
	u32 revision_id;
	u32 pin_based_exec_ctrl;
	u32 cpu_based_exec_ctrl;
	u32 cpu_based_2nd_exec_ctrl;
	u32 vmexit_ctrl;
	u32 vmentry_ctrl;
} vmcs_config;

static struct vmx_capability {
	u32 ept;
	u32 vpid;
} vmx_capability;

#define VMX_SEGMENT_FIELD(seg)					\
	[VCPU_SREG_##seg] = {					\
		.selector = GUEST_##seg##_SELECTOR,		\
		.base = GUEST_##seg##_BASE,			\
		.limit = GUEST_##seg##_LIMIT,			\
		.ar_bytes = GUEST_##seg##_AR_BYTES,		\
	}

static const struct kvm_vmx_segment_field {
	unsigned selector;
	unsigned base;
	unsigned limit;
	unsigned ar_bytes;
} kvm_vmx_segment_fields[] = {
	VMX_SEGMENT_FIELD(CS),
	VMX_SEGMENT_FIELD(DS),
	VMX_SEGMENT_FIELD(ES),
	VMX_SEGMENT_FIELD(FS),
	VMX_SEGMENT_FIELD(GS),
	VMX_SEGMENT_FIELD(SS),
	VMX_SEGMENT_FIELD(TR),
	VMX_SEGMENT_FIELD(LDTR),
};

static u64 host_efer;

static void ept_save_pdptrs(struct kvm_vcpu *vcpu);

/*
 * Keep MSR_STAR at the end, as setup_msrs() will try to optimize it
 * away by decrementing the array size.
 */
static const u32 vmx_msr_index[] = {
#ifdef CONFIG_X86_64
	MSR_SYSCALL_MASK, MSR_LSTAR, MSR_CSTAR,
#endif
	MSR_EFER, MSR_TSC_AUX, MSR_STAR,
};

static inline bool is_exception_n(u32 intr_info, u8 vector)
{
	return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VECTOR_MASK |
			     INTR_INFO_VALID_MASK)) ==
		(INTR_TYPE_HARD_EXCEPTION | vector | INTR_INFO_VALID_MASK);
}

static inline bool is_debug(u32 intr_info)
{
	return is_exception_n(intr_info, DB_VECTOR);
}

static inline bool is_breakpoint(u32 intr_info)
{
	return is_exception_n(intr_info, BP_VECTOR);
}

static inline bool is_page_fault(u32 intr_info)
{
	return is_exception_n(intr_info, PF_VECTOR);
}

static inline bool is_no_device(u32 intr_info)
{
	return is_exception_n(intr_info, NM_VECTOR);
}

static inline bool is_invalid_opcode(u32 intr_info)
{
	return is_exception_n(intr_info, UD_VECTOR);
}

static inline bool is_external_interrupt(u32 intr_info)
{
	return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VALID_MASK))
		== (INTR_TYPE_EXT_INTR | INTR_INFO_VALID_MASK);
}

static inline bool is_machine_check(u32 intr_info)
{
	return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VECTOR_MASK |
			     INTR_INFO_VALID_MASK)) ==
		(INTR_TYPE_HARD_EXCEPTION | MC_VECTOR | INTR_INFO_VALID_MASK);
}

static inline bool cpu_has_vmx_msr_bitmap(void)
{
	return vmcs_config.cpu_based_exec_ctrl & CPU_BASED_USE_MSR_BITMAPS;
}

static inline bool cpu_has_vmx_tpr_shadow(void)
{
	return vmcs_config.cpu_based_exec_ctrl & CPU_BASED_TPR_SHADOW;
}

static inline bool cpu_need_tpr_shadow(struct kvm_vcpu *vcpu)
{
	return cpu_has_vmx_tpr_shadow() && lapic_in_kernel(vcpu);
}

static inline bool cpu_has_secondary_exec_ctrls(void)
{
	return vmcs_config.cpu_based_exec_ctrl &
		CPU_BASED_ACTIVATE_SECONDARY_CONTROLS;
}

static inline bool cpu_has_vmx_virtualize_apic_accesses(void)
{
	return vmcs_config.cpu_based_2nd_exec_ctrl &
		SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES;
}

static inline bool cpu_has_vmx_virtualize_x2apic_mode(void)
{
	return vmcs_config.cpu_based_2nd_exec_ctrl &
		SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE;
}

static inline bool cpu_has_vmx_apic_register_virt(void)
{
	return vmcs_config.cpu_based_2nd_exec_ctrl &
		SECONDARY_EXEC_APIC_REGISTER_VIRT;
}

static inline bool cpu_has_vmx_virtual_intr_delivery(void)
{
	return vmcs_config.cpu_based_2nd_exec_ctrl &
		SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY;
}

/*
 * Comment format: document - errata name - stepping - processor name.
 * Taken from
 * https://www.virtualbox.org/svn/vbox/trunk/src/VBox/VMM/VMMR0/HMR0.cpp
 */
static u32 vmx_preemption_cpu_tfms[] = {
/* 323344.pdf - BA86   - D0 - Xeon 7500 Series */
0x000206E6,
/* 323056.pdf - AAX65  - C2 - Xeon L3406 */
/* 322814.pdf - AAT59  - C2 - i7-600, i5-500, i5-400 and i3-300 Mobile */
/* 322911.pdf - AAU65  - C2 - i5-600, i3-500 Desktop and Pentium G6950 */
0x00020652,
/* 322911.pdf - AAU65  - K0 - i5-600, i3-500 Desktop and Pentium G6950 */
0x00020655,
/* 322373.pdf - AAO95  - B1 - Xeon 3400 Series */
/* 322166.pdf - AAN92  - B1 - i7-800 and i5-700 Desktop */
/*
 * 320767.pdf - AAP86  - B1 -
 * i7-900 Mobile Extreme, i7-800 and i7-700 Mobile
 */
0x000106E5,
/* 321333.pdf - AAM126 - C0 - Xeon 3500 */
0x000106A0,
/* 321333.pdf - AAM126 - C1 - Xeon 3500 */
0x000106A1,
/* 320836.pdf - AAJ124 - C0 - i7-900 Desktop Extreme and i7-900 Desktop */
0x000106A4,
/* 321333.pdf - AAM126 - D0 - Xeon 3500 */
/* 321324.pdf - AAK139 - D0 - Xeon 5500 */
/* 320836.pdf - AAJ124 - D0 - i7-900 Extreme and i7-900 Desktop */
0x000106A5,
};

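/*
 * cpu_has_broken_vmx_preemption_timer() masks the reserved bits out of
 * CPUID.1.EAX and compares the remaining family/model/stepping signature
 * against the errata table above, flagging parts on which the VMX
 * preemption timer cannot be trusted.
 */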
static inline bool cpu_has_broken_vmx_preemption_timer(void)
{
	u32 eax = cpuid_eax(0x00000001), i;

	/* Clear the reserved bits */
	eax &= ~(0x3U << 14 | 0xfU << 28);
	for (i = 0; i < ARRAY_SIZE(vmx_preemption_cpu_tfms); i++)
		if (eax == vmx_preemption_cpu_tfms[i])
			return true;

	return false;
}

static inline bool cpu_has_vmx_preemption_timer(void)
{
	return vmcs_config.pin_based_exec_ctrl &
		PIN_BASED_VMX_PREEMPTION_TIMER;
}

static inline bool cpu_has_vmx_posted_intr(void)
{
	return IS_ENABLED(CONFIG_X86_LOCAL_APIC) &&
		vmcs_config.pin_based_exec_ctrl & PIN_BASED_POSTED_INTR;
}

static inline bool cpu_has_vmx_apicv(void)
{
	return cpu_has_vmx_apic_register_virt() &&
		cpu_has_vmx_virtual_intr_delivery() &&
		cpu_has_vmx_posted_intr();
}

static inline bool cpu_has_vmx_flexpriority(void)
{
	return cpu_has_vmx_tpr_shadow() &&
		cpu_has_vmx_virtualize_apic_accesses();
}

static inline bool cpu_has_vmx_ept_execute_only(void)
{
	return vmx_capability.ept & VMX_EPT_EXECUTE_ONLY_BIT;
}

static inline bool cpu_has_vmx_ept_2m_page(void)
{
	return vmx_capability.ept & VMX_EPT_2MB_PAGE_BIT;
}

static inline bool cpu_has_vmx_ept_1g_page(void)
{
	return vmx_capability.ept & VMX_EPT_1GB_PAGE_BIT;
}

static inline bool cpu_has_vmx_ept_4levels(void)
{
	return vmx_capability.ept & VMX_EPT_PAGE_WALK_4_BIT;
}

static inline bool cpu_has_vmx_ept_mt_wb(void)
{
	return vmx_capability.ept & VMX_EPTP_WB_BIT;
}

static inline bool cpu_has_vmx_ept_5levels(void)
{
	return vmx_capability.ept & VMX_EPT_PAGE_WALK_5_BIT;
}

static inline bool cpu_has_vmx_ept_ad_bits(void)
{
	return vmx_capability.ept & VMX_EPT_AD_BIT;
}

static inline bool cpu_has_vmx_invept_context(void)
{
	return vmx_capability.ept & VMX_EPT_EXTENT_CONTEXT_BIT;
}

static inline bool cpu_has_vmx_invept_global(void)
{
	return vmx_capability.ept & VMX_EPT_EXTENT_GLOBAL_BIT;
}

static inline bool cpu_has_vmx_invvpid_single(void)
{
	return vmx_capability.vpid & VMX_VPID_EXTENT_SINGLE_CONTEXT_BIT;
}

static inline bool cpu_has_vmx_invvpid_global(void)
{
	return vmx_capability.vpid & VMX_VPID_EXTENT_GLOBAL_CONTEXT_BIT;
}

static inline bool cpu_has_vmx_invvpid(void)
{
	return vmx_capability.vpid & VMX_VPID_INVVPID_BIT;
}

static inline bool cpu_has_vmx_ept(void)
{
	return vmcs_config.cpu_based_2nd_exec_ctrl &
		SECONDARY_EXEC_ENABLE_EPT;
}

static inline bool cpu_has_vmx_unrestricted_guest(void)
{
	return vmcs_config.cpu_based_2nd_exec_ctrl &
		SECONDARY_EXEC_UNRESTRICTED_GUEST;
}

static inline bool cpu_has_vmx_ple(void)
{
	return vmcs_config.cpu_based_2nd_exec_ctrl &
		SECONDARY_EXEC_PAUSE_LOOP_EXITING;
}

static inline bool cpu_has_vmx_basic_inout(void)
{
	return (((u64)vmcs_config.basic_cap << 32) & VMX_BASIC_INOUT);
}

static inline bool cpu_need_virtualize_apic_accesses(struct kvm_vcpu *vcpu)
{
	return flexpriority_enabled && lapic_in_kernel(vcpu);
}

static inline bool cpu_has_vmx_vpid(void)
{
	return vmcs_config.cpu_based_2nd_exec_ctrl &
		SECONDARY_EXEC_ENABLE_VPID;
}

static inline bool cpu_has_vmx_rdtscp(void)
{
	return vmcs_config.cpu_based_2nd_exec_ctrl &
		SECONDARY_EXEC_RDTSCP;
}

static inline bool cpu_has_vmx_invpcid(void)
{
	return vmcs_config.cpu_based_2nd_exec_ctrl &
		SECONDARY_EXEC_ENABLE_INVPCID;
}

static inline bool cpu_has_virtual_nmis(void)
{
	return vmcs_config.pin_based_exec_ctrl & PIN_BASED_VIRTUAL_NMIS;
}

static inline bool cpu_has_vmx_wbinvd_exit(void)
{
	return vmcs_config.cpu_based_2nd_exec_ctrl &
		SECONDARY_EXEC_WBINVD_EXITING;
}

static inline bool cpu_has_vmx_shadow_vmcs(void)
{
	u64 vmx_msr;
	rdmsrl(MSR_IA32_VMX_MISC, vmx_msr);
	/* check if the cpu supports writing r/o exit information fields */
	if (!(vmx_msr & MSR_IA32_VMX_MISC_VMWRITE_SHADOW_RO_FIELDS))
		return false;

	return vmcs_config.cpu_based_2nd_exec_ctrl &
		SECONDARY_EXEC_SHADOW_VMCS;
}

static inline bool cpu_has_vmx_pml(void)
{
	return vmcs_config.cpu_based_2nd_exec_ctrl & SECONDARY_EXEC_ENABLE_PML;
1317}
1318
Haozhong Zhang64903d62015-10-20 15:39:09 +08001319static inline bool cpu_has_vmx_tsc_scaling(void)
1320{
1321 return vmcs_config.cpu_based_2nd_exec_ctrl &
1322 SECONDARY_EXEC_TSC_SCALING;
1323}
1324
Bandan Das2a499e42017-08-03 15:54:41 -04001325static inline bool cpu_has_vmx_vmfunc(void)
1326{
1327 return vmcs_config.cpu_based_2nd_exec_ctrl &
1328 SECONDARY_EXEC_ENABLE_VMFUNC;
1329}
1330
Sheng Yang04547152009-04-01 15:52:31 +08001331static inline bool report_flexpriority(void)
1332{
1333 return flexpriority_enabled;
1334}
1335
Jim Mattsonc7c2c7092017-05-05 11:28:09 -07001336static inline unsigned nested_cpu_vmx_misc_cr3_count(struct kvm_vcpu *vcpu)
1337{
1338 return vmx_misc_cr3_count(to_vmx(vcpu)->nested.nested_vmx_misc_low);
1339}
1340
Nadav Har'Elfe3ef052011-05-25 23:10:02 +03001341static inline bool nested_cpu_has(struct vmcs12 *vmcs12, u32 bit)
1342{
1343 return vmcs12->cpu_based_vm_exec_control & bit;
1344}
1345
1346static inline bool nested_cpu_has2(struct vmcs12 *vmcs12, u32 bit)
1347{
1348 return (vmcs12->cpu_based_vm_exec_control &
1349 CPU_BASED_ACTIVATE_SECONDARY_CONTROLS) &&
1350 (vmcs12->secondary_vm_exec_control & bit);
1351}
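/*
 * Note that nested_cpu_has2() only reports a secondary execution control as
 * enabled if L1 also set CPU_BASED_ACTIVATE_SECONDARY_CONTROLS; without that
 * primary bit the secondary controls are treated as all-clear.
 */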
1352
Jan Kiszkaf41245002014-03-07 20:03:13 +01001353static inline bool nested_cpu_has_preemption_timer(struct vmcs12 *vmcs12)
1354{
1355 return vmcs12->pin_based_vm_exec_control &
1356 PIN_BASED_VMX_PREEMPTION_TIMER;
1357}
1358
Nadav Har'El155a97a2013-08-05 11:07:16 +03001359static inline int nested_cpu_has_ept(struct vmcs12 *vmcs12)
1360{
1361 return nested_cpu_has2(vmcs12, SECONDARY_EXEC_ENABLE_EPT);
1362}
1363
Wanpeng Li81dc01f2014-12-04 19:11:07 +08001364static inline bool nested_cpu_has_xsaves(struct vmcs12 *vmcs12)
1365{
Paolo Bonzini3db13482017-08-24 14:48:03 +02001366 return nested_cpu_has2(vmcs12, SECONDARY_EXEC_XSAVES);
Wanpeng Li81dc01f2014-12-04 19:11:07 +08001367}
1368
Bandan Dasc5f983f2017-05-05 15:25:14 -04001369static inline bool nested_cpu_has_pml(struct vmcs12 *vmcs12)
1370{
1371 return nested_cpu_has2(vmcs12, SECONDARY_EXEC_ENABLE_PML);
1372}
1373
Wincy Vanf2b93282015-02-03 23:56:03 +08001374static inline bool nested_cpu_has_virt_x2apic_mode(struct vmcs12 *vmcs12)
1375{
1376 return nested_cpu_has2(vmcs12, SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE);
1377}
1378
Wanpeng Li5c614b32015-10-13 09:18:36 -07001379static inline bool nested_cpu_has_vpid(struct vmcs12 *vmcs12)
1380{
1381 return nested_cpu_has2(vmcs12, SECONDARY_EXEC_ENABLE_VPID);
1382}
1383
Wincy Van82f0dd42015-02-03 23:57:18 +08001384static inline bool nested_cpu_has_apic_reg_virt(struct vmcs12 *vmcs12)
1385{
1386 return nested_cpu_has2(vmcs12, SECONDARY_EXEC_APIC_REGISTER_VIRT);
1387}
1388
Wincy Van608406e2015-02-03 23:57:51 +08001389static inline bool nested_cpu_has_vid(struct vmcs12 *vmcs12)
1390{
1391 return nested_cpu_has2(vmcs12, SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY);
1392}
1393
Wincy Van705699a2015-02-03 23:58:17 +08001394static inline bool nested_cpu_has_posted_intr(struct vmcs12 *vmcs12)
1395{
1396 return vmcs12->pin_based_vm_exec_control & PIN_BASED_POSTED_INTR;
1397}
1398
Bandan Das27c42a12017-08-03 15:54:42 -04001399static inline bool nested_cpu_has_vmfunc(struct vmcs12 *vmcs12)
1400{
1401 return nested_cpu_has2(vmcs12, SECONDARY_EXEC_ENABLE_VMFUNC);
1402}
1403
Bandan Das41ab9372017-08-03 15:54:43 -04001404static inline bool nested_cpu_has_eptp_switching(struct vmcs12 *vmcs12)
1405{
1406 return nested_cpu_has_vmfunc(vmcs12) &&
1407 (vmcs12->vm_function_control &
1408 VMX_VMFUNC_EPTP_SWITCHING);
1409}
1410
Jim Mattsonef85b672016-12-12 11:01:37 -08001411static inline bool is_nmi(u32 intr_info)
Nadav Har'El644d7112011-05-25 23:12:35 +03001412{
1413 return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VALID_MASK))
Jim Mattsonef85b672016-12-12 11:01:37 -08001414 == (INTR_TYPE_NMI_INTR | INTR_INFO_VALID_MASK);
Nadav Har'El644d7112011-05-25 23:12:35 +03001415}
1416
Jan Kiszka533558b2014-01-04 18:47:20 +01001417static void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason,
1418 u32 exit_intr_info,
1419 unsigned long exit_qualification);
Nadav Har'El7c177932011-05-25 23:12:04 +03001420static void nested_vmx_entry_failure(struct kvm_vcpu *vcpu,
1421 struct vmcs12 *vmcs12,
1422 u32 reason, unsigned long qualification);
1423
Rusty Russell8b9cf982007-07-30 16:31:43 +10001424static int __find_msr_index(struct vcpu_vmx *vmx, u32 msr)
Avi Kivity7725f0b2006-12-13 00:34:01 -08001425{
1426 int i;
1427
Gregory Haskinsa2fa3e92007-07-27 08:13:10 -04001428 for (i = 0; i < vmx->nmsrs; ++i)
Avi Kivity26bb0982009-09-07 11:14:12 +03001429 if (vmx_msr_index[vmx->guest_msrs[i].index] == msr)
Eddie Donga75beee2007-05-17 18:55:15 +03001430 return i;
1431 return -1;
1432}
1433
Sheng Yang2384d2b2008-01-17 15:14:33 +08001434static inline void __invvpid(int ext, u16 vpid, gva_t gva)
1435{
1436 struct {
1437 u64 vpid : 16;
1438 u64 rsvd : 48;
1439 u64 gva;
1440 } operand = { vpid, 0, gva };
1441
Avi Kivity4ecac3f2008-05-13 13:23:38 +03001442 asm volatile (__ex(ASM_VMX_INVVPID)
Sheng Yang2384d2b2008-01-17 15:14:33 +08001443 /* CF==1 or ZF==1 --> rc = -1 */
1444 "; ja 1f ; ud2 ; 1:"
1445 : : "a"(&operand), "c"(ext) : "cc", "memory");
1446}
1447
Sheng Yang14394422008-04-28 12:24:45 +08001448static inline void __invept(int ext, u64 eptp, gpa_t gpa)
1449{
1450 struct {
1451 u64 eptp, gpa;
1452 } operand = {eptp, gpa};
1453
Avi Kivity4ecac3f2008-05-13 13:23:38 +03001454 asm volatile (__ex(ASM_VMX_INVEPT)
Sheng Yang14394422008-04-28 12:24:45 +08001455 /* CF==1 or ZF==1 --> rc = -1 */
1456 "; ja 1f ; ud2 ; 1:\n"
1457 : : "a" (&operand), "c" (ext) : "cc", "memory");
1458}
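/*
 * Both __invvpid() and __invept() above hand the CPU a 128-bit descriptor
 * (its address in RAX, the extent type in RCX): a 16-bit vpid, 48 reserved
 * bits and a linear address for INVVPID, and an EPT pointer plus
 * guest-physical address for INVEPT. The "ja 1f ; ud2" sequence turns a
 * failed invalidation (CF or ZF set) into an immediate #UD rather than
 * silently continuing with stale TLB entries.
 */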
1459
Avi Kivity26bb0982009-09-07 11:14:12 +03001460static struct shared_msr_entry *find_msr_entry(struct vcpu_vmx *vmx, u32 msr)
Eddie Donga75beee2007-05-17 18:55:15 +03001461{
1462 int i;
1463
Rusty Russell8b9cf982007-07-30 16:31:43 +10001464 i = __find_msr_index(vmx, msr);
Eddie Donga75beee2007-05-17 18:55:15 +03001465 if (i >= 0)
Gregory Haskinsa2fa3e92007-07-27 08:13:10 -04001466 return &vmx->guest_msrs[i];
Al Viro8b6d44c2007-02-09 16:38:40 +00001467 return NULL;
Avi Kivity7725f0b2006-12-13 00:34:01 -08001468}
1469
Avi Kivity6aa8b732006-12-10 02:21:36 -08001470static void vmcs_clear(struct vmcs *vmcs)
1471{
1472 u64 phys_addr = __pa(vmcs);
1473 u8 error;
1474
Avi Kivity4ecac3f2008-05-13 13:23:38 +03001475 asm volatile (__ex(ASM_VMX_VMCLEAR_RAX) "; setna %0"
Avi Kivity16d8f722010-12-21 16:51:50 +02001476 : "=qm"(error) : "a"(&phys_addr), "m"(phys_addr)
Avi Kivity6aa8b732006-12-10 02:21:36 -08001477 : "cc", "memory");
1478 if (error)
1479 printk(KERN_ERR "kvm: vmclear fail: %p/%llx\n",
1480 vmcs, phys_addr);
1481}
1482
Nadav Har'Eld462b812011-05-24 15:26:10 +03001483static inline void loaded_vmcs_init(struct loaded_vmcs *loaded_vmcs)
1484{
1485 vmcs_clear(loaded_vmcs->vmcs);
Jim Mattson355f4fb2016-10-28 08:29:39 -07001486 if (loaded_vmcs->shadow_vmcs && loaded_vmcs->launched)
1487 vmcs_clear(loaded_vmcs->shadow_vmcs);
Nadav Har'Eld462b812011-05-24 15:26:10 +03001488 loaded_vmcs->cpu = -1;
1489 loaded_vmcs->launched = 0;
1490}
1491
Dongxiao Xu7725b892010-05-11 18:29:38 +08001492static void vmcs_load(struct vmcs *vmcs)
1493{
1494 u64 phys_addr = __pa(vmcs);
1495 u8 error;
1496
1497 asm volatile (__ex(ASM_VMX_VMPTRLD_RAX) "; setna %0"
Avi Kivity16d8f722010-12-21 16:51:50 +02001498 : "=qm"(error) : "a"(&phys_addr), "m"(phys_addr)
Dongxiao Xu7725b892010-05-11 18:29:38 +08001499 : "cc", "memory");
1500 if (error)
Nadav Har'El2844d842011-05-25 23:16:40 +03001501 printk(KERN_ERR "kvm: vmptrld %p/%llx failed\n",
Dongxiao Xu7725b892010-05-11 18:29:38 +08001502 vmcs, phys_addr);
1503}
1504
Dave Young2965faa2015-09-09 15:38:55 -07001505#ifdef CONFIG_KEXEC_CORE
Zhang Yanfei8f536b72012-12-06 23:43:34 +08001506/*
1507 * This bitmap indicates, for each cpu, whether the crash-time
1508 * vmclear operation is enabled on that cpu. All cpus are
1509 * disabled by default.
1510 */
1511static cpumask_t crash_vmclear_enabled_bitmap = CPU_MASK_NONE;
1512
1513static inline void crash_enable_local_vmclear(int cpu)
1514{
1515 cpumask_set_cpu(cpu, &crash_vmclear_enabled_bitmap);
1516}
1517
1518static inline void crash_disable_local_vmclear(int cpu)
1519{
1520 cpumask_clear_cpu(cpu, &crash_vmclear_enabled_bitmap);
1521}
1522
1523static inline int crash_local_vmclear_enabled(int cpu)
1524{
1525 return cpumask_test_cpu(cpu, &crash_vmclear_enabled_bitmap);
1526}
1527
1528static void crash_vmclear_local_loaded_vmcss(void)
1529{
1530 int cpu = raw_smp_processor_id();
1531 struct loaded_vmcs *v;
1532
1533 if (!crash_local_vmclear_enabled(cpu))
1534 return;
1535
1536 list_for_each_entry(v, &per_cpu(loaded_vmcss_on_cpu, cpu),
1537 loaded_vmcss_on_cpu_link)
1538 vmcs_clear(v->vmcs);
1539}
1540#else
1541static inline void crash_enable_local_vmclear(int cpu) { }
1542static inline void crash_disable_local_vmclear(int cpu) { }
Dave Young2965faa2015-09-09 15:38:55 -07001543#endif /* CONFIG_KEXEC_CORE */
Zhang Yanfei8f536b72012-12-06 23:43:34 +08001544
Nadav Har'Eld462b812011-05-24 15:26:10 +03001545static void __loaded_vmcs_clear(void *arg)
Avi Kivity6aa8b732006-12-10 02:21:36 -08001546{
Nadav Har'Eld462b812011-05-24 15:26:10 +03001547 struct loaded_vmcs *loaded_vmcs = arg;
Ingo Molnard3b2c332007-01-05 16:36:23 -08001548 int cpu = raw_smp_processor_id();
Avi Kivity6aa8b732006-12-10 02:21:36 -08001549
Nadav Har'Eld462b812011-05-24 15:26:10 +03001550 if (loaded_vmcs->cpu != cpu)
1551 return; /* vcpu migration can race with cpu offline */
1552 if (per_cpu(current_vmcs, cpu) == loaded_vmcs->vmcs)
Avi Kivity6aa8b732006-12-10 02:21:36 -08001553 per_cpu(current_vmcs, cpu) = NULL;
Zhang Yanfei8f536b72012-12-06 23:43:34 +08001554 crash_disable_local_vmclear(cpu);
Nadav Har'Eld462b812011-05-24 15:26:10 +03001555 list_del(&loaded_vmcs->loaded_vmcss_on_cpu_link);
Xiao Guangrong5a560f82012-11-28 20:54:14 +08001556
1557 /*
1558 * We should ensure that updating loaded_vmcs->loaded_vmcss_on_cpu_link
1559 * happens before setting loaded_vmcs->cpu to -1, which is done in
1560 * loaded_vmcs_init. Otherwise, another cpu could observe cpu == -1 first
1561 * and add the vmcs to its percpu list before it has been deleted here.
1562 */
1563 smp_wmb();
1564
Nadav Har'Eld462b812011-05-24 15:26:10 +03001565 loaded_vmcs_init(loaded_vmcs);
Zhang Yanfei8f536b72012-12-06 23:43:34 +08001566 crash_enable_local_vmclear(cpu);
Avi Kivity6aa8b732006-12-10 02:21:36 -08001567}
1568
Nadav Har'Eld462b812011-05-24 15:26:10 +03001569static void loaded_vmcs_clear(struct loaded_vmcs *loaded_vmcs)
Avi Kivity8d0be2b2007-02-12 00:54:46 -08001570{
Xiao Guangronge6c7d322012-11-28 20:53:15 +08001571 int cpu = loaded_vmcs->cpu;
1572
1573 if (cpu != -1)
1574 smp_call_function_single(cpu,
1575 __loaded_vmcs_clear, loaded_vmcs, 1);
Avi Kivity8d0be2b2007-02-12 00:54:46 -08001576}
1577
Wanpeng Lidd5f5342015-09-23 18:26:57 +08001578static inline void vpid_sync_vcpu_single(int vpid)
Sheng Yang2384d2b2008-01-17 15:14:33 +08001579{
Wanpeng Lidd5f5342015-09-23 18:26:57 +08001580 if (vpid == 0)
Sheng Yang2384d2b2008-01-17 15:14:33 +08001581 return;
1582
Gui Jianfeng518c8ae2010-06-04 08:51:39 +08001583 if (cpu_has_vmx_invvpid_single())
Wanpeng Lidd5f5342015-09-23 18:26:57 +08001584 __invvpid(VMX_VPID_EXTENT_SINGLE_CONTEXT, vpid, 0);
Sheng Yang2384d2b2008-01-17 15:14:33 +08001585}
1586
Gui Jianfengb9d762f2010-06-07 10:32:29 +08001587static inline void vpid_sync_vcpu_global(void)
1588{
1589 if (cpu_has_vmx_invvpid_global())
1590 __invvpid(VMX_VPID_EXTENT_ALL_CONTEXT, 0, 0);
1591}
1592
Wanpeng Lidd5f5342015-09-23 18:26:57 +08001593static inline void vpid_sync_context(int vpid)
Gui Jianfengb9d762f2010-06-07 10:32:29 +08001594{
1595 if (cpu_has_vmx_invvpid_single())
Wanpeng Lidd5f5342015-09-23 18:26:57 +08001596 vpid_sync_vcpu_single(vpid);
Gui Jianfengb9d762f2010-06-07 10:32:29 +08001597 else
1598 vpid_sync_vcpu_global();
1599}
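/*
 * vpid_sync_context() prefers the single-context INVVPID (flush only the
 * translations tagged with this vpid) and falls back to a global flush of
 * all vpids when the CPU only implements the all-context variant.
 */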
1600
Sheng Yang14394422008-04-28 12:24:45 +08001601static inline void ept_sync_global(void)
1602{
David Hildenbrandf5f51582017-08-24 20:51:30 +02001603 __invept(VMX_EPT_EXTENT_GLOBAL, 0, 0);
Sheng Yang14394422008-04-28 12:24:45 +08001604}
1605
1606static inline void ept_sync_context(u64 eptp)
1607{
David Hildenbrand0e1252d2017-08-24 20:51:28 +02001608 if (cpu_has_vmx_invept_context())
1609 __invept(VMX_EPT_EXTENT_CONTEXT, eptp, 0);
1610 else
1611 ept_sync_global();
Sheng Yang14394422008-04-28 12:24:45 +08001612}
1613
Paolo Bonzini8a86aea92015-12-03 15:56:55 +01001614static __always_inline void vmcs_check16(unsigned long field)
1615{
1616 BUILD_BUG_ON_MSG(__builtin_constant_p(field) && ((field) & 0x6001) == 0x2000,
1617 "16-bit accessor invalid for 64-bit field");
1618 BUILD_BUG_ON_MSG(__builtin_constant_p(field) && ((field) & 0x6001) == 0x2001,
1619 "16-bit accessor invalid for 64-bit high field");
1620 BUILD_BUG_ON_MSG(__builtin_constant_p(field) && ((field) & 0x6000) == 0x4000,
1621 "16-bit accessor invalid for 32-bit high field");
1622 BUILD_BUG_ON_MSG(__builtin_constant_p(field) && ((field) & 0x6000) == 0x6000,
1623 "16-bit accessor invalid for natural width field");
1624}
1625
1626static __always_inline void vmcs_check32(unsigned long field)
1627{
1628 BUILD_BUG_ON_MSG(__builtin_constant_p(field) && ((field) & 0x6000) == 0,
1629 "32-bit accessor invalid for 16-bit field");
1630 BUILD_BUG_ON_MSG(__builtin_constant_p(field) && ((field) & 0x6000) == 0x6000,
1631 "32-bit accessor invalid for natural width field");
1632}
1633
1634static __always_inline void vmcs_check64(unsigned long field)
1635{
1636 BUILD_BUG_ON_MSG(__builtin_constant_p(field) && ((field) & 0x6000) == 0,
1637 "64-bit accessor invalid for 16-bit field");
1638 BUILD_BUG_ON_MSG(__builtin_constant_p(field) && ((field) & 0x6001) == 0x2001,
1639 "64-bit accessor invalid for 64-bit high field");
1640 BUILD_BUG_ON_MSG(__builtin_constant_p(field) && ((field) & 0x6000) == 0x4000,
1641 "64-bit accessor invalid for 32-bit field");
1642 BUILD_BUG_ON_MSG(__builtin_constant_p(field) && ((field) & 0x6000) == 0x6000,
1643 "64-bit accessor invalid for natural width field");
1644}
1645
1646static __always_inline void vmcs_checkl(unsigned long field)
1647{
1648 BUILD_BUG_ON_MSG(__builtin_constant_p(field) && ((field) & 0x6000) == 0,
1649 "Natural width accessor invalid for 16-bit field");
1650 BUILD_BUG_ON_MSG(__builtin_constant_p(field) && ((field) & 0x6001) == 0x2000,
1651 "Natural width accessor invalid for 64-bit field");
1652 BUILD_BUG_ON_MSG(__builtin_constant_p(field) && ((field) & 0x6001) == 0x2001,
1653 "Natural width accessor invalid for 64-bit high field");
1654 BUILD_BUG_ON_MSG(__builtin_constant_p(field) && ((field) & 0x6000) == 0x4000,
1655 "Natural width accessor invalid for 32-bit field");
1656}
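/*
 * The width checks above rely on the VMCS field encoding: bits 14:13 of the
 * encoding give the field width (0 = 16-bit, 1 = 64-bit, 2 = 32-bit,
 * 3 = natural width) and bit 0 selects the "high" half of a 64-bit field.
 * For example, GUEST_ES_SELECTOR (0x0800) has bits 14:13 clear, so only the
 * 16-bit accessors accept it as a compile-time constant.
 */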
1657
1658static __always_inline unsigned long __vmcs_readl(unsigned long field)
Avi Kivity6aa8b732006-12-10 02:21:36 -08001659{
Avi Kivity5e520e62011-05-15 10:13:12 -04001660 unsigned long value;
Avi Kivity6aa8b732006-12-10 02:21:36 -08001661
Avi Kivity5e520e62011-05-15 10:13:12 -04001662 asm volatile (__ex_clear(ASM_VMX_VMREAD_RDX_RAX, "%0")
1663 : "=a"(value) : "d"(field) : "cc");
Avi Kivity6aa8b732006-12-10 02:21:36 -08001664 return value;
1665}
1666
Avi Kivity96304212011-05-15 10:13:13 -04001667static __always_inline u16 vmcs_read16(unsigned long field)
Avi Kivity6aa8b732006-12-10 02:21:36 -08001668{
Paolo Bonzini8a86aea92015-12-03 15:56:55 +01001669 vmcs_check16(field);
1670 return __vmcs_readl(field);
Avi Kivity6aa8b732006-12-10 02:21:36 -08001671}
1672
Avi Kivity96304212011-05-15 10:13:13 -04001673static __always_inline u32 vmcs_read32(unsigned long field)
Avi Kivity6aa8b732006-12-10 02:21:36 -08001674{
Paolo Bonzini8a86aea92015-12-03 15:56:55 +01001675 vmcs_check32(field);
1676 return __vmcs_readl(field);
Avi Kivity6aa8b732006-12-10 02:21:36 -08001677}
1678
Avi Kivity96304212011-05-15 10:13:13 -04001679static __always_inline u64 vmcs_read64(unsigned long field)
Avi Kivity6aa8b732006-12-10 02:21:36 -08001680{
Paolo Bonzini8a86aea92015-12-03 15:56:55 +01001681 vmcs_check64(field);
Avi Kivity05b3e0c2006-12-13 00:33:45 -08001682#ifdef CONFIG_X86_64
Paolo Bonzini8a86aea92015-12-03 15:56:55 +01001683 return __vmcs_readl(field);
Avi Kivity6aa8b732006-12-10 02:21:36 -08001684#else
Paolo Bonzini8a86aea92015-12-03 15:56:55 +01001685 return __vmcs_readl(field) | ((u64)__vmcs_readl(field+1) << 32);
Avi Kivity6aa8b732006-12-10 02:21:36 -08001686#endif
1687}
1688
Paolo Bonzini8a86aea92015-12-03 15:56:55 +01001689static __always_inline unsigned long vmcs_readl(unsigned long field)
1690{
1691 vmcs_checkl(field);
1692 return __vmcs_readl(field);
1693}
1694
Avi Kivitye52de1b2007-01-05 16:36:56 -08001695static noinline void vmwrite_error(unsigned long field, unsigned long value)
1696{
1697 printk(KERN_ERR "vmwrite error: reg %lx value %lx (err %d)\n",
1698 field, value, vmcs_read32(VM_INSTRUCTION_ERROR));
1699 dump_stack();
1700}
1701
Paolo Bonzini8a86aea92015-12-03 15:56:55 +01001702static __always_inline void __vmcs_writel(unsigned long field, unsigned long value)
Avi Kivity6aa8b732006-12-10 02:21:36 -08001703{
1704 u8 error;
1705
Avi Kivity4ecac3f2008-05-13 13:23:38 +03001706 asm volatile (__ex(ASM_VMX_VMWRITE_RAX_RDX) "; setna %0"
Mike Dayd77c26f2007-10-08 09:02:08 -04001707 : "=q"(error) : "a"(value), "d"(field) : "cc");
Avi Kivitye52de1b2007-01-05 16:36:56 -08001708 if (unlikely(error))
1709 vmwrite_error(field, value);
Avi Kivity6aa8b732006-12-10 02:21:36 -08001710}
1711
Paolo Bonzini8a86aea92015-12-03 15:56:55 +01001712static __always_inline void vmcs_write16(unsigned long field, u16 value)
Avi Kivity6aa8b732006-12-10 02:21:36 -08001713{
Paolo Bonzini8a86aea92015-12-03 15:56:55 +01001714 vmcs_check16(field);
1715 __vmcs_writel(field, value);
Avi Kivity6aa8b732006-12-10 02:21:36 -08001716}
1717
Paolo Bonzini8a86aea92015-12-03 15:56:55 +01001718static __always_inline void vmcs_write32(unsigned long field, u32 value)
Avi Kivity6aa8b732006-12-10 02:21:36 -08001719{
Paolo Bonzini8a86aea92015-12-03 15:56:55 +01001720 vmcs_check32(field);
1721 __vmcs_writel(field, value);
Avi Kivity6aa8b732006-12-10 02:21:36 -08001722}
1723
Paolo Bonzini8a86aea92015-12-03 15:56:55 +01001724static __always_inline void vmcs_write64(unsigned long field, u64 value)
Avi Kivity6aa8b732006-12-10 02:21:36 -08001725{
Paolo Bonzini8a86aea92015-12-03 15:56:55 +01001726 vmcs_check64(field);
1727 __vmcs_writel(field, value);
Avi Kivity7682f2d2008-05-12 19:25:43 +03001728#ifndef CONFIG_X86_64
Avi Kivity6aa8b732006-12-10 02:21:36 -08001729 asm volatile ("");
Paolo Bonzini8a86aea92015-12-03 15:56:55 +01001730 __vmcs_writel(field+1, value >> 32);
Avi Kivity6aa8b732006-12-10 02:21:36 -08001731#endif
1732}
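/*
 * On 32-bit hosts a 64-bit field cannot be written with a single VMWRITE,
 * so vmcs_write64() stores the low half at the field encoding and the high
 * half at encoding + 1 (the "high" companion field); the empty asm above
 * only acts as a compiler barrier between the two writes.
 */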
1733
Paolo Bonzini8a86aea92015-12-03 15:56:55 +01001734static __always_inline void vmcs_writel(unsigned long field, unsigned long value)
Anthony Liguori2ab455c2007-04-27 09:29:49 +03001735{
Paolo Bonzini8a86aea92015-12-03 15:56:55 +01001736 vmcs_checkl(field);
1737 __vmcs_writel(field, value);
Anthony Liguori2ab455c2007-04-27 09:29:49 +03001738}
1739
Paolo Bonzini8a86aea92015-12-03 15:56:55 +01001740static __always_inline void vmcs_clear_bits(unsigned long field, u32 mask)
Anthony Liguori2ab455c2007-04-27 09:29:49 +03001741{
Paolo Bonzini8a86aea92015-12-03 15:56:55 +01001742 BUILD_BUG_ON_MSG(__builtin_constant_p(field) && ((field) & 0x6000) == 0x2000,
1743 "vmcs_clear_bits does not support 64-bit fields");
1744 __vmcs_writel(field, __vmcs_readl(field) & ~mask);
1745}
1746
1747static __always_inline void vmcs_set_bits(unsigned long field, u32 mask)
1748{
1749 BUILD_BUG_ON_MSG(__builtin_constant_p(field) && ((field) & 0x6000) == 0x2000,
1750 "vmcs_set_bits does not support 64-bit fields");
1751 __vmcs_writel(field, __vmcs_readl(field) | mask);
Anthony Liguori2ab455c2007-04-27 09:29:49 +03001752}
1753
Paolo Bonzini8391ce42016-07-07 14:58:33 +02001754static inline void vm_entry_controls_reset_shadow(struct vcpu_vmx *vmx)
1755{
1756 vmx->vm_entry_controls_shadow = vmcs_read32(VM_ENTRY_CONTROLS);
1757}
1758
Gleb Natapov2961e8762013-11-25 15:37:13 +02001759static inline void vm_entry_controls_init(struct vcpu_vmx *vmx, u32 val)
1760{
1761 vmcs_write32(VM_ENTRY_CONTROLS, val);
1762 vmx->vm_entry_controls_shadow = val;
1763}
1764
1765static inline void vm_entry_controls_set(struct vcpu_vmx *vmx, u32 val)
1766{
1767 if (vmx->vm_entry_controls_shadow != val)
1768 vm_entry_controls_init(vmx, val);
1769}
1770
1771static inline u32 vm_entry_controls_get(struct vcpu_vmx *vmx)
1772{
1773 return vmx->vm_entry_controls_shadow;
1774}
1775
1776
1777static inline void vm_entry_controls_setbit(struct vcpu_vmx *vmx, u32 val)
1778{
1779 vm_entry_controls_set(vmx, vm_entry_controls_get(vmx) | val);
1780}
1781
1782static inline void vm_entry_controls_clearbit(struct vcpu_vmx *vmx, u32 val)
1783{
1784 vm_entry_controls_set(vmx, vm_entry_controls_get(vmx) & ~val);
1785}
1786
Paolo Bonzini8391ce42016-07-07 14:58:33 +02001787static inline void vm_exit_controls_reset_shadow(struct vcpu_vmx *vmx)
1788{
1789 vmx->vm_exit_controls_shadow = vmcs_read32(VM_EXIT_CONTROLS);
1790}
1791
Gleb Natapov2961e8762013-11-25 15:37:13 +02001792static inline void vm_exit_controls_init(struct vcpu_vmx *vmx, u32 val)
1793{
1794 vmcs_write32(VM_EXIT_CONTROLS, val);
1795 vmx->vm_exit_controls_shadow = val;
1796}
1797
1798static inline void vm_exit_controls_set(struct vcpu_vmx *vmx, u32 val)
1799{
1800 if (vmx->vm_exit_controls_shadow != val)
1801 vm_exit_controls_init(vmx, val);
1802}
1803
1804static inline u32 vm_exit_controls_get(struct vcpu_vmx *vmx)
1805{
1806 return vmx->vm_exit_controls_shadow;
1807}
1808
1809
1810static inline void vm_exit_controls_setbit(struct vcpu_vmx *vmx, u32 val)
1811{
1812 vm_exit_controls_set(vmx, vm_exit_controls_get(vmx) | val);
1813}
1814
1815static inline void vm_exit_controls_clearbit(struct vcpu_vmx *vmx, u32 val)
1816{
1817 vm_exit_controls_set(vmx, vm_exit_controls_get(vmx) & ~val);
1818}
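/*
 * The vm_{entry,exit}_controls_* helpers above cache the last value written
 * to VM_ENTRY_CONTROLS/VM_EXIT_CONTROLS in a shadow field so that repeated
 * updates skip the VMWRITE when nothing changed. A minimal usage sketch
 * follows; the helper name is hypothetical and only illustrates the
 * setbit/clearbit pattern with a real control bit (VM_ENTRY_IA32E_MODE).
 */
static inline void vmx_sketch_set_ia32e_mode(struct vcpu_vmx *vmx, bool on)
{
	/* Only a call that actually changes the bit touches the VMCS. */
	if (on)
		vm_entry_controls_setbit(vmx, VM_ENTRY_IA32E_MODE);
	else
		vm_entry_controls_clearbit(vmx, VM_ENTRY_IA32E_MODE);
}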
1819
Avi Kivity2fb92db2011-04-27 19:42:18 +03001820static void vmx_segment_cache_clear(struct vcpu_vmx *vmx)
1821{
1822 vmx->segment_cache.bitmask = 0;
1823}
1824
1825static bool vmx_segment_cache_test_set(struct vcpu_vmx *vmx, unsigned seg,
1826 unsigned field)
1827{
1828 bool ret;
1829 u32 mask = 1 << (seg * SEG_FIELD_NR + field);
1830
1831 if (!(vmx->vcpu.arch.regs_avail & (1 << VCPU_EXREG_SEGMENTS))) {
1832 vmx->vcpu.arch.regs_avail |= (1 << VCPU_EXREG_SEGMENTS);
1833 vmx->segment_cache.bitmask = 0;
1834 }
1835 ret = vmx->segment_cache.bitmask & mask;
1836 vmx->segment_cache.bitmask |= mask;
1837 return ret;
1838}
1839
1840static u16 vmx_read_guest_seg_selector(struct vcpu_vmx *vmx, unsigned seg)
1841{
1842 u16 *p = &vmx->segment_cache.seg[seg].selector;
1843
1844 if (!vmx_segment_cache_test_set(vmx, seg, SEG_FIELD_SEL))
1845 *p = vmcs_read16(kvm_vmx_segment_fields[seg].selector);
1846 return *p;
1847}
1848
1849static ulong vmx_read_guest_seg_base(struct vcpu_vmx *vmx, unsigned seg)
1850{
1851 ulong *p = &vmx->segment_cache.seg[seg].base;
1852
1853 if (!vmx_segment_cache_test_set(vmx, seg, SEG_FIELD_BASE))
1854 *p = vmcs_readl(kvm_vmx_segment_fields[seg].base);
1855 return *p;
1856}
1857
1858static u32 vmx_read_guest_seg_limit(struct vcpu_vmx *vmx, unsigned seg)
1859{
1860 u32 *p = &vmx->segment_cache.seg[seg].limit;
1861
1862 if (!vmx_segment_cache_test_set(vmx, seg, SEG_FIELD_LIMIT))
1863 *p = vmcs_read32(kvm_vmx_segment_fields[seg].limit);
1864 return *p;
1865}
1866
1867static u32 vmx_read_guest_seg_ar(struct vcpu_vmx *vmx, unsigned seg)
1868{
1869 u32 *p = &vmx->segment_cache.seg[seg].ar;
1870
1871 if (!vmx_segment_cache_test_set(vmx, seg, SEG_FIELD_AR))
1872 *p = vmcs_read32(kvm_vmx_segment_fields[seg].ar_bytes);
1873 return *p;
1874}
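/*
 * The segment cache above keeps one validity bit per (segment, field) pair,
 * at bit position seg * SEG_FIELD_NR + field. The VCPU_EXREG_SEGMENTS bit
 * in regs_avail says whether the bitmask itself is current; when it is
 * clear, the first lookup resets the bitmask, so clearing that bit
 * invalidates every cached selector, base, limit and AR value at once.
 */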
1875
Avi Kivityabd3f2d2007-05-02 17:57:40 +03001876static void update_exception_bitmap(struct kvm_vcpu *vcpu)
1877{
1878 u32 eb;
1879
Liran Alonac9b3052017-11-06 16:15:10 +02001880 eb = (1u << PF_VECTOR) | (1u << MC_VECTOR) |
Paolo Bonzinibd7e5b02017-02-03 21:18:52 -08001881 (1u << DB_VECTOR) | (1u << AC_VECTOR);
Jan Kiszkafd7373c2010-01-20 18:20:20 +01001882 if ((vcpu->guest_debug &
1883 (KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP)) ==
1884 (KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP))
1885 eb |= 1u << BP_VECTOR;
Avi Kivity7ffd92c2009-06-09 14:10:45 +03001886 if (to_vmx(vcpu)->rmode.vm86_active)
Avi Kivityabd3f2d2007-05-02 17:57:40 +03001887 eb = ~0;
Avi Kivity089d0342009-03-23 18:26:32 +02001888 if (enable_ept)
Sheng Yang14394422008-04-28 12:24:45 +08001889 eb &= ~(1u << PF_VECTOR); /* bypass_guest_pf = 0 */
Nadav Har'El36cf24e2011-05-25 23:15:08 +03001890
1891 /* When we are running a nested L2 guest and L1 has specified its own
1892 * exception bitmap for it, we must trap the same exceptions and forward
1893 * them to L1. While running L2, the exceptions set above are only
1894 * handled here if L1 did not ask for them.
1895 */
1896 if (is_guest_mode(vcpu))
1897 eb |= get_vmcs12(vcpu)->exception_bitmap;
Liran Alonac9b3052017-11-06 16:15:10 +02001898 else
1899 eb |= 1u << UD_VECTOR;
Nadav Har'El36cf24e2011-05-25 23:15:08 +03001900
Avi Kivityabd3f2d2007-05-02 17:57:40 +03001901 vmcs_write32(EXCEPTION_BITMAP, eb);
1902}
1903
Gleb Natapov2961e8762013-11-25 15:37:13 +02001904static void clear_atomic_switch_msr_special(struct vcpu_vmx *vmx,
1905 unsigned long entry, unsigned long exit)
Gleb Natapov8bf00a52011-10-05 14:01:22 +02001906{
Gleb Natapov2961e8762013-11-25 15:37:13 +02001907 vm_entry_controls_clearbit(vmx, entry);
1908 vm_exit_controls_clearbit(vmx, exit);
Gleb Natapov8bf00a52011-10-05 14:01:22 +02001909}
1910
Avi Kivity61d2ef22010-04-28 16:40:38 +03001911static void clear_atomic_switch_msr(struct vcpu_vmx *vmx, unsigned msr)
1912{
1913 unsigned i;
1914 struct msr_autoload *m = &vmx->msr_autoload;
1915
Gleb Natapov8bf00a52011-10-05 14:01:22 +02001916 switch (msr) {
1917 case MSR_EFER:
1918 if (cpu_has_load_ia32_efer) {
Gleb Natapov2961e8762013-11-25 15:37:13 +02001919 clear_atomic_switch_msr_special(vmx,
1920 VM_ENTRY_LOAD_IA32_EFER,
Gleb Natapov8bf00a52011-10-05 14:01:22 +02001921 VM_EXIT_LOAD_IA32_EFER);
1922 return;
1923 }
1924 break;
1925 case MSR_CORE_PERF_GLOBAL_CTRL:
1926 if (cpu_has_load_perf_global_ctrl) {
Gleb Natapov2961e8762013-11-25 15:37:13 +02001927 clear_atomic_switch_msr_special(vmx,
Gleb Natapov8bf00a52011-10-05 14:01:22 +02001928 VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL,
1929 VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL);
1930 return;
1931 }
1932 break;
Avi Kivity110312c2010-12-21 12:54:20 +02001933 }
1934
Avi Kivity61d2ef22010-04-28 16:40:38 +03001935 for (i = 0; i < m->nr; ++i)
1936 if (m->guest[i].index == msr)
1937 break;
1938
1939 if (i == m->nr)
1940 return;
1941 --m->nr;
1942 m->guest[i] = m->guest[m->nr];
1943 m->host[i] = m->host[m->nr];
1944 vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, m->nr);
1945 vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, m->nr);
1946}
1947
Gleb Natapov2961e8762013-11-25 15:37:13 +02001948static void add_atomic_switch_msr_special(struct vcpu_vmx *vmx,
1949 unsigned long entry, unsigned long exit,
1950 unsigned long guest_val_vmcs, unsigned long host_val_vmcs,
1951 u64 guest_val, u64 host_val)
Gleb Natapov8bf00a52011-10-05 14:01:22 +02001952{
1953 vmcs_write64(guest_val_vmcs, guest_val);
1954 vmcs_write64(host_val_vmcs, host_val);
Gleb Natapov2961e8762013-11-25 15:37:13 +02001955 vm_entry_controls_setbit(vmx, entry);
1956 vm_exit_controls_setbit(vmx, exit);
Gleb Natapov8bf00a52011-10-05 14:01:22 +02001957}
1958
Avi Kivity61d2ef22010-04-28 16:40:38 +03001959static void add_atomic_switch_msr(struct vcpu_vmx *vmx, unsigned msr,
1960 u64 guest_val, u64 host_val)
1961{
1962 unsigned i;
1963 struct msr_autoload *m = &vmx->msr_autoload;
1964
Gleb Natapov8bf00a52011-10-05 14:01:22 +02001965 switch (msr) {
1966 case MSR_EFER:
1967 if (cpu_has_load_ia32_efer) {
Gleb Natapov2961e8762013-11-25 15:37:13 +02001968 add_atomic_switch_msr_special(vmx,
1969 VM_ENTRY_LOAD_IA32_EFER,
Gleb Natapov8bf00a52011-10-05 14:01:22 +02001970 VM_EXIT_LOAD_IA32_EFER,
1971 GUEST_IA32_EFER,
1972 HOST_IA32_EFER,
1973 guest_val, host_val);
1974 return;
1975 }
1976 break;
1977 case MSR_CORE_PERF_GLOBAL_CTRL:
1978 if (cpu_has_load_perf_global_ctrl) {
Gleb Natapov2961e8762013-11-25 15:37:13 +02001979 add_atomic_switch_msr_special(vmx,
Gleb Natapov8bf00a52011-10-05 14:01:22 +02001980 VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL,
1981 VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL,
1982 GUEST_IA32_PERF_GLOBAL_CTRL,
1983 HOST_IA32_PERF_GLOBAL_CTRL,
1984 guest_val, host_val);
1985 return;
1986 }
1987 break;
Radim Krčmář7099e2e2016-03-04 15:08:42 +01001988 case MSR_IA32_PEBS_ENABLE:
1989 /* PEBS needs a quiescent period after being disabled (to write
1990 * a record). Disabling PEBS through VMX MSR swapping doesn't
1991 * provide that period, so a CPU could write host's record into
1992 * guest's memory.
1993 */
1994 wrmsrl(MSR_IA32_PEBS_ENABLE, 0);
Avi Kivity110312c2010-12-21 12:54:20 +02001995 }
1996
Avi Kivity61d2ef22010-04-28 16:40:38 +03001997 for (i = 0; i < m->nr; ++i)
1998 if (m->guest[i].index == msr)
1999 break;
2000
Gleb Natapove7fc6f93b2011-10-05 14:01:24 +02002001 if (i == NR_AUTOLOAD_MSRS) {
Michael S. Tsirkin60266202013-10-31 00:34:56 +02002002 printk_once(KERN_WARNING "Not enough msr switch entries. "
Gleb Natapove7fc6f93b2011-10-05 14:01:24 +02002003 "Can't add msr %x\n", msr);
2004 return;
2005 } else if (i == m->nr) {
Avi Kivity61d2ef22010-04-28 16:40:38 +03002006 ++m->nr;
2007 vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, m->nr);
2008 vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, m->nr);
2009 }
2010
2011 m->guest[i].index = msr;
2012 m->guest[i].value = guest_val;
2013 m->host[i].index = msr;
2014 m->host[i].value = host_val;
2015}
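/*
 * The msr_autoload guest/host arrays above back the VM-entry and VM-exit
 * MSR-load areas: the CPU itself loads the guest values on entry and the
 * host values on exit. When a dedicated VMCS control exists (EFER,
 * PERF_GLOBAL_CTRL), the special-cased paths above are used instead. A
 * hedged usage sketch with an arbitrarily chosen MSR; the helper name is
 * hypothetical:
 */
static inline void vmx_sketch_autoload_pat(struct vcpu_vmx *vmx,
					   u64 guest_pat, u64 host_pat)
{
	/* Have the CPU load guest_pat on VM entry and host_pat on VM exit. */
	add_atomic_switch_msr(vmx, MSR_IA32_CR_PAT, guest_pat, host_pat);
}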
2016
Avi Kivity92c0d902009-10-29 11:00:16 +02002017static bool update_transition_efer(struct vcpu_vmx *vmx, int efer_offset)
Eddie Dong2cc51562007-05-21 07:28:09 +03002018{
Paolo Bonzini844a5fe2016-03-08 12:13:39 +01002019 u64 guest_efer = vmx->vcpu.arch.efer;
2020 u64 ignore_bits = 0;
Eddie Dong2cc51562007-05-21 07:28:09 +03002021
Paolo Bonzini844a5fe2016-03-08 12:13:39 +01002022 if (!enable_ept) {
2023 /*
2024 * NX is needed to handle CR0.WP=1, CR4.SMEP=1. Testing
2025 * host CPUID is more efficient than testing guest CPUID
2026 * or CR4. Host SMEP is anyway a requirement for guest SMEP.
2027 */
2028 if (boot_cpu_has(X86_FEATURE_SMEP))
2029 guest_efer |= EFER_NX;
2030 else if (!(guest_efer & EFER_NX))
2031 ignore_bits |= EFER_NX;
2032 }
Roel Kluin3a34a882009-08-04 02:08:45 -07002033
Avi Kivity51c6cf62007-08-29 03:48:05 +03002034 /*
Paolo Bonzini844a5fe2016-03-08 12:13:39 +01002035 * LMA and LME handled by hardware; SCE meaningless outside long mode.
Avi Kivity51c6cf62007-08-29 03:48:05 +03002036 */
Paolo Bonzini844a5fe2016-03-08 12:13:39 +01002037 ignore_bits |= EFER_SCE;
Avi Kivity51c6cf62007-08-29 03:48:05 +03002038#ifdef CONFIG_X86_64
2039 ignore_bits |= EFER_LMA | EFER_LME;
2040 /* SCE is meaningful only in long mode on Intel */
2041 if (guest_efer & EFER_LMA)
2042 ignore_bits &= ~(u64)EFER_SCE;
2043#endif
Avi Kivity84ad33e2010-04-28 16:42:29 +03002044
2045 clear_atomic_switch_msr(vmx, MSR_EFER);
Andy Lutomirskif6577a5f2014-11-07 18:25:18 -08002046
2047 /*
2048 * On EPT, we can't emulate NX, so we must switch EFER atomically.
2049 * On CPUs that support "load IA32_EFER", always switch EFER
2050 * atomically, since it's faster than switching it manually.
2051 */
2052 if (cpu_has_load_ia32_efer ||
2053 (enable_ept && ((vmx->vcpu.arch.efer ^ host_efer) & EFER_NX))) {
Avi Kivity84ad33e2010-04-28 16:42:29 +03002054 if (!(guest_efer & EFER_LMA))
2055 guest_efer &= ~EFER_LME;
Andy Lutomirski54b98bf2014-11-10 11:19:15 -08002056 if (guest_efer != host_efer)
2057 add_atomic_switch_msr(vmx, MSR_EFER,
2058 guest_efer, host_efer);
Avi Kivity84ad33e2010-04-28 16:42:29 +03002059 return false;
Paolo Bonzini844a5fe2016-03-08 12:13:39 +01002060 } else {
2061 guest_efer &= ~ignore_bits;
2062 guest_efer |= host_efer & ignore_bits;
Avi Kivity84ad33e2010-04-28 16:42:29 +03002063
Paolo Bonzini844a5fe2016-03-08 12:13:39 +01002064 vmx->guest_msrs[efer_offset].data = guest_efer;
2065 vmx->guest_msrs[efer_offset].mask = ~ignore_bits;
2066
2067 return true;
2068 }
Avi Kivity51c6cf62007-08-29 03:48:05 +03002069}
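/*
 * In short: when the hardware can switch EFER atomically (or, with EPT
 * enabled, guest and host disagree on NX), update_transition_efer() puts
 * EFER on the autoload list and returns false so the shared-MSR path is
 * skipped; otherwise it copies the ignored bits from the host value into
 * the guest value and returns true, so the lazy shared-MSR write only
 * touches the bits that actually differ.
 */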
2070
Andy Lutomirskie28baea2017-02-20 08:56:11 -08002071#ifdef CONFIG_X86_32
2072/*
2073 * On 32-bit kernels, VM exits still load the FS and GS bases from the
2074 * VMCS rather than the segment table. KVM uses this helper to figure
2075 * out the current bases to poke them into the VMCS before entry.
2076 */
Gleb Natapov2d49ec72010-02-25 12:43:09 +02002077static unsigned long segment_base(u16 selector)
2078{
Andy Lutomirski8c2e41f2017-02-20 08:56:12 -08002079 struct desc_struct *table;
Gleb Natapov2d49ec72010-02-25 12:43:09 +02002080 unsigned long v;
2081
Andy Lutomirski8c2e41f2017-02-20 08:56:12 -08002082 if (!(selector & ~SEGMENT_RPL_MASK))
Gleb Natapov2d49ec72010-02-25 12:43:09 +02002083 return 0;
2084
Thomas Garnier45fc8752017-03-14 10:05:08 -07002085 table = get_current_gdt_ro();
Gleb Natapov2d49ec72010-02-25 12:43:09 +02002086
Andy Lutomirski8c2e41f2017-02-20 08:56:12 -08002087 if ((selector & SEGMENT_TI_MASK) == SEGMENT_LDT) {
Gleb Natapov2d49ec72010-02-25 12:43:09 +02002088 u16 ldt_selector = kvm_read_ldt();
2089
Andy Lutomirski8c2e41f2017-02-20 08:56:12 -08002090 if (!(ldt_selector & ~SEGMENT_RPL_MASK))
Gleb Natapov2d49ec72010-02-25 12:43:09 +02002091 return 0;
2092
Andy Lutomirski8c2e41f2017-02-20 08:56:12 -08002093 table = (struct desc_struct *)segment_base(ldt_selector);
Gleb Natapov2d49ec72010-02-25 12:43:09 +02002094 }
Andy Lutomirski8c2e41f2017-02-20 08:56:12 -08002095 v = get_desc_base(&table[selector >> 3]);
Gleb Natapov2d49ec72010-02-25 12:43:09 +02002096 return v;
2097}
Andy Lutomirskie28baea2017-02-20 08:56:11 -08002098#endif
Gleb Natapov2d49ec72010-02-25 12:43:09 +02002099
Avi Kivity04d2cc72007-09-10 18:10:54 +03002100static void vmx_save_host_state(struct kvm_vcpu *vcpu)
Avi Kivity33ed6322007-05-02 16:54:03 +03002101{
Avi Kivity04d2cc72007-09-10 18:10:54 +03002102 struct vcpu_vmx *vmx = to_vmx(vcpu);
Avi Kivity26bb0982009-09-07 11:14:12 +03002103 int i;
Avi Kivity04d2cc72007-09-10 18:10:54 +03002104
Gregory Haskinsa2fa3e92007-07-27 08:13:10 -04002105 if (vmx->host_state.loaded)
Avi Kivity33ed6322007-05-02 16:54:03 +03002106 return;
2107
Gregory Haskinsa2fa3e92007-07-27 08:13:10 -04002108 vmx->host_state.loaded = 1;
Avi Kivity33ed6322007-05-02 16:54:03 +03002109 /*
2110 * Set host fs and gs selectors. Unfortunately, 22.2.3 does not
2111 * allow segment selectors with cpl > 0 or ti == 1.
2112 */
Avi Kivityd6e88ae2008-07-10 16:53:33 +03002113 vmx->host_state.ldt_sel = kvm_read_ldt();
Laurent Vivier152d3f22007-08-23 16:33:11 +02002114 vmx->host_state.gs_ldt_reload_needed = vmx->host_state.ldt_sel;
Avi Kivity9581d442010-10-19 16:46:55 +02002115 savesegment(fs, vmx->host_state.fs_sel);
Laurent Vivier152d3f22007-08-23 16:33:11 +02002116 if (!(vmx->host_state.fs_sel & 7)) {
Gregory Haskinsa2fa3e92007-07-27 08:13:10 -04002117 vmcs_write16(HOST_FS_SELECTOR, vmx->host_state.fs_sel);
Laurent Vivier152d3f22007-08-23 16:33:11 +02002118 vmx->host_state.fs_reload_needed = 0;
2119 } else {
Avi Kivity33ed6322007-05-02 16:54:03 +03002120 vmcs_write16(HOST_FS_SELECTOR, 0);
Laurent Vivier152d3f22007-08-23 16:33:11 +02002121 vmx->host_state.fs_reload_needed = 1;
Avi Kivity33ed6322007-05-02 16:54:03 +03002122 }
Avi Kivity9581d442010-10-19 16:46:55 +02002123 savesegment(gs, vmx->host_state.gs_sel);
Gregory Haskinsa2fa3e92007-07-27 08:13:10 -04002124 if (!(vmx->host_state.gs_sel & 7))
2125 vmcs_write16(HOST_GS_SELECTOR, vmx->host_state.gs_sel);
Avi Kivity33ed6322007-05-02 16:54:03 +03002126 else {
2127 vmcs_write16(HOST_GS_SELECTOR, 0);
Laurent Vivier152d3f22007-08-23 16:33:11 +02002128 vmx->host_state.gs_ldt_reload_needed = 1;
Avi Kivity33ed6322007-05-02 16:54:03 +03002129 }
2130
2131#ifdef CONFIG_X86_64
Avi Kivityb2da15a2012-05-13 19:53:24 +03002132 savesegment(ds, vmx->host_state.ds_sel);
2133 savesegment(es, vmx->host_state.es_sel);
2134#endif
2135
2136#ifdef CONFIG_X86_64
Avi Kivity33ed6322007-05-02 16:54:03 +03002137 vmcs_writel(HOST_FS_BASE, read_msr(MSR_FS_BASE));
2138 vmcs_writel(HOST_GS_BASE, read_msr(MSR_GS_BASE));
2139#else
Gregory Haskinsa2fa3e92007-07-27 08:13:10 -04002140 vmcs_writel(HOST_FS_BASE, segment_base(vmx->host_state.fs_sel));
2141 vmcs_writel(HOST_GS_BASE, segment_base(vmx->host_state.gs_sel));
Avi Kivity33ed6322007-05-02 16:54:03 +03002142#endif
Avi Kivity707c0872007-05-02 17:33:43 +03002143
2144#ifdef CONFIG_X86_64
Avi Kivityc8770e72010-11-11 12:37:26 +02002145 rdmsrl(MSR_KERNEL_GS_BASE, vmx->msr_host_kernel_gs_base);
2146 if (is_long_mode(&vmx->vcpu))
Avi Kivity44ea2b12009-09-06 15:55:37 +03002147 wrmsrl(MSR_KERNEL_GS_BASE, vmx->msr_guest_kernel_gs_base);
Avi Kivity707c0872007-05-02 17:33:43 +03002148#endif
Liu, Jinsongda8999d2014-02-24 10:55:46 +00002149 if (boot_cpu_has(X86_FEATURE_MPX))
2150 rdmsrl(MSR_IA32_BNDCFGS, vmx->host_state.msr_host_bndcfgs);
Avi Kivity26bb0982009-09-07 11:14:12 +03002151 for (i = 0; i < vmx->save_nmsrs; ++i)
2152 kvm_set_shared_msr(vmx->guest_msrs[i].index,
Avi Kivityd5696722009-12-02 12:28:47 +02002153 vmx->guest_msrs[i].data,
2154 vmx->guest_msrs[i].mask);
Avi Kivity33ed6322007-05-02 16:54:03 +03002155}
2156
Avi Kivitya9b21b62008-06-24 11:48:49 +03002157static void __vmx_load_host_state(struct vcpu_vmx *vmx)
Avi Kivity33ed6322007-05-02 16:54:03 +03002158{
Gregory Haskinsa2fa3e92007-07-27 08:13:10 -04002159 if (!vmx->host_state.loaded)
Avi Kivity33ed6322007-05-02 16:54:03 +03002160 return;
2161
Avi Kivitye1beb1d2007-11-18 13:50:24 +02002162 ++vmx->vcpu.stat.host_state_reload;
Gregory Haskinsa2fa3e92007-07-27 08:13:10 -04002163 vmx->host_state.loaded = 0;
Avi Kivityc8770e72010-11-11 12:37:26 +02002164#ifdef CONFIG_X86_64
2165 if (is_long_mode(&vmx->vcpu))
2166 rdmsrl(MSR_KERNEL_GS_BASE, vmx->msr_guest_kernel_gs_base);
2167#endif
Laurent Vivier152d3f22007-08-23 16:33:11 +02002168 if (vmx->host_state.gs_ldt_reload_needed) {
Avi Kivityd6e88ae2008-07-10 16:53:33 +03002169 kvm_load_ldt(vmx->host_state.ldt_sel);
Avi Kivity33ed6322007-05-02 16:54:03 +03002170#ifdef CONFIG_X86_64
Avi Kivity9581d442010-10-19 16:46:55 +02002171 load_gs_index(vmx->host_state.gs_sel);
Avi Kivity9581d442010-10-19 16:46:55 +02002172#else
2173 loadsegment(gs, vmx->host_state.gs_sel);
Avi Kivity33ed6322007-05-02 16:54:03 +03002174#endif
Avi Kivity33ed6322007-05-02 16:54:03 +03002175 }
Avi Kivity0a77fe42010-10-19 18:48:35 +02002176 if (vmx->host_state.fs_reload_needed)
2177 loadsegment(fs, vmx->host_state.fs_sel);
Avi Kivityb2da15a2012-05-13 19:53:24 +03002178#ifdef CONFIG_X86_64
2179 if (unlikely(vmx->host_state.ds_sel | vmx->host_state.es_sel)) {
2180 loadsegment(ds, vmx->host_state.ds_sel);
2181 loadsegment(es, vmx->host_state.es_sel);
2182 }
Avi Kivityb2da15a2012-05-13 19:53:24 +03002183#endif
Andy Lutomirskib7ffc442017-02-20 08:56:14 -08002184 invalidate_tss_limit();
Avi Kivity44ea2b12009-09-06 15:55:37 +03002185#ifdef CONFIG_X86_64
Avi Kivityc8770e72010-11-11 12:37:26 +02002186 wrmsrl(MSR_KERNEL_GS_BASE, vmx->msr_host_kernel_gs_base);
Avi Kivity44ea2b12009-09-06 15:55:37 +03002187#endif
Liu, Jinsongda8999d2014-02-24 10:55:46 +00002188 if (vmx->host_state.msr_host_bndcfgs)
2189 wrmsrl(MSR_IA32_BNDCFGS, vmx->host_state.msr_host_bndcfgs);
Thomas Garnier45fc8752017-03-14 10:05:08 -07002190 load_fixmap_gdt(raw_smp_processor_id());
Avi Kivity33ed6322007-05-02 16:54:03 +03002191}
2192
Avi Kivitya9b21b62008-06-24 11:48:49 +03002193static void vmx_load_host_state(struct vcpu_vmx *vmx)
2194{
2195 preempt_disable();
2196 __vmx_load_host_state(vmx);
2197 preempt_enable();
2198}
2199
Feng Wu28b835d2015-09-18 22:29:54 +08002200static void vmx_vcpu_pi_load(struct kvm_vcpu *vcpu, int cpu)
2201{
2202 struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu);
2203 struct pi_desc old, new;
2204 unsigned int dest;
2205
Paolo Bonzini31afb2e2017-06-06 12:57:06 +02002206 /*
2207 * In case of hot-plug or hot-unplug, we may have to undo
2208 * vmx_vcpu_pi_put even if there is no assigned device. And we
2209 * always keep PI.NDST up to date for simplicity: it makes the
2210 * code easier, and CPU migration is not a fast path.
2211 */
2212 if (!pi_test_sn(pi_desc) && vcpu->cpu == cpu)
Feng Wu28b835d2015-09-18 22:29:54 +08002213 return;
2214
Paolo Bonzini31afb2e2017-06-06 12:57:06 +02002215 /*
2216 * First handle the simple case where no cmpxchg is necessary; just
2217 * allow posting non-urgent interrupts.
2218 *
2219 * If the 'nv' field is POSTED_INTR_WAKEUP_VECTOR, do not change
2220 * PI.NDST: pi_post_block will do it for us and the wakeup_handler
2221 * expects the VCPU to be on the blocked_vcpu_list that matches
2222 * PI.NDST.
2223 */
2224 if (pi_desc->nv == POSTED_INTR_WAKEUP_VECTOR ||
2225 vcpu->cpu == cpu) {
2226 pi_clear_sn(pi_desc);
2227 return;
2228 }
2229
2230 /* The full case. */
Feng Wu28b835d2015-09-18 22:29:54 +08002231 do {
2232 old.control = new.control = pi_desc->control;
2233
Paolo Bonzini31afb2e2017-06-06 12:57:06 +02002234 dest = cpu_physical_id(cpu);
Feng Wu28b835d2015-09-18 22:29:54 +08002235
Paolo Bonzini31afb2e2017-06-06 12:57:06 +02002236 if (x2apic_enabled())
2237 new.ndst = dest;
2238 else
2239 new.ndst = (dest << 8) & 0xFF00;
Feng Wu28b835d2015-09-18 22:29:54 +08002240
Feng Wu28b835d2015-09-18 22:29:54 +08002241 new.sn = 0;
Paolo Bonzinic0a16662017-09-28 17:58:41 +02002242 } while (cmpxchg64(&pi_desc->control, old.control,
2243 new.control) != old.control);
Feng Wu28b835d2015-09-18 22:29:54 +08002244}
Xiao Guangrong1be0e612016-03-22 16:51:18 +08002245
Peter Feinerc95ba922016-08-17 09:36:47 -07002246static void decache_tsc_multiplier(struct vcpu_vmx *vmx)
2247{
2248 vmx->current_tsc_ratio = vmx->vcpu.arch.tsc_scaling_ratio;
2249 vmcs_write64(TSC_MULTIPLIER, vmx->current_tsc_ratio);
2250}
2251
Avi Kivity6aa8b732006-12-10 02:21:36 -08002252/*
2253 * Switches to specified vcpu, until a matching vcpu_put(), but assumes
2254 * vcpu mutex is already taken.
2255 */
Avi Kivity15ad7142007-07-11 18:17:21 +03002256static void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
Avi Kivity6aa8b732006-12-10 02:21:36 -08002257{
Gregory Haskinsa2fa3e92007-07-27 08:13:10 -04002258 struct vcpu_vmx *vmx = to_vmx(vcpu);
Jim Mattsonb80c76e2016-07-29 18:56:53 -07002259 bool already_loaded = vmx->loaded_vmcs->cpu == cpu;
Avi Kivity6aa8b732006-12-10 02:21:36 -08002260
Jim Mattsonb80c76e2016-07-29 18:56:53 -07002261 if (!already_loaded) {
David Hildenbrandfe0e80b2017-03-10 12:47:13 +01002262 loaded_vmcs_clear(vmx->loaded_vmcs);
Dongxiao Xu92fe13b2010-05-11 18:29:42 +08002263 local_irq_disable();
Zhang Yanfei8f536b72012-12-06 23:43:34 +08002264 crash_disable_local_vmclear(cpu);
Xiao Guangrong5a560f82012-11-28 20:54:14 +08002265
2266 /*
2267 * Read loaded_vmcs->cpu should be before fetching
2268 * loaded_vmcs->loaded_vmcss_on_cpu_link.
2269 * See the comments in __loaded_vmcs_clear().
2270 */
2271 smp_rmb();
2272
Nadav Har'Eld462b812011-05-24 15:26:10 +03002273 list_add(&vmx->loaded_vmcs->loaded_vmcss_on_cpu_link,
2274 &per_cpu(loaded_vmcss_on_cpu, cpu));
Zhang Yanfei8f536b72012-12-06 23:43:34 +08002275 crash_enable_local_vmclear(cpu);
Dongxiao Xu92fe13b2010-05-11 18:29:42 +08002276 local_irq_enable();
Jim Mattsonb80c76e2016-07-29 18:56:53 -07002277 }
2278
2279 if (per_cpu(current_vmcs, cpu) != vmx->loaded_vmcs->vmcs) {
2280 per_cpu(current_vmcs, cpu) = vmx->loaded_vmcs->vmcs;
2281 vmcs_load(vmx->loaded_vmcs->vmcs);
2282 }
2283
2284 if (!already_loaded) {
Andy Lutomirski59c58ceb2017-03-22 14:32:33 -07002285 void *gdt = get_current_gdt_ro();
Jim Mattsonb80c76e2016-07-29 18:56:53 -07002286 unsigned long sysenter_esp;
2287
2288 kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
Dongxiao Xu92fe13b2010-05-11 18:29:42 +08002289
Avi Kivity6aa8b732006-12-10 02:21:36 -08002290 /*
2291 * Linux uses per-cpu TSS and GDT, so set these when switching
Andy Lutomirskie0c23062017-02-20 08:56:10 -08002292 * processors. See 22.2.4.
Avi Kivity6aa8b732006-12-10 02:21:36 -08002293 */
Andy Lutomirskie0c23062017-02-20 08:56:10 -08002294 vmcs_writel(HOST_TR_BASE,
2295 (unsigned long)this_cpu_ptr(&cpu_tss));
Andy Lutomirski59c58ceb2017-03-22 14:32:33 -07002296 vmcs_writel(HOST_GDTR_BASE, (unsigned long)gdt); /* 22.2.4 */
Avi Kivity6aa8b732006-12-10 02:21:36 -08002297
Andy Lutomirskib7ffc442017-02-20 08:56:14 -08002298 /*
2299 * VM exits change the host TR limit to 0x67 after a VM
2300 * exit. This is okay, since 0x67 covers everything except
2301 * the IO bitmap, and we have code to handle the IO bitmap
2302 * being lost after a VM exit.
2303 */
2304 BUILD_BUG_ON(IO_BITMAP_OFFSET - 1 != 0x67);
2305
Avi Kivity6aa8b732006-12-10 02:21:36 -08002306 rdmsrl(MSR_IA32_SYSENTER_ESP, sysenter_esp);
2307 vmcs_writel(HOST_IA32_SYSENTER_ESP, sysenter_esp); /* 22.2.3 */
Haozhong Zhangff2c3a12015-10-20 15:39:10 +08002308
Nadav Har'Eld462b812011-05-24 15:26:10 +03002309 vmx->loaded_vmcs->cpu = cpu;
Avi Kivity6aa8b732006-12-10 02:21:36 -08002310 }
Feng Wu28b835d2015-09-18 22:29:54 +08002311
Owen Hofmann2680d6d2016-03-01 13:36:13 -08002312 /* Setup TSC multiplier */
2313 if (kvm_has_tsc_control &&
Peter Feinerc95ba922016-08-17 09:36:47 -07002314 vmx->current_tsc_ratio != vcpu->arch.tsc_scaling_ratio)
2315 decache_tsc_multiplier(vmx);
Owen Hofmann2680d6d2016-03-01 13:36:13 -08002316
Feng Wu28b835d2015-09-18 22:29:54 +08002317 vmx_vcpu_pi_load(vcpu, cpu);
Xiao Guangrong1be0e612016-03-22 16:51:18 +08002318 vmx->host_pkru = read_pkru();
Wanpeng Li74c55932017-11-29 01:31:20 -08002319 vmx->host_debugctlmsr = get_debugctlmsr();
Feng Wu28b835d2015-09-18 22:29:54 +08002320}
2321
2322static void vmx_vcpu_pi_put(struct kvm_vcpu *vcpu)
2323{
2324 struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu);
2325
2326 if (!kvm_arch_has_assigned_device(vcpu->kvm) ||
Yang Zhanga0052192016-06-13 09:56:56 +08002327 !irq_remapping_cap(IRQ_POSTING_CAP) ||
2328 !kvm_vcpu_apicv_active(vcpu))
Feng Wu28b835d2015-09-18 22:29:54 +08002329 return;
2330
2331 /* Set SN when the vCPU is preempted */
2332 if (vcpu->preempted)
2333 pi_set_sn(pi_desc);
Avi Kivity6aa8b732006-12-10 02:21:36 -08002334}
2335
2336static void vmx_vcpu_put(struct kvm_vcpu *vcpu)
2337{
Feng Wu28b835d2015-09-18 22:29:54 +08002338 vmx_vcpu_pi_put(vcpu);
2339
Avi Kivitya9b21b62008-06-24 11:48:49 +03002340 __vmx_load_host_state(to_vmx(vcpu));
Avi Kivity6aa8b732006-12-10 02:21:36 -08002341}
2342
Wanpeng Lif244dee2017-07-20 01:11:54 -07002343static bool emulation_required(struct kvm_vcpu *vcpu)
2344{
2345 return emulate_invalid_guest_state && !guest_state_valid(vcpu);
2346}
2347
Avi Kivityedcafe32009-12-30 18:07:40 +02002348static void vmx_decache_cr0_guest_bits(struct kvm_vcpu *vcpu);
2349
Nadav Har'Elfe3ef052011-05-25 23:10:02 +03002350/*
2351 * Return the cr0 value that a nested guest would read. This is a combination
2352 * of the real cr0 used to run the guest (guest_cr0), and the bits shadowed by
2353 * its hypervisor (cr0_read_shadow).
2354 */
2355static inline unsigned long nested_read_cr0(struct vmcs12 *fields)
2356{
2357 return (fields->guest_cr0 & ~fields->cr0_guest_host_mask) |
2358 (fields->cr0_read_shadow & fields->cr0_guest_host_mask);
2359}
2360static inline unsigned long nested_read_cr4(struct vmcs12 *fields)
2361{
2362 return (fields->guest_cr4 & ~fields->cr4_guest_host_mask) |
2363 (fields->cr4_read_shadow & fields->cr4_guest_host_mask);
2364}
2365
Avi Kivity6aa8b732006-12-10 02:21:36 -08002366static unsigned long vmx_get_rflags(struct kvm_vcpu *vcpu)
2367{
Avi Kivity78ac8b42010-04-08 18:19:35 +03002368 unsigned long rflags, save_rflags;
Avi Kivity345dcaa2009-08-12 15:29:37 +03002369
Avi Kivity6de12732011-03-07 12:51:22 +02002370 if (!test_bit(VCPU_EXREG_RFLAGS, (ulong *)&vcpu->arch.regs_avail)) {
2371 __set_bit(VCPU_EXREG_RFLAGS, (ulong *)&vcpu->arch.regs_avail);
2372 rflags = vmcs_readl(GUEST_RFLAGS);
2373 if (to_vmx(vcpu)->rmode.vm86_active) {
2374 rflags &= RMODE_GUEST_OWNED_EFLAGS_BITS;
2375 save_rflags = to_vmx(vcpu)->rmode.save_rflags;
2376 rflags |= save_rflags & ~RMODE_GUEST_OWNED_EFLAGS_BITS;
2377 }
2378 to_vmx(vcpu)->rflags = rflags;
Avi Kivity78ac8b42010-04-08 18:19:35 +03002379 }
Avi Kivity6de12732011-03-07 12:51:22 +02002380 return to_vmx(vcpu)->rflags;
Avi Kivity6aa8b732006-12-10 02:21:36 -08002381}
2382
2383static void vmx_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags)
2384{
Wanpeng Lif244dee2017-07-20 01:11:54 -07002385 unsigned long old_rflags = vmx_get_rflags(vcpu);
2386
Avi Kivity6de12732011-03-07 12:51:22 +02002387 __set_bit(VCPU_EXREG_RFLAGS, (ulong *)&vcpu->arch.regs_avail);
2388 to_vmx(vcpu)->rflags = rflags;
Avi Kivity78ac8b42010-04-08 18:19:35 +03002389 if (to_vmx(vcpu)->rmode.vm86_active) {
2390 to_vmx(vcpu)->rmode.save_rflags = rflags;
Glauber de Oliveira Costa053de042008-01-30 13:31:27 +01002391 rflags |= X86_EFLAGS_IOPL | X86_EFLAGS_VM;
Avi Kivity78ac8b42010-04-08 18:19:35 +03002392 }
Avi Kivity6aa8b732006-12-10 02:21:36 -08002393 vmcs_writel(GUEST_RFLAGS, rflags);
Wanpeng Lif244dee2017-07-20 01:11:54 -07002394
2395 if ((old_rflags ^ to_vmx(vcpu)->rflags) & X86_EFLAGS_VM)
2396 to_vmx(vcpu)->emulation_required = emulation_required(vcpu);
Avi Kivity6aa8b732006-12-10 02:21:36 -08002397}
2398
Paolo Bonzini37ccdcb2014-05-20 14:29:47 +02002399static u32 vmx_get_interrupt_shadow(struct kvm_vcpu *vcpu)
Glauber Costa2809f5d2009-05-12 16:21:05 -04002400{
2401 u32 interruptibility = vmcs_read32(GUEST_INTERRUPTIBILITY_INFO);
2402 int ret = 0;
2403
2404 if (interruptibility & GUEST_INTR_STATE_STI)
Jan Kiszka48005f62010-02-19 19:38:07 +01002405 ret |= KVM_X86_SHADOW_INT_STI;
Glauber Costa2809f5d2009-05-12 16:21:05 -04002406 if (interruptibility & GUEST_INTR_STATE_MOV_SS)
Jan Kiszka48005f62010-02-19 19:38:07 +01002407 ret |= KVM_X86_SHADOW_INT_MOV_SS;
Glauber Costa2809f5d2009-05-12 16:21:05 -04002408
Paolo Bonzini37ccdcb2014-05-20 14:29:47 +02002409 return ret;
Glauber Costa2809f5d2009-05-12 16:21:05 -04002410}
2411
2412static void vmx_set_interrupt_shadow(struct kvm_vcpu *vcpu, int mask)
2413{
2414 u32 interruptibility_old = vmcs_read32(GUEST_INTERRUPTIBILITY_INFO);
2415 u32 interruptibility = interruptibility_old;
2416
2417 interruptibility &= ~(GUEST_INTR_STATE_STI | GUEST_INTR_STATE_MOV_SS);
2418
Jan Kiszka48005f62010-02-19 19:38:07 +01002419 if (mask & KVM_X86_SHADOW_INT_MOV_SS)
Glauber Costa2809f5d2009-05-12 16:21:05 -04002420 interruptibility |= GUEST_INTR_STATE_MOV_SS;
Jan Kiszka48005f62010-02-19 19:38:07 +01002421 else if (mask & KVM_X86_SHADOW_INT_STI)
Glauber Costa2809f5d2009-05-12 16:21:05 -04002422 interruptibility |= GUEST_INTR_STATE_STI;
2423
2424 if ((interruptibility != interruptibility_old))
2425 vmcs_write32(GUEST_INTERRUPTIBILITY_INFO, interruptibility);
2426}
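/*
 * The two helpers above translate between the STI/MOV-SS blocking bits in
 * GUEST_INTERRUPTIBILITY_INFO and KVM's generic KVM_X86_SHADOW_INT_STI and
 * KVM_X86_SHADOW_INT_MOV_SS flags, so common x86 code can reason about the
 * interrupt shadow without knowing the VMX encoding.
 */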
2427
Avi Kivity6aa8b732006-12-10 02:21:36 -08002428static void skip_emulated_instruction(struct kvm_vcpu *vcpu)
2429{
2430 unsigned long rip;
Avi Kivity6aa8b732006-12-10 02:21:36 -08002431
Marcelo Tosatti5fdbf972008-06-27 14:58:02 -03002432 rip = kvm_rip_read(vcpu);
Avi Kivity6aa8b732006-12-10 02:21:36 -08002433 rip += vmcs_read32(VM_EXIT_INSTRUCTION_LEN);
Marcelo Tosatti5fdbf972008-06-27 14:58:02 -03002434 kvm_rip_write(vcpu, rip);
Avi Kivity6aa8b732006-12-10 02:21:36 -08002435
Glauber Costa2809f5d2009-05-12 16:21:05 -04002436 /* skipping an emulated instruction also counts */
2437 vmx_set_interrupt_shadow(vcpu, 0);
Avi Kivity6aa8b732006-12-10 02:21:36 -08002438}
2439
Paolo Bonzinib96fb432017-07-27 12:29:32 +02002440static void nested_vmx_inject_exception_vmexit(struct kvm_vcpu *vcpu,
2441 unsigned long exit_qual)
2442{
2443 struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
2444 unsigned int nr = vcpu->arch.exception.nr;
2445 u32 intr_info = nr | INTR_INFO_VALID_MASK;
2446
2447 if (vcpu->arch.exception.has_error_code) {
2448 vmcs12->vm_exit_intr_error_code = vcpu->arch.exception.error_code;
2449 intr_info |= INTR_INFO_DELIVER_CODE_MASK;
2450 }
2451
2452 if (kvm_exception_is_soft(nr))
2453 intr_info |= INTR_TYPE_SOFT_EXCEPTION;
2454 else
2455 intr_info |= INTR_TYPE_HARD_EXCEPTION;
2456
2457 if (!(vmcs12->idt_vectoring_info_field & VECTORING_INFO_VALID_MASK) &&
2458 vmx_get_nmi_mask(vcpu))
2459 intr_info |= INTR_INFO_UNBLOCK_NMI;
2460
2461 nested_vmx_vmexit(vcpu, EXIT_REASON_EXCEPTION_NMI, intr_info, exit_qual);
2462}
2463
Nadav Har'El0b6ac342011-05-25 23:13:36 +03002464/*
2465 * KVM wants to inject into the guest the page faults (and other exceptions)
2466 * it has queued. For a nested guest, this checks whether they go to L1 or L2.
Nadav Har'El0b6ac342011-05-25 23:13:36 +03002467 */
Wanpeng Libfcf83b2017-08-24 03:35:11 -07002468static int nested_vmx_check_exception(struct kvm_vcpu *vcpu, unsigned long *exit_qual)
Nadav Har'El0b6ac342011-05-25 23:13:36 +03002469{
2470 struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
Wanpeng Liadfe20f2017-07-13 18:30:41 -07002471 unsigned int nr = vcpu->arch.exception.nr;
Nadav Har'El0b6ac342011-05-25 23:13:36 +03002472
Paolo Bonzinib96fb432017-07-27 12:29:32 +02002473 if (nr == PF_VECTOR) {
2474 if (vcpu->arch.exception.nested_apf) {
Wanpeng Libfcf83b2017-08-24 03:35:11 -07002475 *exit_qual = vcpu->arch.apf.nested_apf_token;
Paolo Bonzinib96fb432017-07-27 12:29:32 +02002476 return 1;
2477 }
2478 /*
2479 * FIXME: we must not write CR2 when L1 intercepts an L2 #PF exception.
2480 * The fix is to add the ancillary datum (CR2 or DR6) to structs
2481 * kvm_queued_exception and kvm_vcpu_events, so that CR2 and DR6
2482 * can be written only when inject_pending_event runs. This should be
2483 * conditional on a new capability---if the capability is disabled,
2484 * kvm_multiple_exception would write the ancillary information to
2485 * CR2 or DR6, for backwards ABI-compatibility.
2486 */
2487 if (nested_vmx_is_page_fault_vmexit(vmcs12,
2488 vcpu->arch.exception.error_code)) {
Wanpeng Libfcf83b2017-08-24 03:35:11 -07002489 *exit_qual = vcpu->arch.cr2;
Paolo Bonzinib96fb432017-07-27 12:29:32 +02002490 return 1;
2491 }
2492 } else {
Paolo Bonzinib96fb432017-07-27 12:29:32 +02002493 if (vmcs12->exception_bitmap & (1u << nr)) {
Wanpeng Libfcf83b2017-08-24 03:35:11 -07002494 if (nr == DB_VECTOR)
2495 *exit_qual = vcpu->arch.dr6;
2496 else
2497 *exit_qual = 0;
Paolo Bonzinib96fb432017-07-27 12:29:32 +02002498 return 1;
2499 }
Wanpeng Liadfe20f2017-07-13 18:30:41 -07002500 }
2501
Paolo Bonzinib96fb432017-07-27 12:29:32 +02002502 return 0;
Nadav Har'El0b6ac342011-05-25 23:13:36 +03002503}
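/*
 * Summary of the decision made by nested_vmx_check_exception() above: a
 * return value of 1 means the pending exception must be reflected to L1 as a
 * VM exit, with the exit qualification chosen as
 *
 *   async page fault (#PF)        -> nested_apf_token
 *   #PF intercepted by L1         -> CR2
 *   #DB intercepted by L1         -> DR6
 *   any other intercepted vector  -> 0
 *
 * and 0 means the exception may be delivered directly to L2.
 */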
2504
Wanpeng Licfcd20e2017-07-13 18:30:39 -07002505static void vmx_queue_exception(struct kvm_vcpu *vcpu)
Avi Kivity298101d2007-11-25 13:41:11 +02002506{
Jan Kiszka77ab6db2008-07-14 12:28:51 +02002507 struct vcpu_vmx *vmx = to_vmx(vcpu);
Wanpeng Licfcd20e2017-07-13 18:30:39 -07002508 unsigned nr = vcpu->arch.exception.nr;
2509 bool has_error_code = vcpu->arch.exception.has_error_code;
Wanpeng Licfcd20e2017-07-13 18:30:39 -07002510 u32 error_code = vcpu->arch.exception.error_code;
Jan Kiszka8ab2d2e2008-12-15 13:52:10 +01002511 u32 intr_info = nr | INTR_INFO_VALID_MASK;
Jan Kiszka77ab6db2008-07-14 12:28:51 +02002512
Jan Kiszka8ab2d2e2008-12-15 13:52:10 +01002513 if (has_error_code) {
Jan Kiszka77ab6db2008-07-14 12:28:51 +02002514 vmcs_write32(VM_ENTRY_EXCEPTION_ERROR_CODE, error_code);
Jan Kiszka8ab2d2e2008-12-15 13:52:10 +01002515 intr_info |= INTR_INFO_DELIVER_CODE_MASK;
2516 }
Jan Kiszka77ab6db2008-07-14 12:28:51 +02002517
Avi Kivity7ffd92c2009-06-09 14:10:45 +03002518 if (vmx->rmode.vm86_active) {
Serge E. Hallyn71f98332011-04-13 09:12:54 -05002519 int inc_eip = 0;
2520 if (kvm_exception_is_soft(nr))
2521 inc_eip = vcpu->arch.event_exit_inst_len;
2522 if (kvm_inject_realmode_interrupt(vcpu, nr, inc_eip) != EMULATE_DONE)
Mohammed Gamala92601b2010-09-19 14:34:07 +02002523 kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu);
Jan Kiszka77ab6db2008-07-14 12:28:51 +02002524 return;
2525 }
2526
Gleb Natapov66fd3f72009-05-11 13:35:50 +03002527 if (kvm_exception_is_soft(nr)) {
2528 vmcs_write32(VM_ENTRY_INSTRUCTION_LEN,
2529 vmx->vcpu.arch.event_exit_inst_len);
Jan Kiszka8ab2d2e2008-12-15 13:52:10 +01002530 intr_info |= INTR_TYPE_SOFT_EXCEPTION;
2531 } else
2532 intr_info |= INTR_TYPE_HARD_EXCEPTION;
2533
2534 vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, intr_info);
Avi Kivity298101d2007-11-25 13:41:11 +02002535}
2536
Sheng Yang4e47c7a2009-12-18 16:48:47 +08002537static bool vmx_rdtscp_supported(void)
2538{
2539 return cpu_has_vmx_rdtscp();
2540}
2541
Mao, Junjiead756a12012-07-02 01:18:48 +00002542static bool vmx_invpcid_supported(void)
2543{
2544 return cpu_has_vmx_invpcid() && enable_ept;
2545}
2546
Avi Kivity6aa8b732006-12-10 02:21:36 -08002547/*
Eddie Donga75beee2007-05-17 18:55:15 +03002548 * Swap two entries in the host/guest MSR entry array.
2549 */
Rusty Russell8b9cf982007-07-30 16:31:43 +10002550static void move_msr_up(struct vcpu_vmx *vmx, int from, int to)
Eddie Donga75beee2007-05-17 18:55:15 +03002551{
Avi Kivity26bb0982009-09-07 11:14:12 +03002552 struct shared_msr_entry tmp;
Gregory Haskinsa2fa3e92007-07-27 08:13:10 -04002553
2554 tmp = vmx->guest_msrs[to];
2555 vmx->guest_msrs[to] = vmx->guest_msrs[from];
2556 vmx->guest_msrs[from] = tmp;
Eddie Donga75beee2007-05-17 18:55:15 +03002557}
2558
Yang Zhang8d146952013-01-25 10:18:50 +08002559static void vmx_set_msr_bitmap(struct kvm_vcpu *vcpu)
2560{
2561 unsigned long *msr_bitmap;
2562
Wincy Van670125b2015-03-04 14:31:56 +08002563 if (is_guest_mode(vcpu))
Radim Krčmářd048c092016-08-08 20:16:22 +02002564 msr_bitmap = to_vmx(vcpu)->nested.msr_bitmap;
Roman Kagan3ce424e2016-05-18 17:48:20 +03002565 else if (cpu_has_secondary_exec_ctrls() &&
2566 (vmcs_read32(SECONDARY_VM_EXEC_CONTROL) &
2567 SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE)) {
Wanpeng Lif6e90f92016-09-22 07:43:25 +08002568 if (enable_apicv && kvm_vcpu_apicv_active(vcpu)) {
2569 if (is_long_mode(vcpu))
Wanpeng Lic63e4562016-09-23 19:17:16 +08002570 msr_bitmap = vmx_msr_bitmap_longmode_x2apic_apicv;
2571 else
2572 msr_bitmap = vmx_msr_bitmap_legacy_x2apic_apicv;
2573 } else {
2574 if (is_long_mode(vcpu))
Wanpeng Lif6e90f92016-09-22 07:43:25 +08002575 msr_bitmap = vmx_msr_bitmap_longmode_x2apic;
2576 else
2577 msr_bitmap = vmx_msr_bitmap_legacy_x2apic;
Wanpeng Lif6e90f92016-09-22 07:43:25 +08002578 }
Yang Zhang8d146952013-01-25 10:18:50 +08002579 } else {
2580 if (is_long_mode(vcpu))
2581 msr_bitmap = vmx_msr_bitmap_longmode;
2582 else
2583 msr_bitmap = vmx_msr_bitmap_legacy;
2584 }
2585
2586 vmcs_write64(MSR_BITMAP, __pa(msr_bitmap));
2587}
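/*
 * Bitmap selection performed by vmx_set_msr_bitmap() above, assuming the
 * global vmx_msr_bitmap_* tables are set up elsewhere in this file:
 *
 *   nested guest (is_guest_mode)              -> nested.msr_bitmap
 *   x2APIC mode virtualized, APICv active     -> *_x2apic_apicv variant
 *   x2APIC mode virtualized, APICv inactive   -> *_x2apic variant
 *   otherwise                                 -> plain legacy/longmode bitmap
 *
 * with the legacy vs. longmode variant picked by is_long_mode(); the chosen
 * bitmap's physical address is then written to the MSR_BITMAP field.
 */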
2588
Eddie Donga75beee2007-05-17 18:55:15 +03002589/*
Avi Kivitye38aea32007-04-19 13:22:48 +03002590 * Set up the vmcs to automatically save and restore system
2591 * msrs. Don't touch the 64-bit msrs if the guest is in legacy
2592 * mode, as fiddling with msrs is very expensive.
2593 */
Rusty Russell8b9cf982007-07-30 16:31:43 +10002594static void setup_msrs(struct vcpu_vmx *vmx)
Avi Kivitye38aea32007-04-19 13:22:48 +03002595{
Avi Kivity26bb0982009-09-07 11:14:12 +03002596 int save_nmsrs, index;
Avi Kivitye38aea32007-04-19 13:22:48 +03002597
Eddie Donga75beee2007-05-17 18:55:15 +03002598 save_nmsrs = 0;
Avi Kivity4d56c8a2007-04-19 14:28:44 +03002599#ifdef CONFIG_X86_64
Rusty Russell8b9cf982007-07-30 16:31:43 +10002600 if (is_long_mode(&vmx->vcpu)) {
Rusty Russell8b9cf982007-07-30 16:31:43 +10002601 index = __find_msr_index(vmx, MSR_SYSCALL_MASK);
Eddie Donga75beee2007-05-17 18:55:15 +03002602 if (index >= 0)
Rusty Russell8b9cf982007-07-30 16:31:43 +10002603 move_msr_up(vmx, index, save_nmsrs++);
2604 index = __find_msr_index(vmx, MSR_LSTAR);
Eddie Donga75beee2007-05-17 18:55:15 +03002605 if (index >= 0)
Rusty Russell8b9cf982007-07-30 16:31:43 +10002606 move_msr_up(vmx, index, save_nmsrs++);
2607 index = __find_msr_index(vmx, MSR_CSTAR);
Eddie Donga75beee2007-05-17 18:55:15 +03002608 if (index >= 0)
Rusty Russell8b9cf982007-07-30 16:31:43 +10002609 move_msr_up(vmx, index, save_nmsrs++);
Sheng Yang4e47c7a2009-12-18 16:48:47 +08002610 index = __find_msr_index(vmx, MSR_TSC_AUX);
Radim Krčmářd6321d42017-08-05 00:12:49 +02002611 if (index >= 0 && guest_cpuid_has(&vmx->vcpu, X86_FEATURE_RDTSCP))
Sheng Yang4e47c7a2009-12-18 16:48:47 +08002612 move_msr_up(vmx, index, save_nmsrs++);
Eddie Donga75beee2007-05-17 18:55:15 +03002613 /*
Brian Gerst8c065852010-07-17 09:03:26 -04002614 * MSR_STAR is only needed on long mode guests, and only
Eddie Donga75beee2007-05-17 18:55:15 +03002615 * if efer.sce is enabled.
2616 */
Brian Gerst8c065852010-07-17 09:03:26 -04002617 index = __find_msr_index(vmx, MSR_STAR);
Avi Kivityf6801df2010-01-21 15:31:50 +02002618 if ((index >= 0) && (vmx->vcpu.arch.efer & EFER_SCE))
Rusty Russell8b9cf982007-07-30 16:31:43 +10002619 move_msr_up(vmx, index, save_nmsrs++);
Avi Kivity4d56c8a2007-04-19 14:28:44 +03002620 }
Eddie Donga75beee2007-05-17 18:55:15 +03002621#endif
Avi Kivity92c0d902009-10-29 11:00:16 +02002622 index = __find_msr_index(vmx, MSR_EFER);
2623 if (index >= 0 && update_transition_efer(vmx, index))
Avi Kivity26bb0982009-09-07 11:14:12 +03002624 move_msr_up(vmx, index, save_nmsrs++);
Avi Kivity4d56c8a2007-04-19 14:28:44 +03002625
Avi Kivity26bb0982009-09-07 11:14:12 +03002626 vmx->save_nmsrs = save_nmsrs;
Avi Kivity58972972009-02-24 22:26:47 +02002627
Yang Zhang8d146952013-01-25 10:18:50 +08002628 if (cpu_has_vmx_msr_bitmap())
2629 vmx_set_msr_bitmap(&vmx->vcpu);
Avi Kivitye38aea32007-04-19 13:22:48 +03002630}
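/*
 * Net effect of setup_msrs()/move_msr_up() above: guest_msrs[] is compacted so
 * that the MSRs which actually need to be switched for this vCPU (the 64-bit
 * SYSCALL MSRs, optionally TSC_AUX, STAR and EFER) occupy indices
 * 0..save_nmsrs-1; entries beyond save_nmsrs are presumably ignored by the
 * code that consumes vmx->save_nmsrs.
 */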
2631
2632/*
Avi Kivity6aa8b732006-12-10 02:21:36 -08002633 * reads and returns guest's timestamp counter "register"
Haozhong Zhangbe7b2632015-10-20 15:39:11 +08002634 * guest_tsc = ((host_tsc * tsc_multiplier) >> 48) + tsc_offset
2635 * -- Intel TSC Scaling for Virtualization White Paper, sec 1.3
Avi Kivity6aa8b732006-12-10 02:21:36 -08002636 */
Haozhong Zhangbe7b2632015-10-20 15:39:11 +08002637static u64 guest_read_tsc(struct kvm_vcpu *vcpu)
Avi Kivity6aa8b732006-12-10 02:21:36 -08002638{
2639 u64 host_tsc, tsc_offset;
2640
Andy Lutomirski4ea16362015-06-25 18:44:07 +02002641 host_tsc = rdtsc();
Avi Kivity6aa8b732006-12-10 02:21:36 -08002642 tsc_offset = vmcs_read64(TSC_OFFSET);
Haozhong Zhangbe7b2632015-10-20 15:39:11 +08002643 return kvm_scale_tsc(vcpu, host_tsc) + tsc_offset;
Avi Kivity6aa8b732006-12-10 02:21:36 -08002644}
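/*
 * Worked example for the scaling formula above, assuming kvm_scale_tsc()
 * applies the quoted 48-bit fractional multiplier: with host_tsc = 0x1000,
 * a multiplier of 2 * 2^48 and tsc_offset = 100, the guest reads
 * ((0x1000 * 2 * 2^48) >> 48) + 100 = 0x2000 + 100.
 */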
2645
2646/*
Zachary Amsden99e3e302010-08-19 22:07:17 -10002647 * writes 'offset' into guest's timestamp counter offset register
Avi Kivity6aa8b732006-12-10 02:21:36 -08002648 */
Zachary Amsden99e3e302010-08-19 22:07:17 -10002649static void vmx_write_tsc_offset(struct kvm_vcpu *vcpu, u64 offset)
Avi Kivity6aa8b732006-12-10 02:21:36 -08002650{
Nadav Har'El27fc51b2011-08-02 15:54:52 +03002651 if (is_guest_mode(vcpu)) {
Nadav Har'El79918252011-05-25 23:15:39 +03002652 /*
Nadav Har'El27fc51b2011-08-02 15:54:52 +03002653 * We're here if L1 chose not to trap WRMSR to TSC. According
2654 * to the spec, this should set L1's TSC; The offset that L1
2655 * set for L2 remains unchanged, and still needs to be added
2656 * to the newly set TSC to get L2's TSC.
Nadav Har'El79918252011-05-25 23:15:39 +03002657 */
Nadav Har'El27fc51b2011-08-02 15:54:52 +03002658 struct vmcs12 *vmcs12;
Nadav Har'El27fc51b2011-08-02 15:54:52 +03002659 /* recalculate vmcs02.TSC_OFFSET: */
2660 vmcs12 = get_vmcs12(vcpu);
2661 vmcs_write64(TSC_OFFSET, offset +
2662 (nested_cpu_has(vmcs12, CPU_BASED_USE_TSC_OFFSETING) ?
2663 vmcs12->tsc_offset : 0));
2664 } else {
Yoshihiro YUNOMAE489223e2013-06-12 16:43:44 +09002665 trace_kvm_write_tsc_offset(vcpu->vcpu_id,
2666 vmcs_read64(TSC_OFFSET), offset);
Nadav Har'El27fc51b2011-08-02 15:54:52 +03002667 vmcs_write64(TSC_OFFSET, offset);
2668 }
Avi Kivity6aa8b732006-12-10 02:21:36 -08002669}
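/*
 * Illustration of the nested case above: if L1 programmed
 * vmcs12->tsc_offset = T12 and this write supplies offset = T01 (L0's offset
 * for L1), the active vmcs02 ends up with TSC_OFFSET = T01 + T12 (just T01
 * when L1 does not use CPU_BASED_USE_TSC_OFFSETING), so L2 keeps its own
 * offset stacked on top of the newly written L1 offset.
 */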
2670
Nadav Har'El801d3422011-05-25 23:02:23 +03002671/*
2672 * nested_vmx_allowed() checks whether a guest should be allowed to use VMX
2673 * instructions and MSRs (i.e., nested VMX). Nested VMX is disabled for
2674 * all guests if the "nested" module option is off, and can also be disabled
2675 * for a single guest by disabling its VMX cpuid bit.
2676 */
2677static inline bool nested_vmx_allowed(struct kvm_vcpu *vcpu)
2678{
Radim Krčmářd6321d42017-08-05 00:12:49 +02002679 return nested && guest_cpuid_has(vcpu, X86_FEATURE_VMX);
Nadav Har'El801d3422011-05-25 23:02:23 +03002680}
2681
Avi Kivity6aa8b732006-12-10 02:21:36 -08002682/*
Nadav Har'Elb87a51a2011-05-25 23:04:25 +03002683 * nested_vmx_setup_ctls_msrs() sets up variables containing the values to be
2684 * returned for the various VMX controls MSRs when nested VMX is enabled.
2685 * The same values should also be used to verify that vmcs12 control fields are
2686 * valid during nested entry from L1 to L2.
2687 * Each of these control msrs has a low and high 32-bit half: A low bit is on
2688 * if the corresponding bit in the (32-bit) control field *must* be on, and a
2689 * bit in the high half is on if the corresponding bit in the control field
2690 * may be on. See also vmx_control_verify().
Nadav Har'Elb87a51a2011-05-25 23:04:25 +03002691 */
Wincy Vanb9c237b2015-02-03 23:56:30 +08002692static void nested_vmx_setup_ctls_msrs(struct vcpu_vmx *vmx)
Nadav Har'Elb87a51a2011-05-25 23:04:25 +03002693{
2694 /*
2695 * Note that as a general rule, the high half of the MSRs (bits in
2696 * the control fields which may be 1) should be initialized by the
2697 * intersection of the underlying hardware's MSR (i.e., features which
2698 * can be supported) and the list of features we want to expose -
2699 * because they are known to be properly supported in our code.
2700 * Also, usually, the low half of the MSRs (bits which must be 1) can
2701 * be set to 0, meaning that L1 may turn off any of these bits. The
2702 * reason is that if one of these bits is necessary, it will appear
2703 * in vmcs01 and prepare_vmcs02, when it bitwise-or's the control
2704 * fields of vmcs01 and vmcs02, will turn these bits off - and
Paolo Bonzini7313c692017-07-27 10:31:25 +02002705 * nested_vmx_exit_reflected() will not pass related exits to L1.
Nadav Har'Elb87a51a2011-05-25 23:04:25 +03002706 * These rules have exceptions below.
2707 */
2708
2709 /* pin-based controls */
Jan Kiszkaeabeaac2013-03-13 11:30:50 +01002710 rdmsr(MSR_IA32_VMX_PINBASED_CTLS,
Wincy Vanb9c237b2015-02-03 23:56:30 +08002711 vmx->nested.nested_vmx_pinbased_ctls_low,
2712 vmx->nested.nested_vmx_pinbased_ctls_high);
2713 vmx->nested.nested_vmx_pinbased_ctls_low |=
2714 PIN_BASED_ALWAYSON_WITHOUT_TRUE_MSR;
2715 vmx->nested.nested_vmx_pinbased_ctls_high &=
2716 PIN_BASED_EXT_INTR_MASK |
2717 PIN_BASED_NMI_EXITING |
2718 PIN_BASED_VIRTUAL_NMIS;
2719 vmx->nested.nested_vmx_pinbased_ctls_high |=
2720 PIN_BASED_ALWAYSON_WITHOUT_TRUE_MSR |
Jan Kiszka0238ea92013-03-13 11:31:24 +01002721 PIN_BASED_VMX_PREEMPTION_TIMER;
Andrey Smetanind62caab2015-11-10 15:36:33 +03002722 if (kvm_vcpu_apicv_active(&vmx->vcpu))
Wincy Van705699a2015-02-03 23:58:17 +08002723 vmx->nested.nested_vmx_pinbased_ctls_high |=
2724 PIN_BASED_POSTED_INTR;
Nadav Har'Elb87a51a2011-05-25 23:04:25 +03002725
Jan Kiszka3dbcd8d2014-06-16 13:59:40 +02002726 /* exit controls */
Arthur Chunqi Lic0dfee52013-08-06 18:41:45 +08002727 rdmsr(MSR_IA32_VMX_EXIT_CTLS,
Wincy Vanb9c237b2015-02-03 23:56:30 +08002728 vmx->nested.nested_vmx_exit_ctls_low,
2729 vmx->nested.nested_vmx_exit_ctls_high);
2730 vmx->nested.nested_vmx_exit_ctls_low =
2731 VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR;
Bandan Dase0ba1a62014-04-19 18:17:46 -04002732
Wincy Vanb9c237b2015-02-03 23:56:30 +08002733 vmx->nested.nested_vmx_exit_ctls_high &=
Nadav Har'Elb87a51a2011-05-25 23:04:25 +03002734#ifdef CONFIG_X86_64
Arthur Chunqi Lic0dfee52013-08-06 18:41:45 +08002735 VM_EXIT_HOST_ADDR_SPACE_SIZE |
Nadav Har'Elb87a51a2011-05-25 23:04:25 +03002736#endif
Jan Kiszkaf41245002014-03-07 20:03:13 +01002737 VM_EXIT_LOAD_IA32_PAT | VM_EXIT_SAVE_IA32_PAT;
Wincy Vanb9c237b2015-02-03 23:56:30 +08002738 vmx->nested.nested_vmx_exit_ctls_high |=
2739 VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR |
Jan Kiszkaf41245002014-03-07 20:03:13 +01002740 VM_EXIT_LOAD_IA32_EFER | VM_EXIT_SAVE_IA32_EFER |
Bandan Dase0ba1a62014-04-19 18:17:46 -04002741 VM_EXIT_SAVE_VMX_PREEMPTION_TIMER | VM_EXIT_ACK_INTR_ON_EXIT;
2742
Paolo Bonzinia87036a2016-03-08 09:52:13 +01002743 if (kvm_mpx_supported())
Wincy Vanb9c237b2015-02-03 23:56:30 +08002744 vmx->nested.nested_vmx_exit_ctls_high |= VM_EXIT_CLEAR_BNDCFGS;
Nadav Har'Elb87a51a2011-05-25 23:04:25 +03002745
Jan Kiszka2996fca2014-06-16 13:59:43 +02002746 /* We support free control of debug control saving. */
David Matlack0115f9c2016-11-29 18:14:06 -08002747 vmx->nested.nested_vmx_exit_ctls_low &= ~VM_EXIT_SAVE_DEBUG_CONTROLS;
Jan Kiszka2996fca2014-06-16 13:59:43 +02002748
Nadav Har'Elb87a51a2011-05-25 23:04:25 +03002749 /* entry controls */
2750 rdmsr(MSR_IA32_VMX_ENTRY_CTLS,
Wincy Vanb9c237b2015-02-03 23:56:30 +08002751 vmx->nested.nested_vmx_entry_ctls_low,
2752 vmx->nested.nested_vmx_entry_ctls_high);
2753 vmx->nested.nested_vmx_entry_ctls_low =
2754 VM_ENTRY_ALWAYSON_WITHOUT_TRUE_MSR;
2755 vmx->nested.nested_vmx_entry_ctls_high &=
Jan Kiszka57435342013-08-06 10:39:56 +02002756#ifdef CONFIG_X86_64
2757 VM_ENTRY_IA32E_MODE |
2758#endif
2759 VM_ENTRY_LOAD_IA32_PAT;
Wincy Vanb9c237b2015-02-03 23:56:30 +08002760 vmx->nested.nested_vmx_entry_ctls_high |=
2761 (VM_ENTRY_ALWAYSON_WITHOUT_TRUE_MSR | VM_ENTRY_LOAD_IA32_EFER);
Paolo Bonzinia87036a2016-03-08 09:52:13 +01002762 if (kvm_mpx_supported())
Wincy Vanb9c237b2015-02-03 23:56:30 +08002763 vmx->nested.nested_vmx_entry_ctls_high |= VM_ENTRY_LOAD_BNDCFGS;
Jan Kiszka57435342013-08-06 10:39:56 +02002764
Jan Kiszka2996fca2014-06-16 13:59:43 +02002765 /* We support free control of debug control loading. */
David Matlack0115f9c2016-11-29 18:14:06 -08002766 vmx->nested.nested_vmx_entry_ctls_low &= ~VM_ENTRY_LOAD_DEBUG_CONTROLS;
Jan Kiszka2996fca2014-06-16 13:59:43 +02002767
Nadav Har'Elb87a51a2011-05-25 23:04:25 +03002768 /* cpu-based controls */
2769 rdmsr(MSR_IA32_VMX_PROCBASED_CTLS,
Wincy Vanb9c237b2015-02-03 23:56:30 +08002770 vmx->nested.nested_vmx_procbased_ctls_low,
2771 vmx->nested.nested_vmx_procbased_ctls_high);
2772 vmx->nested.nested_vmx_procbased_ctls_low =
2773 CPU_BASED_ALWAYSON_WITHOUT_TRUE_MSR;
2774 vmx->nested.nested_vmx_procbased_ctls_high &=
Jan Kiszkaa294c9b2013-10-23 17:43:09 +01002775 CPU_BASED_VIRTUAL_INTR_PENDING |
2776 CPU_BASED_VIRTUAL_NMI_PENDING | CPU_BASED_USE_TSC_OFFSETING |
Nadav Har'Elb87a51a2011-05-25 23:04:25 +03002777 CPU_BASED_HLT_EXITING | CPU_BASED_INVLPG_EXITING |
2778 CPU_BASED_MWAIT_EXITING | CPU_BASED_CR3_LOAD_EXITING |
2779 CPU_BASED_CR3_STORE_EXITING |
2780#ifdef CONFIG_X86_64
2781 CPU_BASED_CR8_LOAD_EXITING | CPU_BASED_CR8_STORE_EXITING |
2782#endif
2783 CPU_BASED_MOV_DR_EXITING | CPU_BASED_UNCOND_IO_EXITING |
Mihai Donțu5f3d45e2015-07-05 20:08:57 +03002784 CPU_BASED_USE_IO_BITMAPS | CPU_BASED_MONITOR_TRAP_FLAG |
2785 CPU_BASED_MONITOR_EXITING | CPU_BASED_RDPMC_EXITING |
2786 CPU_BASED_RDTSC_EXITING | CPU_BASED_PAUSE_EXITING |
2787 CPU_BASED_TPR_SHADOW | CPU_BASED_ACTIVATE_SECONDARY_CONTROLS;
Nadav Har'Elb87a51a2011-05-25 23:04:25 +03002788 /*
2789 * We can allow some features even when not supported by the
2790 * hardware. For example, L1 can specify an MSR bitmap - and we
2791 * can use it to avoid exits to L1 - even when L0 runs L2
2792 * without MSR bitmaps.
2793 */
Wincy Vanb9c237b2015-02-03 23:56:30 +08002794 vmx->nested.nested_vmx_procbased_ctls_high |=
2795 CPU_BASED_ALWAYSON_WITHOUT_TRUE_MSR |
Jan Kiszka560b7ee2014-06-16 13:59:42 +02002796 CPU_BASED_USE_MSR_BITMAPS;
Nadav Har'Elb87a51a2011-05-25 23:04:25 +03002797
Jan Kiszka3dcdf3ec2014-06-16 13:59:41 +02002798 /* We support free control of CR3 access interception. */
David Matlack0115f9c2016-11-29 18:14:06 -08002799 vmx->nested.nested_vmx_procbased_ctls_low &=
Jan Kiszka3dcdf3ec2014-06-16 13:59:41 +02002800 ~(CPU_BASED_CR3_LOAD_EXITING | CPU_BASED_CR3_STORE_EXITING);
2801
Paolo Bonzini80154d72017-08-24 13:55:35 +02002802 /*
2803 * secondary cpu-based controls. Do not include those that
2804 * depend on CPUID bits, they are added later by vmx_cpuid_update.
2805 */
Nadav Har'Elb87a51a2011-05-25 23:04:25 +03002806 rdmsr(MSR_IA32_VMX_PROCBASED_CTLS2,
Wincy Vanb9c237b2015-02-03 23:56:30 +08002807 vmx->nested.nested_vmx_secondary_ctls_low,
2808 vmx->nested.nested_vmx_secondary_ctls_high);
2809 vmx->nested.nested_vmx_secondary_ctls_low = 0;
2810 vmx->nested.nested_vmx_secondary_ctls_high &=
Jan Kiszkad6851fb2013-02-23 22:34:39 +01002811 SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES |
Paolo Bonzini1b073042016-10-25 16:06:30 +02002812 SECONDARY_EXEC_DESC |
Wincy Vanf2b93282015-02-03 23:56:03 +08002813 SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE |
Wincy Van82f0dd42015-02-03 23:57:18 +08002814 SECONDARY_EXEC_APIC_REGISTER_VIRT |
Wincy Van608406e2015-02-03 23:57:51 +08002815 SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY |
Paolo Bonzini3db13482017-08-24 14:48:03 +02002816 SECONDARY_EXEC_WBINVD_EXITING;
Jan Kiszkac18911a2013-03-13 16:06:41 +01002817
Nadav Har'Elafa61f7522013-08-07 14:59:22 +02002818 if (enable_ept) {
2819 /* nested EPT: emulate EPT also to L1 */
Wincy Vanb9c237b2015-02-03 23:56:30 +08002820 vmx->nested.nested_vmx_secondary_ctls_high |=
Radim Krčmář0790ec12015-03-17 14:02:32 +01002821 SECONDARY_EXEC_ENABLE_EPT;
Wincy Vanb9c237b2015-02-03 23:56:30 +08002822 vmx->nested.nested_vmx_ept_caps = VMX_EPT_PAGE_WALK_4_BIT |
Paolo Bonzini7db74262017-03-08 10:49:19 +01002823 VMX_EPTP_WB_BIT | VMX_EPT_INVEPT_BIT;
Bandan Das02120c42016-07-12 18:18:52 -04002824 if (cpu_has_vmx_ept_execute_only())
2825 vmx->nested.nested_vmx_ept_caps |=
2826 VMX_EPT_EXECUTE_ONLY_BIT;
Wincy Vanb9c237b2015-02-03 23:56:30 +08002827 vmx->nested.nested_vmx_ept_caps &= vmx_capability.ept;
Bandan Das45e11812016-08-02 16:32:36 -04002828 vmx->nested.nested_vmx_ept_caps |= VMX_EPT_EXTENT_GLOBAL_BIT |
Paolo Bonzini7db74262017-03-08 10:49:19 +01002829 VMX_EPT_EXTENT_CONTEXT_BIT | VMX_EPT_2MB_PAGE_BIT |
2830 VMX_EPT_1GB_PAGE_BIT;
Bandan Das03efce62017-05-05 15:25:15 -04002831 if (enable_ept_ad_bits) {
2832 vmx->nested.nested_vmx_secondary_ctls_high |=
2833 SECONDARY_EXEC_ENABLE_PML;
Dan Carpenter7461fbc2017-05-18 10:41:15 +03002834 vmx->nested.nested_vmx_ept_caps |= VMX_EPT_AD_BIT;
Bandan Das03efce62017-05-05 15:25:15 -04002835 }
David Hildenbrand1c13bff2017-08-24 20:51:33 +02002836 }
Nadav Har'Elafa61f7522013-08-07 14:59:22 +02002837
Bandan Das27c42a12017-08-03 15:54:42 -04002838 if (cpu_has_vmx_vmfunc()) {
2839 vmx->nested.nested_vmx_secondary_ctls_high |=
2840 SECONDARY_EXEC_ENABLE_VMFUNC;
Bandan Das41ab9372017-08-03 15:54:43 -04002841 /*
2842 * Advertise EPTP switching unconditionally
2843 * since we emulate it
2844 */
Wanpeng Li575b3a22017-10-19 07:00:34 +08002845 if (enable_ept)
2846 vmx->nested.nested_vmx_vmfunc_controls =
2847 VMX_VMFUNC_EPTP_SWITCHING;
Bandan Das27c42a12017-08-03 15:54:42 -04002848 }
2849
Paolo Bonzinief697a72016-03-18 16:58:38 +01002850 /*
2851 * Old versions of KVM use the single-context version without
2852 * checking for support, so declare that it is supported even
2853 * though it is treated as global context. The alternative is
2854 * not failing the single-context invvpid, and it is worse.
2855 */
Wanpeng Li63cb6d52017-03-20 21:18:53 -07002856 if (enable_vpid) {
2857 vmx->nested.nested_vmx_secondary_ctls_high |=
2858 SECONDARY_EXEC_ENABLE_VPID;
Wanpeng Li089d7b62015-10-13 09:18:37 -07002859 vmx->nested.nested_vmx_vpid_caps = VMX_VPID_INVVPID_BIT |
Jan Dakinevichbcdde302016-10-28 07:00:30 +03002860 VMX_VPID_EXTENT_SUPPORTED_MASK;
David Hildenbrand1c13bff2017-08-24 20:51:33 +02002861 }
Wanpeng Li99b83ac2015-10-13 09:12:21 -07002862
Radim Krčmář0790ec12015-03-17 14:02:32 +01002863 if (enable_unrestricted_guest)
2864 vmx->nested.nested_vmx_secondary_ctls_high |=
2865 SECONDARY_EXEC_UNRESTRICTED_GUEST;
2866
Jan Kiszkac18911a2013-03-13 16:06:41 +01002867 /* miscellaneous data */
Wincy Vanb9c237b2015-02-03 23:56:30 +08002868 rdmsr(MSR_IA32_VMX_MISC,
2869 vmx->nested.nested_vmx_misc_low,
2870 vmx->nested.nested_vmx_misc_high);
2871 vmx->nested.nested_vmx_misc_low &= VMX_MISC_SAVE_EFER_LMA;
2872 vmx->nested.nested_vmx_misc_low |=
2873 VMX_MISC_EMULATED_PREEMPTION_TIMER_RATE |
Jan Kiszkaf41245002014-03-07 20:03:13 +01002874 VMX_MISC_ACTIVITY_HLT;
Wincy Vanb9c237b2015-02-03 23:56:30 +08002875 vmx->nested.nested_vmx_misc_high = 0;
David Matlack62cc6b9d2016-11-29 18:14:07 -08002876
2877 /*
2878 * This MSR reports some information about VMX support. We
2879 * should return information about the VMX we emulate for the
2880 * guest, and the VMCS structure we give it - not about the
2881 * VMX support of the underlying hardware.
2882 */
2883 vmx->nested.nested_vmx_basic =
2884 VMCS12_REVISION |
2885 VMX_BASIC_TRUE_CTLS |
2886 ((u64)VMCS12_SIZE << VMX_BASIC_VMCS_SIZE_SHIFT) |
2887 (VMX_BASIC_MEM_TYPE_WB << VMX_BASIC_MEM_TYPE_SHIFT);
2888
2889 if (cpu_has_vmx_basic_inout())
2890 vmx->nested.nested_vmx_basic |= VMX_BASIC_INOUT;
2891
2892 /*
David Matlack8322ebb2016-11-29 18:14:09 -08002893 * These MSRs specify bits which the guest must keep fixed on
David Matlack62cc6b9d2016-11-29 18:14:07 -08002894 * while L1 is in VMXON mode (in L1's root mode, or running an L2).
2895 * We picked the standard core2 setting.
2896 */
2897#define VMXON_CR0_ALWAYSON (X86_CR0_PE | X86_CR0_PG | X86_CR0_NE)
2898#define VMXON_CR4_ALWAYSON X86_CR4_VMXE
2899 vmx->nested.nested_vmx_cr0_fixed0 = VMXON_CR0_ALWAYSON;
David Matlack62cc6b9d2016-11-29 18:14:07 -08002900 vmx->nested.nested_vmx_cr4_fixed0 = VMXON_CR4_ALWAYSON;
David Matlack8322ebb2016-11-29 18:14:09 -08002901
2902 /* These MSRs specify bits which the guest must keep fixed off. */
2903 rdmsrl(MSR_IA32_VMX_CR0_FIXED1, vmx->nested.nested_vmx_cr0_fixed1);
2904 rdmsrl(MSR_IA32_VMX_CR4_FIXED1, vmx->nested.nested_vmx_cr4_fixed1);
David Matlack62cc6b9d2016-11-29 18:14:07 -08002905
2906 /* highest index: VMX_PREEMPTION_TIMER_VALUE */
2907 vmx->nested.nested_vmx_vmcs_enum = 0x2e;
Nadav Har'Elb87a51a2011-05-25 23:04:25 +03002908}
2909
David Matlack38991522016-11-29 18:14:08 -08002910/*
2911 * if fixed0[i] == 1: val[i] must be 1
2912 * if fixed1[i] == 0: val[i] must be 0
2913 */
2914static inline bool fixed_bits_valid(u64 val, u64 fixed0, u64 fixed1)
2915{
2916 return ((val & fixed1) | fixed0) == val;
Nadav Har'Elb87a51a2011-05-25 23:04:25 +03002917}
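/*
 * Worked example for fixed_bits_valid() above: with fixed0 = 0b0001 (bit 0
 * must be 1) and fixed1 = 0b0111 (bit 3 must be 0), val = 0b0101 passes
 * because ((val & fixed1) | fixed0) == val, while val = 0b1001 fails (bit 3
 * set) and val = 0b0100 fails (bit 0 clear).
 */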
2918
2919static inline bool vmx_control_verify(u32 control, u32 low, u32 high)
2920{
David Matlack38991522016-11-29 18:14:08 -08002921 return fixed_bits_valid(control, low, high);
Nadav Har'Elb87a51a2011-05-25 23:04:25 +03002922}
2923
2924static inline u64 vmx_control_msr(u32 low, u32 high)
2925{
2926 return low | ((u64)high << 32);
2927}
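/*
 * vmx_control_msr() above packs a control MSR the way hardware reports it:
 * bits that must be 1 in the low 32 bits, bits that may be 1 in the high
 * 32 bits.  For example,
 *
 *   vmx_control_msr(vmx->nested.nested_vmx_pinbased_ctls_low,
 *                   vmx->nested.nested_vmx_pinbased_ctls_high)
 *
 * reconstructs the 64-bit value returned for MSR_IA32_VMX_TRUE_PINBASED_CTLS
 * in vmx_get_vmx_msr() below.
 */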
2928
David Matlack62cc6b9d2016-11-29 18:14:07 -08002929static bool is_bitwise_subset(u64 superset, u64 subset, u64 mask)
2930{
2931 superset &= mask;
2932 subset &= mask;
2933
2934 return (superset | subset) == superset;
2935}
2936
2937static int vmx_restore_vmx_basic(struct vcpu_vmx *vmx, u64 data)
2938{
2939 const u64 feature_and_reserved =
2940 /* feature (except bit 48; see below) */
2941 BIT_ULL(49) | BIT_ULL(54) | BIT_ULL(55) |
2942 /* reserved */
2943 BIT_ULL(31) | GENMASK_ULL(47, 45) | GENMASK_ULL(63, 56);
2944 u64 vmx_basic = vmx->nested.nested_vmx_basic;
2945
2946 if (!is_bitwise_subset(vmx_basic, data, feature_and_reserved))
2947 return -EINVAL;
2948
2949 /*
2950 * KVM does not emulate a version of VMX that constrains physical
2951 * addresses of VMX structures (e.g. VMCS) to 32-bits.
2952 */
2953 if (data & BIT_ULL(48))
2954 return -EINVAL;
2955
2956 if (vmx_basic_vmcs_revision_id(vmx_basic) !=
2957 vmx_basic_vmcs_revision_id(data))
2958 return -EINVAL;
2959
2960 if (vmx_basic_vmcs_size(vmx_basic) > vmx_basic_vmcs_size(data))
2961 return -EINVAL;
2962
2963 vmx->nested.nested_vmx_basic = data;
2964 return 0;
2965}
2966
2967static int
2968vmx_restore_control_msr(struct vcpu_vmx *vmx, u32 msr_index, u64 data)
2969{
2970 u64 supported;
2971 u32 *lowp, *highp;
2972
2973 switch (msr_index) {
2974 case MSR_IA32_VMX_TRUE_PINBASED_CTLS:
2975 lowp = &vmx->nested.nested_vmx_pinbased_ctls_low;
2976 highp = &vmx->nested.nested_vmx_pinbased_ctls_high;
2977 break;
2978 case MSR_IA32_VMX_TRUE_PROCBASED_CTLS:
2979 lowp = &vmx->nested.nested_vmx_procbased_ctls_low;
2980 highp = &vmx->nested.nested_vmx_procbased_ctls_high;
2981 break;
2982 case MSR_IA32_VMX_TRUE_EXIT_CTLS:
2983 lowp = &vmx->nested.nested_vmx_exit_ctls_low;
2984 highp = &vmx->nested.nested_vmx_exit_ctls_high;
2985 break;
2986 case MSR_IA32_VMX_TRUE_ENTRY_CTLS:
2987 lowp = &vmx->nested.nested_vmx_entry_ctls_low;
2988 highp = &vmx->nested.nested_vmx_entry_ctls_high;
2989 break;
2990 case MSR_IA32_VMX_PROCBASED_CTLS2:
2991 lowp = &vmx->nested.nested_vmx_secondary_ctls_low;
2992 highp = &vmx->nested.nested_vmx_secondary_ctls_high;
2993 break;
2994 default:
2995 BUG();
2996 }
2997
2998 supported = vmx_control_msr(*lowp, *highp);
2999
3000 /* Check must-be-1 bits are still 1. */
3001 if (!is_bitwise_subset(data, supported, GENMASK_ULL(31, 0)))
3002 return -EINVAL;
3003
3004 /* Check must-be-0 bits are still 0. */
3005 if (!is_bitwise_subset(supported, data, GENMASK_ULL(63, 32)))
3006 return -EINVAL;
3007
3008 *lowp = data;
3009 *highp = data >> 32;
3010 return 0;
3011}
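/*
 * The two is_bitwise_subset() checks above mean userspace may only shrink the
 * advertised feature set, never extend it: every must-be-1 bit in the low
 * half of 'supported' must still be set in data[31:0], and data[63:32] must
 * not advertise any may-be-1 bit that 'supported' does not already contain.
 */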
3012
3013static int vmx_restore_vmx_misc(struct vcpu_vmx *vmx, u64 data)
3014{
3015 const u64 feature_and_reserved_bits =
3016 /* feature */
3017 BIT_ULL(5) | GENMASK_ULL(8, 6) | BIT_ULL(14) | BIT_ULL(15) |
3018 BIT_ULL(28) | BIT_ULL(29) | BIT_ULL(30) |
3019 /* reserved */
3020 GENMASK_ULL(13, 9) | BIT_ULL(31);
3021 u64 vmx_misc;
3022
3023 vmx_misc = vmx_control_msr(vmx->nested.nested_vmx_misc_low,
3024 vmx->nested.nested_vmx_misc_high);
3025
3026 if (!is_bitwise_subset(vmx_misc, data, feature_and_reserved_bits))
3027 return -EINVAL;
3028
3029 if ((vmx->nested.nested_vmx_pinbased_ctls_high &
3030 PIN_BASED_VMX_PREEMPTION_TIMER) &&
3031 vmx_misc_preemption_timer_rate(data) !=
3032 vmx_misc_preemption_timer_rate(vmx_misc))
3033 return -EINVAL;
3034
3035 if (vmx_misc_cr3_count(data) > vmx_misc_cr3_count(vmx_misc))
3036 return -EINVAL;
3037
3038 if (vmx_misc_max_msr(data) > vmx_misc_max_msr(vmx_misc))
3039 return -EINVAL;
3040
3041 if (vmx_misc_mseg_revid(data) != vmx_misc_mseg_revid(vmx_misc))
3042 return -EINVAL;
3043
3044 vmx->nested.nested_vmx_misc_low = data;
3045 vmx->nested.nested_vmx_misc_high = data >> 32;
3046 return 0;
3047}
3048
3049static int vmx_restore_vmx_ept_vpid_cap(struct vcpu_vmx *vmx, u64 data)
3050{
3051 u64 vmx_ept_vpid_cap;
3052
3053 vmx_ept_vpid_cap = vmx_control_msr(vmx->nested.nested_vmx_ept_caps,
3054 vmx->nested.nested_vmx_vpid_caps);
3055
3056 /* Every bit is either reserved or a feature bit. */
3057 if (!is_bitwise_subset(vmx_ept_vpid_cap, data, -1ULL))
3058 return -EINVAL;
3059
3060 vmx->nested.nested_vmx_ept_caps = data;
3061 vmx->nested.nested_vmx_vpid_caps = data >> 32;
3062 return 0;
3063}
3064
3065static int vmx_restore_fixed0_msr(struct vcpu_vmx *vmx, u32 msr_index, u64 data)
3066{
3067 u64 *msr;
3068
3069 switch (msr_index) {
3070 case MSR_IA32_VMX_CR0_FIXED0:
3071 msr = &vmx->nested.nested_vmx_cr0_fixed0;
3072 break;
3073 case MSR_IA32_VMX_CR4_FIXED0:
3074 msr = &vmx->nested.nested_vmx_cr4_fixed0;
3075 break;
3076 default:
3077 BUG();
3078 }
3079
3080 /*
3081 * Bits that are set in *msr are "must-be-1" bits during VMX operation;
3082 * they must also be set in the restored value.
3083 */
3084 if (!is_bitwise_subset(data, *msr, -1ULL))
3085 return -EINVAL;
3086
3087 *msr = data;
3088 return 0;
3089}
3090
3091/*
3092 * Called when userspace is restoring VMX MSRs.
3093 *
3094 * Returns 0 on success, non-0 otherwise.
3095 */
3096static int vmx_set_vmx_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data)
3097{
3098 struct vcpu_vmx *vmx = to_vmx(vcpu);
3099
3100 switch (msr_index) {
3101 case MSR_IA32_VMX_BASIC:
3102 return vmx_restore_vmx_basic(vmx, data);
3103 case MSR_IA32_VMX_PINBASED_CTLS:
3104 case MSR_IA32_VMX_PROCBASED_CTLS:
3105 case MSR_IA32_VMX_EXIT_CTLS:
3106 case MSR_IA32_VMX_ENTRY_CTLS:
3107 /*
3108 * The "non-true" VMX capability MSRs are generated from the
3109 * "true" MSRs, so we do not support restoring them directly.
3110 *
3111 * If userspace wants to emulate VMX_BASIC[55]=0, userspace
3112 * should restore the "true" MSRs with the must-be-1 bits
3113 * set according to the SDM Vol 3. A.2 "RESERVED CONTROLS AND
3114 * DEFAULT SETTINGS".
3115 */
3116 return -EINVAL;
3117 case MSR_IA32_VMX_TRUE_PINBASED_CTLS:
3118 case MSR_IA32_VMX_TRUE_PROCBASED_CTLS:
3119 case MSR_IA32_VMX_TRUE_EXIT_CTLS:
3120 case MSR_IA32_VMX_TRUE_ENTRY_CTLS:
3121 case MSR_IA32_VMX_PROCBASED_CTLS2:
3122 return vmx_restore_control_msr(vmx, msr_index, data);
3123 case MSR_IA32_VMX_MISC:
3124 return vmx_restore_vmx_misc(vmx, data);
3125 case MSR_IA32_VMX_CR0_FIXED0:
3126 case MSR_IA32_VMX_CR4_FIXED0:
3127 return vmx_restore_fixed0_msr(vmx, msr_index, data);
3128 case MSR_IA32_VMX_CR0_FIXED1:
3129 case MSR_IA32_VMX_CR4_FIXED1:
3130 /*
3131 * These MSRs are generated based on the vCPU's CPUID, so we
3132 * do not support restoring them directly.
3133 */
3134 return -EINVAL;
3135 case MSR_IA32_VMX_EPT_VPID_CAP:
3136 return vmx_restore_vmx_ept_vpid_cap(vmx, data);
3137 case MSR_IA32_VMX_VMCS_ENUM:
3138 vmx->nested.nested_vmx_vmcs_enum = data;
3139 return 0;
3140 default:
3141 /*
3142 * The rest of the VMX capability MSRs do not support restore.
3143 */
3144 return -EINVAL;
3145 }
3146}
3147
Jan Kiszkacae50132014-01-04 18:47:22 +01003148/* Returns 0 on success, non-0 otherwise. */
Nadav Har'Elb87a51a2011-05-25 23:04:25 +03003149static int vmx_get_vmx_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata)
3150{
Wincy Vanb9c237b2015-02-03 23:56:30 +08003151 struct vcpu_vmx *vmx = to_vmx(vcpu);
3152
Nadav Har'Elb87a51a2011-05-25 23:04:25 +03003153 switch (msr_index) {
Nadav Har'Elb87a51a2011-05-25 23:04:25 +03003154 case MSR_IA32_VMX_BASIC:
David Matlack62cc6b9d2016-11-29 18:14:07 -08003155 *pdata = vmx->nested.nested_vmx_basic;
Nadav Har'Elb87a51a2011-05-25 23:04:25 +03003156 break;
3157 case MSR_IA32_VMX_TRUE_PINBASED_CTLS:
3158 case MSR_IA32_VMX_PINBASED_CTLS:
Wincy Vanb9c237b2015-02-03 23:56:30 +08003159 *pdata = vmx_control_msr(
3160 vmx->nested.nested_vmx_pinbased_ctls_low,
3161 vmx->nested.nested_vmx_pinbased_ctls_high);
David Matlack0115f9c2016-11-29 18:14:06 -08003162 if (msr_index == MSR_IA32_VMX_PINBASED_CTLS)
3163 *pdata |= PIN_BASED_ALWAYSON_WITHOUT_TRUE_MSR;
Nadav Har'Elb87a51a2011-05-25 23:04:25 +03003164 break;
3165 case MSR_IA32_VMX_TRUE_PROCBASED_CTLS:
3166 case MSR_IA32_VMX_PROCBASED_CTLS:
Wincy Vanb9c237b2015-02-03 23:56:30 +08003167 *pdata = vmx_control_msr(
3168 vmx->nested.nested_vmx_procbased_ctls_low,
3169 vmx->nested.nested_vmx_procbased_ctls_high);
David Matlack0115f9c2016-11-29 18:14:06 -08003170 if (msr_index == MSR_IA32_VMX_PROCBASED_CTLS)
3171 *pdata |= CPU_BASED_ALWAYSON_WITHOUT_TRUE_MSR;
Nadav Har'Elb87a51a2011-05-25 23:04:25 +03003172 break;
3173 case MSR_IA32_VMX_TRUE_EXIT_CTLS:
3174 case MSR_IA32_VMX_EXIT_CTLS:
Wincy Vanb9c237b2015-02-03 23:56:30 +08003175 *pdata = vmx_control_msr(
3176 vmx->nested.nested_vmx_exit_ctls_low,
3177 vmx->nested.nested_vmx_exit_ctls_high);
David Matlack0115f9c2016-11-29 18:14:06 -08003178 if (msr_index == MSR_IA32_VMX_EXIT_CTLS)
3179 *pdata |= VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR;
Nadav Har'Elb87a51a2011-05-25 23:04:25 +03003180 break;
3181 case MSR_IA32_VMX_TRUE_ENTRY_CTLS:
3182 case MSR_IA32_VMX_ENTRY_CTLS:
Wincy Vanb9c237b2015-02-03 23:56:30 +08003183 *pdata = vmx_control_msr(
3184 vmx->nested.nested_vmx_entry_ctls_low,
3185 vmx->nested.nested_vmx_entry_ctls_high);
David Matlack0115f9c2016-11-29 18:14:06 -08003186 if (msr_index == MSR_IA32_VMX_ENTRY_CTLS)
3187 *pdata |= VM_ENTRY_ALWAYSON_WITHOUT_TRUE_MSR;
Nadav Har'Elb87a51a2011-05-25 23:04:25 +03003188 break;
3189 case MSR_IA32_VMX_MISC:
Wincy Vanb9c237b2015-02-03 23:56:30 +08003190 *pdata = vmx_control_msr(
3191 vmx->nested.nested_vmx_misc_low,
3192 vmx->nested.nested_vmx_misc_high);
Nadav Har'Elb87a51a2011-05-25 23:04:25 +03003193 break;
Nadav Har'Elb87a51a2011-05-25 23:04:25 +03003194 case MSR_IA32_VMX_CR0_FIXED0:
David Matlack62cc6b9d2016-11-29 18:14:07 -08003195 *pdata = vmx->nested.nested_vmx_cr0_fixed0;
Nadav Har'Elb87a51a2011-05-25 23:04:25 +03003196 break;
3197 case MSR_IA32_VMX_CR0_FIXED1:
David Matlack62cc6b9d2016-11-29 18:14:07 -08003198 *pdata = vmx->nested.nested_vmx_cr0_fixed1;
Nadav Har'Elb87a51a2011-05-25 23:04:25 +03003199 break;
3200 case MSR_IA32_VMX_CR4_FIXED0:
David Matlack62cc6b9d2016-11-29 18:14:07 -08003201 *pdata = vmx->nested.nested_vmx_cr4_fixed0;
Nadav Har'Elb87a51a2011-05-25 23:04:25 +03003202 break;
3203 case MSR_IA32_VMX_CR4_FIXED1:
David Matlack62cc6b9d2016-11-29 18:14:07 -08003204 *pdata = vmx->nested.nested_vmx_cr4_fixed1;
Nadav Har'Elb87a51a2011-05-25 23:04:25 +03003205 break;
3206 case MSR_IA32_VMX_VMCS_ENUM:
David Matlack62cc6b9d2016-11-29 18:14:07 -08003207 *pdata = vmx->nested.nested_vmx_vmcs_enum;
Nadav Har'Elb87a51a2011-05-25 23:04:25 +03003208 break;
3209 case MSR_IA32_VMX_PROCBASED_CTLS2:
Wincy Vanb9c237b2015-02-03 23:56:30 +08003210 *pdata = vmx_control_msr(
3211 vmx->nested.nested_vmx_secondary_ctls_low,
3212 vmx->nested.nested_vmx_secondary_ctls_high);
Nadav Har'Elb87a51a2011-05-25 23:04:25 +03003213 break;
3214 case MSR_IA32_VMX_EPT_VPID_CAP:
Wanpeng Li089d7b62015-10-13 09:18:37 -07003215 *pdata = vmx->nested.nested_vmx_ept_caps |
3216 ((u64)vmx->nested.nested_vmx_vpid_caps << 32);
Nadav Har'Elb87a51a2011-05-25 23:04:25 +03003217 break;
Bandan Das27c42a12017-08-03 15:54:42 -04003218 case MSR_IA32_VMX_VMFUNC:
3219 *pdata = vmx->nested.nested_vmx_vmfunc_controls;
3220 break;
Nadav Har'Elb87a51a2011-05-25 23:04:25 +03003221 default:
Nadav Har'Elb87a51a2011-05-25 23:04:25 +03003222 return 1;
Nadav Har'Elb3897a42013-07-08 19:12:35 +08003223 }
3224
Nadav Har'Elb87a51a2011-05-25 23:04:25 +03003225 return 0;
3226}
3227
Haozhong Zhang37e4c992016-06-22 14:59:55 +08003228static inline bool vmx_feature_control_msr_valid(struct kvm_vcpu *vcpu,
3229 uint64_t val)
3230{
3231 uint64_t valid_bits = to_vmx(vcpu)->msr_ia32_feature_control_valid_bits;
3232
3233 return !(val & ~valid_bits);
3234}
3235
Nadav Har'Elb87a51a2011-05-25 23:04:25 +03003236/*
Avi Kivity6aa8b732006-12-10 02:21:36 -08003237 * Reads the MSR value selected by msr_info->index into msr_info->data.
3238 * Returns 0 on success, non-0 otherwise.
3239 * Assumes vcpu_load() was already called.
3240 */
Paolo Bonzini609e36d2015-04-08 15:30:38 +02003241static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
Avi Kivity6aa8b732006-12-10 02:21:36 -08003242{
Avi Kivity26bb0982009-09-07 11:14:12 +03003243 struct shared_msr_entry *msr;
Avi Kivity6aa8b732006-12-10 02:21:36 -08003244
Paolo Bonzini609e36d2015-04-08 15:30:38 +02003245 switch (msr_info->index) {
Avi Kivity05b3e0c2006-12-13 00:33:45 -08003246#ifdef CONFIG_X86_64
Avi Kivity6aa8b732006-12-10 02:21:36 -08003247 case MSR_FS_BASE:
Paolo Bonzini609e36d2015-04-08 15:30:38 +02003248 msr_info->data = vmcs_readl(GUEST_FS_BASE);
Avi Kivity6aa8b732006-12-10 02:21:36 -08003249 break;
3250 case MSR_GS_BASE:
Paolo Bonzini609e36d2015-04-08 15:30:38 +02003251 msr_info->data = vmcs_readl(GUEST_GS_BASE);
Avi Kivity6aa8b732006-12-10 02:21:36 -08003252 break;
Avi Kivity44ea2b12009-09-06 15:55:37 +03003253 case MSR_KERNEL_GS_BASE:
3254 vmx_load_host_state(to_vmx(vcpu));
Paolo Bonzini609e36d2015-04-08 15:30:38 +02003255 msr_info->data = to_vmx(vcpu)->msr_guest_kernel_gs_base;
Avi Kivity44ea2b12009-09-06 15:55:37 +03003256 break;
Avi Kivity26bb0982009-09-07 11:14:12 +03003257#endif
Avi Kivity6aa8b732006-12-10 02:21:36 -08003258 case MSR_EFER:
Paolo Bonzini609e36d2015-04-08 15:30:38 +02003259 return kvm_get_msr_common(vcpu, msr_info);
Jaswinder Singh Rajputaf24a4e2009-05-15 18:42:05 +05303260 case MSR_IA32_TSC:
Haozhong Zhangbe7b2632015-10-20 15:39:11 +08003261 msr_info->data = guest_read_tsc(vcpu);
Avi Kivity6aa8b732006-12-10 02:21:36 -08003262 break;
3263 case MSR_IA32_SYSENTER_CS:
Paolo Bonzini609e36d2015-04-08 15:30:38 +02003264 msr_info->data = vmcs_read32(GUEST_SYSENTER_CS);
Avi Kivity6aa8b732006-12-10 02:21:36 -08003265 break;
3266 case MSR_IA32_SYSENTER_EIP:
Paolo Bonzini609e36d2015-04-08 15:30:38 +02003267 msr_info->data = vmcs_readl(GUEST_SYSENTER_EIP);
Avi Kivity6aa8b732006-12-10 02:21:36 -08003268 break;
3269 case MSR_IA32_SYSENTER_ESP:
Paolo Bonzini609e36d2015-04-08 15:30:38 +02003270 msr_info->data = vmcs_readl(GUEST_SYSENTER_ESP);
Avi Kivity6aa8b732006-12-10 02:21:36 -08003271 break;
Liu, Jinsong0dd376e2014-02-24 10:56:53 +00003272 case MSR_IA32_BNDCFGS:
Haozhong Zhang691bd432017-07-04 10:27:41 +08003273 if (!kvm_mpx_supported() ||
Radim Krčmářd6321d42017-08-05 00:12:49 +02003274 (!msr_info->host_initiated &&
3275 !guest_cpuid_has(vcpu, X86_FEATURE_MPX)))
Paolo Bonzini93c4adc2014-03-05 23:19:52 +01003276 return 1;
Paolo Bonzini609e36d2015-04-08 15:30:38 +02003277 msr_info->data = vmcs_read64(GUEST_BNDCFGS);
Liu, Jinsong0dd376e2014-02-24 10:56:53 +00003278 break;
Ashok Rajc45dcc72016-06-22 14:59:56 +08003279 case MSR_IA32_MCG_EXT_CTL:
3280 if (!msr_info->host_initiated &&
3281 !(to_vmx(vcpu)->msr_ia32_feature_control &
3282 FEATURE_CONTROL_LMCE))
Jan Kiszkacae50132014-01-04 18:47:22 +01003283 return 1;
Ashok Rajc45dcc72016-06-22 14:59:56 +08003284 msr_info->data = vcpu->arch.mcg_ext_ctl;
3285 break;
Jan Kiszkacae50132014-01-04 18:47:22 +01003286 case MSR_IA32_FEATURE_CONTROL:
Haozhong Zhang3b840802016-06-22 14:59:54 +08003287 msr_info->data = to_vmx(vcpu)->msr_ia32_feature_control;
Jan Kiszkacae50132014-01-04 18:47:22 +01003288 break;
3289 case MSR_IA32_VMX_BASIC ... MSR_IA32_VMX_VMFUNC:
3290 if (!nested_vmx_allowed(vcpu))
3291 return 1;
Paolo Bonzini609e36d2015-04-08 15:30:38 +02003292 return vmx_get_vmx_msr(vcpu, msr_info->index, &msr_info->data);
Wanpeng Li20300092014-12-02 19:14:59 +08003293 case MSR_IA32_XSS:
3294 if (!vmx_xsaves_supported())
3295 return 1;
Paolo Bonzini609e36d2015-04-08 15:30:38 +02003296 msr_info->data = vcpu->arch.ia32_xss;
Wanpeng Li20300092014-12-02 19:14:59 +08003297 break;
Sheng Yang4e47c7a2009-12-18 16:48:47 +08003298 case MSR_TSC_AUX:
Radim Krčmářd6321d42017-08-05 00:12:49 +02003299 if (!msr_info->host_initiated &&
3300 !guest_cpuid_has(vcpu, X86_FEATURE_RDTSCP))
Sheng Yang4e47c7a2009-12-18 16:48:47 +08003301 return 1;
3302 /* Otherwise falls through */
Avi Kivity6aa8b732006-12-10 02:21:36 -08003303 default:
Paolo Bonzini609e36d2015-04-08 15:30:38 +02003304 msr = find_msr_entry(to_vmx(vcpu), msr_info->index);
Avi Kivity3bab1f52006-12-29 16:49:48 -08003305 if (msr) {
Paolo Bonzini609e36d2015-04-08 15:30:38 +02003306 msr_info->data = msr->data;
Avi Kivity3bab1f52006-12-29 16:49:48 -08003307 break;
Avi Kivity6aa8b732006-12-10 02:21:36 -08003308 }
Paolo Bonzini609e36d2015-04-08 15:30:38 +02003309 return kvm_get_msr_common(vcpu, msr_info);
Avi Kivity6aa8b732006-12-10 02:21:36 -08003310 }
3311
Avi Kivity6aa8b732006-12-10 02:21:36 -08003312 return 0;
3313}
3314
Jan Kiszkacae50132014-01-04 18:47:22 +01003315static void vmx_leave_nested(struct kvm_vcpu *vcpu);
3316
Avi Kivity6aa8b732006-12-10 02:21:36 -08003317/*
3318 * Writes an MSR value into the appropriate "register".
3319 * Returns 0 on success, non-0 otherwise.
3320 * Assumes vcpu_load() was already called.
3321 */
Will Auld8fe8ab42012-11-29 12:42:12 -08003322static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
Avi Kivity6aa8b732006-12-10 02:21:36 -08003323{
Gregory Haskinsa2fa3e92007-07-27 08:13:10 -04003324 struct vcpu_vmx *vmx = to_vmx(vcpu);
Avi Kivity26bb0982009-09-07 11:14:12 +03003325 struct shared_msr_entry *msr;
Eddie Dong2cc51562007-05-21 07:28:09 +03003326 int ret = 0;
Will Auld8fe8ab42012-11-29 12:42:12 -08003327 u32 msr_index = msr_info->index;
3328 u64 data = msr_info->data;
Eddie Dong2cc51562007-05-21 07:28:09 +03003329
Avi Kivity6aa8b732006-12-10 02:21:36 -08003330 switch (msr_index) {
Avi Kivity3bab1f52006-12-29 16:49:48 -08003331 case MSR_EFER:
Will Auld8fe8ab42012-11-29 12:42:12 -08003332 ret = kvm_set_msr_common(vcpu, msr_info);
Eddie Dong2cc51562007-05-21 07:28:09 +03003333 break;
Avi Kivity16175a72009-03-23 22:13:44 +02003334#ifdef CONFIG_X86_64
Avi Kivity6aa8b732006-12-10 02:21:36 -08003335 case MSR_FS_BASE:
Avi Kivity2fb92db2011-04-27 19:42:18 +03003336 vmx_segment_cache_clear(vmx);
Avi Kivity6aa8b732006-12-10 02:21:36 -08003337 vmcs_writel(GUEST_FS_BASE, data);
3338 break;
3339 case MSR_GS_BASE:
Avi Kivity2fb92db2011-04-27 19:42:18 +03003340 vmx_segment_cache_clear(vmx);
Avi Kivity6aa8b732006-12-10 02:21:36 -08003341 vmcs_writel(GUEST_GS_BASE, data);
3342 break;
Avi Kivity44ea2b12009-09-06 15:55:37 +03003343 case MSR_KERNEL_GS_BASE:
3344 vmx_load_host_state(vmx);
3345 vmx->msr_guest_kernel_gs_base = data;
3346 break;
Avi Kivity6aa8b732006-12-10 02:21:36 -08003347#endif
3348 case MSR_IA32_SYSENTER_CS:
3349 vmcs_write32(GUEST_SYSENTER_CS, data);
3350 break;
3351 case MSR_IA32_SYSENTER_EIP:
Avi Kivityf5b42c32007-03-06 12:05:53 +02003352 vmcs_writel(GUEST_SYSENTER_EIP, data);
Avi Kivity6aa8b732006-12-10 02:21:36 -08003353 break;
3354 case MSR_IA32_SYSENTER_ESP:
Avi Kivityf5b42c32007-03-06 12:05:53 +02003355 vmcs_writel(GUEST_SYSENTER_ESP, data);
Avi Kivity6aa8b732006-12-10 02:21:36 -08003356 break;
Liu, Jinsong0dd376e2014-02-24 10:56:53 +00003357 case MSR_IA32_BNDCFGS:
Haozhong Zhang691bd432017-07-04 10:27:41 +08003358 if (!kvm_mpx_supported() ||
Radim Krčmářd6321d42017-08-05 00:12:49 +02003359 (!msr_info->host_initiated &&
3360 !guest_cpuid_has(vcpu, X86_FEATURE_MPX)))
Paolo Bonzini93c4adc2014-03-05 23:19:52 +01003361 return 1;
Yu Zhangfd8cb432017-08-24 20:27:56 +08003362 if (is_noncanonical_address(data & PAGE_MASK, vcpu) ||
Jim Mattson45316622017-05-23 11:52:54 -07003363 (data & MSR_IA32_BNDCFGS_RSVD))
Avi Kivity6aa8b732006-12-10 02:21:36 -08003364 return 1;
Sheng Yang468d4722008-10-09 16:01:55 +08003365 vmcs_write64(GUEST_BNDCFGS, data);
3366 break;
3367 case MSR_IA32_TSC:
3368 kvm_write_tsc(vcpu, msr_info);
3369 break;
3370 case MSR_IA32_CR_PAT:
Will Auld8fe8ab42012-11-29 12:42:12 -08003371 if (vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_IA32_PAT) {
Nadav Amit45666542014-09-18 22:39:44 +03003372 if (!kvm_mtrr_valid(vcpu, MSR_IA32_CR_PAT, data))
3373 return 1;
Sheng Yang468d4722008-10-09 16:01:55 +08003374 vmcs_write64(GUEST_IA32_PAT, data);
3375 vcpu->arch.pat = data;
3376 break;
3377 }
Sheng Yang4e47c7a2009-12-18 16:48:47 +08003378 ret = kvm_set_msr_common(vcpu, msr_info);
3379 break;
Will Auldba904632012-11-29 12:42:50 -08003380 case MSR_IA32_TSC_ADJUST:
3381 ret = kvm_set_msr_common(vcpu, msr_info);
Sheng Yang4e47c7a2009-12-18 16:48:47 +08003382 break;
Ashok Rajc45dcc72016-06-22 14:59:56 +08003383 case MSR_IA32_MCG_EXT_CTL:
3384 if ((!msr_info->host_initiated &&
3385 !(to_vmx(vcpu)->msr_ia32_feature_control &
3386 FEATURE_CONTROL_LMCE)) ||
3387 (data & ~MCG_EXT_CTL_LMCE_EN))
3388 return 1;
3389 vcpu->arch.mcg_ext_ctl = data;
3390 break;
Jan Kiszkacae50132014-01-04 18:47:22 +01003391 case MSR_IA32_FEATURE_CONTROL:
Haozhong Zhang37e4c992016-06-22 14:59:55 +08003392 if (!vmx_feature_control_msr_valid(vcpu, data) ||
Haozhong Zhang3b840802016-06-22 14:59:54 +08003393 (to_vmx(vcpu)->msr_ia32_feature_control &
Jan Kiszkacae50132014-01-04 18:47:22 +01003394 FEATURE_CONTROL_LOCKED && !msr_info->host_initiated))
3395 return 1;
Haozhong Zhang3b840802016-06-22 14:59:54 +08003396 vmx->msr_ia32_feature_control = data;
Jan Kiszkacae50132014-01-04 18:47:22 +01003397 if (msr_info->host_initiated && data == 0)
3398 vmx_leave_nested(vcpu);
3399 break;
3400 case MSR_IA32_VMX_BASIC ... MSR_IA32_VMX_VMFUNC:
David Matlack62cc6b9d2016-11-29 18:14:07 -08003401 if (!msr_info->host_initiated)
3402 return 1; /* they are read-only */
3403 if (!nested_vmx_allowed(vcpu))
3404 return 1;
3405 return vmx_set_vmx_msr(vcpu, msr_index, data);
Wanpeng Li20300092014-12-02 19:14:59 +08003406 case MSR_IA32_XSS:
3407 if (!vmx_xsaves_supported())
3408 return 1;
3409 /*
3410 * The only supported bit as of Skylake is bit 8, but
3411 * it is not supported by KVM.
3412 */
3413 if (data != 0)
3414 return 1;
3415 vcpu->arch.ia32_xss = data;
3416 if (vcpu->arch.ia32_xss != host_xss)
3417 add_atomic_switch_msr(vmx, MSR_IA32_XSS,
3418 vcpu->arch.ia32_xss, host_xss);
3419 else
3420 clear_atomic_switch_msr(vmx, MSR_IA32_XSS);
3421 break;
Sheng Yang4e47c7a2009-12-18 16:48:47 +08003422 case MSR_TSC_AUX:
Radim Krčmářd6321d42017-08-05 00:12:49 +02003423 if (!msr_info->host_initiated &&
3424 !guest_cpuid_has(vcpu, X86_FEATURE_RDTSCP))
Sheng Yang4e47c7a2009-12-18 16:48:47 +08003425 return 1;
3426 /* Check reserved bit, higher 32 bits should be zero */
3427 if ((data >> 32) != 0)
3428 return 1;
3429 /* Otherwise falls through */
Avi Kivity6aa8b732006-12-10 02:21:36 -08003430 default:
Rusty Russell8b9cf982007-07-30 16:31:43 +10003431 msr = find_msr_entry(vmx, msr_index);
Avi Kivity3bab1f52006-12-29 16:49:48 -08003432 if (msr) {
Andy Honig8b3c3102014-08-27 11:16:44 -07003433 u64 old_msr_data = msr->data;
Avi Kivity3bab1f52006-12-29 16:49:48 -08003434 msr->data = data;
Avi Kivity2225fd52012-04-18 15:03:04 +03003435 if (msr - vmx->guest_msrs < vmx->save_nmsrs) {
3436 preempt_disable();
Andy Honig8b3c3102014-08-27 11:16:44 -07003437 ret = kvm_set_shared_msr(msr->index, msr->data,
3438 msr->mask);
Avi Kivity2225fd52012-04-18 15:03:04 +03003439 preempt_enable();
Andy Honig8b3c3102014-08-27 11:16:44 -07003440 if (ret)
3441 msr->data = old_msr_data;
Avi Kivity2225fd52012-04-18 15:03:04 +03003442 }
Avi Kivity3bab1f52006-12-29 16:49:48 -08003443 break;
Avi Kivity6aa8b732006-12-10 02:21:36 -08003444 }
Will Auld8fe8ab42012-11-29 12:42:12 -08003445 ret = kvm_set_msr_common(vcpu, msr_info);
Avi Kivity6aa8b732006-12-10 02:21:36 -08003446 }
3447
Eddie Dong2cc51562007-05-21 07:28:09 +03003448 return ret;
Avi Kivity6aa8b732006-12-10 02:21:36 -08003449}
3450
Marcelo Tosatti5fdbf972008-06-27 14:58:02 -03003451static void vmx_cache_reg(struct kvm_vcpu *vcpu, enum kvm_reg reg)
Avi Kivity6aa8b732006-12-10 02:21:36 -08003452{
Marcelo Tosatti5fdbf972008-06-27 14:58:02 -03003453 __set_bit(reg, (unsigned long *)&vcpu->arch.regs_avail);
3454 switch (reg) {
3455 case VCPU_REGS_RSP:
3456 vcpu->arch.regs[VCPU_REGS_RSP] = vmcs_readl(GUEST_RSP);
3457 break;
3458 case VCPU_REGS_RIP:
3459 vcpu->arch.regs[VCPU_REGS_RIP] = vmcs_readl(GUEST_RIP);
3460 break;
Avi Kivity6de4f3a2009-05-31 22:58:47 +03003461 case VCPU_EXREG_PDPTR:
3462 if (enable_ept)
3463 ept_save_pdptrs(vcpu);
3464 break;
Marcelo Tosatti5fdbf972008-06-27 14:58:02 -03003465 default:
3466 break;
3467 }
Avi Kivity6aa8b732006-12-10 02:21:36 -08003468}
3469
Avi Kivity6aa8b732006-12-10 02:21:36 -08003470static __init int cpu_has_kvm_support(void)
3471{
Eduardo Habkost6210e372008-11-17 19:03:16 -02003472 return cpu_has_vmx();
Avi Kivity6aa8b732006-12-10 02:21:36 -08003473}
3474
3475static __init int vmx_disabled_by_bios(void)
3476{
3477 u64 msr;
3478
3479 rdmsrl(MSR_IA32_FEATURE_CONTROL, msr);
Shane Wangcafd6652010-04-29 12:09:01 -04003480 if (msr & FEATURE_CONTROL_LOCKED) {
Joseph Cihula23f3e992011-02-08 11:45:56 -08003481 /* launched w/ TXT and VMX disabled */
Shane Wangcafd6652010-04-29 12:09:01 -04003482 if (!(msr & FEATURE_CONTROL_VMXON_ENABLED_INSIDE_SMX)
3483 && tboot_enabled())
3484 return 1;
Joseph Cihula23f3e992011-02-08 11:45:56 -08003485 /* launched w/o TXT and VMX only enabled w/ TXT */
Shane Wangcafd6652010-04-29 12:09:01 -04003486 if (!(msr & FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX)
Joseph Cihula23f3e992011-02-08 11:45:56 -08003487 && (msr & FEATURE_CONTROL_VMXON_ENABLED_INSIDE_SMX)
Shane Wangf9335af2010-11-17 11:40:17 +08003488 && !tboot_enabled()) {
3489 printk(KERN_WARNING "kvm: disable TXT in the BIOS or "
Joseph Cihula23f3e992011-02-08 11:45:56 -08003490 "activate TXT before enabling KVM\n");
Shane Wangcafd6652010-04-29 12:09:01 -04003491 return 1;
Shane Wangf9335af2010-11-17 11:40:17 +08003492 }
Joseph Cihula23f3e992011-02-08 11:45:56 -08003493 /* launched w/o TXT and VMX disabled */
3494 if (!(msr & FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX)
3495 && !tboot_enabled())
3496 return 1;
Shane Wangcafd6652010-04-29 12:09:01 -04003497 }
3498
3499 return 0;
Avi Kivity6aa8b732006-12-10 02:21:36 -08003500}
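/*
 * In short, vmx_disabled_by_bios() above reports VMX as BIOS-disabled only
 * when IA32_FEATURE_CONTROL is locked and the enable bit matching the launch
 * environment is clear: VMXON_ENABLED_INSIDE_SMX for a tboot/TXT launch,
 * VMXON_ENABLED_OUTSIDE_SMX otherwise.  If the MSR is not locked,
 * hardware_enable() below sets and locks the required bits itself.
 */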
3501
Dongxiao Xu7725b892010-05-11 18:29:38 +08003502static void kvm_cpu_vmxon(u64 addr)
3503{
David Hildenbrandfe0e80b2017-03-10 12:47:13 +01003504 cr4_set_bits(X86_CR4_VMXE);
Alexander Shishkin1c5ac212016-03-29 17:43:10 +03003505 intel_pt_handle_vmx(1);
3506
Dongxiao Xu7725b892010-05-11 18:29:38 +08003507 asm volatile (ASM_VMX_VMXON_RAX
3508 : : "a"(&addr), "m"(addr)
3509 : "memory", "cc");
3510}
3511
Radim Krčmář13a34e02014-08-28 15:13:03 +02003512static int hardware_enable(void)
Avi Kivity6aa8b732006-12-10 02:21:36 -08003513{
3514 int cpu = raw_smp_processor_id();
3515 u64 phys_addr = __pa(per_cpu(vmxarea, cpu));
Shane Wangcafd6652010-04-29 12:09:01 -04003516 u64 old, test_bits;
Avi Kivity6aa8b732006-12-10 02:21:36 -08003517
Andy Lutomirski1e02ce42014-10-24 15:58:08 -07003518 if (cr4_read_shadow() & X86_CR4_VMXE)
Alexander Graf10474ae2009-09-15 11:37:46 +02003519 return -EBUSY;
3520
Nadav Har'Eld462b812011-05-24 15:26:10 +03003521 INIT_LIST_HEAD(&per_cpu(loaded_vmcss_on_cpu, cpu));
Feng Wubf9f6ac2015-09-18 22:29:55 +08003522 INIT_LIST_HEAD(&per_cpu(blocked_vcpu_on_cpu, cpu));
3523 spin_lock_init(&per_cpu(blocked_vcpu_on_cpu_lock, cpu));
Zhang Yanfei8f536b72012-12-06 23:43:34 +08003524
3525 /*
3526 * Now we can enable the vmclear operation in kdump
3527 * since the loaded_vmcss_on_cpu list on this cpu
3528 * has been initialized.
3529 *
3530 * Though the cpu is not in VMX operation now, it is safe
3531 * to enable the vmclear operation here because the
3532 * loaded_vmcss_on_cpu list is still empty.
3533 */
3534 crash_enable_local_vmclear(cpu);
3535
Avi Kivity6aa8b732006-12-10 02:21:36 -08003536 rdmsrl(MSR_IA32_FEATURE_CONTROL, old);
Shane Wangcafd6652010-04-29 12:09:01 -04003537
3538 test_bits = FEATURE_CONTROL_LOCKED;
3539 test_bits |= FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX;
3540 if (tboot_enabled())
3541 test_bits |= FEATURE_CONTROL_VMXON_ENABLED_INSIDE_SMX;
3542
3543 if ((old & test_bits) != test_bits) {
Avi Kivity6aa8b732006-12-10 02:21:36 -08003544 /* enable and lock */
Shane Wangcafd6652010-04-29 12:09:01 -04003545 wrmsrl(MSR_IA32_FEATURE_CONTROL, old | test_bits);
3546 }
David Hildenbrandfe0e80b2017-03-10 12:47:13 +01003547 kvm_cpu_vmxon(phys_addr);
David Hildenbrandfdf288b2017-08-24 20:51:29 +02003548 if (enable_ept)
3549 ept_sync_global();
Alexander Graf10474ae2009-09-15 11:37:46 +02003550
3551 return 0;
Avi Kivity6aa8b732006-12-10 02:21:36 -08003552}
3553
Nadav Har'Eld462b812011-05-24 15:26:10 +03003554static void vmclear_local_loaded_vmcss(void)
Avi Kivity543e4242008-05-13 16:22:47 +03003555{
3556 int cpu = raw_smp_processor_id();
Nadav Har'Eld462b812011-05-24 15:26:10 +03003557 struct loaded_vmcs *v, *n;
Avi Kivity543e4242008-05-13 16:22:47 +03003558
Nadav Har'Eld462b812011-05-24 15:26:10 +03003559 list_for_each_entry_safe(v, n, &per_cpu(loaded_vmcss_on_cpu, cpu),
3560 loaded_vmcss_on_cpu_link)
3561 __loaded_vmcs_clear(v);
Avi Kivity543e4242008-05-13 16:22:47 +03003562}
3563
Eduardo Habkost710ff4a2008-11-17 19:03:18 -02003564
3565/* Just like cpu_vmxoff(), but with the __kvm_handle_fault_on_reboot()
3566 * tricks.
3567 */
3568static void kvm_cpu_vmxoff(void)
3569{
3570 asm volatile (__ex(ASM_VMX_VMXOFF) : : : "cc");
Alexander Shishkin1c5ac212016-03-29 17:43:10 +03003571
3572 intel_pt_handle_vmx(0);
David Hildenbrandfe0e80b2017-03-10 12:47:13 +01003573 cr4_clear_bits(X86_CR4_VMXE);
Eduardo Habkost710ff4a2008-11-17 19:03:18 -02003574}
3575
Radim Krčmář13a34e02014-08-28 15:13:03 +02003576static void hardware_disable(void)
Avi Kivity6aa8b732006-12-10 02:21:36 -08003577{
David Hildenbrandfe0e80b2017-03-10 12:47:13 +01003578 vmclear_local_loaded_vmcss();
3579 kvm_cpu_vmxoff();
Avi Kivity6aa8b732006-12-10 02:21:36 -08003580}
3581
Yang, Sheng1c3d14fe2007-07-29 11:07:42 +03003582static __init int adjust_vmx_controls(u32 ctl_min, u32 ctl_opt,
Mike Dayd77c26f2007-10-08 09:02:08 -04003583 u32 msr, u32 *result)
Avi Kivity6aa8b732006-12-10 02:21:36 -08003584{
3585 u32 vmx_msr_low, vmx_msr_high;
Yang, Sheng1c3d14fe2007-07-29 11:07:42 +03003586 u32 ctl = ctl_min | ctl_opt;
3587
3588 rdmsr(msr, vmx_msr_low, vmx_msr_high);
3589
3590 ctl &= vmx_msr_high; /* bit == 0 in high word ==> must be zero */
3591 ctl |= vmx_msr_low; /* bit == 1 in low word ==> must be one */
3592
3593 /* Ensure minimum (required) set of control bits are supported. */
3594 if (ctl_min & ~ctl)
Yang, Sheng002c7f72007-07-31 14:23:01 +03003595 return -EIO;
Yang, Sheng1c3d14fe2007-07-29 11:07:42 +03003596
3597 *result = ctl;
3598 return 0;
3599}
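/*
 * How adjust_vmx_controls() above works, for illustration: the control MSR
 * reports must-be-1 bits in its low word and may-be-1 bits in its high word.
 * Starting from ctl = ctl_min | ctl_opt, optional bits the CPU cannot set are
 * cleared (ctl &= vmx_msr_high), required bits are forced on
 * (ctl |= vmx_msr_low), and -EIO is returned if any ctl_min bit was lost,
 * i.e. a mandatory feature is unavailable.
 */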
3600
Avi Kivity110312c2010-12-21 12:54:20 +02003601static __init bool allow_1_setting(u32 msr, u32 ctl)
3602{
3603 u32 vmx_msr_low, vmx_msr_high;
3604
3605 rdmsr(msr, vmx_msr_low, vmx_msr_high);
3606 return vmx_msr_high & ctl;
3607}
3608
Yang, Sheng002c7f72007-07-31 14:23:01 +03003609static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf)
Yang, Sheng1c3d14fe2007-07-29 11:07:42 +03003610{
3611 u32 vmx_msr_low, vmx_msr_high;
Sheng Yangd56f5462008-04-25 10:13:16 +08003612 u32 min, opt, min2, opt2;
Yang, Sheng1c3d14fe2007-07-29 11:07:42 +03003613 u32 _pin_based_exec_control = 0;
3614 u32 _cpu_based_exec_control = 0;
Sheng Yangf78e0e22007-10-29 09:40:42 +08003615 u32 _cpu_based_2nd_exec_control = 0;
Yang, Sheng1c3d14fe2007-07-29 11:07:42 +03003616 u32 _vmexit_control = 0;
3617 u32 _vmentry_control = 0;
3618
Raghavendra K T10166742012-02-07 23:19:20 +05303619 min = CPU_BASED_HLT_EXITING |
Yang, Sheng1c3d14fe2007-07-29 11:07:42 +03003620#ifdef CONFIG_X86_64
3621 CPU_BASED_CR8_LOAD_EXITING |
3622 CPU_BASED_CR8_STORE_EXITING |
3623#endif
Sheng Yangd56f5462008-04-25 10:13:16 +08003624 CPU_BASED_CR3_LOAD_EXITING |
3625 CPU_BASED_CR3_STORE_EXITING |
Quan Xu8eb73e22017-12-12 16:44:21 +08003626 CPU_BASED_UNCOND_IO_EXITING |
Yang, Sheng1c3d14fe2007-07-29 11:07:42 +03003627 CPU_BASED_MOV_DR_EXITING |
Marcelo Tosattia7052892008-09-23 13:18:35 -03003628 CPU_BASED_USE_TSC_OFFSETING |
Avi Kivityfee84b02011-11-10 14:57:25 +02003629 CPU_BASED_INVLPG_EXITING |
3630 CPU_BASED_RDPMC_EXITING;
Anthony Liguori443381a2010-12-06 10:53:38 -06003631
Michael S. Tsirkin668fffa2017-04-21 12:27:17 +02003632 if (!kvm_mwait_in_guest())
3633 min |= CPU_BASED_MWAIT_EXITING |
3634 CPU_BASED_MONITOR_EXITING;
3635
Sheng Yangf78e0e22007-10-29 09:40:42 +08003636 opt = CPU_BASED_TPR_SHADOW |
Sheng Yang25c5f222008-03-28 13:18:56 +08003637 CPU_BASED_USE_MSR_BITMAPS |
Sheng Yangf78e0e22007-10-29 09:40:42 +08003638 CPU_BASED_ACTIVATE_SECONDARY_CONTROLS;
Yang, Sheng1c3d14fe2007-07-29 11:07:42 +03003639 if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_PROCBASED_CTLS,
3640 &_cpu_based_exec_control) < 0)
Yang, Sheng002c7f72007-07-31 14:23:01 +03003641 return -EIO;
Yang, Sheng6e5d8652007-09-12 18:03:11 +08003642#ifdef CONFIG_X86_64
3643 if ((_cpu_based_exec_control & CPU_BASED_TPR_SHADOW))
3644 _cpu_based_exec_control &= ~CPU_BASED_CR8_LOAD_EXITING &
3645 ~CPU_BASED_CR8_STORE_EXITING;
3646#endif
Sheng Yangf78e0e22007-10-29 09:40:42 +08003647 if (_cpu_based_exec_control & CPU_BASED_ACTIVATE_SECONDARY_CONTROLS) {
Sheng Yangd56f5462008-04-25 10:13:16 +08003648 min2 = 0;
3649 opt2 = SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES |
Yang Zhang8d146952013-01-25 10:18:50 +08003650 SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE |
Sheng Yang2384d2b2008-01-17 15:14:33 +08003651 SECONDARY_EXEC_WBINVD_EXITING |
Sheng Yangd56f5462008-04-25 10:13:16 +08003652 SECONDARY_EXEC_ENABLE_VPID |
Nitin A Kamble3a624e22009-06-08 11:34:16 -07003653 SECONDARY_EXEC_ENABLE_EPT |
Zhai, Edwin4b8d54f2009-10-09 18:03:20 +08003654 SECONDARY_EXEC_UNRESTRICTED_GUEST |
Sheng Yang4e47c7a2009-12-18 16:48:47 +08003655 SECONDARY_EXEC_PAUSE_LOOP_EXITING |
Paolo Bonzini0367f202016-07-12 10:44:55 +02003656 SECONDARY_EXEC_DESC |
Mao, Junjiead756a12012-07-02 01:18:48 +00003657 SECONDARY_EXEC_RDTSCP |
Yang Zhang83d4c282013-01-25 10:18:49 +08003658 SECONDARY_EXEC_ENABLE_INVPCID |
Yang Zhangc7c9c562013-01-25 10:18:51 +08003659 SECONDARY_EXEC_APIC_REGISTER_VIRT |
Abel Gordonabc4fc52013-04-18 14:35:25 +03003660 SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY |
Wanpeng Li20300092014-12-02 19:14:59 +08003661 SECONDARY_EXEC_SHADOW_VMCS |
Kai Huang843e4332015-01-28 10:54:28 +08003662 SECONDARY_EXEC_XSAVES |
David Hildenbrand736fdf72017-08-24 20:51:37 +02003663 SECONDARY_EXEC_RDSEED_EXITING |
3664 SECONDARY_EXEC_RDRAND_EXITING |
Xiao Guangrong8b3e34e2015-09-09 14:05:51 +08003665 SECONDARY_EXEC_ENABLE_PML |
Bandan Das2a499e42017-08-03 15:54:41 -04003666 SECONDARY_EXEC_TSC_SCALING |
3667 SECONDARY_EXEC_ENABLE_VMFUNC;
Sheng Yangd56f5462008-04-25 10:13:16 +08003668 if (adjust_vmx_controls(min2, opt2,
3669 MSR_IA32_VMX_PROCBASED_CTLS2,
Sheng Yangf78e0e22007-10-29 09:40:42 +08003670 &_cpu_based_2nd_exec_control) < 0)
3671 return -EIO;
3672 }
3673#ifndef CONFIG_X86_64
3674 if (!(_cpu_based_2nd_exec_control &
3675 SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES))
3676 _cpu_based_exec_control &= ~CPU_BASED_TPR_SHADOW;
3677#endif
Yang Zhang83d4c282013-01-25 10:18:49 +08003678
3679 if (!(_cpu_based_exec_control & CPU_BASED_TPR_SHADOW))
3680 _cpu_based_2nd_exec_control &= ~(
Yang Zhang8d146952013-01-25 10:18:50 +08003681 SECONDARY_EXEC_APIC_REGISTER_VIRT |
Yang Zhangc7c9c562013-01-25 10:18:51 +08003682 SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE |
3683 SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY);
Yang Zhang83d4c282013-01-25 10:18:49 +08003684
Wanpeng Li61f1dd92017-10-18 16:02:19 -07003685 rdmsr_safe(MSR_IA32_VMX_EPT_VPID_CAP,
3686 &vmx_capability.ept, &vmx_capability.vpid);
3687
Sheng Yangd56f5462008-04-25 10:13:16 +08003688 if (_cpu_based_2nd_exec_control & SECONDARY_EXEC_ENABLE_EPT) {
Marcelo Tosattia7052892008-09-23 13:18:35 -03003689		/* CR3 accesses and invlpg don't need to cause VM Exits when EPT
 3690		   is enabled */
Gleb Natapov5fff7d22009-08-27 18:41:30 +03003691 _cpu_based_exec_control &= ~(CPU_BASED_CR3_LOAD_EXITING |
3692 CPU_BASED_CR3_STORE_EXITING |
3693 CPU_BASED_INVLPG_EXITING);
Wanpeng Li61f1dd92017-10-18 16:02:19 -07003694 } else if (vmx_capability.ept) {
3695 vmx_capability.ept = 0;
 3696		pr_warn_once("EPT CAP should not exist when the 1-setting of the "
 3697			     "'enable EPT' VM-execution control is not supported\n");
3698 }
3699 if (!(_cpu_based_2nd_exec_control & SECONDARY_EXEC_ENABLE_VPID) &&
3700 vmx_capability.vpid) {
3701 vmx_capability.vpid = 0;
 3702		pr_warn_once("VPID CAP should not exist when the 1-setting of the "
 3703			     "'enable VPID' VM-execution control is not supported\n");
Sheng Yangd56f5462008-04-25 10:13:16 +08003704 }
Yang, Sheng1c3d14fe2007-07-29 11:07:42 +03003705
Paolo Bonzini91fa0f82016-06-15 20:55:08 +02003706 min = VM_EXIT_SAVE_DEBUG_CONTROLS | VM_EXIT_ACK_INTR_ON_EXIT;
Yang, Sheng1c3d14fe2007-07-29 11:07:42 +03003707#ifdef CONFIG_X86_64
3708 min |= VM_EXIT_HOST_ADDR_SPACE_SIZE;
3709#endif
Yang Zhanga547c6d2013-04-11 19:25:10 +08003710 opt = VM_EXIT_SAVE_IA32_PAT | VM_EXIT_LOAD_IA32_PAT |
Paolo Bonzini91fa0f82016-06-15 20:55:08 +02003711 VM_EXIT_CLEAR_BNDCFGS;
Yang, Sheng1c3d14fe2007-07-29 11:07:42 +03003712 if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_EXIT_CTLS,
3713 &_vmexit_control) < 0)
Yang, Sheng002c7f72007-07-31 14:23:01 +03003714 return -EIO;
Yang, Sheng1c3d14fe2007-07-29 11:07:42 +03003715
Paolo Bonzini8a1b4392017-11-06 13:31:12 +01003716 min = PIN_BASED_EXT_INTR_MASK | PIN_BASED_NMI_EXITING;
3717 opt = PIN_BASED_VIRTUAL_NMIS | PIN_BASED_POSTED_INTR |
3718 PIN_BASED_VMX_PREEMPTION_TIMER;
Yang Zhang01e439b2013-04-11 19:25:12 +08003719 if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_PINBASED_CTLS,
3720 &_pin_based_exec_control) < 0)
3721 return -EIO;
3722
Paolo Bonzini1c17c3e2016-07-08 11:53:38 +02003723 if (cpu_has_broken_vmx_preemption_timer())
3724 _pin_based_exec_control &= ~PIN_BASED_VMX_PREEMPTION_TIMER;
Yang Zhang01e439b2013-04-11 19:25:12 +08003725 if (!(_cpu_based_2nd_exec_control &
Paolo Bonzini91fa0f82016-06-15 20:55:08 +02003726 SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY))
Yang Zhang01e439b2013-04-11 19:25:12 +08003727 _pin_based_exec_control &= ~PIN_BASED_POSTED_INTR;
3728
Paolo Bonzinic845f9c2014-02-21 10:55:44 +01003729 min = VM_ENTRY_LOAD_DEBUG_CONTROLS;
Liu, Jinsongda8999d2014-02-24 10:55:46 +00003730 opt = VM_ENTRY_LOAD_IA32_PAT | VM_ENTRY_LOAD_BNDCFGS;
Yang, Sheng1c3d14fe2007-07-29 11:07:42 +03003731 if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_ENTRY_CTLS,
3732 &_vmentry_control) < 0)
Yang, Sheng002c7f72007-07-31 14:23:01 +03003733 return -EIO;
Avi Kivity6aa8b732006-12-10 02:21:36 -08003734
Nguyen Anh Quynhc68876f2006-12-29 16:49:54 -08003735 rdmsr(MSR_IA32_VMX_BASIC, vmx_msr_low, vmx_msr_high);
Yang, Sheng1c3d14fe2007-07-29 11:07:42 +03003736
3737 /* IA-32 SDM Vol 3B: VMCS size is never greater than 4kB. */
3738 if ((vmx_msr_high & 0x1fff) > PAGE_SIZE)
Yang, Sheng002c7f72007-07-31 14:23:01 +03003739 return -EIO;
Yang, Sheng1c3d14fe2007-07-29 11:07:42 +03003740
3741#ifdef CONFIG_X86_64
3742 /* IA-32 SDM Vol 3B: 64-bit CPUs always have VMX_BASIC_MSR[48]==0. */
3743 if (vmx_msr_high & (1u<<16))
Yang, Sheng002c7f72007-07-31 14:23:01 +03003744 return -EIO;
Yang, Sheng1c3d14fe2007-07-29 11:07:42 +03003745#endif
3746
3747 /* Require Write-Back (WB) memory type for VMCS accesses. */
3748 if (((vmx_msr_high >> 18) & 15) != 6)
Yang, Sheng002c7f72007-07-31 14:23:01 +03003749 return -EIO;
Yang, Sheng1c3d14fe2007-07-29 11:07:42 +03003750
Yang, Sheng002c7f72007-07-31 14:23:01 +03003751 vmcs_conf->size = vmx_msr_high & 0x1fff;
Paolo Bonzini16cb0252016-09-05 15:57:00 +02003752 vmcs_conf->order = get_order(vmcs_conf->size);
Jan Dakinevich9ac7e3e2016-09-04 21:23:15 +03003753 vmcs_conf->basic_cap = vmx_msr_high & ~0x1fff;
Yang, Sheng002c7f72007-07-31 14:23:01 +03003754 vmcs_conf->revision_id = vmx_msr_low;
Yang, Sheng1c3d14fe2007-07-29 11:07:42 +03003755
Yang, Sheng002c7f72007-07-31 14:23:01 +03003756 vmcs_conf->pin_based_exec_ctrl = _pin_based_exec_control;
3757 vmcs_conf->cpu_based_exec_ctrl = _cpu_based_exec_control;
Sheng Yangf78e0e22007-10-29 09:40:42 +08003758 vmcs_conf->cpu_based_2nd_exec_ctrl = _cpu_based_2nd_exec_control;
Yang, Sheng002c7f72007-07-31 14:23:01 +03003759 vmcs_conf->vmexit_ctrl = _vmexit_control;
3760 vmcs_conf->vmentry_ctrl = _vmentry_control;
Yang, Sheng1c3d14fe2007-07-29 11:07:42 +03003761
Avi Kivity110312c2010-12-21 12:54:20 +02003762 cpu_has_load_ia32_efer =
3763 allow_1_setting(MSR_IA32_VMX_ENTRY_CTLS,
3764 VM_ENTRY_LOAD_IA32_EFER)
3765 && allow_1_setting(MSR_IA32_VMX_EXIT_CTLS,
3766 VM_EXIT_LOAD_IA32_EFER);
3767
Gleb Natapov8bf00a52011-10-05 14:01:22 +02003768 cpu_has_load_perf_global_ctrl =
3769 allow_1_setting(MSR_IA32_VMX_ENTRY_CTLS,
3770 VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL)
3771 && allow_1_setting(MSR_IA32_VMX_EXIT_CTLS,
3772 VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL);
3773
3774 /*
3775 * Some cpus support VM_ENTRY_(LOAD|SAVE)_IA32_PERF_GLOBAL_CTRL
Andrea Gelminibb3541f2016-05-21 14:14:44 +02003776	 * but due to the errata below it can't be used. The workaround is to
Gleb Natapov8bf00a52011-10-05 14:01:22 +02003777	 * use the MSR load mechanism to switch IA32_PERF_GLOBAL_CTRL.
3778 *
3779 * VM Exit May Incorrectly Clear IA32_PERF_GLOBAL_CTRL [34:32]
3780 *
3781 * AAK155 (model 26)
3782 * AAP115 (model 30)
3783 * AAT100 (model 37)
3784 * BC86,AAY89,BD102 (model 44)
3785 * BA97 (model 46)
3786 *
3787 */
3788 if (cpu_has_load_perf_global_ctrl && boot_cpu_data.x86 == 0x6) {
3789 switch (boot_cpu_data.x86_model) {
3790 case 26:
3791 case 30:
3792 case 37:
3793 case 44:
3794 case 46:
3795 cpu_has_load_perf_global_ctrl = false;
3796 printk_once(KERN_WARNING"kvm: VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL "
3797 "does not work properly. Using workaround\n");
3798 break;
3799 default:
3800 break;
3801 }
3802 }
3803
Borislav Petkov782511b2016-04-04 22:25:03 +02003804 if (boot_cpu_has(X86_FEATURE_XSAVES))
Wanpeng Li20300092014-12-02 19:14:59 +08003805 rdmsrl(MSR_IA32_XSS, host_xss);
3806
Yang, Sheng1c3d14fe2007-07-29 11:07:42 +03003807 return 0;
Nguyen Anh Quynhc68876f2006-12-29 16:49:54 -08003808}
Avi Kivity6aa8b732006-12-10 02:21:36 -08003809
3810static struct vmcs *alloc_vmcs_cpu(int cpu)
3811{
3812 int node = cpu_to_node(cpu);
3813 struct page *pages;
3814 struct vmcs *vmcs;
3815
Vlastimil Babka96db8002015-09-08 15:03:50 -07003816 pages = __alloc_pages_node(node, GFP_KERNEL, vmcs_config.order);
Avi Kivity6aa8b732006-12-10 02:21:36 -08003817 if (!pages)
3818 return NULL;
3819 vmcs = page_address(pages);
Yang, Sheng1c3d14fe2007-07-29 11:07:42 +03003820 memset(vmcs, 0, vmcs_config.size);
3821 vmcs->revision_id = vmcs_config.revision_id; /* vmcs revision id */
Avi Kivity6aa8b732006-12-10 02:21:36 -08003822 return vmcs;
3823}
3824
3825static struct vmcs *alloc_vmcs(void)
3826{
Ingo Molnard3b2c332007-01-05 16:36:23 -08003827 return alloc_vmcs_cpu(raw_smp_processor_id());
Avi Kivity6aa8b732006-12-10 02:21:36 -08003828}
3829
3830static void free_vmcs(struct vmcs *vmcs)
3831{
Yang, Sheng1c3d14fe2007-07-29 11:07:42 +03003832 free_pages((unsigned long)vmcs, vmcs_config.order);
Avi Kivity6aa8b732006-12-10 02:21:36 -08003833}
3834
Nadav Har'Eld462b812011-05-24 15:26:10 +03003835/*
3836 * Free a VMCS, but before that VMCLEAR it on the CPU where it was last loaded
3837 */
3838static void free_loaded_vmcs(struct loaded_vmcs *loaded_vmcs)
3839{
3840 if (!loaded_vmcs->vmcs)
3841 return;
3842 loaded_vmcs_clear(loaded_vmcs);
3843 free_vmcs(loaded_vmcs->vmcs);
3844 loaded_vmcs->vmcs = NULL;
Jim Mattson355f4fb2016-10-28 08:29:39 -07003845 WARN_ON(loaded_vmcs->shadow_vmcs != NULL);
Nadav Har'Eld462b812011-05-24 15:26:10 +03003846}
3847
Mark Kanda276c7962017-11-27 17:22:26 -06003848static void vmx_nested_free_vmcs02(struct vcpu_vmx *vmx)
3849{
3850 struct loaded_vmcs *loaded_vmcs = &vmx->nested.vmcs02;
3851
3852 /*
3853 * Just leak the VMCS02 if the WARN triggers. Better than
3854 * a use-after-free.
3855 */
3856 if (WARN_ON(vmx->loaded_vmcs == loaded_vmcs))
3857 return;
3858 free_loaded_vmcs(loaded_vmcs);
3859}
3860
Sam Ravnborg39959582007-06-01 00:47:13 -07003861static void free_kvm_area(void)
Avi Kivity6aa8b732006-12-10 02:21:36 -08003862{
3863 int cpu;
3864
Zachary Amsden3230bb42009-09-29 11:38:37 -10003865 for_each_possible_cpu(cpu) {
Avi Kivity6aa8b732006-12-10 02:21:36 -08003866 free_vmcs(per_cpu(vmxarea, cpu));
Zachary Amsden3230bb42009-09-29 11:38:37 -10003867 per_cpu(vmxarea, cpu) = NULL;
3868 }
Avi Kivity6aa8b732006-12-10 02:21:36 -08003869}
3870
Jim Mattson85fd5142017-07-07 12:51:41 -07003871enum vmcs_field_type {
3872 VMCS_FIELD_TYPE_U16 = 0,
3873 VMCS_FIELD_TYPE_U64 = 1,
3874 VMCS_FIELD_TYPE_U32 = 2,
3875 VMCS_FIELD_TYPE_NATURAL_WIDTH = 3
3876};
3877
3878static inline int vmcs_field_type(unsigned long field)
3879{
3880 if (0x1 & field) /* the *_HIGH fields are all 32 bit */
3881 return VMCS_FIELD_TYPE_U32;
3882 return (field >> 13) & 0x3 ;
3883}
3884
3885static inline int vmcs_field_readonly(unsigned long field)
3886{
3887 return (((field >> 10) & 0x3) == 1);
3888}
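/*
 * The helpers above follow the SDM encoding of VMCS field numbers: bits
 * 14:13 give the width (16-bit, 64-bit, 32-bit or natural), bits 11:10 the
 * field group (a value of 1 marks the read-only VM-exit information fields)
 * and bit 0 selects the high half of a 64-bit field.  For example,
 * VM_EXIT_REASON (encoding 0x4402) decodes to a 32-bit read-only field.
 */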
3889
Bandan Dasfe2b2012014-04-21 15:20:14 -04003890static void init_vmcs_shadow_fields(void)
3891{
3892 int i, j;
3893
3894 /* No checks for read only fields yet */
3895
3896 for (i = j = 0; i < max_shadow_read_write_fields; i++) {
3897 switch (shadow_read_write_fields[i]) {
3898 case GUEST_BNDCFGS:
Paolo Bonzinia87036a2016-03-08 09:52:13 +01003899 if (!kvm_mpx_supported())
Bandan Dasfe2b2012014-04-21 15:20:14 -04003900 continue;
3901 break;
3902 default:
3903 break;
3904 }
3905
3906 if (j < i)
3907 shadow_read_write_fields[j] =
3908 shadow_read_write_fields[i];
3909 j++;
3910 }
3911 max_shadow_read_write_fields = j;
3912
 3913	/* Shadowed fields are accessed by the guest without causing a vmexit. */
3914 for (i = 0; i < max_shadow_read_write_fields; i++) {
Jim Mattson85fd5142017-07-07 12:51:41 -07003915 unsigned long field = shadow_read_write_fields[i];
3916
3917 clear_bit(field, vmx_vmwrite_bitmap);
3918 clear_bit(field, vmx_vmread_bitmap);
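		/* 64-bit fields have a *_HIGH companion at encoding field + 1. */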
3919 if (vmcs_field_type(field) == VMCS_FIELD_TYPE_U64) {
3920 clear_bit(field + 1, vmx_vmwrite_bitmap);
3921 clear_bit(field + 1, vmx_vmread_bitmap);
3922 }
Bandan Dasfe2b2012014-04-21 15:20:14 -04003923 }
Jim Mattson85fd5142017-07-07 12:51:41 -07003924 for (i = 0; i < max_shadow_read_only_fields; i++) {
3925 unsigned long field = shadow_read_only_fields[i];
3926
3927 clear_bit(field, vmx_vmread_bitmap);
3928 if (vmcs_field_type(field) == VMCS_FIELD_TYPE_U64)
3929 clear_bit(field + 1, vmx_vmread_bitmap);
3930 }
Bandan Dasfe2b2012014-04-21 15:20:14 -04003931}
3932
Avi Kivity6aa8b732006-12-10 02:21:36 -08003933static __init int alloc_kvm_area(void)
3934{
3935 int cpu;
3936
Zachary Amsden3230bb42009-09-29 11:38:37 -10003937 for_each_possible_cpu(cpu) {
Avi Kivity6aa8b732006-12-10 02:21:36 -08003938 struct vmcs *vmcs;
3939
3940 vmcs = alloc_vmcs_cpu(cpu);
3941 if (!vmcs) {
3942 free_kvm_area();
3943 return -ENOMEM;
3944 }
3945
3946 per_cpu(vmxarea, cpu) = vmcs;
3947 }
3948 return 0;
3949}
3950
Gleb Natapov91b0aa22013-01-21 15:36:47 +02003951static void fix_pmode_seg(struct kvm_vcpu *vcpu, int seg,
Gleb Natapovd99e4152012-12-20 16:57:45 +02003952 struct kvm_segment *save)
Avi Kivity6aa8b732006-12-10 02:21:36 -08003953{
Gleb Natapovd99e4152012-12-20 16:57:45 +02003954 if (!emulate_invalid_guest_state) {
3955 /*
3956 * CS and SS RPL should be equal during guest entry according
3957 * to VMX spec, but in reality it is not always so. Since vcpu
3958 * is in the middle of the transition from real mode to
3959 * protected mode it is safe to assume that RPL 0 is a good
3960 * default value.
3961 */
3962 if (seg == VCPU_SREG_CS || seg == VCPU_SREG_SS)
Nadav Amitb32a9912015-03-29 16:33:04 +03003963 save->selector &= ~SEGMENT_RPL_MASK;
3964 save->dpl = save->selector & SEGMENT_RPL_MASK;
Gleb Natapovd99e4152012-12-20 16:57:45 +02003965 save->s = 1;
Avi Kivity6aa8b732006-12-10 02:21:36 -08003966 }
Gleb Natapovd99e4152012-12-20 16:57:45 +02003967 vmx_set_segment(vcpu, save, seg);
Avi Kivity6aa8b732006-12-10 02:21:36 -08003968}
3969
3970static void enter_pmode(struct kvm_vcpu *vcpu)
3971{
3972 unsigned long flags;
Mohammed Gamala89a8fb2008-08-17 16:42:16 +03003973 struct vcpu_vmx *vmx = to_vmx(vcpu);
Avi Kivity6aa8b732006-12-10 02:21:36 -08003974
Gleb Natapovd99e4152012-12-20 16:57:45 +02003975 /*
 3976	 * Update real mode segment cache. It may not be up-to-date if a segment
 3977	 * register was written while the vcpu was in guest mode.
3978 */
3979 vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_ES], VCPU_SREG_ES);
3980 vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_DS], VCPU_SREG_DS);
3981 vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_FS], VCPU_SREG_FS);
3982 vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_GS], VCPU_SREG_GS);
3983 vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_SS], VCPU_SREG_SS);
3984 vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_CS], VCPU_SREG_CS);
3985
Avi Kivity7ffd92c2009-06-09 14:10:45 +03003986 vmx->rmode.vm86_active = 0;
Avi Kivity6aa8b732006-12-10 02:21:36 -08003987
Avi Kivity2fb92db2011-04-27 19:42:18 +03003988 vmx_segment_cache_clear(vmx);
3989
Avi Kivityf5f7b2f2012-08-21 17:07:00 +03003990 vmx_set_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_TR], VCPU_SREG_TR);
Avi Kivity6aa8b732006-12-10 02:21:36 -08003991
3992 flags = vmcs_readl(GUEST_RFLAGS);
Avi Kivity78ac8b42010-04-08 18:19:35 +03003993 flags &= RMODE_GUEST_OWNED_EFLAGS_BITS;
3994 flags |= vmx->rmode.save_rflags & ~RMODE_GUEST_OWNED_EFLAGS_BITS;
Avi Kivity6aa8b732006-12-10 02:21:36 -08003995 vmcs_writel(GUEST_RFLAGS, flags);
3996
Rusty Russell66aee912007-07-17 23:34:16 +10003997 vmcs_writel(GUEST_CR4, (vmcs_readl(GUEST_CR4) & ~X86_CR4_VME) |
3998 (vmcs_readl(CR4_READ_SHADOW) & X86_CR4_VME));
Avi Kivity6aa8b732006-12-10 02:21:36 -08003999
4000 update_exception_bitmap(vcpu);
4001
Gleb Natapov91b0aa22013-01-21 15:36:47 +02004002 fix_pmode_seg(vcpu, VCPU_SREG_CS, &vmx->rmode.segs[VCPU_SREG_CS]);
4003 fix_pmode_seg(vcpu, VCPU_SREG_SS, &vmx->rmode.segs[VCPU_SREG_SS]);
4004 fix_pmode_seg(vcpu, VCPU_SREG_ES, &vmx->rmode.segs[VCPU_SREG_ES]);
4005 fix_pmode_seg(vcpu, VCPU_SREG_DS, &vmx->rmode.segs[VCPU_SREG_DS]);
4006 fix_pmode_seg(vcpu, VCPU_SREG_FS, &vmx->rmode.segs[VCPU_SREG_FS]);
4007 fix_pmode_seg(vcpu, VCPU_SREG_GS, &vmx->rmode.segs[VCPU_SREG_GS]);
Avi Kivity6aa8b732006-12-10 02:21:36 -08004008}
4009
Avi Kivityf5f7b2f2012-08-21 17:07:00 +03004010static void fix_rmode_seg(int seg, struct kvm_segment *save)
Avi Kivity6aa8b732006-12-10 02:21:36 -08004011{
Mathias Krause772e0312012-08-30 01:30:19 +02004012 const struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg];
Gleb Natapovd99e4152012-12-20 16:57:45 +02004013 struct kvm_segment var = *save;
Avi Kivity6aa8b732006-12-10 02:21:36 -08004014
Gleb Natapovd99e4152012-12-20 16:57:45 +02004015 var.dpl = 0x3;
4016 if (seg == VCPU_SREG_CS)
4017 var.type = 0x3;
4018
4019 if (!emulate_invalid_guest_state) {
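		/*
		 * Build a real-mode style segment where selector * 16 == base,
		 * e.g. a base of 0x12340 gives selector 0x1234 (illustrative
		 * value) and a 64 KiB byte-granular limit.
		 */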
4020 var.selector = var.base >> 4;
4021 var.base = var.base & 0xffff0;
4022 var.limit = 0xffff;
4023 var.g = 0;
4024 var.db = 0;
4025 var.present = 1;
4026 var.s = 1;
4027 var.l = 0;
4028 var.unusable = 0;
4029 var.type = 0x3;
4030 var.avl = 0;
4031 if (save->base & 0xf)
4032 printk_once(KERN_WARNING "kvm: segment base is not "
4033 "paragraph aligned when entering "
4034 "protected mode (seg=%d)", seg);
4035 }
4036
4037 vmcs_write16(sf->selector, var.selector);
Chao Peng96794e42017-02-21 03:50:01 -05004038 vmcs_writel(sf->base, var.base);
Gleb Natapovd99e4152012-12-20 16:57:45 +02004039 vmcs_write32(sf->limit, var.limit);
4040 vmcs_write32(sf->ar_bytes, vmx_segment_access_rights(&var));
Avi Kivity6aa8b732006-12-10 02:21:36 -08004041}
4042
4043static void enter_rmode(struct kvm_vcpu *vcpu)
4044{
4045 unsigned long flags;
Mohammed Gamala89a8fb2008-08-17 16:42:16 +03004046 struct vcpu_vmx *vmx = to_vmx(vcpu);
Avi Kivity6aa8b732006-12-10 02:21:36 -08004047
Avi Kivityf5f7b2f2012-08-21 17:07:00 +03004048 vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_TR], VCPU_SREG_TR);
4049 vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_ES], VCPU_SREG_ES);
4050 vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_DS], VCPU_SREG_DS);
4051 vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_FS], VCPU_SREG_FS);
4052 vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_GS], VCPU_SREG_GS);
Gleb Natapovc6ad11532012-12-12 19:10:51 +02004053 vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_SS], VCPU_SREG_SS);
4054 vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_CS], VCPU_SREG_CS);
Avi Kivityf5f7b2f2012-08-21 17:07:00 +03004055
Avi Kivity7ffd92c2009-06-09 14:10:45 +03004056 vmx->rmode.vm86_active = 1;
Avi Kivity6aa8b732006-12-10 02:21:36 -08004057
Gleb Natapov776e58e2011-03-13 12:34:27 +02004058 /*
4059 * Very old userspace does not call KVM_SET_TSS_ADDR before entering
Jan Kiszka4918c6c2013-03-15 08:38:56 +01004060 * vcpu. Warn the user that an update is overdue.
Gleb Natapov776e58e2011-03-13 12:34:27 +02004061 */
Jan Kiszka4918c6c2013-03-15 08:38:56 +01004062 if (!vcpu->kvm->arch.tss_addr)
Gleb Natapov776e58e2011-03-13 12:34:27 +02004063		printk_once(KERN_WARNING "kvm: KVM_SET_TSS_ADDR needs to be "
 4064			     "called before entering vcpu\n");
Gleb Natapov776e58e2011-03-13 12:34:27 +02004065
Avi Kivity2fb92db2011-04-27 19:42:18 +03004066 vmx_segment_cache_clear(vmx);
4067
Jan Kiszka4918c6c2013-03-15 08:38:56 +01004068 vmcs_writel(GUEST_TR_BASE, vcpu->kvm->arch.tss_addr);
Avi Kivity6aa8b732006-12-10 02:21:36 -08004069 vmcs_write32(GUEST_TR_LIMIT, RMODE_TSS_SIZE - 1);
Avi Kivity6aa8b732006-12-10 02:21:36 -08004070 vmcs_write32(GUEST_TR_AR_BYTES, 0x008b);
4071
4072 flags = vmcs_readl(GUEST_RFLAGS);
Avi Kivity78ac8b42010-04-08 18:19:35 +03004073 vmx->rmode.save_rflags = flags;
Avi Kivity6aa8b732006-12-10 02:21:36 -08004074
Glauber de Oliveira Costa053de042008-01-30 13:31:27 +01004075 flags |= X86_EFLAGS_IOPL | X86_EFLAGS_VM;
Avi Kivity6aa8b732006-12-10 02:21:36 -08004076
4077 vmcs_writel(GUEST_RFLAGS, flags);
Rusty Russell66aee912007-07-17 23:34:16 +10004078 vmcs_writel(GUEST_CR4, vmcs_readl(GUEST_CR4) | X86_CR4_VME);
Avi Kivity6aa8b732006-12-10 02:21:36 -08004079 update_exception_bitmap(vcpu);
4080
Gleb Natapovd99e4152012-12-20 16:57:45 +02004081 fix_rmode_seg(VCPU_SREG_SS, &vmx->rmode.segs[VCPU_SREG_SS]);
4082 fix_rmode_seg(VCPU_SREG_CS, &vmx->rmode.segs[VCPU_SREG_CS]);
4083 fix_rmode_seg(VCPU_SREG_ES, &vmx->rmode.segs[VCPU_SREG_ES]);
4084 fix_rmode_seg(VCPU_SREG_DS, &vmx->rmode.segs[VCPU_SREG_DS]);
4085 fix_rmode_seg(VCPU_SREG_GS, &vmx->rmode.segs[VCPU_SREG_GS]);
4086 fix_rmode_seg(VCPU_SREG_FS, &vmx->rmode.segs[VCPU_SREG_FS]);
Mohammed Gamala89a8fb2008-08-17 16:42:16 +03004087
Eddie Dong8668a3c2007-10-10 14:26:45 +08004088 kvm_mmu_reset_context(vcpu);
Avi Kivity6aa8b732006-12-10 02:21:36 -08004089}
4090
Amit Shah401d10d2009-02-20 22:53:37 +05304091static void vmx_set_efer(struct kvm_vcpu *vcpu, u64 efer)
4092{
4093 struct vcpu_vmx *vmx = to_vmx(vcpu);
Avi Kivity26bb0982009-09-07 11:14:12 +03004094 struct shared_msr_entry *msr = find_msr_entry(vmx, MSR_EFER);
4095
4096 if (!msr)
4097 return;
Amit Shah401d10d2009-02-20 22:53:37 +05304098
Avi Kivity44ea2b12009-09-06 15:55:37 +03004099 /*
4100 * Force kernel_gs_base reloading before EFER changes, as control
4101 * of this msr depends on is_long_mode().
4102 */
4103 vmx_load_host_state(to_vmx(vcpu));
Avi Kivityf6801df2010-01-21 15:31:50 +02004104 vcpu->arch.efer = efer;
Amit Shah401d10d2009-02-20 22:53:37 +05304105 if (efer & EFER_LMA) {
Gleb Natapov2961e8762013-11-25 15:37:13 +02004106 vm_entry_controls_setbit(to_vmx(vcpu), VM_ENTRY_IA32E_MODE);
Amit Shah401d10d2009-02-20 22:53:37 +05304107 msr->data = efer;
4108 } else {
Gleb Natapov2961e8762013-11-25 15:37:13 +02004109 vm_entry_controls_clearbit(to_vmx(vcpu), VM_ENTRY_IA32E_MODE);
Amit Shah401d10d2009-02-20 22:53:37 +05304110
4111 msr->data = efer & ~EFER_LME;
4112 }
4113 setup_msrs(vmx);
4114}
4115
Avi Kivity05b3e0c2006-12-13 00:33:45 -08004116#ifdef CONFIG_X86_64
Avi Kivity6aa8b732006-12-10 02:21:36 -08004117
4118static void enter_lmode(struct kvm_vcpu *vcpu)
4119{
4120 u32 guest_tr_ar;
4121
Avi Kivity2fb92db2011-04-27 19:42:18 +03004122 vmx_segment_cache_clear(to_vmx(vcpu));
4123
Avi Kivity6aa8b732006-12-10 02:21:36 -08004124 guest_tr_ar = vmcs_read32(GUEST_TR_AR_BYTES);
Andy Lutomirski4d283ec2015-08-13 13:18:48 -07004125 if ((guest_tr_ar & VMX_AR_TYPE_MASK) != VMX_AR_TYPE_BUSY_64_TSS) {
Jan Kiszkabd801582011-09-12 11:26:22 +02004126 pr_debug_ratelimited("%s: tss fixup for long mode. \n",
4127 __func__);
Avi Kivity6aa8b732006-12-10 02:21:36 -08004128 vmcs_write32(GUEST_TR_AR_BYTES,
Andy Lutomirski4d283ec2015-08-13 13:18:48 -07004129 (guest_tr_ar & ~VMX_AR_TYPE_MASK)
4130 | VMX_AR_TYPE_BUSY_64_TSS);
Avi Kivity6aa8b732006-12-10 02:21:36 -08004131 }
Avi Kivityda38f432010-07-06 11:30:49 +03004132 vmx_set_efer(vcpu, vcpu->arch.efer | EFER_LMA);
Avi Kivity6aa8b732006-12-10 02:21:36 -08004133}
4134
4135static void exit_lmode(struct kvm_vcpu *vcpu)
4136{
Gleb Natapov2961e8762013-11-25 15:37:13 +02004137 vm_entry_controls_clearbit(to_vmx(vcpu), VM_ENTRY_IA32E_MODE);
Avi Kivityda38f432010-07-06 11:30:49 +03004138 vmx_set_efer(vcpu, vcpu->arch.efer & ~EFER_LMA);
Avi Kivity6aa8b732006-12-10 02:21:36 -08004139}
4140
4141#endif
4142
Wanpeng Lic2ba05c2017-12-12 17:33:03 -08004143static inline void __vmx_flush_tlb(struct kvm_vcpu *vcpu, int vpid,
4144 bool invalidate_gpa)
Sheng Yang2384d2b2008-01-17 15:14:33 +08004145{
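	/*
	 * With EPT the guest-physical mappings for this context are flushed
	 * via INVEPT; otherwise the linear mappings tagged with this VPID are
	 * flushed via INVVPID.
	 */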
Wanpeng Lic2ba05c2017-12-12 17:33:03 -08004146 if (enable_ept && (invalidate_gpa || !enable_vpid)) {
Xiao Guangrongdd180b32010-07-03 16:02:42 +08004147 if (!VALID_PAGE(vcpu->arch.mmu.root_hpa))
4148 return;
Peter Feiner995f00a2017-06-30 17:26:32 -07004149 ept_sync_context(construct_eptp(vcpu, vcpu->arch.mmu.root_hpa));
Jim Mattsonf0b98c02017-03-15 07:56:11 -07004150 } else {
4151 vpid_sync_context(vpid);
Xiao Guangrongdd180b32010-07-03 16:02:42 +08004152 }
Sheng Yang2384d2b2008-01-17 15:14:33 +08004153}
4154
Wanpeng Lic2ba05c2017-12-12 17:33:03 -08004155static void vmx_flush_tlb(struct kvm_vcpu *vcpu, bool invalidate_gpa)
Wanpeng Lidd5f5342015-09-23 18:26:57 +08004156{
Wanpeng Lic2ba05c2017-12-12 17:33:03 -08004157 __vmx_flush_tlb(vcpu, to_vmx(vcpu)->vpid, invalidate_gpa);
Wanpeng Lidd5f5342015-09-23 18:26:57 +08004158}
4159
Jim Mattsonfb6c8192017-03-16 13:53:59 -07004160static void vmx_flush_tlb_ept_only(struct kvm_vcpu *vcpu)
4161{
4162 if (enable_ept)
Wanpeng Lic2ba05c2017-12-12 17:33:03 -08004163 vmx_flush_tlb(vcpu, true);
Jim Mattsonfb6c8192017-03-16 13:53:59 -07004164}
4165
Avi Kivitye8467fd2009-12-29 18:43:06 +02004166static void vmx_decache_cr0_guest_bits(struct kvm_vcpu *vcpu)
4167{
4168 ulong cr0_guest_owned_bits = vcpu->arch.cr0_guest_owned_bits;
4169
4170 vcpu->arch.cr0 &= ~cr0_guest_owned_bits;
4171 vcpu->arch.cr0 |= vmcs_readl(GUEST_CR0) & cr0_guest_owned_bits;
4172}
4173
Avi Kivityaff48ba2010-12-05 18:56:11 +02004174static void vmx_decache_cr3(struct kvm_vcpu *vcpu)
4175{
4176 if (enable_ept && is_paging(vcpu))
4177 vcpu->arch.cr3 = vmcs_readl(GUEST_CR3);
4178 __set_bit(VCPU_EXREG_CR3, (ulong *)&vcpu->arch.regs_avail);
4179}
4180
Anthony Liguori25c4c272007-04-27 09:29:21 +03004181static void vmx_decache_cr4_guest_bits(struct kvm_vcpu *vcpu)
Avi Kivity399badf2007-01-05 16:36:38 -08004182{
Avi Kivityfc78f512009-12-07 12:16:48 +02004183 ulong cr4_guest_owned_bits = vcpu->arch.cr4_guest_owned_bits;
4184
4185 vcpu->arch.cr4 &= ~cr4_guest_owned_bits;
4186 vcpu->arch.cr4 |= vmcs_readl(GUEST_CR4) & cr4_guest_owned_bits;
Avi Kivity399badf2007-01-05 16:36:38 -08004187}
4188
Sheng Yang14394422008-04-28 12:24:45 +08004189static void ept_load_pdptrs(struct kvm_vcpu *vcpu)
4190{
Gleb Natapovd0d538b2013-10-09 19:13:19 +03004191 struct kvm_mmu *mmu = vcpu->arch.walk_mmu;
4192
Avi Kivity6de4f3a2009-05-31 22:58:47 +03004193 if (!test_bit(VCPU_EXREG_PDPTR,
4194 (unsigned long *)&vcpu->arch.regs_dirty))
4195 return;
4196
Sheng Yang14394422008-04-28 12:24:45 +08004197 if (is_paging(vcpu) && is_pae(vcpu) && !is_long_mode(vcpu)) {
Gleb Natapovd0d538b2013-10-09 19:13:19 +03004198 vmcs_write64(GUEST_PDPTR0, mmu->pdptrs[0]);
4199 vmcs_write64(GUEST_PDPTR1, mmu->pdptrs[1]);
4200 vmcs_write64(GUEST_PDPTR2, mmu->pdptrs[2]);
4201 vmcs_write64(GUEST_PDPTR3, mmu->pdptrs[3]);
Sheng Yang14394422008-04-28 12:24:45 +08004202 }
4203}
4204
Avi Kivity8f5d5492009-05-31 18:41:29 +03004205static void ept_save_pdptrs(struct kvm_vcpu *vcpu)
4206{
Gleb Natapovd0d538b2013-10-09 19:13:19 +03004207 struct kvm_mmu *mmu = vcpu->arch.walk_mmu;
4208
Avi Kivity8f5d5492009-05-31 18:41:29 +03004209 if (is_paging(vcpu) && is_pae(vcpu) && !is_long_mode(vcpu)) {
Gleb Natapovd0d538b2013-10-09 19:13:19 +03004210 mmu->pdptrs[0] = vmcs_read64(GUEST_PDPTR0);
4211 mmu->pdptrs[1] = vmcs_read64(GUEST_PDPTR1);
4212 mmu->pdptrs[2] = vmcs_read64(GUEST_PDPTR2);
4213 mmu->pdptrs[3] = vmcs_read64(GUEST_PDPTR3);
Avi Kivity8f5d5492009-05-31 18:41:29 +03004214 }
Avi Kivity6de4f3a2009-05-31 22:58:47 +03004215
4216 __set_bit(VCPU_EXREG_PDPTR,
4217 (unsigned long *)&vcpu->arch.regs_avail);
4218 __set_bit(VCPU_EXREG_PDPTR,
4219 (unsigned long *)&vcpu->arch.regs_dirty);
Avi Kivity8f5d5492009-05-31 18:41:29 +03004220}
4221
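/*
 * The CR0/CR4 checks below follow the VMX fixed-bit convention: bits set in
 * the IA32_VMX_CR{0,4}_FIXED0 MSRs must be 1 and bits clear in the matching
 * FIXED1 MSRs must be 0, which is what fixed_bits_valid() verifies.
 */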
David Matlack38991522016-11-29 18:14:08 -08004222static bool nested_guest_cr0_valid(struct kvm_vcpu *vcpu, unsigned long val)
4223{
4224 u64 fixed0 = to_vmx(vcpu)->nested.nested_vmx_cr0_fixed0;
4225 u64 fixed1 = to_vmx(vcpu)->nested.nested_vmx_cr0_fixed1;
4226 struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
4227
4228 if (to_vmx(vcpu)->nested.nested_vmx_secondary_ctls_high &
4229 SECONDARY_EXEC_UNRESTRICTED_GUEST &&
4230 nested_cpu_has2(vmcs12, SECONDARY_EXEC_UNRESTRICTED_GUEST))
4231 fixed0 &= ~(X86_CR0_PE | X86_CR0_PG);
4232
4233 return fixed_bits_valid(val, fixed0, fixed1);
4234}
4235
4236static bool nested_host_cr0_valid(struct kvm_vcpu *vcpu, unsigned long val)
4237{
4238 u64 fixed0 = to_vmx(vcpu)->nested.nested_vmx_cr0_fixed0;
4239 u64 fixed1 = to_vmx(vcpu)->nested.nested_vmx_cr0_fixed1;
4240
4241 return fixed_bits_valid(val, fixed0, fixed1);
4242}
4243
4244static bool nested_cr4_valid(struct kvm_vcpu *vcpu, unsigned long val)
4245{
4246 u64 fixed0 = to_vmx(vcpu)->nested.nested_vmx_cr4_fixed0;
4247 u64 fixed1 = to_vmx(vcpu)->nested.nested_vmx_cr4_fixed1;
4248
4249 return fixed_bits_valid(val, fixed0, fixed1);
4250}
4251
4252/* No difference in the restrictions on guest and host CR4 in VMX operation. */
4253#define nested_guest_cr4_valid nested_cr4_valid
4254#define nested_host_cr4_valid nested_cr4_valid
4255
Nadav Har'El5e1746d2011-05-25 23:03:24 +03004256static int vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4);
Sheng Yang14394422008-04-28 12:24:45 +08004257
4258static void ept_update_paging_mode_cr0(unsigned long *hw_cr0,
4259 unsigned long cr0,
4260 struct kvm_vcpu *vcpu)
4261{
Marcelo Tosatti5233dd52011-06-06 14:27:47 -03004262 if (!test_bit(VCPU_EXREG_CR3, (ulong *)&vcpu->arch.regs_avail))
4263 vmx_decache_cr3(vcpu);
Sheng Yang14394422008-04-28 12:24:45 +08004264 if (!(cr0 & X86_CR0_PG)) {
4265 /* From paging/starting to nonpaging */
4266 vmcs_write32(CPU_BASED_VM_EXEC_CONTROL,
Sheng Yang65267ea2008-06-18 14:43:38 +08004267 vmcs_read32(CPU_BASED_VM_EXEC_CONTROL) |
Sheng Yang14394422008-04-28 12:24:45 +08004268 (CPU_BASED_CR3_LOAD_EXITING |
4269 CPU_BASED_CR3_STORE_EXITING));
4270 vcpu->arch.cr0 = cr0;
Avi Kivityfc78f512009-12-07 12:16:48 +02004271 vmx_set_cr4(vcpu, kvm_read_cr4(vcpu));
Sheng Yang14394422008-04-28 12:24:45 +08004272 } else if (!is_paging(vcpu)) {
4273 /* From nonpaging to paging */
4274 vmcs_write32(CPU_BASED_VM_EXEC_CONTROL,
Sheng Yang65267ea2008-06-18 14:43:38 +08004275 vmcs_read32(CPU_BASED_VM_EXEC_CONTROL) &
Sheng Yang14394422008-04-28 12:24:45 +08004276 ~(CPU_BASED_CR3_LOAD_EXITING |
4277 CPU_BASED_CR3_STORE_EXITING));
4278 vcpu->arch.cr0 = cr0;
Avi Kivityfc78f512009-12-07 12:16:48 +02004279 vmx_set_cr4(vcpu, kvm_read_cr4(vcpu));
Sheng Yang14394422008-04-28 12:24:45 +08004280 }
Sheng Yang95eb84a2009-08-19 09:52:18 +08004281
4282 if (!(cr0 & X86_CR0_WP))
4283 *hw_cr0 &= ~X86_CR0_WP;
Sheng Yang14394422008-04-28 12:24:45 +08004284}
4285
Avi Kivity6aa8b732006-12-10 02:21:36 -08004286static void vmx_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
4287{
Avi Kivity7ffd92c2009-06-09 14:10:45 +03004288 struct vcpu_vmx *vmx = to_vmx(vcpu);
Nitin A Kamble3a624e22009-06-08 11:34:16 -07004289 unsigned long hw_cr0;
4290
Gleb Natapov50378782013-02-04 16:00:28 +02004291 hw_cr0 = (cr0 & ~KVM_GUEST_CR0_MASK);
Nitin A Kamble3a624e22009-06-08 11:34:16 -07004292 if (enable_unrestricted_guest)
Gleb Natapov50378782013-02-04 16:00:28 +02004293 hw_cr0 |= KVM_VM_CR0_ALWAYS_ON_UNRESTRICTED_GUEST;
Gleb Natapov218e7632013-01-21 15:36:45 +02004294 else {
Gleb Natapov50378782013-02-04 16:00:28 +02004295 hw_cr0 |= KVM_VM_CR0_ALWAYS_ON;
Sheng Yang14394422008-04-28 12:24:45 +08004296
Gleb Natapov218e7632013-01-21 15:36:45 +02004297 if (vmx->rmode.vm86_active && (cr0 & X86_CR0_PE))
4298 enter_pmode(vcpu);
Avi Kivity6aa8b732006-12-10 02:21:36 -08004299
Gleb Natapov218e7632013-01-21 15:36:45 +02004300 if (!vmx->rmode.vm86_active && !(cr0 & X86_CR0_PE))
4301 enter_rmode(vcpu);
4302 }
Avi Kivity6aa8b732006-12-10 02:21:36 -08004303
Avi Kivity05b3e0c2006-12-13 00:33:45 -08004304#ifdef CONFIG_X86_64
Avi Kivityf6801df2010-01-21 15:31:50 +02004305 if (vcpu->arch.efer & EFER_LME) {
Rusty Russell707d92fa2007-07-17 23:19:08 +10004306 if (!is_paging(vcpu) && (cr0 & X86_CR0_PG))
Avi Kivity6aa8b732006-12-10 02:21:36 -08004307 enter_lmode(vcpu);
Rusty Russell707d92fa2007-07-17 23:19:08 +10004308 if (is_paging(vcpu) && !(cr0 & X86_CR0_PG))
Avi Kivity6aa8b732006-12-10 02:21:36 -08004309 exit_lmode(vcpu);
4310 }
4311#endif
4312
Avi Kivity089d0342009-03-23 18:26:32 +02004313 if (enable_ept)
Sheng Yang14394422008-04-28 12:24:45 +08004314 ept_update_paging_mode_cr0(&hw_cr0, cr0, vcpu);
4315
Avi Kivity6aa8b732006-12-10 02:21:36 -08004316 vmcs_writel(CR0_READ_SHADOW, cr0);
Sheng Yang14394422008-04-28 12:24:45 +08004317 vmcs_writel(GUEST_CR0, hw_cr0);
Zhang Xiantaoad312c72007-12-13 23:50:52 +08004318 vcpu->arch.cr0 = cr0;
Gleb Natapov14168782013-01-21 15:36:49 +02004319
4320 /* depends on vcpu->arch.cr0 to be set to a new value */
4321 vmx->emulation_required = emulation_required(vcpu);
Avi Kivity6aa8b732006-12-10 02:21:36 -08004322}
4323
Yu Zhang855feb62017-08-24 20:27:55 +08004324static int get_ept_level(struct kvm_vcpu *vcpu)
4325{
4326 if (cpu_has_vmx_ept_5levels() && (cpuid_maxphyaddr(vcpu) > 48))
4327 return 5;
4328 return 4;
4329}
4330
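/*
 * EPTP layout: bits 2:0 select the memory type (6 = write-back), bits 5:3
 * hold the page-walk length minus one (3 for a 4-level, 4 for a 5-level
 * walk), bit 6 enables accessed/dirty flags and the page-aligned upper bits
 * hold the EPT root table address.
 */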
Peter Feiner995f00a2017-06-30 17:26:32 -07004331static u64 construct_eptp(struct kvm_vcpu *vcpu, unsigned long root_hpa)
Sheng Yang14394422008-04-28 12:24:45 +08004332{
Yu Zhang855feb62017-08-24 20:27:55 +08004333 u64 eptp = VMX_EPTP_MT_WB;
Sheng Yang14394422008-04-28 12:24:45 +08004334
Yu Zhang855feb62017-08-24 20:27:55 +08004335 eptp |= (get_ept_level(vcpu) == 5) ? VMX_EPTP_PWL_5 : VMX_EPTP_PWL_4;
Sheng Yang14394422008-04-28 12:24:45 +08004336
Peter Feiner995f00a2017-06-30 17:26:32 -07004337 if (enable_ept_ad_bits &&
4338 (!is_guest_mode(vcpu) || nested_ept_ad_enabled(vcpu)))
David Hildenbrandbb97a012017-08-10 23:15:28 +02004339 eptp |= VMX_EPTP_AD_ENABLE_BIT;
Sheng Yang14394422008-04-28 12:24:45 +08004340 eptp |= (root_hpa & PAGE_MASK);
4341
4342 return eptp;
4343}
4344
Avi Kivity6aa8b732006-12-10 02:21:36 -08004345static void vmx_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
4346{
Sheng Yang14394422008-04-28 12:24:45 +08004347 unsigned long guest_cr3;
4348 u64 eptp;
4349
4350 guest_cr3 = cr3;
Avi Kivity089d0342009-03-23 18:26:32 +02004351 if (enable_ept) {
Peter Feiner995f00a2017-06-30 17:26:32 -07004352 eptp = construct_eptp(vcpu, cr3);
Sheng Yang14394422008-04-28 12:24:45 +08004353 vmcs_write64(EPT_POINTER, eptp);
Jan Kiszka59ab5a82013-08-08 16:26:29 +02004354 if (is_paging(vcpu) || is_guest_mode(vcpu))
4355 guest_cr3 = kvm_read_cr3(vcpu);
4356 else
4357 guest_cr3 = vcpu->kvm->arch.ept_identity_map_addr;
Marcelo Tosatti7c93be442009-10-26 16:48:33 -02004358 ept_load_pdptrs(vcpu);
Sheng Yang14394422008-04-28 12:24:45 +08004359 }
4360
Wanpeng Lic2ba05c2017-12-12 17:33:03 -08004361 vmx_flush_tlb(vcpu, true);
Sheng Yang14394422008-04-28 12:24:45 +08004362 vmcs_writel(GUEST_CR3, guest_cr3);
Avi Kivity6aa8b732006-12-10 02:21:36 -08004363}
4364
Nadav Har'El5e1746d2011-05-25 23:03:24 +03004365static int vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
Avi Kivity6aa8b732006-12-10 02:21:36 -08004366{
Ben Serebrin085e68e2015-04-16 11:58:05 -07004367 /*
4368 * Pass through host's Machine Check Enable value to hw_cr4, which
4369 * is in force while we are in guest mode. Do not let guests control
4370 * this bit, even if host CR4.MCE == 0.
4371 */
4372 unsigned long hw_cr4 =
4373 (cr4_read_shadow() & X86_CR4_MCE) |
4374 (cr4 & ~X86_CR4_MCE) |
4375 (to_vmx(vcpu)->rmode.vm86_active ?
4376 KVM_RMODE_VM_CR4_ALWAYS_ON : KVM_PMODE_VM_CR4_ALWAYS_ON);
Sheng Yang14394422008-04-28 12:24:45 +08004377
Paolo Bonzini0367f202016-07-12 10:44:55 +02004378 if ((cr4 & X86_CR4_UMIP) && !boot_cpu_has(X86_FEATURE_UMIP)) {
4379 vmcs_set_bits(SECONDARY_VM_EXEC_CONTROL,
4380 SECONDARY_EXEC_DESC);
4381 hw_cr4 &= ~X86_CR4_UMIP;
4382 } else
4383 vmcs_clear_bits(SECONDARY_VM_EXEC_CONTROL,
4384 SECONDARY_EXEC_DESC);
4385
Nadav Har'El5e1746d2011-05-25 23:03:24 +03004386 if (cr4 & X86_CR4_VMXE) {
4387 /*
4388 * To use VMXON (and later other VMX instructions), a guest
4389 * must first be able to turn on cr4.VMXE (see handle_vmon()).
4390 * So basically the check on whether to allow nested VMX
4391 * is here.
4392 */
4393 if (!nested_vmx_allowed(vcpu))
4394 return 1;
Jan Kiszka1a0d74e2013-03-07 14:08:07 +01004395 }
David Matlack38991522016-11-29 18:14:08 -08004396
4397 if (to_vmx(vcpu)->nested.vmxon && !nested_cr4_valid(vcpu, cr4))
Nadav Har'El5e1746d2011-05-25 23:03:24 +03004398 return 1;
4399
Zhang Xiantaoad312c72007-12-13 23:50:52 +08004400 vcpu->arch.cr4 = cr4;
Avi Kivitybc230082009-12-08 12:14:42 +02004401 if (enable_ept) {
4402 if (!is_paging(vcpu)) {
4403 hw_cr4 &= ~X86_CR4_PAE;
4404 hw_cr4 |= X86_CR4_PSE;
4405 } else if (!(cr4 & X86_CR4_PAE)) {
4406 hw_cr4 &= ~X86_CR4_PAE;
4407 }
4408 }
Sheng Yang14394422008-04-28 12:24:45 +08004409
Radim Krčmář656ec4a2015-11-02 22:20:00 +01004410 if (!enable_unrestricted_guest && !is_paging(vcpu))
4411 /*
Huaitong Handdba2622016-03-22 16:51:15 +08004412		 * SMEP/SMAP/PKU are disabled when the CPU is in non-paging mode
 4413		 * in hardware. To emulate this behavior, SMEP/SMAP/PKU need to
 4414		 * be manually disabled when the guest switches to non-paging
 4415		 * mode.
4416 *
4417 * If !enable_unrestricted_guest, the CPU is always running
4418 * with CR0.PG=1 and CR4 needs to be modified.
4419 * If enable_unrestricted_guest, the CPU automatically
4420 * disables SMEP/SMAP/PKU when the guest sets CR0.PG=0.
Radim Krčmář656ec4a2015-11-02 22:20:00 +01004421 */
Huaitong Handdba2622016-03-22 16:51:15 +08004422 hw_cr4 &= ~(X86_CR4_SMEP | X86_CR4_SMAP | X86_CR4_PKE);
Radim Krčmář656ec4a2015-11-02 22:20:00 +01004423
Sheng Yang14394422008-04-28 12:24:45 +08004424 vmcs_writel(CR4_READ_SHADOW, cr4);
4425 vmcs_writel(GUEST_CR4, hw_cr4);
Nadav Har'El5e1746d2011-05-25 23:03:24 +03004426 return 0;
Avi Kivity6aa8b732006-12-10 02:21:36 -08004427}
4428
Avi Kivity6aa8b732006-12-10 02:21:36 -08004429static void vmx_get_segment(struct kvm_vcpu *vcpu,
4430 struct kvm_segment *var, int seg)
4431{
Avi Kivitya9179492011-01-03 14:28:52 +02004432 struct vcpu_vmx *vmx = to_vmx(vcpu);
Avi Kivity6aa8b732006-12-10 02:21:36 -08004433 u32 ar;
4434
Gleb Natapovc6ad11532012-12-12 19:10:51 +02004435 if (vmx->rmode.vm86_active && seg != VCPU_SREG_LDTR) {
Avi Kivityf5f7b2f2012-08-21 17:07:00 +03004436 *var = vmx->rmode.segs[seg];
Avi Kivitya9179492011-01-03 14:28:52 +02004437 if (seg == VCPU_SREG_TR
Avi Kivity2fb92db2011-04-27 19:42:18 +03004438 || var->selector == vmx_read_guest_seg_selector(vmx, seg))
Avi Kivityf5f7b2f2012-08-21 17:07:00 +03004439 return;
Avi Kivity1390a282012-08-21 17:07:08 +03004440 var->base = vmx_read_guest_seg_base(vmx, seg);
4441 var->selector = vmx_read_guest_seg_selector(vmx, seg);
4442 return;
Avi Kivitya9179492011-01-03 14:28:52 +02004443 }
Avi Kivity2fb92db2011-04-27 19:42:18 +03004444 var->base = vmx_read_guest_seg_base(vmx, seg);
4445 var->limit = vmx_read_guest_seg_limit(vmx, seg);
4446 var->selector = vmx_read_guest_seg_selector(vmx, seg);
4447 ar = vmx_read_guest_seg_ar(vmx, seg);
Gleb Natapov03617c12013-06-28 13:17:18 +03004448 var->unusable = (ar >> 16) & 1;
Avi Kivity6aa8b732006-12-10 02:21:36 -08004449 var->type = ar & 15;
4450 var->s = (ar >> 4) & 1;
4451 var->dpl = (ar >> 5) & 3;
Gleb Natapov03617c12013-06-28 13:17:18 +03004452 /*
 4453	 * Some userspaces do not preserve the unusable property. Since a usable
 4454	 * segment has to be present according to the VMX spec, we can use the
 4455	 * present property to amend the userspace bug by making an unusable
 4456	 * segment always nonpresent. vmx_segment_access_rights() already marks
 4457	 * a nonpresent segment as unusable.
4458 */
4459 var->present = !var->unusable;
Avi Kivity6aa8b732006-12-10 02:21:36 -08004460 var->avl = (ar >> 12) & 1;
4461 var->l = (ar >> 13) & 1;
4462 var->db = (ar >> 14) & 1;
4463 var->g = (ar >> 15) & 1;
Avi Kivity6aa8b732006-12-10 02:21:36 -08004464}
4465
Avi Kivitya9179492011-01-03 14:28:52 +02004466static u64 vmx_get_segment_base(struct kvm_vcpu *vcpu, int seg)
4467{
Avi Kivitya9179492011-01-03 14:28:52 +02004468 struct kvm_segment s;
4469
4470 if (to_vmx(vcpu)->rmode.vm86_active) {
4471 vmx_get_segment(vcpu, &s, seg);
4472 return s.base;
4473 }
Avi Kivity2fb92db2011-04-27 19:42:18 +03004474 return vmx_read_guest_seg_base(to_vmx(vcpu), seg);
Avi Kivitya9179492011-01-03 14:28:52 +02004475}
4476
Marcelo Tosattib09408d2013-01-07 19:27:06 -02004477static int vmx_get_cpl(struct kvm_vcpu *vcpu)
Izik Eidus2e4d2652008-03-24 19:38:34 +02004478{
Marcelo Tosattib09408d2013-01-07 19:27:06 -02004479 struct vcpu_vmx *vmx = to_vmx(vcpu);
4480
Paolo Bonziniae9fedc2014-05-14 09:39:49 +02004481 if (unlikely(vmx->rmode.vm86_active))
Izik Eidus2e4d2652008-03-24 19:38:34 +02004482 return 0;
Paolo Bonziniae9fedc2014-05-14 09:39:49 +02004483 else {
4484 int ar = vmx_read_guest_seg_ar(vmx, VCPU_SREG_SS);
Andy Lutomirski4d283ec2015-08-13 13:18:48 -07004485 return VMX_AR_DPL(ar);
Avi Kivity69c73022011-03-07 15:26:44 +02004486 }
Avi Kivity69c73022011-03-07 15:26:44 +02004487}
4488
Avi Kivity653e3102007-05-07 10:55:37 +03004489static u32 vmx_segment_access_rights(struct kvm_segment *var)
Avi Kivity6aa8b732006-12-10 02:21:36 -08004490{
Avi Kivity6aa8b732006-12-10 02:21:36 -08004491 u32 ar;
4492
Avi Kivityf0495f92012-06-07 17:06:10 +03004493 if (var->unusable || !var->present)
Avi Kivity6aa8b732006-12-10 02:21:36 -08004494 ar = 1 << 16;
4495 else {
4496 ar = var->type & 15;
4497 ar |= (var->s & 1) << 4;
4498 ar |= (var->dpl & 3) << 5;
4499 ar |= (var->present & 1) << 7;
4500 ar |= (var->avl & 1) << 12;
4501 ar |= (var->l & 1) << 13;
4502 ar |= (var->db & 1) << 14;
4503 ar |= (var->g & 1) << 15;
4504 }
Avi Kivity653e3102007-05-07 10:55:37 +03004505
4506 return ar;
4507}
4508
4509static void vmx_set_segment(struct kvm_vcpu *vcpu,
4510 struct kvm_segment *var, int seg)
4511{
Avi Kivity7ffd92c2009-06-09 14:10:45 +03004512 struct vcpu_vmx *vmx = to_vmx(vcpu);
Mathias Krause772e0312012-08-30 01:30:19 +02004513 const struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg];
Avi Kivity653e3102007-05-07 10:55:37 +03004514
Avi Kivity2fb92db2011-04-27 19:42:18 +03004515 vmx_segment_cache_clear(vmx);
4516
Gleb Natapov1ecd50a2012-12-12 19:10:54 +02004517 if (vmx->rmode.vm86_active && seg != VCPU_SREG_LDTR) {
4518 vmx->rmode.segs[seg] = *var;
4519 if (seg == VCPU_SREG_TR)
4520 vmcs_write16(sf->selector, var->selector);
4521 else if (var->s)
4522 fix_rmode_seg(seg, &vmx->rmode.segs[seg]);
Gleb Natapovd99e4152012-12-20 16:57:45 +02004523 goto out;
Avi Kivity653e3102007-05-07 10:55:37 +03004524 }
Gleb Natapov1ecd50a2012-12-12 19:10:54 +02004525
Avi Kivity653e3102007-05-07 10:55:37 +03004526 vmcs_writel(sf->base, var->base);
4527 vmcs_write32(sf->limit, var->limit);
4528 vmcs_write16(sf->selector, var->selector);
Nitin A Kamble3a624e22009-06-08 11:34:16 -07004529
4530 /*
4531 * Fix the "Accessed" bit in AR field of segment registers for older
4532 * qemu binaries.
4533 * IA32 arch specifies that at the time of processor reset the
4534 * "Accessed" bit in the AR field of segment registers is 1. And qemu
Guo Chao0fa06072012-06-28 15:16:19 +08004535 * is setting it to 0 in the userland code. This causes invalid guest
Nitin A Kamble3a624e22009-06-08 11:34:16 -07004536 * state vmexit when "unrestricted guest" mode is turned on.
4537 * Fix for this setup issue in cpu_reset is being pushed in the qemu
4538 * tree. Newer qemu binaries with that qemu fix would not need this
4539 * kvm hack.
4540 */
4541 if (enable_unrestricted_guest && (seg != VCPU_SREG_LDTR))
Gleb Natapovf924d662012-12-12 19:10:55 +02004542 var->type |= 0x1; /* Accessed */
Nitin A Kamble3a624e22009-06-08 11:34:16 -07004543
Gleb Natapovf924d662012-12-12 19:10:55 +02004544 vmcs_write32(sf->ar_bytes, vmx_segment_access_rights(var));
Gleb Natapovd99e4152012-12-20 16:57:45 +02004545
4546out:
Paolo Bonzini98eb2f82014-03-27 09:51:52 +01004547 vmx->emulation_required = emulation_required(vcpu);
Avi Kivity6aa8b732006-12-10 02:21:36 -08004548}
4549
Avi Kivity6aa8b732006-12-10 02:21:36 -08004550static void vmx_get_cs_db_l_bits(struct kvm_vcpu *vcpu, int *db, int *l)
4551{
Avi Kivity2fb92db2011-04-27 19:42:18 +03004552 u32 ar = vmx_read_guest_seg_ar(to_vmx(vcpu), VCPU_SREG_CS);
Avi Kivity6aa8b732006-12-10 02:21:36 -08004553
4554 *db = (ar >> 14) & 1;
4555 *l = (ar >> 13) & 1;
4556}
4557
Gleb Natapov89a27f42010-02-16 10:51:48 +02004558static void vmx_get_idt(struct kvm_vcpu *vcpu, struct desc_ptr *dt)
Avi Kivity6aa8b732006-12-10 02:21:36 -08004559{
Gleb Natapov89a27f42010-02-16 10:51:48 +02004560 dt->size = vmcs_read32(GUEST_IDTR_LIMIT);
4561 dt->address = vmcs_readl(GUEST_IDTR_BASE);
Avi Kivity6aa8b732006-12-10 02:21:36 -08004562}
4563
Gleb Natapov89a27f42010-02-16 10:51:48 +02004564static void vmx_set_idt(struct kvm_vcpu *vcpu, struct desc_ptr *dt)
Avi Kivity6aa8b732006-12-10 02:21:36 -08004565{
Gleb Natapov89a27f42010-02-16 10:51:48 +02004566 vmcs_write32(GUEST_IDTR_LIMIT, dt->size);
4567 vmcs_writel(GUEST_IDTR_BASE, dt->address);
Avi Kivity6aa8b732006-12-10 02:21:36 -08004568}
4569
Gleb Natapov89a27f42010-02-16 10:51:48 +02004570static void vmx_get_gdt(struct kvm_vcpu *vcpu, struct desc_ptr *dt)
Avi Kivity6aa8b732006-12-10 02:21:36 -08004571{
Gleb Natapov89a27f42010-02-16 10:51:48 +02004572 dt->size = vmcs_read32(GUEST_GDTR_LIMIT);
4573 dt->address = vmcs_readl(GUEST_GDTR_BASE);
Avi Kivity6aa8b732006-12-10 02:21:36 -08004574}
4575
Gleb Natapov89a27f42010-02-16 10:51:48 +02004576static void vmx_set_gdt(struct kvm_vcpu *vcpu, struct desc_ptr *dt)
Avi Kivity6aa8b732006-12-10 02:21:36 -08004577{
Gleb Natapov89a27f42010-02-16 10:51:48 +02004578 vmcs_write32(GUEST_GDTR_LIMIT, dt->size);
4579 vmcs_writel(GUEST_GDTR_BASE, dt->address);
Avi Kivity6aa8b732006-12-10 02:21:36 -08004580}
4581
Mohammed Gamal648dfaa2008-08-17 16:38:32 +03004582static bool rmode_segment_valid(struct kvm_vcpu *vcpu, int seg)
4583{
4584 struct kvm_segment var;
4585 u32 ar;
4586
4587 vmx_get_segment(vcpu, &var, seg);
Gleb Natapov07f42f52012-12-12 19:10:49 +02004588 var.dpl = 0x3;
Gleb Natapov0647f4a2012-12-12 19:10:50 +02004589 if (seg == VCPU_SREG_CS)
4590 var.type = 0x3;
Mohammed Gamal648dfaa2008-08-17 16:38:32 +03004591 ar = vmx_segment_access_rights(&var);
4592
4593 if (var.base != (var.selector << 4))
4594 return false;
Gleb Natapov89efbed2012-12-20 16:57:44 +02004595 if (var.limit != 0xffff)
Mohammed Gamal648dfaa2008-08-17 16:38:32 +03004596 return false;
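	/* fix_rmode_seg() yields ar == 0xf3: type 3, S=1, DPL 3, present. */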
Gleb Natapov07f42f52012-12-12 19:10:49 +02004597 if (ar != 0xf3)
Mohammed Gamal648dfaa2008-08-17 16:38:32 +03004598 return false;
4599
4600 return true;
4601}
4602
4603static bool code_segment_valid(struct kvm_vcpu *vcpu)
4604{
4605 struct kvm_segment cs;
4606 unsigned int cs_rpl;
4607
4608 vmx_get_segment(vcpu, &cs, VCPU_SREG_CS);
Nadav Amitb32a9912015-03-29 16:33:04 +03004609 cs_rpl = cs.selector & SEGMENT_RPL_MASK;
Mohammed Gamal648dfaa2008-08-17 16:38:32 +03004610
Avi Kivity1872a3f2009-01-04 23:26:52 +02004611 if (cs.unusable)
4612 return false;
Andy Lutomirski4d283ec2015-08-13 13:18:48 -07004613 if (~cs.type & (VMX_AR_TYPE_CODE_MASK|VMX_AR_TYPE_ACCESSES_MASK))
Mohammed Gamal648dfaa2008-08-17 16:38:32 +03004614 return false;
4615 if (!cs.s)
4616 return false;
Andy Lutomirski4d283ec2015-08-13 13:18:48 -07004617 if (cs.type & VMX_AR_TYPE_WRITEABLE_MASK) {
Mohammed Gamal648dfaa2008-08-17 16:38:32 +03004618 if (cs.dpl > cs_rpl)
4619 return false;
Avi Kivity1872a3f2009-01-04 23:26:52 +02004620 } else {
Mohammed Gamal648dfaa2008-08-17 16:38:32 +03004621 if (cs.dpl != cs_rpl)
4622 return false;
4623 }
4624 if (!cs.present)
4625 return false;
4626
4627 /* TODO: Add Reserved field check, this'll require a new member in the kvm_segment_field structure */
4628 return true;
4629}
4630
4631static bool stack_segment_valid(struct kvm_vcpu *vcpu)
4632{
4633 struct kvm_segment ss;
4634 unsigned int ss_rpl;
4635
4636 vmx_get_segment(vcpu, &ss, VCPU_SREG_SS);
Nadav Amitb32a9912015-03-29 16:33:04 +03004637 ss_rpl = ss.selector & SEGMENT_RPL_MASK;
Mohammed Gamal648dfaa2008-08-17 16:38:32 +03004638
Avi Kivity1872a3f2009-01-04 23:26:52 +02004639 if (ss.unusable)
4640 return true;
4641 if (ss.type != 3 && ss.type != 7)
Mohammed Gamal648dfaa2008-08-17 16:38:32 +03004642 return false;
4643 if (!ss.s)
4644 return false;
4645 if (ss.dpl != ss_rpl) /* DPL != RPL */
4646 return false;
4647 if (!ss.present)
4648 return false;
4649
4650 return true;
4651}
4652
4653static bool data_segment_valid(struct kvm_vcpu *vcpu, int seg)
4654{
4655 struct kvm_segment var;
4656 unsigned int rpl;
4657
4658 vmx_get_segment(vcpu, &var, seg);
Nadav Amitb32a9912015-03-29 16:33:04 +03004659 rpl = var.selector & SEGMENT_RPL_MASK;
Mohammed Gamal648dfaa2008-08-17 16:38:32 +03004660
Avi Kivity1872a3f2009-01-04 23:26:52 +02004661 if (var.unusable)
4662 return true;
Mohammed Gamal648dfaa2008-08-17 16:38:32 +03004663 if (!var.s)
4664 return false;
4665 if (!var.present)
4666 return false;
Andy Lutomirski4d283ec2015-08-13 13:18:48 -07004667 if (~var.type & (VMX_AR_TYPE_CODE_MASK|VMX_AR_TYPE_WRITEABLE_MASK)) {
Mohammed Gamal648dfaa2008-08-17 16:38:32 +03004668 if (var.dpl < rpl) /* DPL < RPL */
4669 return false;
4670 }
4671
4672 /* TODO: Add other members to kvm_segment_field to allow checking for other access
4673 * rights flags
4674 */
4675 return true;
4676}
4677
4678static bool tr_valid(struct kvm_vcpu *vcpu)
4679{
4680 struct kvm_segment tr;
4681
4682 vmx_get_segment(vcpu, &tr, VCPU_SREG_TR);
4683
Avi Kivity1872a3f2009-01-04 23:26:52 +02004684 if (tr.unusable)
4685 return false;
Nadav Amitb32a9912015-03-29 16:33:04 +03004686 if (tr.selector & SEGMENT_TI_MASK) /* TI = 1 */
Mohammed Gamal648dfaa2008-08-17 16:38:32 +03004687 return false;
Avi Kivity1872a3f2009-01-04 23:26:52 +02004688 if (tr.type != 3 && tr.type != 11) /* TODO: Check if guest is in IA32e mode */
Mohammed Gamal648dfaa2008-08-17 16:38:32 +03004689 return false;
4690 if (!tr.present)
4691 return false;
4692
4693 return true;
4694}
4695
4696static bool ldtr_valid(struct kvm_vcpu *vcpu)
4697{
4698 struct kvm_segment ldtr;
4699
4700 vmx_get_segment(vcpu, &ldtr, VCPU_SREG_LDTR);
4701
Avi Kivity1872a3f2009-01-04 23:26:52 +02004702 if (ldtr.unusable)
4703 return true;
Nadav Amitb32a9912015-03-29 16:33:04 +03004704 if (ldtr.selector & SEGMENT_TI_MASK) /* TI = 1 */
Mohammed Gamal648dfaa2008-08-17 16:38:32 +03004705 return false;
4706 if (ldtr.type != 2)
4707 return false;
4708 if (!ldtr.present)
4709 return false;
4710
4711 return true;
4712}
4713
4714static bool cs_ss_rpl_check(struct kvm_vcpu *vcpu)
4715{
4716 struct kvm_segment cs, ss;
4717
4718 vmx_get_segment(vcpu, &cs, VCPU_SREG_CS);
4719 vmx_get_segment(vcpu, &ss, VCPU_SREG_SS);
4720
Nadav Amitb32a9912015-03-29 16:33:04 +03004721 return ((cs.selector & SEGMENT_RPL_MASK) ==
4722 (ss.selector & SEGMENT_RPL_MASK));
Mohammed Gamal648dfaa2008-08-17 16:38:32 +03004723}
4724
4725/*
4726 * Check if guest state is valid. Returns true if valid, false if
4727 * not.
4728 * We assume that registers are always usable
4729 */
4730static bool guest_state_valid(struct kvm_vcpu *vcpu)
4731{
Gleb Natapovc5e97c82013-01-21 15:36:43 +02004732 if (enable_unrestricted_guest)
4733 return true;
4734
Mohammed Gamal648dfaa2008-08-17 16:38:32 +03004735 /* real mode guest state checks */
Gleb Natapovf13882d2013-04-14 16:07:37 +03004736 if (!is_protmode(vcpu) || (vmx_get_rflags(vcpu) & X86_EFLAGS_VM)) {
Mohammed Gamal648dfaa2008-08-17 16:38:32 +03004737 if (!rmode_segment_valid(vcpu, VCPU_SREG_CS))
4738 return false;
4739 if (!rmode_segment_valid(vcpu, VCPU_SREG_SS))
4740 return false;
4741 if (!rmode_segment_valid(vcpu, VCPU_SREG_DS))
4742 return false;
4743 if (!rmode_segment_valid(vcpu, VCPU_SREG_ES))
4744 return false;
4745 if (!rmode_segment_valid(vcpu, VCPU_SREG_FS))
4746 return false;
4747 if (!rmode_segment_valid(vcpu, VCPU_SREG_GS))
4748 return false;
4749 } else {
4750 /* protected mode guest state checks */
4751 if (!cs_ss_rpl_check(vcpu))
4752 return false;
4753 if (!code_segment_valid(vcpu))
4754 return false;
4755 if (!stack_segment_valid(vcpu))
4756 return false;
4757 if (!data_segment_valid(vcpu, VCPU_SREG_DS))
4758 return false;
4759 if (!data_segment_valid(vcpu, VCPU_SREG_ES))
4760 return false;
4761 if (!data_segment_valid(vcpu, VCPU_SREG_FS))
4762 return false;
4763 if (!data_segment_valid(vcpu, VCPU_SREG_GS))
4764 return false;
4765 if (!tr_valid(vcpu))
4766 return false;
4767 if (!ldtr_valid(vcpu))
4768 return false;
4769 }
4770 /* TODO:
4771 * - Add checks on RIP
4772 * - Add checks on RFLAGS
4773 */
4774
4775 return true;
4776}
4777
Jim Mattson5fa99cb2017-07-06 16:33:07 -07004778static bool page_address_valid(struct kvm_vcpu *vcpu, gpa_t gpa)
4779{
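	/* Valid only if 4 KiB aligned and within the guest's physical-address width. */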
4780 return PAGE_ALIGNED(gpa) && !(gpa >> cpuid_maxphyaddr(vcpu));
4781}
4782
Mike Dayd77c26f2007-10-08 09:02:08 -04004783static int init_rmode_tss(struct kvm *kvm)
Avi Kivity6aa8b732006-12-10 02:21:36 -08004784{
Xiao Guangrong40dcaa92011-03-09 15:41:04 +08004785 gfn_t fn;
Izik Eidus195aefd2007-10-01 22:14:18 +02004786 u16 data = 0;
Paolo Bonzini1f755a82014-09-16 13:37:40 +02004787 int idx, r;
Avi Kivity6aa8b732006-12-10 02:21:36 -08004788
Xiao Guangrong40dcaa92011-03-09 15:41:04 +08004789 idx = srcu_read_lock(&kvm->srcu);
Jan Kiszka4918c6c2013-03-15 08:38:56 +01004790 fn = kvm->arch.tss_addr >> PAGE_SHIFT;
Izik Eidus195aefd2007-10-01 22:14:18 +02004791 r = kvm_clear_guest_page(kvm, fn, 0, PAGE_SIZE);
4792 if (r < 0)
Marcelo Tosatti10589a42007-12-20 19:18:22 -05004793 goto out;
Izik Eidus195aefd2007-10-01 22:14:18 +02004794 data = TSS_BASE_SIZE + TSS_REDIRECTION_SIZE;
Sheng Yang464d17c2008-08-13 14:10:33 +08004795 r = kvm_write_guest_page(kvm, fn++, &data,
4796 TSS_IOPB_BASE_OFFSET, sizeof(u16));
Izik Eidus195aefd2007-10-01 22:14:18 +02004797 if (r < 0)
Marcelo Tosatti10589a42007-12-20 19:18:22 -05004798 goto out;
Izik Eidus195aefd2007-10-01 22:14:18 +02004799 r = kvm_clear_guest_page(kvm, fn++, 0, PAGE_SIZE);
4800 if (r < 0)
Marcelo Tosatti10589a42007-12-20 19:18:22 -05004801 goto out;
Izik Eidus195aefd2007-10-01 22:14:18 +02004802 r = kvm_clear_guest_page(kvm, fn, 0, PAGE_SIZE);
4803 if (r < 0)
Marcelo Tosatti10589a42007-12-20 19:18:22 -05004804 goto out;
Izik Eidus195aefd2007-10-01 22:14:18 +02004805 data = ~0;
Marcelo Tosatti10589a42007-12-20 19:18:22 -05004806 r = kvm_write_guest_page(kvm, fn, &data,
4807 RMODE_TSS_SIZE - 2 * PAGE_SIZE - 1,
4808 sizeof(u8));
Marcelo Tosatti10589a42007-12-20 19:18:22 -05004809out:
Xiao Guangrong40dcaa92011-03-09 15:41:04 +08004810 srcu_read_unlock(&kvm->srcu, idx);
Paolo Bonzini1f755a82014-09-16 13:37:40 +02004811 return r;
Avi Kivity6aa8b732006-12-10 02:21:36 -08004812}
4813
Sheng Yangb7ebfb02008-04-25 21:44:52 +08004814static int init_rmode_identity_map(struct kvm *kvm)
4815{
Tang Chenf51770e2014-09-16 18:41:59 +08004816 int i, idx, r = 0;
Dan Williamsba049e92016-01-15 16:56:11 -08004817 kvm_pfn_t identity_map_pfn;
Sheng Yangb7ebfb02008-04-25 21:44:52 +08004818 u32 tmp;
4819
Tang Chena255d472014-09-16 18:41:58 +08004820 /* Protect kvm->arch.ept_identity_pagetable_done. */
4821 mutex_lock(&kvm->slots_lock);
4822
Tang Chenf51770e2014-09-16 18:41:59 +08004823 if (likely(kvm->arch.ept_identity_pagetable_done))
Tang Chena255d472014-09-16 18:41:58 +08004824 goto out2;
Tang Chena255d472014-09-16 18:41:58 +08004825
David Hildenbrandd8a6e362017-08-24 20:51:34 +02004826 if (!kvm->arch.ept_identity_map_addr)
4827 kvm->arch.ept_identity_map_addr = VMX_EPT_IDENTITY_PAGETABLE_ADDR;
Sheng Yangb927a3c2009-07-21 10:42:48 +08004828 identity_map_pfn = kvm->arch.ept_identity_map_addr >> PAGE_SHIFT;
Tang Chena255d472014-09-16 18:41:58 +08004829
David Hildenbrandd8a6e362017-08-24 20:51:34 +02004830 r = __x86_set_memory_region(kvm, IDENTITY_PAGETABLE_PRIVATE_MEMSLOT,
4831 kvm->arch.ept_identity_map_addr, PAGE_SIZE);
Tang Chenf51770e2014-09-16 18:41:59 +08004832 if (r < 0)
Tang Chena255d472014-09-16 18:41:58 +08004833 goto out2;
4834
Xiao Guangrong40dcaa92011-03-09 15:41:04 +08004835 idx = srcu_read_lock(&kvm->srcu);
Sheng Yangb7ebfb02008-04-25 21:44:52 +08004836 r = kvm_clear_guest_page(kvm, identity_map_pfn, 0, PAGE_SIZE);
4837 if (r < 0)
4838 goto out;
4839 /* Set up identity-mapping pagetable for EPT in real mode */
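	/* Each PSE entry i maps a 4 MiB page at i << 22, identity-mapping the low 4 GiB. */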
4840 for (i = 0; i < PT32_ENT_PER_PAGE; i++) {
4841 tmp = (i << 22) + (_PAGE_PRESENT | _PAGE_RW | _PAGE_USER |
4842 _PAGE_ACCESSED | _PAGE_DIRTY | _PAGE_PSE);
4843 r = kvm_write_guest_page(kvm, identity_map_pfn,
4844 &tmp, i * sizeof(tmp), sizeof(tmp));
4845 if (r < 0)
4846 goto out;
4847 }
4848 kvm->arch.ept_identity_pagetable_done = true;
Tang Chenf51770e2014-09-16 18:41:59 +08004849
Sheng Yangb7ebfb02008-04-25 21:44:52 +08004850out:
Xiao Guangrong40dcaa92011-03-09 15:41:04 +08004851 srcu_read_unlock(&kvm->srcu, idx);
Tang Chena255d472014-09-16 18:41:58 +08004852
4853out2:
4854 mutex_unlock(&kvm->slots_lock);
Tang Chenf51770e2014-09-16 18:41:59 +08004855 return r;
Sheng Yangb7ebfb02008-04-25 21:44:52 +08004856}
4857
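/*
 * Worked example of the identity-map entries written above, assuming the
 * standard x86 _PAGE_* flag values (illustration only):
 *
 *   flags = _PAGE_PRESENT | _PAGE_RW | _PAGE_USER |
 *           _PAGE_ACCESSED | _PAGE_DIRTY | _PAGE_PSE
 *         = 0x001 | 0x002 | 0x004 | 0x020 | 0x040 | 0x080 = 0x0e7
 *
 *   entry[0]    = (0    << 22) | 0x0e7 = 0x000000e7  (maps GPA 0x00000000)
 *   entry[1]    = (1    << 22) | 0x0e7 = 0x004000e7  (maps GPA 0x00400000)
 *   entry[1023] = (1023 << 22) | 0x0e7 = 0xffc000e7  (maps GPA 0xffc00000)
 *
 * i.e. 1024 4MB PSE entries covering the low 4GB of guest physical memory
 * one-to-one.
 */
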
Avi Kivity6aa8b732006-12-10 02:21:36 -08004858static void seg_setup(int seg)
4859{
Mathias Krause772e0312012-08-30 01:30:19 +02004860 const struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg];
Nitin A Kamble3a624e22009-06-08 11:34:16 -07004861 unsigned int ar;
Avi Kivity6aa8b732006-12-10 02:21:36 -08004862
4863 vmcs_write16(sf->selector, 0);
4864 vmcs_writel(sf->base, 0);
4865 vmcs_write32(sf->limit, 0xffff);
Gleb Natapovd54d07b2012-12-20 16:57:46 +02004866 ar = 0x93;
4867 if (seg == VCPU_SREG_CS)
4868 ar |= 0x08; /* code segment */
Nitin A Kamble3a624e22009-06-08 11:34:16 -07004869
4870 vmcs_write32(sf->ar_bytes, ar);
Avi Kivity6aa8b732006-12-10 02:21:36 -08004871}
4872
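/*
 * For reference, the access-rights values written above decode as standard
 * x86 segment descriptor AR bytes (a sketch, not an exhaustive decode):
 *
 *   0x93 = P=1, DPL=0, S=1, type=0011b -> read/write data segment, accessed
 *   0x9b = 0x93 | 0x08                 -> execute/read code segment, accessed
 *          (the "code segment" bit OR-ed in for VCPU_SREG_CS)
 */
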
Sheng Yangf78e0e22007-10-29 09:40:42 +08004873static int alloc_apic_access_page(struct kvm *kvm)
4874{
Xiao Guangrong44841412012-09-07 14:14:20 +08004875 struct page *page;
Sheng Yangf78e0e22007-10-29 09:40:42 +08004876 int r = 0;
4877
Marcelo Tosatti79fac952009-12-23 14:35:26 -02004878 mutex_lock(&kvm->slots_lock);
Tang Chenc24ae0d2014-09-24 15:57:58 +08004879 if (kvm->arch.apic_access_page_done)
Sheng Yangf78e0e22007-10-29 09:40:42 +08004880 goto out;
Paolo Bonzini1d8007b2015-10-12 13:38:32 +02004881 r = __x86_set_memory_region(kvm, APIC_ACCESS_PAGE_PRIVATE_MEMSLOT,
4882 APIC_DEFAULT_PHYS_BASE, PAGE_SIZE);
Sheng Yangf78e0e22007-10-29 09:40:42 +08004883 if (r)
4884 goto out;
Izik Eidus72dc67a2008-02-10 18:04:15 +02004885
Tang Chen73a6d942014-09-11 13:38:00 +08004886 page = gfn_to_page(kvm, APIC_DEFAULT_PHYS_BASE >> PAGE_SHIFT);
Xiao Guangrong44841412012-09-07 14:14:20 +08004887 if (is_error_page(page)) {
4888 r = -EFAULT;
4889 goto out;
4890 }
4891
Tang Chenc24ae0d2014-09-24 15:57:58 +08004892 /*
4893 * Do not pin the page in memory, so that memory hot-unplug
4894 * is able to migrate it.
4895 */
4896 put_page(page);
4897 kvm->arch.apic_access_page_done = true;
Sheng Yangf78e0e22007-10-29 09:40:42 +08004898out:
Marcelo Tosatti79fac952009-12-23 14:35:26 -02004899 mutex_unlock(&kvm->slots_lock);
Sheng Yangf78e0e22007-10-29 09:40:42 +08004900 return r;
4901}
4902
Wanpeng Li991e7a02015-09-16 17:30:05 +08004903static int allocate_vpid(void)
Sheng Yang2384d2b2008-01-17 15:14:33 +08004904{
4905 int vpid;
4906
Avi Kivity919818a2009-03-23 18:01:29 +02004907 if (!enable_vpid)
Wanpeng Li991e7a02015-09-16 17:30:05 +08004908 return 0;
Sheng Yang2384d2b2008-01-17 15:14:33 +08004909 spin_lock(&vmx_vpid_lock);
4910 vpid = find_first_zero_bit(vmx_vpid_bitmap, VMX_NR_VPIDS);
Wanpeng Li991e7a02015-09-16 17:30:05 +08004911 if (vpid < VMX_NR_VPIDS)
Sheng Yang2384d2b2008-01-17 15:14:33 +08004912 __set_bit(vpid, vmx_vpid_bitmap);
Wanpeng Li991e7a02015-09-16 17:30:05 +08004913 else
4914 vpid = 0;
Sheng Yang2384d2b2008-01-17 15:14:33 +08004915 spin_unlock(&vmx_vpid_lock);
Wanpeng Li991e7a02015-09-16 17:30:05 +08004916 return vpid;
Sheng Yang2384d2b2008-01-17 15:14:33 +08004917}
4918
Wanpeng Li991e7a02015-09-16 17:30:05 +08004919static void free_vpid(int vpid)
Lai Jiangshancdbecfc2010-04-17 16:41:47 +08004920{
Wanpeng Li991e7a02015-09-16 17:30:05 +08004921 if (!enable_vpid || vpid == 0)
Lai Jiangshancdbecfc2010-04-17 16:41:47 +08004922 return;
4923 spin_lock(&vmx_vpid_lock);
Wanpeng Li991e7a02015-09-16 17:30:05 +08004924 __clear_bit(vpid, vmx_vpid_bitmap);
Lai Jiangshancdbecfc2010-04-17 16:41:47 +08004925 spin_unlock(&vmx_vpid_lock);
4926}
4927
Yang Zhang8d146952013-01-25 10:18:50 +08004928#define MSR_TYPE_R 1
4929#define MSR_TYPE_W 2
4930static void __vmx_disable_intercept_for_msr(unsigned long *msr_bitmap,
4931 u32 msr, int type)
Sheng Yang25c5f222008-03-28 13:18:56 +08004932{
Avi Kivity3e7c73e2009-02-24 21:46:19 +02004933 int f = sizeof(unsigned long);
Sheng Yang25c5f222008-03-28 13:18:56 +08004934
4935 if (!cpu_has_vmx_msr_bitmap())
4936 return;
4937
4938 /*
4939 * See Intel PRM Vol. 3, 20.6.9 (MSR-Bitmap Address). Early manuals
4940 * have the write-low and read-high bitmap offsets the wrong way round.
4941 * We can control MSRs 0x00000000-0x00001fff and 0xc0000000-0xc0001fff.
4942 */
Sheng Yang25c5f222008-03-28 13:18:56 +08004943 if (msr <= 0x1fff) {
Yang Zhang8d146952013-01-25 10:18:50 +08004944 if (type & MSR_TYPE_R)
4945 /* read-low */
4946 __clear_bit(msr, msr_bitmap + 0x000 / f);
4947
4948 if (type & MSR_TYPE_W)
4949 /* write-low */
4950 __clear_bit(msr, msr_bitmap + 0x800 / f);
4951
Sheng Yang25c5f222008-03-28 13:18:56 +08004952 } else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff)) {
4953 msr &= 0x1fff;
Yang Zhang8d146952013-01-25 10:18:50 +08004954 if (type & MSR_TYPE_R)
4955 /* read-high */
4956 __clear_bit(msr, msr_bitmap + 0x400 / f);
4957
4958 if (type & MSR_TYPE_W)
4959 /* write-high */
4960 __clear_bit(msr, msr_bitmap + 0xc00 / f);
4961
4962 }
4963}
4964
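/*
 * A minimal sketch of how the offsets used above locate one MSR's intercept
 * bits (hypothetical helper for illustration only; it is not used by the
 * code in this file):
 */
static inline unsigned int vmx_msr_bitmap_read_bit(u32 msr)
{
	/* Low MSRs (0x00000000-0x00001fff): read-low bitmap at byte 0x000. */
	if (msr <= 0x1fff)
		return 0x000 * 8 + msr;
	/* High MSRs (0xc0000000-0xc0001fff): read-high bitmap at byte 0x400. */
	return 0x400 * 8 + (msr & 0x1fff);
}
/*
 * e.g. MSR_FS_BASE (0xc0000100) -> bit 0x2100 of the 4K bitmap page, i.e.
 * byte 0x420, bit 0; the corresponding write-intercept bits live 0x800
 * bytes further on (byte offset 0x800 for low MSRs, 0xc00 for high MSRs).
 */
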
Wincy Vanf2b93282015-02-03 23:56:03 +08004965/*
4966 * If a msr is allowed by L0, we should check whether it is allowed by L1.
4967 * The corresponding bit will be cleared unless both of L0 and L1 allow it.
4968 */
4969static void nested_vmx_disable_intercept_for_msr(unsigned long *msr_bitmap_l1,
4970 unsigned long *msr_bitmap_nested,
4971 u32 msr, int type)
4972{
4973 int f = sizeof(unsigned long);
4974
4975 if (!cpu_has_vmx_msr_bitmap()) {
4976 WARN_ON(1);
4977 return;
4978 }
4979
4980 /*
4981 * See Intel PRM Vol. 3, 20.6.9 (MSR-Bitmap Address). Early manuals
4982 * have the write-low and read-high bitmap offsets the wrong way round.
4983 * We can control MSRs 0x00000000-0x00001fff and 0xc0000000-0xc0001fff.
4984 */
4985 if (msr <= 0x1fff) {
4986 if (type & MSR_TYPE_R &&
4987 !test_bit(msr, msr_bitmap_l1 + 0x000 / f))
4988 /* read-low */
4989 __clear_bit(msr, msr_bitmap_nested + 0x000 / f);
4990
4991 if (type & MSR_TYPE_W &&
4992 !test_bit(msr, msr_bitmap_l1 + 0x800 / f))
4993 /* write-low */
4994 __clear_bit(msr, msr_bitmap_nested + 0x800 / f);
4995
4996 } else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff)) {
4997 msr &= 0x1fff;
4998 if (type & MSR_TYPE_R &&
4999 !test_bit(msr, msr_bitmap_l1 + 0x400 / f))
5000 /* read-high */
5001 __clear_bit(msr, msr_bitmap_nested + 0x400 / f);
5002
5003 if (type & MSR_TYPE_W &&
5004 !test_bit(msr, msr_bitmap_l1 + 0xc00 / f))
5005 /* write-high */
5006 __clear_bit(msr, msr_bitmap_nested + 0xc00 / f);
5007
5008 }
5009}
5010
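/*
 * Net effect of the merge above for one MSR and access type (sketch): the
 * intercept bit stays set in the merged bitmap unless both L0 (checked by
 * the caller before invoking this helper) and L1 (checked here against its
 * own bitmap) allow the access to go straight through to the hardware.
 */
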
Avi Kivity58972972009-02-24 22:26:47 +02005011static void vmx_disable_intercept_for_msr(u32 msr, bool longmode_only)
5012{
5013 if (!longmode_only)
Yang Zhang8d146952013-01-25 10:18:50 +08005014 __vmx_disable_intercept_for_msr(vmx_msr_bitmap_legacy,
5015 msr, MSR_TYPE_R | MSR_TYPE_W);
5016 __vmx_disable_intercept_for_msr(vmx_msr_bitmap_longmode,
5017 msr, MSR_TYPE_R | MSR_TYPE_W);
5018}
5019
Radim Krčmář2e69f862016-09-29 22:41:32 +02005020static void vmx_disable_intercept_msr_x2apic(u32 msr, int type, bool apicv_active)
Yang Zhang8d146952013-01-25 10:18:50 +08005021{
Wanpeng Lif6e90f92016-09-22 07:43:25 +08005022 if (apicv_active) {
Wanpeng Lic63e4562016-09-23 19:17:16 +08005023 __vmx_disable_intercept_for_msr(vmx_msr_bitmap_legacy_x2apic_apicv,
Radim Krčmář2e69f862016-09-29 22:41:32 +02005024 msr, type);
Wanpeng Lic63e4562016-09-23 19:17:16 +08005025 __vmx_disable_intercept_for_msr(vmx_msr_bitmap_longmode_x2apic_apicv,
Radim Krčmář2e69f862016-09-29 22:41:32 +02005026 msr, type);
Wanpeng Lif6e90f92016-09-22 07:43:25 +08005027 } else {
Wanpeng Lif6e90f92016-09-22 07:43:25 +08005028 __vmx_disable_intercept_for_msr(vmx_msr_bitmap_legacy_x2apic,
Radim Krčmář2e69f862016-09-29 22:41:32 +02005029 msr, type);
Wanpeng Lif6e90f92016-09-22 07:43:25 +08005030 __vmx_disable_intercept_for_msr(vmx_msr_bitmap_longmode_x2apic,
Radim Krčmář2e69f862016-09-29 22:41:32 +02005031 msr, type);
Wanpeng Lif6e90f92016-09-22 07:43:25 +08005032 }
Avi Kivity58972972009-02-24 22:26:47 +02005033}
5034
Suravee Suthikulpanitb2a05fe2017-09-12 10:42:41 -05005035static bool vmx_get_enable_apicv(struct kvm_vcpu *vcpu)
Paolo Bonzinid50ab6c2015-07-29 11:49:59 +02005036{
Andrey Smetanind62caab2015-11-10 15:36:33 +03005037 return enable_apicv;
Paolo Bonzinid50ab6c2015-07-29 11:49:59 +02005038}
5039
David Matlackc9f04402017-08-01 14:00:40 -07005040static void nested_mark_vmcs12_pages_dirty(struct kvm_vcpu *vcpu)
5041{
5042 struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
5043 gfn_t gfn;
5044
5045 /*
5046 * Don't need to mark the APIC access page dirty; it is never
5047 * written to by the CPU during APIC virtualization.
5048 */
5049
5050 if (nested_cpu_has(vmcs12, CPU_BASED_TPR_SHADOW)) {
5051 gfn = vmcs12->virtual_apic_page_addr >> PAGE_SHIFT;
5052 kvm_vcpu_mark_page_dirty(vcpu, gfn);
5053 }
5054
5055 if (nested_cpu_has_posted_intr(vmcs12)) {
5056 gfn = vmcs12->posted_intr_desc_addr >> PAGE_SHIFT;
5057 kvm_vcpu_mark_page_dirty(vcpu, gfn);
5058 }
5059}
5060
5061
David Hildenbrand6342c502017-01-25 11:58:58 +01005062static void vmx_complete_nested_posted_interrupt(struct kvm_vcpu *vcpu)
Wincy Van705699a2015-02-03 23:58:17 +08005063{
5064 struct vcpu_vmx *vmx = to_vmx(vcpu);
5065 int max_irr;
5066 void *vapic_page;
5067 u16 status;
5068
David Matlackc9f04402017-08-01 14:00:40 -07005069 if (!vmx->nested.pi_desc || !vmx->nested.pi_pending)
5070 return;
Wincy Van705699a2015-02-03 23:58:17 +08005071
David Matlackc9f04402017-08-01 14:00:40 -07005072 vmx->nested.pi_pending = false;
5073 if (!pi_test_and_clear_on(vmx->nested.pi_desc))
5074 return;
Wincy Van705699a2015-02-03 23:58:17 +08005075
David Matlackc9f04402017-08-01 14:00:40 -07005076 max_irr = find_last_bit((unsigned long *)vmx->nested.pi_desc->pir, 256);
5077 if (max_irr != 256) {
Wincy Van705699a2015-02-03 23:58:17 +08005078 vapic_page = kmap(vmx->nested.virtual_apic_page);
Wincy Van705699a2015-02-03 23:58:17 +08005079 __kvm_apic_update_irr(vmx->nested.pi_desc->pir, vapic_page);
5080 kunmap(vmx->nested.virtual_apic_page);
5081
5082 status = vmcs_read16(GUEST_INTR_STATUS);
5083 if ((u8)max_irr > ((u8)status & 0xff)) {
5084 status &= ~0xff;
5085 status |= (u8)max_irr;
5086 vmcs_write16(GUEST_INTR_STATUS, status);
5087 }
5088 }
David Matlackc9f04402017-08-01 14:00:40 -07005089
5090 nested_mark_vmcs12_pages_dirty(vcpu);
Wincy Van705699a2015-02-03 23:58:17 +08005091}
5092
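/*
 * Example of the GUEST_INTR_STATUS update above (illustration only): if the
 * highest vector pending in the nested PIR is 0x51 while the current RVI
 * (the low byte of GUEST_INTR_STATUS) is 0x30, RVI is raised to 0x51; if RVI
 * were already 0x60 it would be left untouched.
 */
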
Wincy Van06a55242017-04-28 13:13:59 +08005093static inline bool kvm_vcpu_trigger_posted_interrupt(struct kvm_vcpu *vcpu,
5094 bool nested)
Radim Krčmář21bc8dc2015-02-16 15:36:33 +01005095{
5096#ifdef CONFIG_SMP
Wincy Van06a55242017-04-28 13:13:59 +08005097 int pi_vec = nested ? POSTED_INTR_NESTED_VECTOR : POSTED_INTR_VECTOR;
5098
Radim Krčmář21bc8dc2015-02-16 15:36:33 +01005099 if (vcpu->mode == IN_GUEST_MODE) {
Feng Wu28b835d2015-09-18 22:29:54 +08005100 /*
Haozhong Zhang5753743f2017-09-18 09:56:50 +08005101 * The vector of interrupt to be delivered to vcpu had
5102 * been set in PIR before this function.
Feng Wu28b835d2015-09-18 22:29:54 +08005103 *
Haozhong Zhang5753743f2017-09-18 09:56:50 +08005104 * Following cases will be reached in this block, and
5105 * we always send a notification event in all cases as
5106 * explained below.
5107 *
5108 * Case 1: vcpu keeps in non-root mode. Sending a
5109 * notification event posts the interrupt to vcpu.
5110 *
5111 * Case 2: vcpu exits to root mode and is still
5112 * runnable. PIR will be synced to vIRR before the
5113 * next vcpu entry. Sending a notification event in
5114		 * this case has no effect, as the vcpu is no longer
5115		 * running in non-root mode.
5116 *
5117 * Case 3: vcpu exits to root mode and is blocked.
5118 * vcpu_block() has already synced PIR to vIRR and
5119 * never blocks vcpu if vIRR is not cleared. Therefore,
5120 * a blocked vcpu here does not wait for any requested
5121 * interrupts in PIR, and sending a notification event
5122 * which has no effect is safe here.
Feng Wu28b835d2015-09-18 22:29:54 +08005123 */
Feng Wu28b835d2015-09-18 22:29:54 +08005124
Wincy Van06a55242017-04-28 13:13:59 +08005125 apic->send_IPI_mask(get_cpu_mask(vcpu->cpu), pi_vec);
Radim Krčmář21bc8dc2015-02-16 15:36:33 +01005126 return true;
5127 }
5128#endif
5129 return false;
5130}
5131
Wincy Van705699a2015-02-03 23:58:17 +08005132static int vmx_deliver_nested_posted_interrupt(struct kvm_vcpu *vcpu,
5133 int vector)
5134{
5135 struct vcpu_vmx *vmx = to_vmx(vcpu);
5136
5137 if (is_guest_mode(vcpu) &&
5138 vector == vmx->nested.posted_intr_nv) {
5139 /* the PIR and ON have been set by L1. */
Wincy Van06a55242017-04-28 13:13:59 +08005140 kvm_vcpu_trigger_posted_interrupt(vcpu, true);
Wincy Van705699a2015-02-03 23:58:17 +08005141 /*
5142 * If a posted intr is not recognized by hardware,
5143		 * we will deliver it on the next vmentry.
5144 */
5145 vmx->nested.pi_pending = true;
5146 kvm_make_request(KVM_REQ_EVENT, vcpu);
5147 return 0;
5148 }
5149 return -1;
5150}
Avi Kivity6aa8b732006-12-10 02:21:36 -08005151/*
Yang Zhanga20ed542013-04-11 19:25:15 +08005152 * Send an interrupt to a vcpu via the posted-interrupt mechanism.
5153 * 1. If the target vcpu is running (non-root mode), send a posted-interrupt
5154 * notification and the hardware will sync PIR to vIRR atomically.
5155 * 2. If the target vcpu is not running (root mode), kick it to pick up the
5156 * interrupt from PIR on the next vmentry.
5157 */
5158static void vmx_deliver_posted_interrupt(struct kvm_vcpu *vcpu, int vector)
5159{
5160 struct vcpu_vmx *vmx = to_vmx(vcpu);
5161 int r;
5162
Wincy Van705699a2015-02-03 23:58:17 +08005163 r = vmx_deliver_nested_posted_interrupt(vcpu, vector);
5164 if (!r)
5165 return;
5166
Yang Zhanga20ed542013-04-11 19:25:15 +08005167 if (pi_test_and_set_pir(vector, &vmx->pi_desc))
5168 return;
5169
Paolo Bonzinib95234c2016-12-19 13:57:33 +01005170 /* If a previous notification has sent the IPI, nothing to do. */
5171 if (pi_test_and_set_on(&vmx->pi_desc))
5172 return;
5173
Wincy Van06a55242017-04-28 13:13:59 +08005174 if (!kvm_vcpu_trigger_posted_interrupt(vcpu, false))
Yang Zhanga20ed542013-04-11 19:25:15 +08005175 kvm_vcpu_kick(vcpu);
5176}
5177
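/*
 * Worked example of the posting protocol above (illustration only):
 * delivering vector 0xec sets bit 236 of the 256-bit PIR and then sets ON;
 * only the sender that actually flips ON raises the notification IPI, later
 * senders see ON already set and return without kicking the vcpu.
 */
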
Avi Kivity6aa8b732006-12-10 02:21:36 -08005178/*
Nadav Har'Ela3a8ff82011-05-25 23:09:01 +03005179 * Set up the vmcs's constant host-state fields, i.e., host-state fields that
5180 * will not change in the lifetime of the guest.
5181 * Note that host-state that does change is set elsewhere. E.g., host-state
5182 * that is set differently for each CPU is set in vmx_vcpu_load(), not here.
5183 */
Yang Zhanga547c6d2013-04-11 19:25:10 +08005184static void vmx_set_constant_host_state(struct vcpu_vmx *vmx)
Nadav Har'Ela3a8ff82011-05-25 23:09:01 +03005185{
5186 u32 low32, high32;
5187 unsigned long tmpl;
5188 struct desc_ptr dt;
Andy Lutomirskid6e41f12017-05-28 10:00:17 -07005189 unsigned long cr0, cr3, cr4;
Nadav Har'Ela3a8ff82011-05-25 23:09:01 +03005190
Andy Lutomirski04ac88a2016-10-31 15:18:45 -07005191 cr0 = read_cr0();
5192 WARN_ON(cr0 & X86_CR0_TS);
5193 vmcs_writel(HOST_CR0, cr0); /* 22.2.3 */
Andy Lutomirskid6e41f12017-05-28 10:00:17 -07005194
5195 /*
5196 * Save the most likely value for this task's CR3 in the VMCS.
5197 * We can't use __get_current_cr3_fast() because we're not atomic.
5198 */
Andy Lutomirski6c690ee2017-06-12 10:26:14 -07005199 cr3 = __read_cr3();
Andy Lutomirskid6e41f12017-05-28 10:00:17 -07005200 vmcs_writel(HOST_CR3, cr3); /* 22.2.3 FIXME: shadow tables */
Ladi Prosek44889942017-09-22 07:53:15 +02005201 vmx->loaded_vmcs->vmcs_host_cr3 = cr3;
Nadav Har'Ela3a8ff82011-05-25 23:09:01 +03005202
Andy Lutomirskid974baa2014-10-08 09:02:13 -07005203 /* Save the most likely value for this task's CR4 in the VMCS. */
Andy Lutomirski1e02ce42014-10-24 15:58:08 -07005204 cr4 = cr4_read_shadow();
Andy Lutomirskid974baa2014-10-08 09:02:13 -07005205 vmcs_writel(HOST_CR4, cr4); /* 22.2.3, 22.2.5 */
Ladi Prosek44889942017-09-22 07:53:15 +02005206 vmx->loaded_vmcs->vmcs_host_cr4 = cr4;
Andy Lutomirskid974baa2014-10-08 09:02:13 -07005207
Nadav Har'Ela3a8ff82011-05-25 23:09:01 +03005208 vmcs_write16(HOST_CS_SELECTOR, __KERNEL_CS); /* 22.2.4 */
Avi Kivityb2da15a2012-05-13 19:53:24 +03005209#ifdef CONFIG_X86_64
5210 /*
5211 * Load null selectors, so we can avoid reloading them in
5212 * __vmx_load_host_state(), in case userspace uses the null selectors
5213 * too (the expected case).
5214 */
5215 vmcs_write16(HOST_DS_SELECTOR, 0);
5216 vmcs_write16(HOST_ES_SELECTOR, 0);
5217#else
Nadav Har'Ela3a8ff82011-05-25 23:09:01 +03005218 vmcs_write16(HOST_DS_SELECTOR, __KERNEL_DS); /* 22.2.4 */
5219 vmcs_write16(HOST_ES_SELECTOR, __KERNEL_DS); /* 22.2.4 */
Avi Kivityb2da15a2012-05-13 19:53:24 +03005220#endif
Nadav Har'Ela3a8ff82011-05-25 23:09:01 +03005221 vmcs_write16(HOST_SS_SELECTOR, __KERNEL_DS); /* 22.2.4 */
5222 vmcs_write16(HOST_TR_SELECTOR, GDT_ENTRY_TSS*8); /* 22.2.4 */
5223
Juergen Gross87930012017-09-04 12:25:27 +02005224 store_idt(&dt);
Nadav Har'Ela3a8ff82011-05-25 23:09:01 +03005225 vmcs_writel(HOST_IDTR_BASE, dt.address); /* 22.2.4 */
Yang Zhanga547c6d2013-04-11 19:25:10 +08005226 vmx->host_idt_base = dt.address;
Nadav Har'Ela3a8ff82011-05-25 23:09:01 +03005227
Avi Kivity83287ea422012-09-16 15:10:57 +03005228 vmcs_writel(HOST_RIP, vmx_return); /* 22.2.5 */
Nadav Har'Ela3a8ff82011-05-25 23:09:01 +03005229
5230 rdmsr(MSR_IA32_SYSENTER_CS, low32, high32);
5231 vmcs_write32(HOST_IA32_SYSENTER_CS, low32);
5232 rdmsrl(MSR_IA32_SYSENTER_EIP, tmpl);
5233 vmcs_writel(HOST_IA32_SYSENTER_EIP, tmpl); /* 22.2.3 */
5234
5235 if (vmcs_config.vmexit_ctrl & VM_EXIT_LOAD_IA32_PAT) {
5236 rdmsr(MSR_IA32_CR_PAT, low32, high32);
5237 vmcs_write64(HOST_IA32_PAT, low32 | ((u64) high32 << 32));
5238 }
5239}
5240
Nadav Har'Elbf8179a2011-05-25 23:09:31 +03005241static void set_cr4_guest_host_mask(struct vcpu_vmx *vmx)
5242{
5243 vmx->vcpu.arch.cr4_guest_owned_bits = KVM_CR4_GUEST_OWNED_BITS;
5244 if (enable_ept)
5245 vmx->vcpu.arch.cr4_guest_owned_bits |= X86_CR4_PGE;
Nadav Har'Elfe3ef052011-05-25 23:10:02 +03005246 if (is_guest_mode(&vmx->vcpu))
5247 vmx->vcpu.arch.cr4_guest_owned_bits &=
5248 ~get_vmcs12(&vmx->vcpu)->cr4_guest_host_mask;
Nadav Har'Elbf8179a2011-05-25 23:09:31 +03005249 vmcs_writel(CR4_GUEST_HOST_MASK, ~vmx->vcpu.arch.cr4_guest_owned_bits);
5250}
5251
Yang Zhang01e439b2013-04-11 19:25:12 +08005252static u32 vmx_pin_based_exec_ctrl(struct vcpu_vmx *vmx)
5253{
5254 u32 pin_based_exec_ctrl = vmcs_config.pin_based_exec_ctrl;
5255
Andrey Smetanind62caab2015-11-10 15:36:33 +03005256 if (!kvm_vcpu_apicv_active(&vmx->vcpu))
Yang Zhang01e439b2013-04-11 19:25:12 +08005257 pin_based_exec_ctrl &= ~PIN_BASED_POSTED_INTR;
Paolo Bonzinid02fcf52017-11-06 13:31:13 +01005258
5259 if (!enable_vnmi)
5260 pin_based_exec_ctrl &= ~PIN_BASED_VIRTUAL_NMIS;
5261
Yunhong Jiang64672c92016-06-13 14:19:59 -07005262 /* Enable the preemption timer dynamically */
5263 pin_based_exec_ctrl &= ~PIN_BASED_VMX_PREEMPTION_TIMER;
Yang Zhang01e439b2013-04-11 19:25:12 +08005264 return pin_based_exec_ctrl;
5265}
5266
Andrey Smetanind62caab2015-11-10 15:36:33 +03005267static void vmx_refresh_apicv_exec_ctrl(struct kvm_vcpu *vcpu)
5268{
5269 struct vcpu_vmx *vmx = to_vmx(vcpu);
5270
5271 vmcs_write32(PIN_BASED_VM_EXEC_CONTROL, vmx_pin_based_exec_ctrl(vmx));
Roman Kagan3ce424e2016-05-18 17:48:20 +03005272 if (cpu_has_secondary_exec_ctrls()) {
5273 if (kvm_vcpu_apicv_active(vcpu))
5274 vmcs_set_bits(SECONDARY_VM_EXEC_CONTROL,
5275 SECONDARY_EXEC_APIC_REGISTER_VIRT |
5276 SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY);
5277 else
5278 vmcs_clear_bits(SECONDARY_VM_EXEC_CONTROL,
5279 SECONDARY_EXEC_APIC_REGISTER_VIRT |
5280 SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY);
5281 }
5282
5283 if (cpu_has_vmx_msr_bitmap())
5284 vmx_set_msr_bitmap(vcpu);
Andrey Smetanind62caab2015-11-10 15:36:33 +03005285}
5286
Nadav Har'Elbf8179a2011-05-25 23:09:31 +03005287static u32 vmx_exec_control(struct vcpu_vmx *vmx)
5288{
5289 u32 exec_control = vmcs_config.cpu_based_exec_ctrl;
Paolo Bonzinid16c2932014-02-21 10:36:37 +01005290
5291 if (vmx->vcpu.arch.switch_db_regs & KVM_DEBUGREG_WONT_EXIT)
5292 exec_control &= ~CPU_BASED_MOV_DR_EXITING;
5293
Paolo Bonzini35754c92015-07-29 12:05:37 +02005294 if (!cpu_need_tpr_shadow(&vmx->vcpu)) {
Nadav Har'Elbf8179a2011-05-25 23:09:31 +03005295 exec_control &= ~CPU_BASED_TPR_SHADOW;
5296#ifdef CONFIG_X86_64
5297 exec_control |= CPU_BASED_CR8_STORE_EXITING |
5298 CPU_BASED_CR8_LOAD_EXITING;
5299#endif
5300 }
5301 if (!enable_ept)
5302 exec_control |= CPU_BASED_CR3_STORE_EXITING |
5303 CPU_BASED_CR3_LOAD_EXITING |
5304 CPU_BASED_INVLPG_EXITING;
5305 return exec_control;
5306}
5307
Jim Mattson45ec3682017-08-23 16:32:04 -07005308static bool vmx_rdrand_supported(void)
Nadav Har'Elbf8179a2011-05-25 23:09:31 +03005309{
Jim Mattson45ec3682017-08-23 16:32:04 -07005310 return vmcs_config.cpu_based_2nd_exec_ctrl &
David Hildenbrand736fdf72017-08-24 20:51:37 +02005311 SECONDARY_EXEC_RDRAND_EXITING;
Jim Mattson45ec3682017-08-23 16:32:04 -07005312}
5313
Jim Mattson75f4fc82017-08-23 16:32:03 -07005314static bool vmx_rdseed_supported(void)
5315{
5316 return vmcs_config.cpu_based_2nd_exec_ctrl &
David Hildenbrand736fdf72017-08-24 20:51:37 +02005317 SECONDARY_EXEC_RDSEED_EXITING;
Jim Mattson75f4fc82017-08-23 16:32:03 -07005318}
5319
Paolo Bonzini80154d72017-08-24 13:55:35 +02005320static void vmx_compute_secondary_exec_control(struct vcpu_vmx *vmx)
Nadav Har'Elbf8179a2011-05-25 23:09:31 +03005321{
Paolo Bonzini80154d72017-08-24 13:55:35 +02005322 struct kvm_vcpu *vcpu = &vmx->vcpu;
5323
Nadav Har'Elbf8179a2011-05-25 23:09:31 +03005324 u32 exec_control = vmcs_config.cpu_based_2nd_exec_ctrl;
Paolo Bonzini0367f202016-07-12 10:44:55 +02005325
Paolo Bonzini80154d72017-08-24 13:55:35 +02005326 if (!cpu_need_virtualize_apic_accesses(vcpu))
Nadav Har'Elbf8179a2011-05-25 23:09:31 +03005327 exec_control &= ~SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES;
5328 if (vmx->vpid == 0)
5329 exec_control &= ~SECONDARY_EXEC_ENABLE_VPID;
5330 if (!enable_ept) {
5331 exec_control &= ~SECONDARY_EXEC_ENABLE_EPT;
5332 enable_unrestricted_guest = 0;
Mao, Junjiead756a12012-07-02 01:18:48 +00005333		/* Enabling INVPCID for non-EPT guests may cause a performance regression. */
5334 exec_control &= ~SECONDARY_EXEC_ENABLE_INVPCID;
Nadav Har'Elbf8179a2011-05-25 23:09:31 +03005335 }
5336 if (!enable_unrestricted_guest)
5337 exec_control &= ~SECONDARY_EXEC_UNRESTRICTED_GUEST;
5338 if (!ple_gap)
5339 exec_control &= ~SECONDARY_EXEC_PAUSE_LOOP_EXITING;
Paolo Bonzini80154d72017-08-24 13:55:35 +02005340 if (!kvm_vcpu_apicv_active(vcpu))
Yang Zhangc7c9c562013-01-25 10:18:51 +08005341 exec_control &= ~(SECONDARY_EXEC_APIC_REGISTER_VIRT |
5342 SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY);
Yang Zhang8d146952013-01-25 10:18:50 +08005343 exec_control &= ~SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE;
Paolo Bonzini0367f202016-07-12 10:44:55 +02005344
5345 /* SECONDARY_EXEC_DESC is enabled/disabled on writes to CR4.UMIP,
5346 * in vmx_set_cr4. */
5347 exec_control &= ~SECONDARY_EXEC_DESC;
5348
Abel Gordonabc4fc52013-04-18 14:35:25 +03005349	/* SECONDARY_EXEC_SHADOW_VMCS is enabled when L1 executes VMPTRLD
5350	 (handle_vmptrld).
5351	 We cannot enable shadow_vmcs here because we do not yet
5352	 have a current VMCS12.
5353	 */
5354 exec_control &= ~SECONDARY_EXEC_SHADOW_VMCS;
Kai Huanga3eaa862015-11-04 13:46:05 +08005355
5356 if (!enable_pml)
5357 exec_control &= ~SECONDARY_EXEC_ENABLE_PML;
Kai Huang843e4332015-01-28 10:54:28 +08005358
Paolo Bonzini3db13482017-08-24 14:48:03 +02005359 if (vmx_xsaves_supported()) {
5360 /* Exposing XSAVES only when XSAVE is exposed */
5361 bool xsaves_enabled =
5362 guest_cpuid_has(vcpu, X86_FEATURE_XSAVE) &&
5363 guest_cpuid_has(vcpu, X86_FEATURE_XSAVES);
5364
5365 if (!xsaves_enabled)
5366 exec_control &= ~SECONDARY_EXEC_XSAVES;
5367
5368 if (nested) {
5369 if (xsaves_enabled)
5370 vmx->nested.nested_vmx_secondary_ctls_high |=
5371 SECONDARY_EXEC_XSAVES;
5372 else
5373 vmx->nested.nested_vmx_secondary_ctls_high &=
5374 ~SECONDARY_EXEC_XSAVES;
5375 }
5376 }
5377
Paolo Bonzini80154d72017-08-24 13:55:35 +02005378 if (vmx_rdtscp_supported()) {
5379 bool rdtscp_enabled = guest_cpuid_has(vcpu, X86_FEATURE_RDTSCP);
5380 if (!rdtscp_enabled)
5381 exec_control &= ~SECONDARY_EXEC_RDTSCP;
5382
5383 if (nested) {
5384 if (rdtscp_enabled)
5385 vmx->nested.nested_vmx_secondary_ctls_high |=
5386 SECONDARY_EXEC_RDTSCP;
5387 else
5388 vmx->nested.nested_vmx_secondary_ctls_high &=
5389 ~SECONDARY_EXEC_RDTSCP;
5390 }
5391 }
5392
5393 if (vmx_invpcid_supported()) {
5394 /* Exposing INVPCID only when PCID is exposed */
5395 bool invpcid_enabled =
5396 guest_cpuid_has(vcpu, X86_FEATURE_INVPCID) &&
5397 guest_cpuid_has(vcpu, X86_FEATURE_PCID);
5398
5399 if (!invpcid_enabled) {
5400 exec_control &= ~SECONDARY_EXEC_ENABLE_INVPCID;
5401 guest_cpuid_clear(vcpu, X86_FEATURE_INVPCID);
5402 }
5403
5404 if (nested) {
5405 if (invpcid_enabled)
5406 vmx->nested.nested_vmx_secondary_ctls_high |=
5407 SECONDARY_EXEC_ENABLE_INVPCID;
5408 else
5409 vmx->nested.nested_vmx_secondary_ctls_high &=
5410 ~SECONDARY_EXEC_ENABLE_INVPCID;
5411 }
5412 }
5413
Jim Mattson45ec3682017-08-23 16:32:04 -07005414 if (vmx_rdrand_supported()) {
5415 bool rdrand_enabled = guest_cpuid_has(vcpu, X86_FEATURE_RDRAND);
5416 if (rdrand_enabled)
David Hildenbrand736fdf72017-08-24 20:51:37 +02005417 exec_control &= ~SECONDARY_EXEC_RDRAND_EXITING;
Jim Mattson45ec3682017-08-23 16:32:04 -07005418
5419 if (nested) {
5420 if (rdrand_enabled)
5421 vmx->nested.nested_vmx_secondary_ctls_high |=
David Hildenbrand736fdf72017-08-24 20:51:37 +02005422 SECONDARY_EXEC_RDRAND_EXITING;
Jim Mattson45ec3682017-08-23 16:32:04 -07005423 else
5424 vmx->nested.nested_vmx_secondary_ctls_high &=
David Hildenbrand736fdf72017-08-24 20:51:37 +02005425 ~SECONDARY_EXEC_RDRAND_EXITING;
Jim Mattson45ec3682017-08-23 16:32:04 -07005426 }
5427 }
5428
Jim Mattson75f4fc82017-08-23 16:32:03 -07005429 if (vmx_rdseed_supported()) {
5430 bool rdseed_enabled = guest_cpuid_has(vcpu, X86_FEATURE_RDSEED);
5431 if (rdseed_enabled)
David Hildenbrand736fdf72017-08-24 20:51:37 +02005432 exec_control &= ~SECONDARY_EXEC_RDSEED_EXITING;
Jim Mattson75f4fc82017-08-23 16:32:03 -07005433
5434 if (nested) {
5435 if (rdseed_enabled)
5436 vmx->nested.nested_vmx_secondary_ctls_high |=
David Hildenbrand736fdf72017-08-24 20:51:37 +02005437 SECONDARY_EXEC_RDSEED_EXITING;
Jim Mattson75f4fc82017-08-23 16:32:03 -07005438 else
5439 vmx->nested.nested_vmx_secondary_ctls_high &=
David Hildenbrand736fdf72017-08-24 20:51:37 +02005440 ~SECONDARY_EXEC_RDSEED_EXITING;
Jim Mattson75f4fc82017-08-23 16:32:03 -07005441 }
5442 }
5443
Paolo Bonzini80154d72017-08-24 13:55:35 +02005444 vmx->secondary_exec_control = exec_control;
Nadav Har'Elbf8179a2011-05-25 23:09:31 +03005445}
5446
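/*
 * The XSAVES/RDTSCP/INVPCID/RDRAND/RDSEED blocks above all follow the same
 * shape; a hypothetical helper capturing that shape might look like this
 * (sketch only, not used by the code in this file):
 */
static inline u32 vmx_adjust_secondary_ctrl(struct vcpu_vmx *vmx,
					    u32 exec_control, u32 ctrl,
					    bool enabled, bool exiting)
{
	/*
	 * "Exiting" controls are cleared when the feature is exposed to the
	 * guest, plain enable controls are cleared when it is hidden.
	 */
	if (enabled == exiting)
		exec_control &= ~ctrl;
	/* Mirror the decision into the controls L1 may use when nested. */
	if (nested) {
		if (enabled)
			vmx->nested.nested_vmx_secondary_ctls_high |= ctrl;
		else
			vmx->nested.nested_vmx_secondary_ctls_high &= ~ctrl;
	}
	return exec_control;
}
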
Xiao Guangrongce88dec2011-07-12 03:33:44 +08005447static void ept_set_mmio_spte_mask(void)
5448{
5449 /*
5450 * EPT Misconfigurations can be generated if the value of bits 2:0
5451 * of an EPT paging-structure entry is 110b (write/execute).
Xiao Guangrongce88dec2011-07-12 03:33:44 +08005452 */
Peter Feinerdcdca5f2017-06-30 17:26:30 -07005453 kvm_mmu_set_mmio_spte_mask(VMX_EPT_RWX_MASK,
5454 VMX_EPT_MISCONFIG_WX_VALUE);
Xiao Guangrongce88dec2011-07-12 03:33:44 +08005455}
5456
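/*
 * Concretely, assuming the usual asm/vmx.h encodings: mask 0x7 (bits 2:0)
 * with value 0x6, i.e. write+execute but not read -- the 110b combination
 * described above -- so a guest access through an MMIO SPTE raises an EPT
 * misconfiguration exit instead of an ordinary EPT violation.
 */
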
Wanpeng Lif53cd632014-12-02 19:14:58 +08005457#define VMX_XSS_EXIT_BITMAP 0
Nadav Har'Ela3a8ff82011-05-25 23:09:01 +03005458/*
Avi Kivity6aa8b732006-12-10 02:21:36 -08005459 * Sets up the vmcs for emulated real mode.
5460 */
David Hildenbrand12d79912017-08-24 20:51:26 +02005461static void vmx_vcpu_setup(struct vcpu_vmx *vmx)
Avi Kivity6aa8b732006-12-10 02:21:36 -08005462{
Jan Kiszka2e4ce7f2011-06-01 12:57:30 +02005463#ifdef CONFIG_X86_64
Avi Kivity6aa8b732006-12-10 02:21:36 -08005464 unsigned long a;
Jan Kiszka2e4ce7f2011-06-01 12:57:30 +02005465#endif
Avi Kivity6aa8b732006-12-10 02:21:36 -08005466 int i;
Avi Kivity6aa8b732006-12-10 02:21:36 -08005467
Abel Gordon4607c2d2013-04-18 14:35:55 +03005468 if (enable_shadow_vmcs) {
5469 vmcs_write64(VMREAD_BITMAP, __pa(vmx_vmread_bitmap));
5470 vmcs_write64(VMWRITE_BITMAP, __pa(vmx_vmwrite_bitmap));
5471 }
Sheng Yang25c5f222008-03-28 13:18:56 +08005472 if (cpu_has_vmx_msr_bitmap())
Avi Kivity58972972009-02-24 22:26:47 +02005473 vmcs_write64(MSR_BITMAP, __pa(vmx_msr_bitmap_legacy));
Sheng Yang25c5f222008-03-28 13:18:56 +08005474
Avi Kivity6aa8b732006-12-10 02:21:36 -08005475 vmcs_write64(VMCS_LINK_POINTER, -1ull); /* 22.3.1.5 */
5476
Avi Kivity6aa8b732006-12-10 02:21:36 -08005477 /* Control */
Yang Zhang01e439b2013-04-11 19:25:12 +08005478 vmcs_write32(PIN_BASED_VM_EXEC_CONTROL, vmx_pin_based_exec_ctrl(vmx));
Yunhong Jiang64672c92016-06-13 14:19:59 -07005479 vmx->hv_deadline_tsc = -1;
Yang, Sheng6e5d8652007-09-12 18:03:11 +08005480
Nadav Har'Elbf8179a2011-05-25 23:09:31 +03005481 vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, vmx_exec_control(vmx));
Avi Kivity6aa8b732006-12-10 02:21:36 -08005482
Dan Williamsdfa169b2016-06-02 11:17:24 -07005483 if (cpu_has_secondary_exec_ctrls()) {
Paolo Bonzini80154d72017-08-24 13:55:35 +02005484 vmx_compute_secondary_exec_control(vmx);
Nadav Har'Elbf8179a2011-05-25 23:09:31 +03005485 vmcs_write32(SECONDARY_VM_EXEC_CONTROL,
Paolo Bonzini80154d72017-08-24 13:55:35 +02005486 vmx->secondary_exec_control);
Dan Williamsdfa169b2016-06-02 11:17:24 -07005487 }
Sheng Yangf78e0e22007-10-29 09:40:42 +08005488
Andrey Smetanind62caab2015-11-10 15:36:33 +03005489 if (kvm_vcpu_apicv_active(&vmx->vcpu)) {
Yang Zhangc7c9c562013-01-25 10:18:51 +08005490 vmcs_write64(EOI_EXIT_BITMAP0, 0);
5491 vmcs_write64(EOI_EXIT_BITMAP1, 0);
5492 vmcs_write64(EOI_EXIT_BITMAP2, 0);
5493 vmcs_write64(EOI_EXIT_BITMAP3, 0);
5494
5495 vmcs_write16(GUEST_INTR_STATUS, 0);
Yang Zhang01e439b2013-04-11 19:25:12 +08005496
Li RongQing0bcf2612015-12-03 13:29:34 +08005497 vmcs_write16(POSTED_INTR_NV, POSTED_INTR_VECTOR);
Yang Zhang01e439b2013-04-11 19:25:12 +08005498 vmcs_write64(POSTED_INTR_DESC_ADDR, __pa((&vmx->pi_desc)));
Yang Zhangc7c9c562013-01-25 10:18:51 +08005499 }
5500
Zhai, Edwin4b8d54f2009-10-09 18:03:20 +08005501 if (ple_gap) {
5502 vmcs_write32(PLE_GAP, ple_gap);
Radim Krčmářa7653ec2014-08-21 18:08:07 +02005503 vmx->ple_window = ple_window;
5504 vmx->ple_window_dirty = true;
Zhai, Edwin4b8d54f2009-10-09 18:03:20 +08005505 }
5506
Xiao Guangrongc3707952011-07-12 03:28:04 +08005507 vmcs_write32(PAGE_FAULT_ERROR_CODE_MASK, 0);
5508 vmcs_write32(PAGE_FAULT_ERROR_CODE_MATCH, 0);
Avi Kivity6aa8b732006-12-10 02:21:36 -08005509 vmcs_write32(CR3_TARGET_COUNT, 0); /* 22.2.1 */
5510
Avi Kivity9581d442010-10-19 16:46:55 +02005511 vmcs_write16(HOST_FS_SELECTOR, 0); /* 22.2.4 */
5512 vmcs_write16(HOST_GS_SELECTOR, 0); /* 22.2.4 */
Yang Zhanga547c6d2013-04-11 19:25:10 +08005513 vmx_set_constant_host_state(vmx);
Avi Kivity05b3e0c2006-12-13 00:33:45 -08005514#ifdef CONFIG_X86_64
Avi Kivity6aa8b732006-12-10 02:21:36 -08005515 rdmsrl(MSR_FS_BASE, a);
5516 vmcs_writel(HOST_FS_BASE, a); /* 22.2.4 */
5517 rdmsrl(MSR_GS_BASE, a);
5518 vmcs_writel(HOST_GS_BASE, a); /* 22.2.4 */
5519#else
5520 vmcs_writel(HOST_FS_BASE, 0); /* 22.2.4 */
5521 vmcs_writel(HOST_GS_BASE, 0); /* 22.2.4 */
5522#endif
5523
Bandan Das2a499e42017-08-03 15:54:41 -04005524 if (cpu_has_vmx_vmfunc())
5525 vmcs_write64(VM_FUNCTION_CONTROL, 0);
5526
Eddie Dong2cc51562007-05-21 07:28:09 +03005527 vmcs_write32(VM_EXIT_MSR_STORE_COUNT, 0);
5528 vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, 0);
Avi Kivity61d2ef22010-04-28 16:40:38 +03005529 vmcs_write64(VM_EXIT_MSR_LOAD_ADDR, __pa(vmx->msr_autoload.host));
Eddie Dong2cc51562007-05-21 07:28:09 +03005530 vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, 0);
Avi Kivity61d2ef22010-04-28 16:40:38 +03005531 vmcs_write64(VM_ENTRY_MSR_LOAD_ADDR, __pa(vmx->msr_autoload.guest));
Avi Kivity6aa8b732006-12-10 02:21:36 -08005532
Radim Krčmář74545702015-04-27 15:11:25 +02005533 if (vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_IA32_PAT)
5534 vmcs_write64(GUEST_IA32_PAT, vmx->vcpu.arch.pat);
Sheng Yang468d4722008-10-09 16:01:55 +08005535
Paolo Bonzini03916db2014-07-24 14:21:57 +02005536 for (i = 0; i < ARRAY_SIZE(vmx_msr_index); ++i) {
Avi Kivity6aa8b732006-12-10 02:21:36 -08005537 u32 index = vmx_msr_index[i];
5538 u32 data_low, data_high;
Gregory Haskinsa2fa3e92007-07-27 08:13:10 -04005539 int j = vmx->nmsrs;
Avi Kivity6aa8b732006-12-10 02:21:36 -08005540
5541 if (rdmsr_safe(index, &data_low, &data_high) < 0)
5542 continue;
Avi Kivity432bd6c2007-01-31 23:48:13 -08005543 if (wrmsr_safe(index, data_low, data_high) < 0)
5544 continue;
Avi Kivity26bb0982009-09-07 11:14:12 +03005545 vmx->guest_msrs[j].index = i;
5546 vmx->guest_msrs[j].data = 0;
Avi Kivityd5696722009-12-02 12:28:47 +02005547 vmx->guest_msrs[j].mask = -1ull;
Gregory Haskinsa2fa3e92007-07-27 08:13:10 -04005548 ++vmx->nmsrs;
Avi Kivity6aa8b732006-12-10 02:21:36 -08005549 }
Avi Kivity6aa8b732006-12-10 02:21:36 -08005550
Gleb Natapov2961e8762013-11-25 15:37:13 +02005551
5552 vm_exit_controls_init(vmx, vmcs_config.vmexit_ctrl);
Avi Kivity6aa8b732006-12-10 02:21:36 -08005553
5554 /* 22.2.1, 20.8.1 */
Gleb Natapov2961e8762013-11-25 15:37:13 +02005555 vm_entry_controls_init(vmx, vmcs_config.vmentry_ctrl);
Yang, Sheng1c3d14fe2007-07-29 11:07:42 +03005556
Paolo Bonzinibd7e5b02017-02-03 21:18:52 -08005557 vmx->vcpu.arch.cr0_guest_owned_bits = X86_CR0_TS;
5558 vmcs_writel(CR0_GUEST_HOST_MASK, ~X86_CR0_TS);
5559
Nadav Har'Elbf8179a2011-05-25 23:09:31 +03005560 set_cr4_guest_host_mask(vmx);
Avi Kivitye00c8cf2007-10-21 11:00:39 +02005561
Wanpeng Lif53cd632014-12-02 19:14:58 +08005562 if (vmx_xsaves_supported())
5563 vmcs_write64(XSS_EXIT_BITMAP, VMX_XSS_EXIT_BITMAP);
5564
Peter Feiner4e595162016-07-07 14:49:58 -07005565 if (enable_pml) {
5566 ASSERT(vmx->pml_pg);
5567 vmcs_write64(PML_ADDRESS, page_to_phys(vmx->pml_pg));
5568 vmcs_write16(GUEST_PML_INDEX, PML_ENTITY_NUM - 1);
5569 }
Avi Kivitye00c8cf2007-10-21 11:00:39 +02005570}
5571
Nadav Amitd28bc9d2015-04-13 14:34:08 +03005572static void vmx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
Avi Kivitye00c8cf2007-10-21 11:00:39 +02005573{
5574 struct vcpu_vmx *vmx = to_vmx(vcpu);
Jan Kiszka58cb6282014-01-24 16:48:44 +01005575 struct msr_data apic_base_msr;
Nadav Amitd28bc9d2015-04-13 14:34:08 +03005576 u64 cr0;
Avi Kivitye00c8cf2007-10-21 11:00:39 +02005577
Avi Kivity7ffd92c2009-06-09 14:10:45 +03005578 vmx->rmode.vm86_active = 0;
Avi Kivitye00c8cf2007-10-21 11:00:39 +02005579
Zhang Xiantaoad312c72007-12-13 23:50:52 +08005580 vmx->vcpu.arch.regs[VCPU_REGS_RDX] = get_rdx_init_val();
Nadav Amitd28bc9d2015-04-13 14:34:08 +03005581 kvm_set_cr8(vcpu, 0);
5582
5583 if (!init_event) {
5584 apic_base_msr.data = APIC_DEFAULT_PHYS_BASE |
5585 MSR_IA32_APICBASE_ENABLE;
5586 if (kvm_vcpu_is_reset_bsp(vcpu))
5587 apic_base_msr.data |= MSR_IA32_APICBASE_BSP;
5588 apic_base_msr.host_initiated = true;
5589 kvm_set_apic_base(vcpu, &apic_base_msr);
5590 }
Avi Kivitye00c8cf2007-10-21 11:00:39 +02005591
Avi Kivity2fb92db2011-04-27 19:42:18 +03005592 vmx_segment_cache_clear(vmx);
5593
Avi Kivity5706be02008-08-20 15:07:31 +03005594 seg_setup(VCPU_SREG_CS);
Jan Kiszka66450a22013-03-13 12:42:34 +01005595 vmcs_write16(GUEST_CS_SELECTOR, 0xf000);
Paolo Bonzinif3531052015-12-03 15:49:56 +01005596 vmcs_writel(GUEST_CS_BASE, 0xffff0000ul);
Avi Kivitye00c8cf2007-10-21 11:00:39 +02005597
5598 seg_setup(VCPU_SREG_DS);
5599 seg_setup(VCPU_SREG_ES);
5600 seg_setup(VCPU_SREG_FS);
5601 seg_setup(VCPU_SREG_GS);
5602 seg_setup(VCPU_SREG_SS);
5603
5604 vmcs_write16(GUEST_TR_SELECTOR, 0);
5605 vmcs_writel(GUEST_TR_BASE, 0);
5606 vmcs_write32(GUEST_TR_LIMIT, 0xffff);
5607 vmcs_write32(GUEST_TR_AR_BYTES, 0x008b);
5608
5609 vmcs_write16(GUEST_LDTR_SELECTOR, 0);
5610 vmcs_writel(GUEST_LDTR_BASE, 0);
5611 vmcs_write32(GUEST_LDTR_LIMIT, 0xffff);
5612 vmcs_write32(GUEST_LDTR_AR_BYTES, 0x00082);
5613
Nadav Amitd28bc9d2015-04-13 14:34:08 +03005614 if (!init_event) {
5615 vmcs_write32(GUEST_SYSENTER_CS, 0);
5616 vmcs_writel(GUEST_SYSENTER_ESP, 0);
5617 vmcs_writel(GUEST_SYSENTER_EIP, 0);
5618 vmcs_write64(GUEST_IA32_DEBUGCTL, 0);
5619 }
Avi Kivitye00c8cf2007-10-21 11:00:39 +02005620
Wanpeng Lic37c2872017-11-20 14:52:21 -08005621 kvm_set_rflags(vcpu, X86_EFLAGS_FIXED);
Jan Kiszka66450a22013-03-13 12:42:34 +01005622 kvm_rip_write(vcpu, 0xfff0);
Avi Kivitye00c8cf2007-10-21 11:00:39 +02005623
Avi Kivitye00c8cf2007-10-21 11:00:39 +02005624 vmcs_writel(GUEST_GDTR_BASE, 0);
5625 vmcs_write32(GUEST_GDTR_LIMIT, 0xffff);
5626
5627 vmcs_writel(GUEST_IDTR_BASE, 0);
5628 vmcs_write32(GUEST_IDTR_LIMIT, 0xffff);
5629
Anthony Liguori443381a2010-12-06 10:53:38 -06005630 vmcs_write32(GUEST_ACTIVITY_STATE, GUEST_ACTIVITY_ACTIVE);
Avi Kivitye00c8cf2007-10-21 11:00:39 +02005631 vmcs_write32(GUEST_INTERRUPTIBILITY_INFO, 0);
Paolo Bonzinif3531052015-12-03 15:49:56 +01005632 vmcs_writel(GUEST_PENDING_DBG_EXCEPTIONS, 0);
Wanpeng Lia554d202017-10-11 05:10:19 -07005633 if (kvm_mpx_supported())
5634 vmcs_write64(GUEST_BNDCFGS, 0);
Avi Kivitye00c8cf2007-10-21 11:00:39 +02005635
Avi Kivitye00c8cf2007-10-21 11:00:39 +02005636 setup_msrs(vmx);
5637
Avi Kivity6aa8b732006-12-10 02:21:36 -08005638 vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, 0); /* 22.2.1 */
5639
Nadav Amitd28bc9d2015-04-13 14:34:08 +03005640 if (cpu_has_vmx_tpr_shadow() && !init_event) {
Sheng Yangf78e0e22007-10-29 09:40:42 +08005641 vmcs_write64(VIRTUAL_APIC_PAGE_ADDR, 0);
Paolo Bonzini35754c92015-07-29 12:05:37 +02005642 if (cpu_need_tpr_shadow(vcpu))
Sheng Yangf78e0e22007-10-29 09:40:42 +08005643 vmcs_write64(VIRTUAL_APIC_PAGE_ADDR,
Nadav Amitd28bc9d2015-04-13 14:34:08 +03005644 __pa(vcpu->arch.apic->regs));
Sheng Yangf78e0e22007-10-29 09:40:42 +08005645 vmcs_write32(TPR_THRESHOLD, 0);
5646 }
5647
Paolo Bonzinia73896c2014-11-02 07:54:30 +01005648 kvm_make_request(KVM_REQ_APIC_PAGE_RELOAD, vcpu);
Avi Kivity6aa8b732006-12-10 02:21:36 -08005649
Sheng Yang2384d2b2008-01-17 15:14:33 +08005650 if (vmx->vpid != 0)
5651 vmcs_write16(VIRTUAL_PROCESSOR_ID, vmx->vpid);
5652
Nadav Amitd28bc9d2015-04-13 14:34:08 +03005653 cr0 = X86_CR0_NW | X86_CR0_CD | X86_CR0_ET;
Nadav Amitd28bc9d2015-04-13 14:34:08 +03005654 vmx->vcpu.arch.cr0 = cr0;
Bruce Rogersf2463242016-04-28 14:49:21 -06005655 vmx_set_cr0(vcpu, cr0); /* enter rmode */
Nadav Amitd28bc9d2015-04-13 14:34:08 +03005656 vmx_set_cr4(vcpu, 0);
Paolo Bonzini56908912015-10-19 11:30:19 +02005657 vmx_set_efer(vcpu, 0);
Paolo Bonzinibd7e5b02017-02-03 21:18:52 -08005658
Nadav Amitd28bc9d2015-04-13 14:34:08 +03005659 update_exception_bitmap(vcpu);
Avi Kivity6aa8b732006-12-10 02:21:36 -08005660
Wanpeng Lidd5f5342015-09-23 18:26:57 +08005661 vpid_sync_context(vmx->vpid);
Avi Kivity6aa8b732006-12-10 02:21:36 -08005662}
5663
Nadav Har'Elb6f12502011-05-25 23:13:06 +03005664/*
5665 * In nested virtualization, check if L1 asked to exit on external interrupts.
5666 * For most existing hypervisors, this will always return true.
5667 */
5668static bool nested_exit_on_intr(struct kvm_vcpu *vcpu)
5669{
5670 return get_vmcs12(vcpu)->pin_based_vm_exec_control &
5671 PIN_BASED_EXT_INTR_MASK;
5672}
5673
Bandan Das77b0f5d2014-04-19 18:17:45 -04005674/*
5675 * In nested virtualization, check if L1 has set
5676 * VM_EXIT_ACK_INTR_ON_EXIT
5677 */
5678static bool nested_exit_intr_ack_set(struct kvm_vcpu *vcpu)
5679{
5680 return get_vmcs12(vcpu)->vm_exit_controls &
5681 VM_EXIT_ACK_INTR_ON_EXIT;
5682}
5683
Jan Kiszkaea8ceb82013-04-14 21:04:26 +02005684static bool nested_exit_on_nmi(struct kvm_vcpu *vcpu)
5685{
5686 return get_vmcs12(vcpu)->pin_based_vm_exec_control &
5687 PIN_BASED_NMI_EXITING;
5688}
5689
Jan Kiszkac9a79532014-03-07 20:03:15 +01005690static void enable_irq_window(struct kvm_vcpu *vcpu)
Jan Kiszka3b86cd92008-09-26 09:30:57 +02005691{
Paolo Bonzini47c01522016-12-19 11:44:07 +01005692 vmcs_set_bits(CPU_BASED_VM_EXEC_CONTROL,
5693 CPU_BASED_VIRTUAL_INTR_PENDING);
Jan Kiszka3b86cd92008-09-26 09:30:57 +02005694}
5695
Jan Kiszkac9a79532014-03-07 20:03:15 +01005696static void enable_nmi_window(struct kvm_vcpu *vcpu)
Jan Kiszka3b86cd92008-09-26 09:30:57 +02005697{
Paolo Bonzinid02fcf52017-11-06 13:31:13 +01005698 if (!enable_vnmi ||
Paolo Bonzini8a1b4392017-11-06 13:31:12 +01005699 vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & GUEST_INTR_STATE_STI) {
Jan Kiszkac9a79532014-03-07 20:03:15 +01005700 enable_irq_window(vcpu);
5701 return;
5702 }
Jan Kiszka03b28f82013-04-29 16:46:42 +02005703
Paolo Bonzini47c01522016-12-19 11:44:07 +01005704 vmcs_set_bits(CPU_BASED_VM_EXEC_CONTROL,
5705 CPU_BASED_VIRTUAL_NMI_PENDING);
Jan Kiszka3b86cd92008-09-26 09:30:57 +02005706}
5707
Gleb Natapov66fd3f72009-05-11 13:35:50 +03005708static void vmx_inject_irq(struct kvm_vcpu *vcpu)
Eddie Dong85f455f2007-07-06 12:20:49 +03005709{
Avi Kivity9c8cba32007-11-22 11:42:59 +02005710 struct vcpu_vmx *vmx = to_vmx(vcpu);
Gleb Natapov66fd3f72009-05-11 13:35:50 +03005711 uint32_t intr;
5712 int irq = vcpu->arch.interrupt.nr;
Avi Kivity9c8cba32007-11-22 11:42:59 +02005713
Marcelo Tosatti229456f2009-06-17 09:22:14 -03005714 trace_kvm_inj_virq(irq);
Feng (Eric) Liu2714d1d2008-04-10 15:31:10 -04005715
Avi Kivityfa89a812008-09-01 15:57:51 +03005716 ++vcpu->stat.irq_injections;
Avi Kivity7ffd92c2009-06-09 14:10:45 +03005717 if (vmx->rmode.vm86_active) {
Serge E. Hallyn71f98332011-04-13 09:12:54 -05005718 int inc_eip = 0;
5719 if (vcpu->arch.interrupt.soft)
5720 inc_eip = vcpu->arch.event_exit_inst_len;
5721 if (kvm_inject_realmode_interrupt(vcpu, irq, inc_eip) != EMULATE_DONE)
Mohammed Gamala92601b2010-09-19 14:34:07 +02005722 kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu);
Eddie Dong85f455f2007-07-06 12:20:49 +03005723 return;
5724 }
Gleb Natapov66fd3f72009-05-11 13:35:50 +03005725 intr = irq | INTR_INFO_VALID_MASK;
5726 if (vcpu->arch.interrupt.soft) {
5727 intr |= INTR_TYPE_SOFT_INTR;
5728 vmcs_write32(VM_ENTRY_INSTRUCTION_LEN,
5729 vmx->vcpu.arch.event_exit_inst_len);
5730 } else
5731 intr |= INTR_TYPE_EXT_INTR;
5732 vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, intr);
Eddie Dong85f455f2007-07-06 12:20:49 +03005733}
5734
Sheng Yangf08864b2008-05-15 18:23:25 +08005735static void vmx_inject_nmi(struct kvm_vcpu *vcpu)
5736{
Jan Kiszka66a5a342008-09-26 09:30:51 +02005737 struct vcpu_vmx *vmx = to_vmx(vcpu);
5738
Paolo Bonzinid02fcf52017-11-06 13:31:13 +01005739 if (!enable_vnmi) {
Paolo Bonzini8a1b4392017-11-06 13:31:12 +01005740 /*
5741 * Tracking the NMI-blocked state in software is built upon
5742 * finding the next open IRQ window. This, in turn, depends on
5743 * well-behaving guests: They have to keep IRQs disabled at
5744 * least as long as the NMI handler runs. Otherwise we may
5745 * cause NMI nesting, maybe breaking the guest. But as this is
5746 * highly unlikely, we can live with the residual risk.
5747 */
5748 vmx->loaded_vmcs->soft_vnmi_blocked = 1;
5749 vmx->loaded_vmcs->vnmi_blocked_time = 0;
5750 }
5751
Paolo Bonzini4c4a6f72017-07-14 13:36:11 +02005752 ++vcpu->stat.nmi_injections;
5753 vmx->loaded_vmcs->nmi_known_unmasked = false;
Jan Kiszka3b86cd92008-09-26 09:30:57 +02005754
Avi Kivity7ffd92c2009-06-09 14:10:45 +03005755 if (vmx->rmode.vm86_active) {
Serge E. Hallyn71f98332011-04-13 09:12:54 -05005756 if (kvm_inject_realmode_interrupt(vcpu, NMI_VECTOR, 0) != EMULATE_DONE)
Mohammed Gamala92601b2010-09-19 14:34:07 +02005757 kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu);
Jan Kiszka66a5a342008-09-26 09:30:51 +02005758 return;
5759 }
Wanpeng Lic5a6d5f2016-09-22 17:55:54 +08005760
Sheng Yangf08864b2008-05-15 18:23:25 +08005761 vmcs_write32(VM_ENTRY_INTR_INFO_FIELD,
5762 INTR_TYPE_NMI_INTR | INTR_INFO_VALID_MASK | NMI_VECTOR);
Sheng Yangf08864b2008-05-15 18:23:25 +08005763}
5764
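/*
 * Worked examples of the VM_ENTRY_INTR_INFO_FIELD encodings used by the two
 * injection paths above (bits 7:0 vector, bits 10:8 type, bit 31 valid):
 *
 *   external interrupt, vector 0x20: 0x80000000 | (0 << 8) | 0x20 = 0x80000020
 *   software interrupt, vector 0x80: 0x80000000 | (4 << 8) | 0x80 = 0x80000480
 *   NMI (vector 2):                  0x80000000 | (2 << 8) | 0x02 = 0x80000202
 */
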
Jan Kiszka3cfc3092009-11-12 01:04:25 +01005765static bool vmx_get_nmi_mask(struct kvm_vcpu *vcpu)
5766{
Paolo Bonzini4c4a6f72017-07-14 13:36:11 +02005767 struct vcpu_vmx *vmx = to_vmx(vcpu);
5768 bool masked;
5769
Paolo Bonzinid02fcf52017-11-06 13:31:13 +01005770 if (!enable_vnmi)
Paolo Bonzini8a1b4392017-11-06 13:31:12 +01005771 return vmx->loaded_vmcs->soft_vnmi_blocked;
Paolo Bonzini4c4a6f72017-07-14 13:36:11 +02005772 if (vmx->loaded_vmcs->nmi_known_unmasked)
Avi Kivity9d58b932011-03-07 16:52:07 +02005773 return false;
Paolo Bonzini4c4a6f72017-07-14 13:36:11 +02005774 masked = vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & GUEST_INTR_STATE_NMI;
5775 vmx->loaded_vmcs->nmi_known_unmasked = !masked;
5776 return masked;
Jan Kiszka3cfc3092009-11-12 01:04:25 +01005777}
5778
5779static void vmx_set_nmi_mask(struct kvm_vcpu *vcpu, bool masked)
5780{
5781 struct vcpu_vmx *vmx = to_vmx(vcpu);
5782
Paolo Bonzinid02fcf52017-11-06 13:31:13 +01005783 if (!enable_vnmi) {
Paolo Bonzini8a1b4392017-11-06 13:31:12 +01005784 if (vmx->loaded_vmcs->soft_vnmi_blocked != masked) {
5785 vmx->loaded_vmcs->soft_vnmi_blocked = masked;
5786 vmx->loaded_vmcs->vnmi_blocked_time = 0;
5787 }
5788 } else {
5789 vmx->loaded_vmcs->nmi_known_unmasked = !masked;
5790 if (masked)
5791 vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO,
5792 GUEST_INTR_STATE_NMI);
5793 else
5794 vmcs_clear_bits(GUEST_INTERRUPTIBILITY_INFO,
5795 GUEST_INTR_STATE_NMI);
5796 }
Jan Kiszka3cfc3092009-11-12 01:04:25 +01005797}
5798
Jan Kiszka2505dc92013-04-14 12:12:47 +02005799static int vmx_nmi_allowed(struct kvm_vcpu *vcpu)
5800{
Jan Kiszkab6b8a142014-03-07 20:03:12 +01005801 if (to_vmx(vcpu)->nested.nested_run_pending)
5802 return 0;
Jan Kiszkaea8ceb82013-04-14 21:04:26 +02005803
Paolo Bonzinid02fcf52017-11-06 13:31:13 +01005804 if (!enable_vnmi &&
Paolo Bonzini8a1b4392017-11-06 13:31:12 +01005805 to_vmx(vcpu)->loaded_vmcs->soft_vnmi_blocked)
5806 return 0;
5807
Jan Kiszka2505dc92013-04-14 12:12:47 +02005808 return !(vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) &
5809 (GUEST_INTR_STATE_MOV_SS | GUEST_INTR_STATE_STI
5810 | GUEST_INTR_STATE_NMI));
5811}
5812
Gleb Natapov78646122009-03-23 12:12:11 +02005813static int vmx_interrupt_allowed(struct kvm_vcpu *vcpu)
5814{
Jan Kiszkab6b8a142014-03-07 20:03:12 +01005815 return (!to_vmx(vcpu)->nested.nested_run_pending &&
5816 vmcs_readl(GUEST_RFLAGS) & X86_EFLAGS_IF) &&
Gleb Natapovc4282df2009-04-21 17:45:07 +03005817 !(vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) &
5818 (GUEST_INTR_STATE_STI | GUEST_INTR_STATE_MOV_SS));
Gleb Natapov78646122009-03-23 12:12:11 +02005819}
5820
Izik Eiduscbc94022007-10-25 00:29:55 +02005821static int vmx_set_tss_addr(struct kvm *kvm, unsigned int addr)
5822{
5823 int ret;
Izik Eiduscbc94022007-10-25 00:29:55 +02005824
Paolo Bonzini1d8007b2015-10-12 13:38:32 +02005825 ret = x86_set_memory_region(kvm, TSS_PRIVATE_MEMSLOT, addr,
5826 PAGE_SIZE * 3);
Izik Eiduscbc94022007-10-25 00:29:55 +02005827 if (ret)
5828 return ret;
Zhang Xiantaobfc6d222007-12-14 10:20:16 +08005829 kvm->arch.tss_addr = addr;
Paolo Bonzini1f755a82014-09-16 13:37:40 +02005830 return init_rmode_tss(kvm);
Izik Eiduscbc94022007-10-25 00:29:55 +02005831}
5832
Gleb Natapov0ca1b4f2012-12-20 16:57:47 +02005833static bool rmode_exception(struct kvm_vcpu *vcpu, int vec)
Avi Kivity6aa8b732006-12-10 02:21:36 -08005834{
Jan Kiszka77ab6db2008-07-14 12:28:51 +02005835 switch (vec) {
Jan Kiszka77ab6db2008-07-14 12:28:51 +02005836 case BP_VECTOR:
Jan Kiszkac573cd22010-02-23 17:47:53 +01005837 /*
5838 * Update instruction length as we may reinject the exception
5839 * from user space while in guest debugging mode.
5840 */
5841 to_vmx(vcpu)->vcpu.arch.event_exit_inst_len =
5842 vmcs_read32(VM_EXIT_INSTRUCTION_LEN);
Jan Kiszkad0bfb942008-12-15 13:52:10 +01005843 if (vcpu->guest_debug & KVM_GUESTDBG_USE_SW_BP)
Gleb Natapov0ca1b4f2012-12-20 16:57:47 +02005844 return false;
5845 /* fall through */
5846 case DB_VECTOR:
5847 if (vcpu->guest_debug &
5848 (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP))
5849 return false;
Jan Kiszkad0bfb942008-12-15 13:52:10 +01005850 /* fall through */
5851 case DE_VECTOR:
Jan Kiszka77ab6db2008-07-14 12:28:51 +02005852 case OF_VECTOR:
5853 case BR_VECTOR:
5854 case UD_VECTOR:
5855 case DF_VECTOR:
5856 case SS_VECTOR:
5857 case GP_VECTOR:
5858 case MF_VECTOR:
Gleb Natapov0ca1b4f2012-12-20 16:57:47 +02005859 return true;
5860 break;
Jan Kiszka77ab6db2008-07-14 12:28:51 +02005861 }
Gleb Natapov0ca1b4f2012-12-20 16:57:47 +02005862 return false;
5863}
5864
5865static int handle_rmode_exception(struct kvm_vcpu *vcpu,
5866 int vec, u32 err_code)
5867{
5868 /*
5869	 * An instruction with the address-size override prefix (opcode 0x67)
5870	 * causes an #SS fault with error code 0 in VM86 mode.
5871 */
5872 if (((vec == GP_VECTOR) || (vec == SS_VECTOR)) && err_code == 0) {
5873 if (emulate_instruction(vcpu, 0) == EMULATE_DONE) {
5874 if (vcpu->arch.halt_request) {
5875 vcpu->arch.halt_request = 0;
Joel Schopp5cb56052015-03-02 13:43:31 -06005876 return kvm_vcpu_halt(vcpu);
Gleb Natapov0ca1b4f2012-12-20 16:57:47 +02005877 }
5878 return 1;
5879 }
5880 return 0;
5881 }
5882
5883 /*
5884 * Forward all other exceptions that are valid in real mode.
5885 * FIXME: Breaks guest debugging in real mode, needs to be fixed with
5886 * the required debugging infrastructure rework.
5887 */
5888 kvm_queue_exception(vcpu, vec);
5889 return 1;
Avi Kivity6aa8b732006-12-10 02:21:36 -08005890}
5891
Andi Kleena0861c02009-06-08 17:37:09 +08005892/*
5893 * Trigger machine check on the host. We assume all the MSRs are already set up
5894 * by the CPU and that we still run on the same CPU as the MCE occurred on.
5895 * We pass a fake environment to the machine check handler because we want
5896 * the guest to be always treated like user space, no matter what context
5897 * it used internally.
5898 */
5899static void kvm_machine_check(void)
5900{
5901#if defined(CONFIG_X86_MCE) && defined(CONFIG_X86_64)
5902 struct pt_regs regs = {
5903 .cs = 3, /* Fake ring 3 no matter what the guest ran on */
5904 .flags = X86_EFLAGS_IF,
5905 };
5906
5907 do_machine_check(&regs, 0);
5908#endif
5909}
5910
Avi Kivity851ba692009-08-24 11:10:17 +03005911static int handle_machine_check(struct kvm_vcpu *vcpu)
Andi Kleena0861c02009-06-08 17:37:09 +08005912{
5913 /* already handled by vcpu_run */
5914 return 1;
5915}
5916
Avi Kivity851ba692009-08-24 11:10:17 +03005917static int handle_exception(struct kvm_vcpu *vcpu)
Avi Kivity6aa8b732006-12-10 02:21:36 -08005918{
Avi Kivity1155f762007-11-22 11:30:47 +02005919 struct vcpu_vmx *vmx = to_vmx(vcpu);
Avi Kivity851ba692009-08-24 11:10:17 +03005920 struct kvm_run *kvm_run = vcpu->run;
Jan Kiszkad0bfb942008-12-15 13:52:10 +01005921 u32 intr_info, ex_no, error_code;
Jan Kiszka42dbaa52008-12-15 13:52:10 +01005922 unsigned long cr2, rip, dr6;
Avi Kivity6aa8b732006-12-10 02:21:36 -08005923 u32 vect_info;
5924 enum emulation_result er;
5925
Avi Kivity1155f762007-11-22 11:30:47 +02005926 vect_info = vmx->idt_vectoring_info;
Avi Kivity88786472011-03-07 17:39:45 +02005927 intr_info = vmx->exit_intr_info;
Avi Kivity6aa8b732006-12-10 02:21:36 -08005928
Andi Kleena0861c02009-06-08 17:37:09 +08005929 if (is_machine_check(intr_info))
Avi Kivity851ba692009-08-24 11:10:17 +03005930 return handle_machine_check(vcpu);
Andi Kleena0861c02009-06-08 17:37:09 +08005931
Jim Mattsonef85b672016-12-12 11:01:37 -08005932 if (is_nmi(intr_info))
Avi Kivity1b6269d2007-10-09 12:12:19 +02005933 return 1; /* already handled by vmx_vcpu_run() */
Anthony Liguori2ab455c2007-04-27 09:29:49 +03005934
Anthony Liguori7aa81cc2007-09-17 14:57:50 -05005935 if (is_invalid_opcode(intr_info)) {
Liran Alonac9b3052017-11-06 16:15:10 +02005936 WARN_ON_ONCE(is_guest_mode(vcpu));
Andre Przywara51d8b662010-12-21 11:12:02 +01005937 er = emulate_instruction(vcpu, EMULTYPE_TRAP_UD);
Liran Alon61cb57c2017-11-05 16:56:32 +02005938 if (er == EMULATE_USER_EXIT)
5939 return 0;
Anthony Liguori7aa81cc2007-09-17 14:57:50 -05005940 if (er != EMULATE_DONE)
Avi Kivity7ee5d9402007-11-25 15:22:50 +02005941 kvm_queue_exception(vcpu, UD_VECTOR);
Anthony Liguori7aa81cc2007-09-17 14:57:50 -05005942 return 1;
5943 }
5944
Avi Kivity6aa8b732006-12-10 02:21:36 -08005945 error_code = 0;
Ryan Harper2e113842008-02-11 10:26:38 -06005946 if (intr_info & INTR_INFO_DELIVER_CODE_MASK)
Avi Kivity6aa8b732006-12-10 02:21:36 -08005947 error_code = vmcs_read32(VM_EXIT_INTR_ERROR_CODE);
Xiao Guangrongbf4ca232012-10-17 13:48:06 +08005948
5949 /*
5950 * The #PF with PFEC.RSVD = 1 indicates the guest is accessing
5951 * MMIO, it is better to report an internal error.
5952 * See the comments in vmx_handle_exit.
5953 */
5954 if ((vect_info & VECTORING_INFO_VALID_MASK) &&
5955 !(is_page_fault(intr_info) && !(error_code & PFERR_RSVD_MASK))) {
5956 vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
5957 vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_SIMUL_EX;
Radim Krčmář80f0e952015-04-02 21:11:05 +02005958 vcpu->run->internal.ndata = 3;
Xiao Guangrongbf4ca232012-10-17 13:48:06 +08005959 vcpu->run->internal.data[0] = vect_info;
5960 vcpu->run->internal.data[1] = intr_info;
Radim Krčmář80f0e952015-04-02 21:11:05 +02005961 vcpu->run->internal.data[2] = error_code;
Xiao Guangrongbf4ca232012-10-17 13:48:06 +08005962 return 0;
5963 }
5964
Avi Kivity6aa8b732006-12-10 02:21:36 -08005965 if (is_page_fault(intr_info)) {
5966 cr2 = vmcs_readl(EXIT_QUALIFICATION);
Wanpeng Li1261bfa2017-07-13 18:30:40 -07005967 /* EPT won't cause page fault directly */
5968 WARN_ON_ONCE(!vcpu->arch.apf.host_apf_reason && enable_ept);
Paolo Bonzinid0006532017-08-11 18:36:43 +02005969 return kvm_handle_page_fault(vcpu, error_code, cr2, NULL, 0);
Avi Kivity6aa8b732006-12-10 02:21:36 -08005970 }
5971
Jan Kiszkad0bfb942008-12-15 13:52:10 +01005972 ex_no = intr_info & INTR_INFO_VECTOR_MASK;
Gleb Natapov0ca1b4f2012-12-20 16:57:47 +02005973
5974 if (vmx->rmode.vm86_active && rmode_exception(vcpu, ex_no))
5975 return handle_rmode_exception(vcpu, ex_no, error_code);
5976
Jan Kiszka42dbaa52008-12-15 13:52:10 +01005977 switch (ex_no) {
Eric Northup54a20552015-11-03 18:03:53 +01005978 case AC_VECTOR:
5979 kvm_queue_exception_e(vcpu, AC_VECTOR, error_code);
5980 return 1;
Jan Kiszka42dbaa52008-12-15 13:52:10 +01005981 case DB_VECTOR:
5982 dr6 = vmcs_readl(EXIT_QUALIFICATION);
5983 if (!(vcpu->guest_debug &
5984 (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP))) {
Jan Kiszka8246bf52014-01-04 18:47:17 +01005985 vcpu->arch.dr6 &= ~15;
Nadav Amit6f43ed02014-07-15 17:37:46 +03005986 vcpu->arch.dr6 |= dr6 | DR6_RTM;
Huw Daviesfd2a4452014-04-16 10:02:51 +01005987 if (!(dr6 & ~DR6_RESERVED)) /* icebp */
5988 skip_emulated_instruction(vcpu);
5989
Jan Kiszka42dbaa52008-12-15 13:52:10 +01005990 kvm_queue_exception(vcpu, DB_VECTOR);
5991 return 1;
5992 }
5993 kvm_run->debug.arch.dr6 = dr6 | DR6_FIXED_1;
5994 kvm_run->debug.arch.dr7 = vmcs_readl(GUEST_DR7);
5995 /* fall through */
5996 case BP_VECTOR:
Jan Kiszkac573cd22010-02-23 17:47:53 +01005997 /*
5998 * Update instruction length as we may reinject #BP from
5999 * user space while in guest debugging mode. Reading it for
6000 * #DB as well causes no harm, it is not used in that case.
6001 */
6002 vmx->vcpu.arch.event_exit_inst_len =
6003 vmcs_read32(VM_EXIT_INSTRUCTION_LEN);
Avi Kivity6aa8b732006-12-10 02:21:36 -08006004 kvm_run->exit_reason = KVM_EXIT_DEBUG;
Avi Kivity0a434bb2011-04-28 15:59:33 +03006005 rip = kvm_rip_read(vcpu);
Jan Kiszkad0bfb942008-12-15 13:52:10 +01006006 kvm_run->debug.arch.pc = vmcs_readl(GUEST_CS_BASE) + rip;
6007 kvm_run->debug.arch.exception = ex_no;
Jan Kiszka42dbaa52008-12-15 13:52:10 +01006008 break;
6009 default:
Jan Kiszkad0bfb942008-12-15 13:52:10 +01006010 kvm_run->exit_reason = KVM_EXIT_EXCEPTION;
6011 kvm_run->ex.exception = ex_no;
6012 kvm_run->ex.error_code = error_code;
Jan Kiszka42dbaa52008-12-15 13:52:10 +01006013 break;
Avi Kivity6aa8b732006-12-10 02:21:36 -08006014 }
Avi Kivity6aa8b732006-12-10 02:21:36 -08006015 return 0;
6016}
6017
Avi Kivity851ba692009-08-24 11:10:17 +03006018static int handle_external_interrupt(struct kvm_vcpu *vcpu)
Avi Kivity6aa8b732006-12-10 02:21:36 -08006019{
Avi Kivity1165f5f2007-04-19 17:27:43 +03006020 ++vcpu->stat.irq_exits;
Avi Kivity6aa8b732006-12-10 02:21:36 -08006021 return 1;
6022}
6023
Avi Kivity851ba692009-08-24 11:10:17 +03006024static int handle_triple_fault(struct kvm_vcpu *vcpu)
Avi Kivity988ad742007-02-12 00:54:36 -08006025{
Avi Kivity851ba692009-08-24 11:10:17 +03006026 vcpu->run->exit_reason = KVM_EXIT_SHUTDOWN;
Wanpeng Libbeac282017-08-09 22:33:12 -07006027 vcpu->mmio_needed = 0;
Avi Kivity988ad742007-02-12 00:54:36 -08006028 return 0;
6029}
Avi Kivity6aa8b732006-12-10 02:21:36 -08006030
Avi Kivity851ba692009-08-24 11:10:17 +03006031static int handle_io(struct kvm_vcpu *vcpu)
Avi Kivity6aa8b732006-12-10 02:21:36 -08006032{
He, Qingbfdaab02007-09-12 14:18:28 +08006033 unsigned long exit_qualification;
Kyle Huey6affcbe2016-11-29 12:40:40 -08006034 int size, in, string, ret;
Avi Kivity039576c2007-03-20 12:46:50 +02006035 unsigned port;
Avi Kivity6aa8b732006-12-10 02:21:36 -08006036
He, Qingbfdaab02007-09-12 14:18:28 +08006037 exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
Avi Kivity039576c2007-03-20 12:46:50 +02006038 string = (exit_qualification & 16) != 0;
Laurent Viviere70669a2007-08-05 10:36:40 +03006039 in = (exit_qualification & 8) != 0;
Laurent Viviere70669a2007-08-05 10:36:40 +03006040
Gleb Natapovcf8f70b2010-03-18 15:20:23 +02006041 ++vcpu->stat.io_exits;
6042
6043 if (string || in)
Andre Przywara51d8b662010-12-21 11:12:02 +01006044 return emulate_instruction(vcpu, 0) == EMULATE_DONE;
Gleb Natapovcf8f70b2010-03-18 15:20:23 +02006045
6046 port = exit_qualification >> 16;
6047 size = (exit_qualification & 7) + 1;
Gleb Natapovcf8f70b2010-03-18 15:20:23 +02006048
Kyle Huey6affcbe2016-11-29 12:40:40 -08006049 ret = kvm_skip_emulated_instruction(vcpu);
6050
6051 /*
6052 * TODO: we might be squashing a KVM_GUESTDBG_SINGLESTEP-triggered
6053 * KVM_EXIT_DEBUG here.
6054 */
6055 return kvm_fast_pio_out(vcpu, size, port) && ret;
Avi Kivity6aa8b732006-12-10 02:21:36 -08006056}
6057
Ingo Molnar102d8322007-02-19 14:37:47 +02006058static void
6059vmx_patch_hypercall(struct kvm_vcpu *vcpu, unsigned char *hypercall)
6060{
6061 /*
6062 * Patch in the VMCALL instruction:
6063 */
6064 hypercall[0] = 0x0f;
6065 hypercall[1] = 0x01;
6066 hypercall[2] = 0xc1;
Ingo Molnar102d8322007-02-19 14:37:47 +02006067}
6068
Guo Chao0fa06072012-06-28 15:16:19 +08006069/* called to set cr0 as appropriate for a mov-to-cr0 exit. */
Nadav Har'Eleeadf9e2011-05-25 23:14:38 +03006070static int handle_set_cr0(struct kvm_vcpu *vcpu, unsigned long val)
6071{
Nadav Har'Eleeadf9e2011-05-25 23:14:38 +03006072 if (is_guest_mode(vcpu)) {
Jan Kiszka1a0d74e2013-03-07 14:08:07 +01006073 struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
6074 unsigned long orig_val = val;
6075
Nadav Har'Eleeadf9e2011-05-25 23:14:38 +03006076 /*
6077 * We get here when L2 changed cr0 in a way that did not change
6078 * any of L1's shadowed bits (see nested_vmx_exit_handled_cr),
Jan Kiszka1a0d74e2013-03-07 14:08:07 +01006079 * but did change L0 shadowed bits. So we first calculate the
6080 * effective cr0 value that L1 would like to write into the
6081 * hardware. It consists of the L2-owned bits from the new
6082 * value combined with the L1-owned bits from L1's guest_cr0.
Nadav Har'Eleeadf9e2011-05-25 23:14:38 +03006083 */
Jan Kiszka1a0d74e2013-03-07 14:08:07 +01006084 val = (val & ~vmcs12->cr0_guest_host_mask) |
6085 (vmcs12->guest_cr0 & vmcs12->cr0_guest_host_mask);
6086
David Matlack38991522016-11-29 18:14:08 -08006087 if (!nested_guest_cr0_valid(vcpu, val))
Nadav Har'Eleeadf9e2011-05-25 23:14:38 +03006088 return 1;
Jan Kiszka1a0d74e2013-03-07 14:08:07 +01006089
6090 if (kvm_set_cr0(vcpu, val))
6091 return 1;
6092 vmcs_writel(CR0_READ_SHADOW, orig_val);
Nadav Har'Eleeadf9e2011-05-25 23:14:38 +03006093 return 0;
Jan Kiszka1a0d74e2013-03-07 14:08:07 +01006094 } else {
6095 if (to_vmx(vcpu)->nested.vmxon &&
David Matlack38991522016-11-29 18:14:08 -08006096 !nested_host_cr0_valid(vcpu, val))
Jan Kiszka1a0d74e2013-03-07 14:08:07 +01006097 return 1;
David Matlack38991522016-11-29 18:14:08 -08006098
Nadav Har'Eleeadf9e2011-05-25 23:14:38 +03006099 return kvm_set_cr0(vcpu, val);
Jan Kiszka1a0d74e2013-03-07 14:08:07 +01006100 }
Nadav Har'Eleeadf9e2011-05-25 23:14:38 +03006101}
6102
6103static int handle_set_cr4(struct kvm_vcpu *vcpu, unsigned long val)
6104{
6105 if (is_guest_mode(vcpu)) {
Jan Kiszka1a0d74e2013-03-07 14:08:07 +01006106 struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
6107 unsigned long orig_val = val;
6108
6109 /* analogously to handle_set_cr0 */
6110 val = (val & ~vmcs12->cr4_guest_host_mask) |
6111 (vmcs12->guest_cr4 & vmcs12->cr4_guest_host_mask);
6112 if (kvm_set_cr4(vcpu, val))
Nadav Har'Eleeadf9e2011-05-25 23:14:38 +03006113 return 1;
Jan Kiszka1a0d74e2013-03-07 14:08:07 +01006114 vmcs_writel(CR4_READ_SHADOW, orig_val);
Nadav Har'Eleeadf9e2011-05-25 23:14:38 +03006115 return 0;
6116 } else
6117 return kvm_set_cr4(vcpu, val);
6118}
6119
Paolo Bonzini0367f202016-07-12 10:44:55 +02006120static int handle_desc(struct kvm_vcpu *vcpu)
6121{
6122 WARN_ON(!(vcpu->arch.cr4 & X86_CR4_UMIP));
6123 return emulate_instruction(vcpu, 0) == EMULATE_DONE;
6124}
6125
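/*
 * CR-access exit: decode the exit qualification into access type, CR number
 * and GPR, then route the access to the generic kvm_set_crN/kvm_get_crN and
 * lmsw/clts helpers.
 */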
Avi Kivity851ba692009-08-24 11:10:17 +03006126static int handle_cr(struct kvm_vcpu *vcpu)
Avi Kivity6aa8b732006-12-10 02:21:36 -08006127{
Marcelo Tosatti229456f2009-06-17 09:22:14 -03006128 unsigned long exit_qualification, val;
Avi Kivity6aa8b732006-12-10 02:21:36 -08006129 int cr;
6130 int reg;
Avi Kivity49a9b072010-06-10 17:02:14 +03006131 int err;
Kyle Huey6affcbe2016-11-29 12:40:40 -08006132 int ret;
Avi Kivity6aa8b732006-12-10 02:21:36 -08006133
He, Qingbfdaab02007-09-12 14:18:28 +08006134 exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
Avi Kivity6aa8b732006-12-10 02:21:36 -08006135 cr = exit_qualification & 15;
6136 reg = (exit_qualification >> 8) & 15;
6137 switch ((exit_qualification >> 4) & 3) {
6138 case 0: /* mov to cr */
Nadav Amit1e32c072014-06-18 17:19:25 +03006139 val = kvm_register_readl(vcpu, reg);
Marcelo Tosatti229456f2009-06-17 09:22:14 -03006140 trace_kvm_cr_write(cr, val);
Avi Kivity6aa8b732006-12-10 02:21:36 -08006141 switch (cr) {
6142 case 0:
Nadav Har'Eleeadf9e2011-05-25 23:14:38 +03006143 err = handle_set_cr0(vcpu, val);
Kyle Huey6affcbe2016-11-29 12:40:40 -08006144 return kvm_complete_insn_gp(vcpu, err);
Avi Kivity6aa8b732006-12-10 02:21:36 -08006145 case 3:
Avi Kivity23902182010-06-10 17:02:16 +03006146 err = kvm_set_cr3(vcpu, val);
Kyle Huey6affcbe2016-11-29 12:40:40 -08006147 return kvm_complete_insn_gp(vcpu, err);
Avi Kivity6aa8b732006-12-10 02:21:36 -08006148 case 4:
Nadav Har'Eleeadf9e2011-05-25 23:14:38 +03006149 err = handle_set_cr4(vcpu, val);
Kyle Huey6affcbe2016-11-29 12:40:40 -08006150 return kvm_complete_insn_gp(vcpu, err);
Gleb Natapov0a5fff192009-04-21 17:45:06 +03006151 case 8: {
6152 u8 cr8_prev = kvm_get_cr8(vcpu);
Nadav Amit1e32c072014-06-18 17:19:25 +03006153 u8 cr8 = (u8)val;
Andre Przywaraeea1cff2010-12-21 11:12:00 +01006154 err = kvm_set_cr8(vcpu, cr8);
Kyle Huey6affcbe2016-11-29 12:40:40 -08006155 ret = kvm_complete_insn_gp(vcpu, err);
Paolo Bonzini35754c92015-07-29 12:05:37 +02006156 if (lapic_in_kernel(vcpu))
Kyle Huey6affcbe2016-11-29 12:40:40 -08006157 return ret;
Gleb Natapov0a5fff192009-04-21 17:45:06 +03006158 if (cr8_prev <= cr8)
Kyle Huey6affcbe2016-11-29 12:40:40 -08006159 return ret;
6160 /*
6161 * TODO: we might be squashing a
6162 * KVM_GUESTDBG_SINGLESTEP-triggered
6163 * KVM_EXIT_DEBUG here.
6164 */
Avi Kivity851ba692009-08-24 11:10:17 +03006165 vcpu->run->exit_reason = KVM_EXIT_SET_TPR;
Gleb Natapov0a5fff192009-04-21 17:45:06 +03006166 return 0;
6167 }
Peter Senna Tschudin4b8073e2012-09-18 18:36:14 +02006168 }
Avi Kivity6aa8b732006-12-10 02:21:36 -08006169 break;
Anthony Liguori25c4c272007-04-27 09:29:21 +03006170 case 2: /* clts */
Paolo Bonzinibd7e5b02017-02-03 21:18:52 -08006171 WARN_ONCE(1, "Guest should always own CR0.TS");
6172 vmx_set_cr0(vcpu, kvm_read_cr0_bits(vcpu, ~X86_CR0_TS));
Avi Kivity4d4ec082009-12-29 18:07:30 +02006173 trace_kvm_cr_write(0, kvm_read_cr0(vcpu));
Kyle Huey6affcbe2016-11-29 12:40:40 -08006174 return kvm_skip_emulated_instruction(vcpu);
Avi Kivity6aa8b732006-12-10 02:21:36 -08006175 case 1: /*mov from cr*/
6176 switch (cr) {
6177 case 3:
Avi Kivity9f8fe502010-12-05 17:30:00 +02006178 val = kvm_read_cr3(vcpu);
6179 kvm_register_write(vcpu, reg, val);
6180 trace_kvm_cr_read(cr, val);
Kyle Huey6affcbe2016-11-29 12:40:40 -08006181 return kvm_skip_emulated_instruction(vcpu);
Avi Kivity6aa8b732006-12-10 02:21:36 -08006182 case 8:
Marcelo Tosatti229456f2009-06-17 09:22:14 -03006183 val = kvm_get_cr8(vcpu);
6184 kvm_register_write(vcpu, reg, val);
6185 trace_kvm_cr_read(cr, val);
Kyle Huey6affcbe2016-11-29 12:40:40 -08006186 return kvm_skip_emulated_instruction(vcpu);
Avi Kivity6aa8b732006-12-10 02:21:36 -08006187 }
6188 break;
6189 case 3: /* lmsw */
Avi Kivitya1f83a72009-12-29 17:33:58 +02006190 val = (exit_qualification >> LMSW_SOURCE_DATA_SHIFT) & 0x0f;
Avi Kivity4d4ec082009-12-29 18:07:30 +02006191 trace_kvm_cr_write(0, (kvm_read_cr0(vcpu) & ~0xful) | val);
Avi Kivitya1f83a72009-12-29 17:33:58 +02006192 kvm_lmsw(vcpu, val);
Avi Kivity6aa8b732006-12-10 02:21:36 -08006193
Kyle Huey6affcbe2016-11-29 12:40:40 -08006194 return kvm_skip_emulated_instruction(vcpu);
Avi Kivity6aa8b732006-12-10 02:21:36 -08006195 default:
6196 break;
6197 }
Avi Kivity851ba692009-08-24 11:10:17 +03006198 vcpu->run->exit_reason = 0;
Christoffer Dalla737f252012-06-03 21:17:48 +03006199 vcpu_unimpl(vcpu, "unhandled control register: op %d cr %d\n",
Avi Kivity6aa8b732006-12-10 02:21:36 -08006200 (int)(exit_qualification >> 4) & 3, cr);
6201 return 0;
6202}
6203
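/*
 * Debug-register access exit: verify that the DR exists and that the guest
 * is at CPL 0, honor DR7.GD, and either hand the access to the host debugger
 * or perform the MOV to/from DRn. If the host is not debugging the guest,
 * MOV-DR exiting is disabled and the debug registers are reloaded lazily.
 */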
Avi Kivity851ba692009-08-24 11:10:17 +03006204static int handle_dr(struct kvm_vcpu *vcpu)
Avi Kivity6aa8b732006-12-10 02:21:36 -08006205{
He, Qingbfdaab02007-09-12 14:18:28 +08006206 unsigned long exit_qualification;
Nadav Amit16f8a6f2014-10-03 01:10:05 +03006207 int dr, dr7, reg;
6208
6209 exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
6210 dr = exit_qualification & DEBUG_REG_ACCESS_NUM;
6211
6212 /* First, if DR does not exist, trigger UD */
6213 if (!kvm_require_dr(vcpu, dr))
6214 return 1;
Avi Kivity6aa8b732006-12-10 02:21:36 -08006215
Jan Kiszkaf2483412010-01-20 18:20:20 +01006216 /* Do not handle if the CPL > 0, will trigger GP on re-entry */
Avi Kivity0a79b002009-09-01 12:03:25 +03006217 if (!kvm_require_cpl(vcpu, 0))
6218 return 1;
Nadav Amit16f8a6f2014-10-03 01:10:05 +03006219 dr7 = vmcs_readl(GUEST_DR7);
6220 if (dr7 & DR7_GD) {
Jan Kiszka42dbaa52008-12-15 13:52:10 +01006221 /*
6222 * As the vm-exit takes precedence over the debug trap, we
6223 * need to emulate the latter, either for the host or the
6224 * guest debugging itself.
6225 */
6226 if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP) {
Avi Kivity851ba692009-08-24 11:10:17 +03006227 vcpu->run->debug.arch.dr6 = vcpu->arch.dr6;
Nadav Amit16f8a6f2014-10-03 01:10:05 +03006228 vcpu->run->debug.arch.dr7 = dr7;
Nadav Amit82b32772014-11-02 11:54:45 +02006229 vcpu->run->debug.arch.pc = kvm_get_linear_rip(vcpu);
Avi Kivity851ba692009-08-24 11:10:17 +03006230 vcpu->run->debug.arch.exception = DB_VECTOR;
6231 vcpu->run->exit_reason = KVM_EXIT_DEBUG;
Jan Kiszka42dbaa52008-12-15 13:52:10 +01006232 return 0;
6233 } else {
Nadav Amit7305eb52014-11-02 11:54:44 +02006234 vcpu->arch.dr6 &= ~15;
Nadav Amit6f43ed02014-07-15 17:37:46 +03006235 vcpu->arch.dr6 |= DR6_BD | DR6_RTM;
Jan Kiszka42dbaa52008-12-15 13:52:10 +01006236 kvm_queue_exception(vcpu, DB_VECTOR);
6237 return 1;
6238 }
6239 }
6240
Paolo Bonzini81908bf2014-02-21 10:32:27 +01006241 if (vcpu->guest_debug == 0) {
Paolo Bonzini8f223722016-02-26 12:09:49 +01006242 vmcs_clear_bits(CPU_BASED_VM_EXEC_CONTROL,
6243 CPU_BASED_MOV_DR_EXITING);
Paolo Bonzini81908bf2014-02-21 10:32:27 +01006244
6245 /*
6246 * No more DR vmexits; force a reload of the debug registers
6247 * and reenter on this instruction. The next vmexit will
6248 * retrieve the full state of the debug registers.
6249 */
6250 vcpu->arch.switch_db_regs |= KVM_DEBUGREG_WONT_EXIT;
6251 return 1;
6252 }
6253
Jan Kiszka42dbaa52008-12-15 13:52:10 +01006254 reg = DEBUG_REG_ACCESS_REG(exit_qualification);
6255 if (exit_qualification & TYPE_MOV_FROM_DR) {
Gleb Natapov020df072010-04-13 10:05:23 +03006256 unsigned long val;
Jan Kiszka4c4d5632013-12-18 19:16:24 +01006257
6258 if (kvm_get_dr(vcpu, dr, &val))
6259 return 1;
6260 kvm_register_write(vcpu, reg, val);
Gleb Natapov020df072010-04-13 10:05:23 +03006261 } else
Nadav Amit57773922014-06-18 17:19:23 +03006262 if (kvm_set_dr(vcpu, dr, kvm_register_readl(vcpu, reg)))
Jan Kiszka4c4d5632013-12-18 19:16:24 +01006263 return 1;
6264
Kyle Huey6affcbe2016-11-29 12:40:40 -08006265 return kvm_skip_emulated_instruction(vcpu);
Avi Kivity6aa8b732006-12-10 02:21:36 -08006266}
6267
Jan Kiszka73aaf249e2014-01-04 18:47:16 +01006268static u64 vmx_get_dr6(struct kvm_vcpu *vcpu)
6269{
6270 return vcpu->arch.dr6;
6271}
6272
6273static void vmx_set_dr6(struct kvm_vcpu *vcpu, unsigned long val)
6274{
6275}
6276
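/*
 * Called after the guest has touched a debug register with MOV-DR exiting
 * disabled: snapshot the hardware debug-register state into the vcpu and
 * re-enable the MOV-DR intercept.
 */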
Paolo Bonzini81908bf2014-02-21 10:32:27 +01006277static void vmx_sync_dirty_debug_regs(struct kvm_vcpu *vcpu)
6278{
Paolo Bonzini81908bf2014-02-21 10:32:27 +01006279 get_debugreg(vcpu->arch.db[0], 0);
6280 get_debugreg(vcpu->arch.db[1], 1);
6281 get_debugreg(vcpu->arch.db[2], 2);
6282 get_debugreg(vcpu->arch.db[3], 3);
6283 get_debugreg(vcpu->arch.dr6, 6);
6284 vcpu->arch.dr7 = vmcs_readl(GUEST_DR7);
6285
6286 vcpu->arch.switch_db_regs &= ~KVM_DEBUGREG_WONT_EXIT;
Paolo Bonzini8f223722016-02-26 12:09:49 +01006287 vmcs_set_bits(CPU_BASED_VM_EXEC_CONTROL, CPU_BASED_MOV_DR_EXITING);
Paolo Bonzini81908bf2014-02-21 10:32:27 +01006288}
6289
Gleb Natapov020df072010-04-13 10:05:23 +03006290static void vmx_set_dr7(struct kvm_vcpu *vcpu, unsigned long val)
6291{
6292 vmcs_writel(GUEST_DR7, val);
6293}
6294
Avi Kivity851ba692009-08-24 11:10:17 +03006295static int handle_cpuid(struct kvm_vcpu *vcpu)
Avi Kivity6aa8b732006-12-10 02:21:36 -08006296{
Kyle Huey6a908b62016-11-29 12:40:37 -08006297 return kvm_emulate_cpuid(vcpu);
Avi Kivity6aa8b732006-12-10 02:21:36 -08006298}
6299
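/* RDMSR exit: read the MSR selected by ECX into EDX:EAX, or inject #GP. */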
Avi Kivity851ba692009-08-24 11:10:17 +03006300static int handle_rdmsr(struct kvm_vcpu *vcpu)
Avi Kivity6aa8b732006-12-10 02:21:36 -08006301{
Zhang Xiantaoad312c72007-12-13 23:50:52 +08006302 u32 ecx = vcpu->arch.regs[VCPU_REGS_RCX];
Paolo Bonzini609e36d2015-04-08 15:30:38 +02006303 struct msr_data msr_info;
Avi Kivity6aa8b732006-12-10 02:21:36 -08006304
Paolo Bonzini609e36d2015-04-08 15:30:38 +02006305 msr_info.index = ecx;
6306 msr_info.host_initiated = false;
6307 if (vmx_get_msr(vcpu, &msr_info)) {
Avi Kivity59200272010-01-25 19:47:02 +02006308 trace_kvm_msr_read_ex(ecx);
Avi Kivityc1a5d4f2007-11-25 14:12:03 +02006309 kvm_inject_gp(vcpu, 0);
Avi Kivity6aa8b732006-12-10 02:21:36 -08006310 return 1;
6311 }
6312
Paolo Bonzini609e36d2015-04-08 15:30:38 +02006313 trace_kvm_msr_read(ecx, msr_info.data);
Feng (Eric) Liu2714d1d2008-04-10 15:31:10 -04006314
Avi Kivity6aa8b732006-12-10 02:21:36 -08006315 /* FIXME: handling of bits 32:63 of rax, rdx */
Paolo Bonzini609e36d2015-04-08 15:30:38 +02006316 vcpu->arch.regs[VCPU_REGS_RAX] = msr_info.data & -1u;
6317 vcpu->arch.regs[VCPU_REGS_RDX] = (msr_info.data >> 32) & -1u;
Kyle Huey6affcbe2016-11-29 12:40:40 -08006318 return kvm_skip_emulated_instruction(vcpu);
Avi Kivity6aa8b732006-12-10 02:21:36 -08006319}
6320
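/*
 * WRMSR exit: assemble the 64-bit value from EDX:EAX and write it to the MSR
 * selected by ECX, injecting #GP if the write is rejected.
 */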
Avi Kivity851ba692009-08-24 11:10:17 +03006321static int handle_wrmsr(struct kvm_vcpu *vcpu)
Avi Kivity6aa8b732006-12-10 02:21:36 -08006322{
Will Auld8fe8ab42012-11-29 12:42:12 -08006323 struct msr_data msr;
Zhang Xiantaoad312c72007-12-13 23:50:52 +08006324 u32 ecx = vcpu->arch.regs[VCPU_REGS_RCX];
6325 u64 data = (vcpu->arch.regs[VCPU_REGS_RAX] & -1u)
6326 | ((u64)(vcpu->arch.regs[VCPU_REGS_RDX] & -1u) << 32);
Avi Kivity6aa8b732006-12-10 02:21:36 -08006327
Will Auld8fe8ab42012-11-29 12:42:12 -08006328 msr.data = data;
6329 msr.index = ecx;
6330 msr.host_initiated = false;
Nadav Amit854e8bb2014-09-16 03:24:05 +03006331 if (kvm_set_msr(vcpu, &msr) != 0) {
Avi Kivity59200272010-01-25 19:47:02 +02006332 trace_kvm_msr_write_ex(ecx, data);
Avi Kivityc1a5d4f2007-11-25 14:12:03 +02006333 kvm_inject_gp(vcpu, 0);
Avi Kivity6aa8b732006-12-10 02:21:36 -08006334 return 1;
6335 }
6336
Avi Kivity59200272010-01-25 19:47:02 +02006337 trace_kvm_msr_write(ecx, data);
Kyle Huey6affcbe2016-11-29 12:40:40 -08006338 return kvm_skip_emulated_instruction(vcpu);
Avi Kivity6aa8b732006-12-10 02:21:36 -08006339}
6340
Avi Kivity851ba692009-08-24 11:10:17 +03006341static int handle_tpr_below_threshold(struct kvm_vcpu *vcpu)
Yang, Sheng6e5d8652007-09-12 18:03:11 +08006342{
Paolo Bonzinieb90f342016-12-18 14:02:21 +01006343 kvm_apic_update_ppr(vcpu);
Yang, Sheng6e5d8652007-09-12 18:03:11 +08006344 return 1;
6345}
6346
Avi Kivity851ba692009-08-24 11:10:17 +03006347static int handle_interrupt_window(struct kvm_vcpu *vcpu)
Avi Kivity6aa8b732006-12-10 02:21:36 -08006348{
Paolo Bonzini47c01522016-12-19 11:44:07 +01006349 vmcs_clear_bits(CPU_BASED_VM_EXEC_CONTROL,
6350 CPU_BASED_VIRTUAL_INTR_PENDING);
Feng (Eric) Liu2714d1d2008-04-10 15:31:10 -04006351
Avi Kivity3842d132010-07-27 12:30:24 +03006352 kvm_make_request(KVM_REQ_EVENT, vcpu);
6353
Jan Kiszkaa26bf122008-09-26 09:30:45 +02006354 ++vcpu->stat.irq_window_exits;
Avi Kivity6aa8b732006-12-10 02:21:36 -08006355 return 1;
6356}
6357
Avi Kivity851ba692009-08-24 11:10:17 +03006358static int handle_halt(struct kvm_vcpu *vcpu)
Avi Kivity6aa8b732006-12-10 02:21:36 -08006359{
Avi Kivityd3bef152007-06-05 15:53:05 +03006360 return kvm_emulate_halt(vcpu);
Avi Kivity6aa8b732006-12-10 02:21:36 -08006361}
6362
Avi Kivity851ba692009-08-24 11:10:17 +03006363static int handle_vmcall(struct kvm_vcpu *vcpu)
Ingo Molnarc21415e2007-02-19 14:37:47 +02006364{
Andrey Smetanin0d9c0552016-02-11 16:44:59 +03006365 return kvm_emulate_hypercall(vcpu);
Ingo Molnarc21415e2007-02-19 14:37:47 +02006366}
6367
Gleb Natapovec25d5e2010-11-01 15:35:01 +02006368static int handle_invd(struct kvm_vcpu *vcpu)
6369{
Andre Przywara51d8b662010-12-21 11:12:02 +01006370 return emulate_instruction(vcpu, 0) == EMULATE_DONE;
Gleb Natapovec25d5e2010-11-01 15:35:01 +02006371}
6372
Avi Kivity851ba692009-08-24 11:10:17 +03006373static int handle_invlpg(struct kvm_vcpu *vcpu)
Marcelo Tosattia7052892008-09-23 13:18:35 -03006374{
Sheng Yangf9c617f2009-03-25 10:08:52 +08006375 unsigned long exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
Marcelo Tosattia7052892008-09-23 13:18:35 -03006376
6377 kvm_mmu_invlpg(vcpu, exit_qualification);
Kyle Huey6affcbe2016-11-29 12:40:40 -08006378 return kvm_skip_emulated_instruction(vcpu);
Marcelo Tosattia7052892008-09-23 13:18:35 -03006379}
6380
Avi Kivityfee84b02011-11-10 14:57:25 +02006381static int handle_rdpmc(struct kvm_vcpu *vcpu)
6382{
6383 int err;
6384
6385 err = kvm_rdpmc(vcpu);
Kyle Huey6affcbe2016-11-29 12:40:40 -08006386 return kvm_complete_insn_gp(vcpu, err);
Avi Kivityfee84b02011-11-10 14:57:25 +02006387}
6388
Avi Kivity851ba692009-08-24 11:10:17 +03006389static int handle_wbinvd(struct kvm_vcpu *vcpu)
Eddie Donge5edaa02007-11-11 12:28:35 +02006390{
Kyle Huey6affcbe2016-11-29 12:40:40 -08006391 return kvm_emulate_wbinvd(vcpu);
Eddie Donge5edaa02007-11-11 12:28:35 +02006392}
6393
Dexuan Cui2acf9232010-06-10 11:27:12 +08006394static int handle_xsetbv(struct kvm_vcpu *vcpu)
6395{
6396 u64 new_bv = kvm_read_edx_eax(vcpu);
6397 u32 index = kvm_register_read(vcpu, VCPU_REGS_RCX);
6398
6399 if (kvm_set_xcr(vcpu, index, new_bv) == 0)
Kyle Huey6affcbe2016-11-29 12:40:40 -08006400 return kvm_skip_emulated_instruction(vcpu);
Dexuan Cui2acf9232010-06-10 11:27:12 +08006401 return 1;
6402}
6403
Wanpeng Lif53cd632014-12-02 19:14:58 +08006404static int handle_xsaves(struct kvm_vcpu *vcpu)
6405{
Kyle Huey6affcbe2016-11-29 12:40:40 -08006406 kvm_skip_emulated_instruction(vcpu);
Wanpeng Lif53cd632014-12-02 19:14:58 +08006407 WARN(1, "this should never happen\n");
6408 return 1;
6409}
6410
6411static int handle_xrstors(struct kvm_vcpu *vcpu)
6412{
Kyle Huey6affcbe2016-11-29 12:40:40 -08006413 kvm_skip_emulated_instruction(vcpu);
Wanpeng Lif53cd632014-12-02 19:14:58 +08006414 WARN(1, "this should never happen\n");
6415 return 1;
6416}
6417
Avi Kivity851ba692009-08-24 11:10:17 +03006418static int handle_apic_access(struct kvm_vcpu *vcpu)
Sheng Yangf78e0e22007-10-29 09:40:42 +08006419{
Kevin Tian58fbbf22011-08-30 13:56:17 +03006420 if (likely(fasteoi)) {
6421 unsigned long exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
6422 int access_type, offset;
6423
6424 access_type = exit_qualification & APIC_ACCESS_TYPE;
6425 offset = exit_qualification & APIC_ACCESS_OFFSET;
6426 /*
6427		 * A sane guest uses MOV to write EOI, and the written value
6428		 * is ignored. Short-circuit that case here to avoid heavy
6429		 * instruction emulation.
6430 */
6431 if ((access_type == TYPE_LINEAR_APIC_INST_WRITE) &&
6432 (offset == APIC_EOI)) {
6433 kvm_lapic_set_eoi(vcpu);
Kyle Huey6affcbe2016-11-29 12:40:40 -08006434 return kvm_skip_emulated_instruction(vcpu);
Kevin Tian58fbbf22011-08-30 13:56:17 +03006435 }
6436 }
Andre Przywara51d8b662010-12-21 11:12:02 +01006437 return emulate_instruction(vcpu, 0) == EMULATE_DONE;
Sheng Yangf78e0e22007-10-29 09:40:42 +08006438}
6439
Yang Zhangc7c9c562013-01-25 10:18:51 +08006440static int handle_apic_eoi_induced(struct kvm_vcpu *vcpu)
6441{
6442 unsigned long exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
6443 int vector = exit_qualification & 0xff;
6444
6445 /* EOI-induced VM exit is trap-like and thus no need to adjust IP */
6446 kvm_apic_set_eoi_accelerated(vcpu, vector);
6447 return 1;
6448}
6449
Yang Zhang83d4c282013-01-25 10:18:49 +08006450static int handle_apic_write(struct kvm_vcpu *vcpu)
6451{
6452 unsigned long exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
6453 u32 offset = exit_qualification & 0xfff;
6454
6455 /* APIC-write VM exit is trap-like and thus no need to adjust IP */
6456 kvm_apic_write_nodecode(vcpu, offset);
6457 return 1;
6458}
6459
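/*
 * Task-switch exit: undo whatever event injection was in flight (NMI,
 * interrupt or exception re-injection), skip the instruction where
 * appropriate, and let kvm_task_switch() emulate the switch itself.
 */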
Avi Kivity851ba692009-08-24 11:10:17 +03006460static int handle_task_switch(struct kvm_vcpu *vcpu)
Izik Eidus37817f22008-03-24 23:14:53 +02006461{
Jan Kiszka60637aa2008-09-26 09:30:47 +02006462 struct vcpu_vmx *vmx = to_vmx(vcpu);
Izik Eidus37817f22008-03-24 23:14:53 +02006463 unsigned long exit_qualification;
Jan Kiszkae269fb22010-04-14 15:51:09 +02006464 bool has_error_code = false;
6465 u32 error_code = 0;
Izik Eidus37817f22008-03-24 23:14:53 +02006466 u16 tss_selector;
Kevin Wolf7f3d35f2012-02-08 14:34:38 +01006467 int reason, type, idt_v, idt_index;
Gleb Natapov64a7ec02009-03-30 16:03:29 +03006468
6469 idt_v = (vmx->idt_vectoring_info & VECTORING_INFO_VALID_MASK);
Kevin Wolf7f3d35f2012-02-08 14:34:38 +01006470 idt_index = (vmx->idt_vectoring_info & VECTORING_INFO_VECTOR_MASK);
Gleb Natapov64a7ec02009-03-30 16:03:29 +03006471 type = (vmx->idt_vectoring_info & VECTORING_INFO_TYPE_MASK);
Izik Eidus37817f22008-03-24 23:14:53 +02006472
6473 exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
6474
6475 reason = (u32)exit_qualification >> 30;
Gleb Natapov64a7ec02009-03-30 16:03:29 +03006476 if (reason == TASK_SWITCH_GATE && idt_v) {
6477 switch (type) {
6478 case INTR_TYPE_NMI_INTR:
6479 vcpu->arch.nmi_injected = false;
Avi Kivity654f06f2011-03-23 15:02:47 +02006480 vmx_set_nmi_mask(vcpu, true);
Gleb Natapov64a7ec02009-03-30 16:03:29 +03006481 break;
6482 case INTR_TYPE_EXT_INTR:
Gleb Natapov66fd3f72009-05-11 13:35:50 +03006483 case INTR_TYPE_SOFT_INTR:
Gleb Natapov64a7ec02009-03-30 16:03:29 +03006484 kvm_clear_interrupt_queue(vcpu);
6485 break;
6486 case INTR_TYPE_HARD_EXCEPTION:
Jan Kiszkae269fb22010-04-14 15:51:09 +02006487 if (vmx->idt_vectoring_info &
6488 VECTORING_INFO_DELIVER_CODE_MASK) {
6489 has_error_code = true;
6490 error_code =
6491 vmcs_read32(IDT_VECTORING_ERROR_CODE);
6492 }
6493 /* fall through */
Gleb Natapov64a7ec02009-03-30 16:03:29 +03006494 case INTR_TYPE_SOFT_EXCEPTION:
6495 kvm_clear_exception_queue(vcpu);
6496 break;
6497 default:
6498 break;
6499 }
Jan Kiszka60637aa2008-09-26 09:30:47 +02006500 }
Izik Eidus37817f22008-03-24 23:14:53 +02006501 tss_selector = exit_qualification;
6502
Gleb Natapov64a7ec02009-03-30 16:03:29 +03006503 if (!idt_v || (type != INTR_TYPE_HARD_EXCEPTION &&
6504 type != INTR_TYPE_EXT_INTR &&
6505 type != INTR_TYPE_NMI_INTR))
6506 skip_emulated_instruction(vcpu);
6507
Kevin Wolf7f3d35f2012-02-08 14:34:38 +01006508 if (kvm_task_switch(vcpu, tss_selector,
6509 type == INTR_TYPE_SOFT_INTR ? idt_index : -1, reason,
6510 has_error_code, error_code) == EMULATE_FAIL) {
Gleb Natapovacb54512010-04-15 21:03:50 +03006511 vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
6512 vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION;
6513 vcpu->run->internal.ndata = 0;
Jan Kiszka42dbaa52008-12-15 13:52:10 +01006514 return 0;
Gleb Natapovacb54512010-04-15 21:03:50 +03006515 }
Jan Kiszka42dbaa52008-12-15 13:52:10 +01006516
Jan Kiszka42dbaa52008-12-15 13:52:10 +01006517 /*
6518 * TODO: What about debug traps on tss switch?
6519 * Are we supposed to inject them and update dr6?
6520 */
6521
6522 return 1;
Izik Eidus37817f22008-03-24 23:14:53 +02006523}
6524
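/*
 * EPT violation: translate the exit qualification into a PFERR_* error code
 * and hand the faulting guest-physical address to the MMU.
 */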
Avi Kivity851ba692009-08-24 11:10:17 +03006525static int handle_ept_violation(struct kvm_vcpu *vcpu)
Sheng Yang14394422008-04-28 12:24:45 +08006526{
Sheng Yangf9c617f2009-03-25 10:08:52 +08006527 unsigned long exit_qualification;
Sheng Yang14394422008-04-28 12:24:45 +08006528 gpa_t gpa;
Paolo Bonzinieebed242016-11-28 14:39:58 +01006529 u64 error_code;
Sheng Yang14394422008-04-28 12:24:45 +08006530
Sheng Yangf9c617f2009-03-25 10:08:52 +08006531 exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
Sheng Yang14394422008-04-28 12:24:45 +08006532
Gleb Natapov0be9c7a2013-09-15 11:07:23 +03006533 /*
6534 * EPT violation happened while executing iret from NMI,
6535 * "blocked by NMI" bit has to be set before next VM entry.
6536 * There are errata that may cause this bit to not be set:
6537 * AAK134, BY25.
6538 */
Gleb Natapovbcd1c292013-09-25 10:58:22 +03006539 if (!(to_vmx(vcpu)->idt_vectoring_info & VECTORING_INFO_VALID_MASK) &&
Paolo Bonzinid02fcf52017-11-06 13:31:13 +01006540 enable_vnmi &&
Gleb Natapovbcd1c292013-09-25 10:58:22 +03006541 (exit_qualification & INTR_INFO_UNBLOCK_NMI))
Gleb Natapov0be9c7a2013-09-15 11:07:23 +03006542 vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO, GUEST_INTR_STATE_NMI);
6543
Sheng Yang14394422008-04-28 12:24:45 +08006544 gpa = vmcs_read64(GUEST_PHYSICAL_ADDRESS);
Marcelo Tosatti229456f2009-06-17 09:22:14 -03006545 trace_kvm_page_fault(gpa, exit_qualification);
Xiao Guangrong4f5982a2012-06-20 15:58:04 +08006546
Junaid Shahid27959a42016-12-06 16:46:10 -08006547 /* Is it a read fault? */
Junaid Shahidab22a472016-12-21 20:29:28 -08006548 error_code = (exit_qualification & EPT_VIOLATION_ACC_READ)
Junaid Shahid27959a42016-12-06 16:46:10 -08006549 ? PFERR_USER_MASK : 0;
6550 /* Is it a write fault? */
Junaid Shahidab22a472016-12-21 20:29:28 -08006551 error_code |= (exit_qualification & EPT_VIOLATION_ACC_WRITE)
Junaid Shahid27959a42016-12-06 16:46:10 -08006552 ? PFERR_WRITE_MASK : 0;
6553 /* Is it a fetch fault? */
Junaid Shahidab22a472016-12-21 20:29:28 -08006554 error_code |= (exit_qualification & EPT_VIOLATION_ACC_INSTR)
Junaid Shahid27959a42016-12-06 16:46:10 -08006555 ? PFERR_FETCH_MASK : 0;
6556 /* ept page table entry is present? */
6557 error_code |= (exit_qualification &
6558 (EPT_VIOLATION_READABLE | EPT_VIOLATION_WRITABLE |
6559 EPT_VIOLATION_EXECUTABLE))
6560 ? PFERR_PRESENT_MASK : 0;
Xiao Guangrong4f5982a2012-06-20 15:58:04 +08006561
Paolo Bonzinieebed242016-11-28 14:39:58 +01006562 error_code |= (exit_qualification & 0x100) != 0 ?
6563 PFERR_GUEST_FINAL_MASK : PFERR_GUEST_PAGE_MASK;
Yang Zhang25d92082013-08-06 12:00:32 +03006564
Xiao Guangrong4f5982a2012-06-20 15:58:04 +08006565 vcpu->arch.exit_qualification = exit_qualification;
Xiao Guangrong4f5982a2012-06-20 15:58:04 +08006566 return kvm_mmu_page_fault(vcpu, gpa, error_code, NULL, 0);
Sheng Yang14394422008-04-28 12:24:45 +08006567}
6568
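/*
 * EPT misconfiguration is how MMIO accesses surface when EPT is enabled:
 * try the fast MMIO bus first, otherwise fall back to full MMIO emulation
 * through the page-fault path.
 */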
Avi Kivity851ba692009-08-24 11:10:17 +03006569static int handle_ept_misconfig(struct kvm_vcpu *vcpu)
Marcelo Tosatti68f89402009-06-11 12:07:43 -03006570{
Xiao Guangrongf735d4a2015-08-05 12:04:27 +08006571 int ret;
Marcelo Tosatti68f89402009-06-11 12:07:43 -03006572 gpa_t gpa;
6573
Paolo Bonzini9034e6e2017-08-17 18:36:58 +02006574 /*
6575 * A nested guest cannot optimize MMIO vmexits, because we have an
6576 * nGPA here instead of the required GPA.
6577 */
Marcelo Tosatti68f89402009-06-11 12:07:43 -03006578 gpa = vmcs_read64(GUEST_PHYSICAL_ADDRESS);
Paolo Bonzini9034e6e2017-08-17 18:36:58 +02006579 if (!is_guest_mode(vcpu) &&
6580 !kvm_io_bus_write(vcpu, KVM_FAST_MMIO_BUS, gpa, 0, NULL)) {
Jason Wang931c33b2015-09-15 14:41:58 +08006581 trace_kvm_fast_mmio(gpa);
Kyle Huey6affcbe2016-11-29 12:40:40 -08006582 return kvm_skip_emulated_instruction(vcpu);
Michael S. Tsirkin68c3b4d2014-03-31 21:50:44 +03006583 }
Marcelo Tosatti68f89402009-06-11 12:07:43 -03006584
Paolo Bonzinie08d26f2017-08-17 18:36:56 +02006585 ret = kvm_mmu_page_fault(vcpu, gpa, PFERR_RSVD_MASK, NULL, 0);
6586 if (ret >= 0)
6587 return ret;
Xiao Guangrongce88dec2011-07-12 03:33:44 +08006588
6589 /* It is the real ept misconfig */
Xiao Guangrongf735d4a2015-08-05 12:04:27 +08006590 WARN_ON(1);
Marcelo Tosatti68f89402009-06-11 12:07:43 -03006591
Avi Kivity851ba692009-08-24 11:10:17 +03006592 vcpu->run->exit_reason = KVM_EXIT_UNKNOWN;
6593 vcpu->run->hw.hardware_exit_reason = EXIT_REASON_EPT_MISCONFIG;
Marcelo Tosatti68f89402009-06-11 12:07:43 -03006594
6595 return 0;
6596}
6597
Avi Kivity851ba692009-08-24 11:10:17 +03006598static int handle_nmi_window(struct kvm_vcpu *vcpu)
Sheng Yangf08864b2008-05-15 18:23:25 +08006599{
Paolo Bonzinid02fcf52017-11-06 13:31:13 +01006600 WARN_ON_ONCE(!enable_vnmi);
Paolo Bonzini47c01522016-12-19 11:44:07 +01006601 vmcs_clear_bits(CPU_BASED_VM_EXEC_CONTROL,
6602 CPU_BASED_VIRTUAL_NMI_PENDING);
Sheng Yangf08864b2008-05-15 18:23:25 +08006603 ++vcpu->stat.nmi_window_exits;
Avi Kivity3842d132010-07-27 12:30:24 +03006604 kvm_make_request(KVM_REQ_EVENT, vcpu);
Sheng Yangf08864b2008-05-15 18:23:25 +08006605
6606 return 1;
6607}
6608
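/*
 * Emulate the guest instruction by instruction while its state cannot be
 * virtualized natively (emulation_required), bailing out to userspace on
 * emulation failure, pending signals or a halt request.
 */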
Mohammed Gamal80ced182009-09-01 12:48:18 +02006609static int handle_invalid_guest_state(struct kvm_vcpu *vcpu)
Mohammed Gamalea953ef2008-08-17 16:47:05 +03006610{
Avi Kivity8b3079a2009-01-05 12:10:54 +02006611 struct vcpu_vmx *vmx = to_vmx(vcpu);
6612 enum emulation_result err = EMULATE_DONE;
Mohammed Gamal80ced182009-09-01 12:48:18 +02006613 int ret = 1;
Avi Kivity49e9d552010-09-19 14:34:08 +02006614 u32 cpu_exec_ctrl;
6615 bool intr_window_requested;
Avi Kivityb8405c12012-06-07 17:08:48 +03006616 unsigned count = 130;
Avi Kivity49e9d552010-09-19 14:34:08 +02006617
6618 cpu_exec_ctrl = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL);
6619 intr_window_requested = cpu_exec_ctrl & CPU_BASED_VIRTUAL_INTR_PENDING;
Mohammed Gamalea953ef2008-08-17 16:47:05 +03006620
Paolo Bonzini98eb2f82014-03-27 09:51:52 +01006621 while (vmx->emulation_required && count-- != 0) {
Avi Kivitybdea48e2012-06-10 18:07:57 +03006622 if (intr_window_requested && vmx_interrupt_allowed(vcpu))
Avi Kivity49e9d552010-09-19 14:34:08 +02006623 return handle_interrupt_window(&vmx->vcpu);
6624
Radim Krčmář72875d82017-04-26 22:32:19 +02006625 if (kvm_test_request(KVM_REQ_EVENT, vcpu))
Avi Kivityde87dcdd2012-06-12 20:21:38 +03006626 return 1;
6627
Liran Alon9b8ae632017-11-05 16:56:34 +02006628 err = emulate_instruction(vcpu, 0);
Mohammed Gamalea953ef2008-08-17 16:47:05 +03006629
Paolo Bonziniac0a48c2013-06-25 18:24:41 +02006630 if (err == EMULATE_USER_EXIT) {
Paolo Bonzini94452b92013-08-27 15:41:42 +02006631 ++vcpu->stat.mmio_exits;
Mohammed Gamal80ced182009-09-01 12:48:18 +02006632 ret = 0;
6633 goto out;
6634 }
Guillaume Thouvenin1d5a4d92008-10-29 09:39:42 +01006635
Avi Kivityde5f70e2012-06-12 20:22:28 +03006636 if (err != EMULATE_DONE) {
6637 vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
6638 vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION;
6639 vcpu->run->internal.ndata = 0;
Gleb Natapov6d77dbf2010-05-10 11:16:56 +03006640 return 0;
Avi Kivityde5f70e2012-06-12 20:22:28 +03006641 }
Mohammed Gamalea953ef2008-08-17 16:47:05 +03006642
Gleb Natapov8d76c492013-05-08 18:38:44 +03006643 if (vcpu->arch.halt_request) {
6644 vcpu->arch.halt_request = 0;
Joel Schopp5cb56052015-03-02 13:43:31 -06006645 ret = kvm_vcpu_halt(vcpu);
Gleb Natapov8d76c492013-05-08 18:38:44 +03006646 goto out;
6647 }
6648
Mohammed Gamalea953ef2008-08-17 16:47:05 +03006649 if (signal_pending(current))
Mohammed Gamal80ced182009-09-01 12:48:18 +02006650 goto out;
Mohammed Gamalea953ef2008-08-17 16:47:05 +03006651 if (need_resched())
6652 schedule();
6653 }
6654
Mohammed Gamal80ced182009-09-01 12:48:18 +02006655out:
6656 return ret;
Mohammed Gamalea953ef2008-08-17 16:47:05 +03006657}
6658
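/*
 * PLE (pause-loop exiting) window helpers: scale a vcpu's PLE window up or
 * down within the bounds given by the ple_window* module parameters.
 */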
Radim Krčmářb4a2d312014-08-21 18:08:08 +02006659static int __grow_ple_window(int val)
6660{
6661 if (ple_window_grow < 1)
6662 return ple_window;
6663
6664 val = min(val, ple_window_actual_max);
6665
6666 if (ple_window_grow < ple_window)
6667 val *= ple_window_grow;
6668 else
6669 val += ple_window_grow;
6670
6671 return val;
6672}
6673
6674static int __shrink_ple_window(int val, int modifier, int minimum)
6675{
6676 if (modifier < 1)
6677 return ple_window;
6678
6679 if (modifier < ple_window)
6680 val /= modifier;
6681 else
6682 val -= modifier;
6683
6684 return max(val, minimum);
6685}
6686
6687static void grow_ple_window(struct kvm_vcpu *vcpu)
6688{
6689 struct vcpu_vmx *vmx = to_vmx(vcpu);
6690 int old = vmx->ple_window;
6691
6692 vmx->ple_window = __grow_ple_window(old);
6693
6694 if (vmx->ple_window != old)
6695 vmx->ple_window_dirty = true;
Radim Krčmář7b462682014-08-21 18:08:09 +02006696
6697 trace_kvm_ple_window_grow(vcpu->vcpu_id, vmx->ple_window, old);
Radim Krčmářb4a2d312014-08-21 18:08:08 +02006698}
6699
6700static void shrink_ple_window(struct kvm_vcpu *vcpu)
6701{
6702 struct vcpu_vmx *vmx = to_vmx(vcpu);
6703 int old = vmx->ple_window;
6704
6705 vmx->ple_window = __shrink_ple_window(old,
6706 ple_window_shrink, ple_window);
6707
6708 if (vmx->ple_window != old)
6709 vmx->ple_window_dirty = true;
Radim Krčmář7b462682014-08-21 18:08:09 +02006710
6711 trace_kvm_ple_window_shrink(vcpu->vcpu_id, vmx->ple_window, old);
Radim Krčmářb4a2d312014-08-21 18:08:08 +02006712}
6713
6714/*
6715 * ple_window_actual_max is computed to be one grow_ple_window() below
6716 * ple_window_max. (See __grow_ple_window for the reason.)
6717 * This prevents overflows, because ple_window_max is int.
6718 * ple_window_max is effectively rounded down to a multiple of ple_window_grow
6719 * by this process.
6720 * ple_window_max is also prevented from setting vmx->ple_window < ple_window.
6721 */
6722static void update_ple_window_actual_max(void)
6723{
6724 ple_window_actual_max =
6725 __shrink_ple_window(max(ple_window_max, ple_window),
6726 ple_window_grow, INT_MIN);
6727}
6728
Feng Wubf9f6ac2015-09-18 22:29:55 +08006729/*
6730 * Handler for POSTED_INTERRUPT_WAKEUP_VECTOR.
6731 */
6732static void wakeup_handler(void)
6733{
6734 struct kvm_vcpu *vcpu;
6735 int cpu = smp_processor_id();
6736
6737 spin_lock(&per_cpu(blocked_vcpu_on_cpu_lock, cpu));
6738 list_for_each_entry(vcpu, &per_cpu(blocked_vcpu_on_cpu, cpu),
6739 blocked_vcpu_list) {
6740 struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu);
6741
6742 if (pi_test_on(pi_desc) == 1)
6743 kvm_vcpu_kick(vcpu);
6744 }
6745 spin_unlock(&per_cpu(blocked_vcpu_on_cpu_lock, cpu));
6746}
6747
Junaid Shahidf160c7b2016-12-06 16:46:16 -08006748void vmx_enable_tdp(void)
6749{
6750 kvm_mmu_set_mask_ptes(VMX_EPT_READABLE_MASK,
6751 enable_ept_ad_bits ? VMX_EPT_ACCESS_BIT : 0ull,
6752 enable_ept_ad_bits ? VMX_EPT_DIRTY_BIT : 0ull,
6753 0ull, VMX_EPT_EXECUTABLE_MASK,
6754 cpu_has_vmx_ept_execute_only() ? 0ull : VMX_EPT_READABLE_MASK,
Tom Lendackyd0ec49d2017-07-17 16:10:27 -05006755 VMX_EPT_RWX_MASK, 0ull);
Junaid Shahidf160c7b2016-12-06 16:46:16 -08006756
6757 ept_set_mmio_spte_mask();
6758 kvm_enable_tdp();
6759}
6760
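/*
 * One-time VMX setup: read the VMX capability MSRs, disable features the
 * hardware does not support, initialize the MSR bitmaps and configure
 * optional features such as EPT, APICv, PML, posted interrupts and the
 * VMX-preemption timer.
 */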
Tiejun Chenf2c76482014-10-28 10:14:47 +08006761static __init int hardware_setup(void)
6762{
Tiejun Chen34a1cd62014-10-28 10:14:48 +08006763 int r = -ENOMEM, i, msr;
6764
6765 rdmsrl_safe(MSR_EFER, &host_efer);
6766
6767 for (i = 0; i < ARRAY_SIZE(vmx_msr_index); ++i)
6768 kvm_define_shared_msr(i, vmx_msr_index[i]);
6769
Radim Krčmář23611332016-09-29 22:41:33 +02006770 for (i = 0; i < VMX_BITMAP_NR; i++) {
6771 vmx_bitmap[i] = (unsigned long *)__get_free_page(GFP_KERNEL);
6772 if (!vmx_bitmap[i])
6773 goto out;
6774 }
Tiejun Chen34a1cd62014-10-28 10:14:48 +08006775
Tiejun Chen34a1cd62014-10-28 10:14:48 +08006776 memset(vmx_vmread_bitmap, 0xff, PAGE_SIZE);
6777 memset(vmx_vmwrite_bitmap, 0xff, PAGE_SIZE);
6778
Tiejun Chen34a1cd62014-10-28 10:14:48 +08006779 memset(vmx_msr_bitmap_legacy, 0xff, PAGE_SIZE);
6780 memset(vmx_msr_bitmap_longmode, 0xff, PAGE_SIZE);
6781
Tiejun Chen34a1cd62014-10-28 10:14:48 +08006782 if (setup_vmcs_config(&vmcs_config) < 0) {
6783 r = -EIO;
Radim Krčmář23611332016-09-29 22:41:33 +02006784 goto out;
Tiejun Chenbaa03522014-12-23 16:21:11 +08006785 }
Tiejun Chenf2c76482014-10-28 10:14:47 +08006786
6787 if (boot_cpu_has(X86_FEATURE_NX))
6788 kvm_enable_efer_bits(EFER_NX);
6789
Wanpeng Li08d839c2017-03-23 05:30:08 -07006790 if (!cpu_has_vmx_vpid() || !cpu_has_vmx_invvpid() ||
6791 !(cpu_has_vmx_invvpid_single() || cpu_has_vmx_invvpid_global()))
Tiejun Chenf2c76482014-10-28 10:14:47 +08006792 enable_vpid = 0;
Wanpeng Li08d839c2017-03-23 05:30:08 -07006793
Tiejun Chenf2c76482014-10-28 10:14:47 +08006794 if (!cpu_has_vmx_shadow_vmcs())
6795 enable_shadow_vmcs = 0;
6796 if (enable_shadow_vmcs)
6797 init_vmcs_shadow_fields();
6798
6799 if (!cpu_has_vmx_ept() ||
David Hildenbrand42aa53b2017-08-10 23:15:29 +02006800 !cpu_has_vmx_ept_4levels() ||
David Hildenbrandf5f51582017-08-24 20:51:30 +02006801 !cpu_has_vmx_ept_mt_wb() ||
Wanpeng Li8ad81822017-10-09 15:51:53 -07006802 !cpu_has_vmx_invept_global())
Tiejun Chenf2c76482014-10-28 10:14:47 +08006803 enable_ept = 0;
Tiejun Chenf2c76482014-10-28 10:14:47 +08006804
Wanpeng Lifce6ac42017-05-11 02:58:56 -07006805 if (!cpu_has_vmx_ept_ad_bits() || !enable_ept)
Tiejun Chenf2c76482014-10-28 10:14:47 +08006806 enable_ept_ad_bits = 0;
6807
Wanpeng Li8ad81822017-10-09 15:51:53 -07006808 if (!cpu_has_vmx_unrestricted_guest() || !enable_ept)
Tiejun Chenf2c76482014-10-28 10:14:47 +08006809 enable_unrestricted_guest = 0;
6810
Paolo Bonziniad15a292015-01-30 16:18:49 +01006811 if (!cpu_has_vmx_flexpriority())
Tiejun Chenf2c76482014-10-28 10:14:47 +08006812 flexpriority_enabled = 0;
6813
Paolo Bonzinid02fcf52017-11-06 13:31:13 +01006814 if (!cpu_has_virtual_nmis())
6815 enable_vnmi = 0;
6816
Paolo Bonziniad15a292015-01-30 16:18:49 +01006817 /*
6818 * set_apic_access_page_addr() is used to reload apic access
6819 * page upon invalidation. No need to do anything if not
6820 * using the APIC_ACCESS_ADDR VMCS field.
6821 */
6822 if (!flexpriority_enabled)
Tiejun Chenf2c76482014-10-28 10:14:47 +08006823 kvm_x86_ops->set_apic_access_page_addr = NULL;
Tiejun Chenf2c76482014-10-28 10:14:47 +08006824
6825 if (!cpu_has_vmx_tpr_shadow())
6826 kvm_x86_ops->update_cr8_intercept = NULL;
6827
6828 if (enable_ept && !cpu_has_vmx_ept_2m_page())
6829 kvm_disable_largepages();
6830
Wanpeng Li0f107682017-09-28 18:06:24 -07006831 if (!cpu_has_vmx_ple()) {
Tiejun Chenf2c76482014-10-28 10:14:47 +08006832 ple_gap = 0;
Wanpeng Li0f107682017-09-28 18:06:24 -07006833 ple_window = 0;
6834 ple_window_grow = 0;
6835 ple_window_max = 0;
6836 ple_window_shrink = 0;
6837 }
Tiejun Chenf2c76482014-10-28 10:14:47 +08006838
Paolo Bonzini76dfafd52016-12-19 17:17:11 +01006839 if (!cpu_has_vmx_apicv()) {
Tiejun Chenf2c76482014-10-28 10:14:47 +08006840 enable_apicv = 0;
Paolo Bonzini76dfafd52016-12-19 17:17:11 +01006841 kvm_x86_ops->sync_pir_to_irr = NULL;
6842 }
Tiejun Chenf2c76482014-10-28 10:14:47 +08006843
Haozhong Zhang64903d62015-10-20 15:39:09 +08006844 if (cpu_has_vmx_tsc_scaling()) {
6845 kvm_has_tsc_control = true;
6846 kvm_max_tsc_scaling_ratio = KVM_VMX_TSC_MULTIPLIER_MAX;
6847 kvm_tsc_scaling_ratio_frac_bits = 48;
6848 }
6849
Tiejun Chenbaa03522014-12-23 16:21:11 +08006850 vmx_disable_intercept_for_msr(MSR_FS_BASE, false);
6851 vmx_disable_intercept_for_msr(MSR_GS_BASE, false);
6852 vmx_disable_intercept_for_msr(MSR_KERNEL_GS_BASE, true);
6853 vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_CS, false);
6854 vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_ESP, false);
6855 vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_EIP, false);
Tiejun Chenbaa03522014-12-23 16:21:11 +08006856
Wanpeng Lic63e4562016-09-23 19:17:16 +08006857 memcpy(vmx_msr_bitmap_legacy_x2apic_apicv,
6858 vmx_msr_bitmap_legacy, PAGE_SIZE);
6859 memcpy(vmx_msr_bitmap_longmode_x2apic_apicv,
6860 vmx_msr_bitmap_longmode, PAGE_SIZE);
Tiejun Chenbaa03522014-12-23 16:21:11 +08006861 memcpy(vmx_msr_bitmap_legacy_x2apic,
6862 vmx_msr_bitmap_legacy, PAGE_SIZE);
6863 memcpy(vmx_msr_bitmap_longmode_x2apic,
6864 vmx_msr_bitmap_longmode, PAGE_SIZE);
6865
Wanpeng Li04bb92e2015-09-16 19:31:11 +08006866 set_bit(0, vmx_vpid_bitmap); /* 0 is reserved for host */
6867
Radim Krčmář40d83382016-09-29 22:41:31 +02006868 for (msr = 0x800; msr <= 0x8ff; msr++) {
6869 if (msr == 0x839 /* TMCCT */)
6870 continue;
Radim Krčmář2e69f862016-09-29 22:41:32 +02006871 vmx_disable_intercept_msr_x2apic(msr, MSR_TYPE_R, true);
Radim Krčmář40d83382016-09-29 22:41:31 +02006872 }
Tiejun Chenbaa03522014-12-23 16:21:11 +08006873
Wanpeng Lif6e90f92016-09-22 07:43:25 +08006874 /*
Radim Krčmář2e69f862016-09-29 22:41:32 +02006875 * TPR reads and writes can be virtualized even if virtual interrupt
6876 * delivery is not in use.
Wanpeng Lif6e90f92016-09-22 07:43:25 +08006877 */
Radim Krčmář2e69f862016-09-29 22:41:32 +02006878 vmx_disable_intercept_msr_x2apic(0x808, MSR_TYPE_W, true);
6879 vmx_disable_intercept_msr_x2apic(0x808, MSR_TYPE_R | MSR_TYPE_W, false);
6880
Roman Kagan3ce424e2016-05-18 17:48:20 +03006881 /* EOI */
Radim Krčmář2e69f862016-09-29 22:41:32 +02006882 vmx_disable_intercept_msr_x2apic(0x80b, MSR_TYPE_W, true);
Roman Kagan3ce424e2016-05-18 17:48:20 +03006883 /* SELF-IPI */
Radim Krčmář2e69f862016-09-29 22:41:32 +02006884 vmx_disable_intercept_msr_x2apic(0x83f, MSR_TYPE_W, true);
Tiejun Chenbaa03522014-12-23 16:21:11 +08006885
Junaid Shahidf160c7b2016-12-06 16:46:16 -08006886 if (enable_ept)
6887 vmx_enable_tdp();
6888 else
Tiejun Chenbaa03522014-12-23 16:21:11 +08006889 kvm_disable_tdp();
6890
6891 update_ple_window_actual_max();
6892
Kai Huang843e4332015-01-28 10:54:28 +08006893 /*
6894 * Only enable PML when hardware supports PML feature, and both EPT
6895 * and EPT A/D bit features are enabled -- PML depends on them to work.
6896 */
6897 if (!enable_ept || !enable_ept_ad_bits || !cpu_has_vmx_pml())
6898 enable_pml = 0;
6899
6900 if (!enable_pml) {
6901 kvm_x86_ops->slot_enable_log_dirty = NULL;
6902 kvm_x86_ops->slot_disable_log_dirty = NULL;
6903 kvm_x86_ops->flush_log_dirty = NULL;
6904 kvm_x86_ops->enable_log_dirty_pt_masked = NULL;
6905 }
6906
Yunhong Jiang64672c92016-06-13 14:19:59 -07006907 if (cpu_has_vmx_preemption_timer() && enable_preemption_timer) {
6908 u64 vmx_msr;
6909
6910 rdmsrl(MSR_IA32_VMX_MISC, vmx_msr);
6911 cpu_preemption_timer_multi =
6912 vmx_msr & VMX_MISC_PREEMPTION_TIMER_RATE_MASK;
6913 } else {
6914 kvm_x86_ops->set_hv_timer = NULL;
6915 kvm_x86_ops->cancel_hv_timer = NULL;
6916 }
6917
Feng Wubf9f6ac2015-09-18 22:29:55 +08006918 kvm_set_posted_intr_wakeup_handler(wakeup_handler);
6919
Ashok Rajc45dcc72016-06-22 14:59:56 +08006920 kvm_mce_cap_supported |= MCG_LMCE_P;
6921
Tiejun Chenf2c76482014-10-28 10:14:47 +08006922 return alloc_kvm_area();
Tiejun Chen34a1cd62014-10-28 10:14:48 +08006923
Tiejun Chen34a1cd62014-10-28 10:14:48 +08006924out:
Radim Krčmář23611332016-09-29 22:41:33 +02006925 for (i = 0; i < VMX_BITMAP_NR; i++)
6926 free_page((unsigned long)vmx_bitmap[i]);
Tiejun Chen34a1cd62014-10-28 10:14:48 +08006927
6928 return r;
Tiejun Chenf2c76482014-10-28 10:14:47 +08006929}
6930
6931static __exit void hardware_unsetup(void)
6932{
Radim Krčmář23611332016-09-29 22:41:33 +02006933 int i;
6934
6935 for (i = 0; i < VMX_BITMAP_NR; i++)
6936 free_page((unsigned long)vmx_bitmap[i]);
Tiejun Chen34a1cd62014-10-28 10:14:48 +08006937
Tiejun Chenf2c76482014-10-28 10:14:47 +08006938 free_kvm_area();
6939}
6940
Avi Kivity6aa8b732006-12-10 02:21:36 -08006941/*
Zhai, Edwin4b8d54f2009-10-09 18:03:20 +08006942 * Indicate a busy-waiting vcpu in spinlock. We do not enable the PAUSE
6943 * exiting, so only get here on cpu with PAUSE-Loop-Exiting.
6944 */
Marcelo Tosatti9fb41ba2009-10-12 19:37:31 -03006945static int handle_pause(struct kvm_vcpu *vcpu)
Zhai, Edwin4b8d54f2009-10-09 18:03:20 +08006946{
Radim Krčmářb4a2d312014-08-21 18:08:08 +02006947 if (ple_gap)
6948 grow_ple_window(vcpu);
6949
Longpeng(Mike)de63ad42017-08-08 12:05:33 +08006950 /*
6951	 * Intel SDM vol. 3, ch. 25.1.3 says: the "PAUSE-loop exiting"
6952	 * VM-execution control is ignored if CPL > 0. OTOH, KVM
6953	 * never sets PAUSE_EXITING and only sets PLE if supported,
6954	 * so the vcpu must be at CPL 0 if it gets a PAUSE exit.
6955 */
6956 kvm_vcpu_on_spin(vcpu, true);
Kyle Huey6affcbe2016-11-29 12:40:40 -08006957 return kvm_skip_emulated_instruction(vcpu);
Zhai, Edwin4b8d54f2009-10-09 18:03:20 +08006958}
6959
Gabriel L. Somlo87c00572014-05-07 16:52:13 -04006960static int handle_nop(struct kvm_vcpu *vcpu)
Sheng Yang59708672009-12-15 13:29:54 +08006961{
Kyle Huey6affcbe2016-11-29 12:40:40 -08006962 return kvm_skip_emulated_instruction(vcpu);
Sheng Yang59708672009-12-15 13:29:54 +08006963}
6964
Gabriel L. Somlo87c00572014-05-07 16:52:13 -04006965static int handle_mwait(struct kvm_vcpu *vcpu)
6966{
6967 printk_once(KERN_WARNING "kvm: MWAIT instruction emulated as NOP!\n");
6968 return handle_nop(vcpu);
6969}
6970
Jim Mattson45ec3682017-08-23 16:32:04 -07006971static int handle_invalid_op(struct kvm_vcpu *vcpu)
6972{
6973 kvm_queue_exception(vcpu, UD_VECTOR);
6974 return 1;
6975}
6976
Mihai Donțu5f3d45e2015-07-05 20:08:57 +03006977static int handle_monitor_trap(struct kvm_vcpu *vcpu)
6978{
6979 return 1;
6980}
6981
Gabriel L. Somlo87c00572014-05-07 16:52:13 -04006982static int handle_monitor(struct kvm_vcpu *vcpu)
6983{
6984 printk_once(KERN_WARNING "kvm: MONITOR instruction emulated as NOP!\n");
6985 return handle_nop(vcpu);
6986}
6987
Zhai, Edwin4b8d54f2009-10-09 18:03:20 +08006988/*
Arthur Chunqi Li0658fba2013-07-04 15:03:32 +08006989 * The following 3 functions, nested_vmx_succeed()/failValid()/failInvalid(),
6990 * set the success or error code of an emulated VMX instruction, as specified
6991 * by Vol 2B, VMX Instruction Reference, "Conventions".
6992 */
6993static void nested_vmx_succeed(struct kvm_vcpu *vcpu)
6994{
6995 vmx_set_rflags(vcpu, vmx_get_rflags(vcpu)
6996 & ~(X86_EFLAGS_CF | X86_EFLAGS_PF | X86_EFLAGS_AF |
6997 X86_EFLAGS_ZF | X86_EFLAGS_SF | X86_EFLAGS_OF));
6998}
6999
7000static void nested_vmx_failInvalid(struct kvm_vcpu *vcpu)
7001{
7002 vmx_set_rflags(vcpu, (vmx_get_rflags(vcpu)
7003 & ~(X86_EFLAGS_PF | X86_EFLAGS_AF | X86_EFLAGS_ZF |
7004 X86_EFLAGS_SF | X86_EFLAGS_OF))
7005 | X86_EFLAGS_CF);
7006}
7007
Abel Gordon145c28d2013-04-18 14:36:55 +03007008static void nested_vmx_failValid(struct kvm_vcpu *vcpu,
Arthur Chunqi Li0658fba2013-07-04 15:03:32 +08007009 u32 vm_instruction_error)
7010{
7011 if (to_vmx(vcpu)->nested.current_vmptr == -1ull) {
7012 /*
7013		 * can't be done if there isn't a current VMCS.
7014 * can't be done there isn't a current VMCS.
7015 */
7016 nested_vmx_failInvalid(vcpu);
7017 return;
7018 }
7019 vmx_set_rflags(vcpu, (vmx_get_rflags(vcpu)
7020 & ~(X86_EFLAGS_CF | X86_EFLAGS_PF | X86_EFLAGS_AF |
7021 X86_EFLAGS_SF | X86_EFLAGS_OF))
7022 | X86_EFLAGS_ZF);
7023 get_vmcs12(vcpu)->vm_instruction_error = vm_instruction_error;
7024 /*
7025 * We don't need to force a shadow sync because
7026 * VM_INSTRUCTION_ERROR is not shadowed
7027 */
7028}
Abel Gordon145c28d2013-04-18 14:36:55 +03007029
Wincy Vanff651cb2014-12-11 08:52:58 +03007030static void nested_vmx_abort(struct kvm_vcpu *vcpu, u32 indicator)
7031{
7032 /* TODO: not to reset guest simply here. */
7033 kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu);
Paolo Bonzinibbe41b92016-08-19 17:51:20 +02007034 pr_debug_ratelimited("kvm: nested vmx abort, indicator %d\n", indicator);
Wincy Vanff651cb2014-12-11 08:52:58 +03007035}
7036
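/*
 * Fires when the hrtimer emulating the VMX-preemption timer of a nested
 * guest expires: mark it expired and kick the vcpu so the corresponding
 * nested VM exit can be delivered.
 */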
Jan Kiszkaf41245002014-03-07 20:03:13 +01007037static enum hrtimer_restart vmx_preemption_timer_fn(struct hrtimer *timer)
7038{
7039 struct vcpu_vmx *vmx =
7040 container_of(timer, struct vcpu_vmx, nested.preemption_timer);
7041
7042 vmx->nested.preemption_timer_expired = true;
7043 kvm_make_request(KVM_REQ_EVENT, &vmx->vcpu);
7044 kvm_vcpu_kick(&vmx->vcpu);
7045
7046 return HRTIMER_NORESTART;
7047}
7048
Nadav Har'Elff2f6fe2011-05-25 23:05:27 +03007049/*
Bandan Das19677e32014-05-06 02:19:15 -04007050 * Decode the memory-address operand of a vmx instruction, as recorded on an
7051 * exit caused by such an instruction (run by a guest hypervisor).
7052 * On success, returns 0. When the operand is invalid, returns 1 and throws
7053 * #UD or #GP.
7054 */
7055static int get_vmx_mem_address(struct kvm_vcpu *vcpu,
7056 unsigned long exit_qualification,
Eugene Korenevskyf9eb4af2015-04-17 02:22:21 +00007057 u32 vmx_instruction_info, bool wr, gva_t *ret)
Bandan Das19677e32014-05-06 02:19:15 -04007058{
Eugene Korenevskyf9eb4af2015-04-17 02:22:21 +00007059 gva_t off;
7060 bool exn;
7061 struct kvm_segment s;
7062
Bandan Das19677e32014-05-06 02:19:15 -04007063 /*
7064 * According to Vol. 3B, "Information for VM Exits Due to Instruction
7065 * Execution", on an exit, vmx_instruction_info holds most of the
7066 * addressing components of the operand. Only the displacement part
7067 * is put in exit_qualification (see 3B, "Basic VM-Exit Information").
7068 * For how an actual address is calculated from all these components,
7069 * refer to Vol. 1, "Operand Addressing".
7070 */
7071 int scaling = vmx_instruction_info & 3;
7072 int addr_size = (vmx_instruction_info >> 7) & 7;
7073 bool is_reg = vmx_instruction_info & (1u << 10);
7074 int seg_reg = (vmx_instruction_info >> 15) & 7;
7075 int index_reg = (vmx_instruction_info >> 18) & 0xf;
7076 bool index_is_valid = !(vmx_instruction_info & (1u << 22));
7077 int base_reg = (vmx_instruction_info >> 23) & 0xf;
7078 bool base_is_valid = !(vmx_instruction_info & (1u << 27));
7079
7080 if (is_reg) {
7081 kvm_queue_exception(vcpu, UD_VECTOR);
7082 return 1;
7083 }
7084
7085 /* Addr = segment_base + offset */
7086 /* offset = base + [index * scale] + displacement */
Eugene Korenevskyf9eb4af2015-04-17 02:22:21 +00007087 off = exit_qualification; /* holds the displacement */
Bandan Das19677e32014-05-06 02:19:15 -04007088 if (base_is_valid)
Eugene Korenevskyf9eb4af2015-04-17 02:22:21 +00007089 off += kvm_register_read(vcpu, base_reg);
Bandan Das19677e32014-05-06 02:19:15 -04007090 if (index_is_valid)
Eugene Korenevskyf9eb4af2015-04-17 02:22:21 +00007091 off += kvm_register_read(vcpu, index_reg)<<scaling;
7092 vmx_get_segment(vcpu, &s, seg_reg);
7093 *ret = s.base + off;
Bandan Das19677e32014-05-06 02:19:15 -04007094
7095 if (addr_size == 1) /* 32 bit */
7096 *ret &= 0xffffffff;
7097
Eugene Korenevskyf9eb4af2015-04-17 02:22:21 +00007098 /* Checks for #GP/#SS exceptions. */
7099 exn = false;
Quentin Casasnovasff30ef42016-06-18 11:01:05 +02007100 if (is_long_mode(vcpu)) {
7101 /* Long mode: #GP(0)/#SS(0) if the memory address is in a
7102 * non-canonical form. This is the only check on the memory
7103 * destination for long mode!
7104 */
Yu Zhangfd8cb432017-08-24 20:27:56 +08007105 exn = is_noncanonical_address(*ret, vcpu);
Quentin Casasnovasff30ef42016-06-18 11:01:05 +02007106 } else if (is_protmode(vcpu)) {
Eugene Korenevskyf9eb4af2015-04-17 02:22:21 +00007107 /* Protected mode: apply checks for segment validity in the
7108 * following order:
7109 * - segment type check (#GP(0) may be thrown)
7110 * - usability check (#GP(0)/#SS(0))
7111 * - limit check (#GP(0)/#SS(0))
7112 */
7113 if (wr)
7114 /* #GP(0) if the destination operand is located in a
7115 * read-only data segment or any code segment.
7116 */
7117 exn = ((s.type & 0xa) == 0 || (s.type & 8));
7118 else
7119 /* #GP(0) if the source operand is located in an
7120 * execute-only code segment
7121 */
7122 exn = ((s.type & 0xa) == 8);
Quentin Casasnovasff30ef42016-06-18 11:01:05 +02007123 if (exn) {
7124 kvm_queue_exception_e(vcpu, GP_VECTOR, 0);
7125 return 1;
7126 }
Eugene Korenevskyf9eb4af2015-04-17 02:22:21 +00007127 /* Protected mode: #GP(0)/#SS(0) if the segment is unusable.
7128 */
7129 exn = (s.unusable != 0);
7130 /* Protected mode: #GP(0)/#SS(0) if the memory
7131 * operand is outside the segment limit.
7132 */
7133 exn = exn || (off + sizeof(u64) > s.limit);
7134 }
7135 if (exn) {
7136 kvm_queue_exception_e(vcpu,
7137 seg_reg == VCPU_SREG_SS ?
7138 SS_VECTOR : GP_VECTOR,
7139 0);
7140 return 1;
7141 }
7142
Bandan Das19677e32014-05-06 02:19:15 -04007143 return 0;
7144}
7145
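/*
 * Read the 64-bit VMCS/VMXON pointer operand of the current VMX instruction
 * from guest memory, injecting a page fault if the access fails.
 */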
Radim Krčmářcbf71272017-05-19 15:48:51 +02007146static int nested_vmx_get_vmptr(struct kvm_vcpu *vcpu, gpa_t *vmpointer)
Bandan Das3573e222014-05-06 02:19:16 -04007147{
7148 gva_t gva;
Bandan Das3573e222014-05-06 02:19:16 -04007149 struct x86_exception e;
Bandan Das3573e222014-05-06 02:19:16 -04007150
7151 if (get_vmx_mem_address(vcpu, vmcs_readl(EXIT_QUALIFICATION),
Eugene Korenevskyf9eb4af2015-04-17 02:22:21 +00007152 vmcs_read32(VMX_INSTRUCTION_INFO), false, &gva))
Bandan Das3573e222014-05-06 02:19:16 -04007153 return 1;
7154
Radim Krčmářcbf71272017-05-19 15:48:51 +02007155 if (kvm_read_guest_virt(&vcpu->arch.emulate_ctxt, gva, vmpointer,
7156 sizeof(*vmpointer), &e)) {
Bandan Das3573e222014-05-06 02:19:16 -04007157 kvm_inject_page_fault(vcpu, &e);
7158 return 1;
7159 }
7160
Bandan Das3573e222014-05-06 02:19:16 -04007161 return 0;
7162}
7163
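/*
 * Allocate everything the vcpu needs to act as a nested hypervisor: the
 * vmcs02, an optional MSR bitmap and shadow VMCS, the cached vmcs12 and the
 * hrtimer that emulates the VMX-preemption timer.
 */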
Jim Mattsone29acc52016-11-30 12:03:43 -08007164static int enter_vmx_operation(struct kvm_vcpu *vcpu)
7165{
7166 struct vcpu_vmx *vmx = to_vmx(vcpu);
7167 struct vmcs *shadow_vmcs;
7168
Jim Mattson00647b42017-11-27 17:22:25 -06007169 vmx->nested.vmcs02.vmcs = alloc_vmcs();
7170 vmx->nested.vmcs02.shadow_vmcs = NULL;
7171 if (!vmx->nested.vmcs02.vmcs)
7172 goto out_vmcs02;
7173 loaded_vmcs_init(&vmx->nested.vmcs02);
7174
Jim Mattsone29acc52016-11-30 12:03:43 -08007175 if (cpu_has_vmx_msr_bitmap()) {
7176 vmx->nested.msr_bitmap =
7177 (unsigned long *)__get_free_page(GFP_KERNEL);
7178 if (!vmx->nested.msr_bitmap)
7179 goto out_msr_bitmap;
7180 }
7181
7182 vmx->nested.cached_vmcs12 = kmalloc(VMCS12_SIZE, GFP_KERNEL);
7183 if (!vmx->nested.cached_vmcs12)
7184 goto out_cached_vmcs12;
7185
7186 if (enable_shadow_vmcs) {
7187 shadow_vmcs = alloc_vmcs();
7188 if (!shadow_vmcs)
7189 goto out_shadow_vmcs;
7190 /* mark vmcs as shadow */
7191 shadow_vmcs->revision_id |= (1u << 31);
7192 /* init shadow vmcs */
7193 vmcs_clear(shadow_vmcs);
7194 vmx->vmcs01.shadow_vmcs = shadow_vmcs;
7195 }
7196
Jim Mattsone29acc52016-11-30 12:03:43 -08007197 hrtimer_init(&vmx->nested.preemption_timer, CLOCK_MONOTONIC,
7198 HRTIMER_MODE_REL_PINNED);
7199 vmx->nested.preemption_timer.function = vmx_preemption_timer_fn;
7200
7201 vmx->nested.vmxon = true;
7202 return 0;
7203
7204out_shadow_vmcs:
7205 kfree(vmx->nested.cached_vmcs12);
7206
7207out_cached_vmcs12:
7208 free_page((unsigned long)vmx->nested.msr_bitmap);
7209
7210out_msr_bitmap:
Mark Kanda276c7962017-11-27 17:22:26 -06007211 vmx_nested_free_vmcs02(vmx);
Jim Mattson00647b42017-11-27 17:22:25 -06007212
7213out_vmcs02:
Jim Mattsone29acc52016-11-30 12:03:43 -08007214 return -ENOMEM;
7215}
7216
Bandan Das3573e222014-05-06 02:19:16 -04007217/*
Nadav Har'Elec378ae2011-05-25 23:02:54 +03007218 * Emulate the VMXON instruction.
7219 * Currently, we just remember that VMX is active, and do not save or even
7220 * inspect the argument to VMXON (the so-called "VMXON pointer") because we
7221 * do not currently need to store anything in that guest-allocated memory
7222 * region. Consequently, VMCLEAR and VMPTRLD also do not verify that their
7223 * argument is different from the VMXON pointer (which the spec says they do).
7224 */
7225static int handle_vmon(struct kvm_vcpu *vcpu)
7226{
Jim Mattsone29acc52016-11-30 12:03:43 -08007227 int ret;
Radim Krčmářcbf71272017-05-19 15:48:51 +02007228 gpa_t vmptr;
7229 struct page *page;
Nadav Har'Elec378ae2011-05-25 23:02:54 +03007230 struct vcpu_vmx *vmx = to_vmx(vcpu);
Nadav Har'Elb3897a42013-07-08 19:12:35 +08007231 const u64 VMXON_NEEDED_FEATURES = FEATURE_CONTROL_LOCKED
7232 | FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX;
Nadav Har'Elec378ae2011-05-25 23:02:54 +03007233
Jim Mattson70f3aac2017-04-26 08:53:46 -07007234 /*
7235 * The Intel VMX Instruction Reference lists a bunch of bits that are
7236 * prerequisite to running VMXON, most notably cr4.VMXE must be set to
7237 * 1 (see vmx_set_cr4() for when we allow the guest to set this).
7238 * Otherwise, we should fail with #UD. But most faulting conditions
7239 * have already been checked by hardware, prior to the VM-exit for
7240 * VMXON. We do test guest cr4.VMXE because processor CR4 always has
7241 * that bit set to 1 in non-root mode.
Nadav Har'Elec378ae2011-05-25 23:02:54 +03007242 */
Jim Mattson70f3aac2017-04-26 08:53:46 -07007243 if (!kvm_read_cr4_bits(vcpu, X86_CR4_VMXE)) {
Nadav Har'Elec378ae2011-05-25 23:02:54 +03007244 kvm_queue_exception(vcpu, UD_VECTOR);
7245 return 1;
7246 }
7247
Abel Gordon145c28d2013-04-18 14:36:55 +03007248 if (vmx->nested.vmxon) {
7249 nested_vmx_failValid(vcpu, VMXERR_VMXON_IN_VMX_ROOT_OPERATION);
Kyle Huey6affcbe2016-11-29 12:40:40 -08007250 return kvm_skip_emulated_instruction(vcpu);
Abel Gordon145c28d2013-04-18 14:36:55 +03007251 }
Nadav Har'Elb3897a42013-07-08 19:12:35 +08007252
Haozhong Zhang3b840802016-06-22 14:59:54 +08007253 if ((vmx->msr_ia32_feature_control & VMXON_NEEDED_FEATURES)
Nadav Har'Elb3897a42013-07-08 19:12:35 +08007254 != VMXON_NEEDED_FEATURES) {
7255 kvm_inject_gp(vcpu, 0);
7256 return 1;
7257 }
7258
Radim Krčmářcbf71272017-05-19 15:48:51 +02007259 if (nested_vmx_get_vmptr(vcpu, &vmptr))
Jim Mattson21e7fbe2016-12-22 15:49:55 -08007260 return 1;
Radim Krčmářcbf71272017-05-19 15:48:51 +02007261
7262 /*
7263 * SDM 3: 24.11.5
7264 * The first 4 bytes of VMXON region contain the supported
7265 * VMCS revision identifier
7266 *
7267	 * Note - IA32_VMX_BASIC[48] will never be 1 for the nested case,
7268	 * which replaces the physical address width with 32.
7269 */
7270 if (!PAGE_ALIGNED(vmptr) || (vmptr >> cpuid_maxphyaddr(vcpu))) {
7271 nested_vmx_failInvalid(vcpu);
7272 return kvm_skip_emulated_instruction(vcpu);
7273 }
7274
David Hildenbrand5e2f30b2017-08-03 18:11:04 +02007275 page = kvm_vcpu_gpa_to_page(vcpu, vmptr);
7276 if (is_error_page(page)) {
Radim Krčmářcbf71272017-05-19 15:48:51 +02007277 nested_vmx_failInvalid(vcpu);
7278 return kvm_skip_emulated_instruction(vcpu);
7279 }
7280 if (*(u32 *)kmap(page) != VMCS12_REVISION) {
7281 kunmap(page);
David Hildenbrand53a70da2017-08-03 18:11:05 +02007282 kvm_release_page_clean(page);
Radim Krčmářcbf71272017-05-19 15:48:51 +02007283 nested_vmx_failInvalid(vcpu);
7284 return kvm_skip_emulated_instruction(vcpu);
7285 }
7286 kunmap(page);
David Hildenbrand53a70da2017-08-03 18:11:05 +02007287 kvm_release_page_clean(page);
Radim Krčmářcbf71272017-05-19 15:48:51 +02007288
7289 vmx->nested.vmxon_ptr = vmptr;
Jim Mattsone29acc52016-11-30 12:03:43 -08007290 ret = enter_vmx_operation(vcpu);
7291 if (ret)
7292 return ret;
Nadav Har'Elec378ae2011-05-25 23:02:54 +03007293
Arthur Chunqi Lia25eb112013-07-04 15:03:33 +08007294 nested_vmx_succeed(vcpu);
Kyle Huey6affcbe2016-11-29 12:40:40 -08007295 return kvm_skip_emulated_instruction(vcpu);
Nadav Har'Elec378ae2011-05-25 23:02:54 +03007296}
7297
7298/*
7299 * Intel's VMX Instruction Reference specifies a common set of prerequisites
7300 * for running VMX instructions (except VMXON, whose prerequisites are
7301 * slightly different). It also specifies what exception to inject otherwise.
Jim Mattson70f3aac2017-04-26 08:53:46 -07007302 * Note that many of these exceptions have priority over VM exits, so they
7303 * don't have to be checked again here.
Nadav Har'Elec378ae2011-05-25 23:02:54 +03007304 */
7305static int nested_vmx_check_permission(struct kvm_vcpu *vcpu)
7306{
Jim Mattson70f3aac2017-04-26 08:53:46 -07007307 if (!to_vmx(vcpu)->nested.vmxon) {
Nadav Har'Elec378ae2011-05-25 23:02:54 +03007308 kvm_queue_exception(vcpu, UD_VECTOR);
7309 return 0;
7310 }
Nadav Har'Elec378ae2011-05-25 23:02:54 +03007311 return 1;
7312}
7313
David Matlack8ca44e82017-08-01 14:00:39 -07007314static void vmx_disable_shadow_vmcs(struct vcpu_vmx *vmx)
7315{
7316 vmcs_clear_bits(SECONDARY_VM_EXEC_CONTROL, SECONDARY_EXEC_SHADOW_VMCS);
7317 vmcs_write64(VMCS_LINK_POINTER, -1ull);
7318}
7319
Abel Gordone7953d72013-04-18 14:37:55 +03007320static inline void nested_release_vmcs12(struct vcpu_vmx *vmx)
7321{
Paolo Bonzini9a2a05b2014-07-17 11:55:46 +02007322 if (vmx->nested.current_vmptr == -1ull)
7323 return;
7324
Abel Gordon012f83c2013-04-18 14:39:25 +03007325 if (enable_shadow_vmcs) {
Paolo Bonzini9a2a05b2014-07-17 11:55:46 +02007326 /* copy to memory all shadowed fields in case
7327 they were modified */
7328 copy_shadow_to_vmcs12(vmx);
7329 vmx->nested.sync_shadow_vmcs = false;
David Matlack8ca44e82017-08-01 14:00:39 -07007330 vmx_disable_shadow_vmcs(vmx);
Abel Gordon012f83c2013-04-18 14:39:25 +03007331 }
Wincy Van705699a2015-02-03 23:58:17 +08007332 vmx->nested.posted_intr_nv = -1;
David Matlack4f2777b2016-07-13 17:16:37 -07007333
7334 /* Flush VMCS12 to guest memory */
Paolo Bonzini9f744c52017-07-27 15:54:46 +02007335 kvm_vcpu_write_guest_page(&vmx->vcpu,
7336 vmx->nested.current_vmptr >> PAGE_SHIFT,
7337 vmx->nested.cached_vmcs12, 0, VMCS12_SIZE);
David Matlack4f2777b2016-07-13 17:16:37 -07007338
Paolo Bonzini9a2a05b2014-07-17 11:55:46 +02007339 vmx->nested.current_vmptr = -1ull;
Abel Gordone7953d72013-04-18 14:37:55 +03007340}
7341
Nadav Har'Elec378ae2011-05-25 23:02:54 +03007342/*
7343 * Free whatever needs to be freed from vmx->nested when L1 goes down, or
7344 * just stops using VMX.
7345 */
7346static void free_nested(struct vcpu_vmx *vmx)
7347{
Wanpeng Lib7455822017-11-22 14:04:00 -08007348 if (!vmx->nested.vmxon && !vmx->nested.smm.vmxon)
Nadav Har'Elec378ae2011-05-25 23:02:54 +03007349 return;
Paolo Bonzini9a2a05b2014-07-17 11:55:46 +02007350
Nadav Har'Elec378ae2011-05-25 23:02:54 +03007351 vmx->nested.vmxon = false;
Wanpeng Lib7455822017-11-22 14:04:00 -08007352 vmx->nested.smm.vmxon = false;
Wanpeng Li5c614b32015-10-13 09:18:36 -07007353 free_vpid(vmx->nested.vpid02);
David Matlack8ca44e82017-08-01 14:00:39 -07007354 vmx->nested.posted_intr_nv = -1;
7355 vmx->nested.current_vmptr = -1ull;
Radim Krčmářd048c092016-08-08 20:16:22 +02007356 if (vmx->nested.msr_bitmap) {
7357 free_page((unsigned long)vmx->nested.msr_bitmap);
7358 vmx->nested.msr_bitmap = NULL;
7359 }
Jim Mattson355f4fb2016-10-28 08:29:39 -07007360 if (enable_shadow_vmcs) {
David Matlack8ca44e82017-08-01 14:00:39 -07007361 vmx_disable_shadow_vmcs(vmx);
Jim Mattson355f4fb2016-10-28 08:29:39 -07007362 vmcs_clear(vmx->vmcs01.shadow_vmcs);
7363 free_vmcs(vmx->vmcs01.shadow_vmcs);
7364 vmx->vmcs01.shadow_vmcs = NULL;
7365 }
David Matlack4f2777b2016-07-13 17:16:37 -07007366 kfree(vmx->nested.cached_vmcs12);
Jim Mattson00647b42017-11-27 17:22:25 -06007367 /* Unpin physical memory we referred to in the vmcs02 */
Nadav Har'Elfe3ef052011-05-25 23:10:02 +03007368 if (vmx->nested.apic_access_page) {
David Hildenbrand53a70da2017-08-03 18:11:05 +02007369 kvm_release_page_dirty(vmx->nested.apic_access_page);
Paolo Bonzini48d89b92014-08-26 13:27:46 +02007370 vmx->nested.apic_access_page = NULL;
Nadav Har'Elfe3ef052011-05-25 23:10:02 +03007371 }
Wanpeng Lia7c0b072014-08-21 19:46:50 +08007372 if (vmx->nested.virtual_apic_page) {
David Hildenbrand53a70da2017-08-03 18:11:05 +02007373 kvm_release_page_dirty(vmx->nested.virtual_apic_page);
Paolo Bonzini48d89b92014-08-26 13:27:46 +02007374 vmx->nested.virtual_apic_page = NULL;
Wanpeng Lia7c0b072014-08-21 19:46:50 +08007375 }
Wincy Van705699a2015-02-03 23:58:17 +08007376 if (vmx->nested.pi_desc_page) {
7377 kunmap(vmx->nested.pi_desc_page);
David Hildenbrand53a70da2017-08-03 18:11:05 +02007378 kvm_release_page_dirty(vmx->nested.pi_desc_page);
Wincy Van705699a2015-02-03 23:58:17 +08007379 vmx->nested.pi_desc_page = NULL;
7380 vmx->nested.pi_desc = NULL;
7381 }
Nadav Har'Elff2f6fe2011-05-25 23:05:27 +03007382
Mark Kanda276c7962017-11-27 17:22:26 -06007383 vmx_nested_free_vmcs02(vmx);
Nadav Har'Elec378ae2011-05-25 23:02:54 +03007384}
7385
7386/* Emulate the VMXOFF instruction */
7387static int handle_vmoff(struct kvm_vcpu *vcpu)
7388{
7389 if (!nested_vmx_check_permission(vcpu))
7390 return 1;
7391 free_nested(to_vmx(vcpu));
Arthur Chunqi Lia25eb112013-07-04 15:03:33 +08007392 nested_vmx_succeed(vcpu);
Kyle Huey6affcbe2016-11-29 12:40:40 -08007393 return kvm_skip_emulated_instruction(vcpu);
Nadav Har'Elec378ae2011-05-25 23:02:54 +03007394}
7395
Nadav Har'El27d6c862011-05-25 23:06:59 +03007396/* Emulate the VMCLEAR instruction */
7397static int handle_vmclear(struct kvm_vcpu *vcpu)
7398{
7399 struct vcpu_vmx *vmx = to_vmx(vcpu);
Jim Mattson587d7e722017-03-02 12:41:48 -08007400 u32 zero = 0;
Nadav Har'El27d6c862011-05-25 23:06:59 +03007401 gpa_t vmptr;
Nadav Har'El27d6c862011-05-25 23:06:59 +03007402
7403 if (!nested_vmx_check_permission(vcpu))
7404 return 1;
7405
Radim Krčmářcbf71272017-05-19 15:48:51 +02007406 if (nested_vmx_get_vmptr(vcpu, &vmptr))
Nadav Har'El27d6c862011-05-25 23:06:59 +03007407 return 1;
7408
Radim Krčmářcbf71272017-05-19 15:48:51 +02007409 if (!PAGE_ALIGNED(vmptr) || (vmptr >> cpuid_maxphyaddr(vcpu))) {
7410 nested_vmx_failValid(vcpu, VMXERR_VMCLEAR_INVALID_ADDRESS);
7411 return kvm_skip_emulated_instruction(vcpu);
7412 }
7413
7414 if (vmptr == vmx->nested.vmxon_ptr) {
7415 nested_vmx_failValid(vcpu, VMXERR_VMCLEAR_VMXON_POINTER);
7416 return kvm_skip_emulated_instruction(vcpu);
7417 }
7418
Paolo Bonzini9a2a05b2014-07-17 11:55:46 +02007419 if (vmptr == vmx->nested.current_vmptr)
Abel Gordone7953d72013-04-18 14:37:55 +03007420 nested_release_vmcs12(vmx);
Nadav Har'El27d6c862011-05-25 23:06:59 +03007421
Jim Mattson587d7e722017-03-02 12:41:48 -08007422 kvm_vcpu_write_guest(vcpu,
7423 vmptr + offsetof(struct vmcs12, launch_state),
7424 &zero, sizeof(zero));
Nadav Har'El27d6c862011-05-25 23:06:59 +03007425
Nadav Har'El27d6c862011-05-25 23:06:59 +03007426 nested_vmx_succeed(vcpu);
Kyle Huey6affcbe2016-11-29 12:40:40 -08007427 return kvm_skip_emulated_instruction(vcpu);
Nadav Har'El27d6c862011-05-25 23:06:59 +03007428}
7429
Nadav Har'Elcd232ad2011-05-25 23:10:33 +03007430static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch);
7431
7432/* Emulate the VMLAUNCH instruction */
7433static int handle_vmlaunch(struct kvm_vcpu *vcpu)
7434{
7435 return nested_vmx_run(vcpu, true);
7436}
7437
7438/* Emulate the VMRESUME instruction */
7439static int handle_vmresume(struct kvm_vcpu *vcpu)
7440{
7441
7442 return nested_vmx_run(vcpu, false);
7443}
7444
Nadav Har'El49f705c2011-05-25 23:08:30 +03007445/*
7446 * Read a vmcs12 field. Since these can have varying lengths and we return
 7447 * one type, we choose the biggest type (u64) and zero-extend the return value
7448 * to that size. Note that the caller, handle_vmread, might need to use only
7449 * some of the bits we return here (e.g., on 32-bit guests, only 32 bits of
7450 * 64-bit fields are to be returned).
7451 */
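/*
 * Note: the access width is not passed in separately. vmcs_field_to_offset()
 * maps the field encoding to an offset inside struct vmcs12, and
 * vmcs_field_type() derives the width from the encoding itself (per the SDM,
 * bits 14:13 select 16-bit, 64-bit, 32-bit or natural width).
 */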
Paolo Bonzinia2ae9df2014-11-04 18:31:19 +01007452static inline int vmcs12_read_any(struct kvm_vcpu *vcpu,
7453 unsigned long field, u64 *ret)
Nadav Har'El49f705c2011-05-25 23:08:30 +03007454{
7455 short offset = vmcs_field_to_offset(field);
7456 char *p;
7457
7458 if (offset < 0)
Paolo Bonzinia2ae9df2014-11-04 18:31:19 +01007459 return offset;
Nadav Har'El49f705c2011-05-25 23:08:30 +03007460
7461 p = ((char *)(get_vmcs12(vcpu))) + offset;
7462
7463 switch (vmcs_field_type(field)) {
7464 case VMCS_FIELD_TYPE_NATURAL_WIDTH:
7465 *ret = *((natural_width *)p);
Paolo Bonzinia2ae9df2014-11-04 18:31:19 +01007466 return 0;
Nadav Har'El49f705c2011-05-25 23:08:30 +03007467 case VMCS_FIELD_TYPE_U16:
7468 *ret = *((u16 *)p);
Paolo Bonzinia2ae9df2014-11-04 18:31:19 +01007469 return 0;
Nadav Har'El49f705c2011-05-25 23:08:30 +03007470 case VMCS_FIELD_TYPE_U32:
7471 *ret = *((u32 *)p);
Paolo Bonzinia2ae9df2014-11-04 18:31:19 +01007472 return 0;
Nadav Har'El49f705c2011-05-25 23:08:30 +03007473 case VMCS_FIELD_TYPE_U64:
7474 *ret = *((u64 *)p);
Paolo Bonzinia2ae9df2014-11-04 18:31:19 +01007475 return 0;
Nadav Har'El49f705c2011-05-25 23:08:30 +03007476 default:
Paolo Bonzinia2ae9df2014-11-04 18:31:19 +01007477 WARN_ON(1);
7478 return -ENOENT;
Nadav Har'El49f705c2011-05-25 23:08:30 +03007479 }
7480}
7481
Abel Gordon20b97fe2013-04-18 14:36:25 +03007482
Paolo Bonzinia2ae9df2014-11-04 18:31:19 +01007483static inline int vmcs12_write_any(struct kvm_vcpu *vcpu,
7484 unsigned long field, u64 field_value){
Abel Gordon20b97fe2013-04-18 14:36:25 +03007485 short offset = vmcs_field_to_offset(field);
7486 char *p = ((char *) get_vmcs12(vcpu)) + offset;
7487 if (offset < 0)
Paolo Bonzinia2ae9df2014-11-04 18:31:19 +01007488 return offset;
Abel Gordon20b97fe2013-04-18 14:36:25 +03007489
7490 switch (vmcs_field_type(field)) {
7491 case VMCS_FIELD_TYPE_U16:
7492 *(u16 *)p = field_value;
Paolo Bonzinia2ae9df2014-11-04 18:31:19 +01007493 return 0;
Abel Gordon20b97fe2013-04-18 14:36:25 +03007494 case VMCS_FIELD_TYPE_U32:
7495 *(u32 *)p = field_value;
Paolo Bonzinia2ae9df2014-11-04 18:31:19 +01007496 return 0;
Abel Gordon20b97fe2013-04-18 14:36:25 +03007497 case VMCS_FIELD_TYPE_U64:
7498 *(u64 *)p = field_value;
Paolo Bonzinia2ae9df2014-11-04 18:31:19 +01007499 return 0;
Abel Gordon20b97fe2013-04-18 14:36:25 +03007500 case VMCS_FIELD_TYPE_NATURAL_WIDTH:
7501 *(natural_width *)p = field_value;
Paolo Bonzinia2ae9df2014-11-04 18:31:19 +01007502 return 0;
Abel Gordon20b97fe2013-04-18 14:36:25 +03007503 default:
Paolo Bonzinia2ae9df2014-11-04 18:31:19 +01007504 WARN_ON(1);
7505 return -ENOENT;
Abel Gordon20b97fe2013-04-18 14:36:25 +03007506 }
7507
7508}
7509
Abel Gordon16f5b902013-04-18 14:38:25 +03007510static void copy_shadow_to_vmcs12(struct vcpu_vmx *vmx)
7511{
7512 int i;
7513 unsigned long field;
7514 u64 field_value;
Jim Mattson355f4fb2016-10-28 08:29:39 -07007515 struct vmcs *shadow_vmcs = vmx->vmcs01.shadow_vmcs;
Mathias Krausec2bae892013-06-26 20:36:21 +02007516 const unsigned long *fields = shadow_read_write_fields;
7517 const int num_fields = max_shadow_read_write_fields;
Abel Gordon16f5b902013-04-18 14:38:25 +03007518
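	/*
	 * Keep preemption disabled around the vmcs_load()/vmcs_clear() pair
	 * below so this CPU's current-VMCS pointer cannot be switched out
	 * from under us before loaded_vmcs->vmcs is made current again.
	 */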
Jan Kiszka282da872014-10-08 18:05:39 +02007519 preempt_disable();
7520
Abel Gordon16f5b902013-04-18 14:38:25 +03007521 vmcs_load(shadow_vmcs);
7522
7523 for (i = 0; i < num_fields; i++) {
7524 field = fields[i];
7525 switch (vmcs_field_type(field)) {
7526 case VMCS_FIELD_TYPE_U16:
7527 field_value = vmcs_read16(field);
7528 break;
7529 case VMCS_FIELD_TYPE_U32:
7530 field_value = vmcs_read32(field);
7531 break;
7532 case VMCS_FIELD_TYPE_U64:
7533 field_value = vmcs_read64(field);
7534 break;
7535 case VMCS_FIELD_TYPE_NATURAL_WIDTH:
7536 field_value = vmcs_readl(field);
7537 break;
Paolo Bonzinia2ae9df2014-11-04 18:31:19 +01007538 default:
7539 WARN_ON(1);
7540 continue;
Abel Gordon16f5b902013-04-18 14:38:25 +03007541 }
7542 vmcs12_write_any(&vmx->vcpu, field, field_value);
7543 }
7544
7545 vmcs_clear(shadow_vmcs);
7546 vmcs_load(vmx->loaded_vmcs->vmcs);
Jan Kiszka282da872014-10-08 18:05:39 +02007547
7548 preempt_enable();
Abel Gordon16f5b902013-04-18 14:38:25 +03007549}
7550
Abel Gordonc3114422013-04-18 14:38:55 +03007551static void copy_vmcs12_to_shadow(struct vcpu_vmx *vmx)
7552{
Mathias Krausec2bae892013-06-26 20:36:21 +02007553 const unsigned long *fields[] = {
7554 shadow_read_write_fields,
7555 shadow_read_only_fields
Abel Gordonc3114422013-04-18 14:38:55 +03007556 };
Mathias Krausec2bae892013-06-26 20:36:21 +02007557 const int max_fields[] = {
Abel Gordonc3114422013-04-18 14:38:55 +03007558 max_shadow_read_write_fields,
7559 max_shadow_read_only_fields
7560 };
7561 int i, q;
7562 unsigned long field;
7563 u64 field_value = 0;
Jim Mattson355f4fb2016-10-28 08:29:39 -07007564 struct vmcs *shadow_vmcs = vmx->vmcs01.shadow_vmcs;
Abel Gordonc3114422013-04-18 14:38:55 +03007565
7566 vmcs_load(shadow_vmcs);
7567
Mathias Krausec2bae892013-06-26 20:36:21 +02007568 for (q = 0; q < ARRAY_SIZE(fields); q++) {
Abel Gordonc3114422013-04-18 14:38:55 +03007569 for (i = 0; i < max_fields[q]; i++) {
7570 field = fields[q][i];
7571 vmcs12_read_any(&vmx->vcpu, field, &field_value);
7572
7573 switch (vmcs_field_type(field)) {
7574 case VMCS_FIELD_TYPE_U16:
7575 vmcs_write16(field, (u16)field_value);
7576 break;
7577 case VMCS_FIELD_TYPE_U32:
7578 vmcs_write32(field, (u32)field_value);
7579 break;
7580 case VMCS_FIELD_TYPE_U64:
7581 vmcs_write64(field, (u64)field_value);
7582 break;
7583 case VMCS_FIELD_TYPE_NATURAL_WIDTH:
7584 vmcs_writel(field, (long)field_value);
7585 break;
Paolo Bonzinia2ae9df2014-11-04 18:31:19 +01007586 default:
7587 WARN_ON(1);
7588 break;
Abel Gordonc3114422013-04-18 14:38:55 +03007589 }
7590 }
7591 }
7592
7593 vmcs_clear(shadow_vmcs);
7594 vmcs_load(vmx->loaded_vmcs->vmcs);
7595}
7596
Nadav Har'El49f705c2011-05-25 23:08:30 +03007597/*
7598 * VMX instructions which assume a current vmcs12 (i.e., that VMPTRLD was
7599 * used before) all generate the same failure when it is missing.
7600 */
7601static int nested_vmx_check_vmcs12(struct kvm_vcpu *vcpu)
7602{
7603 struct vcpu_vmx *vmx = to_vmx(vcpu);
7604 if (vmx->nested.current_vmptr == -1ull) {
7605 nested_vmx_failInvalid(vcpu);
Nadav Har'El49f705c2011-05-25 23:08:30 +03007606 return 0;
7607 }
7608 return 1;
7609}
7610
7611static int handle_vmread(struct kvm_vcpu *vcpu)
7612{
7613 unsigned long field;
7614 u64 field_value;
7615 unsigned long exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
7616 u32 vmx_instruction_info = vmcs_read32(VMX_INSTRUCTION_INFO);
7617 gva_t gva = 0;
7618
Kyle Hueyeb277562016-11-29 12:40:39 -08007619 if (!nested_vmx_check_permission(vcpu))
Nadav Har'El49f705c2011-05-25 23:08:30 +03007620 return 1;
7621
Kyle Huey6affcbe2016-11-29 12:40:40 -08007622 if (!nested_vmx_check_vmcs12(vcpu))
7623 return kvm_skip_emulated_instruction(vcpu);
Kyle Hueyeb277562016-11-29 12:40:39 -08007624
Nadav Har'El49f705c2011-05-25 23:08:30 +03007625 /* Decode instruction info and find the field to read */
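	/*
	 * Per the VM-exit instruction-information layout: bits 31:28 name
	 * the register holding the VMCS field encoding, bit 10 selects a
	 * register (1) or memory (0) destination, and bits 6:3 name the
	 * destination register when bit 10 is set.
	 */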
Nadav Amit27e6fb52014-06-18 17:19:26 +03007626 field = kvm_register_readl(vcpu, (((vmx_instruction_info) >> 28) & 0xf));
Nadav Har'El49f705c2011-05-25 23:08:30 +03007627 /* Read the field, zero-extended to a u64 field_value */
Paolo Bonzinia2ae9df2014-11-04 18:31:19 +01007628 if (vmcs12_read_any(vcpu, field, &field_value) < 0) {
Nadav Har'El49f705c2011-05-25 23:08:30 +03007629 nested_vmx_failValid(vcpu, VMXERR_UNSUPPORTED_VMCS_COMPONENT);
Kyle Huey6affcbe2016-11-29 12:40:40 -08007630 return kvm_skip_emulated_instruction(vcpu);
Nadav Har'El49f705c2011-05-25 23:08:30 +03007631 }
7632 /*
7633 * Now copy part of this value to register or memory, as requested.
7634 * Note that the number of bits actually copied is 32 or 64 depending
7635 * on the guest's mode (32 or 64 bit), not on the given field's length.
7636 */
7637 if (vmx_instruction_info & (1u << 10)) {
Nadav Amit27e6fb52014-06-18 17:19:26 +03007638 kvm_register_writel(vcpu, (((vmx_instruction_info) >> 3) & 0xf),
Nadav Har'El49f705c2011-05-25 23:08:30 +03007639 field_value);
7640 } else {
7641 if (get_vmx_mem_address(vcpu, exit_qualification,
Eugene Korenevskyf9eb4af2015-04-17 02:22:21 +00007642 vmx_instruction_info, true, &gva))
Nadav Har'El49f705c2011-05-25 23:08:30 +03007643 return 1;
Jim Mattson70f3aac2017-04-26 08:53:46 -07007644 /* _system ok, as hardware has verified cpl=0 */
Nadav Har'El49f705c2011-05-25 23:08:30 +03007645 kvm_write_guest_virt_system(&vcpu->arch.emulate_ctxt, gva,
7646 &field_value, (is_long_mode(vcpu) ? 8 : 4), NULL);
7647 }
7648
7649 nested_vmx_succeed(vcpu);
Kyle Huey6affcbe2016-11-29 12:40:40 -08007650 return kvm_skip_emulated_instruction(vcpu);
Nadav Har'El49f705c2011-05-25 23:08:30 +03007651}
7652
7653
7654static int handle_vmwrite(struct kvm_vcpu *vcpu)
7655{
7656 unsigned long field;
7657 gva_t gva;
7658 unsigned long exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
7659 u32 vmx_instruction_info = vmcs_read32(VMX_INSTRUCTION_INFO);
Nadav Har'El49f705c2011-05-25 23:08:30 +03007660 /* The value to write might be 32 or 64 bits, depending on L1's long
7661 * mode, and eventually we need to write that into a field of several
7662 * possible lengths. The code below first zero-extends the value to 64
Adam Buchbinder6a6256f2016-02-23 15:34:30 -08007663 * bits (field_value), and then copies only the appropriate number of
Nadav Har'El49f705c2011-05-25 23:08:30 +03007664 * bits into the vmcs12 field.
7665 */
7666 u64 field_value = 0;
7667 struct x86_exception e;
7668
Kyle Hueyeb277562016-11-29 12:40:39 -08007669 if (!nested_vmx_check_permission(vcpu))
Nadav Har'El49f705c2011-05-25 23:08:30 +03007670 return 1;
7671
Kyle Huey6affcbe2016-11-29 12:40:40 -08007672 if (!nested_vmx_check_vmcs12(vcpu))
7673 return kvm_skip_emulated_instruction(vcpu);
Kyle Hueyeb277562016-11-29 12:40:39 -08007674
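	/*
	 * Bit 10 of the instruction information distinguishes a register
	 * source (named by bits 6:3) from a memory source, whose address is
	 * decoded by get_vmx_mem_address() below.
	 */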
Nadav Har'El49f705c2011-05-25 23:08:30 +03007675 if (vmx_instruction_info & (1u << 10))
Nadav Amit27e6fb52014-06-18 17:19:26 +03007676 field_value = kvm_register_readl(vcpu,
Nadav Har'El49f705c2011-05-25 23:08:30 +03007677 (((vmx_instruction_info) >> 3) & 0xf));
7678 else {
7679 if (get_vmx_mem_address(vcpu, exit_qualification,
Eugene Korenevskyf9eb4af2015-04-17 02:22:21 +00007680 vmx_instruction_info, false, &gva))
Nadav Har'El49f705c2011-05-25 23:08:30 +03007681 return 1;
7682 if (kvm_read_guest_virt(&vcpu->arch.emulate_ctxt, gva,
Nadav Amit27e6fb52014-06-18 17:19:26 +03007683 &field_value, (is_64_bit_mode(vcpu) ? 8 : 4), &e)) {
Nadav Har'El49f705c2011-05-25 23:08:30 +03007684 kvm_inject_page_fault(vcpu, &e);
7685 return 1;
7686 }
7687 }
7688
7689
Nadav Amit27e6fb52014-06-18 17:19:26 +03007690 field = kvm_register_readl(vcpu, (((vmx_instruction_info) >> 28) & 0xf));
Nadav Har'El49f705c2011-05-25 23:08:30 +03007691 if (vmcs_field_readonly(field)) {
7692 nested_vmx_failValid(vcpu,
7693 VMXERR_VMWRITE_READ_ONLY_VMCS_COMPONENT);
Kyle Huey6affcbe2016-11-29 12:40:40 -08007694 return kvm_skip_emulated_instruction(vcpu);
Nadav Har'El49f705c2011-05-25 23:08:30 +03007695 }
7696
Paolo Bonzinia2ae9df2014-11-04 18:31:19 +01007697 if (vmcs12_write_any(vcpu, field, field_value) < 0) {
Nadav Har'El49f705c2011-05-25 23:08:30 +03007698 nested_vmx_failValid(vcpu, VMXERR_UNSUPPORTED_VMCS_COMPONENT);
Kyle Huey6affcbe2016-11-29 12:40:40 -08007699 return kvm_skip_emulated_instruction(vcpu);
Nadav Har'El49f705c2011-05-25 23:08:30 +03007700 }
7701
7702 nested_vmx_succeed(vcpu);
Kyle Huey6affcbe2016-11-29 12:40:40 -08007703 return kvm_skip_emulated_instruction(vcpu);
Nadav Har'El49f705c2011-05-25 23:08:30 +03007704}
7705
Jim Mattsona8bc2842016-11-30 12:03:44 -08007706static void set_current_vmptr(struct vcpu_vmx *vmx, gpa_t vmptr)
7707{
7708 vmx->nested.current_vmptr = vmptr;
7709 if (enable_shadow_vmcs) {
7710 vmcs_set_bits(SECONDARY_VM_EXEC_CONTROL,
7711 SECONDARY_EXEC_SHADOW_VMCS);
7712 vmcs_write64(VMCS_LINK_POINTER,
7713 __pa(vmx->vmcs01.shadow_vmcs));
7714 vmx->nested.sync_shadow_vmcs = true;
7715 }
7716}
7717
Nadav Har'El63846662011-05-25 23:07:29 +03007718/* Emulate the VMPTRLD instruction */
7719static int handle_vmptrld(struct kvm_vcpu *vcpu)
7720{
7721 struct vcpu_vmx *vmx = to_vmx(vcpu);
Nadav Har'El63846662011-05-25 23:07:29 +03007722 gpa_t vmptr;
Nadav Har'El63846662011-05-25 23:07:29 +03007723
7724 if (!nested_vmx_check_permission(vcpu))
7725 return 1;
7726
Radim Krčmářcbf71272017-05-19 15:48:51 +02007727 if (nested_vmx_get_vmptr(vcpu, &vmptr))
Nadav Har'El63846662011-05-25 23:07:29 +03007728 return 1;
7729
Radim Krčmářcbf71272017-05-19 15:48:51 +02007730 if (!PAGE_ALIGNED(vmptr) || (vmptr >> cpuid_maxphyaddr(vcpu))) {
7731 nested_vmx_failValid(vcpu, VMXERR_VMPTRLD_INVALID_ADDRESS);
7732 return kvm_skip_emulated_instruction(vcpu);
7733 }
7734
7735 if (vmptr == vmx->nested.vmxon_ptr) {
7736 nested_vmx_failValid(vcpu, VMXERR_VMPTRLD_VMXON_POINTER);
7737 return kvm_skip_emulated_instruction(vcpu);
7738 }
7739
Nadav Har'El63846662011-05-25 23:07:29 +03007740 if (vmx->nested.current_vmptr != vmptr) {
7741 struct vmcs12 *new_vmcs12;
7742 struct page *page;
David Hildenbrand5e2f30b2017-08-03 18:11:04 +02007743 page = kvm_vcpu_gpa_to_page(vcpu, vmptr);
7744 if (is_error_page(page)) {
Nadav Har'El63846662011-05-25 23:07:29 +03007745 nested_vmx_failInvalid(vcpu);
Kyle Huey6affcbe2016-11-29 12:40:40 -08007746 return kvm_skip_emulated_instruction(vcpu);
Nadav Har'El63846662011-05-25 23:07:29 +03007747 }
7748 new_vmcs12 = kmap(page);
7749 if (new_vmcs12->revision_id != VMCS12_REVISION) {
7750 kunmap(page);
David Hildenbrand53a70da2017-08-03 18:11:05 +02007751 kvm_release_page_clean(page);
Nadav Har'El63846662011-05-25 23:07:29 +03007752 nested_vmx_failValid(vcpu,
7753 VMXERR_VMPTRLD_INCORRECT_VMCS_REVISION_ID);
Kyle Huey6affcbe2016-11-29 12:40:40 -08007754 return kvm_skip_emulated_instruction(vcpu);
Nadav Har'El63846662011-05-25 23:07:29 +03007755 }
Nadav Har'El63846662011-05-25 23:07:29 +03007756
Paolo Bonzini9a2a05b2014-07-17 11:55:46 +02007757 nested_release_vmcs12(vmx);
David Matlack4f2777b2016-07-13 17:16:37 -07007758 /*
7759 * Load VMCS12 from guest memory since it is not already
7760 * cached.
7761 */
Paolo Bonzini9f744c52017-07-27 15:54:46 +02007762 memcpy(vmx->nested.cached_vmcs12, new_vmcs12, VMCS12_SIZE);
7763 kunmap(page);
David Hildenbrand53a70da2017-08-03 18:11:05 +02007764 kvm_release_page_clean(page);
Paolo Bonzini9f744c52017-07-27 15:54:46 +02007765
Jim Mattsona8bc2842016-11-30 12:03:44 -08007766 set_current_vmptr(vmx, vmptr);
Nadav Har'El63846662011-05-25 23:07:29 +03007767 }
7768
7769 nested_vmx_succeed(vcpu);
Kyle Huey6affcbe2016-11-29 12:40:40 -08007770 return kvm_skip_emulated_instruction(vcpu);
Nadav Har'El63846662011-05-25 23:07:29 +03007771}
7772
Nadav Har'El6a4d7552011-05-25 23:08:00 +03007773/* Emulate the VMPTRST instruction */
7774static int handle_vmptrst(struct kvm_vcpu *vcpu)
7775{
7776 unsigned long exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
7777 u32 vmx_instruction_info = vmcs_read32(VMX_INSTRUCTION_INFO);
7778 gva_t vmcs_gva;
7779 struct x86_exception e;
7780
7781 if (!nested_vmx_check_permission(vcpu))
7782 return 1;
7783
7784 if (get_vmx_mem_address(vcpu, exit_qualification,
Eugene Korenevskyf9eb4af2015-04-17 02:22:21 +00007785 vmx_instruction_info, true, &vmcs_gva))
Nadav Har'El6a4d7552011-05-25 23:08:00 +03007786 return 1;
Jim Mattson70f3aac2017-04-26 08:53:46 -07007787 /* ok to use *_system, as hardware has verified cpl=0 */
Nadav Har'El6a4d7552011-05-25 23:08:00 +03007788 if (kvm_write_guest_virt_system(&vcpu->arch.emulate_ctxt, vmcs_gva,
7789 (void *)&to_vmx(vcpu)->nested.current_vmptr,
7790 sizeof(u64), &e)) {
7791 kvm_inject_page_fault(vcpu, &e);
7792 return 1;
7793 }
7794 nested_vmx_succeed(vcpu);
Kyle Huey6affcbe2016-11-29 12:40:40 -08007795 return kvm_skip_emulated_instruction(vcpu);
Nadav Har'El6a4d7552011-05-25 23:08:00 +03007796}
7797
Nadav Har'Elbfd0a562013-08-05 11:07:17 +03007798/* Emulate the INVEPT instruction */
7799static int handle_invept(struct kvm_vcpu *vcpu)
7800{
Wincy Vanb9c237b2015-02-03 23:56:30 +08007801 struct vcpu_vmx *vmx = to_vmx(vcpu);
Nadav Har'Elbfd0a562013-08-05 11:07:17 +03007802 u32 vmx_instruction_info, types;
7803 unsigned long type;
7804 gva_t gva;
7805 struct x86_exception e;
7806 struct {
7807 u64 eptp, gpa;
7808 } operand;
Nadav Har'Elbfd0a562013-08-05 11:07:17 +03007809
Wincy Vanb9c237b2015-02-03 23:56:30 +08007810 if (!(vmx->nested.nested_vmx_secondary_ctls_high &
7811 SECONDARY_EXEC_ENABLE_EPT) ||
7812 !(vmx->nested.nested_vmx_ept_caps & VMX_EPT_INVEPT_BIT)) {
Nadav Har'Elbfd0a562013-08-05 11:07:17 +03007813 kvm_queue_exception(vcpu, UD_VECTOR);
7814 return 1;
7815 }
7816
7817 if (!nested_vmx_check_permission(vcpu))
7818 return 1;
7819
Nadav Har'Elbfd0a562013-08-05 11:07:17 +03007820 vmx_instruction_info = vmcs_read32(VMX_INSTRUCTION_INFO);
Nadav Amit27e6fb52014-06-18 17:19:26 +03007821 type = kvm_register_readl(vcpu, (vmx_instruction_info >> 28) & 0xf);
Nadav Har'Elbfd0a562013-08-05 11:07:17 +03007822
Wincy Vanb9c237b2015-02-03 23:56:30 +08007823 types = (vmx->nested.nested_vmx_ept_caps >> VMX_EPT_EXTENT_SHIFT) & 6;
Nadav Har'Elbfd0a562013-08-05 11:07:17 +03007824
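	/*
	 * "types" now holds the INVEPT types advertised to L1: bit 1 for
	 * single-context and bit 2 for global (all-context) invalidation.
	 * The requested type must be one of those.
	 */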
Jim Mattson85c856b2016-10-26 08:38:38 -07007825 if (type >= 32 || !(types & (1 << type))) {
Nadav Har'Elbfd0a562013-08-05 11:07:17 +03007826 nested_vmx_failValid(vcpu,
7827 VMXERR_INVALID_OPERAND_TO_INVEPT_INVVPID);
Kyle Huey6affcbe2016-11-29 12:40:40 -08007828 return kvm_skip_emulated_instruction(vcpu);
Nadav Har'Elbfd0a562013-08-05 11:07:17 +03007829 }
7830
7831 /* According to the Intel VMX instruction reference, the memory
7832 * operand is read even if it isn't needed (e.g., for type==global)
7833 */
7834 if (get_vmx_mem_address(vcpu, vmcs_readl(EXIT_QUALIFICATION),
Eugene Korenevskyf9eb4af2015-04-17 02:22:21 +00007835 vmx_instruction_info, false, &gva))
Nadav Har'Elbfd0a562013-08-05 11:07:17 +03007836 return 1;
7837 if (kvm_read_guest_virt(&vcpu->arch.emulate_ctxt, gva, &operand,
7838 sizeof(operand), &e)) {
7839 kvm_inject_page_fault(vcpu, &e);
7840 return 1;
7841 }
7842
7843 switch (type) {
Nadav Har'Elbfd0a562013-08-05 11:07:17 +03007844 case VMX_EPT_EXTENT_GLOBAL:
Bandan Das45e11812016-08-02 16:32:36 -04007845 /*
7846 * TODO: track mappings and invalidate
7847 * single context requests appropriately
7848 */
7849 case VMX_EPT_EXTENT_CONTEXT:
Nadav Har'Elbfd0a562013-08-05 11:07:17 +03007850 kvm_mmu_sync_roots(vcpu);
Liang Chen77c39132014-09-18 12:38:37 -04007851 kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
Nadav Har'Elbfd0a562013-08-05 11:07:17 +03007852 nested_vmx_succeed(vcpu);
7853 break;
7854 default:
7855 BUG_ON(1);
7856 break;
7857 }
7858
Kyle Huey6affcbe2016-11-29 12:40:40 -08007859 return kvm_skip_emulated_instruction(vcpu);
Nadav Har'Elbfd0a562013-08-05 11:07:17 +03007860}
7861
Petr Matouseka642fc32014-09-23 20:22:30 +02007862static int handle_invvpid(struct kvm_vcpu *vcpu)
7863{
Wanpeng Li99b83ac2015-10-13 09:12:21 -07007864 struct vcpu_vmx *vmx = to_vmx(vcpu);
7865 u32 vmx_instruction_info;
7866 unsigned long type, types;
7867 gva_t gva;
7868 struct x86_exception e;
Jim Mattson40352602017-06-28 09:37:37 -07007869 struct {
7870 u64 vpid;
7871 u64 gla;
7872 } operand;
Wanpeng Li99b83ac2015-10-13 09:12:21 -07007873
7874 if (!(vmx->nested.nested_vmx_secondary_ctls_high &
7875 SECONDARY_EXEC_ENABLE_VPID) ||
7876 !(vmx->nested.nested_vmx_vpid_caps & VMX_VPID_INVVPID_BIT)) {
7877 kvm_queue_exception(vcpu, UD_VECTOR);
7878 return 1;
7879 }
7880
7881 if (!nested_vmx_check_permission(vcpu))
7882 return 1;
7883
7884 vmx_instruction_info = vmcs_read32(VMX_INSTRUCTION_INFO);
7885 type = kvm_register_readl(vcpu, (vmx_instruction_info >> 28) & 0xf);
7886
Jan Dakinevichbcdde302016-10-28 07:00:30 +03007887 types = (vmx->nested.nested_vmx_vpid_caps &
7888 VMX_VPID_EXTENT_SUPPORTED_MASK) >> 8;
Wanpeng Li99b83ac2015-10-13 09:12:21 -07007889
Jim Mattson85c856b2016-10-26 08:38:38 -07007890 if (type >= 32 || !(types & (1 << type))) {
Wanpeng Li99b83ac2015-10-13 09:12:21 -07007891 nested_vmx_failValid(vcpu,
7892 VMXERR_INVALID_OPERAND_TO_INVEPT_INVVPID);
Kyle Huey6affcbe2016-11-29 12:40:40 -08007893 return kvm_skip_emulated_instruction(vcpu);
Wanpeng Li99b83ac2015-10-13 09:12:21 -07007894 }
7895
 7896	/* According to the Intel VMX instruction reference, the memory
7897 * operand is read even if it isn't needed (e.g., for type==global)
7898 */
7899 if (get_vmx_mem_address(vcpu, vmcs_readl(EXIT_QUALIFICATION),
7900 vmx_instruction_info, false, &gva))
7901 return 1;
Jim Mattson40352602017-06-28 09:37:37 -07007902 if (kvm_read_guest_virt(&vcpu->arch.emulate_ctxt, gva, &operand,
7903 sizeof(operand), &e)) {
Wanpeng Li99b83ac2015-10-13 09:12:21 -07007904 kvm_inject_page_fault(vcpu, &e);
7905 return 1;
7906 }
Jim Mattson40352602017-06-28 09:37:37 -07007907 if (operand.vpid >> 16) {
7908 nested_vmx_failValid(vcpu,
7909 VMXERR_INVALID_OPERAND_TO_INVEPT_INVVPID);
7910 return kvm_skip_emulated_instruction(vcpu);
7911 }
Wanpeng Li99b83ac2015-10-13 09:12:21 -07007912
7913 switch (type) {
Jan Dakinevichbcdde302016-10-28 07:00:30 +03007914 case VMX_VPID_EXTENT_INDIVIDUAL_ADDR:
Yu Zhangfd8cb432017-08-24 20:27:56 +08007915 if (is_noncanonical_address(operand.gla, vcpu)) {
Jim Mattson40352602017-06-28 09:37:37 -07007916 nested_vmx_failValid(vcpu,
7917 VMXERR_INVALID_OPERAND_TO_INVEPT_INVVPID);
7918 return kvm_skip_emulated_instruction(vcpu);
7919 }
7920 /* fall through */
Paolo Bonzinief697a72016-03-18 16:58:38 +01007921 case VMX_VPID_EXTENT_SINGLE_CONTEXT:
Jan Dakinevichbcdde302016-10-28 07:00:30 +03007922 case VMX_VPID_EXTENT_SINGLE_NON_GLOBAL:
Jim Mattson40352602017-06-28 09:37:37 -07007923 if (!operand.vpid) {
Jan Dakinevichbcdde302016-10-28 07:00:30 +03007924 nested_vmx_failValid(vcpu,
7925 VMXERR_INVALID_OPERAND_TO_INVEPT_INVVPID);
Kyle Huey6affcbe2016-11-29 12:40:40 -08007926 return kvm_skip_emulated_instruction(vcpu);
Jan Dakinevichbcdde302016-10-28 07:00:30 +03007927 }
7928 break;
Wanpeng Li99b83ac2015-10-13 09:12:21 -07007929 case VMX_VPID_EXTENT_ALL_CONTEXT:
Wanpeng Li99b83ac2015-10-13 09:12:21 -07007930 break;
7931 default:
Jan Dakinevichbcdde302016-10-28 07:00:30 +03007932 WARN_ON_ONCE(1);
Kyle Huey6affcbe2016-11-29 12:40:40 -08007933 return kvm_skip_emulated_instruction(vcpu);
Wanpeng Li99b83ac2015-10-13 09:12:21 -07007934 }
7935
Wanpeng Lic2ba05c2017-12-12 17:33:03 -08007936 __vmx_flush_tlb(vcpu, vmx->nested.vpid02, true);
Jan Dakinevichbcdde302016-10-28 07:00:30 +03007937 nested_vmx_succeed(vcpu);
7938
Kyle Huey6affcbe2016-11-29 12:40:40 -08007939 return kvm_skip_emulated_instruction(vcpu);
Petr Matouseka642fc32014-09-23 20:22:30 +02007940}
7941
Kai Huang843e4332015-01-28 10:54:28 +08007942static int handle_pml_full(struct kvm_vcpu *vcpu)
7943{
7944 unsigned long exit_qualification;
7945
7946 trace_kvm_pml_full(vcpu->vcpu_id);
7947
7948 exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
7949
7950 /*
7951 * PML buffer FULL happened while executing iret from NMI,
7952 * "blocked by NMI" bit has to be set before next VM entry.
7953 */
7954 if (!(to_vmx(vcpu)->idt_vectoring_info & VECTORING_INFO_VALID_MASK) &&
Paolo Bonzinid02fcf52017-11-06 13:31:13 +01007955 enable_vnmi &&
Kai Huang843e4332015-01-28 10:54:28 +08007956 (exit_qualification & INTR_INFO_UNBLOCK_NMI))
7957 vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO,
7958 GUEST_INTR_STATE_NMI);
7959
7960 /*
7961 * PML buffer already flushed at beginning of VMEXIT. Nothing to do
 7962	 * here, and there's no userspace involvement needed for PML.
7963 */
7964 return 1;
7965}
7966
Yunhong Jiang64672c92016-06-13 14:19:59 -07007967static int handle_preemption_timer(struct kvm_vcpu *vcpu)
7968{
7969 kvm_lapic_expired_hv_timer(vcpu);
7970 return 1;
7971}
7972
Bandan Das41ab9372017-08-03 15:54:43 -04007973static bool valid_ept_address(struct kvm_vcpu *vcpu, u64 address)
7974{
7975 struct vcpu_vmx *vmx = to_vmx(vcpu);
Bandan Das41ab9372017-08-03 15:54:43 -04007976 int maxphyaddr = cpuid_maxphyaddr(vcpu);
7977
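	/*
	 * EPTP layout being validated (per the SDM): bits 2:0 memory type,
	 * bits 5:3 page-walk length minus one, bit 6 accessed/dirty enable,
	 * bits 11:7 reserved, and the page-frame address above that.
	 */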
7978 /* Check for memory type validity */
David Hildenbrandbb97a012017-08-10 23:15:28 +02007979 switch (address & VMX_EPTP_MT_MASK) {
7980 case VMX_EPTP_MT_UC:
Bandan Das41ab9372017-08-03 15:54:43 -04007981 if (!(vmx->nested.nested_vmx_ept_caps & VMX_EPTP_UC_BIT))
7982 return false;
7983 break;
David Hildenbrandbb97a012017-08-10 23:15:28 +02007984 case VMX_EPTP_MT_WB:
Bandan Das41ab9372017-08-03 15:54:43 -04007985 if (!(vmx->nested.nested_vmx_ept_caps & VMX_EPTP_WB_BIT))
7986 return false;
7987 break;
7988 default:
7989 return false;
7990 }
7991
David Hildenbrandbb97a012017-08-10 23:15:28 +02007992	/* only a page-walk length of 4 is valid */
7993 if ((address & VMX_EPTP_PWL_MASK) != VMX_EPTP_PWL_4)
Bandan Das41ab9372017-08-03 15:54:43 -04007994 return false;
7995
7996 /* Reserved bits should not be set */
7997 if (address >> maxphyaddr || ((address >> 7) & 0x1f))
7998 return false;
7999
8000 /* AD, if set, should be supported */
David Hildenbrandbb97a012017-08-10 23:15:28 +02008001 if (address & VMX_EPTP_AD_ENABLE_BIT) {
Bandan Das41ab9372017-08-03 15:54:43 -04008002 if (!(vmx->nested.nested_vmx_ept_caps & VMX_EPT_AD_BIT))
8003 return false;
8004 }
8005
8006 return true;
8007}
8008
8009static int nested_vmx_eptp_switching(struct kvm_vcpu *vcpu,
8010 struct vmcs12 *vmcs12)
8011{
8012 u32 index = vcpu->arch.regs[VCPU_REGS_RCX];
8013 u64 address;
8014 bool accessed_dirty;
8015 struct kvm_mmu *mmu = vcpu->arch.walk_mmu;
8016
8017 if (!nested_cpu_has_eptp_switching(vmcs12) ||
8018 !nested_cpu_has_ept(vmcs12))
8019 return 1;
8020
8021 if (index >= VMFUNC_EPTP_ENTRIES)
8022 return 1;
8023
8024
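	/*
	 * The EPTP list is a guest page holding VMFUNC_EPTP_ENTRIES 64-bit
	 * EPTP values; fetch the selected 8-byte entry straight from guest
	 * memory rather than mapping the whole page.
	 */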
8025 if (kvm_vcpu_read_guest_page(vcpu, vmcs12->eptp_list_address >> PAGE_SHIFT,
8026 &address, index * 8, 8))
8027 return 1;
8028
David Hildenbrandbb97a012017-08-10 23:15:28 +02008029 accessed_dirty = !!(address & VMX_EPTP_AD_ENABLE_BIT);
Bandan Das41ab9372017-08-03 15:54:43 -04008030
8031 /*
8032 * If the (L2) guest does a vmfunc to the currently
8033 * active ept pointer, we don't have to do anything else
8034 */
8035 if (vmcs12->ept_pointer != address) {
8036 if (!valid_ept_address(vcpu, address))
8037 return 1;
8038
8039 kvm_mmu_unload(vcpu);
8040 mmu->ept_ad = accessed_dirty;
8041 mmu->base_role.ad_disabled = !accessed_dirty;
8042 vmcs12->ept_pointer = address;
8043 /*
8044 * TODO: Check what's the correct approach in case
8045 * mmu reload fails. Currently, we just let the next
8046 * reload potentially fail
8047 */
8048 kvm_mmu_reload(vcpu);
8049 }
8050
8051 return 0;
8052}
8053
Bandan Das2a499e42017-08-03 15:54:41 -04008054static int handle_vmfunc(struct kvm_vcpu *vcpu)
8055{
Bandan Das27c42a12017-08-03 15:54:42 -04008056 struct vcpu_vmx *vmx = to_vmx(vcpu);
8057 struct vmcs12 *vmcs12;
8058 u32 function = vcpu->arch.regs[VCPU_REGS_RAX];
8059
8060 /*
8061 * VMFUNC is only supported for nested guests, but we always enable the
8062 * secondary control for simplicity; for non-nested mode, fake that we
8063 * didn't by injecting #UD.
8064 */
8065 if (!is_guest_mode(vcpu)) {
8066 kvm_queue_exception(vcpu, UD_VECTOR);
8067 return 1;
8068 }
8069
8070 vmcs12 = get_vmcs12(vcpu);
8071 if ((vmcs12->vm_function_control & (1 << function)) == 0)
8072 goto fail;
Bandan Das41ab9372017-08-03 15:54:43 -04008073
8074 switch (function) {
8075 case 0:
8076 if (nested_vmx_eptp_switching(vcpu, vmcs12))
8077 goto fail;
8078 break;
8079 default:
8080 goto fail;
8081 }
8082 return kvm_skip_emulated_instruction(vcpu);
Bandan Das27c42a12017-08-03 15:54:42 -04008083
8084fail:
8085 nested_vmx_vmexit(vcpu, vmx->exit_reason,
8086 vmcs_read32(VM_EXIT_INTR_INFO),
8087 vmcs_readl(EXIT_QUALIFICATION));
Bandan Das2a499e42017-08-03 15:54:41 -04008088 return 1;
8089}
8090
Nadav Har'El0140cae2011-05-25 23:06:28 +03008091/*
Avi Kivity6aa8b732006-12-10 02:21:36 -08008092 * The exit handlers return 1 if the exit was handled fully and guest execution
8093 * may resume. Otherwise they set the kvm_run parameter to indicate what needs
8094 * to be done to userspace and return 0.
8095 */
Mathias Krause772e0312012-08-30 01:30:19 +02008096static int (*const kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu) = {
Avi Kivity6aa8b732006-12-10 02:21:36 -08008097 [EXIT_REASON_EXCEPTION_NMI] = handle_exception,
8098 [EXIT_REASON_EXTERNAL_INTERRUPT] = handle_external_interrupt,
Avi Kivity988ad742007-02-12 00:54:36 -08008099 [EXIT_REASON_TRIPLE_FAULT] = handle_triple_fault,
Sheng Yangf08864b2008-05-15 18:23:25 +08008100 [EXIT_REASON_NMI_WINDOW] = handle_nmi_window,
Avi Kivity6aa8b732006-12-10 02:21:36 -08008101 [EXIT_REASON_IO_INSTRUCTION] = handle_io,
Avi Kivity6aa8b732006-12-10 02:21:36 -08008102 [EXIT_REASON_CR_ACCESS] = handle_cr,
8103 [EXIT_REASON_DR_ACCESS] = handle_dr,
8104 [EXIT_REASON_CPUID] = handle_cpuid,
8105 [EXIT_REASON_MSR_READ] = handle_rdmsr,
8106 [EXIT_REASON_MSR_WRITE] = handle_wrmsr,
8107 [EXIT_REASON_PENDING_INTERRUPT] = handle_interrupt_window,
8108 [EXIT_REASON_HLT] = handle_halt,
Gleb Natapovec25d5e2010-11-01 15:35:01 +02008109 [EXIT_REASON_INVD] = handle_invd,
Marcelo Tosattia7052892008-09-23 13:18:35 -03008110 [EXIT_REASON_INVLPG] = handle_invlpg,
Avi Kivityfee84b02011-11-10 14:57:25 +02008111 [EXIT_REASON_RDPMC] = handle_rdpmc,
Ingo Molnarc21415e2007-02-19 14:37:47 +02008112 [EXIT_REASON_VMCALL] = handle_vmcall,
Nadav Har'El27d6c862011-05-25 23:06:59 +03008113 [EXIT_REASON_VMCLEAR] = handle_vmclear,
Nadav Har'Elcd232ad2011-05-25 23:10:33 +03008114 [EXIT_REASON_VMLAUNCH] = handle_vmlaunch,
Nadav Har'El63846662011-05-25 23:07:29 +03008115 [EXIT_REASON_VMPTRLD] = handle_vmptrld,
Nadav Har'El6a4d7552011-05-25 23:08:00 +03008116 [EXIT_REASON_VMPTRST] = handle_vmptrst,
Nadav Har'El49f705c2011-05-25 23:08:30 +03008117 [EXIT_REASON_VMREAD] = handle_vmread,
Nadav Har'Elcd232ad2011-05-25 23:10:33 +03008118 [EXIT_REASON_VMRESUME] = handle_vmresume,
Nadav Har'El49f705c2011-05-25 23:08:30 +03008119 [EXIT_REASON_VMWRITE] = handle_vmwrite,
Nadav Har'Elec378ae2011-05-25 23:02:54 +03008120 [EXIT_REASON_VMOFF] = handle_vmoff,
8121 [EXIT_REASON_VMON] = handle_vmon,
Sheng Yangf78e0e22007-10-29 09:40:42 +08008122 [EXIT_REASON_TPR_BELOW_THRESHOLD] = handle_tpr_below_threshold,
8123 [EXIT_REASON_APIC_ACCESS] = handle_apic_access,
Yang Zhang83d4c282013-01-25 10:18:49 +08008124 [EXIT_REASON_APIC_WRITE] = handle_apic_write,
Yang Zhangc7c9c562013-01-25 10:18:51 +08008125 [EXIT_REASON_EOI_INDUCED] = handle_apic_eoi_induced,
Eddie Donge5edaa02007-11-11 12:28:35 +02008126 [EXIT_REASON_WBINVD] = handle_wbinvd,
Dexuan Cui2acf9232010-06-10 11:27:12 +08008127 [EXIT_REASON_XSETBV] = handle_xsetbv,
Izik Eidus37817f22008-03-24 23:14:53 +02008128 [EXIT_REASON_TASK_SWITCH] = handle_task_switch,
Andi Kleena0861c02009-06-08 17:37:09 +08008129 [EXIT_REASON_MCE_DURING_VMENTRY] = handle_machine_check,
Paolo Bonzini0367f202016-07-12 10:44:55 +02008130 [EXIT_REASON_GDTR_IDTR] = handle_desc,
8131 [EXIT_REASON_LDTR_TR] = handle_desc,
Marcelo Tosatti68f89402009-06-11 12:07:43 -03008132 [EXIT_REASON_EPT_VIOLATION] = handle_ept_violation,
8133 [EXIT_REASON_EPT_MISCONFIG] = handle_ept_misconfig,
Zhai, Edwin4b8d54f2009-10-09 18:03:20 +08008134 [EXIT_REASON_PAUSE_INSTRUCTION] = handle_pause,
Gabriel L. Somlo87c00572014-05-07 16:52:13 -04008135 [EXIT_REASON_MWAIT_INSTRUCTION] = handle_mwait,
Mihai Donțu5f3d45e2015-07-05 20:08:57 +03008136 [EXIT_REASON_MONITOR_TRAP_FLAG] = handle_monitor_trap,
Gabriel L. Somlo87c00572014-05-07 16:52:13 -04008137 [EXIT_REASON_MONITOR_INSTRUCTION] = handle_monitor,
Nadav Har'Elbfd0a562013-08-05 11:07:17 +03008138 [EXIT_REASON_INVEPT] = handle_invept,
Petr Matouseka642fc32014-09-23 20:22:30 +02008139 [EXIT_REASON_INVVPID] = handle_invvpid,
Jim Mattson45ec3682017-08-23 16:32:04 -07008140 [EXIT_REASON_RDRAND] = handle_invalid_op,
Jim Mattson75f4fc82017-08-23 16:32:03 -07008141 [EXIT_REASON_RDSEED] = handle_invalid_op,
Wanpeng Lif53cd632014-12-02 19:14:58 +08008142 [EXIT_REASON_XSAVES] = handle_xsaves,
8143 [EXIT_REASON_XRSTORS] = handle_xrstors,
Kai Huang843e4332015-01-28 10:54:28 +08008144 [EXIT_REASON_PML_FULL] = handle_pml_full,
Bandan Das2a499e42017-08-03 15:54:41 -04008145 [EXIT_REASON_VMFUNC] = handle_vmfunc,
Yunhong Jiang64672c92016-06-13 14:19:59 -07008146 [EXIT_REASON_PREEMPTION_TIMER] = handle_preemption_timer,
Avi Kivity6aa8b732006-12-10 02:21:36 -08008147};
8148
8149static const int kvm_vmx_max_exit_handlers =
Robert P. J. Day50a34852007-06-03 13:35:29 -04008150 ARRAY_SIZE(kvm_vmx_exit_handlers);
Avi Kivity6aa8b732006-12-10 02:21:36 -08008151
Jan Kiszka908a7bd2013-02-18 11:21:16 +01008152static bool nested_vmx_exit_handled_io(struct kvm_vcpu *vcpu,
8153 struct vmcs12 *vmcs12)
8154{
8155 unsigned long exit_qualification;
8156 gpa_t bitmap, last_bitmap;
8157 unsigned int port;
8158 int size;
8159 u8 b;
8160
Jan Kiszka908a7bd2013-02-18 11:21:16 +01008161 if (!nested_cpu_has(vmcs12, CPU_BASED_USE_IO_BITMAPS))
Zhihui Zhang2f0a6392013-12-30 15:56:29 -05008162 return nested_cpu_has(vmcs12, CPU_BASED_UNCOND_IO_EXITING);
Jan Kiszka908a7bd2013-02-18 11:21:16 +01008163
8164 exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
8165
8166 port = exit_qualification >> 16;
8167 size = (exit_qualification & 7) + 1;
8168
8169 last_bitmap = (gpa_t)-1;
8170 b = -1;
8171
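	/*
	 * I/O bitmap A covers ports 0x0000-0x7fff and bitmap B covers
	 * 0x8000-0xffff, one bit per port. A multi-byte access exits to L1
	 * if any port it touches has its bit set.
	 */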
8172 while (size > 0) {
8173 if (port < 0x8000)
8174 bitmap = vmcs12->io_bitmap_a;
8175 else if (port < 0x10000)
8176 bitmap = vmcs12->io_bitmap_b;
8177 else
Joe Perches1d804d02015-03-30 16:46:09 -07008178 return true;
Jan Kiszka908a7bd2013-02-18 11:21:16 +01008179 bitmap += (port & 0x7fff) / 8;
8180
8181 if (last_bitmap != bitmap)
Paolo Bonzini54bf36a2015-04-08 15:39:23 +02008182 if (kvm_vcpu_read_guest(vcpu, bitmap, &b, 1))
Joe Perches1d804d02015-03-30 16:46:09 -07008183 return true;
Jan Kiszka908a7bd2013-02-18 11:21:16 +01008184 if (b & (1 << (port & 7)))
Joe Perches1d804d02015-03-30 16:46:09 -07008185 return true;
Jan Kiszka908a7bd2013-02-18 11:21:16 +01008186
8187 port++;
8188 size--;
8189 last_bitmap = bitmap;
8190 }
8191
Joe Perches1d804d02015-03-30 16:46:09 -07008192 return false;
Jan Kiszka908a7bd2013-02-18 11:21:16 +01008193}
8194
Nadav Har'El644d7112011-05-25 23:12:35 +03008195/*
 8196 * Return 1 if we should exit from L2 to L1 to handle an MSR access,
8197 * rather than handle it ourselves in L0. I.e., check whether L1 expressed
8198 * disinterest in the current event (read or write a specific MSR) by using an
8199 * MSR bitmap. This may be the case even when L0 doesn't use MSR bitmaps.
8200 */
8201static bool nested_vmx_exit_handled_msr(struct kvm_vcpu *vcpu,
8202 struct vmcs12 *vmcs12, u32 exit_reason)
8203{
8204 u32 msr_index = vcpu->arch.regs[VCPU_REGS_RCX];
8205 gpa_t bitmap;
8206
Jan Kiszkacbd29cb2013-02-11 12:19:28 +01008207 if (!nested_cpu_has(vmcs12, CPU_BASED_USE_MSR_BITMAPS))
Joe Perches1d804d02015-03-30 16:46:09 -07008208 return true;
Nadav Har'El644d7112011-05-25 23:12:35 +03008209
8210 /*
8211 * The MSR_BITMAP page is divided into four 1024-byte bitmaps,
8212 * for the four combinations of read/write and low/high MSR numbers.
8213 * First we need to figure out which of the four to use:
8214 */
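	/*
	 * For example, a write to MSR 0xc0000080 (EFER) lands in the
	 * high-MSR write bitmap: byte offset 2048 + 1024 + (0x80 / 8),
	 * bit (0x80 & 7).
	 */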
8215 bitmap = vmcs12->msr_bitmap;
8216 if (exit_reason == EXIT_REASON_MSR_WRITE)
8217 bitmap += 2048;
8218 if (msr_index >= 0xc0000000) {
8219 msr_index -= 0xc0000000;
8220 bitmap += 1024;
8221 }
8222
8223 /* Then read the msr_index'th bit from this bitmap: */
8224 if (msr_index < 1024*8) {
8225 unsigned char b;
Paolo Bonzini54bf36a2015-04-08 15:39:23 +02008226 if (kvm_vcpu_read_guest(vcpu, bitmap + msr_index/8, &b, 1))
Joe Perches1d804d02015-03-30 16:46:09 -07008227 return true;
Nadav Har'El644d7112011-05-25 23:12:35 +03008228 return 1 & (b >> (msr_index & 7));
8229 } else
Joe Perches1d804d02015-03-30 16:46:09 -07008230 return true; /* let L1 handle the wrong parameter */
Nadav Har'El644d7112011-05-25 23:12:35 +03008231}
8232
8233/*
8234 * Return 1 if we should exit from L2 to L1 to handle a CR access exit,
8235 * rather than handle it ourselves in L0. I.e., check if L1 wanted to
8236 * intercept (via guest_host_mask etc.) the current event.
8237 */
8238static bool nested_vmx_exit_handled_cr(struct kvm_vcpu *vcpu,
8239 struct vmcs12 *vmcs12)
8240{
8241 unsigned long exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
8242 int cr = exit_qualification & 15;
Jan H. Schönherre1d39b12017-05-20 13:22:56 +02008243 int reg;
8244 unsigned long val;
Nadav Har'El644d7112011-05-25 23:12:35 +03008245
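	/*
	 * Exit qualification for CR accesses: bits 3:0 give the CR number,
	 * bits 5:4 the access type (0 = MOV to CR, 1 = MOV from CR,
	 * 2 = CLTS, 3 = LMSW), bits 11:8 the MOV register, and the LMSW
	 * source data starts at bit LMSW_SOURCE_DATA_SHIFT.
	 */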
8246 switch ((exit_qualification >> 4) & 3) {
8247 case 0: /* mov to cr */
Jan H. Schönherre1d39b12017-05-20 13:22:56 +02008248 reg = (exit_qualification >> 8) & 15;
8249 val = kvm_register_readl(vcpu, reg);
Nadav Har'El644d7112011-05-25 23:12:35 +03008250 switch (cr) {
8251 case 0:
8252 if (vmcs12->cr0_guest_host_mask &
8253 (val ^ vmcs12->cr0_read_shadow))
Joe Perches1d804d02015-03-30 16:46:09 -07008254 return true;
Nadav Har'El644d7112011-05-25 23:12:35 +03008255 break;
8256 case 3:
8257 if ((vmcs12->cr3_target_count >= 1 &&
8258 vmcs12->cr3_target_value0 == val) ||
8259 (vmcs12->cr3_target_count >= 2 &&
8260 vmcs12->cr3_target_value1 == val) ||
8261 (vmcs12->cr3_target_count >= 3 &&
8262 vmcs12->cr3_target_value2 == val) ||
8263 (vmcs12->cr3_target_count >= 4 &&
8264 vmcs12->cr3_target_value3 == val))
Joe Perches1d804d02015-03-30 16:46:09 -07008265 return false;
Nadav Har'El644d7112011-05-25 23:12:35 +03008266 if (nested_cpu_has(vmcs12, CPU_BASED_CR3_LOAD_EXITING))
Joe Perches1d804d02015-03-30 16:46:09 -07008267 return true;
Nadav Har'El644d7112011-05-25 23:12:35 +03008268 break;
8269 case 4:
8270 if (vmcs12->cr4_guest_host_mask &
8271 (vmcs12->cr4_read_shadow ^ val))
Joe Perches1d804d02015-03-30 16:46:09 -07008272 return true;
Nadav Har'El644d7112011-05-25 23:12:35 +03008273 break;
8274 case 8:
8275 if (nested_cpu_has(vmcs12, CPU_BASED_CR8_LOAD_EXITING))
Joe Perches1d804d02015-03-30 16:46:09 -07008276 return true;
Nadav Har'El644d7112011-05-25 23:12:35 +03008277 break;
8278 }
8279 break;
8280 case 2: /* clts */
8281 if ((vmcs12->cr0_guest_host_mask & X86_CR0_TS) &&
8282 (vmcs12->cr0_read_shadow & X86_CR0_TS))
Joe Perches1d804d02015-03-30 16:46:09 -07008283 return true;
Nadav Har'El644d7112011-05-25 23:12:35 +03008284 break;
8285 case 1: /* mov from cr */
8286 switch (cr) {
8287 case 3:
8288 if (vmcs12->cpu_based_vm_exec_control &
8289 CPU_BASED_CR3_STORE_EXITING)
Joe Perches1d804d02015-03-30 16:46:09 -07008290 return true;
Nadav Har'El644d7112011-05-25 23:12:35 +03008291 break;
8292 case 8:
8293 if (vmcs12->cpu_based_vm_exec_control &
8294 CPU_BASED_CR8_STORE_EXITING)
Joe Perches1d804d02015-03-30 16:46:09 -07008295 return true;
Nadav Har'El644d7112011-05-25 23:12:35 +03008296 break;
8297 }
8298 break;
8299 case 3: /* lmsw */
8300 /*
8301 * lmsw can change bits 1..3 of cr0, and only set bit 0 of
8302 * cr0. Other attempted changes are ignored, with no exit.
8303 */
Jan H. Schönherre1d39b12017-05-20 13:22:56 +02008304 val = (exit_qualification >> LMSW_SOURCE_DATA_SHIFT) & 0x0f;
Nadav Har'El644d7112011-05-25 23:12:35 +03008305 if (vmcs12->cr0_guest_host_mask & 0xe &
8306 (val ^ vmcs12->cr0_read_shadow))
Joe Perches1d804d02015-03-30 16:46:09 -07008307 return true;
Nadav Har'El644d7112011-05-25 23:12:35 +03008308 if ((vmcs12->cr0_guest_host_mask & 0x1) &&
8309 !(vmcs12->cr0_read_shadow & 0x1) &&
8310 (val & 0x1))
Joe Perches1d804d02015-03-30 16:46:09 -07008311 return true;
Nadav Har'El644d7112011-05-25 23:12:35 +03008312 break;
8313 }
Joe Perches1d804d02015-03-30 16:46:09 -07008314 return false;
Nadav Har'El644d7112011-05-25 23:12:35 +03008315}
8316
8317/*
8318 * Return 1 if we should exit from L2 to L1 to handle an exit, or 0 if we
8319 * should handle it ourselves in L0 (and then continue L2). Only call this
8320 * when in is_guest_mode (L2).
8321 */
Paolo Bonzini7313c692017-07-27 10:31:25 +02008322static bool nested_vmx_exit_reflected(struct kvm_vcpu *vcpu, u32 exit_reason)
Nadav Har'El644d7112011-05-25 23:12:35 +03008323{
Nadav Har'El644d7112011-05-25 23:12:35 +03008324 u32 intr_info = vmcs_read32(VM_EXIT_INTR_INFO);
8325 struct vcpu_vmx *vmx = to_vmx(vcpu);
8326 struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
8327
Jim Mattson4f350c62017-09-14 16:31:44 -07008328 if (vmx->nested.nested_run_pending)
8329 return false;
8330
8331 if (unlikely(vmx->fail)) {
8332 pr_info_ratelimited("%s failed vm entry %x\n", __func__,
8333 vmcs_read32(VM_INSTRUCTION_ERROR));
8334 return true;
8335 }
Jan Kiszka542060e2014-01-04 18:47:21 +01008336
David Matlackc9f04402017-08-01 14:00:40 -07008337 /*
8338 * The host physical addresses of some pages of guest memory
Jim Mattson00647b42017-11-27 17:22:25 -06008339 * are loaded into the vmcs02 (e.g. vmcs12's Virtual APIC
8340 * Page). The CPU may write to these pages via their host
8341 * physical address while L2 is running, bypassing any
8342 * address-translation-based dirty tracking (e.g. EPT write
8343 * protection).
David Matlackc9f04402017-08-01 14:00:40 -07008344 *
8345 * Mark them dirty on every exit from L2 to prevent them from
8346 * getting out of sync with dirty tracking.
8347 */
8348 nested_mark_vmcs12_pages_dirty(vcpu);
8349
Jim Mattson4f350c62017-09-14 16:31:44 -07008350 trace_kvm_nested_vmexit(kvm_rip_read(vcpu), exit_reason,
8351 vmcs_readl(EXIT_QUALIFICATION),
8352 vmx->idt_vectoring_info,
8353 intr_info,
8354 vmcs_read32(VM_EXIT_INTR_ERROR_CODE),
8355 KVM_ISA_VMX);
Nadav Har'El644d7112011-05-25 23:12:35 +03008356
8357 switch (exit_reason) {
8358 case EXIT_REASON_EXCEPTION_NMI:
Jim Mattsonef85b672016-12-12 11:01:37 -08008359 if (is_nmi(intr_info))
Joe Perches1d804d02015-03-30 16:46:09 -07008360 return false;
Nadav Har'El644d7112011-05-25 23:12:35 +03008361 else if (is_page_fault(intr_info))
Wanpeng Li52a5c152017-07-13 18:30:42 -07008362 return !vmx->vcpu.arch.apf.host_apf_reason && enable_ept;
Anthoine Bourgeoise504c902013-11-13 11:45:37 +01008363 else if (is_no_device(intr_info) &&
Paolo Bonziniccf98442014-02-27 22:54:11 +01008364 !(vmcs12->guest_cr0 & X86_CR0_TS))
Joe Perches1d804d02015-03-30 16:46:09 -07008365 return false;
Jan Kiszka6f054852016-02-09 20:15:18 +01008366 else if (is_debug(intr_info) &&
8367 vcpu->guest_debug &
8368 (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP))
8369 return false;
8370 else if (is_breakpoint(intr_info) &&
8371 vcpu->guest_debug & KVM_GUESTDBG_USE_SW_BP)
8372 return false;
Nadav Har'El644d7112011-05-25 23:12:35 +03008373 return vmcs12->exception_bitmap &
8374 (1u << (intr_info & INTR_INFO_VECTOR_MASK));
8375 case EXIT_REASON_EXTERNAL_INTERRUPT:
Joe Perches1d804d02015-03-30 16:46:09 -07008376 return false;
Nadav Har'El644d7112011-05-25 23:12:35 +03008377 case EXIT_REASON_TRIPLE_FAULT:
Joe Perches1d804d02015-03-30 16:46:09 -07008378 return true;
Nadav Har'El644d7112011-05-25 23:12:35 +03008379 case EXIT_REASON_PENDING_INTERRUPT:
Jan Kiszka3b656cf2013-04-14 12:12:45 +02008380 return nested_cpu_has(vmcs12, CPU_BASED_VIRTUAL_INTR_PENDING);
Nadav Har'El644d7112011-05-25 23:12:35 +03008381 case EXIT_REASON_NMI_WINDOW:
Jan Kiszka3b656cf2013-04-14 12:12:45 +02008382 return nested_cpu_has(vmcs12, CPU_BASED_VIRTUAL_NMI_PENDING);
Nadav Har'El644d7112011-05-25 23:12:35 +03008383 case EXIT_REASON_TASK_SWITCH:
Joe Perches1d804d02015-03-30 16:46:09 -07008384 return true;
Nadav Har'El644d7112011-05-25 23:12:35 +03008385 case EXIT_REASON_CPUID:
Joe Perches1d804d02015-03-30 16:46:09 -07008386 return true;
Nadav Har'El644d7112011-05-25 23:12:35 +03008387 case EXIT_REASON_HLT:
8388 return nested_cpu_has(vmcs12, CPU_BASED_HLT_EXITING);
8389 case EXIT_REASON_INVD:
Joe Perches1d804d02015-03-30 16:46:09 -07008390 return true;
Nadav Har'El644d7112011-05-25 23:12:35 +03008391 case EXIT_REASON_INVLPG:
8392 return nested_cpu_has(vmcs12, CPU_BASED_INVLPG_EXITING);
8393 case EXIT_REASON_RDPMC:
8394 return nested_cpu_has(vmcs12, CPU_BASED_RDPMC_EXITING);
Paolo Bonzinia5f46452017-03-30 11:55:32 +02008395 case EXIT_REASON_RDRAND:
David Hildenbrand736fdf72017-08-24 20:51:37 +02008396 return nested_cpu_has2(vmcs12, SECONDARY_EXEC_RDRAND_EXITING);
Paolo Bonzinia5f46452017-03-30 11:55:32 +02008397 case EXIT_REASON_RDSEED:
David Hildenbrand736fdf72017-08-24 20:51:37 +02008398 return nested_cpu_has2(vmcs12, SECONDARY_EXEC_RDSEED_EXITING);
Jan Kiszkab3a2a902015-03-23 19:27:19 +01008399 case EXIT_REASON_RDTSC: case EXIT_REASON_RDTSCP:
Nadav Har'El644d7112011-05-25 23:12:35 +03008400 return nested_cpu_has(vmcs12, CPU_BASED_RDTSC_EXITING);
8401 case EXIT_REASON_VMCALL: case EXIT_REASON_VMCLEAR:
8402 case EXIT_REASON_VMLAUNCH: case EXIT_REASON_VMPTRLD:
8403 case EXIT_REASON_VMPTRST: case EXIT_REASON_VMREAD:
8404 case EXIT_REASON_VMRESUME: case EXIT_REASON_VMWRITE:
8405 case EXIT_REASON_VMOFF: case EXIT_REASON_VMON:
Petr Matouseka642fc32014-09-23 20:22:30 +02008406 case EXIT_REASON_INVEPT: case EXIT_REASON_INVVPID:
Nadav Har'El644d7112011-05-25 23:12:35 +03008407 /*
8408 * VMX instructions trap unconditionally. This allows L1 to
8409 * emulate them for its L2 guest, i.e., allows 3-level nesting!
8410 */
Joe Perches1d804d02015-03-30 16:46:09 -07008411 return true;
Nadav Har'El644d7112011-05-25 23:12:35 +03008412 case EXIT_REASON_CR_ACCESS:
8413 return nested_vmx_exit_handled_cr(vcpu, vmcs12);
8414 case EXIT_REASON_DR_ACCESS:
8415 return nested_cpu_has(vmcs12, CPU_BASED_MOV_DR_EXITING);
8416 case EXIT_REASON_IO_INSTRUCTION:
Jan Kiszka908a7bd2013-02-18 11:21:16 +01008417 return nested_vmx_exit_handled_io(vcpu, vmcs12);
Paolo Bonzini1b073042016-10-25 16:06:30 +02008418 case EXIT_REASON_GDTR_IDTR: case EXIT_REASON_LDTR_TR:
8419 return nested_cpu_has2(vmcs12, SECONDARY_EXEC_DESC);
Nadav Har'El644d7112011-05-25 23:12:35 +03008420 case EXIT_REASON_MSR_READ:
8421 case EXIT_REASON_MSR_WRITE:
8422 return nested_vmx_exit_handled_msr(vcpu, vmcs12, exit_reason);
8423 case EXIT_REASON_INVALID_STATE:
Joe Perches1d804d02015-03-30 16:46:09 -07008424 return true;
Nadav Har'El644d7112011-05-25 23:12:35 +03008425 case EXIT_REASON_MWAIT_INSTRUCTION:
8426 return nested_cpu_has(vmcs12, CPU_BASED_MWAIT_EXITING);
Mihai Donțu5f3d45e2015-07-05 20:08:57 +03008427 case EXIT_REASON_MONITOR_TRAP_FLAG:
8428 return nested_cpu_has(vmcs12, CPU_BASED_MONITOR_TRAP_FLAG);
Nadav Har'El644d7112011-05-25 23:12:35 +03008429 case EXIT_REASON_MONITOR_INSTRUCTION:
8430 return nested_cpu_has(vmcs12, CPU_BASED_MONITOR_EXITING);
8431 case EXIT_REASON_PAUSE_INSTRUCTION:
8432 return nested_cpu_has(vmcs12, CPU_BASED_PAUSE_EXITING) ||
8433 nested_cpu_has2(vmcs12,
8434 SECONDARY_EXEC_PAUSE_LOOP_EXITING);
8435 case EXIT_REASON_MCE_DURING_VMENTRY:
Joe Perches1d804d02015-03-30 16:46:09 -07008436 return false;
Nadav Har'El644d7112011-05-25 23:12:35 +03008437 case EXIT_REASON_TPR_BELOW_THRESHOLD:
Wanpeng Lia7c0b072014-08-21 19:46:50 +08008438 return nested_cpu_has(vmcs12, CPU_BASED_TPR_SHADOW);
Nadav Har'El644d7112011-05-25 23:12:35 +03008439 case EXIT_REASON_APIC_ACCESS:
8440 return nested_cpu_has2(vmcs12,
8441 SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES);
Wincy Van82f0dd42015-02-03 23:57:18 +08008442 case EXIT_REASON_APIC_WRITE:
Wincy Van608406e2015-02-03 23:57:51 +08008443 case EXIT_REASON_EOI_INDUCED:
8444 /* apic_write and eoi_induced should exit unconditionally. */
Joe Perches1d804d02015-03-30 16:46:09 -07008445 return true;
Nadav Har'El644d7112011-05-25 23:12:35 +03008446 case EXIT_REASON_EPT_VIOLATION:
Nadav Har'El2b1be672013-08-05 11:07:19 +03008447 /*
8448 * L0 always deals with the EPT violation. If nested EPT is
8449 * used, and the nested mmu code discovers that the address is
8450 * missing in the guest EPT table (EPT12), the EPT violation
8451 * will be injected with nested_ept_inject_page_fault()
8452 */
Joe Perches1d804d02015-03-30 16:46:09 -07008453 return false;
Nadav Har'El644d7112011-05-25 23:12:35 +03008454 case EXIT_REASON_EPT_MISCONFIG:
Nadav Har'El2b1be672013-08-05 11:07:19 +03008455 /*
8456 * L2 never uses directly L1's EPT, but rather L0's own EPT
8457 * table (shadow on EPT) or a merged EPT table that L0 built
8458 * (EPT on EPT). So any problems with the structure of the
 8459	 * table are L0's fault.
8460 */
Joe Perches1d804d02015-03-30 16:46:09 -07008461 return false;
Paolo Bonzini90a2db62017-07-27 13:22:13 +02008462 case EXIT_REASON_INVPCID:
8463 return
8464 nested_cpu_has2(vmcs12, SECONDARY_EXEC_ENABLE_INVPCID) &&
8465 nested_cpu_has(vmcs12, CPU_BASED_INVLPG_EXITING);
Nadav Har'El644d7112011-05-25 23:12:35 +03008466 case EXIT_REASON_WBINVD:
8467 return nested_cpu_has2(vmcs12, SECONDARY_EXEC_WBINVD_EXITING);
8468 case EXIT_REASON_XSETBV:
Joe Perches1d804d02015-03-30 16:46:09 -07008469 return true;
Wanpeng Li81dc01f2014-12-04 19:11:07 +08008470 case EXIT_REASON_XSAVES: case EXIT_REASON_XRSTORS:
8471 /*
8472 * This should never happen, since it is not possible to
8473 * set XSS to a non-zero value---neither in L1 nor in L2.
 8474	 * If it were, XSS would have to be checked against
8475 * the XSS exit bitmap in vmcs12.
8476 */
8477 return nested_cpu_has2(vmcs12, SECONDARY_EXEC_XSAVES);
Wanpeng Li55123e32016-07-06 18:29:58 +08008478 case EXIT_REASON_PREEMPTION_TIMER:
8479 return false;
Ladi Prosekab007cc2017-03-31 10:19:26 +02008480 case EXIT_REASON_PML_FULL:
Bandan Das03efce62017-05-05 15:25:15 -04008481	/* We emulate PML support for L1. */
Ladi Prosekab007cc2017-03-31 10:19:26 +02008482 return false;
Bandan Das2a499e42017-08-03 15:54:41 -04008483 case EXIT_REASON_VMFUNC:
8484 /* VM functions are emulated through L2->L0 vmexits. */
8485 return false;
Nadav Har'El644d7112011-05-25 23:12:35 +03008486 default:
Joe Perches1d804d02015-03-30 16:46:09 -07008487 return true;
Nadav Har'El644d7112011-05-25 23:12:35 +03008488 }
8489}
8490
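/*
 * Reflect a vmexit that occurred while L2 was running into L1: latch the
 * exception error code into vmcs12 when the interruption info says one is
 * valid, then emulate a nested vmexit with the raw exit reason,
 * interruption info and exit qualification.  Returns 1 so the caller
 * treats the exit as handled.
 */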
Paolo Bonzini7313c692017-07-27 10:31:25 +02008491static int nested_vmx_reflect_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason)
8492{
8493 u32 exit_intr_info = vmcs_read32(VM_EXIT_INTR_INFO);
8494
8495 /*
8496 * At this point, the exit interruption info in exit_intr_info
8497 * is only valid for EXCEPTION_NMI exits. For EXTERNAL_INTERRUPT
8498 * we need to query the in-kernel LAPIC.
8499 */
8500 WARN_ON(exit_reason == EXIT_REASON_EXTERNAL_INTERRUPT);
8501 if ((exit_intr_info &
8502 (INTR_INFO_VALID_MASK | INTR_INFO_DELIVER_CODE_MASK)) ==
8503 (INTR_INFO_VALID_MASK | INTR_INFO_DELIVER_CODE_MASK)) {
8504 struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
8505 vmcs12->vm_exit_intr_error_code =
8506 vmcs_read32(VM_EXIT_INTR_ERROR_CODE);
8507 }
8508
8509 nested_vmx_vmexit(vcpu, exit_reason, exit_intr_info,
8510 vmcs_readl(EXIT_QUALIFICATION));
8511 return 1;
8512}
8513
Avi Kivity586f9602010-11-18 13:09:54 +02008514static void vmx_get_exit_info(struct kvm_vcpu *vcpu, u64 *info1, u64 *info2)
8515{
8516 *info1 = vmcs_readl(EXIT_QUALIFICATION);
8517 *info2 = vmcs_read32(VM_EXIT_INTR_INFO);
8518}
8519
Kai Huanga3eaa862015-11-04 13:46:05 +08008520static void vmx_destroy_pml_buffer(struct vcpu_vmx *vmx)
Kai Huang843e4332015-01-28 10:54:28 +08008521{
Kai Huanga3eaa862015-11-04 13:46:05 +08008522 if (vmx->pml_pg) {
8523 __free_page(vmx->pml_pg);
8524 vmx->pml_pg = NULL;
8525 }
Kai Huang843e4332015-01-28 10:54:28 +08008526}
8527
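/*
 * Drain this vcpu's Page Modification Log and mark the logged GPAs dirty.
 * Hardware fills the buffer from the top down and decrements
 * GUEST_PML_INDEX after each entry, so the valid entries occupy the tail
 * of the 512-entry buffer: e.g. an index of 509 means slots 510 and 511
 * hold logged GPAs, and a wrapped index (>= PML_ENTITY_NUM) means the
 * whole buffer is full.
 */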
Paolo Bonzini54bf36a2015-04-08 15:39:23 +02008528static void vmx_flush_pml_buffer(struct kvm_vcpu *vcpu)
Kai Huang843e4332015-01-28 10:54:28 +08008529{
Paolo Bonzini54bf36a2015-04-08 15:39:23 +02008530 struct vcpu_vmx *vmx = to_vmx(vcpu);
Kai Huang843e4332015-01-28 10:54:28 +08008531 u64 *pml_buf;
8532 u16 pml_idx;
8533
8534 pml_idx = vmcs_read16(GUEST_PML_INDEX);
8535
8536 /* Do nothing if PML buffer is empty */
8537 if (pml_idx == (PML_ENTITY_NUM - 1))
8538 return;
8539
 8540	/* The PML index always points to the next available PML buffer entity */
8541 if (pml_idx >= PML_ENTITY_NUM)
8542 pml_idx = 0;
8543 else
8544 pml_idx++;
8545
8546 pml_buf = page_address(vmx->pml_pg);
8547 for (; pml_idx < PML_ENTITY_NUM; pml_idx++) {
8548 u64 gpa;
8549
8550 gpa = pml_buf[pml_idx];
8551 WARN_ON(gpa & (PAGE_SIZE - 1));
Paolo Bonzini54bf36a2015-04-08 15:39:23 +02008552 kvm_vcpu_mark_page_dirty(vcpu, gpa >> PAGE_SHIFT);
Kai Huang843e4332015-01-28 10:54:28 +08008553 }
8554
8555 /* reset PML index */
8556 vmcs_write16(GUEST_PML_INDEX, PML_ENTITY_NUM - 1);
8557}
8558
8559/*
8560 * Flush all vcpus' PML buffer and update logged GPAs to dirty_bitmap.
8561 * Called before reporting dirty_bitmap to userspace.
8562 */
8563static void kvm_flush_pml_buffers(struct kvm *kvm)
8564{
8565 int i;
8566 struct kvm_vcpu *vcpu;
8567 /*
 8568	 * We only need to kick each vcpu out of guest mode here, as the PML
 8569	 * buffer is flushed at the beginning of every VMEXIT, so only vcpus
 8570	 * running in guest mode can have unflushed GPAs in their PML
 8571	 * buffers.
8572 */
8573 kvm_for_each_vcpu(i, vcpu, kvm)
8574 kvm_vcpu_kick(vcpu);
8575}
8576
Paolo Bonzini4eb64dc2015-04-30 12:57:28 +02008577static void vmx_dump_sel(char *name, uint32_t sel)
8578{
8579 pr_err("%s sel=0x%04x, attr=0x%05x, limit=0x%08x, base=0x%016lx\n",
Chao Peng96794e42017-02-21 03:50:01 -05008580 name, vmcs_read16(sel),
Paolo Bonzini4eb64dc2015-04-30 12:57:28 +02008581 vmcs_read32(sel + GUEST_ES_AR_BYTES - GUEST_ES_SELECTOR),
8582 vmcs_read32(sel + GUEST_ES_LIMIT - GUEST_ES_SELECTOR),
8583 vmcs_readl(sel + GUEST_ES_BASE - GUEST_ES_SELECTOR));
8584}
8585
8586static void vmx_dump_dtsel(char *name, uint32_t limit)
8587{
8588 pr_err("%s limit=0x%08x, base=0x%016lx\n",
8589 name, vmcs_read32(limit),
8590 vmcs_readl(limit + GUEST_GDTR_BASE - GUEST_GDTR_LIMIT));
8591}
8592
8593static void dump_vmcs(void)
8594{
8595 u32 vmentry_ctl = vmcs_read32(VM_ENTRY_CONTROLS);
8596 u32 vmexit_ctl = vmcs_read32(VM_EXIT_CONTROLS);
8597 u32 cpu_based_exec_ctrl = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL);
8598 u32 pin_based_exec_ctrl = vmcs_read32(PIN_BASED_VM_EXEC_CONTROL);
8599 u32 secondary_exec_control = 0;
8600 unsigned long cr4 = vmcs_readl(GUEST_CR4);
Paolo Bonzinif3531052015-12-03 15:49:56 +01008601 u64 efer = vmcs_read64(GUEST_IA32_EFER);
Paolo Bonzini4eb64dc2015-04-30 12:57:28 +02008602 int i, n;
8603
8604 if (cpu_has_secondary_exec_ctrls())
8605 secondary_exec_control = vmcs_read32(SECONDARY_VM_EXEC_CONTROL);
8606
8607 pr_err("*** Guest State ***\n");
8608 pr_err("CR0: actual=0x%016lx, shadow=0x%016lx, gh_mask=%016lx\n",
8609 vmcs_readl(GUEST_CR0), vmcs_readl(CR0_READ_SHADOW),
8610 vmcs_readl(CR0_GUEST_HOST_MASK));
8611 pr_err("CR4: actual=0x%016lx, shadow=0x%016lx, gh_mask=%016lx\n",
8612 cr4, vmcs_readl(CR4_READ_SHADOW), vmcs_readl(CR4_GUEST_HOST_MASK));
8613 pr_err("CR3 = 0x%016lx\n", vmcs_readl(GUEST_CR3));
8614 if ((secondary_exec_control & SECONDARY_EXEC_ENABLE_EPT) &&
8615 (cr4 & X86_CR4_PAE) && !(efer & EFER_LMA))
8616 {
Paolo Bonzini845c5b402015-12-03 15:51:00 +01008617 pr_err("PDPTR0 = 0x%016llx PDPTR1 = 0x%016llx\n",
8618 vmcs_read64(GUEST_PDPTR0), vmcs_read64(GUEST_PDPTR1));
8619 pr_err("PDPTR2 = 0x%016llx PDPTR3 = 0x%016llx\n",
8620 vmcs_read64(GUEST_PDPTR2), vmcs_read64(GUEST_PDPTR3));
Paolo Bonzini4eb64dc2015-04-30 12:57:28 +02008621 }
8622 pr_err("RSP = 0x%016lx RIP = 0x%016lx\n",
8623 vmcs_readl(GUEST_RSP), vmcs_readl(GUEST_RIP));
8624 pr_err("RFLAGS=0x%08lx DR7 = 0x%016lx\n",
8625 vmcs_readl(GUEST_RFLAGS), vmcs_readl(GUEST_DR7));
8626 pr_err("Sysenter RSP=%016lx CS:RIP=%04x:%016lx\n",
8627 vmcs_readl(GUEST_SYSENTER_ESP),
8628 vmcs_read32(GUEST_SYSENTER_CS), vmcs_readl(GUEST_SYSENTER_EIP));
8629 vmx_dump_sel("CS: ", GUEST_CS_SELECTOR);
8630 vmx_dump_sel("DS: ", GUEST_DS_SELECTOR);
8631 vmx_dump_sel("SS: ", GUEST_SS_SELECTOR);
8632 vmx_dump_sel("ES: ", GUEST_ES_SELECTOR);
8633 vmx_dump_sel("FS: ", GUEST_FS_SELECTOR);
8634 vmx_dump_sel("GS: ", GUEST_GS_SELECTOR);
8635 vmx_dump_dtsel("GDTR:", GUEST_GDTR_LIMIT);
8636 vmx_dump_sel("LDTR:", GUEST_LDTR_SELECTOR);
8637 vmx_dump_dtsel("IDTR:", GUEST_IDTR_LIMIT);
8638 vmx_dump_sel("TR: ", GUEST_TR_SELECTOR);
8639 if ((vmexit_ctl & (VM_EXIT_SAVE_IA32_PAT | VM_EXIT_SAVE_IA32_EFER)) ||
8640 (vmentry_ctl & (VM_ENTRY_LOAD_IA32_PAT | VM_ENTRY_LOAD_IA32_EFER)))
Paolo Bonzini845c5b402015-12-03 15:51:00 +01008641 pr_err("EFER = 0x%016llx PAT = 0x%016llx\n",
8642 efer, vmcs_read64(GUEST_IA32_PAT));
8643 pr_err("DebugCtl = 0x%016llx DebugExceptions = 0x%016lx\n",
8644 vmcs_read64(GUEST_IA32_DEBUGCTL),
Paolo Bonzini4eb64dc2015-04-30 12:57:28 +02008645 vmcs_readl(GUEST_PENDING_DBG_EXCEPTIONS));
8646 if (vmentry_ctl & VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL)
Paolo Bonzini845c5b402015-12-03 15:51:00 +01008647 pr_err("PerfGlobCtl = 0x%016llx\n",
8648 vmcs_read64(GUEST_IA32_PERF_GLOBAL_CTRL));
Paolo Bonzini4eb64dc2015-04-30 12:57:28 +02008649 if (vmentry_ctl & VM_ENTRY_LOAD_BNDCFGS)
Paolo Bonzini845c5b402015-12-03 15:51:00 +01008650 pr_err("BndCfgS = 0x%016llx\n", vmcs_read64(GUEST_BNDCFGS));
Paolo Bonzini4eb64dc2015-04-30 12:57:28 +02008651 pr_err("Interruptibility = %08x ActivityState = %08x\n",
8652 vmcs_read32(GUEST_INTERRUPTIBILITY_INFO),
8653 vmcs_read32(GUEST_ACTIVITY_STATE));
8654 if (secondary_exec_control & SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY)
8655 pr_err("InterruptStatus = %04x\n",
8656 vmcs_read16(GUEST_INTR_STATUS));
8657
8658 pr_err("*** Host State ***\n");
8659 pr_err("RIP = 0x%016lx RSP = 0x%016lx\n",
8660 vmcs_readl(HOST_RIP), vmcs_readl(HOST_RSP));
8661 pr_err("CS=%04x SS=%04x DS=%04x ES=%04x FS=%04x GS=%04x TR=%04x\n",
8662 vmcs_read16(HOST_CS_SELECTOR), vmcs_read16(HOST_SS_SELECTOR),
8663 vmcs_read16(HOST_DS_SELECTOR), vmcs_read16(HOST_ES_SELECTOR),
8664 vmcs_read16(HOST_FS_SELECTOR), vmcs_read16(HOST_GS_SELECTOR),
8665 vmcs_read16(HOST_TR_SELECTOR));
8666 pr_err("FSBase=%016lx GSBase=%016lx TRBase=%016lx\n",
8667 vmcs_readl(HOST_FS_BASE), vmcs_readl(HOST_GS_BASE),
8668 vmcs_readl(HOST_TR_BASE));
8669 pr_err("GDTBase=%016lx IDTBase=%016lx\n",
8670 vmcs_readl(HOST_GDTR_BASE), vmcs_readl(HOST_IDTR_BASE));
8671 pr_err("CR0=%016lx CR3=%016lx CR4=%016lx\n",
8672 vmcs_readl(HOST_CR0), vmcs_readl(HOST_CR3),
8673 vmcs_readl(HOST_CR4));
8674 pr_err("Sysenter RSP=%016lx CS:RIP=%04x:%016lx\n",
8675 vmcs_readl(HOST_IA32_SYSENTER_ESP),
8676 vmcs_read32(HOST_IA32_SYSENTER_CS),
8677 vmcs_readl(HOST_IA32_SYSENTER_EIP));
8678 if (vmexit_ctl & (VM_EXIT_LOAD_IA32_PAT | VM_EXIT_LOAD_IA32_EFER))
Paolo Bonzini845c5b402015-12-03 15:51:00 +01008679 pr_err("EFER = 0x%016llx PAT = 0x%016llx\n",
8680 vmcs_read64(HOST_IA32_EFER),
8681 vmcs_read64(HOST_IA32_PAT));
Paolo Bonzini4eb64dc2015-04-30 12:57:28 +02008682 if (vmexit_ctl & VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL)
Paolo Bonzini845c5b402015-12-03 15:51:00 +01008683 pr_err("PerfGlobCtl = 0x%016llx\n",
8684 vmcs_read64(HOST_IA32_PERF_GLOBAL_CTRL));
Paolo Bonzini4eb64dc2015-04-30 12:57:28 +02008685
8686 pr_err("*** Control State ***\n");
8687 pr_err("PinBased=%08x CPUBased=%08x SecondaryExec=%08x\n",
8688 pin_based_exec_ctrl, cpu_based_exec_ctrl, secondary_exec_control);
8689 pr_err("EntryControls=%08x ExitControls=%08x\n", vmentry_ctl, vmexit_ctl);
8690 pr_err("ExceptionBitmap=%08x PFECmask=%08x PFECmatch=%08x\n",
8691 vmcs_read32(EXCEPTION_BITMAP),
8692 vmcs_read32(PAGE_FAULT_ERROR_CODE_MASK),
8693 vmcs_read32(PAGE_FAULT_ERROR_CODE_MATCH));
8694 pr_err("VMEntry: intr_info=%08x errcode=%08x ilen=%08x\n",
8695 vmcs_read32(VM_ENTRY_INTR_INFO_FIELD),
8696 vmcs_read32(VM_ENTRY_EXCEPTION_ERROR_CODE),
8697 vmcs_read32(VM_ENTRY_INSTRUCTION_LEN));
8698 pr_err("VMExit: intr_info=%08x errcode=%08x ilen=%08x\n",
8699 vmcs_read32(VM_EXIT_INTR_INFO),
8700 vmcs_read32(VM_EXIT_INTR_ERROR_CODE),
8701 vmcs_read32(VM_EXIT_INSTRUCTION_LEN));
8702 pr_err(" reason=%08x qualification=%016lx\n",
8703 vmcs_read32(VM_EXIT_REASON), vmcs_readl(EXIT_QUALIFICATION));
8704 pr_err("IDTVectoring: info=%08x errcode=%08x\n",
8705 vmcs_read32(IDT_VECTORING_INFO_FIELD),
8706 vmcs_read32(IDT_VECTORING_ERROR_CODE));
Paolo Bonzini845c5b402015-12-03 15:51:00 +01008707 pr_err("TSC Offset = 0x%016llx\n", vmcs_read64(TSC_OFFSET));
Haozhong Zhang8cfe9862015-10-20 15:39:12 +08008708 if (secondary_exec_control & SECONDARY_EXEC_TSC_SCALING)
Paolo Bonzini845c5b402015-12-03 15:51:00 +01008709 pr_err("TSC Multiplier = 0x%016llx\n",
8710 vmcs_read64(TSC_MULTIPLIER));
Paolo Bonzini4eb64dc2015-04-30 12:57:28 +02008711 if (cpu_based_exec_ctrl & CPU_BASED_TPR_SHADOW)
8712 pr_err("TPR Threshold = 0x%02x\n", vmcs_read32(TPR_THRESHOLD));
8713 if (pin_based_exec_ctrl & PIN_BASED_POSTED_INTR)
8714 pr_err("PostedIntrVec = 0x%02x\n", vmcs_read16(POSTED_INTR_NV));
8715 if ((secondary_exec_control & SECONDARY_EXEC_ENABLE_EPT))
Paolo Bonzini845c5b402015-12-03 15:51:00 +01008716 pr_err("EPT pointer = 0x%016llx\n", vmcs_read64(EPT_POINTER));
Paolo Bonzini4eb64dc2015-04-30 12:57:28 +02008717 n = vmcs_read32(CR3_TARGET_COUNT);
8718 for (i = 0; i + 1 < n; i += 4)
8719 pr_err("CR3 target%u=%016lx target%u=%016lx\n",
8720 i, vmcs_readl(CR3_TARGET_VALUE0 + i * 2),
8721 i + 1, vmcs_readl(CR3_TARGET_VALUE0 + i * 2 + 2));
8722 if (i < n)
8723 pr_err("CR3 target%u=%016lx\n",
8724 i, vmcs_readl(CR3_TARGET_VALUE0 + i * 2));
8725 if (secondary_exec_control & SECONDARY_EXEC_PAUSE_LOOP_EXITING)
8726 pr_err("PLE Gap=%08x Window=%08x\n",
8727 vmcs_read32(PLE_GAP), vmcs_read32(PLE_WINDOW));
8728 if (secondary_exec_control & SECONDARY_EXEC_ENABLE_VPID)
8729 pr_err("Virtual processor ID = 0x%04x\n",
8730 vmcs_read16(VIRTUAL_PROCESSOR_ID));
8731}
8732
Avi Kivity6aa8b732006-12-10 02:21:36 -08008733/*
8734 * The guest has exited. See if we can fix it or if we need userspace
8735 * assistance.
8736 */
Avi Kivity851ba692009-08-24 11:10:17 +03008737static int vmx_handle_exit(struct kvm_vcpu *vcpu)
Avi Kivity6aa8b732006-12-10 02:21:36 -08008738{
Avi Kivity29bd8a72007-09-10 17:27:03 +03008739 struct vcpu_vmx *vmx = to_vmx(vcpu);
Andi Kleena0861c02009-06-08 17:37:09 +08008740 u32 exit_reason = vmx->exit_reason;
Avi Kivity1155f762007-11-22 11:30:47 +02008741 u32 vectoring_info = vmx->idt_vectoring_info;
Avi Kivity29bd8a72007-09-10 17:27:03 +03008742
Paolo Bonzini8b89fe12015-12-10 18:37:32 +01008743 trace_kvm_exit(exit_reason, vcpu, KVM_ISA_VMX);
8744
Kai Huang843e4332015-01-28 10:54:28 +08008745 /*
 8746	 * Flush the PML buffer of logged GPAs so that dirty_bitmap is up to
 8747	 * date. Another benefit: in kvm_vm_ioctl_get_dirty_log, before
 8748	 * querying dirty_bitmap, we only need to kick all vcpus out of guest
 8749	 * mode, since once a vcpu is in root mode its PML buffer must already
 8750	 * have been flushed.
8751 */
8752 if (enable_pml)
Paolo Bonzini54bf36a2015-04-08 15:39:23 +02008753 vmx_flush_pml_buffer(vcpu);
Kai Huang843e4332015-01-28 10:54:28 +08008754
Mohammed Gamal80ced182009-09-01 12:48:18 +02008755 /* If guest state is invalid, start emulating */
Gleb Natapov14168782013-01-21 15:36:49 +02008756 if (vmx->emulation_required)
Mohammed Gamal80ced182009-09-01 12:48:18 +02008757 return handle_invalid_guest_state(vcpu);
Guillaume Thouvenin1d5a4d92008-10-29 09:39:42 +01008758
Paolo Bonzini7313c692017-07-27 10:31:25 +02008759 if (is_guest_mode(vcpu) && nested_vmx_exit_reflected(vcpu, exit_reason))
8760 return nested_vmx_reflect_vmexit(vcpu, exit_reason);
Nadav Har'El644d7112011-05-25 23:12:35 +03008761
Mohammed Gamal51207022010-05-31 22:40:54 +03008762 if (exit_reason & VMX_EXIT_REASONS_FAILED_VMENTRY) {
Paolo Bonzini4eb64dc2015-04-30 12:57:28 +02008763 dump_vmcs();
Mohammed Gamal51207022010-05-31 22:40:54 +03008764 vcpu->run->exit_reason = KVM_EXIT_FAIL_ENTRY;
8765 vcpu->run->fail_entry.hardware_entry_failure_reason
8766 = exit_reason;
8767 return 0;
8768 }
8769
Avi Kivity29bd8a72007-09-10 17:27:03 +03008770 if (unlikely(vmx->fail)) {
Avi Kivity851ba692009-08-24 11:10:17 +03008771 vcpu->run->exit_reason = KVM_EXIT_FAIL_ENTRY;
8772 vcpu->run->fail_entry.hardware_entry_failure_reason
Avi Kivity29bd8a72007-09-10 17:27:03 +03008773 = vmcs_read32(VM_INSTRUCTION_ERROR);
8774 return 0;
8775 }
Avi Kivity6aa8b732006-12-10 02:21:36 -08008776
Xiao Guangrongb9bf6882012-10-17 13:46:52 +08008777 /*
8778 * Note:
8779 * Do not try to fix EXIT_REASON_EPT_MISCONFIG if it caused by
8780 * delivery event since it indicates guest is accessing MMIO.
8781 * The vm-exit can be triggered again after return to guest that
8782 * will cause infinite loop.
8783 */
Mike Dayd77c26f2007-10-08 09:02:08 -04008784 if ((vectoring_info & VECTORING_INFO_VALID_MASK) &&
Sheng Yang14394422008-04-28 12:24:45 +08008785 (exit_reason != EXIT_REASON_EXCEPTION_NMI &&
Jan Kiszka60637aa2008-09-26 09:30:47 +02008786 exit_reason != EXIT_REASON_EPT_VIOLATION &&
Cao, Leib244c9f2016-07-15 13:54:04 +00008787 exit_reason != EXIT_REASON_PML_FULL &&
Xiao Guangrongb9bf6882012-10-17 13:46:52 +08008788 exit_reason != EXIT_REASON_TASK_SWITCH)) {
8789 vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
8790 vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_DELIVERY_EV;
Paolo Bonzini70bcd702017-07-05 12:38:06 +02008791 vcpu->run->internal.ndata = 3;
Xiao Guangrongb9bf6882012-10-17 13:46:52 +08008792 vcpu->run->internal.data[0] = vectoring_info;
8793 vcpu->run->internal.data[1] = exit_reason;
Paolo Bonzini70bcd702017-07-05 12:38:06 +02008794 vcpu->run->internal.data[2] = vcpu->arch.exit_qualification;
8795 if (exit_reason == EXIT_REASON_EPT_MISCONFIG) {
8796 vcpu->run->internal.ndata++;
8797 vcpu->run->internal.data[3] =
8798 vmcs_read64(GUEST_PHYSICAL_ADDRESS);
8799 }
Xiao Guangrongb9bf6882012-10-17 13:46:52 +08008800 return 0;
8801 }
Jan Kiszka3b86cd92008-09-26 09:30:57 +02008802
Paolo Bonzinid02fcf52017-11-06 13:31:13 +01008803 if (unlikely(!enable_vnmi &&
Paolo Bonzini8a1b4392017-11-06 13:31:12 +01008804 vmx->loaded_vmcs->soft_vnmi_blocked)) {
8805 if (vmx_interrupt_allowed(vcpu)) {
8806 vmx->loaded_vmcs->soft_vnmi_blocked = 0;
8807 } else if (vmx->loaded_vmcs->vnmi_blocked_time > 1000000000LL &&
8808 vcpu->arch.nmi_pending) {
8809 /*
 8810			 * This CPU doesn't help us find the end of an
 8811			 * NMI-blocked window if the guest runs with IRQs
 8812			 * disabled, so we force the blocking to end after
 8813			 * 1 s of futile waiting and inform the user about it.
8814 */
8815 printk(KERN_WARNING "%s: Breaking out of NMI-blocked "
8816 "state on VCPU %d after 1 s timeout\n",
8817 __func__, vcpu->vcpu_id);
8818 vmx->loaded_vmcs->soft_vnmi_blocked = 0;
8819 }
8820 }
8821
Avi Kivity6aa8b732006-12-10 02:21:36 -08008822 if (exit_reason < kvm_vmx_max_exit_handlers
8823 && kvm_vmx_exit_handlers[exit_reason])
Avi Kivity851ba692009-08-24 11:10:17 +03008824 return kvm_vmx_exit_handlers[exit_reason](vcpu);
Avi Kivity6aa8b732006-12-10 02:21:36 -08008825 else {
Radim Krčmář6c6c5e02017-01-13 18:59:04 +01008826 vcpu_unimpl(vcpu, "vmx: unexpected exit reason 0x%x\n",
8827 exit_reason);
Michael S. Tsirkin2bc19dc2014-09-18 16:21:16 +03008828 kvm_queue_exception(vcpu, UD_VECTOR);
8829 return 1;
Avi Kivity6aa8b732006-12-10 02:21:36 -08008830 }
Avi Kivity6aa8b732006-12-10 02:21:36 -08008831}
8832
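/*
 * Program the TPR threshold: if there is no pending interrupt, or the
 * priority of the highest pending interrupt (irr) is already above the
 * guest's TPR, clear the threshold; otherwise set it to irr so the guest
 * exits as soon as it lowers TPR below that priority.  When L2 runs with
 * a vmcs12 TPR shadow, the threshold belongs to L1 and is left alone.
 */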
Gleb Natapov95ba8273132009-04-21 17:45:08 +03008833static void update_cr8_intercept(struct kvm_vcpu *vcpu, int tpr, int irr)
Yang, Sheng6e5d8652007-09-12 18:03:11 +08008834{
Wanpeng Lia7c0b072014-08-21 19:46:50 +08008835 struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
8836
8837 if (is_guest_mode(vcpu) &&
8838 nested_cpu_has(vmcs12, CPU_BASED_TPR_SHADOW))
8839 return;
8840
Gleb Natapov95ba8273132009-04-21 17:45:08 +03008841 if (irr == -1 || tpr < irr) {
Yang, Sheng6e5d8652007-09-12 18:03:11 +08008842 vmcs_write32(TPR_THRESHOLD, 0);
8843 return;
8844 }
8845
Gleb Natapov95ba8273132009-04-21 17:45:08 +03008846 vmcs_write32(TPR_THRESHOLD, irr);
Yang, Sheng6e5d8652007-09-12 18:03:11 +08008847}
8848
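/*
 * Switch the secondary execution controls between MMIO-based APIC
 * virtualization (virtualize APIC accesses) and x2APIC mode, then refresh
 * the MSR bitmap so the x2APIC MSR intercepts match the new mode.
 */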
Yang Zhang8d146952013-01-25 10:18:50 +08008849static void vmx_set_virtual_x2apic_mode(struct kvm_vcpu *vcpu, bool set)
8850{
8851 u32 sec_exec_control;
8852
Radim Krčmářdccbfcf2016-08-08 20:16:23 +02008853 /* Postpone execution until vmcs01 is the current VMCS. */
8854 if (is_guest_mode(vcpu)) {
8855 to_vmx(vcpu)->nested.change_vmcs01_virtual_x2apic_mode = true;
8856 return;
8857 }
8858
Wanpeng Lif6e90f92016-09-22 07:43:25 +08008859 if (!cpu_has_vmx_virtualize_x2apic_mode())
Yang Zhang8d146952013-01-25 10:18:50 +08008860 return;
8861
Paolo Bonzini35754c92015-07-29 12:05:37 +02008862 if (!cpu_need_tpr_shadow(vcpu))
Yang Zhang8d146952013-01-25 10:18:50 +08008863 return;
8864
8865 sec_exec_control = vmcs_read32(SECONDARY_VM_EXEC_CONTROL);
8866
8867 if (set) {
8868 sec_exec_control &= ~SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES;
8869 sec_exec_control |= SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE;
8870 } else {
8871 sec_exec_control &= ~SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE;
8872 sec_exec_control |= SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES;
Jim Mattsonfb6c8192017-03-16 13:53:59 -07008873 vmx_flush_tlb_ept_only(vcpu);
Yang Zhang8d146952013-01-25 10:18:50 +08008874 }
8875 vmcs_write32(SECONDARY_VM_EXEC_CONTROL, sec_exec_control);
8876
8877 vmx_set_msr_bitmap(vcpu);
8878}
8879
Tang Chen38b99172014-09-24 15:57:54 +08008880static void vmx_set_apic_access_page_addr(struct kvm_vcpu *vcpu, hpa_t hpa)
8881{
8882 struct vcpu_vmx *vmx = to_vmx(vcpu);
8883
8884 /*
8885 * Currently we do not handle the nested case where L2 has an
8886 * APIC access page of its own; that page is still pinned.
8887 * Hence, we skip the case where the VCPU is in guest mode _and_
8888 * L1 prepared an APIC access page for L2.
8889 *
8890 * For the case where L1 and L2 share the same APIC access page
8891 * (flexpriority=Y but SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES clear
8892 * in the vmcs12), this function will only update either the vmcs01
8893 * or the vmcs02. If the former, the vmcs02 will be updated by
8894 * prepare_vmcs02. If the latter, the vmcs01 will be updated in
8895 * the next L2->L1 exit.
8896 */
8897 if (!is_guest_mode(vcpu) ||
David Matlack4f2777b2016-07-13 17:16:37 -07008898 !nested_cpu_has2(get_vmcs12(&vmx->vcpu),
Jim Mattsonfb6c8192017-03-16 13:53:59 -07008899 SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES)) {
Tang Chen38b99172014-09-24 15:57:54 +08008900 vmcs_write64(APIC_ACCESS_ADDR, hpa);
Jim Mattsonfb6c8192017-03-16 13:53:59 -07008901 vmx_flush_tlb_ept_only(vcpu);
8902 }
Tang Chen38b99172014-09-24 15:57:54 +08008903}
8904
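/*
 * GUEST_INTR_STATUS packs two 8-bit fields: RVI (requesting virtual
 * interrupt) in the low byte and SVI (servicing virtual interrupt) in the
 * high byte, e.g. a value of 0x2031 means SVI = 0x20 and RVI = 0x31.  The
 * two helpers below update SVI and RVI respectively, rewriting the field
 * only when the value actually changes.
 */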
Paolo Bonzini67c9ddd2016-05-10 17:01:23 +02008905static void vmx_hwapic_isr_update(struct kvm_vcpu *vcpu, int max_isr)
Yang Zhangc7c9c562013-01-25 10:18:51 +08008906{
8907 u16 status;
8908 u8 old;
8909
Paolo Bonzini67c9ddd2016-05-10 17:01:23 +02008910 if (max_isr == -1)
8911 max_isr = 0;
Yang Zhangc7c9c562013-01-25 10:18:51 +08008912
8913 status = vmcs_read16(GUEST_INTR_STATUS);
8914 old = status >> 8;
Paolo Bonzini67c9ddd2016-05-10 17:01:23 +02008915 if (max_isr != old) {
Yang Zhangc7c9c562013-01-25 10:18:51 +08008916 status &= 0xff;
Paolo Bonzini67c9ddd2016-05-10 17:01:23 +02008917 status |= max_isr << 8;
Yang Zhangc7c9c562013-01-25 10:18:51 +08008918 vmcs_write16(GUEST_INTR_STATUS, status);
8919 }
8920}
8921
8922static void vmx_set_rvi(int vector)
8923{
8924 u16 status;
8925 u8 old;
8926
Wei Wang4114c272014-11-05 10:53:43 +08008927 if (vector == -1)
8928 vector = 0;
8929
Yang Zhangc7c9c562013-01-25 10:18:51 +08008930 status = vmcs_read16(GUEST_INTR_STATUS);
8931 old = (u8)status & 0xff;
8932 if ((u8)vector != old) {
8933 status &= ~0xff;
8934 status |= (u8)vector;
8935 vmcs_write16(GUEST_INTR_STATUS, status);
8936 }
8937}
8938
8939static void vmx_hwapic_irr_update(struct kvm_vcpu *vcpu, int max_irr)
8940{
Wanpeng Li963fee12014-07-17 19:03:00 +08008941 if (!is_guest_mode(vcpu)) {
8942 vmx_set_rvi(max_irr);
8943 return;
8944 }
8945
Wei Wang4114c272014-11-05 10:53:43 +08008946 if (max_irr == -1)
8947 return;
8948
Wanpeng Li963fee12014-07-17 19:03:00 +08008949 /*
Wei Wang4114c272014-11-05 10:53:43 +08008950 * In guest mode. If a vmexit is needed, vmx_check_nested_events
8951 * handles it.
8952 */
8953 if (nested_exit_on_intr(vcpu))
8954 return;
8955
8956 /*
8957 * Else, fall back to pre-APICv interrupt injection since L2
Wanpeng Li963fee12014-07-17 19:03:00 +08008958 * is run without virtual interrupt delivery.
8959 */
8960 if (!kvm_event_needs_reinjection(vcpu) &&
8961 vmx_interrupt_allowed(vcpu)) {
8962 kvm_queue_interrupt(vcpu, max_irr, false);
8963 vmx_inject_irq(vcpu);
8964 }
Yang Zhangc7c9c562013-01-25 10:18:51 +08008965}
8966
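/*
 * Fold the posted-interrupt descriptor into the vAPIC: if the ON bit is
 * set, clear it and merge the PIR bitmap into the IRR; otherwise just look
 * up the highest pending IRR vector.  The result is then propagated via
 * vmx_hwapic_irr_update() (RVI in the non-nested case) and returned.
 */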
Paolo Bonzini76dfafd52016-12-19 17:17:11 +01008967static int vmx_sync_pir_to_irr(struct kvm_vcpu *vcpu)
Paolo Bonzini810e6de2016-12-19 13:05:46 +01008968{
8969 struct vcpu_vmx *vmx = to_vmx(vcpu);
Paolo Bonzini76dfafd52016-12-19 17:17:11 +01008970 int max_irr;
Paolo Bonzini810e6de2016-12-19 13:05:46 +01008971
Paolo Bonzini76dfafd52016-12-19 17:17:11 +01008972 WARN_ON(!vcpu->arch.apicv_active);
8973 if (pi_test_on(&vmx->pi_desc)) {
8974 pi_clear_on(&vmx->pi_desc);
8975 /*
8976 * IOMMU can write to PIR.ON, so the barrier matters even on UP.
8977 * But on x86 this is just a compiler barrier anyway.
8978 */
8979 smp_mb__after_atomic();
8980 max_irr = kvm_apic_update_irr(vcpu, vmx->pi_desc.pir);
8981 } else {
8982 max_irr = kvm_lapic_find_highest_irr(vcpu);
8983 }
8984 vmx_hwapic_irr_update(vcpu, max_irr);
8985 return max_irr;
Paolo Bonzini810e6de2016-12-19 13:05:46 +01008986}
8987
Andrey Smetanin63086302015-11-10 15:36:32 +03008988static void vmx_load_eoi_exitmap(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap)
Yang Zhangc7c9c562013-01-25 10:18:51 +08008989{
Andrey Smetanind62caab2015-11-10 15:36:33 +03008990 if (!kvm_vcpu_apicv_active(vcpu))
Yang Zhang3d81bc72013-04-11 19:25:13 +08008991 return;
8992
Yang Zhangc7c9c562013-01-25 10:18:51 +08008993 vmcs_write64(EOI_EXIT_BITMAP0, eoi_exit_bitmap[0]);
8994 vmcs_write64(EOI_EXIT_BITMAP1, eoi_exit_bitmap[1]);
8995 vmcs_write64(EOI_EXIT_BITMAP2, eoi_exit_bitmap[2]);
8996 vmcs_write64(EOI_EXIT_BITMAP3, eoi_exit_bitmap[3]);
8997}
8998
Paolo Bonzini967235d2016-12-19 14:03:45 +01008999static void vmx_apicv_post_state_restore(struct kvm_vcpu *vcpu)
9000{
9001 struct vcpu_vmx *vmx = to_vmx(vcpu);
9002
9003 pi_clear_on(&vmx->pi_desc);
9004 memset(vmx->pi_desc.pir, 0, sizeof(vmx->pi_desc.pir));
9005}
9006
Avi Kivity51aa01d2010-07-20 14:31:20 +03009007static void vmx_complete_atomic_exit(struct vcpu_vmx *vmx)
Avi Kivitycf393f72008-07-01 16:20:21 +03009008{
Jim Mattson48ae0fb2017-05-22 09:48:33 -07009009 u32 exit_intr_info = 0;
9010 u16 basic_exit_reason = (u16)vmx->exit_reason;
Avi Kivity00eba012011-03-07 17:24:54 +02009011
Jim Mattson48ae0fb2017-05-22 09:48:33 -07009012 if (!(basic_exit_reason == EXIT_REASON_MCE_DURING_VMENTRY
9013 || basic_exit_reason == EXIT_REASON_EXCEPTION_NMI))
Avi Kivity00eba012011-03-07 17:24:54 +02009014 return;
9015
Jim Mattson48ae0fb2017-05-22 09:48:33 -07009016 if (!(vmx->exit_reason & VMX_EXIT_REASONS_FAILED_VMENTRY))
9017 exit_intr_info = vmcs_read32(VM_EXIT_INTR_INFO);
9018 vmx->exit_intr_info = exit_intr_info;
Andi Kleena0861c02009-06-08 17:37:09 +08009019
Wanpeng Li1261bfa2017-07-13 18:30:40 -07009020 /* if exit due to PF check for async PF */
9021 if (is_page_fault(exit_intr_info))
9022 vmx->vcpu.arch.apf.host_apf_reason = kvm_read_and_reset_pf_reason();
9023
Andi Kleena0861c02009-06-08 17:37:09 +08009024 /* Handle machine checks before interrupts are enabled */
Jim Mattson48ae0fb2017-05-22 09:48:33 -07009025 if (basic_exit_reason == EXIT_REASON_MCE_DURING_VMENTRY ||
9026 is_machine_check(exit_intr_info))
Andi Kleena0861c02009-06-08 17:37:09 +08009027 kvm_machine_check();
9028
Gleb Natapov20f65982009-05-11 13:35:55 +03009029 /* We need to handle NMIs before interrupts are enabled */
Jim Mattsonef85b672016-12-12 11:01:37 -08009030 if (is_nmi(exit_intr_info)) {
Zhang, Yanminff9d07a2010-04-19 13:32:45 +08009031 kvm_before_handle_nmi(&vmx->vcpu);
Gleb Natapov20f65982009-05-11 13:35:55 +03009032 asm("int $2");
Zhang, Yanminff9d07a2010-04-19 13:32:45 +08009033 kvm_after_handle_nmi(&vmx->vcpu);
9034 }
Avi Kivity51aa01d2010-07-20 14:31:20 +03009035}
Gleb Natapov20f65982009-05-11 13:35:55 +03009036
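/*
 * The vector of the external interrupt that caused this vmexit was latched
 * in VM_EXIT_INTR_INFO (the VM-exit controls acknowledge the interrupt on
 * exit).  Build an interrupt-gate style stack frame by hand (SS:RSP on
 * 64-bit, then EFLAGS and CS) and call the host IDT handler for that
 * vector directly; since the interrupt was already acknowledged, it will
 * not be delivered again once host IRQs are re-enabled.
 */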
Yang Zhanga547c6d2013-04-11 19:25:10 +08009037static void vmx_handle_external_intr(struct kvm_vcpu *vcpu)
9038{
9039 u32 exit_intr_info = vmcs_read32(VM_EXIT_INTR_INFO);
9040
Yang Zhanga547c6d2013-04-11 19:25:10 +08009041 if ((exit_intr_info & (INTR_INFO_VALID_MASK | INTR_INFO_INTR_TYPE_MASK))
9042 == (INTR_INFO_VALID_MASK | INTR_TYPE_EXT_INTR)) {
9043 unsigned int vector;
9044 unsigned long entry;
9045 gate_desc *desc;
9046 struct vcpu_vmx *vmx = to_vmx(vcpu);
9047#ifdef CONFIG_X86_64
9048 unsigned long tmp;
9049#endif
9050
9051 vector = exit_intr_info & INTR_INFO_VECTOR_MASK;
9052 desc = (gate_desc *)vmx->host_idt_base + vector;
Thomas Gleixner64b163f2017-08-28 08:47:37 +02009053 entry = gate_offset(desc);
Yang Zhanga547c6d2013-04-11 19:25:10 +08009054 asm volatile(
9055#ifdef CONFIG_X86_64
9056 "mov %%" _ASM_SP ", %[sp]\n\t"
9057 "and $0xfffffffffffffff0, %%" _ASM_SP "\n\t"
9058 "push $%c[ss]\n\t"
9059 "push %[sp]\n\t"
9060#endif
9061 "pushf\n\t"
Yang Zhanga547c6d2013-04-11 19:25:10 +08009062 __ASM_SIZE(push) " $%c[cs]\n\t"
9063 "call *%[entry]\n\t"
9064 :
9065#ifdef CONFIG_X86_64
Chris J Arges3f62de52016-01-22 15:44:38 -06009066 [sp]"=&r"(tmp),
Yang Zhanga547c6d2013-04-11 19:25:10 +08009067#endif
Josh Poimboeuff5caf622017-09-20 16:24:33 -05009068 ASM_CALL_CONSTRAINT
Yang Zhanga547c6d2013-04-11 19:25:10 +08009069 :
9070 [entry]"r"(entry),
9071 [ss]"i"(__KERNEL_DS),
9072 [cs]"i"(__KERNEL_CS)
9073 );
Paolo Bonzinif2485b32016-06-15 15:23:11 +02009074 }
Yang Zhanga547c6d2013-04-11 19:25:10 +08009075}
Josh Poimboeufc207aee2017-06-28 10:11:06 -05009076STACK_FRAME_NON_STANDARD(vmx_handle_external_intr);
Yang Zhanga547c6d2013-04-11 19:25:10 +08009077
Paolo Bonzini6d396b52015-04-01 14:25:33 +02009078static bool vmx_has_high_real_mode_segbase(void)
9079{
9080 return enable_unrestricted_guest || emulate_invalid_guest_state;
9081}
9082
Liu, Jinsongda8999d2014-02-24 10:55:46 +00009083static bool vmx_mpx_supported(void)
9084{
9085 return (vmcs_config.vmexit_ctrl & VM_EXIT_CLEAR_BNDCFGS) &&
9086 (vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_BNDCFGS);
9087}
9088
Wanpeng Li55412b22014-12-02 19:21:30 +08009089static bool vmx_xsaves_supported(void)
9090{
9091 return vmcs_config.cpu_based_2nd_exec_ctrl &
9092 SECONDARY_EXEC_XSAVES;
9093}
9094
Paolo Bonzini66336ca2016-07-12 10:36:41 +02009095static bool vmx_umip_emulated(void)
9096{
Paolo Bonzini0367f202016-07-12 10:44:55 +02009097 return vmcs_config.cpu_based_2nd_exec_ctrl &
9098 SECONDARY_EXEC_DESC;
Paolo Bonzini66336ca2016-07-12 10:36:41 +02009099}
9100
Avi Kivity51aa01d2010-07-20 14:31:20 +03009101static void vmx_recover_nmi_blocking(struct vcpu_vmx *vmx)
9102{
Avi Kivityc5ca8e52011-03-07 17:37:37 +02009103 u32 exit_intr_info;
Avi Kivity51aa01d2010-07-20 14:31:20 +03009104 bool unblock_nmi;
9105 u8 vector;
9106 bool idtv_info_valid;
9107
9108 idtv_info_valid = vmx->idt_vectoring_info & VECTORING_INFO_VALID_MASK;
Gleb Natapov20f65982009-05-11 13:35:55 +03009109
Paolo Bonzinid02fcf52017-11-06 13:31:13 +01009110 if (enable_vnmi) {
Paolo Bonzini8a1b4392017-11-06 13:31:12 +01009111 if (vmx->loaded_vmcs->nmi_known_unmasked)
9112 return;
9113 /*
9114 * Can't use vmx->exit_intr_info since we're not sure what
9115 * the exit reason is.
9116 */
9117 exit_intr_info = vmcs_read32(VM_EXIT_INTR_INFO);
9118 unblock_nmi = (exit_intr_info & INTR_INFO_UNBLOCK_NMI) != 0;
9119 vector = exit_intr_info & INTR_INFO_VECTOR_MASK;
9120 /*
9121 * SDM 3: 27.7.1.2 (September 2008)
9122 * Re-set bit "block by NMI" before VM entry if vmexit caused by
9123 * a guest IRET fault.
9124 * SDM 3: 23.2.2 (September 2008)
9125 * Bit 12 is undefined in any of the following cases:
9126 * If the VM exit sets the valid bit in the IDT-vectoring
9127 * information field.
9128 * If the VM exit is due to a double fault.
9129 */
9130 if ((exit_intr_info & INTR_INFO_VALID_MASK) && unblock_nmi &&
9131 vector != DF_VECTOR && !idtv_info_valid)
9132 vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO,
9133 GUEST_INTR_STATE_NMI);
9134 else
9135 vmx->loaded_vmcs->nmi_known_unmasked =
9136 !(vmcs_read32(GUEST_INTERRUPTIBILITY_INFO)
9137 & GUEST_INTR_STATE_NMI);
9138 } else if (unlikely(vmx->loaded_vmcs->soft_vnmi_blocked))
9139 vmx->loaded_vmcs->vnmi_blocked_time +=
9140 ktime_to_ns(ktime_sub(ktime_get(),
9141 vmx->loaded_vmcs->entry_time));
Avi Kivity51aa01d2010-07-20 14:31:20 +03009142}
9143
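/*
 * Decode an IDT-vectoring info field, i.e. an event whose delivery was
 * interrupted by the vmexit, and queue it again so it is re-injected on
 * the next vmentry: NMIs are marked pending again (with virtual-NMI
 * blocking cleared), exceptions and interrupts are re-queued with their
 * error code where applicable, and software events additionally record
 * the instruction length.
 */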
Jan Kiszka3ab66e82013-02-20 14:03:24 +01009144static void __vmx_complete_interrupts(struct kvm_vcpu *vcpu,
Avi Kivity83422e12010-07-20 14:43:23 +03009145 u32 idt_vectoring_info,
9146 int instr_len_field,
9147 int error_code_field)
Avi Kivity51aa01d2010-07-20 14:31:20 +03009148{
Avi Kivity51aa01d2010-07-20 14:31:20 +03009149 u8 vector;
9150 int type;
9151 bool idtv_info_valid;
9152
9153 idtv_info_valid = idt_vectoring_info & VECTORING_INFO_VALID_MASK;
Avi Kivity668f6122008-07-02 09:28:55 +03009154
Jan Kiszka3ab66e82013-02-20 14:03:24 +01009155 vcpu->arch.nmi_injected = false;
9156 kvm_clear_exception_queue(vcpu);
9157 kvm_clear_interrupt_queue(vcpu);
Gleb Natapov37b96e92009-03-30 16:03:13 +03009158
9159 if (!idtv_info_valid)
9160 return;
9161
Jan Kiszka3ab66e82013-02-20 14:03:24 +01009162 kvm_make_request(KVM_REQ_EVENT, vcpu);
Avi Kivity3842d132010-07-27 12:30:24 +03009163
Avi Kivity668f6122008-07-02 09:28:55 +03009164 vector = idt_vectoring_info & VECTORING_INFO_VECTOR_MASK;
9165 type = idt_vectoring_info & VECTORING_INFO_TYPE_MASK;
Gleb Natapov37b96e92009-03-30 16:03:13 +03009166
Gleb Natapov64a7ec02009-03-30 16:03:29 +03009167 switch (type) {
Gleb Natapov37b96e92009-03-30 16:03:13 +03009168 case INTR_TYPE_NMI_INTR:
Jan Kiszka3ab66e82013-02-20 14:03:24 +01009169 vcpu->arch.nmi_injected = true;
Avi Kivity668f6122008-07-02 09:28:55 +03009170 /*
Gleb Natapov7b4a25c2009-03-30 16:03:08 +03009171 * SDM 3: 27.7.1.2 (September 2008)
Gleb Natapov37b96e92009-03-30 16:03:13 +03009172 * Clear bit "block by NMI" before VM entry if a NMI
9173 * delivery faulted.
Avi Kivity668f6122008-07-02 09:28:55 +03009174 */
Jan Kiszka3ab66e82013-02-20 14:03:24 +01009175 vmx_set_nmi_mask(vcpu, false);
Gleb Natapov37b96e92009-03-30 16:03:13 +03009176 break;
Gleb Natapov37b96e92009-03-30 16:03:13 +03009177 case INTR_TYPE_SOFT_EXCEPTION:
Jan Kiszka3ab66e82013-02-20 14:03:24 +01009178 vcpu->arch.event_exit_inst_len = vmcs_read32(instr_len_field);
Gleb Natapov66fd3f72009-05-11 13:35:50 +03009179 /* fall through */
9180 case INTR_TYPE_HARD_EXCEPTION:
Avi Kivity35920a32008-07-03 14:50:12 +03009181 if (idt_vectoring_info & VECTORING_INFO_DELIVER_CODE_MASK) {
Avi Kivity83422e12010-07-20 14:43:23 +03009182 u32 err = vmcs_read32(error_code_field);
Gleb Natapov851eb6672013-09-25 12:51:34 +03009183 kvm_requeue_exception_e(vcpu, vector, err);
Avi Kivity35920a32008-07-03 14:50:12 +03009184 } else
Gleb Natapov851eb6672013-09-25 12:51:34 +03009185 kvm_requeue_exception(vcpu, vector);
Gleb Natapov37b96e92009-03-30 16:03:13 +03009186 break;
Gleb Natapov66fd3f72009-05-11 13:35:50 +03009187 case INTR_TYPE_SOFT_INTR:
Jan Kiszka3ab66e82013-02-20 14:03:24 +01009188 vcpu->arch.event_exit_inst_len = vmcs_read32(instr_len_field);
Gleb Natapov66fd3f72009-05-11 13:35:50 +03009189 /* fall through */
Gleb Natapov37b96e92009-03-30 16:03:13 +03009190 case INTR_TYPE_EXT_INTR:
Jan Kiszka3ab66e82013-02-20 14:03:24 +01009191 kvm_queue_interrupt(vcpu, vector, type == INTR_TYPE_SOFT_INTR);
Gleb Natapov37b96e92009-03-30 16:03:13 +03009192 break;
9193 default:
9194 break;
Avi Kivityf7d92382008-07-03 16:14:28 +03009195 }
Avi Kivitycf393f72008-07-01 16:20:21 +03009196}
9197
Avi Kivity83422e12010-07-20 14:43:23 +03009198static void vmx_complete_interrupts(struct vcpu_vmx *vmx)
9199{
Jan Kiszka3ab66e82013-02-20 14:03:24 +01009200 __vmx_complete_interrupts(&vmx->vcpu, vmx->idt_vectoring_info,
Avi Kivity83422e12010-07-20 14:43:23 +03009201 VM_EXIT_INSTRUCTION_LEN,
9202 IDT_VECTORING_ERROR_CODE);
9203}
9204
Avi Kivityb463a6f2010-07-20 15:06:17 +03009205static void vmx_cancel_injection(struct kvm_vcpu *vcpu)
9206{
Jan Kiszka3ab66e82013-02-20 14:03:24 +01009207 __vmx_complete_interrupts(vcpu,
Avi Kivityb463a6f2010-07-20 15:06:17 +03009208 vmcs_read32(VM_ENTRY_INTR_INFO_FIELD),
9209 VM_ENTRY_INSTRUCTION_LEN,
9210 VM_ENTRY_EXCEPTION_ERROR_CODE);
9211
9212 vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, 0);
9213}
9214
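/*
 * Ask perf which MSRs differ between host and guest (e.g. the global PMU
 * control MSR while counters are active) and switch only those via the
 * VMCS MSR autoload/autostore lists; entries whose host and guest values
 * match are removed to keep vmentry/vmexit cheap.
 */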
Gleb Natapovd7cd9792011-10-05 14:01:23 +02009215static void atomic_switch_perf_msrs(struct vcpu_vmx *vmx)
9216{
9217 int i, nr_msrs;
9218 struct perf_guest_switch_msr *msrs;
9219
9220 msrs = perf_guest_get_msrs(&nr_msrs);
9221
9222 if (!msrs)
9223 return;
9224
9225 for (i = 0; i < nr_msrs; i++)
9226 if (msrs[i].host == msrs[i].guest)
9227 clear_atomic_switch_msr(vmx, msrs[i].msr);
9228 else
9229 add_atomic_switch_msr(vmx, msrs[i].msr, msrs[i].guest,
9230 msrs[i].host);
9231}
9232
Jiang Biao33365e72016-11-03 15:03:37 +08009233static void vmx_arm_hv_timer(struct kvm_vcpu *vcpu)
Yunhong Jiang64672c92016-06-13 14:19:59 -07009234{
9235 struct vcpu_vmx *vmx = to_vmx(vcpu);
9236 u64 tscl;
9237 u32 delta_tsc;
9238
9239 if (vmx->hv_deadline_tsc == -1)
9240 return;
9241
9242 tscl = rdtsc();
9243 if (vmx->hv_deadline_tsc > tscl)
 9244		/* guaranteed to fit in 32 bits because it was checked in set_hv_timer */
9245 delta_tsc = (u32)((vmx->hv_deadline_tsc - tscl) >>
9246 cpu_preemption_timer_multi);
9247 else
9248 delta_tsc = 0;
9249
9250 vmcs_write32(VMX_PREEMPTION_TIMER_VALUE, delta_tsc);
9251}
9252
Lai Jiangshana3b5ba42011-02-11 14:29:40 +08009253static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
Avi Kivity6aa8b732006-12-10 02:21:36 -08009254{
Gregory Haskinsa2fa3e92007-07-27 08:13:10 -04009255 struct vcpu_vmx *vmx = to_vmx(vcpu);
Wanpeng Li74c55932017-11-29 01:31:20 -08009256 unsigned long cr3, cr4;
Avi Kivity104f2262010-11-18 13:12:52 +02009257
Paolo Bonzini8a1b4392017-11-06 13:31:12 +01009258 /* Record the guest's net vcpu time for enforced NMI injections. */
Paolo Bonzinid02fcf52017-11-06 13:31:13 +01009259 if (unlikely(!enable_vnmi &&
Paolo Bonzini8a1b4392017-11-06 13:31:12 +01009260 vmx->loaded_vmcs->soft_vnmi_blocked))
9261 vmx->loaded_vmcs->entry_time = ktime_get();
9262
Avi Kivity104f2262010-11-18 13:12:52 +02009263	/* Don't enter VMX if guest state is invalid; let the exit handler
 9264	   keep emulating until we arrive back at a valid state */
Gleb Natapov14168782013-01-21 15:36:49 +02009265 if (vmx->emulation_required)
Avi Kivity104f2262010-11-18 13:12:52 +02009266 return;
9267
Radim Krčmářa7653ec2014-08-21 18:08:07 +02009268 if (vmx->ple_window_dirty) {
9269 vmx->ple_window_dirty = false;
9270 vmcs_write32(PLE_WINDOW, vmx->ple_window);
9271 }
9272
Abel Gordon012f83c2013-04-18 14:39:25 +03009273 if (vmx->nested.sync_shadow_vmcs) {
9274 copy_vmcs12_to_shadow(vmx);
9275 vmx->nested.sync_shadow_vmcs = false;
9276 }
9277
Avi Kivity104f2262010-11-18 13:12:52 +02009278 if (test_bit(VCPU_REGS_RSP, (unsigned long *)&vcpu->arch.regs_dirty))
9279 vmcs_writel(GUEST_RSP, vcpu->arch.regs[VCPU_REGS_RSP]);
9280 if (test_bit(VCPU_REGS_RIP, (unsigned long *)&vcpu->arch.regs_dirty))
9281 vmcs_writel(GUEST_RIP, vcpu->arch.regs[VCPU_REGS_RIP]);
9282
Andy Lutomirskid6e41f12017-05-28 10:00:17 -07009283 cr3 = __get_current_cr3_fast();
Ladi Prosek44889942017-09-22 07:53:15 +02009284 if (unlikely(cr3 != vmx->loaded_vmcs->vmcs_host_cr3)) {
Andy Lutomirskid6e41f12017-05-28 10:00:17 -07009285 vmcs_writel(HOST_CR3, cr3);
Ladi Prosek44889942017-09-22 07:53:15 +02009286 vmx->loaded_vmcs->vmcs_host_cr3 = cr3;
Andy Lutomirskid6e41f12017-05-28 10:00:17 -07009287 }
9288
Andy Lutomirski1e02ce42014-10-24 15:58:08 -07009289 cr4 = cr4_read_shadow();
Ladi Prosek44889942017-09-22 07:53:15 +02009290 if (unlikely(cr4 != vmx->loaded_vmcs->vmcs_host_cr4)) {
Andy Lutomirskid974baa2014-10-08 09:02:13 -07009291 vmcs_writel(HOST_CR4, cr4);
Ladi Prosek44889942017-09-22 07:53:15 +02009292 vmx->loaded_vmcs->vmcs_host_cr4 = cr4;
Andy Lutomirskid974baa2014-10-08 09:02:13 -07009293 }
9294
Avi Kivity104f2262010-11-18 13:12:52 +02009295 /* When single-stepping over STI and MOV SS, we must clear the
9296 * corresponding interruptibility bits in the guest state. Otherwise
9297 * vmentry fails as it then expects bit 14 (BS) in pending debug
9298 * exceptions being set, but that's not correct for the guest debugging
9299 * case. */
9300 if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP)
9301 vmx_set_interrupt_shadow(vcpu, 0);
9302
Paolo Bonzinib9dd21e2017-08-23 23:14:38 +02009303 if (static_cpu_has(X86_FEATURE_PKU) &&
9304 kvm_read_cr4_bits(vcpu, X86_CR4_PKE) &&
9305 vcpu->arch.pkru != vmx->host_pkru)
9306 __write_pkru(vcpu->arch.pkru);
Xiao Guangrong1be0e612016-03-22 16:51:18 +08009307
Gleb Natapovd7cd9792011-10-05 14:01:23 +02009308 atomic_switch_perf_msrs(vmx);
9309
Yunhong Jiang64672c92016-06-13 14:19:59 -07009310 vmx_arm_hv_timer(vcpu);
9311
Nadav Har'Eld462b812011-05-24 15:26:10 +03009312 vmx->__launched = vmx->loaded_vmcs->launched;
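	/*
	 * The asm below loads the guest GPRs from vcpu->arch.regs[], reloads
	 * CR2 if it changed, executes VMLAUNCH or VMRESUME depending on
	 * __launched, and stores the guest GPRs and CR2 back on exit.
	 * HOST_RSP is rewritten only when the stack pointer has moved since
	 * the last entry, to avoid a redundant VMWRITE.
	 */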
Avi Kivity104f2262010-11-18 13:12:52 +02009313 asm(
Avi Kivity6aa8b732006-12-10 02:21:36 -08009314 /* Store host registers */
Avi Kivityb188c81f2012-09-16 15:10:58 +03009315 "push %%" _ASM_DX "; push %%" _ASM_BP ";"
9316 "push %%" _ASM_CX " \n\t" /* placeholder for guest rcx */
9317 "push %%" _ASM_CX " \n\t"
9318 "cmp %%" _ASM_SP ", %c[host_rsp](%0) \n\t"
Avi Kivity313dbd492008-07-17 18:04:30 +03009319 "je 1f \n\t"
Avi Kivityb188c81f2012-09-16 15:10:58 +03009320 "mov %%" _ASM_SP ", %c[host_rsp](%0) \n\t"
Avi Kivity4ecac3f2008-05-13 13:23:38 +03009321 __ex(ASM_VMX_VMWRITE_RSP_RDX) "\n\t"
Avi Kivity313dbd492008-07-17 18:04:30 +03009322 "1: \n\t"
Avi Kivityd3edefc2009-06-16 12:33:56 +03009323 /* Reload cr2 if changed */
Avi Kivityb188c81f2012-09-16 15:10:58 +03009324 "mov %c[cr2](%0), %%" _ASM_AX " \n\t"
9325 "mov %%cr2, %%" _ASM_DX " \n\t"
9326 "cmp %%" _ASM_AX ", %%" _ASM_DX " \n\t"
Avi Kivityd3edefc2009-06-16 12:33:56 +03009327 "je 2f \n\t"
Avi Kivityb188c81f2012-09-16 15:10:58 +03009328 "mov %%" _ASM_AX", %%cr2 \n\t"
Avi Kivityd3edefc2009-06-16 12:33:56 +03009329 "2: \n\t"
Avi Kivity6aa8b732006-12-10 02:21:36 -08009330		/* Check if vmlaunch or vmresume is needed */
Avi Kivitye08aa782007-11-15 18:06:18 +02009331 "cmpl $0, %c[launched](%0) \n\t"
Avi Kivity6aa8b732006-12-10 02:21:36 -08009332 /* Load guest registers. Don't clobber flags. */
Avi Kivityb188c81f2012-09-16 15:10:58 +03009333 "mov %c[rax](%0), %%" _ASM_AX " \n\t"
9334 "mov %c[rbx](%0), %%" _ASM_BX " \n\t"
9335 "mov %c[rdx](%0), %%" _ASM_DX " \n\t"
9336 "mov %c[rsi](%0), %%" _ASM_SI " \n\t"
9337 "mov %c[rdi](%0), %%" _ASM_DI " \n\t"
9338 "mov %c[rbp](%0), %%" _ASM_BP " \n\t"
Avi Kivity05b3e0c2006-12-13 00:33:45 -08009339#ifdef CONFIG_X86_64
Avi Kivitye08aa782007-11-15 18:06:18 +02009340 "mov %c[r8](%0), %%r8 \n\t"
9341 "mov %c[r9](%0), %%r9 \n\t"
9342 "mov %c[r10](%0), %%r10 \n\t"
9343 "mov %c[r11](%0), %%r11 \n\t"
9344 "mov %c[r12](%0), %%r12 \n\t"
9345 "mov %c[r13](%0), %%r13 \n\t"
9346 "mov %c[r14](%0), %%r14 \n\t"
9347 "mov %c[r15](%0), %%r15 \n\t"
Avi Kivity6aa8b732006-12-10 02:21:36 -08009348#endif
Avi Kivityb188c81f2012-09-16 15:10:58 +03009349 "mov %c[rcx](%0), %%" _ASM_CX " \n\t" /* kills %0 (ecx) */
Avi Kivityc8019492008-07-14 14:44:59 +03009350
Avi Kivity6aa8b732006-12-10 02:21:36 -08009351 /* Enter guest mode */
Avi Kivity83287ea422012-09-16 15:10:57 +03009352 "jne 1f \n\t"
Avi Kivity4ecac3f2008-05-13 13:23:38 +03009353 __ex(ASM_VMX_VMLAUNCH) "\n\t"
Avi Kivity83287ea422012-09-16 15:10:57 +03009354 "jmp 2f \n\t"
9355 "1: " __ex(ASM_VMX_VMRESUME) "\n\t"
9356 "2: "
Avi Kivity6aa8b732006-12-10 02:21:36 -08009357 /* Save guest registers, load host registers, keep flags */
Avi Kivityb188c81f2012-09-16 15:10:58 +03009358 "mov %0, %c[wordsize](%%" _ASM_SP ") \n\t"
Avi Kivity40712fa2011-01-06 18:09:12 +02009359 "pop %0 \n\t"
Avi Kivityb188c81f2012-09-16 15:10:58 +03009360 "mov %%" _ASM_AX ", %c[rax](%0) \n\t"
9361 "mov %%" _ASM_BX ", %c[rbx](%0) \n\t"
9362 __ASM_SIZE(pop) " %c[rcx](%0) \n\t"
9363 "mov %%" _ASM_DX ", %c[rdx](%0) \n\t"
9364 "mov %%" _ASM_SI ", %c[rsi](%0) \n\t"
9365 "mov %%" _ASM_DI ", %c[rdi](%0) \n\t"
9366 "mov %%" _ASM_BP ", %c[rbp](%0) \n\t"
Avi Kivity05b3e0c2006-12-13 00:33:45 -08009367#ifdef CONFIG_X86_64
Avi Kivitye08aa782007-11-15 18:06:18 +02009368 "mov %%r8, %c[r8](%0) \n\t"
9369 "mov %%r9, %c[r9](%0) \n\t"
9370 "mov %%r10, %c[r10](%0) \n\t"
9371 "mov %%r11, %c[r11](%0) \n\t"
9372 "mov %%r12, %c[r12](%0) \n\t"
9373 "mov %%r13, %c[r13](%0) \n\t"
9374 "mov %%r14, %c[r14](%0) \n\t"
9375 "mov %%r15, %c[r15](%0) \n\t"
Avi Kivity6aa8b732006-12-10 02:21:36 -08009376#endif
Avi Kivityb188c81f2012-09-16 15:10:58 +03009377 "mov %%cr2, %%" _ASM_AX " \n\t"
9378 "mov %%" _ASM_AX ", %c[cr2](%0) \n\t"
Avi Kivityc8019492008-07-14 14:44:59 +03009379
Avi Kivityb188c81f2012-09-16 15:10:58 +03009380 "pop %%" _ASM_BP "; pop %%" _ASM_DX " \n\t"
Avi Kivitye08aa782007-11-15 18:06:18 +02009381 "setbe %c[fail](%0) \n\t"
Avi Kivity83287ea422012-09-16 15:10:57 +03009382 ".pushsection .rodata \n\t"
9383 ".global vmx_return \n\t"
9384 "vmx_return: " _ASM_PTR " 2b \n\t"
9385 ".popsection"
Avi Kivitye08aa782007-11-15 18:06:18 +02009386 : : "c"(vmx), "d"((unsigned long)HOST_RSP),
Nadav Har'Eld462b812011-05-24 15:26:10 +03009387 [launched]"i"(offsetof(struct vcpu_vmx, __launched)),
Avi Kivitye08aa782007-11-15 18:06:18 +02009388 [fail]"i"(offsetof(struct vcpu_vmx, fail)),
Avi Kivity313dbd492008-07-17 18:04:30 +03009389 [host_rsp]"i"(offsetof(struct vcpu_vmx, host_rsp)),
Zhang Xiantaoad312c72007-12-13 23:50:52 +08009390 [rax]"i"(offsetof(struct vcpu_vmx, vcpu.arch.regs[VCPU_REGS_RAX])),
9391 [rbx]"i"(offsetof(struct vcpu_vmx, vcpu.arch.regs[VCPU_REGS_RBX])),
9392 [rcx]"i"(offsetof(struct vcpu_vmx, vcpu.arch.regs[VCPU_REGS_RCX])),
9393 [rdx]"i"(offsetof(struct vcpu_vmx, vcpu.arch.regs[VCPU_REGS_RDX])),
9394 [rsi]"i"(offsetof(struct vcpu_vmx, vcpu.arch.regs[VCPU_REGS_RSI])),
9395 [rdi]"i"(offsetof(struct vcpu_vmx, vcpu.arch.regs[VCPU_REGS_RDI])),
9396 [rbp]"i"(offsetof(struct vcpu_vmx, vcpu.arch.regs[VCPU_REGS_RBP])),
Avi Kivity05b3e0c2006-12-13 00:33:45 -08009397#ifdef CONFIG_X86_64
Zhang Xiantaoad312c72007-12-13 23:50:52 +08009398 [r8]"i"(offsetof(struct vcpu_vmx, vcpu.arch.regs[VCPU_REGS_R8])),
9399 [r9]"i"(offsetof(struct vcpu_vmx, vcpu.arch.regs[VCPU_REGS_R9])),
9400 [r10]"i"(offsetof(struct vcpu_vmx, vcpu.arch.regs[VCPU_REGS_R10])),
9401 [r11]"i"(offsetof(struct vcpu_vmx, vcpu.arch.regs[VCPU_REGS_R11])),
9402 [r12]"i"(offsetof(struct vcpu_vmx, vcpu.arch.regs[VCPU_REGS_R12])),
9403 [r13]"i"(offsetof(struct vcpu_vmx, vcpu.arch.regs[VCPU_REGS_R13])),
9404 [r14]"i"(offsetof(struct vcpu_vmx, vcpu.arch.regs[VCPU_REGS_R14])),
9405 [r15]"i"(offsetof(struct vcpu_vmx, vcpu.arch.regs[VCPU_REGS_R15])),
Avi Kivity6aa8b732006-12-10 02:21:36 -08009406#endif
Avi Kivity40712fa2011-01-06 18:09:12 +02009407 [cr2]"i"(offsetof(struct vcpu_vmx, vcpu.arch.cr2)),
9408 [wordsize]"i"(sizeof(ulong))
Laurent Vivierc2036302007-10-25 14:18:52 +02009409 : "cc", "memory"
9410#ifdef CONFIG_X86_64
Avi Kivityb188c81f2012-09-16 15:10:58 +03009411 , "rax", "rbx", "rdi", "rsi"
Laurent Vivierc2036302007-10-25 14:18:52 +02009412 , "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15"
Avi Kivityb188c81f2012-09-16 15:10:58 +03009413#else
9414 , "eax", "ebx", "edi", "esi"
Laurent Vivierc2036302007-10-25 14:18:52 +02009415#endif
9416 );
Avi Kivity6aa8b732006-12-10 02:21:36 -08009417
Gleb Natapov2a7921b2012-08-12 16:12:29 +03009418 /* MSR_IA32_DEBUGCTLMSR is zeroed on vmexit. Restore it if needed */
Wanpeng Li74c55932017-11-29 01:31:20 -08009419 if (vmx->host_debugctlmsr)
9420 update_debugctlmsr(vmx->host_debugctlmsr);
Gleb Natapov2a7921b2012-08-12 16:12:29 +03009421
Avi Kivityaa67f602012-08-01 16:48:03 +03009422#ifndef CONFIG_X86_64
9423 /*
9424 * The sysexit path does not restore ds/es, so we must set them to
9425 * a reasonable value ourselves.
9426 *
9427 * We can't defer this to vmx_load_host_state() since that function
 9428	 * may be executed in interrupt context, which saves and restores segments
9429 * around it, nullifying its effect.
9430 */
9431 loadsegment(ds, __USER_DS);
9432 loadsegment(es, __USER_DS);
9433#endif
9434
Avi Kivity6de4f3a2009-05-31 22:58:47 +03009435 vcpu->arch.regs_avail = ~((1 << VCPU_REGS_RIP) | (1 << VCPU_REGS_RSP)
Avi Kivity6de12732011-03-07 12:51:22 +02009436 | (1 << VCPU_EXREG_RFLAGS)
Avi Kivityaff48ba2010-12-05 18:56:11 +02009437 | (1 << VCPU_EXREG_PDPTR)
Avi Kivity2fb92db2011-04-27 19:42:18 +03009438 | (1 << VCPU_EXREG_SEGMENTS)
Avi Kivityaff48ba2010-12-05 18:56:11 +02009439 | (1 << VCPU_EXREG_CR3));
Marcelo Tosatti5fdbf972008-06-27 14:58:02 -03009440 vcpu->arch.regs_dirty = 0;
9441
Gleb Natapove0b890d2013-09-25 12:51:33 +03009442 /*
Xiao Guangrong1be0e612016-03-22 16:51:18 +08009443	 * Eager FPU is enabled if PKEYs are supported, and CR4 has been
 9444	 * switched back on the host, so it is safe to read the guest PKRU
 9445	 * from the current XSAVE.
9446 */
Paolo Bonzinib9dd21e2017-08-23 23:14:38 +02009447 if (static_cpu_has(X86_FEATURE_PKU) &&
9448 kvm_read_cr4_bits(vcpu, X86_CR4_PKE)) {
9449 vcpu->arch.pkru = __read_pkru();
9450 if (vcpu->arch.pkru != vmx->host_pkru)
Xiao Guangrong1be0e612016-03-22 16:51:18 +08009451 __write_pkru(vmx->host_pkru);
Xiao Guangrong1be0e612016-03-22 16:51:18 +08009452 }
9453
9454 /*
Gleb Natapove0b890d2013-09-25 12:51:33 +03009455 * the KVM_REQ_EVENT optimization bit is only on for one entry, and if
9456 * we did not inject a still-pending event to L1 now because of
9457 * nested_run_pending, we need to re-enable this bit.
9458 */
9459 if (vmx->nested.nested_run_pending)
9460 kvm_make_request(KVM_REQ_EVENT, vcpu);
9461
9462 vmx->nested.nested_run_pending = 0;
Jim Mattsonb060ca32017-09-14 16:31:42 -07009463 vmx->idt_vectoring_info = 0;
9464
9465 vmx->exit_reason = vmx->fail ? 0xdead : vmcs_read32(VM_EXIT_REASON);
9466 if (vmx->fail || (vmx->exit_reason & VMX_EXIT_REASONS_FAILED_VMENTRY))
9467 return;
9468
9469 vmx->loaded_vmcs->launched = 1;
9470 vmx->idt_vectoring_info = vmcs_read32(IDT_VECTORING_INFO_FIELD);
Gleb Natapove0b890d2013-09-25 12:51:33 +03009471
Avi Kivity51aa01d2010-07-20 14:31:20 +03009472 vmx_complete_atomic_exit(vmx);
9473 vmx_recover_nmi_blocking(vmx);
Avi Kivitycf393f72008-07-01 16:20:21 +03009474 vmx_complete_interrupts(vmx);
Avi Kivity6aa8b732006-12-10 02:21:36 -08009475}
Josh Poimboeufc207aee2017-06-28 10:11:06 -05009476STACK_FRAME_NON_STANDARD(vmx_vcpu_run);
Avi Kivity6aa8b732006-12-10 02:21:36 -08009477
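/*
 * Switch which loaded VMCS (vmcs01 or a vmcs02) backs this vcpu: put the
 * vcpu, repoint vmx->loaded_vmcs, and load it again on the current CPU so
 * the per-cpu bookkeeping and host state are re-established.
 */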
David Hildenbrand1279a6b12017-03-20 10:00:08 +01009478static void vmx_switch_vmcs(struct kvm_vcpu *vcpu, struct loaded_vmcs *vmcs)
Paolo Bonzini4fa77342014-07-17 12:25:16 +02009479{
9480 struct vcpu_vmx *vmx = to_vmx(vcpu);
9481 int cpu;
9482
David Hildenbrand1279a6b12017-03-20 10:00:08 +01009483 if (vmx->loaded_vmcs == vmcs)
Paolo Bonzini4fa77342014-07-17 12:25:16 +02009484 return;
9485
9486 cpu = get_cpu();
David Hildenbrand1279a6b12017-03-20 10:00:08 +01009487 vmx->loaded_vmcs = vmcs;
Paolo Bonzini4fa77342014-07-17 12:25:16 +02009488 vmx_vcpu_put(vcpu);
9489 vmx_vcpu_load(vcpu, cpu);
Paolo Bonzini4fa77342014-07-17 12:25:16 +02009490 put_cpu();
9491}
9492
Jim Mattson2f1fe812016-07-08 15:36:06 -07009493/*
9494 * Ensure that the current vmcs of the logical processor is the
9495 * vmcs01 of the vcpu before calling free_nested().
9496 */
9497static void vmx_free_vcpu_nested(struct kvm_vcpu *vcpu)
9498{
9499 struct vcpu_vmx *vmx = to_vmx(vcpu);
Jim Mattson2f1fe812016-07-08 15:36:06 -07009500
Christoffer Dallec7660c2017-12-04 21:35:23 +01009501 vcpu_load(vcpu);
David Hildenbrand1279a6b12017-03-20 10:00:08 +01009502 vmx_switch_vmcs(vcpu, &vmx->vmcs01);
Jim Mattson2f1fe812016-07-08 15:36:06 -07009503 free_nested(vmx);
9504 vcpu_put(vcpu);
9505}
9506
Avi Kivity6aa8b732006-12-10 02:21:36 -08009507static void vmx_free_vcpu(struct kvm_vcpu *vcpu)
9508{
Rusty Russellfb3f0f52007-07-27 17:16:56 +10009509 struct vcpu_vmx *vmx = to_vmx(vcpu);
9510
Kai Huang843e4332015-01-28 10:54:28 +08009511 if (enable_pml)
Kai Huanga3eaa862015-11-04 13:46:05 +08009512 vmx_destroy_pml_buffer(vmx);
Wanpeng Li991e7a02015-09-16 17:30:05 +08009513 free_vpid(vmx->vpid);
Paolo Bonzini4fa77342014-07-17 12:25:16 +02009514 leave_guest_mode(vcpu);
Jim Mattson2f1fe812016-07-08 15:36:06 -07009515 vmx_free_vcpu_nested(vcpu);
Paolo Bonzini4fa77342014-07-17 12:25:16 +02009516 free_loaded_vmcs(vmx->loaded_vmcs);
Rusty Russellfb3f0f52007-07-27 17:16:56 +10009517 kfree(vmx->guest_msrs);
9518 kvm_vcpu_uninit(vcpu);
Rusty Russella4770342007-08-01 14:46:11 +10009519 kmem_cache_free(kvm_vcpu_cache, vmx);
Avi Kivity6aa8b732006-12-10 02:21:36 -08009520}
9521
Rusty Russellfb3f0f52007-07-27 17:16:56 +10009522static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id)
Avi Kivity6aa8b732006-12-10 02:21:36 -08009523{
Rusty Russellfb3f0f52007-07-27 17:16:56 +10009524 int err;
Rusty Russellc16f8622007-07-30 21:12:19 +10009525 struct vcpu_vmx *vmx = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
Avi Kivity15ad7142007-07-11 18:17:21 +03009526 int cpu;
Avi Kivity6aa8b732006-12-10 02:21:36 -08009527
Gregory Haskinsa2fa3e92007-07-27 08:13:10 -04009528 if (!vmx)
Rusty Russellfb3f0f52007-07-27 17:16:56 +10009529 return ERR_PTR(-ENOMEM);
9530
Wanpeng Li991e7a02015-09-16 17:30:05 +08009531 vmx->vpid = allocate_vpid();
Sheng Yang2384d2b2008-01-17 15:14:33 +08009532
Rusty Russellfb3f0f52007-07-27 17:16:56 +10009533 err = kvm_vcpu_init(&vmx->vcpu, kvm, id);
9534 if (err)
9535 goto free_vcpu;
Ingo Molnar965b58a2007-01-05 16:36:23 -08009536
Peter Feiner4e595162016-07-07 14:49:58 -07009537 err = -ENOMEM;
9538
9539 /*
 9540	 * If PML is turned on, a failure to enable PML simply results in a
 9541	 * failure to create the vcpu, so we can simplify the PML logic (by
 9542	 * avoiding cases such as enabling PML only partially across the
 9543	 * guest's vcpus, etc.).
9544 */
9545 if (enable_pml) {
9546 vmx->pml_pg = alloc_page(GFP_KERNEL | __GFP_ZERO);
9547 if (!vmx->pml_pg)
9548 goto uninit_vcpu;
9549 }
9550
Gregory Haskinsa2fa3e92007-07-27 08:13:10 -04009551 vmx->guest_msrs = kmalloc(PAGE_SIZE, GFP_KERNEL);
Paolo Bonzini03916db2014-07-24 14:21:57 +02009552 BUILD_BUG_ON(ARRAY_SIZE(vmx_msr_index) * sizeof(vmx->guest_msrs[0])
9553 > PAGE_SIZE);
Nadav Amit0123be42014-07-24 15:06:56 +03009554
Peter Feiner4e595162016-07-07 14:49:58 -07009555 if (!vmx->guest_msrs)
9556 goto free_pml;
Ingo Molnar965b58a2007-01-05 16:36:23 -08009557
Nadav Har'Eld462b812011-05-24 15:26:10 +03009558 vmx->loaded_vmcs = &vmx->vmcs01;
9559 vmx->loaded_vmcs->vmcs = alloc_vmcs();
Jim Mattson355f4fb2016-10-28 08:29:39 -07009560 vmx->loaded_vmcs->shadow_vmcs = NULL;
Nadav Har'Eld462b812011-05-24 15:26:10 +03009561 if (!vmx->loaded_vmcs->vmcs)
Rusty Russellfb3f0f52007-07-27 17:16:56 +10009562 goto free_msrs;
Nadav Har'Eld462b812011-05-24 15:26:10 +03009563 loaded_vmcs_init(vmx->loaded_vmcs);
Gregory Haskinsa2fa3e92007-07-27 08:13:10 -04009564
Avi Kivity15ad7142007-07-11 18:17:21 +03009565 cpu = get_cpu();
9566 vmx_vcpu_load(&vmx->vcpu, cpu);
Zachary Amsdene48672f2010-08-19 22:07:23 -10009567 vmx->vcpu.cpu = cpu;
David Hildenbrand12d79912017-08-24 20:51:26 +02009568 vmx_vcpu_setup(vmx);
Rusty Russellfb3f0f52007-07-27 17:16:56 +10009569 vmx_vcpu_put(&vmx->vcpu);
Avi Kivity15ad7142007-07-11 18:17:21 +03009570 put_cpu();
Paolo Bonzini35754c92015-07-29 12:05:37 +02009571 if (cpu_need_virtualize_apic_accesses(&vmx->vcpu)) {
Jan Kiszkabe6d05c2011-04-13 01:27:55 +02009572 err = alloc_apic_access_page(kvm);
9573 if (err)
Marcelo Tosatti5e4a0b32008-02-14 21:21:43 -02009574 goto free_vmcs;
Jan Kiszkaa63cb562013-04-08 11:07:46 +02009575 }
Ingo Molnar965b58a2007-01-05 16:36:23 -08009576
Sheng Yangb927a3c2009-07-21 10:42:48 +08009577 if (enable_ept) {
Tang Chenf51770e2014-09-16 18:41:59 +08009578 err = init_rmode_identity_map(kvm);
9579 if (err)
Gleb Natapov93ea5382011-02-21 12:07:59 +02009580 goto free_vmcs;
Sheng Yangb927a3c2009-07-21 10:42:48 +08009581 }
Sheng Yangb7ebfb02008-04-25 21:44:52 +08009582
Wanpeng Li5c614b32015-10-13 09:18:36 -07009583 if (nested) {
Wincy Vanb9c237b2015-02-03 23:56:30 +08009584 nested_vmx_setup_ctls_msrs(vmx);
Wanpeng Li5c614b32015-10-13 09:18:36 -07009585 vmx->nested.vpid02 = allocate_vpid();
9586 }
Wincy Vanb9c237b2015-02-03 23:56:30 +08009587
Wincy Van705699a2015-02-03 23:58:17 +08009588 vmx->nested.posted_intr_nv = -1;
Nadav Har'Ela9d30f32011-05-25 23:03:55 +03009589 vmx->nested.current_vmptr = -1ull;
Nadav Har'Ela9d30f32011-05-25 23:03:55 +03009590
Haozhong Zhang37e4c992016-06-22 14:59:55 +08009591 vmx->msr_ia32_feature_control_valid_bits = FEATURE_CONTROL_LOCKED;
9592
Paolo Bonzini31afb2e2017-06-06 12:57:06 +02009593 /*
9594 * Enforce invariant: pi_desc.nv is always either POSTED_INTR_VECTOR
9595 * or POSTED_INTR_WAKEUP_VECTOR.
9596 */
9597 vmx->pi_desc.nv = POSTED_INTR_VECTOR;
9598 vmx->pi_desc.sn = 1;
9599
Rusty Russellfb3f0f52007-07-27 17:16:56 +10009600 return &vmx->vcpu;
Ingo Molnar965b58a2007-01-05 16:36:23 -08009601
Rusty Russellfb3f0f52007-07-27 17:16:56 +10009602free_vmcs:
Wanpeng Li5c614b32015-10-13 09:18:36 -07009603 free_vpid(vmx->nested.vpid02);
Xiao Guangrong5f3fbc32012-05-14 14:58:58 +08009604 free_loaded_vmcs(vmx->loaded_vmcs);
Rusty Russellfb3f0f52007-07-27 17:16:56 +10009605free_msrs:
Rusty Russellfb3f0f52007-07-27 17:16:56 +10009606 kfree(vmx->guest_msrs);
Peter Feiner4e595162016-07-07 14:49:58 -07009607free_pml:
9608 vmx_destroy_pml_buffer(vmx);
Rusty Russellfb3f0f52007-07-27 17:16:56 +10009609uninit_vcpu:
9610 kvm_vcpu_uninit(&vmx->vcpu);
9611free_vcpu:
Wanpeng Li991e7a02015-09-16 17:30:05 +08009612 free_vpid(vmx->vpid);
Rusty Russella4770342007-08-01 14:46:11 +10009613 kmem_cache_free(kvm_vcpu_cache, vmx);
Rusty Russellfb3f0f52007-07-27 17:16:56 +10009614 return ERR_PTR(err);
Avi Kivity6aa8b732006-12-10 02:21:36 -08009615}
9616
Yang, Sheng002c7f72007-07-31 14:23:01 +03009617static void __init vmx_check_processor_compat(void *rtn)
9618{
9619 struct vmcs_config vmcs_conf;
9620
9621 *(int *)rtn = 0;
9622 if (setup_vmcs_config(&vmcs_conf) < 0)
9623 *(int *)rtn = -EIO;
9624 if (memcmp(&vmcs_config, &vmcs_conf, sizeof(struct vmcs_config)) != 0) {
9625 printk(KERN_ERR "kvm: CPU %d feature inconsistency!\n",
9626 smp_processor_id());
9627 *(int *)rtn = -EIO;
9628 }
9629}
9630
Sheng Yang4b12f0d2009-04-27 20:35:42 +08009631static u64 vmx_get_mt_mask(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio)
Sheng Yang64d4d522008-10-09 16:01:57 +08009632{
Xiao Guangrongb18d5432015-06-15 16:55:21 +08009633 u8 cache;
9634 u64 ipat = 0;
Sheng Yang4b12f0d2009-04-27 20:35:42 +08009635
Sheng Yang522c68c2009-04-27 20:35:43 +08009636	/* For the VT-d and EPT combination:
Paolo Bonzini606decd2015-10-01 13:12:47 +02009637	 * 1. MMIO: always map as UC.
Sheng Yang522c68c2009-04-27 20:35:43 +08009638	 * 2. EPT with VT-d:
 9639	 *   a. VT-d without the snooping control feature: the result cannot be
Paolo Bonzini606decd2015-10-01 13:12:47 +02009640	 *	guaranteed, so try to trust the guest's MTRRs.
Sheng Yang522c68c2009-04-27 20:35:43 +08009641	 *   b. VT-d with the snooping control feature: the VT-d engine's snooping
 9642	 *	guarantees cache correctness, so map as WB to stay consistent
 9643	 *	with the host, i.e. the same as item 3.
Sheng Yanga19a6d12010-02-09 16:41:53 +08009644	 * 3. EPT without VT-d: always map as WB and set IPAT=1 to stay
Sheng Yang522c68c2009-04-27 20:35:43 +08009645	 *    consistent with the host MTRRs.
9646 */
Paolo Bonzini606decd2015-10-01 13:12:47 +02009647 if (is_mmio) {
9648 cache = MTRR_TYPE_UNCACHABLE;
9649 goto exit;
9650 }
9651
9652 if (!kvm_arch_has_noncoherent_dma(vcpu->kvm)) {
Xiao Guangrongb18d5432015-06-15 16:55:21 +08009653 ipat = VMX_EPT_IPAT_BIT;
9654 cache = MTRR_TYPE_WRBACK;
9655 goto exit;
9656 }
9657
9658 if (kvm_read_cr0(vcpu) & X86_CR0_CD) {
9659 ipat = VMX_EPT_IPAT_BIT;
Paolo Bonzini0da029e2015-07-23 08:24:42 +02009660 if (kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_CD_NW_CLEARED))
Xiao Guangrongfb2799502015-07-16 03:25:56 +08009661 cache = MTRR_TYPE_WRBACK;
9662 else
9663 cache = MTRR_TYPE_UNCACHABLE;
Xiao Guangrongb18d5432015-06-15 16:55:21 +08009664 goto exit;
9665 }
9666
Xiao Guangrongff536042015-06-15 16:55:22 +08009667 cache = kvm_mtrr_get_guest_memory_type(vcpu, gfn);
Xiao Guangrongb18d5432015-06-15 16:55:21 +08009668
9669exit:
9670 return (cache << VMX_EPT_MT_EPTE_SHIFT) | ipat;
Sheng Yang64d4d522008-10-09 16:01:57 +08009671}
9672
Sheng Yang17cc3932010-01-05 19:02:27 +08009673static int vmx_get_lpage_level(void)
Joerg Roedel344f4142009-07-27 16:30:48 +02009674{
Sheng Yang878403b2010-01-05 19:02:29 +08009675 if (enable_ept && !cpu_has_vmx_ept_1g_page())
9676 return PT_DIRECTORY_LEVEL;
9677 else
 9678		/* Shadow paging, and EPT with 1GB-page support, both allow 1GB pages */
9679 return PT_PDPE_LEVEL;
Joerg Roedel344f4142009-07-27 16:30:48 +02009680}
9681
Xiao Guangrongfeda8052015-09-09 14:05:55 +08009682static void vmcs_set_secondary_exec_control(u32 new_ctl)
9683{
9684 /*
9685 * These bits in the secondary execution controls field
 9686	 * are dynamic; the others are mostly based on the hypervisor
9687 * architecture and the guest's CPUID. Do not touch the
9688 * dynamic bits.
9689 */
9690 u32 mask =
9691 SECONDARY_EXEC_SHADOW_VMCS |
9692 SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE |
Paolo Bonzini0367f202016-07-12 10:44:55 +02009693 SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES |
9694 SECONDARY_EXEC_DESC;
Xiao Guangrongfeda8052015-09-09 14:05:55 +08009695
9696 u32 cur_ctl = vmcs_read32(SECONDARY_VM_EXEC_CONTROL);
9697
9698 vmcs_write32(SECONDARY_VM_EXEC_CONTROL,
9699 (new_ctl & ~mask) | (cur_ctl & mask));
9700}
9701
David Matlack8322ebb2016-11-29 18:14:09 -08009702/*
9703 * Generate MSR_IA32_VMX_CR{0,4}_FIXED1 according to CPUID. Only set bits
9704 * (indicating "allowed-1") if they are supported in the guest's CPUID.
9705 */
9706static void nested_vmx_cr_fixed1_bits_update(struct kvm_vcpu *vcpu)
9707{
9708 struct vcpu_vmx *vmx = to_vmx(vcpu);
9709 struct kvm_cpuid_entry2 *entry;
9710
9711 vmx->nested.nested_vmx_cr0_fixed1 = 0xffffffff;
9712 vmx->nested.nested_vmx_cr4_fixed1 = X86_CR4_PCE;
9713
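	/*
	 * Set a CR4 "allowed-1" bit only when the matching CPUID feature bit
	 * is present; 'entry' may be NULL when the guest lacks the CPUID
	 * leaf, in which case the corresponding CR4 bit stays fixed to 0.
	 */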
9714#define cr4_fixed1_update(_cr4_mask, _reg, _cpuid_mask) do { \
9715 if (entry && (entry->_reg & (_cpuid_mask))) \
9716 vmx->nested.nested_vmx_cr4_fixed1 |= (_cr4_mask); \
9717} while (0)
9718
9719 entry = kvm_find_cpuid_entry(vcpu, 0x1, 0);
9720 cr4_fixed1_update(X86_CR4_VME, edx, bit(X86_FEATURE_VME));
9721 cr4_fixed1_update(X86_CR4_PVI, edx, bit(X86_FEATURE_VME));
9722 cr4_fixed1_update(X86_CR4_TSD, edx, bit(X86_FEATURE_TSC));
9723 cr4_fixed1_update(X86_CR4_DE, edx, bit(X86_FEATURE_DE));
9724 cr4_fixed1_update(X86_CR4_PSE, edx, bit(X86_FEATURE_PSE));
9725 cr4_fixed1_update(X86_CR4_PAE, edx, bit(X86_FEATURE_PAE));
9726 cr4_fixed1_update(X86_CR4_MCE, edx, bit(X86_FEATURE_MCE));
9727 cr4_fixed1_update(X86_CR4_PGE, edx, bit(X86_FEATURE_PGE));
9728 cr4_fixed1_update(X86_CR4_OSFXSR, edx, bit(X86_FEATURE_FXSR));
9729 cr4_fixed1_update(X86_CR4_OSXMMEXCPT, edx, bit(X86_FEATURE_XMM));
9730 cr4_fixed1_update(X86_CR4_VMXE, ecx, bit(X86_FEATURE_VMX));
9731 cr4_fixed1_update(X86_CR4_SMXE, ecx, bit(X86_FEATURE_SMX));
9732 cr4_fixed1_update(X86_CR4_PCIDE, ecx, bit(X86_FEATURE_PCID));
9733 cr4_fixed1_update(X86_CR4_OSXSAVE, ecx, bit(X86_FEATURE_XSAVE));
9734
9735 entry = kvm_find_cpuid_entry(vcpu, 0x7, 0);
9736 cr4_fixed1_update(X86_CR4_FSGSBASE, ebx, bit(X86_FEATURE_FSGSBASE));
9737 cr4_fixed1_update(X86_CR4_SMEP, ebx, bit(X86_FEATURE_SMEP));
9738 cr4_fixed1_update(X86_CR4_SMAP, ebx, bit(X86_FEATURE_SMAP));
9739 cr4_fixed1_update(X86_CR4_PKE, ecx, bit(X86_FEATURE_PKU));
Paolo Bonzinic4ad77e2017-11-13 14:23:59 +01009740 cr4_fixed1_update(X86_CR4_UMIP, ecx, bit(X86_FEATURE_UMIP));
David Matlack8322ebb2016-11-29 18:14:09 -08009741
9742#undef cr4_fixed1_update
9743}
9744
Sheng Yang0e851882009-12-18 16:48:46 +08009745static void vmx_cpuid_update(struct kvm_vcpu *vcpu)
9746{
Sheng Yang4e47c7a2009-12-18 16:48:47 +08009747 struct vcpu_vmx *vmx = to_vmx(vcpu);
Sheng Yang4e47c7a2009-12-18 16:48:47 +08009748
Paolo Bonzini80154d72017-08-24 13:55:35 +02009749 if (cpu_has_secondary_exec_ctrls()) {
9750 vmx_compute_secondary_exec_control(vmx);
9751 vmcs_set_secondary_exec_control(vmx->secondary_exec_control);
Sheng Yang4e47c7a2009-12-18 16:48:47 +08009752 }
Mao, Junjiead756a12012-07-02 01:18:48 +00009753
Haozhong Zhang37e4c992016-06-22 14:59:55 +08009754 if (nested_vmx_allowed(vcpu))
9755 to_vmx(vcpu)->msr_ia32_feature_control_valid_bits |=
9756 FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX;
9757 else
9758 to_vmx(vcpu)->msr_ia32_feature_control_valid_bits &=
9759 ~FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX;
David Matlack8322ebb2016-11-29 18:14:09 -08009760
9761 if (nested_vmx_allowed(vcpu))
9762 nested_vmx_cr_fixed1_bits_update(vcpu);
Sheng Yang0e851882009-12-18 16:48:46 +08009763}
9764
Joerg Roedeld4330ef2010-04-22 12:33:11 +02009765static void vmx_set_supported_cpuid(u32 func, struct kvm_cpuid_entry2 *entry)
9766{
Nadav Har'El7b8050f2011-05-25 23:16:10 +03009767 if (func == 1 && nested)
9768 entry->ecx |= bit(X86_FEATURE_VMX);
Joerg Roedeld4330ef2010-04-22 12:33:11 +02009769}
9770
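/*
 * Reflect an EPT-induced fault in L2 back to L1 as a nested vmexit:
 * EXIT_REASON_PML_FULL, EXIT_REASON_EPT_MISCONFIG or
 * EXIT_REASON_EPT_VIOLATION, with the exit qualification and the faulting
 * guest-physical address filled in for L1.
 */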
Yang Zhang25d92082013-08-06 12:00:32 +03009771static void nested_ept_inject_page_fault(struct kvm_vcpu *vcpu,
9772 struct x86_exception *fault)
9773{
Jan Kiszka533558b2014-01-04 18:47:20 +01009774 struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
Bandan Dasc5f983f2017-05-05 15:25:14 -04009775 struct vcpu_vmx *vmx = to_vmx(vcpu);
Jan Kiszka533558b2014-01-04 18:47:20 +01009776 u32 exit_reason;
Bandan Dasc5f983f2017-05-05 15:25:14 -04009777 unsigned long exit_qualification = vcpu->arch.exit_qualification;
Yang Zhang25d92082013-08-06 12:00:32 +03009778
Bandan Dasc5f983f2017-05-05 15:25:14 -04009779 if (vmx->nested.pml_full) {
9780 exit_reason = EXIT_REASON_PML_FULL;
9781 vmx->nested.pml_full = false;
9782 exit_qualification &= INTR_INFO_UNBLOCK_NMI;
9783 } else if (fault->error_code & PFERR_RSVD_MASK)
Jan Kiszka533558b2014-01-04 18:47:20 +01009784 exit_reason = EXIT_REASON_EPT_MISCONFIG;
Yang Zhang25d92082013-08-06 12:00:32 +03009785 else
Jan Kiszka533558b2014-01-04 18:47:20 +01009786 exit_reason = EXIT_REASON_EPT_VIOLATION;
Bandan Dasc5f983f2017-05-05 15:25:14 -04009787
9788 nested_vmx_vmexit(vcpu, exit_reason, 0, exit_qualification);
Yang Zhang25d92082013-08-06 12:00:32 +03009789 vmcs12->guest_physical_address = fault->address;
9790}
9791
Peter Feiner995f00a2017-06-30 17:26:32 -07009792static bool nested_ept_ad_enabled(struct kvm_vcpu *vcpu)
9793{
David Hildenbrandbb97a012017-08-10 23:15:28 +02009794 return nested_ept_get_cr3(vcpu) & VMX_EPTP_AD_ENABLE_BIT;
Peter Feiner995f00a2017-06-30 17:26:32 -07009795}
9796
Nadav Har'El155a97a2013-08-05 11:07:16 +03009797/* Callbacks for nested_ept_init_mmu_context: */
9798
9799static unsigned long nested_ept_get_cr3(struct kvm_vcpu *vcpu)
9800{
9801 /* return the page table to be shadowed - in our case, EPT12 */
9802 return get_vmcs12(vcpu)->ept_pointer;
9803}
9804
Paolo Bonziniae1e2d12017-03-30 11:55:30 +02009805static int nested_ept_init_mmu_context(struct kvm_vcpu *vcpu)
Nadav Har'El155a97a2013-08-05 11:07:16 +03009806{
Paolo Bonziniad896af2013-10-02 16:56:14 +02009807 WARN_ON(mmu_is_nested(vcpu));
David Hildenbranda057e0e2017-08-10 23:36:54 +02009808 if (!valid_ept_address(vcpu, nested_ept_get_cr3(vcpu)))
Paolo Bonziniae1e2d12017-03-30 11:55:30 +02009809 return 1;
9810
9811 kvm_mmu_unload(vcpu);
Paolo Bonziniad896af2013-10-02 16:56:14 +02009812 kvm_init_shadow_ept_mmu(vcpu,
Wincy Vanb9c237b2015-02-03 23:56:30 +08009813 to_vmx(vcpu)->nested.nested_vmx_ept_caps &
Paolo Bonziniae1e2d12017-03-30 11:55:30 +02009814 VMX_EPT_EXECUTE_ONLY_BIT,
David Hildenbranda057e0e2017-08-10 23:36:54 +02009815 nested_ept_ad_enabled(vcpu));
Nadav Har'El155a97a2013-08-05 11:07:16 +03009816 vcpu->arch.mmu.set_cr3 = vmx_set_cr3;
9817 vcpu->arch.mmu.get_cr3 = nested_ept_get_cr3;
9818 vcpu->arch.mmu.inject_page_fault = nested_ept_inject_page_fault;
9819
9820 vcpu->arch.walk_mmu = &vcpu->arch.nested_mmu;
Paolo Bonziniae1e2d12017-03-30 11:55:30 +02009821 return 0;
Nadav Har'El155a97a2013-08-05 11:07:16 +03009822}
9823
9824static void nested_ept_uninit_mmu_context(struct kvm_vcpu *vcpu)
9825{
9826 vcpu->arch.walk_mmu = &vcpu->arch.mmu;
9827}
9828
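/*
 * Decide whether a #PF in L2 must be reflected to L1: the PFEC mask/match
 * pair selects matching error codes, and the PF bit in L1's exception
 * bitmap chooses whether a match (bit set) or a mismatch (bit clear)
 * triggers the vmexit, hence the XOR below.
 */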
Eugene Korenevsky19d5f102014-12-16 22:35:53 +03009829static bool nested_vmx_is_page_fault_vmexit(struct vmcs12 *vmcs12,
9830 u16 error_code)
9831{
9832 bool inequality, bit;
9833
9834 bit = (vmcs12->exception_bitmap & (1u << PF_VECTOR)) != 0;
9835 inequality =
9836 (error_code & vmcs12->page_fault_error_code_mask) !=
9837 vmcs12->page_fault_error_code_match;
9838 return inequality ^ bit;
9839}
9840
Gleb Natapovfeaf0c7d2013-09-25 12:51:36 +03009841static void vmx_inject_page_fault_nested(struct kvm_vcpu *vcpu,
9842 struct x86_exception *fault)
9843{
9844 struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
9845
9846 WARN_ON(!is_guest_mode(vcpu));
9847
Wanpeng Li305d0ab2017-09-28 18:16:44 -07009848 if (nested_vmx_is_page_fault_vmexit(vmcs12, fault->error_code) &&
9849 !to_vmx(vcpu)->nested.nested_run_pending) {
Paolo Bonzinib96fb432017-07-27 12:29:32 +02009850 vmcs12->vm_exit_intr_error_code = fault->error_code;
9851 nested_vmx_vmexit(vcpu, EXIT_REASON_EXCEPTION_NMI,
9852 PF_VECTOR | INTR_TYPE_HARD_EXCEPTION |
9853 INTR_INFO_DELIVER_CODE_MASK | INTR_INFO_VALID_MASK,
9854 fault->address);
Paolo Bonzini7313c692017-07-27 10:31:25 +02009855 } else {
Gleb Natapovfeaf0c7d2013-09-25 12:51:36 +03009856 kvm_inject_page_fault(vcpu, fault);
Paolo Bonzini7313c692017-07-27 10:31:25 +02009857 }
Gleb Natapovfeaf0c7d2013-09-25 12:51:36 +03009858}
9859
Jim Mattson6beb7bd2016-11-30 12:03:45 -08009860static inline bool nested_vmx_merge_msr_bitmap(struct kvm_vcpu *vcpu,
9861 struct vmcs12 *vmcs12);
9862
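/*
 * Map the guest pages referenced by vmcs12 (APIC-access page, virtual-APIC
 * page, posted-interrupt descriptor) and point vmcs02 at them; also merge
 * the MSR bitmaps, or fall back to exiting on every MSR access.
 */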
9863static void nested_get_vmcs12_pages(struct kvm_vcpu *vcpu,
Wanpeng Lia2bcba52014-08-21 19:46:49 +08009864 struct vmcs12 *vmcs12)
9865{
9866 struct vcpu_vmx *vmx = to_vmx(vcpu);
David Hildenbrand5e2f30b2017-08-03 18:11:04 +02009867 struct page *page;
Jim Mattson6beb7bd2016-11-30 12:03:45 -08009868 u64 hpa;
Wanpeng Lia2bcba52014-08-21 19:46:49 +08009869
9870 if (nested_cpu_has2(vmcs12, SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES)) {
Wanpeng Lia2bcba52014-08-21 19:46:49 +08009871 /*
9872 * Translate L1 physical address to host physical
9873 * address for vmcs02. Keep the page pinned, so this
9874 * physical address remains valid. We keep a reference
9875 * to it so we can release it later.
9876 */
David Hildenbrand5e2f30b2017-08-03 18:11:04 +02009877 if (vmx->nested.apic_access_page) { /* shouldn't happen */
David Hildenbrand53a70da2017-08-03 18:11:05 +02009878 kvm_release_page_dirty(vmx->nested.apic_access_page);
David Hildenbrand5e2f30b2017-08-03 18:11:04 +02009879 vmx->nested.apic_access_page = NULL;
9880 }
9881 page = kvm_vcpu_gpa_to_page(vcpu, vmcs12->apic_access_addr);
Jim Mattson6beb7bd2016-11-30 12:03:45 -08009882 /*
9883 * If translation failed, no matter: This feature asks
9884 * to exit when accessing the given address, and if it
9885 * can never be accessed, this feature won't do
9886 * anything anyway.
9887 */
David Hildenbrand5e2f30b2017-08-03 18:11:04 +02009888 if (!is_error_page(page)) {
9889 vmx->nested.apic_access_page = page;
Jim Mattson6beb7bd2016-11-30 12:03:45 -08009890 hpa = page_to_phys(vmx->nested.apic_access_page);
9891 vmcs_write64(APIC_ACCESS_ADDR, hpa);
9892 } else {
9893 vmcs_clear_bits(SECONDARY_VM_EXEC_CONTROL,
9894 SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES);
9895 }
9896 } else if (!(nested_cpu_has_virt_x2apic_mode(vmcs12)) &&
9897 cpu_need_virtualize_apic_accesses(&vmx->vcpu)) {
9898 vmcs_set_bits(SECONDARY_VM_EXEC_CONTROL,
9899 SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES);
9900 kvm_vcpu_reload_apic_access_page(vcpu);
Wanpeng Lia2bcba52014-08-21 19:46:49 +08009901 }
Wanpeng Lia7c0b072014-08-21 19:46:50 +08009902
9903 if (nested_cpu_has(vmcs12, CPU_BASED_TPR_SHADOW)) {
David Hildenbrand5e2f30b2017-08-03 18:11:04 +02009904 if (vmx->nested.virtual_apic_page) { /* shouldn't happen */
David Hildenbrand53a70da2017-08-03 18:11:05 +02009905 kvm_release_page_dirty(vmx->nested.virtual_apic_page);
David Hildenbrand5e2f30b2017-08-03 18:11:04 +02009906 vmx->nested.virtual_apic_page = NULL;
9907 }
9908 page = kvm_vcpu_gpa_to_page(vcpu, vmcs12->virtual_apic_page_addr);
Wanpeng Lia7c0b072014-08-21 19:46:50 +08009909
9910 /*
Jim Mattson6beb7bd2016-11-30 12:03:45 -08009911 * If translation failed, VM entry will fail because
9912 * prepare_vmcs02 set VIRTUAL_APIC_PAGE_ADDR to -1ull.
9913 * Failing the vm entry is _not_ what the processor
9914 * does but it's basically the only possibility we
9915 * have. We could still enter the guest if CR8 load
9916 * exits are enabled, CR8 store exits are enabled, and
9917 * virtualize APIC access is disabled; in this case
9918 * the processor would never use the TPR shadow and we
9919 * could simply clear the bit from the execution
9920 * control. But such a configuration is useless, so
9921 * let's keep the code simple.
Wanpeng Lia7c0b072014-08-21 19:46:50 +08009922 */
David Hildenbrand5e2f30b2017-08-03 18:11:04 +02009923 if (!is_error_page(page)) {
9924 vmx->nested.virtual_apic_page = page;
Jim Mattson6beb7bd2016-11-30 12:03:45 -08009925 hpa = page_to_phys(vmx->nested.virtual_apic_page);
9926 vmcs_write64(VIRTUAL_APIC_PAGE_ADDR, hpa);
9927 }
Wanpeng Lia7c0b072014-08-21 19:46:50 +08009928 }
9929
Wincy Van705699a2015-02-03 23:58:17 +08009930 if (nested_cpu_has_posted_intr(vmcs12)) {
Wincy Van705699a2015-02-03 23:58:17 +08009931 if (vmx->nested.pi_desc_page) { /* shouldn't happen */
9932 kunmap(vmx->nested.pi_desc_page);
David Hildenbrand53a70da2017-08-03 18:11:05 +02009933 kvm_release_page_dirty(vmx->nested.pi_desc_page);
David Hildenbrand5e2f30b2017-08-03 18:11:04 +02009934 vmx->nested.pi_desc_page = NULL;
Wincy Van705699a2015-02-03 23:58:17 +08009935 }
David Hildenbrand5e2f30b2017-08-03 18:11:04 +02009936 page = kvm_vcpu_gpa_to_page(vcpu, vmcs12->posted_intr_desc_addr);
9937 if (is_error_page(page))
Jim Mattson6beb7bd2016-11-30 12:03:45 -08009938 return;
David Hildenbrand5e2f30b2017-08-03 18:11:04 +02009939 vmx->nested.pi_desc_page = page;
9940 vmx->nested.pi_desc = kmap(vmx->nested.pi_desc_page);
Wincy Van705699a2015-02-03 23:58:17 +08009941 vmx->nested.pi_desc =
9942 (struct pi_desc *)((void *)vmx->nested.pi_desc +
9943 (unsigned long)(vmcs12->posted_intr_desc_addr &
9944 (PAGE_SIZE - 1)));
Jim Mattson6beb7bd2016-11-30 12:03:45 -08009945 vmcs_write64(POSTED_INTR_DESC_ADDR,
9946 page_to_phys(vmx->nested.pi_desc_page) +
9947 (unsigned long)(vmcs12->posted_intr_desc_addr &
9948 (PAGE_SIZE - 1)));
Wincy Van705699a2015-02-03 23:58:17 +08009949 }
Jim Mattson6beb7bd2016-11-30 12:03:45 -08009950 if (cpu_has_vmx_msr_bitmap() &&
9951 nested_cpu_has(vmcs12, CPU_BASED_USE_MSR_BITMAPS) &&
9952 nested_vmx_merge_msr_bitmap(vcpu, vmcs12))
9953 ;
9954 else
9955 vmcs_clear_bits(CPU_BASED_VM_EXEC_CONTROL,
9956 CPU_BASED_USE_MSR_BITMAPS);
Wanpeng Lia2bcba52014-08-21 19:46:49 +08009957}
9958
Jan Kiszkaf41245002014-03-07 20:03:13 +01009959static void vmx_start_preemption_timer(struct kvm_vcpu *vcpu)
9960{
9961 u64 preemption_timeout = get_vmcs12(vcpu)->vmx_preemption_timer_value;
9962 struct vcpu_vmx *vmx = to_vmx(vcpu);
9963
9964 if (vcpu->arch.virtual_tsc_khz == 0)
9965 return;
9966
9967 /* Make sure short timeouts reliably trigger an immediate vmexit.
9968 * hrtimer_start does not guarantee this. */
9969 if (preemption_timeout <= 1) {
9970 vmx_preemption_timer_fn(&vmx->nested.preemption_timer);
9971 return;
9972 }
9973
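	/*
	 * Convert the vmcs12 timer value from TSC units (scaled by the
	 * emulated preemption-timer rate) into nanoseconds for the hrtimer.
	 */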
9974 preemption_timeout <<= VMX_MISC_EMULATED_PREEMPTION_TIMER_RATE;
9975 preemption_timeout *= 1000000;
9976 do_div(preemption_timeout, vcpu->arch.virtual_tsc_khz);
9977 hrtimer_start(&vmx->nested.preemption_timer,
9978 ns_to_ktime(preemption_timeout), HRTIMER_MODE_REL);
9979}
9980
Jim Mattson56a20512017-07-06 16:33:06 -07009981static int nested_vmx_check_io_bitmap_controls(struct kvm_vcpu *vcpu,
9982 struct vmcs12 *vmcs12)
9983{
9984 if (!nested_cpu_has(vmcs12, CPU_BASED_USE_IO_BITMAPS))
9985 return 0;
9986
9987 if (!page_address_valid(vcpu, vmcs12->io_bitmap_a) ||
9988 !page_address_valid(vcpu, vmcs12->io_bitmap_b))
9989 return -EINVAL;
9990
9991 return 0;
9992}
9993
Wincy Van3af18d92015-02-03 23:49:31 +08009994static int nested_vmx_check_msr_bitmap_controls(struct kvm_vcpu *vcpu,
9995 struct vmcs12 *vmcs12)
9996{
Wincy Van3af18d92015-02-03 23:49:31 +08009997 if (!nested_cpu_has(vmcs12, CPU_BASED_USE_MSR_BITMAPS))
9998 return 0;
9999
Jim Mattson5fa99cb2017-07-06 16:33:07 -070010000 if (!page_address_valid(vcpu, vmcs12->msr_bitmap))
Wincy Van3af18d92015-02-03 23:49:31 +080010001 return -EINVAL;
10002
10003 return 0;
10004}
10005
Jim Mattson712b12d2017-08-24 13:24:47 -070010006static int nested_vmx_check_tpr_shadow_controls(struct kvm_vcpu *vcpu,
10007 struct vmcs12 *vmcs12)
10008{
10009 if (!nested_cpu_has(vmcs12, CPU_BASED_TPR_SHADOW))
10010 return 0;
10011
10012 if (!page_address_valid(vcpu, vmcs12->virtual_apic_page_addr))
10013 return -EINVAL;
10014
10015 return 0;
10016}
10017
Wincy Van3af18d92015-02-03 23:49:31 +080010018/*
 10019 * Merge L0's and L1's MSR bitmaps; return false to indicate that
 10020 * we do not use the hardware bitmap and must intercept every MSR.
10021 */
10022static inline bool nested_vmx_merge_msr_bitmap(struct kvm_vcpu *vcpu,
10023 struct vmcs12 *vmcs12)
10024{
Wincy Van82f0dd42015-02-03 23:57:18 +080010025 int msr;
Wincy Vanf2b93282015-02-03 23:56:03 +080010026 struct page *page;
Radim Krčmářd048c092016-08-08 20:16:22 +020010027 unsigned long *msr_bitmap_l1;
10028 unsigned long *msr_bitmap_l0 = to_vmx(vcpu)->nested.msr_bitmap;
Wincy Vanf2b93282015-02-03 23:56:03 +080010029
Radim Krčmářd048c092016-08-08 20:16:22 +020010030 /* This shortcut is ok because we support only x2APIC MSRs so far. */
Wincy Vanf2b93282015-02-03 23:56:03 +080010031 if (!nested_cpu_has_virt_x2apic_mode(vmcs12))
10032 return false;
10033
David Hildenbrand5e2f30b2017-08-03 18:11:04 +020010034 page = kvm_vcpu_gpa_to_page(vcpu, vmcs12->msr_bitmap);
10035 if (is_error_page(page))
Wincy Vanf2b93282015-02-03 23:56:03 +080010036 return false;
Radim Krčmářd048c092016-08-08 20:16:22 +020010037 msr_bitmap_l1 = (unsigned long *)kmap(page);
Wincy Vanf2b93282015-02-03 23:56:03 +080010038
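	/*
	 * Start with every MSR intercepted; the helpers below then clear the
	 * intercepts only for the x2APIC MSRs that may be passed through.
	 */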
Radim Krčmářd048c092016-08-08 20:16:22 +020010039 memset(msr_bitmap_l0, 0xff, PAGE_SIZE);
10040
Wincy Vanf2b93282015-02-03 23:56:03 +080010041 if (nested_cpu_has_virt_x2apic_mode(vmcs12)) {
Wincy Van82f0dd42015-02-03 23:57:18 +080010042 if (nested_cpu_has_apic_reg_virt(vmcs12))
10043 for (msr = 0x800; msr <= 0x8ff; msr++)
10044 nested_vmx_disable_intercept_for_msr(
Radim Krčmářd048c092016-08-08 20:16:22 +020010045 msr_bitmap_l1, msr_bitmap_l0,
Wincy Van82f0dd42015-02-03 23:57:18 +080010046 msr, MSR_TYPE_R);
Radim Krčmářd048c092016-08-08 20:16:22 +020010047
10048 nested_vmx_disable_intercept_for_msr(
10049 msr_bitmap_l1, msr_bitmap_l0,
Wincy Vanf2b93282015-02-03 23:56:03 +080010050 APIC_BASE_MSR + (APIC_TASKPRI >> 4),
10051 MSR_TYPE_R | MSR_TYPE_W);
Radim Krčmářd048c092016-08-08 20:16:22 +020010052
Wincy Van608406e2015-02-03 23:57:51 +080010053 if (nested_cpu_has_vid(vmcs12)) {
Wincy Van608406e2015-02-03 23:57:51 +080010054 nested_vmx_disable_intercept_for_msr(
Radim Krčmářd048c092016-08-08 20:16:22 +020010055 msr_bitmap_l1, msr_bitmap_l0,
Wincy Van608406e2015-02-03 23:57:51 +080010056 APIC_BASE_MSR + (APIC_EOI >> 4),
10057 MSR_TYPE_W);
10058 nested_vmx_disable_intercept_for_msr(
Radim Krčmářd048c092016-08-08 20:16:22 +020010059 msr_bitmap_l1, msr_bitmap_l0,
Wincy Van608406e2015-02-03 23:57:51 +080010060 APIC_BASE_MSR + (APIC_SELF_IPI >> 4),
10061 MSR_TYPE_W);
10062 }
Wincy Van82f0dd42015-02-03 23:57:18 +080010063 }
Wincy Vanf2b93282015-02-03 23:56:03 +080010064 kunmap(page);
David Hildenbrand53a70da2017-08-03 18:11:05 +020010065 kvm_release_page_clean(page);
Wincy Vanf2b93282015-02-03 23:56:03 +080010066
10067 return true;
10068}
10069
10070static int nested_vmx_check_apicv_controls(struct kvm_vcpu *vcpu,
10071 struct vmcs12 *vmcs12)
10072{
Wincy Van82f0dd42015-02-03 23:57:18 +080010073 if (!nested_cpu_has_virt_x2apic_mode(vmcs12) &&
Wincy Van608406e2015-02-03 23:57:51 +080010074 !nested_cpu_has_apic_reg_virt(vmcs12) &&
Wincy Van705699a2015-02-03 23:58:17 +080010075 !nested_cpu_has_vid(vmcs12) &&
10076 !nested_cpu_has_posted_intr(vmcs12))
Wincy Vanf2b93282015-02-03 23:56:03 +080010077 return 0;
10078
10079 /*
10080 * If virtualize x2apic mode is enabled,
10081 * virtualize apic access must be disabled.
10082 */
Wincy Van82f0dd42015-02-03 23:57:18 +080010083 if (nested_cpu_has_virt_x2apic_mode(vmcs12) &&
10084 nested_cpu_has2(vmcs12, SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES))
Wincy Vanf2b93282015-02-03 23:56:03 +080010085 return -EINVAL;
10086
Wincy Van608406e2015-02-03 23:57:51 +080010087 /*
10088 * If virtual interrupt delivery is enabled,
10089 * we must exit on external interrupts.
10090 */
10091 if (nested_cpu_has_vid(vmcs12) &&
10092 !nested_exit_on_intr(vcpu))
10093 return -EINVAL;
10094
Wincy Van705699a2015-02-03 23:58:17 +080010095 /*
 10096	 * Bits 15:8 must be zero in posted_intr_nv; the descriptor
 10097	 * address has already been checked in
 10098	 * nested_get_vmcs12_pages.
10099 */
10100 if (nested_cpu_has_posted_intr(vmcs12) &&
10101 (!nested_cpu_has_vid(vmcs12) ||
10102 !nested_exit_intr_ack_set(vcpu) ||
10103 vmcs12->posted_intr_nv & 0xff00))
10104 return -EINVAL;
10105
Wincy Vanf2b93282015-02-03 23:56:03 +080010106 /* tpr shadow is needed by all apicv features. */
10107 if (!nested_cpu_has(vmcs12, CPU_BASED_TPR_SHADOW))
10108 return -EINVAL;
10109
10110 return 0;
Wincy Van3af18d92015-02-03 23:49:31 +080010111}
10112
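/*
 * Validate one VM-entry/VM-exit MSR switch area: the base address must be
 * 16-byte aligned and the whole array of vmx_msr_entry records must lie
 * below the guest's physical-address width.
 */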
Eugene Korenevskye9ac0332014-12-11 08:53:27 +030010113static int nested_vmx_check_msr_switch(struct kvm_vcpu *vcpu,
10114 unsigned long count_field,
Eugene Korenevsky92d71bc2015-03-29 23:56:44 +030010115 unsigned long addr_field)
Wincy Vanff651cb2014-12-11 08:52:58 +030010116{
Eugene Korenevsky92d71bc2015-03-29 23:56:44 +030010117 int maxphyaddr;
Eugene Korenevskye9ac0332014-12-11 08:53:27 +030010118 u64 count, addr;
10119
10120 if (vmcs12_read_any(vcpu, count_field, &count) ||
10121 vmcs12_read_any(vcpu, addr_field, &addr)) {
10122 WARN_ON(1);
10123 return -EINVAL;
10124 }
10125 if (count == 0)
10126 return 0;
Eugene Korenevsky92d71bc2015-03-29 23:56:44 +030010127 maxphyaddr = cpuid_maxphyaddr(vcpu);
Eugene Korenevskye9ac0332014-12-11 08:53:27 +030010128 if (!IS_ALIGNED(addr, 16) || addr >> maxphyaddr ||
10129 (addr + count * sizeof(struct vmx_msr_entry) - 1) >> maxphyaddr) {
Paolo Bonzinibbe41b92016-08-19 17:51:20 +020010130 pr_debug_ratelimited(
Eugene Korenevskye9ac0332014-12-11 08:53:27 +030010131 "nVMX: invalid MSR switch (0x%lx, %d, %llu, 0x%08llx)",
10132 addr_field, maxphyaddr, count, addr);
10133 return -EINVAL;
10134 }
10135 return 0;
10136}
10137
10138static int nested_vmx_check_msr_switch_controls(struct kvm_vcpu *vcpu,
10139 struct vmcs12 *vmcs12)
10140{
Eugene Korenevskye9ac0332014-12-11 08:53:27 +030010141 if (vmcs12->vm_exit_msr_load_count == 0 &&
10142 vmcs12->vm_exit_msr_store_count == 0 &&
10143 vmcs12->vm_entry_msr_load_count == 0)
10144 return 0; /* Fast path */
Eugene Korenevskye9ac0332014-12-11 08:53:27 +030010145 if (nested_vmx_check_msr_switch(vcpu, VM_EXIT_MSR_LOAD_COUNT,
Eugene Korenevsky92d71bc2015-03-29 23:56:44 +030010146 VM_EXIT_MSR_LOAD_ADDR) ||
Eugene Korenevskye9ac0332014-12-11 08:53:27 +030010147 nested_vmx_check_msr_switch(vcpu, VM_EXIT_MSR_STORE_COUNT,
Eugene Korenevsky92d71bc2015-03-29 23:56:44 +030010148 VM_EXIT_MSR_STORE_ADDR) ||
Eugene Korenevskye9ac0332014-12-11 08:53:27 +030010149 nested_vmx_check_msr_switch(vcpu, VM_ENTRY_MSR_LOAD_COUNT,
Eugene Korenevsky92d71bc2015-03-29 23:56:44 +030010150 VM_ENTRY_MSR_LOAD_ADDR))
Wincy Vanff651cb2014-12-11 08:52:58 +030010151 return -EINVAL;
10152 return 0;
10153}
10154
Bandan Dasc5f983f2017-05-05 15:25:14 -040010155static int nested_vmx_check_pml_controls(struct kvm_vcpu *vcpu,
10156 struct vmcs12 *vmcs12)
10157{
10158 u64 address = vmcs12->pml_address;
10159 int maxphyaddr = cpuid_maxphyaddr(vcpu);
10160
10161 if (nested_cpu_has2(vmcs12, SECONDARY_EXEC_ENABLE_PML)) {
10162 if (!nested_cpu_has_ept(vmcs12) ||
10163 !IS_ALIGNED(address, 4096) ||
10164 address >> maxphyaddr)
10165 return -EINVAL;
10166 }
10167
10168 return 0;
10169}
10170
Eugene Korenevskye9ac0332014-12-11 08:53:27 +030010171static int nested_vmx_msr_check_common(struct kvm_vcpu *vcpu,
10172 struct vmx_msr_entry *e)
10173{
10174 /* x2APIC MSR accesses are not allowed */
Jan Kiszka8a9781f2015-05-04 08:32:32 +020010175 if (vcpu->arch.apic_base & X2APIC_ENABLE && e->index >> 8 == 0x8)
Eugene Korenevskye9ac0332014-12-11 08:53:27 +030010176 return -EINVAL;
10177 if (e->index == MSR_IA32_UCODE_WRITE || /* SDM Table 35-2 */
10178 e->index == MSR_IA32_UCODE_REV)
10179 return -EINVAL;
10180 if (e->reserved != 0)
10181 return -EINVAL;
10182 return 0;
10183}
10184
10185static int nested_vmx_load_msr_check(struct kvm_vcpu *vcpu,
10186 struct vmx_msr_entry *e)
Wincy Vanff651cb2014-12-11 08:52:58 +030010187{
10188 if (e->index == MSR_FS_BASE ||
10189 e->index == MSR_GS_BASE ||
Eugene Korenevskye9ac0332014-12-11 08:53:27 +030010190 e->index == MSR_IA32_SMM_MONITOR_CTL || /* SMM is not supported */
10191 nested_vmx_msr_check_common(vcpu, e))
10192 return -EINVAL;
10193 return 0;
10194}
10195
10196static int nested_vmx_store_msr_check(struct kvm_vcpu *vcpu,
10197 struct vmx_msr_entry *e)
10198{
10199 if (e->index == MSR_IA32_SMBASE || /* SMM is not supported */
10200 nested_vmx_msr_check_common(vcpu, e))
Wincy Vanff651cb2014-12-11 08:52:58 +030010201 return -EINVAL;
10202 return 0;
10203}
10204
10205/*
10206 * Load guest's/host's msr at nested entry/exit.
 10207 * Return 0 on success, or the 1-based index of the failing entry on failure.
10208 */
10209static u32 nested_vmx_load_msr(struct kvm_vcpu *vcpu, u64 gpa, u32 count)
10210{
10211 u32 i;
10212 struct vmx_msr_entry e;
10213 struct msr_data msr;
10214
10215 msr.host_initiated = false;
10216 for (i = 0; i < count; i++) {
Paolo Bonzini54bf36a2015-04-08 15:39:23 +020010217 if (kvm_vcpu_read_guest(vcpu, gpa + i * sizeof(e),
10218 &e, sizeof(e))) {
Paolo Bonzinibbe41b92016-08-19 17:51:20 +020010219 pr_debug_ratelimited(
Eugene Korenevskye9ac0332014-12-11 08:53:27 +030010220 "%s cannot read MSR entry (%u, 0x%08llx)\n",
10221 __func__, i, gpa + i * sizeof(e));
Wincy Vanff651cb2014-12-11 08:52:58 +030010222 goto fail;
Eugene Korenevskye9ac0332014-12-11 08:53:27 +030010223 }
10224 if (nested_vmx_load_msr_check(vcpu, &e)) {
Paolo Bonzinibbe41b92016-08-19 17:51:20 +020010225 pr_debug_ratelimited(
Eugene Korenevskye9ac0332014-12-11 08:53:27 +030010226 "%s check failed (%u, 0x%x, 0x%x)\n",
10227 __func__, i, e.index, e.reserved);
10228 goto fail;
10229 }
Wincy Vanff651cb2014-12-11 08:52:58 +030010230 msr.index = e.index;
10231 msr.data = e.value;
Eugene Korenevskye9ac0332014-12-11 08:53:27 +030010232 if (kvm_set_msr(vcpu, &msr)) {
Paolo Bonzinibbe41b92016-08-19 17:51:20 +020010233 pr_debug_ratelimited(
Eugene Korenevskye9ac0332014-12-11 08:53:27 +030010234 "%s cannot write MSR (%u, 0x%x, 0x%llx)\n",
10235 __func__, i, e.index, e.value);
Wincy Vanff651cb2014-12-11 08:52:58 +030010236 goto fail;
Eugene Korenevskye9ac0332014-12-11 08:53:27 +030010237 }
Wincy Vanff651cb2014-12-11 08:52:58 +030010238 }
10239 return 0;
10240fail:
10241 return i + 1;
10242}
10243
10244static int nested_vmx_store_msr(struct kvm_vcpu *vcpu, u64 gpa, u32 count)
10245{
10246 u32 i;
10247 struct vmx_msr_entry e;
10248
10249 for (i = 0; i < count; i++) {
Paolo Bonzini609e36d2015-04-08 15:30:38 +020010250 struct msr_data msr_info;
Paolo Bonzini54bf36a2015-04-08 15:39:23 +020010251 if (kvm_vcpu_read_guest(vcpu,
10252 gpa + i * sizeof(e),
10253 &e, 2 * sizeof(u32))) {
Paolo Bonzinibbe41b92016-08-19 17:51:20 +020010254 pr_debug_ratelimited(
Eugene Korenevskye9ac0332014-12-11 08:53:27 +030010255 "%s cannot read MSR entry (%u, 0x%08llx)\n",
10256 __func__, i, gpa + i * sizeof(e));
Wincy Vanff651cb2014-12-11 08:52:58 +030010257 return -EINVAL;
Eugene Korenevskye9ac0332014-12-11 08:53:27 +030010258 }
10259 if (nested_vmx_store_msr_check(vcpu, &e)) {
Paolo Bonzinibbe41b92016-08-19 17:51:20 +020010260 pr_debug_ratelimited(
Eugene Korenevskye9ac0332014-12-11 08:53:27 +030010261 "%s check failed (%u, 0x%x, 0x%x)\n",
10262 __func__, i, e.index, e.reserved);
Wincy Vanff651cb2014-12-11 08:52:58 +030010263 return -EINVAL;
Eugene Korenevskye9ac0332014-12-11 08:53:27 +030010264 }
Paolo Bonzini609e36d2015-04-08 15:30:38 +020010265 msr_info.host_initiated = false;
10266 msr_info.index = e.index;
10267 if (kvm_get_msr(vcpu, &msr_info)) {
Paolo Bonzinibbe41b92016-08-19 17:51:20 +020010268 pr_debug_ratelimited(
Eugene Korenevskye9ac0332014-12-11 08:53:27 +030010269 "%s cannot read MSR (%u, 0x%x)\n",
10270 __func__, i, e.index);
10271 return -EINVAL;
10272 }
Paolo Bonzini54bf36a2015-04-08 15:39:23 +020010273 if (kvm_vcpu_write_guest(vcpu,
10274 gpa + i * sizeof(e) +
10275 offsetof(struct vmx_msr_entry, value),
10276 &msr_info.data, sizeof(msr_info.data))) {
Paolo Bonzinibbe41b92016-08-19 17:51:20 +020010277 pr_debug_ratelimited(
Eugene Korenevskye9ac0332014-12-11 08:53:27 +030010278 "%s cannot write MSR (%u, 0x%x, 0x%llx)\n",
Paolo Bonzini609e36d2015-04-08 15:30:38 +020010279 __func__, i, e.index, msr_info.data);
Eugene Korenevskye9ac0332014-12-11 08:53:27 +030010280 return -EINVAL;
10281 }
Wincy Vanff651cb2014-12-11 08:52:58 +030010282 }
10283 return 0;
10284}
10285
Ladi Prosek1dc35da2016-11-30 16:03:11 +010010286static bool nested_cr3_valid(struct kvm_vcpu *vcpu, unsigned long val)
10287{
10288 unsigned long invalid_mask;
10289
10290 invalid_mask = (~0ULL) << cpuid_maxphyaddr(vcpu);
10291 return (val & invalid_mask) == 0;
10292}
10293
Nadav Har'Elfe3ef052011-05-25 23:10:02 +030010294/*
Ladi Prosek9ed38ffa2016-11-30 16:03:10 +010010295 * Load guest's/host's cr3 at nested entry/exit. nested_ept is true if we are
10296 * emulating VM entry into a guest with EPT enabled.
10297 * Returns 0 on success, 1 on failure. Invalid state exit qualification code
10298 * is assigned to entry_failure_code on failure.
10299 */
10300static int nested_vmx_load_cr3(struct kvm_vcpu *vcpu, unsigned long cr3, bool nested_ept,
Jim Mattsonca0bde22016-11-30 12:03:46 -080010301 u32 *entry_failure_code)
Ladi Prosek9ed38ffa2016-11-30 16:03:10 +010010302{
Ladi Prosek9ed38ffa2016-11-30 16:03:10 +010010303 if (cr3 != kvm_read_cr3(vcpu) || (!nested_ept && pdptrs_changed(vcpu))) {
Ladi Prosek1dc35da2016-11-30 16:03:11 +010010304 if (!nested_cr3_valid(vcpu, cr3)) {
Ladi Prosek9ed38ffa2016-11-30 16:03:10 +010010305 *entry_failure_code = ENTRY_FAIL_DEFAULT;
10306 return 1;
10307 }
10308
10309 /*
10310 * If PAE paging and EPT are both on, CR3 is not used by the CPU and
10311 * must not be dereferenced.
10312 */
10313 if (!is_long_mode(vcpu) && is_pae(vcpu) && is_paging(vcpu) &&
10314 !nested_ept) {
10315 if (!load_pdptrs(vcpu, vcpu->arch.walk_mmu, cr3)) {
10316 *entry_failure_code = ENTRY_FAIL_PDPTE;
10317 return 1;
10318 }
10319 }
10320
10321 vcpu->arch.cr3 = cr3;
10322 __set_bit(VCPU_EXREG_CR3, (ulong *)&vcpu->arch.regs_avail);
10323 }
10324
10325 kvm_mmu_reset_context(vcpu);
10326 return 0;
10327}
10328
Nadav Har'Elfe3ef052011-05-25 23:10:02 +030010329/*
10330 * prepare_vmcs02 is called when the L1 guest hypervisor runs its nested
10331 * L2 guest. L1 has a vmcs for L2 (vmcs12), and this function "merges" it
Tiejun Chenb4619662014-09-22 10:31:38 +080010332 * with L0's requirements for its guest (a.k.a. vmcs01), so we can run the L2
Nadav Har'Elfe3ef052011-05-25 23:10:02 +030010333 * guest in a way that satisfies both L1's requests and our own
 10334 * needs. In addition to modifying the active vmcs (which is vmcs02), this
 10335 * function has necessary side effects, such as setting various
 10336 * vcpu->arch fields.
Ladi Prosekee146c12016-11-30 16:03:09 +010010337 * Returns 0 on success, 1 on failure. Invalid state exit qualification code
10338 * is assigned to entry_failure_code on failure.
Nadav Har'Elfe3ef052011-05-25 23:10:02 +030010339 */
Ladi Prosekee146c12016-11-30 16:03:09 +010010340static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
Jim Mattsonca0bde22016-11-30 12:03:46 -080010341 bool from_vmentry, u32 *entry_failure_code)
Nadav Har'Elfe3ef052011-05-25 23:10:02 +030010342{
10343 struct vcpu_vmx *vmx = to_vmx(vcpu);
Bandan Das03efce62017-05-05 15:25:15 -040010344 u32 exec_control, vmcs12_exec_ctrl;
Nadav Har'Elfe3ef052011-05-25 23:10:02 +030010345
10346 vmcs_write16(GUEST_ES_SELECTOR, vmcs12->guest_es_selector);
10347 vmcs_write16(GUEST_CS_SELECTOR, vmcs12->guest_cs_selector);
10348 vmcs_write16(GUEST_SS_SELECTOR, vmcs12->guest_ss_selector);
10349 vmcs_write16(GUEST_DS_SELECTOR, vmcs12->guest_ds_selector);
10350 vmcs_write16(GUEST_FS_SELECTOR, vmcs12->guest_fs_selector);
10351 vmcs_write16(GUEST_GS_SELECTOR, vmcs12->guest_gs_selector);
10352 vmcs_write16(GUEST_LDTR_SELECTOR, vmcs12->guest_ldtr_selector);
10353 vmcs_write16(GUEST_TR_SELECTOR, vmcs12->guest_tr_selector);
10354 vmcs_write32(GUEST_ES_LIMIT, vmcs12->guest_es_limit);
10355 vmcs_write32(GUEST_CS_LIMIT, vmcs12->guest_cs_limit);
10356 vmcs_write32(GUEST_SS_LIMIT, vmcs12->guest_ss_limit);
10357 vmcs_write32(GUEST_DS_LIMIT, vmcs12->guest_ds_limit);
10358 vmcs_write32(GUEST_FS_LIMIT, vmcs12->guest_fs_limit);
10359 vmcs_write32(GUEST_GS_LIMIT, vmcs12->guest_gs_limit);
10360 vmcs_write32(GUEST_LDTR_LIMIT, vmcs12->guest_ldtr_limit);
10361 vmcs_write32(GUEST_TR_LIMIT, vmcs12->guest_tr_limit);
10362 vmcs_write32(GUEST_GDTR_LIMIT, vmcs12->guest_gdtr_limit);
10363 vmcs_write32(GUEST_IDTR_LIMIT, vmcs12->guest_idtr_limit);
10364 vmcs_write32(GUEST_ES_AR_BYTES, vmcs12->guest_es_ar_bytes);
10365 vmcs_write32(GUEST_CS_AR_BYTES, vmcs12->guest_cs_ar_bytes);
10366 vmcs_write32(GUEST_SS_AR_BYTES, vmcs12->guest_ss_ar_bytes);
10367 vmcs_write32(GUEST_DS_AR_BYTES, vmcs12->guest_ds_ar_bytes);
10368 vmcs_write32(GUEST_FS_AR_BYTES, vmcs12->guest_fs_ar_bytes);
10369 vmcs_write32(GUEST_GS_AR_BYTES, vmcs12->guest_gs_ar_bytes);
10370 vmcs_write32(GUEST_LDTR_AR_BYTES, vmcs12->guest_ldtr_ar_bytes);
10371 vmcs_write32(GUEST_TR_AR_BYTES, vmcs12->guest_tr_ar_bytes);
10372 vmcs_writel(GUEST_ES_BASE, vmcs12->guest_es_base);
10373 vmcs_writel(GUEST_CS_BASE, vmcs12->guest_cs_base);
10374 vmcs_writel(GUEST_SS_BASE, vmcs12->guest_ss_base);
10375 vmcs_writel(GUEST_DS_BASE, vmcs12->guest_ds_base);
10376 vmcs_writel(GUEST_FS_BASE, vmcs12->guest_fs_base);
10377 vmcs_writel(GUEST_GS_BASE, vmcs12->guest_gs_base);
10378 vmcs_writel(GUEST_LDTR_BASE, vmcs12->guest_ldtr_base);
10379 vmcs_writel(GUEST_TR_BASE, vmcs12->guest_tr_base);
10380 vmcs_writel(GUEST_GDTR_BASE, vmcs12->guest_gdtr_base);
10381 vmcs_writel(GUEST_IDTR_BASE, vmcs12->guest_idtr_base);
10382
Jim Mattsoncf8b84f2016-11-30 12:03:42 -080010383 if (from_vmentry &&
10384 (vmcs12->vm_entry_controls & VM_ENTRY_LOAD_DEBUG_CONTROLS)) {
Jan Kiszka2996fca2014-06-16 13:59:43 +020010385 kvm_set_dr(vcpu, 7, vmcs12->guest_dr7);
10386 vmcs_write64(GUEST_IA32_DEBUGCTL, vmcs12->guest_ia32_debugctl);
10387 } else {
10388 kvm_set_dr(vcpu, 7, vcpu->arch.dr7);
10389 vmcs_write64(GUEST_IA32_DEBUGCTL, vmx->nested.vmcs01_debugctl);
10390 }
Jim Mattsoncf8b84f2016-11-30 12:03:42 -080010391 if (from_vmentry) {
10392 vmcs_write32(VM_ENTRY_INTR_INFO_FIELD,
10393 vmcs12->vm_entry_intr_info_field);
10394 vmcs_write32(VM_ENTRY_EXCEPTION_ERROR_CODE,
10395 vmcs12->vm_entry_exception_error_code);
10396 vmcs_write32(VM_ENTRY_INSTRUCTION_LEN,
10397 vmcs12->vm_entry_instruction_len);
10398 vmcs_write32(GUEST_INTERRUPTIBILITY_INFO,
10399 vmcs12->guest_interruptibility_info);
Wanpeng Li2d6144e2017-07-25 03:40:46 -070010400 vmx->loaded_vmcs->nmi_known_unmasked =
10401 !(vmcs12->guest_interruptibility_info & GUEST_INTR_STATE_NMI);
Jim Mattsoncf8b84f2016-11-30 12:03:42 -080010402 } else {
10403 vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, 0);
10404 }
Nadav Har'Elfe3ef052011-05-25 23:10:02 +030010405 vmcs_write32(GUEST_SYSENTER_CS, vmcs12->guest_sysenter_cs);
Gleb Natapov63fbf592013-07-28 18:31:06 +030010406 vmx_set_rflags(vcpu, vmcs12->guest_rflags);
Nadav Har'Elfe3ef052011-05-25 23:10:02 +030010407 vmcs_writel(GUEST_PENDING_DBG_EXCEPTIONS,
10408 vmcs12->guest_pending_dbg_exceptions);
10409 vmcs_writel(GUEST_SYSENTER_ESP, vmcs12->guest_sysenter_esp);
10410 vmcs_writel(GUEST_SYSENTER_EIP, vmcs12->guest_sysenter_eip);
10411
Wanpeng Li81dc01f2014-12-04 19:11:07 +080010412 if (nested_cpu_has_xsaves(vmcs12))
10413 vmcs_write64(XSS_EXIT_BITMAP, vmcs12->xss_exit_bitmap);
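	/* An all-ones VMCS link pointer means "no shadow VMCS" for vmcs02. */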
Nadav Har'Elfe3ef052011-05-25 23:10:02 +030010414 vmcs_write64(VMCS_LINK_POINTER, -1ull);
10415
Jan Kiszkaf41245002014-03-07 20:03:13 +010010416 exec_control = vmcs12->pin_based_vm_exec_control;
Wincy Van705699a2015-02-03 23:58:17 +080010417
Paolo Bonzini9314006db2016-07-06 13:23:51 +020010418 /* Preemption timer setting is only taken from vmcs01. */
10419 exec_control &= ~PIN_BASED_VMX_PREEMPTION_TIMER;
10420 exec_control |= vmcs_config.pin_based_exec_ctrl;
10421 if (vmx->hv_deadline_tsc == -1)
10422 exec_control &= ~PIN_BASED_VMX_PREEMPTION_TIMER;
10423
10424 /* Posted interrupts setting is only taken from vmcs12. */
Wincy Van705699a2015-02-03 23:58:17 +080010425 if (nested_cpu_has_posted_intr(vmcs12)) {
Wincy Van705699a2015-02-03 23:58:17 +080010426 vmx->nested.posted_intr_nv = vmcs12->posted_intr_nv;
10427 vmx->nested.pi_pending = false;
Wincy Van06a55242017-04-28 13:13:59 +080010428 vmcs_write16(POSTED_INTR_NV, POSTED_INTR_NESTED_VECTOR);
Jim Mattson6beb7bd2016-11-30 12:03:45 -080010429 } else {
Wincy Van705699a2015-02-03 23:58:17 +080010430 exec_control &= ~PIN_BASED_POSTED_INTR;
Jim Mattson6beb7bd2016-11-30 12:03:45 -080010431 }
Wincy Van705699a2015-02-03 23:58:17 +080010432
Jan Kiszkaf41245002014-03-07 20:03:13 +010010433 vmcs_write32(PIN_BASED_VM_EXEC_CONTROL, exec_control);
Nadav Har'Elfe3ef052011-05-25 23:10:02 +030010434
Jan Kiszkaf41245002014-03-07 20:03:13 +010010435 vmx->nested.preemption_timer_expired = false;
10436 if (nested_cpu_has_preemption_timer(vmcs12))
10437 vmx_start_preemption_timer(vcpu);
Jan Kiszka0238ea92013-03-13 11:31:24 +010010438
Nadav Har'Elfe3ef052011-05-25 23:10:02 +030010439 /*
10440 * Whether page-faults are trapped is determined by a combination of
10441 * 3 settings: PFEC_MASK, PFEC_MATCH and EXCEPTION_BITMAP.PF.
10442 * If enable_ept, L0 doesn't care about page faults and we should
10443 * set all of these to L1's desires. However, if !enable_ept, L0 does
10444 * care about (at least some) page faults, and because it is not easy
10445 * (if at all possible?) to merge L0 and L1's desires, we simply ask
10446 * to exit on each and every L2 page fault. This is done by setting
10447 * MASK=MATCH=0 and (see below) EB.PF=1.
10448 * Note that below we don't need special code to set EB.PF beyond the
10449 * "or"ing of the EB of vmcs01 and vmcs12, because when enable_ept,
10450 * vmcs01's EB.PF is 0 so the "or" will take vmcs12's value, and when
10451 * !enable_ept, EB.PF is 1, so the "or" will always be 1.
Nadav Har'Elfe3ef052011-05-25 23:10:02 +030010452 */
10453 vmcs_write32(PAGE_FAULT_ERROR_CODE_MASK,
10454 enable_ept ? vmcs12->page_fault_error_code_mask : 0);
10455 vmcs_write32(PAGE_FAULT_ERROR_CODE_MATCH,
10456 enable_ept ? vmcs12->page_fault_error_code_match : 0);
10457
10458 if (cpu_has_secondary_exec_ctrls()) {
Paolo Bonzini80154d72017-08-24 13:55:35 +020010459 exec_control = vmx->secondary_exec_control;
Xiao Guangronge2821622015-09-09 14:05:52 +080010460
Nadav Har'Elfe3ef052011-05-25 23:10:02 +030010461 /* Take the following fields only from vmcs12 */
Paolo Bonzini696dfd92014-05-07 11:20:54 +020010462 exec_control &= ~(SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES |
Paolo Bonzini90a2db62017-07-27 13:22:13 +020010463 SECONDARY_EXEC_ENABLE_INVPCID |
Jan Kiszkab3a2a902015-03-23 19:27:19 +010010464 SECONDARY_EXEC_RDTSCP |
Paolo Bonzini3db13482017-08-24 14:48:03 +020010465 SECONDARY_EXEC_XSAVES |
Paolo Bonzini696dfd92014-05-07 11:20:54 +020010466 SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY |
Bandan Das27c42a12017-08-03 15:54:42 -040010467 SECONDARY_EXEC_APIC_REGISTER_VIRT |
10468 SECONDARY_EXEC_ENABLE_VMFUNC);
Nadav Har'Elfe3ef052011-05-25 23:10:02 +030010469 if (nested_cpu_has(vmcs12,
Bandan Das03efce62017-05-05 15:25:15 -040010470 CPU_BASED_ACTIVATE_SECONDARY_CONTROLS)) {
10471 vmcs12_exec_ctrl = vmcs12->secondary_vm_exec_control &
10472 ~SECONDARY_EXEC_ENABLE_PML;
10473 exec_control |= vmcs12_exec_ctrl;
10474 }
Nadav Har'Elfe3ef052011-05-25 23:10:02 +030010475
Bandan Das27c42a12017-08-03 15:54:42 -040010476 /* All VMFUNCs are currently emulated through L0 vmexits. */
10477 if (exec_control & SECONDARY_EXEC_ENABLE_VMFUNC)
10478 vmcs_write64(VM_FUNCTION_CONTROL, 0);
10479
Wincy Van608406e2015-02-03 23:57:51 +080010480 if (exec_control & SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY) {
10481 vmcs_write64(EOI_EXIT_BITMAP0,
10482 vmcs12->eoi_exit_bitmap0);
10483 vmcs_write64(EOI_EXIT_BITMAP1,
10484 vmcs12->eoi_exit_bitmap1);
10485 vmcs_write64(EOI_EXIT_BITMAP2,
10486 vmcs12->eoi_exit_bitmap2);
10487 vmcs_write64(EOI_EXIT_BITMAP3,
10488 vmcs12->eoi_exit_bitmap3);
10489 vmcs_write16(GUEST_INTR_STATUS,
10490 vmcs12->guest_intr_status);
10491 }
10492
Jim Mattson6beb7bd2016-11-30 12:03:45 -080010493 /*
10494 * Write an illegal value to APIC_ACCESS_ADDR. Later,
10495 * nested_get_vmcs12_pages will either fix it up or
10496 * remove the VM execution control.
10497 */
10498 if (exec_control & SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES)
10499 vmcs_write64(APIC_ACCESS_ADDR, -1ull);
10500
Nadav Har'Elfe3ef052011-05-25 23:10:02 +030010501 vmcs_write32(SECONDARY_VM_EXEC_CONTROL, exec_control);
10502 }
10503
10504
10505 /*
10506 * Set host-state according to L0's settings (vmcs12 is irrelevant here)
10507 * Some constant fields are set here by vmx_set_constant_host_state().
10508 * Other fields are different per CPU, and will be set later when
10509 * vmx_vcpu_load() is called, and when vmx_save_host_state() is called.
10510 */
Yang Zhanga547c6d2013-04-11 19:25:10 +080010511 vmx_set_constant_host_state(vmx);
Nadav Har'Elfe3ef052011-05-25 23:10:02 +030010512
10513 /*
Jim Mattson83bafef2016-10-04 10:48:38 -070010514 * Set the MSR load/store lists to match L0's settings.
10515 */
10516 vmcs_write32(VM_EXIT_MSR_STORE_COUNT, 0);
10517 vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, vmx->msr_autoload.nr);
10518 vmcs_write64(VM_EXIT_MSR_LOAD_ADDR, __pa(vmx->msr_autoload.host));
10519 vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, vmx->msr_autoload.nr);
10520 vmcs_write64(VM_ENTRY_MSR_LOAD_ADDR, __pa(vmx->msr_autoload.guest));
10521
10522 /*
Nadav Har'Elfe3ef052011-05-25 23:10:02 +030010523 * HOST_RSP is normally set correctly in vmx_vcpu_run() just before
10524 * entry, but only if the current (host) sp changed from the value
10525 * we wrote last (vmx->host_rsp). This cache is no longer relevant
10526 * if we switch vmcs, and rather than hold a separate cache per vmcs,
10527 * here we just force the write to happen on entry.
10528 */
10529 vmx->host_rsp = 0;
10530
10531 exec_control = vmx_exec_control(vmx); /* L0's desires */
10532 exec_control &= ~CPU_BASED_VIRTUAL_INTR_PENDING;
10533 exec_control &= ~CPU_BASED_VIRTUAL_NMI_PENDING;
10534 exec_control &= ~CPU_BASED_TPR_SHADOW;
10535 exec_control |= vmcs12->cpu_based_vm_exec_control;
Wanpeng Lia7c0b072014-08-21 19:46:50 +080010536
Jim Mattson6beb7bd2016-11-30 12:03:45 -080010537 /*
10538 * Write an illegal value to VIRTUAL_APIC_PAGE_ADDR. Later, if
10539 * nested_get_vmcs12_pages can't fix it up, the illegal value
10540 * will result in a VM entry failure.
10541 */
Wanpeng Lia7c0b072014-08-21 19:46:50 +080010542 if (exec_control & CPU_BASED_TPR_SHADOW) {
Jim Mattson6beb7bd2016-11-30 12:03:45 -080010543 vmcs_write64(VIRTUAL_APIC_PAGE_ADDR, -1ull);
Wanpeng Lia7c0b072014-08-21 19:46:50 +080010544 vmcs_write32(TPR_THRESHOLD, vmcs12->tpr_threshold);
Jim Mattson51aa68e2017-09-12 13:02:54 -070010545 } else {
10546#ifdef CONFIG_X86_64
10547 exec_control |= CPU_BASED_CR8_LOAD_EXITING |
10548 CPU_BASED_CR8_STORE_EXITING;
10549#endif
Wanpeng Lia7c0b072014-08-21 19:46:50 +080010550 }
10551
Nadav Har'Elfe3ef052011-05-25 23:10:02 +030010552 /*
Quan Xu8eb73e22017-12-12 16:44:21 +080010553 * A vmexit (to either L1 hypervisor or L0 userspace) is always needed
10554 * for I/O port accesses.
Nadav Har'Elfe3ef052011-05-25 23:10:02 +030010555 */
Nadav Har'Elfe3ef052011-05-25 23:10:02 +030010556 exec_control &= ~CPU_BASED_USE_IO_BITMAPS;
10557 exec_control |= CPU_BASED_UNCOND_IO_EXITING;
10558
10559 vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, exec_control);
10560
10561 /* EXCEPTION_BITMAP and CR0_GUEST_HOST_MASK should basically be the
10562 * bitwise-or of what L1 wants to trap for L2, and what we want to
10563 * trap. Note that CR0.TS also needs updating - we do this later.
10564 */
10565 update_exception_bitmap(vcpu);
10566 vcpu->arch.cr0_guest_owned_bits &= ~vmcs12->cr0_guest_host_mask;
10567 vmcs_writel(CR0_GUEST_HOST_MASK, ~vcpu->arch.cr0_guest_owned_bits);
10568
Nadav Har'El8049d652013-08-05 11:07:06 +030010569 /* L2->L1 exit controls are emulated - the hardware exit is to L0 so
10570 * we should use its exit controls. Note that VM_EXIT_LOAD_IA32_EFER
10571 * bits are further modified by vmx_set_efer() below.
10572 */
Jan Kiszkaf41245002014-03-07 20:03:13 +010010573 vmcs_write32(VM_EXIT_CONTROLS, vmcs_config.vmexit_ctrl);
Nadav Har'El8049d652013-08-05 11:07:06 +030010574
10575 /* vmcs12's VM_ENTRY_LOAD_IA32_EFER and VM_ENTRY_IA32E_MODE are
10576 * emulated by vmx_set_efer(), below.
10577 */
Gleb Natapov2961e8762013-11-25 15:37:13 +020010578 vm_entry_controls_init(vmx,
Nadav Har'El8049d652013-08-05 11:07:06 +030010579 (vmcs12->vm_entry_controls & ~VM_ENTRY_LOAD_IA32_EFER &
10580 ~VM_ENTRY_IA32E_MODE) |
Nadav Har'Elfe3ef052011-05-25 23:10:02 +030010581 (vmcs_config.vmentry_ctrl & ~VM_ENTRY_IA32E_MODE));
10582
Jim Mattsoncf8b84f2016-11-30 12:03:42 -080010583 if (from_vmentry &&
10584 (vmcs12->vm_entry_controls & VM_ENTRY_LOAD_IA32_PAT)) {
Nadav Har'Elfe3ef052011-05-25 23:10:02 +030010585 vmcs_write64(GUEST_IA32_PAT, vmcs12->guest_ia32_pat);
Jan Kiszka44811c02013-08-04 17:17:27 +020010586 vcpu->arch.pat = vmcs12->guest_ia32_pat;
Jim Mattsoncf8b84f2016-11-30 12:03:42 -080010587 } else if (vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_IA32_PAT) {
Nadav Har'Elfe3ef052011-05-25 23:10:02 +030010588 vmcs_write64(GUEST_IA32_PAT, vmx->vcpu.arch.pat);
Jim Mattsoncf8b84f2016-11-30 12:03:42 -080010589 }
Nadav Har'Elfe3ef052011-05-25 23:10:02 +030010590
10591 set_cr4_guest_host_mask(vmx);
10592
Jim Mattsoncf8b84f2016-11-30 12:03:42 -080010593 if (from_vmentry &&
10594 vmcs12->vm_entry_controls & VM_ENTRY_LOAD_BNDCFGS)
Paolo Bonzini36be0b92014-02-24 12:30:04 +010010595 vmcs_write64(GUEST_BNDCFGS, vmcs12->guest_bndcfgs);
10596
Nadav Har'El27fc51b2011-08-02 15:54:52 +030010597 if (vmcs12->cpu_based_vm_exec_control & CPU_BASED_USE_TSC_OFFSETING)
10598 vmcs_write64(TSC_OFFSET,
Paolo Bonziniea26e4e2016-11-01 00:39:48 +010010599 vcpu->arch.tsc_offset + vmcs12->tsc_offset);
Nadav Har'El27fc51b2011-08-02 15:54:52 +030010600 else
Paolo Bonziniea26e4e2016-11-01 00:39:48 +010010601 vmcs_write64(TSC_OFFSET, vcpu->arch.tsc_offset);
Peter Feinerc95ba922016-08-17 09:36:47 -070010602 if (kvm_has_tsc_control)
10603 decache_tsc_multiplier(vmx);
Nadav Har'Elfe3ef052011-05-25 23:10:02 +030010604
10605 if (enable_vpid) {
10606 /*
Wanpeng Li5c614b32015-10-13 09:18:36 -070010607		 * There is no direct mapping between vpid02 and vpid12: vpid02
 10608		 * is per-vCPU, owned by L0, and is reused across nested entries,
 10609		 * with a single invvpid issued whenever vpid12 changes during
 10610		 * nested vmentry. vpid12 is allocated by L1 for L2, so it does
 10611		 * not affect the global bitmap (used for vpid01 and vpid02
 10612		 * allocation) even if L1 spawns a large number of nested vCPUs.
Nadav Har'Elfe3ef052011-05-25 23:10:02 +030010613 */
Wanpeng Li5c614b32015-10-13 09:18:36 -070010614 if (nested_cpu_has_vpid(vmcs12) && vmx->nested.vpid02) {
10615 vmcs_write16(VIRTUAL_PROCESSOR_ID, vmx->nested.vpid02);
10616 if (vmcs12->virtual_processor_id != vmx->nested.last_vpid) {
10617 vmx->nested.last_vpid = vmcs12->virtual_processor_id;
Wanpeng Lic2ba05c2017-12-12 17:33:03 -080010618 __vmx_flush_tlb(vcpu, to_vmx(vcpu)->nested.vpid02, true);
Wanpeng Li5c614b32015-10-13 09:18:36 -070010619 }
10620 } else {
10621 vmcs_write16(VIRTUAL_PROCESSOR_ID, vmx->vpid);
Wanpeng Lic2ba05c2017-12-12 17:33:03 -080010622 vmx_flush_tlb(vcpu, true);
Wanpeng Li5c614b32015-10-13 09:18:36 -070010623 }
10624
Nadav Har'Elfe3ef052011-05-25 23:10:02 +030010625 }
10626
Ladi Prosek1fb883b2017-04-04 14:18:53 +020010627 if (enable_pml) {
10628 /*
10629 * Conceptually we want to copy the PML address and index from
10630 * vmcs01 here, and then back to vmcs01 on nested vmexit. But,
10631 * since we always flush the log on each vmexit, this happens
10632 * to be equivalent to simply resetting the fields in vmcs02.
10633 */
10634 ASSERT(vmx->pml_pg);
10635 vmcs_write64(PML_ADDRESS, page_to_phys(vmx->pml_pg));
10636 vmcs_write16(GUEST_PML_INDEX, PML_ENTITY_NUM - 1);
10637 }
10638
Nadav Har'El155a97a2013-08-05 11:07:16 +030010639 if (nested_cpu_has_ept(vmcs12)) {
Paolo Bonziniae1e2d12017-03-30 11:55:30 +020010640 if (nested_ept_init_mmu_context(vcpu)) {
10641 *entry_failure_code = ENTRY_FAIL_DEFAULT;
10642 return 1;
10643 }
Jim Mattsonfb6c8192017-03-16 13:53:59 -070010644 } else if (nested_cpu_has2(vmcs12,
10645 SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES)) {
10646 vmx_flush_tlb_ept_only(vcpu);
Nadav Har'El155a97a2013-08-05 11:07:16 +030010647 }
10648
Nadav Har'Elfe3ef052011-05-25 23:10:02 +030010649 /*
Paolo Bonzinibd7e5b02017-02-03 21:18:52 -080010650	 * This sets GUEST_CR0 to vmcs12->guest_cr0, possibly overriding the
 10651	 * bits which we consider mandatory-on.
Nadav Har'Elfe3ef052011-05-25 23:10:02 +030010652	 * The CR0_READ_SHADOW is what L2 should have expected to read given
 10653	 * the specifications by L1; it's not enough to take
 10654	 * vmcs12->cr0_read_shadow because our cr0_guest_host_mask may have
 10655	 * more bits set than L1 expected.
10656 */
10657 vmx_set_cr0(vcpu, vmcs12->guest_cr0);
10658 vmcs_writel(CR0_READ_SHADOW, nested_read_cr0(vmcs12));
10659
10660 vmx_set_cr4(vcpu, vmcs12->guest_cr4);
10661 vmcs_writel(CR4_READ_SHADOW, nested_read_cr4(vmcs12));
10662
Jim Mattsoncf8b84f2016-11-30 12:03:42 -080010663 if (from_vmentry &&
10664 (vmcs12->vm_entry_controls & VM_ENTRY_LOAD_IA32_EFER))
David Matlack5a6a9742016-11-29 18:14:10 -080010665 vcpu->arch.efer = vmcs12->guest_ia32_efer;
10666 else if (vmcs12->vm_entry_controls & VM_ENTRY_IA32E_MODE)
10667 vcpu->arch.efer |= (EFER_LMA | EFER_LME);
10668 else
10669 vcpu->arch.efer &= ~(EFER_LMA | EFER_LME);
10670 /* Note: modifies VM_ENTRY/EXIT_CONTROLS and GUEST/HOST_IA32_EFER */
10671 vmx_set_efer(vcpu, vcpu->arch.efer);
10672
Ladi Prosek9ed38ffa2016-11-30 16:03:10 +010010673	/* Load the L2 CR3: backed by either (nested) EPT or shadow page tables. */
Ladi Prosek7ad658b2017-03-23 07:18:08 +010010674 if (nested_vmx_load_cr3(vcpu, vmcs12->guest_cr3, nested_cpu_has_ept(vmcs12),
Ladi Prosek9ed38ffa2016-11-30 16:03:10 +010010675 entry_failure_code))
10676 return 1;
Ladi Prosek7ca29de2016-11-30 16:03:08 +010010677
Gleb Natapovfeaf0c7d2013-09-25 12:51:36 +030010678 if (!enable_ept)
10679 vcpu->arch.walk_mmu->inject_page_fault = vmx_inject_page_fault_nested;
10680
Nadav Har'El3633cfc2013-08-05 11:07:07 +030010681 /*
 10682	 * L1 may access L2's PDPTRs, so save them in order to construct vmcs12
10683 */
10684 if (enable_ept) {
10685 vmcs_write64(GUEST_PDPTR0, vmcs12->guest_pdptr0);
10686 vmcs_write64(GUEST_PDPTR1, vmcs12->guest_pdptr1);
10687 vmcs_write64(GUEST_PDPTR2, vmcs12->guest_pdptr2);
10688 vmcs_write64(GUEST_PDPTR3, vmcs12->guest_pdptr3);
10689 }
10690
Nadav Har'Elfe3ef052011-05-25 23:10:02 +030010691 kvm_register_write(vcpu, VCPU_REGS_RSP, vmcs12->guest_rsp);
10692 kvm_register_write(vcpu, VCPU_REGS_RIP, vmcs12->guest_rip);
Ladi Prosekee146c12016-11-30 16:03:09 +010010693 return 0;
Nadav Har'Elfe3ef052011-05-25 23:10:02 +030010694}
10695
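/*
 * Checks on vmcs12's control and host-state fields; a failure here is
 * reported to L1 by the caller as a VM-instruction error rather than as a
 * failed VM-entry vmexit.
 */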
Jim Mattsonca0bde22016-11-30 12:03:46 -080010696static int check_vmentry_prereqs(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
10697{
10698 struct vcpu_vmx *vmx = to_vmx(vcpu);
10699
10700 if (vmcs12->guest_activity_state != GUEST_ACTIVITY_ACTIVE &&
10701 vmcs12->guest_activity_state != GUEST_ACTIVITY_HLT)
10702 return VMXERR_ENTRY_INVALID_CONTROL_FIELD;
10703
Jim Mattson56a20512017-07-06 16:33:06 -070010704 if (nested_vmx_check_io_bitmap_controls(vcpu, vmcs12))
10705 return VMXERR_ENTRY_INVALID_CONTROL_FIELD;
10706
Jim Mattsonca0bde22016-11-30 12:03:46 -080010707 if (nested_vmx_check_msr_bitmap_controls(vcpu, vmcs12))
10708 return VMXERR_ENTRY_INVALID_CONTROL_FIELD;
10709
Jim Mattson712b12d2017-08-24 13:24:47 -070010710 if (nested_vmx_check_tpr_shadow_controls(vcpu, vmcs12))
10711 return VMXERR_ENTRY_INVALID_CONTROL_FIELD;
10712
Jim Mattsonca0bde22016-11-30 12:03:46 -080010713 if (nested_vmx_check_apicv_controls(vcpu, vmcs12))
10714 return VMXERR_ENTRY_INVALID_CONTROL_FIELD;
10715
10716 if (nested_vmx_check_msr_switch_controls(vcpu, vmcs12))
10717 return VMXERR_ENTRY_INVALID_CONTROL_FIELD;
10718
Bandan Dasc5f983f2017-05-05 15:25:14 -040010719 if (nested_vmx_check_pml_controls(vcpu, vmcs12))
10720 return VMXERR_ENTRY_INVALID_CONTROL_FIELD;
10721
Jim Mattsonca0bde22016-11-30 12:03:46 -080010722 if (!vmx_control_verify(vmcs12->cpu_based_vm_exec_control,
10723 vmx->nested.nested_vmx_procbased_ctls_low,
10724 vmx->nested.nested_vmx_procbased_ctls_high) ||
Jim Mattson2e5b0bd2017-05-04 11:51:58 -070010725 (nested_cpu_has(vmcs12, CPU_BASED_ACTIVATE_SECONDARY_CONTROLS) &&
10726 !vmx_control_verify(vmcs12->secondary_vm_exec_control,
10727 vmx->nested.nested_vmx_secondary_ctls_low,
10728 vmx->nested.nested_vmx_secondary_ctls_high)) ||
Jim Mattsonca0bde22016-11-30 12:03:46 -080010729 !vmx_control_verify(vmcs12->pin_based_vm_exec_control,
10730 vmx->nested.nested_vmx_pinbased_ctls_low,
10731 vmx->nested.nested_vmx_pinbased_ctls_high) ||
10732 !vmx_control_verify(vmcs12->vm_exit_controls,
10733 vmx->nested.nested_vmx_exit_ctls_low,
10734 vmx->nested.nested_vmx_exit_ctls_high) ||
10735 !vmx_control_verify(vmcs12->vm_entry_controls,
10736 vmx->nested.nested_vmx_entry_ctls_low,
10737 vmx->nested.nested_vmx_entry_ctls_high))
10738 return VMXERR_ENTRY_INVALID_CONTROL_FIELD;
10739
Bandan Das41ab9372017-08-03 15:54:43 -040010740 if (nested_cpu_has_vmfunc(vmcs12)) {
10741 if (vmcs12->vm_function_control &
10742 ~vmx->nested.nested_vmx_vmfunc_controls)
10743 return VMXERR_ENTRY_INVALID_CONTROL_FIELD;
10744
10745 if (nested_cpu_has_eptp_switching(vmcs12)) {
10746 if (!nested_cpu_has_ept(vmcs12) ||
10747 !page_address_valid(vcpu, vmcs12->eptp_list_address))
10748 return VMXERR_ENTRY_INVALID_CONTROL_FIELD;
10749 }
10750 }
Bandan Das27c42a12017-08-03 15:54:42 -040010751
Jim Mattsonc7c2c7092017-05-05 11:28:09 -070010752 if (vmcs12->cr3_target_count > nested_cpu_vmx_misc_cr3_count(vcpu))
10753 return VMXERR_ENTRY_INVALID_CONTROL_FIELD;
10754
Jim Mattsonca0bde22016-11-30 12:03:46 -080010755 if (!nested_host_cr0_valid(vcpu, vmcs12->host_cr0) ||
10756 !nested_host_cr4_valid(vcpu, vmcs12->host_cr4) ||
10757 !nested_cr3_valid(vcpu, vmcs12->host_cr3))
10758 return VMXERR_ENTRY_INVALID_HOST_STATE_FIELD;
10759
10760 return 0;
10761}
10762
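/*
 * Checks on vmcs12's guest-state fields; a failure here is reported as a
 * failed VM-entry (the exit reason has the failed-vmentry bit set), with
 * the exit qualification returned in *exit_qual.
 */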
10763static int check_vmentry_postreqs(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
10764 u32 *exit_qual)
10765{
10766 bool ia32e;
10767
10768 *exit_qual = ENTRY_FAIL_DEFAULT;
10769
10770 if (!nested_guest_cr0_valid(vcpu, vmcs12->guest_cr0) ||
10771 !nested_guest_cr4_valid(vcpu, vmcs12->guest_cr4))
10772 return 1;
10773
10774 if (!nested_cpu_has2(vmcs12, SECONDARY_EXEC_SHADOW_VMCS) &&
10775 vmcs12->vmcs_link_pointer != -1ull) {
10776 *exit_qual = ENTRY_FAIL_VMCS_LINK_PTR;
10777 return 1;
10778 }
10779
10780 /*
10781 * If the load IA32_EFER VM-entry control is 1, the following checks
10782 * are performed on the field for the IA32_EFER MSR:
10783 * - Bits reserved in the IA32_EFER MSR must be 0.
10784 * - Bit 10 (corresponding to IA32_EFER.LMA) must equal the value of
10785 *    the IA-32e mode guest VM-entry control. It must also be identical
10786 * to bit 8 (LME) if bit 31 in the CR0 field (corresponding to
10787 * CR0.PG) is 1.
10788 */
10789 if (to_vmx(vcpu)->nested.nested_run_pending &&
10790 (vmcs12->vm_entry_controls & VM_ENTRY_LOAD_IA32_EFER)) {
10791 ia32e = (vmcs12->vm_entry_controls & VM_ENTRY_IA32E_MODE) != 0;
10792 if (!kvm_valid_efer(vcpu, vmcs12->guest_ia32_efer) ||
10793 ia32e != !!(vmcs12->guest_ia32_efer & EFER_LMA) ||
10794 ((vmcs12->guest_cr0 & X86_CR0_PG) &&
10795 ia32e != !!(vmcs12->guest_ia32_efer & EFER_LME)))
10796 return 1;
10797 }
10798
10799 /*
10800 * If the load IA32_EFER VM-exit control is 1, bits reserved in the
10801 * IA32_EFER MSR must be 0 in the field for that register. In addition,
10802 * the values of the LMA and LME bits in the field must each be that of
10803 * the host address-space size VM-exit control.
10804 */
10805 if (vmcs12->vm_exit_controls & VM_EXIT_LOAD_IA32_EFER) {
10806 ia32e = (vmcs12->vm_exit_controls &
10807 VM_EXIT_HOST_ADDR_SPACE_SIZE) != 0;
10808 if (!kvm_valid_efer(vcpu, vmcs12->host_ia32_efer) ||
10809 ia32e != !!(vmcs12->host_ia32_efer & EFER_LMA) ||
10810 ia32e != !!(vmcs12->host_ia32_efer & EFER_LME))
10811 return 1;
10812 }
10813
Wanpeng Lif1b026a2017-11-05 16:54:48 -080010814 if ((vmcs12->vm_entry_controls & VM_ENTRY_LOAD_BNDCFGS) &&
10815 (is_noncanonical_address(vmcs12->guest_bndcfgs & PAGE_MASK, vcpu) ||
10816 (vmcs12->guest_bndcfgs & MSR_IA32_BNDCFGS_RSVD)))
10817 return 1;
10818
Jim Mattsonca0bde22016-11-30 12:03:46 -080010819 return 0;
10820}
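
/*
 * The IA32_EFER checks above follow the SDM consistency rule: EFER.LMA must
 * mirror the "IA-32e mode guest" VM-entry control, and EFER.LME must mirror
 * it as well whenever CR0.PG is set.  A standalone sketch of that rule (the
 * helper is illustrative only, not part of this file):
 */
static inline bool efer_consistent_with_ia32e(u64 efer, u64 cr0, bool ia32e)
{
        if (ia32e != !!(efer & EFER_LMA))
                return false;
        if ((cr0 & X86_CR0_PG) && ia32e != !!(efer & EFER_LME))
                return false;
        return true;
}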
10821
Jim Mattson858e25c2016-11-30 12:03:47 -080010822static int enter_vmx_non_root_mode(struct kvm_vcpu *vcpu, bool from_vmentry)
10823{
10824 struct vcpu_vmx *vmx = to_vmx(vcpu);
10825 struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
Jim Mattson858e25c2016-11-30 12:03:47 -080010826 u32 msr_entry_idx;
10827 u32 exit_qual;
10828
Jim Mattson858e25c2016-11-30 12:03:47 -080010829 enter_guest_mode(vcpu);
10830
10831 if (!(vmcs12->vm_entry_controls & VM_ENTRY_LOAD_DEBUG_CONTROLS))
10832 vmx->nested.vmcs01_debugctl = vmcs_read64(GUEST_IA32_DEBUGCTL);
10833
Jim Mattson00647b42017-11-27 17:22:25 -060010834 vmx_switch_vmcs(vcpu, &vmx->nested.vmcs02);
Jim Mattson858e25c2016-11-30 12:03:47 -080010835 vmx_segment_cache_clear(vmx);
10836
10837 if (prepare_vmcs02(vcpu, vmcs12, from_vmentry, &exit_qual)) {
10838 leave_guest_mode(vcpu);
David Hildenbrand1279a6b12017-03-20 10:00:08 +010010839 vmx_switch_vmcs(vcpu, &vmx->vmcs01);
Jim Mattson858e25c2016-11-30 12:03:47 -080010840 nested_vmx_entry_failure(vcpu, vmcs12,
10841 EXIT_REASON_INVALID_STATE, exit_qual);
10842 return 1;
10843 }
10844
10845 nested_get_vmcs12_pages(vcpu, vmcs12);
10846
10847 msr_entry_idx = nested_vmx_load_msr(vcpu,
10848 vmcs12->vm_entry_msr_load_addr,
10849 vmcs12->vm_entry_msr_load_count);
10850 if (msr_entry_idx) {
10851 leave_guest_mode(vcpu);
David Hildenbrand1279a6b12017-03-20 10:00:08 +010010852 vmx_switch_vmcs(vcpu, &vmx->vmcs01);
Jim Mattson858e25c2016-11-30 12:03:47 -080010853 nested_vmx_entry_failure(vcpu, vmcs12,
10854 EXIT_REASON_MSR_LOAD_FAIL, msr_entry_idx);
10855 return 1;
10856 }
10857
Jim Mattson858e25c2016-11-30 12:03:47 -080010858 /*
10859 * Note no nested_vmx_succeed or nested_vmx_fail here. At this point
10860 * we are no longer running L1, and VMLAUNCH/VMRESUME has not yet
10861 * returned as far as L1 is concerned. It will only return (and set
10862 * the success flag) when L2 exits (see nested_vmx_vmexit()).
10863 */
10864 return 0;
10865}
10866
Nadav Har'Elcd232ad2011-05-25 23:10:33 +030010867/*
10868 * nested_vmx_run() handles a nested entry, i.e., a VMLAUNCH or VMRESUME on L1
10869 * for running an L2 nested guest.
10870 */
10871static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch)
10872{
10873 struct vmcs12 *vmcs12;
10874 struct vcpu_vmx *vmx = to_vmx(vcpu);
Jim Mattsonb3f1dfb2017-07-17 12:00:34 -070010875 u32 interrupt_shadow = vmx_get_interrupt_shadow(vcpu);
Jim Mattsonca0bde22016-11-30 12:03:46 -080010876 u32 exit_qual;
10877 int ret;
Nadav Har'Elcd232ad2011-05-25 23:10:33 +030010878
Kyle Hueyeb277562016-11-29 12:40:39 -080010879 if (!nested_vmx_check_permission(vcpu))
Nadav Har'Elcd232ad2011-05-25 23:10:33 +030010880 return 1;
10881
Kyle Hueyeb277562016-11-29 12:40:39 -080010882 if (!nested_vmx_check_vmcs12(vcpu))
10883 goto out;
10884
Nadav Har'Elcd232ad2011-05-25 23:10:33 +030010885 vmcs12 = get_vmcs12(vcpu);
10886
Abel Gordon012f83c2013-04-18 14:39:25 +030010887 if (enable_shadow_vmcs)
10888 copy_shadow_to_vmcs12(vmx);
10889
Nadav Har'El7c177932011-05-25 23:12:04 +030010890 /*
10891 * The nested entry process starts with enforcing various prerequisites
10892 * on vmcs12 as required by the Intel SDM, and acting appropriately when
10893 * they fail: as the SDM explains, some conditions should cause the
10894 * instruction to fail, while others will cause the instruction to seem
10895 * to succeed, but then fail the VM entry with EXIT_REASON_INVALID_STATE.
10896 * To speed up the normal (success) code path, we should avoid checking
10897 * for misconfigurations which will anyway be caught by the processor
10898 * when using the merged vmcs02.
10899 */
Jim Mattsonb3f1dfb2017-07-17 12:00:34 -070010900 if (interrupt_shadow & KVM_X86_SHADOW_INT_MOV_SS) {
10901 nested_vmx_failValid(vcpu,
10902 VMXERR_ENTRY_EVENTS_BLOCKED_BY_MOV_SS);
10903 goto out;
10904 }
10905
Nadav Har'El7c177932011-05-25 23:12:04 +030010906 if (vmcs12->launch_state == launch) {
10907 nested_vmx_failValid(vcpu,
10908 launch ? VMXERR_VMLAUNCH_NONCLEAR_VMCS
10909 : VMXERR_VMRESUME_NONLAUNCHED_VMCS);
Kyle Hueyeb277562016-11-29 12:40:39 -080010910 goto out;
Nadav Har'El7c177932011-05-25 23:12:04 +030010911 }
10912
Jim Mattsonca0bde22016-11-30 12:03:46 -080010913 ret = check_vmentry_prereqs(vcpu, vmcs12);
10914 if (ret) {
10915 nested_vmx_failValid(vcpu, ret);
Kyle Hueyeb277562016-11-29 12:40:39 -080010916 goto out;
Paolo Bonzini26539bd2013-04-15 15:00:27 +020010917 }
10918
Nadav Har'El7c177932011-05-25 23:12:04 +030010919 /*
Jim Mattsonca0bde22016-11-30 12:03:46 -080010920 * After this point, the trap flag no longer triggers a singlestep trap
10921 * on the vm entry instructions; don't call kvm_skip_emulated_instruction.
10922 * This is not 100% correct; for performance reasons, we delegate most
10923 * of the checks on host state to the processor. If those fail,
10924 * the singlestep trap is missed.
Jan Kiszka384bb782013-04-20 10:52:36 +020010925 */
Jim Mattsonca0bde22016-11-30 12:03:46 -080010926 skip_emulated_instruction(vcpu);
Jan Kiszka384bb782013-04-20 10:52:36 +020010927
Jim Mattsonca0bde22016-11-30 12:03:46 -080010928 ret = check_vmentry_postreqs(vcpu, vmcs12, &exit_qual);
10929 if (ret) {
10930 nested_vmx_entry_failure(vcpu, vmcs12,
10931 EXIT_REASON_INVALID_STATE, exit_qual);
10932 return 1;
Jan Kiszka384bb782013-04-20 10:52:36 +020010933 }
10934
10935 /*
Nadav Har'El7c177932011-05-25 23:12:04 +030010936 * We're finally done with prerequisite checking, and can start with
10937 * the nested entry.
10938 */
10939
Jim Mattson858e25c2016-11-30 12:03:47 -080010940 ret = enter_vmx_non_root_mode(vcpu, true);
10941 if (ret)
10942 return ret;
Wincy Vanff651cb2014-12-11 08:52:58 +030010943
Jan Kiszka6dfacad2013-12-04 08:58:54 +010010944 if (vmcs12->guest_activity_state == GUEST_ACTIVITY_HLT)
Joel Schopp5cb56052015-03-02 13:43:31 -060010945 return kvm_vcpu_halt(vcpu);
Jan Kiszka6dfacad2013-12-04 08:58:54 +010010946
Jan Kiszka7af40ad32014-01-04 18:47:23 +010010947 vmx->nested.nested_run_pending = 1;
10948
Nadav Har'Elcd232ad2011-05-25 23:10:33 +030010949 return 1;
Kyle Hueyeb277562016-11-29 12:40:39 -080010950
10951out:
Kyle Huey6affcbe2016-11-29 12:40:40 -080010952 return kvm_skip_emulated_instruction(vcpu);
Nadav Har'Elcd232ad2011-05-25 23:10:33 +030010953}
10954
Nadav Har'El4704d0b2011-05-25 23:11:34 +030010955/*
10956 * On a nested exit from L2 to L1, vmcs12.guest_cr0 might not be up-to-date
10957 * because L2 may have changed some cr0 bits directly (CR0_GUEST_HOST_MASK).
10958 * This function returns the new value we should put in vmcs12.guest_cr0.
10959 * It's not enough to just return the vmcs02 GUEST_CR0. Rather,
10960 * 1. Bits that neither L0 nor L1 trapped, were set directly by L2 and are now
10961 * available in vmcs02 GUEST_CR0. (Note: It's enough to check that L0
10962 * didn't trap the bit, because if L1 did, so would L0).
10963 * 2. Bits that L1 asked to trap (and therefore L0 also did) could not have
10964 * been modified by L2, and L1 knows it. So just leave the old value of
10965 * the bit from vmcs12.guest_cr0. Note that the bit from vmcs02 GUEST_CR0
10966 * isn't relevant, because if L0 traps this bit it can set it to anything.
10967 * 3. Bits that L1 didn't trap, but L0 did. L1 believes the guest could have
10968 * changed these bits, and therefore they need to be updated, but L0
10969 * didn't necessarily allow them to be changed in GUEST_CR0 - and rather
10970 * put them in vmcs02 CR0_READ_SHADOW. So take these bits from there.
10971 */
10972static inline unsigned long
10973vmcs12_guest_cr0(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
10974{
10975 return
10976 /*1*/ (vmcs_readl(GUEST_CR0) & vcpu->arch.cr0_guest_owned_bits) |
10977 /*2*/ (vmcs12->guest_cr0 & vmcs12->cr0_guest_host_mask) |
10978 /*3*/ (vmcs_readl(CR0_READ_SHADOW) & ~(vmcs12->cr0_guest_host_mask |
10979 vcpu->arch.cr0_guest_owned_bits));
10980}
10981
10982static inline unsigned long
10983vmcs12_guest_cr4(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
10984{
10985 return
10986 /*1*/ (vmcs_readl(GUEST_CR4) & vcpu->arch.cr4_guest_owned_bits) |
10987 /*2*/ (vmcs12->guest_cr4 & vmcs12->cr4_guest_host_mask) |
10988 /*3*/ (vmcs_readl(CR4_READ_SHADOW) & ~(vmcs12->cr4_guest_host_mask |
10989 vcpu->arch.cr4_guest_owned_bits));
10990}
10991
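/*
 * A distilled form of the merge performed by vmcs12_guest_cr0() and
 * vmcs12_guest_cr4() above, with illustrative parameter names (this is a
 * sketch, not a helper used by this file).  "l0_guest_owned" is the mask of
 * bits L0 leaves to the guest while L2 runs, "l1_mask" is the
 * cr0/cr4_guest_host_mask that L1 asked for:
 */
static inline unsigned long merge_guest_cr(unsigned long vmcs02_cr,
                                           unsigned long vmcs02_read_shadow,
                                           unsigned long vmcs12_cr,
                                           unsigned long l1_mask,
                                           unsigned long l0_guest_owned)
{
        return (vmcs02_cr & l0_guest_owned) |                      /* case 1 */
               (vmcs12_cr & l1_mask) |                             /* case 2 */
               (vmcs02_read_shadow & ~(l1_mask | l0_guest_owned)); /* case 3 */
}

/*
 * For example, if l0_guest_owned = X86_CR0_TS and l1_mask = X86_CR0_PG, then
 * CR0.TS is taken from vmcs02 GUEST_CR0, CR0.PG from vmcs12->guest_cr0, and
 * every other bit from vmcs02 CR0_READ_SHADOW.
 */
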
Jan Kiszka5f3d5792013-04-14 12:12:46 +020010992static void vmcs12_save_pending_event(struct kvm_vcpu *vcpu,
10993 struct vmcs12 *vmcs12)
10994{
10995 u32 idt_vectoring;
10996 unsigned int nr;
10997
Wanpeng Li664f8e22017-08-24 03:35:09 -070010998 if (vcpu->arch.exception.injected) {
Jan Kiszka5f3d5792013-04-14 12:12:46 +020010999 nr = vcpu->arch.exception.nr;
11000 idt_vectoring = nr | VECTORING_INFO_VALID_MASK;
11001
11002 if (kvm_exception_is_soft(nr)) {
11003 vmcs12->vm_exit_instruction_len =
11004 vcpu->arch.event_exit_inst_len;
11005 idt_vectoring |= INTR_TYPE_SOFT_EXCEPTION;
11006 } else
11007 idt_vectoring |= INTR_TYPE_HARD_EXCEPTION;
11008
11009 if (vcpu->arch.exception.has_error_code) {
11010 idt_vectoring |= VECTORING_INFO_DELIVER_CODE_MASK;
11011 vmcs12->idt_vectoring_error_code =
11012 vcpu->arch.exception.error_code;
11013 }
11014
11015 vmcs12->idt_vectoring_info_field = idt_vectoring;
Jan Kiszkacd2633c2013-10-23 17:42:15 +010011016 } else if (vcpu->arch.nmi_injected) {
Jan Kiszka5f3d5792013-04-14 12:12:46 +020011017 vmcs12->idt_vectoring_info_field =
11018 INTR_TYPE_NMI_INTR | INTR_INFO_VALID_MASK | NMI_VECTOR;
11019 } else if (vcpu->arch.interrupt.pending) {
11020 nr = vcpu->arch.interrupt.nr;
11021 idt_vectoring = nr | VECTORING_INFO_VALID_MASK;
11022
11023 if (vcpu->arch.interrupt.soft) {
11024 idt_vectoring |= INTR_TYPE_SOFT_INTR;
11025 vmcs12->vm_entry_instruction_len =
11026 vcpu->arch.event_exit_inst_len;
11027 } else
11028 idt_vectoring |= INTR_TYPE_EXT_INTR;
11029
11030 vmcs12->idt_vectoring_info_field = idt_vectoring;
11031 }
11032}
11033
Jan Kiszkab6b8a142014-03-07 20:03:12 +010011034static int vmx_check_nested_events(struct kvm_vcpu *vcpu, bool external_intr)
11035{
11036 struct vcpu_vmx *vmx = to_vmx(vcpu);
Wanpeng Libfcf83b2017-08-24 03:35:11 -070011037 unsigned long exit_qual;
Liran Alon917dc602017-11-05 16:07:43 +020011038 bool block_nested_events =
11039 vmx->nested.nested_run_pending || kvm_event_needs_reinjection(vcpu);
Wanpeng Liacc9ab62017-02-27 04:24:39 -080011040
Wanpeng Libfcf83b2017-08-24 03:35:11 -070011041 if (vcpu->arch.exception.pending &&
11042 nested_vmx_check_exception(vcpu, &exit_qual)) {
Liran Alon917dc602017-11-05 16:07:43 +020011043 if (block_nested_events)
Wanpeng Libfcf83b2017-08-24 03:35:11 -070011044 return -EBUSY;
11045 nested_vmx_inject_exception_vmexit(vcpu, exit_qual);
11046 vcpu->arch.exception.pending = false;
11047 return 0;
11048 }
11049
Jan Kiszkaf41245002014-03-07 20:03:13 +010011050 if (nested_cpu_has_preemption_timer(get_vmcs12(vcpu)) &&
11051 vmx->nested.preemption_timer_expired) {
Liran Alon917dc602017-11-05 16:07:43 +020011052 if (block_nested_events)
Jan Kiszkaf41245002014-03-07 20:03:13 +010011053 return -EBUSY;
11054 nested_vmx_vmexit(vcpu, EXIT_REASON_PREEMPTION_TIMER, 0, 0);
11055 return 0;
11056 }
11057
Jan Kiszkab6b8a142014-03-07 20:03:12 +010011058 if (vcpu->arch.nmi_pending && nested_exit_on_nmi(vcpu)) {
Liran Alon917dc602017-11-05 16:07:43 +020011059 if (block_nested_events)
Jan Kiszkab6b8a142014-03-07 20:03:12 +010011060 return -EBUSY;
11061 nested_vmx_vmexit(vcpu, EXIT_REASON_EXCEPTION_NMI,
11062 NMI_VECTOR | INTR_TYPE_NMI_INTR |
11063 INTR_INFO_VALID_MASK, 0);
11064 /*
11065 * The NMI-triggered VM exit counts as injection:
11066 * clear this one and block further NMIs.
11067 */
11068 vcpu->arch.nmi_pending = 0;
11069 vmx_set_nmi_mask(vcpu, true);
11070 return 0;
11071 }
11072
11073 if ((kvm_cpu_has_interrupt(vcpu) || external_intr) &&
11074 nested_exit_on_intr(vcpu)) {
Liran Alon917dc602017-11-05 16:07:43 +020011075 if (block_nested_events)
Jan Kiszkab6b8a142014-03-07 20:03:12 +010011076 return -EBUSY;
11077 nested_vmx_vmexit(vcpu, EXIT_REASON_EXTERNAL_INTERRUPT, 0, 0);
Wincy Van705699a2015-02-03 23:58:17 +080011078 return 0;
Jan Kiszkab6b8a142014-03-07 20:03:12 +010011079 }
11080
David Hildenbrand6342c502017-01-25 11:58:58 +010011081 vmx_complete_nested_posted_interrupt(vcpu);
11082 return 0;
Jan Kiszkab6b8a142014-03-07 20:03:12 +010011083}
11084
Jan Kiszkaf41245002014-03-07 20:03:13 +010011085static u32 vmx_get_preemption_timer_value(struct kvm_vcpu *vcpu)
11086{
11087 ktime_t remaining =
11088 hrtimer_get_remaining(&to_vmx(vcpu)->nested.preemption_timer);
11089 u64 value;
11090
11091 if (ktime_to_ns(remaining) <= 0)
11092 return 0;
11093
11094 value = ktime_to_ns(remaining) * vcpu->arch.virtual_tsc_khz;
11095 do_div(value, 1000000);
11096 return value >> VMX_MISC_EMULATED_PREEMPTION_TIMER_RATE;
11097}
11098
Nadav Har'El4704d0b2011-05-25 23:11:34 +030011099/*
Jim Mattsoncf8b84f2016-11-30 12:03:42 -080011100 * Update the guest state fields of vmcs12 to reflect changes that
11101 * occurred while L2 was running. (The "IA-32e mode guest" bit of the
11102 * VM-entry controls is also updated, since this is really a guest
11103 * state bit.)
Nadav Har'El4704d0b2011-05-25 23:11:34 +030011104 */
Jim Mattsoncf8b84f2016-11-30 12:03:42 -080011105static void sync_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
Nadav Har'El4704d0b2011-05-25 23:11:34 +030011106{
Nadav Har'El4704d0b2011-05-25 23:11:34 +030011107 vmcs12->guest_cr0 = vmcs12_guest_cr0(vcpu, vmcs12);
11108 vmcs12->guest_cr4 = vmcs12_guest_cr4(vcpu, vmcs12);
11109
Nadav Har'El4704d0b2011-05-25 23:11:34 +030011110 vmcs12->guest_rsp = kvm_register_read(vcpu, VCPU_REGS_RSP);
11111 vmcs12->guest_rip = kvm_register_read(vcpu, VCPU_REGS_RIP);
11112 vmcs12->guest_rflags = vmcs_readl(GUEST_RFLAGS);
11113
11114 vmcs12->guest_es_selector = vmcs_read16(GUEST_ES_SELECTOR);
11115 vmcs12->guest_cs_selector = vmcs_read16(GUEST_CS_SELECTOR);
11116 vmcs12->guest_ss_selector = vmcs_read16(GUEST_SS_SELECTOR);
11117 vmcs12->guest_ds_selector = vmcs_read16(GUEST_DS_SELECTOR);
11118 vmcs12->guest_fs_selector = vmcs_read16(GUEST_FS_SELECTOR);
11119 vmcs12->guest_gs_selector = vmcs_read16(GUEST_GS_SELECTOR);
11120 vmcs12->guest_ldtr_selector = vmcs_read16(GUEST_LDTR_SELECTOR);
11121 vmcs12->guest_tr_selector = vmcs_read16(GUEST_TR_SELECTOR);
11122 vmcs12->guest_es_limit = vmcs_read32(GUEST_ES_LIMIT);
11123 vmcs12->guest_cs_limit = vmcs_read32(GUEST_CS_LIMIT);
11124 vmcs12->guest_ss_limit = vmcs_read32(GUEST_SS_LIMIT);
11125 vmcs12->guest_ds_limit = vmcs_read32(GUEST_DS_LIMIT);
11126 vmcs12->guest_fs_limit = vmcs_read32(GUEST_FS_LIMIT);
11127 vmcs12->guest_gs_limit = vmcs_read32(GUEST_GS_LIMIT);
11128 vmcs12->guest_ldtr_limit = vmcs_read32(GUEST_LDTR_LIMIT);
11129 vmcs12->guest_tr_limit = vmcs_read32(GUEST_TR_LIMIT);
11130 vmcs12->guest_gdtr_limit = vmcs_read32(GUEST_GDTR_LIMIT);
11131 vmcs12->guest_idtr_limit = vmcs_read32(GUEST_IDTR_LIMIT);
11132 vmcs12->guest_es_ar_bytes = vmcs_read32(GUEST_ES_AR_BYTES);
11133 vmcs12->guest_cs_ar_bytes = vmcs_read32(GUEST_CS_AR_BYTES);
11134 vmcs12->guest_ss_ar_bytes = vmcs_read32(GUEST_SS_AR_BYTES);
11135 vmcs12->guest_ds_ar_bytes = vmcs_read32(GUEST_DS_AR_BYTES);
11136 vmcs12->guest_fs_ar_bytes = vmcs_read32(GUEST_FS_AR_BYTES);
11137 vmcs12->guest_gs_ar_bytes = vmcs_read32(GUEST_GS_AR_BYTES);
11138 vmcs12->guest_ldtr_ar_bytes = vmcs_read32(GUEST_LDTR_AR_BYTES);
11139 vmcs12->guest_tr_ar_bytes = vmcs_read32(GUEST_TR_AR_BYTES);
11140 vmcs12->guest_es_base = vmcs_readl(GUEST_ES_BASE);
11141 vmcs12->guest_cs_base = vmcs_readl(GUEST_CS_BASE);
11142 vmcs12->guest_ss_base = vmcs_readl(GUEST_SS_BASE);
11143 vmcs12->guest_ds_base = vmcs_readl(GUEST_DS_BASE);
11144 vmcs12->guest_fs_base = vmcs_readl(GUEST_FS_BASE);
11145 vmcs12->guest_gs_base = vmcs_readl(GUEST_GS_BASE);
11146 vmcs12->guest_ldtr_base = vmcs_readl(GUEST_LDTR_BASE);
11147 vmcs12->guest_tr_base = vmcs_readl(GUEST_TR_BASE);
11148 vmcs12->guest_gdtr_base = vmcs_readl(GUEST_GDTR_BASE);
11149 vmcs12->guest_idtr_base = vmcs_readl(GUEST_IDTR_BASE);
11150
Nadav Har'El4704d0b2011-05-25 23:11:34 +030011151 vmcs12->guest_interruptibility_info =
11152 vmcs_read32(GUEST_INTERRUPTIBILITY_INFO);
11153 vmcs12->guest_pending_dbg_exceptions =
11154 vmcs_readl(GUEST_PENDING_DBG_EXCEPTIONS);
Jan Kiszka3edf1e62014-01-04 18:47:24 +010011155 if (vcpu->arch.mp_state == KVM_MP_STATE_HALTED)
11156 vmcs12->guest_activity_state = GUEST_ACTIVITY_HLT;
11157 else
11158 vmcs12->guest_activity_state = GUEST_ACTIVITY_ACTIVE;
Nadav Har'El4704d0b2011-05-25 23:11:34 +030011159
Jan Kiszkaf41245002014-03-07 20:03:13 +010011160 if (nested_cpu_has_preemption_timer(vmcs12)) {
11161 if (vmcs12->vm_exit_controls &
11162 VM_EXIT_SAVE_VMX_PREEMPTION_TIMER)
11163 vmcs12->vmx_preemption_timer_value =
11164 vmx_get_preemption_timer_value(vcpu);
11165 hrtimer_cancel(&to_vmx(vcpu)->nested.preemption_timer);
11166 }
Arthur Chunqi Li7854cbc2013-09-16 16:11:44 +080011167
Nadav Har'El3633cfc2013-08-05 11:07:07 +030011168 /*
11169 * In some cases (usually, nested EPT), L2 is allowed to change its
11170 * own CR3 without exiting. If it has changed it, we must keep it.
11171 * Of course, if L0 is using shadow page tables, GUEST_CR3 was defined
11172 * by L0, not L1 or L2, so we mustn't unconditionally copy it to vmcs12.
11173 *
11174 * Additionally, restore L2's PDPTR to vmcs12.
11175 */
11176 if (enable_ept) {
Paolo Bonzinif3531052015-12-03 15:49:56 +010011177 vmcs12->guest_cr3 = vmcs_readl(GUEST_CR3);
Nadav Har'El3633cfc2013-08-05 11:07:07 +030011178 vmcs12->guest_pdptr0 = vmcs_read64(GUEST_PDPTR0);
11179 vmcs12->guest_pdptr1 = vmcs_read64(GUEST_PDPTR1);
11180 vmcs12->guest_pdptr2 = vmcs_read64(GUEST_PDPTR2);
11181 vmcs12->guest_pdptr3 = vmcs_read64(GUEST_PDPTR3);
11182 }
11183
Jim Mattsond281e132017-06-01 12:44:46 -070011184 vmcs12->guest_linear_address = vmcs_readl(GUEST_LINEAR_ADDRESS);
Jan Dakinevich119a9c02016-09-04 21:22:47 +030011185
Wincy Van608406e2015-02-03 23:57:51 +080011186 if (nested_cpu_has_vid(vmcs12))
11187 vmcs12->guest_intr_status = vmcs_read16(GUEST_INTR_STATUS);
11188
Jan Kiszkac18911a2013-03-13 16:06:41 +010011189 vmcs12->vm_entry_controls =
11190 (vmcs12->vm_entry_controls & ~VM_ENTRY_IA32E_MODE) |
Gleb Natapov2961e8762013-11-25 15:37:13 +020011191 (vm_entry_controls_get(to_vmx(vcpu)) & VM_ENTRY_IA32E_MODE);
Jan Kiszkac18911a2013-03-13 16:06:41 +010011192
Jan Kiszka2996fca2014-06-16 13:59:43 +020011193 if (vmcs12->vm_exit_controls & VM_EXIT_SAVE_DEBUG_CONTROLS) {
11194 kvm_get_dr(vcpu, 7, (unsigned long *)&vmcs12->guest_dr7);
11195 vmcs12->guest_ia32_debugctl = vmcs_read64(GUEST_IA32_DEBUGCTL);
11196 }
11197
Nadav Har'El4704d0b2011-05-25 23:11:34 +030011198 /* TODO: These cannot have changed unless we have MSR bitmaps and
11199 * the relevant bit asks not to trap the change */
Jan Kiszkab8c07d52013-04-06 13:51:21 +020011200 if (vmcs12->vm_exit_controls & VM_EXIT_SAVE_IA32_PAT)
Nadav Har'El4704d0b2011-05-25 23:11:34 +030011201 vmcs12->guest_ia32_pat = vmcs_read64(GUEST_IA32_PAT);
Jan Kiszka10ba54a2013-08-08 16:26:31 +020011202 if (vmcs12->vm_exit_controls & VM_EXIT_SAVE_IA32_EFER)
11203 vmcs12->guest_ia32_efer = vcpu->arch.efer;
Nadav Har'El4704d0b2011-05-25 23:11:34 +030011204 vmcs12->guest_sysenter_cs = vmcs_read32(GUEST_SYSENTER_CS);
11205 vmcs12->guest_sysenter_esp = vmcs_readl(GUEST_SYSENTER_ESP);
11206 vmcs12->guest_sysenter_eip = vmcs_readl(GUEST_SYSENTER_EIP);
Paolo Bonzinia87036a2016-03-08 09:52:13 +010011207 if (kvm_mpx_supported())
Paolo Bonzini36be0b92014-02-24 12:30:04 +010011208 vmcs12->guest_bndcfgs = vmcs_read64(GUEST_BNDCFGS);
Jim Mattsoncf8b84f2016-11-30 12:03:42 -080011209}
11210
11211/*
11212 * prepare_vmcs12 is part of what we need to do when the nested L2 guest exits
11213 * and we want to prepare to run its L1 parent. L1 keeps a vmcs for L2 (vmcs12),
11214 * and this function updates it to reflect the changes to the guest state while
11215 * L2 was running (and perhaps made some exits which were handled directly by L0
11216 * without going back to L1), and to reflect the exit reason.
11217 * Note that we do not have to copy here all VMCS fields, just those that
11218 * could have changed by the L2 guest or the exit - i.e., the guest-state and
11219 * exit-information fields only. Other fields are modified by L1 with VMWRITE,
11220 * which already writes to vmcs12 directly.
11221 */
11222static void prepare_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
11223 u32 exit_reason, u32 exit_intr_info,
11224 unsigned long exit_qualification)
11225{
11226 /* update guest state fields: */
11227 sync_vmcs12(vcpu, vmcs12);
Nadav Har'El4704d0b2011-05-25 23:11:34 +030011228
11229 /* update exit information fields: */
11230
Jan Kiszka533558b2014-01-04 18:47:20 +010011231 vmcs12->vm_exit_reason = exit_reason;
11232 vmcs12->exit_qualification = exit_qualification;
Jan Kiszka533558b2014-01-04 18:47:20 +010011233 vmcs12->vm_exit_intr_info = exit_intr_info;
Paolo Bonzini7313c692017-07-27 10:31:25 +020011234
Jan Kiszka5f3d5792013-04-14 12:12:46 +020011235 vmcs12->idt_vectoring_info_field = 0;
Nadav Har'El4704d0b2011-05-25 23:11:34 +030011236 vmcs12->vm_exit_instruction_len = vmcs_read32(VM_EXIT_INSTRUCTION_LEN);
11237 vmcs12->vmx_instruction_info = vmcs_read32(VMX_INSTRUCTION_INFO);
11238
Jan Kiszka5f3d5792013-04-14 12:12:46 +020011239 if (!(vmcs12->vm_exit_reason & VMX_EXIT_REASONS_FAILED_VMENTRY)) {
Jim Mattson7cdc2d62017-07-06 16:33:05 -070011240 vmcs12->launch_state = 1;
11241
Jan Kiszka5f3d5792013-04-14 12:12:46 +020011242 /* vm_entry_intr_info_field is cleared on exit. Emulate this
11243 * instead of reading the real value. */
Nadav Har'El4704d0b2011-05-25 23:11:34 +030011244 vmcs12->vm_entry_intr_info_field &= ~INTR_INFO_VALID_MASK;
Jan Kiszka5f3d5792013-04-14 12:12:46 +020011245
11246 /*
11247 * Transfer the event that L0 or L1 may have wanted to inject into
11248 * L2 to IDT_VECTORING_INFO_FIELD.
11249 */
11250 vmcs12_save_pending_event(vcpu, vmcs12);
11251 }
11252
11253 /*
11254 * Drop what we picked up for L2 via vmx_complete_interrupts. It is
11255 * preserved above and would only end up incorrectly in L1.
11256 */
11257 vcpu->arch.nmi_injected = false;
11258 kvm_clear_exception_queue(vcpu);
11259 kvm_clear_interrupt_queue(vcpu);
Nadav Har'El4704d0b2011-05-25 23:11:34 +030011260}
11261
Wanpeng Li5af41572017-11-05 16:54:49 -080011262static void load_vmcs12_mmu_host_state(struct kvm_vcpu *vcpu,
11263 struct vmcs12 *vmcs12)
11264{
11265 u32 entry_failure_code;
11266
11267 nested_ept_uninit_mmu_context(vcpu);
11268
11269 /*
11270 * Only PDPTE load can fail as the value of cr3 was checked on entry and
11271 * couldn't have changed.
11272 */
11273 if (nested_vmx_load_cr3(vcpu, vmcs12->host_cr3, false, &entry_failure_code))
11274 nested_vmx_abort(vcpu, VMX_ABORT_LOAD_HOST_PDPTE_FAIL);
11275
11276 if (!enable_ept)
11277 vcpu->arch.walk_mmu->inject_page_fault = kvm_inject_page_fault;
11278}
11279
Nadav Har'El4704d0b2011-05-25 23:11:34 +030011280/*
11281 * A part of what we need to when the nested L2 guest exits and we want to
11282 * run its L1 parent, is to reset L1's guest state to the host state specified
11283 * in vmcs12.
11284 * This function is to be called not only on normal nested exit, but also on
11285 * a nested entry failure, as explained in Intel's spec, 3B.23.7 ("VM-Entry
11286 * Failures During or After Loading Guest State").
11287 * This function should be called when the active VMCS is L1's (vmcs01).
11288 */
Jan Kiszka733568f2013-02-23 15:07:47 +010011289static void load_vmcs12_host_state(struct kvm_vcpu *vcpu,
11290 struct vmcs12 *vmcs12)
Nadav Har'El4704d0b2011-05-25 23:11:34 +030011291{
Arthur Chunqi Li21feb4e2013-07-15 16:04:08 +080011292 struct kvm_segment seg;
11293
Nadav Har'El4704d0b2011-05-25 23:11:34 +030011294 if (vmcs12->vm_exit_controls & VM_EXIT_LOAD_IA32_EFER)
11295 vcpu->arch.efer = vmcs12->host_ia32_efer;
Jan Kiszkad1fa0352013-04-14 12:44:54 +020011296 else if (vmcs12->vm_exit_controls & VM_EXIT_HOST_ADDR_SPACE_SIZE)
Nadav Har'El4704d0b2011-05-25 23:11:34 +030011297 vcpu->arch.efer |= (EFER_LMA | EFER_LME);
11298 else
11299 vcpu->arch.efer &= ~(EFER_LMA | EFER_LME);
11300 vmx_set_efer(vcpu, vcpu->arch.efer);
11301
11302 kvm_register_write(vcpu, VCPU_REGS_RSP, vmcs12->host_rsp);
11303 kvm_register_write(vcpu, VCPU_REGS_RIP, vmcs12->host_rip);
H. Peter Anvin1adfa762013-04-27 16:10:11 -070011304 vmx_set_rflags(vcpu, X86_EFLAGS_FIXED);
Nadav Har'El4704d0b2011-05-25 23:11:34 +030011305 /*
11306 * Note that calling vmx_set_cr0 is important, even if cr0 hasn't
Paolo Bonzinibd7e5b02017-02-03 21:18:52 -080011307 * actually changed, because vmx_set_cr0 refers to efer set above.
11308 *
11309 * CR0_GUEST_HOST_MASK is already set in the original vmcs01
11310 * (KVM doesn't change it);
Nadav Har'El4704d0b2011-05-25 23:11:34 +030011311 */
Paolo Bonzinibd7e5b02017-02-03 21:18:52 -080011312 vcpu->arch.cr0_guest_owned_bits = X86_CR0_TS;
Jan Kiszka9e3e4db2013-09-03 21:11:45 +020011313 vmx_set_cr0(vcpu, vmcs12->host_cr0);
Nadav Har'El4704d0b2011-05-25 23:11:34 +030011314
Paolo Bonzinibd7e5b02017-02-03 21:18:52 -080011315 /* Same as above - no reason to call set_cr4_guest_host_mask(). */
Nadav Har'El4704d0b2011-05-25 23:11:34 +030011316 vcpu->arch.cr4_guest_owned_bits = ~vmcs_readl(CR4_GUEST_HOST_MASK);
Haozhong Zhang8eb3f872017-10-10 15:01:22 +080011317 vmx_set_cr4(vcpu, vmcs12->host_cr4);
Nadav Har'El4704d0b2011-05-25 23:11:34 +030011318
Wanpeng Li5af41572017-11-05 16:54:49 -080011319 load_vmcs12_mmu_host_state(vcpu, vmcs12);
Gleb Natapovfeaf0c7d2013-09-25 12:51:36 +030011320
Nadav Har'El4704d0b2011-05-25 23:11:34 +030011321 if (enable_vpid) {
11322 /*
11323 * Trivially support vpid by letting L2s share their parent
11324 * L1's vpid. TODO: move to a more elaborate solution, giving
11325 * each L2 its own vpid and exposing the vpid feature to L1.
11326 */
Wanpeng Lic2ba05c2017-12-12 17:33:03 -080011327 vmx_flush_tlb(vcpu, true);
Nadav Har'El4704d0b2011-05-25 23:11:34 +030011328 }
Wincy Van06a55242017-04-28 13:13:59 +080011329 /* Restore posted intr vector. */
11330 if (nested_cpu_has_posted_intr(vmcs12))
11331 vmcs_write16(POSTED_INTR_NV, POSTED_INTR_VECTOR);
Nadav Har'El4704d0b2011-05-25 23:11:34 +030011332
11333 vmcs_write32(GUEST_SYSENTER_CS, vmcs12->host_ia32_sysenter_cs);
11334 vmcs_writel(GUEST_SYSENTER_ESP, vmcs12->host_ia32_sysenter_esp);
11335 vmcs_writel(GUEST_SYSENTER_EIP, vmcs12->host_ia32_sysenter_eip);
11336 vmcs_writel(GUEST_IDTR_BASE, vmcs12->host_idtr_base);
11337 vmcs_writel(GUEST_GDTR_BASE, vmcs12->host_gdtr_base);
Ladi Prosek21f2d5512017-10-11 16:54:42 +020011338 vmcs_write32(GUEST_IDTR_LIMIT, 0xFFFF);
11339 vmcs_write32(GUEST_GDTR_LIMIT, 0xFFFF);
Nadav Har'El4704d0b2011-05-25 23:11:34 +030011340
Paolo Bonzini36be0b92014-02-24 12:30:04 +010011341 /* If not VM_EXIT_CLEAR_BNDCFGS, the L2 value propagates to L1. */
11342 if (vmcs12->vm_exit_controls & VM_EXIT_CLEAR_BNDCFGS)
11343 vmcs_write64(GUEST_BNDCFGS, 0);
11344
Jan Kiszka44811c02013-08-04 17:17:27 +020011345 if (vmcs12->vm_exit_controls & VM_EXIT_LOAD_IA32_PAT) {
Nadav Har'El4704d0b2011-05-25 23:11:34 +030011346 vmcs_write64(GUEST_IA32_PAT, vmcs12->host_ia32_pat);
Jan Kiszka44811c02013-08-04 17:17:27 +020011347 vcpu->arch.pat = vmcs12->host_ia32_pat;
11348 }
Nadav Har'El4704d0b2011-05-25 23:11:34 +030011349 if (vmcs12->vm_exit_controls & VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL)
11350 vmcs_write64(GUEST_IA32_PERF_GLOBAL_CTRL,
11351 vmcs12->host_ia32_perf_global_ctrl);
Jan Kiszka503cd0c2013-03-03 13:05:44 +010011352
Arthur Chunqi Li21feb4e2013-07-15 16:04:08 +080011353 /* Set L1 segment info according to Intel SDM
11354 27.5.2 Loading Host Segment and Descriptor-Table Registers */
11355 seg = (struct kvm_segment) {
11356 .base = 0,
11357 .limit = 0xFFFFFFFF,
11358 .selector = vmcs12->host_cs_selector,
11359 .type = 11,
11360 .present = 1,
11361 .s = 1,
11362 .g = 1
11363 };
11364 if (vmcs12->vm_exit_controls & VM_EXIT_HOST_ADDR_SPACE_SIZE)
11365 seg.l = 1;
11366 else
11367 seg.db = 1;
11368 vmx_set_segment(vcpu, &seg, VCPU_SREG_CS);
11369 seg = (struct kvm_segment) {
11370 .base = 0,
11371 .limit = 0xFFFFFFFF,
11372 .type = 3,
11373 .present = 1,
11374 .s = 1,
11375 .db = 1,
11376 .g = 1
11377 };
11378 seg.selector = vmcs12->host_ds_selector;
11379 vmx_set_segment(vcpu, &seg, VCPU_SREG_DS);
11380 seg.selector = vmcs12->host_es_selector;
11381 vmx_set_segment(vcpu, &seg, VCPU_SREG_ES);
11382 seg.selector = vmcs12->host_ss_selector;
11383 vmx_set_segment(vcpu, &seg, VCPU_SREG_SS);
11384 seg.selector = vmcs12->host_fs_selector;
11385 seg.base = vmcs12->host_fs_base;
11386 vmx_set_segment(vcpu, &seg, VCPU_SREG_FS);
11387 seg.selector = vmcs12->host_gs_selector;
11388 seg.base = vmcs12->host_gs_base;
11389 vmx_set_segment(vcpu, &seg, VCPU_SREG_GS);
11390 seg = (struct kvm_segment) {
Gleb Natapov205befd2013-08-04 15:08:06 +030011391 .base = vmcs12->host_tr_base,
Arthur Chunqi Li21feb4e2013-07-15 16:04:08 +080011392 .limit = 0x67,
11393 .selector = vmcs12->host_tr_selector,
11394 .type = 11,
11395 .present = 1
11396 };
11397 vmx_set_segment(vcpu, &seg, VCPU_SREG_TR);
11398
Jan Kiszka503cd0c2013-03-03 13:05:44 +010011399 kvm_set_dr(vcpu, 7, 0x400);
11400 vmcs_write64(GUEST_IA32_DEBUGCTL, 0);
Wincy Vanff651cb2014-12-11 08:52:58 +030011401
Wincy Van3af18d92015-02-03 23:49:31 +080011402 if (cpu_has_vmx_msr_bitmap())
11403 vmx_set_msr_bitmap(vcpu);
11404
Wincy Vanff651cb2014-12-11 08:52:58 +030011405 if (nested_vmx_load_msr(vcpu, vmcs12->vm_exit_msr_load_addr,
11406 vmcs12->vm_exit_msr_load_count))
11407 nested_vmx_abort(vcpu, VMX_ABORT_LOAD_HOST_MSR_FAIL);
Nadav Har'El4704d0b2011-05-25 23:11:34 +030011408}
11409
11410/*
11411 * Emulate an exit from nested guest (L2) to L1, i.e., prepare to run L1
11412 * and modify vmcs12 to make it see what it would expect to see there if
11413 * L2 was its real guest. Must only be called when in L2 (is_guest_mode())
11414 */
Jan Kiszka533558b2014-01-04 18:47:20 +010011415static void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason,
11416 u32 exit_intr_info,
11417 unsigned long exit_qualification)
Nadav Har'El4704d0b2011-05-25 23:11:34 +030011418{
11419 struct vcpu_vmx *vmx = to_vmx(vcpu);
Nadav Har'El4704d0b2011-05-25 23:11:34 +030011420 struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
11421
Jan Kiszka5f3d5792013-04-14 12:12:46 +020011422 /* trying to cancel vmlaunch/vmresume is a bug */
11423 WARN_ON_ONCE(vmx->nested.nested_run_pending);
11424
Wanpeng Li6550c4d2017-07-31 19:25:27 -070011425 /*
Jim Mattson4f350c62017-09-14 16:31:44 -070011426 * The only expected VM-instruction error is "VM entry with
11427 * invalid control field(s)." Anything else indicates a
11428 * problem with L0.
Wanpeng Li6550c4d2017-07-31 19:25:27 -070011429 */
Jim Mattson4f350c62017-09-14 16:31:44 -070011430 WARN_ON_ONCE(vmx->fail && (vmcs_read32(VM_INSTRUCTION_ERROR) !=
11431 VMXERR_ENTRY_INVALID_CONTROL_FIELD));
11432
11433 leave_guest_mode(vcpu);
11434
11435 if (likely(!vmx->fail)) {
Ladi Prosek72e9cbd2017-10-11 16:54:43 +020011436 if (exit_reason == -1)
11437 sync_vmcs12(vcpu, vmcs12);
11438 else
11439 prepare_vmcs12(vcpu, vmcs12, exit_reason, exit_intr_info,
11440 exit_qualification);
Jim Mattson4f350c62017-09-14 16:31:44 -070011441
11442 if (nested_vmx_store_msr(vcpu, vmcs12->vm_exit_msr_store_addr,
11443 vmcs12->vm_exit_msr_store_count))
11444 nested_vmx_abort(vcpu, VMX_ABORT_SAVE_GUEST_MSR_FAIL);
Bandan Das77b0f5d2014-04-19 18:17:45 -040011445 }
11446
Jim Mattson4f350c62017-09-14 16:31:44 -070011447 vmx_switch_vmcs(vcpu, &vmx->vmcs01);
Paolo Bonzini8391ce42016-07-07 14:58:33 +020011448 vm_entry_controls_reset_shadow(vmx);
11449 vm_exit_controls_reset_shadow(vmx);
Jan Kiszka36c3cc42013-02-23 22:35:37 +010011450 vmx_segment_cache_clear(vmx);
11451
Paolo Bonzini9314006db2016-07-06 13:23:51 +020011452 /* Update any VMCS fields that might have changed while L2 ran */
Jim Mattson83bafef2016-10-04 10:48:38 -070011453 vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, vmx->msr_autoload.nr);
11454 vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, vmx->msr_autoload.nr);
Paolo Bonziniea26e4e2016-11-01 00:39:48 +010011455 vmcs_write64(TSC_OFFSET, vcpu->arch.tsc_offset);
Paolo Bonzini9314006db2016-07-06 13:23:51 +020011456 if (vmx->hv_deadline_tsc == -1)
11457 vmcs_clear_bits(PIN_BASED_VM_EXEC_CONTROL,
11458 PIN_BASED_VMX_PREEMPTION_TIMER);
11459 else
11460 vmcs_set_bits(PIN_BASED_VM_EXEC_CONTROL,
11461 PIN_BASED_VMX_PREEMPTION_TIMER);
Peter Feinerc95ba922016-08-17 09:36:47 -070011462 if (kvm_has_tsc_control)
11463 decache_tsc_multiplier(vmx);
Nadav Har'El4704d0b2011-05-25 23:11:34 +030011464
Radim Krčmářdccbfcf2016-08-08 20:16:23 +020011465 if (vmx->nested.change_vmcs01_virtual_x2apic_mode) {
11466 vmx->nested.change_vmcs01_virtual_x2apic_mode = false;
11467 vmx_set_virtual_x2apic_mode(vcpu,
11468 vcpu->arch.apic_base & X2APIC_ENABLE);
Jim Mattsonfb6c8192017-03-16 13:53:59 -070011469 } else if (!nested_cpu_has_ept(vmcs12) &&
11470 nested_cpu_has2(vmcs12,
11471 SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES)) {
11472 vmx_flush_tlb_ept_only(vcpu);
Radim Krčmářdccbfcf2016-08-08 20:16:23 +020011473 }
Nadav Har'El4704d0b2011-05-25 23:11:34 +030011474
11475 /* This is needed for same reason as it was needed in prepare_vmcs02 */
11476 vmx->host_rsp = 0;
11477
11478 /* Unpin physical memory we referred to in vmcs02 */
11479 if (vmx->nested.apic_access_page) {
David Hildenbrand53a70da2017-08-03 18:11:05 +020011480 kvm_release_page_dirty(vmx->nested.apic_access_page);
Paolo Bonzini48d89b92014-08-26 13:27:46 +020011481 vmx->nested.apic_access_page = NULL;
Nadav Har'El4704d0b2011-05-25 23:11:34 +030011482 }
Wanpeng Lia7c0b072014-08-21 19:46:50 +080011483 if (vmx->nested.virtual_apic_page) {
David Hildenbrand53a70da2017-08-03 18:11:05 +020011484 kvm_release_page_dirty(vmx->nested.virtual_apic_page);
Paolo Bonzini48d89b92014-08-26 13:27:46 +020011485 vmx->nested.virtual_apic_page = NULL;
Wanpeng Lia7c0b072014-08-21 19:46:50 +080011486 }
Wincy Van705699a2015-02-03 23:58:17 +080011487 if (vmx->nested.pi_desc_page) {
11488 kunmap(vmx->nested.pi_desc_page);
David Hildenbrand53a70da2017-08-03 18:11:05 +020011489 kvm_release_page_dirty(vmx->nested.pi_desc_page);
Wincy Van705699a2015-02-03 23:58:17 +080011490 vmx->nested.pi_desc_page = NULL;
11491 vmx->nested.pi_desc = NULL;
11492 }
Nadav Har'El4704d0b2011-05-25 23:11:34 +030011493
11494 /*
Tang Chen38b99172014-09-24 15:57:54 +080011495	 * We are now running in L2; the mmu_notifier will force a reload of the
11496	 * page's hpa for the L2 vmcs. It needs to be reloaded for L1 before entering L1.
11497 */
Wanpeng Lic83b6d12016-09-06 17:20:33 +080011498 kvm_make_request(KVM_REQ_APIC_PAGE_RELOAD, vcpu);
Tang Chen38b99172014-09-24 15:57:54 +080011499
Ladi Prosek72e9cbd2017-10-11 16:54:43 +020011500 if (enable_shadow_vmcs && exit_reason != -1)
Abel Gordon012f83c2013-04-18 14:39:25 +030011501 vmx->nested.sync_shadow_vmcs = true;
Jan Kiszkab6b8a142014-03-07 20:03:12 +010011502
11503 /* in case we halted in L2 */
11504 vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
Jim Mattson4f350c62017-09-14 16:31:44 -070011505
11506 if (likely(!vmx->fail)) {
11507 /*
11508 * TODO: SDM says that with acknowledge interrupt on
11509 * exit, bit 31 of the VM-exit interrupt information
11510 * (valid interrupt) is always set to 1 on
11511 * EXIT_REASON_EXTERNAL_INTERRUPT, so we shouldn't
11512 * need kvm_cpu_has_interrupt(). See the commit
11513 * message for details.
11514 */
11515 if (nested_exit_intr_ack_set(vcpu) &&
11516 exit_reason == EXIT_REASON_EXTERNAL_INTERRUPT &&
11517 kvm_cpu_has_interrupt(vcpu)) {
11518 int irq = kvm_cpu_get_interrupt(vcpu);
11519 WARN_ON(irq < 0);
11520 vmcs12->vm_exit_intr_info = irq |
11521 INTR_INFO_VALID_MASK | INTR_TYPE_EXT_INTR;
11522 }
11523
Ladi Prosek72e9cbd2017-10-11 16:54:43 +020011524 if (exit_reason != -1)
11525 trace_kvm_nested_vmexit_inject(vmcs12->vm_exit_reason,
11526 vmcs12->exit_qualification,
11527 vmcs12->idt_vectoring_info_field,
11528 vmcs12->vm_exit_intr_info,
11529 vmcs12->vm_exit_intr_error_code,
11530 KVM_ISA_VMX);
Jim Mattson4f350c62017-09-14 16:31:44 -070011531
11532 load_vmcs12_host_state(vcpu, vmcs12);
11533
11534 return;
11535 }
11536
11537 /*
11538 * After an early L2 VM-entry failure, we're now back
11539 * in L1 which thinks it just finished a VMLAUNCH or
11540 * VMRESUME instruction, so we need to set the failure
11541 * flag and the VM-instruction error field of the VMCS
11542 * accordingly.
11543 */
11544 nested_vmx_failValid(vcpu, VMXERR_ENTRY_INVALID_CONTROL_FIELD);
Wanpeng Li5af41572017-11-05 16:54:49 -080011545
11546 load_vmcs12_mmu_host_state(vcpu, vmcs12);
11547
Jim Mattson4f350c62017-09-14 16:31:44 -070011548 /*
11549 * The emulated instruction was already skipped in
11550 * nested_vmx_run, but the updated RIP was never
11551 * written back to the vmcs01.
11552 */
11553 skip_emulated_instruction(vcpu);
11554 vmx->fail = 0;
Nadav Har'El4704d0b2011-05-25 23:11:34 +030011555}
11556
Nadav Har'El7c177932011-05-25 23:12:04 +030011557/*
Jan Kiszka42124922014-01-04 18:47:19 +010011558 * Forcibly leave nested mode in order to be able to reset the VCPU later on.
11559 */
11560static void vmx_leave_nested(struct kvm_vcpu *vcpu)
11561{
Wanpeng Li2f707d92017-03-06 04:03:28 -080011562 if (is_guest_mode(vcpu)) {
11563 to_vmx(vcpu)->nested.nested_run_pending = 0;
Jan Kiszka533558b2014-01-04 18:47:20 +010011564 nested_vmx_vmexit(vcpu, -1, 0, 0);
Wanpeng Li2f707d92017-03-06 04:03:28 -080011565 }
Jan Kiszka42124922014-01-04 18:47:19 +010011566 free_nested(to_vmx(vcpu));
11567}
11568
11569/*
Nadav Har'El7c177932011-05-25 23:12:04 +030011570 * L1's failure to enter L2 is a subset of a normal exit, as explained in
11571 * 23.7 "VM-entry failures during or after loading guest state" (this also
11572 * lists the acceptable exit-reason and exit-qualification parameters).
11573 * It should only be called before L2 has actually started to run, and when
11574 * vmcs01 is current (it doesn't leave_guest_mode() or switch VMCSs).
11575 */
11576static void nested_vmx_entry_failure(struct kvm_vcpu *vcpu,
11577 struct vmcs12 *vmcs12,
11578 u32 reason, unsigned long qualification)
11579{
11580 load_vmcs12_host_state(vcpu, vmcs12);
11581 vmcs12->vm_exit_reason = reason | VMX_EXIT_REASONS_FAILED_VMENTRY;
11582 vmcs12->exit_qualification = qualification;
11583 nested_vmx_succeed(vcpu);
Abel Gordon012f83c2013-04-18 14:39:25 +030011584 if (enable_shadow_vmcs)
11585 to_vmx(vcpu)->nested.sync_shadow_vmcs = true;
Nadav Har'El7c177932011-05-25 23:12:04 +030011586}
11587
Joerg Roedel8a76d7f2011-04-04 12:39:27 +020011588static int vmx_check_intercept(struct kvm_vcpu *vcpu,
11589 struct x86_instruction_info *info,
11590 enum x86_intercept_stage stage)
11591{
Paolo Bonzinifb6d4d32016-07-12 11:04:26 +020011592 struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
11593 struct x86_emulate_ctxt *ctxt = &vcpu->arch.emulate_ctxt;
11594
11595 /*
11596 * RDPID causes #UD if disabled through secondary execution controls.
11597 * Because it is marked as EmulateOnUD, we need to intercept it here.
11598 */
11599 if (info->intercept == x86_intercept_rdtscp &&
11600 !nested_cpu_has2(vmcs12, SECONDARY_EXEC_RDTSCP)) {
11601 ctxt->exception.vector = UD_VECTOR;
11602 ctxt->exception.error_code_valid = false;
11603 return X86EMUL_PROPAGATE_FAULT;
11604 }
11605
11606 /* TODO: check more intercepts... */
Joerg Roedel8a76d7f2011-04-04 12:39:27 +020011607 return X86EMUL_CONTINUE;
11608}
11609
Yunhong Jiang64672c92016-06-13 14:19:59 -070011610#ifdef CONFIG_X86_64
11611/* (a << shift) / divisor, return 1 if overflow otherwise 0 */
11612static inline int u64_shl_div_u64(u64 a, unsigned int shift,
11613 u64 divisor, u64 *result)
11614{
11615 u64 low = a << shift, high = a >> (64 - shift);
11616
11617 /* To avoid the overflow on divq */
11618 if (high >= divisor)
11619 return 1;
11620
11621	/* Low holds the result, high holds the remainder, which is discarded */
11622 asm("divq %2\n\t" : "=a" (low), "=d" (high) :
11623 "rm" (divisor), "0" (low), "1" (high));
11624 *result = low;
11625
11626 return 0;
11627}
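
/*
 * Worked example for u64_shl_div_u64() above (values chosen for
 * illustration): with a = 2^20, shift = 48 and divisor = 2^49, the
 * conceptual 128-bit intermediate is a << shift = 2^68, carried as
 *     high = a >> (64 - shift) = 2^4,   low = (a << shift) mod 2^64 = 0.
 * Since high < divisor, divq cannot overflow and the result is
 * 2^68 / 2^49 = 2^19.  vmx_set_hv_timer() below uses this shape of
 * computation to convert a guest TSC delta into a host TSC delta:
 *     host_delta = (guest_delta << kvm_tsc_scaling_ratio_frac_bits)
 *                  / vcpu->arch.tsc_scaling_ratio
 */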
11628
11629static int vmx_set_hv_timer(struct kvm_vcpu *vcpu, u64 guest_deadline_tsc)
11630{
11631 struct vcpu_vmx *vmx = to_vmx(vcpu);
Paolo Bonzini9175d2e2016-06-27 15:08:01 +020011632 u64 tscl = rdtsc();
11633 u64 guest_tscl = kvm_read_l1_tsc(vcpu, tscl);
11634 u64 delta_tsc = max(guest_deadline_tsc, guest_tscl) - guest_tscl;
Yunhong Jiang64672c92016-06-13 14:19:59 -070011635
11636 /* Convert to host delta tsc if tsc scaling is enabled */
11637 if (vcpu->arch.tsc_scaling_ratio != kvm_default_tsc_scaling_ratio &&
11638 u64_shl_div_u64(delta_tsc,
11639 kvm_tsc_scaling_ratio_frac_bits,
11640 vcpu->arch.tsc_scaling_ratio,
11641 &delta_tsc))
11642 return -ERANGE;
11643
11644 /*
11645	 * If the delta tsc can't fit in 32 bits after the multiplier shift,
11646 * we can't use the preemption timer.
11647 * It's possible that it fits on later vmentries, but checking
11648 * on every vmentry is costly so we just use an hrtimer.
11649 */
11650 if (delta_tsc >> (cpu_preemption_timer_multi + 32))
11651 return -ERANGE;
11652
11653 vmx->hv_deadline_tsc = tscl + delta_tsc;
11654 vmcs_set_bits(PIN_BASED_VM_EXEC_CONTROL,
11655 PIN_BASED_VMX_PREEMPTION_TIMER);
Wanpeng Lic8533542017-06-29 06:28:09 -070011656
11657 return delta_tsc == 0;
Yunhong Jiang64672c92016-06-13 14:19:59 -070011658}
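
/*
 * Rough bound behind the "fits in 32 bits" check above: the VMX preemption
 * timer is a 32-bit counter that ticks once every
 * 2^cpu_preemption_timer_multi TSC cycles, so the farthest deadline it can
 * express is about 2^(cpu_preemption_timer_multi + 32) TSC cycles.  With a
 * hypothetical 2.5 GHz TSC and a multiplier field of 5, that is roughly
 * 2^37 / 2.5e9 ~ 55 seconds; deadlines beyond that keep using the hrtimer
 * instead.
 */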
11659
11660static void vmx_cancel_hv_timer(struct kvm_vcpu *vcpu)
11661{
11662 struct vcpu_vmx *vmx = to_vmx(vcpu);
11663 vmx->hv_deadline_tsc = -1;
11664 vmcs_clear_bits(PIN_BASED_VM_EXEC_CONTROL,
11665 PIN_BASED_VMX_PREEMPTION_TIMER);
11666}
11667#endif
11668
Paolo Bonzini48d89b92014-08-26 13:27:46 +020011669static void vmx_sched_in(struct kvm_vcpu *vcpu, int cpu)
Radim Krčmářae97a3b2014-08-21 18:08:06 +020011670{
Radim Krčmářb4a2d312014-08-21 18:08:08 +020011671 if (ple_gap)
11672 shrink_ple_window(vcpu);
Radim Krčmářae97a3b2014-08-21 18:08:06 +020011673}
11674
Kai Huang843e4332015-01-28 10:54:28 +080011675static void vmx_slot_enable_log_dirty(struct kvm *kvm,
11676 struct kvm_memory_slot *slot)
11677{
11678 kvm_mmu_slot_leaf_clear_dirty(kvm, slot);
11679 kvm_mmu_slot_largepage_remove_write_access(kvm, slot);
11680}
11681
11682static void vmx_slot_disable_log_dirty(struct kvm *kvm,
11683 struct kvm_memory_slot *slot)
11684{
11685 kvm_mmu_slot_set_dirty(kvm, slot);
11686}
11687
11688static void vmx_flush_log_dirty(struct kvm *kvm)
11689{
11690 kvm_flush_pml_buffers(kvm);
11691}
11692
Bandan Dasc5f983f2017-05-05 15:25:14 -040011693static int vmx_write_pml_buffer(struct kvm_vcpu *vcpu)
11694{
11695 struct vmcs12 *vmcs12;
11696 struct vcpu_vmx *vmx = to_vmx(vcpu);
11697 gpa_t gpa;
11698 struct page *page = NULL;
11699 u64 *pml_address;
11700
11701 if (is_guest_mode(vcpu)) {
11702 WARN_ON_ONCE(vmx->nested.pml_full);
11703
11704 /*
11705 * Check if PML is enabled for the nested guest.
11706 * Whether eptp bit 6 is set is already checked
11707 * as part of A/D emulation.
11708 */
11709 vmcs12 = get_vmcs12(vcpu);
11710 if (!nested_cpu_has_pml(vmcs12))
11711 return 0;
11712
Dan Carpenter47698862017-05-10 22:43:17 +030011713 if (vmcs12->guest_pml_index >= PML_ENTITY_NUM) {
Bandan Dasc5f983f2017-05-05 15:25:14 -040011714 vmx->nested.pml_full = true;
11715 return 1;
11716 }
11717
11718 gpa = vmcs_read64(GUEST_PHYSICAL_ADDRESS) & ~0xFFFull;
11719
David Hildenbrand5e2f30b2017-08-03 18:11:04 +020011720 page = kvm_vcpu_gpa_to_page(vcpu, vmcs12->pml_address);
11721 if (is_error_page(page))
Bandan Dasc5f983f2017-05-05 15:25:14 -040011722 return 0;
11723
11724 pml_address = kmap(page);
11725 pml_address[vmcs12->guest_pml_index--] = gpa;
11726 kunmap(page);
David Hildenbrand53a70da2017-08-03 18:11:05 +020011727 kvm_release_page_clean(page);
Bandan Dasc5f983f2017-05-05 15:25:14 -040011728 }
11729
11730 return 0;
11731}
11732
Kai Huang843e4332015-01-28 10:54:28 +080011733static void vmx_enable_log_dirty_pt_masked(struct kvm *kvm,
11734 struct kvm_memory_slot *memslot,
11735 gfn_t offset, unsigned long mask)
11736{
11737 kvm_mmu_clear_dirty_pt_masked(kvm, memslot, offset, mask);
11738}
11739
Paolo Bonzinicd39e112017-06-06 12:57:04 +020011740static void __pi_post_block(struct kvm_vcpu *vcpu)
11741{
11742 struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu);
11743 struct pi_desc old, new;
11744 unsigned int dest;
Paolo Bonzinicd39e112017-06-06 12:57:04 +020011745
11746 do {
11747 old.control = new.control = pi_desc->control;
Paolo Bonzini8b306e22017-06-06 12:57:05 +020011748 WARN(old.nv != POSTED_INTR_WAKEUP_VECTOR,
11749 "Wakeup handler not enabled while the VCPU is blocked\n");
Paolo Bonzinicd39e112017-06-06 12:57:04 +020011750
11751 dest = cpu_physical_id(vcpu->cpu);
11752
11753 if (x2apic_enabled())
11754 new.ndst = dest;
11755 else
11756 new.ndst = (dest << 8) & 0xFF00;
11757
Paolo Bonzinicd39e112017-06-06 12:57:04 +020011758 /* set 'NV' to 'notification vector' */
11759 new.nv = POSTED_INTR_VECTOR;
Paolo Bonzinic0a16662017-09-28 17:58:41 +020011760 } while (cmpxchg64(&pi_desc->control, old.control,
11761 new.control) != old.control);
Paolo Bonzinicd39e112017-06-06 12:57:04 +020011762
Paolo Bonzini8b306e22017-06-06 12:57:05 +020011763 if (!WARN_ON_ONCE(vcpu->pre_pcpu == -1)) {
11764 spin_lock(&per_cpu(blocked_vcpu_on_cpu_lock, vcpu->pre_pcpu));
Paolo Bonzinicd39e112017-06-06 12:57:04 +020011765 list_del(&vcpu->blocked_vcpu_list);
Paolo Bonzini8b306e22017-06-06 12:57:05 +020011766 spin_unlock(&per_cpu(blocked_vcpu_on_cpu_lock, vcpu->pre_pcpu));
Paolo Bonzinicd39e112017-06-06 12:57:04 +020011767 vcpu->pre_pcpu = -1;
11768 }
11769}
11770
Feng Wuefc64402015-09-18 22:29:51 +080011771/*
Feng Wubf9f6ac2015-09-18 22:29:55 +080011772 * This routine does the following things for a vCPU which is going
11773 * to be blocked, if VT-d PI is enabled:
11774 * - Store the vCPU to the wakeup list, so when interrupts happen
11775 * we can find the right vCPU to wake up.
11776 * - Change the Posted-interrupt descriptor as below:
11777 * 'NDST' <-- vcpu->pre_pcpu
11778 * 'NV' <-- POSTED_INTR_WAKEUP_VECTOR
11779 * - If 'ON' is set during this process, which means at least one
11780 *   interrupt is posted for this vCPU, we cannot block it; in
11781 *   this case, return 1, otherwise return 0.
11782 *
11783 */
Yunhong Jiangbc225122016-06-13 14:19:58 -070011784static int pi_pre_block(struct kvm_vcpu *vcpu)
Feng Wubf9f6ac2015-09-18 22:29:55 +080011785{
Feng Wubf9f6ac2015-09-18 22:29:55 +080011786 unsigned int dest;
11787 struct pi_desc old, new;
11788 struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu);
11789
11790 if (!kvm_arch_has_assigned_device(vcpu->kvm) ||
Yang Zhanga0052192016-06-13 09:56:56 +080011791 !irq_remapping_cap(IRQ_POSTING_CAP) ||
11792 !kvm_vcpu_apicv_active(vcpu))
Feng Wubf9f6ac2015-09-18 22:29:55 +080011793 return 0;
11794
Paolo Bonzini8b306e22017-06-06 12:57:05 +020011795 WARN_ON(irqs_disabled());
11796 local_irq_disable();
11797 if (!WARN_ON_ONCE(vcpu->pre_pcpu != -1)) {
11798 vcpu->pre_pcpu = vcpu->cpu;
11799 spin_lock(&per_cpu(blocked_vcpu_on_cpu_lock, vcpu->pre_pcpu));
11800 list_add_tail(&vcpu->blocked_vcpu_list,
11801 &per_cpu(blocked_vcpu_on_cpu,
11802 vcpu->pre_pcpu));
11803 spin_unlock(&per_cpu(blocked_vcpu_on_cpu_lock, vcpu->pre_pcpu));
11804 }
Feng Wubf9f6ac2015-09-18 22:29:55 +080011805
11806 do {
11807 old.control = new.control = pi_desc->control;
11808
Feng Wubf9f6ac2015-09-18 22:29:55 +080011809 WARN((pi_desc->sn == 1),
11810 "Warning: SN field of posted-interrupts "
11811 "is set before blocking\n");
11812
11813 /*
11814		 * Since the vCPU can be preempted during this process,
11815		 * vcpu->cpu could be different from pre_pcpu, so we
11816		 * need to set pre_pcpu as the destination of the wakeup
11817		 * notification event; then we can find the right vCPU
11818		 * to wake up in the wakeup handler if interrupts happen
11819		 * when the vCPU is in the blocked state.
11820 */
11821 dest = cpu_physical_id(vcpu->pre_pcpu);
11822
11823 if (x2apic_enabled())
11824 new.ndst = dest;
11825 else
11826 new.ndst = (dest << 8) & 0xFF00;
11827
11828 /* set 'NV' to 'wakeup vector' */
11829 new.nv = POSTED_INTR_WAKEUP_VECTOR;
Paolo Bonzinic0a16662017-09-28 17:58:41 +020011830 } while (cmpxchg64(&pi_desc->control, old.control,
11831 new.control) != old.control);
Feng Wubf9f6ac2015-09-18 22:29:55 +080011832
Paolo Bonzini8b306e22017-06-06 12:57:05 +020011833 /* We should not block the vCPU if an interrupt is posted for it. */
11834 if (pi_test_on(pi_desc) == 1)
11835 __pi_post_block(vcpu);
11836
11837 local_irq_enable();
11838 return (vcpu->pre_pcpu == -1);
Feng Wubf9f6ac2015-09-18 22:29:55 +080011839}
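
/*
 * Both __pi_post_block() and pi_pre_block() above rewrite the
 * posted-interrupt descriptor with the same lock-free pattern; a distilled
 * sketch of that pattern (this helper itself is illustrative only, not a
 * function used by this file):
 */
static inline void pi_retarget_notification(struct pi_desc *pi_desc,
                                            unsigned int dest, u8 vector)
{
        struct pi_desc old, new;

        do {
                old.control = new.control = pi_desc->control;
                /* Route future notifications to the chosen CPU ... */
                new.ndst = x2apic_enabled() ? dest : ((dest << 8) & 0xFF00);
                /* ... and deliver them on the chosen vector. */
                new.nv = vector;
                /* Retry if a posted interrupt raced and changed the word. */
        } while (cmpxchg64(&pi_desc->control, old.control,
                           new.control) != old.control);
}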
11840
Yunhong Jiangbc225122016-06-13 14:19:58 -070011841static int vmx_pre_block(struct kvm_vcpu *vcpu)
11842{
11843 if (pi_pre_block(vcpu))
11844 return 1;
11845
Yunhong Jiang64672c92016-06-13 14:19:59 -070011846 if (kvm_lapic_hv_timer_in_use(vcpu))
11847 kvm_lapic_switch_to_sw_timer(vcpu);
11848
Yunhong Jiangbc225122016-06-13 14:19:58 -070011849 return 0;
11850}
11851
11852static void pi_post_block(struct kvm_vcpu *vcpu)
Feng Wubf9f6ac2015-09-18 22:29:55 +080011853{
Paolo Bonzini8b306e22017-06-06 12:57:05 +020011854 if (vcpu->pre_pcpu == -1)
Feng Wubf9f6ac2015-09-18 22:29:55 +080011855 return;
11856
Paolo Bonzini8b306e22017-06-06 12:57:05 +020011857 WARN_ON(irqs_disabled());
11858 local_irq_disable();
Paolo Bonzinicd39e112017-06-06 12:57:04 +020011859 __pi_post_block(vcpu);
Paolo Bonzini8b306e22017-06-06 12:57:05 +020011860 local_irq_enable();
Feng Wubf9f6ac2015-09-18 22:29:55 +080011861}
11862
Yunhong Jiangbc225122016-06-13 14:19:58 -070011863static void vmx_post_block(struct kvm_vcpu *vcpu)
11864{
Yunhong Jiang64672c92016-06-13 14:19:59 -070011865 if (kvm_x86_ops->set_hv_timer)
11866 kvm_lapic_switch_to_hv_timer(vcpu);
11867
Yunhong Jiangbc225122016-06-13 14:19:58 -070011868 pi_post_block(vcpu);
11869}
11870
Feng Wubf9f6ac2015-09-18 22:29:55 +080011871/*
Feng Wuefc64402015-09-18 22:29:51 +080011872 * vmx_update_pi_irte - set IRTE for Posted-Interrupts
11873 *
11874 * @kvm: kvm
11875 * @host_irq: host irq of the interrupt
11876 * @guest_irq: gsi of the interrupt
11877 * @set: set or unset PI
11878 * returns 0 on success, < 0 on failure
11879 */
11880static int vmx_update_pi_irte(struct kvm *kvm, unsigned int host_irq,
11881 uint32_t guest_irq, bool set)
11882{
11883 struct kvm_kernel_irq_routing_entry *e;
11884 struct kvm_irq_routing_table *irq_rt;
11885 struct kvm_lapic_irq irq;
11886 struct kvm_vcpu *vcpu;
11887 struct vcpu_data vcpu_info;
Jan H. Schönherr3a8b0672017-09-07 19:02:30 +010011888 int idx, ret = 0;
Feng Wuefc64402015-09-18 22:29:51 +080011889
11890 if (!kvm_arch_has_assigned_device(kvm) ||
Yang Zhanga0052192016-06-13 09:56:56 +080011891 !irq_remapping_cap(IRQ_POSTING_CAP) ||
11892 !kvm_vcpu_apicv_active(kvm->vcpus[0]))
Feng Wuefc64402015-09-18 22:29:51 +080011893 return 0;
11894
11895 idx = srcu_read_lock(&kvm->irq_srcu);
11896 irq_rt = srcu_dereference(kvm->irq_routing, &kvm->irq_srcu);
Jan H. Schönherr3a8b0672017-09-07 19:02:30 +010011897 if (guest_irq >= irq_rt->nr_rt_entries ||
11898 hlist_empty(&irq_rt->map[guest_irq])) {
11899 pr_warn_once("no route for guest_irq %u/%u (broken user space?)\n",
11900 guest_irq, irq_rt->nr_rt_entries);
11901 goto out;
11902 }
Feng Wuefc64402015-09-18 22:29:51 +080011903
11904 hlist_for_each_entry(e, &irq_rt->map[guest_irq], link) {
11905 if (e->type != KVM_IRQ_ROUTING_MSI)
11906 continue;
11907 /*
11908 * VT-d PI cannot support posting multicast/broadcast
11909		 * interrupts to a vCPU, so we still use interrupt remapping
11910		 * for these kinds of interrupts.
11911 *
11912 * For lowest-priority interrupts, we only support
11913		 * those with a single CPU as the destination, e.g. the user
11914 * configures the interrupts via /proc/irq or uses
11915 * irqbalance to make the interrupts single-CPU.
11916 *
11917 * We will support full lowest-priority interrupt later.
11918 */
11919
Radim Krčmář371313132016-07-12 22:09:27 +020011920 kvm_set_msi_irq(kvm, e, &irq);
Feng Wu23a1c252016-01-25 16:53:32 +080011921 if (!kvm_intr_is_single_vcpu(kvm, &irq, &vcpu)) {
11922 /*
11923 * Make sure the IRTE is in remapped mode if
11924 * we don't handle it in posted mode.
11925 */
11926 ret = irq_set_vcpu_affinity(host_irq, NULL);
11927 if (ret < 0) {
11928 printk(KERN_INFO
11929 "failed to back to remapped mode, irq: %u\n",
11930 host_irq);
11931 goto out;
11932 }
11933
Feng Wuefc64402015-09-18 22:29:51 +080011934 continue;
Feng Wu23a1c252016-01-25 16:53:32 +080011935 }
Feng Wuefc64402015-09-18 22:29:51 +080011936
11937 vcpu_info.pi_desc_addr = __pa(vcpu_to_pi_desc(vcpu));
11938 vcpu_info.vector = irq.vector;
11939
Feng Wub6ce9782016-01-25 16:53:35 +080011940 trace_kvm_pi_irte_update(vcpu->vcpu_id, host_irq, e->gsi,
Feng Wuefc64402015-09-18 22:29:51 +080011941 vcpu_info.vector, vcpu_info.pi_desc_addr, set);
11942
11943 if (set)
11944 ret = irq_set_vcpu_affinity(host_irq, &vcpu_info);
Haozhong Zhangdc91f2eb2017-09-18 09:56:49 +080011945 else
Feng Wuefc64402015-09-18 22:29:51 +080011946 ret = irq_set_vcpu_affinity(host_irq, NULL);
Feng Wuefc64402015-09-18 22:29:51 +080011947
11948 if (ret < 0) {
11949 printk(KERN_INFO "%s: failed to update PI IRTE\n",
11950 __func__);
11951 goto out;
11952 }
11953 }
11954
11955 ret = 0;
11956out:
11957 srcu_read_unlock(&kvm->irq_srcu, idx);
11958 return ret;
11959}
11960
Ashok Rajc45dcc72016-06-22 14:59:56 +080011961static void vmx_setup_mce(struct kvm_vcpu *vcpu)
11962{
11963 if (vcpu->arch.mcg_cap & MCG_LMCE_P)
11964 to_vmx(vcpu)->msr_ia32_feature_control_valid_bits |=
11965 FEATURE_CONTROL_LMCE;
11966 else
11967 to_vmx(vcpu)->msr_ia32_feature_control_valid_bits &=
11968 ~FEATURE_CONTROL_LMCE;
11969}
11970
Ladi Prosek72d7b372017-10-11 16:54:41 +020011971static int vmx_smi_allowed(struct kvm_vcpu *vcpu)
11972{
Ladi Prosek72e9cbd2017-10-11 16:54:43 +020011973 /* we need a nested vmexit to enter SMM, postpone if run is pending */
11974 if (to_vmx(vcpu)->nested.nested_run_pending)
11975 return 0;
Ladi Prosek72d7b372017-10-11 16:54:41 +020011976 return 1;
11977}
11978
Ladi Prosek0234bf82017-10-11 16:54:40 +020011979static int vmx_pre_enter_smm(struct kvm_vcpu *vcpu, char *smstate)
11980{
Ladi Prosek72e9cbd2017-10-11 16:54:43 +020011981 struct vcpu_vmx *vmx = to_vmx(vcpu);
11982
11983 vmx->nested.smm.guest_mode = is_guest_mode(vcpu);
11984 if (vmx->nested.smm.guest_mode)
11985 nested_vmx_vmexit(vcpu, -1, 0, 0);
11986
11987 vmx->nested.smm.vmxon = vmx->nested.vmxon;
11988 vmx->nested.vmxon = false;
Ladi Prosek0234bf82017-10-11 16:54:40 +020011989 return 0;
11990}
11991
11992static int vmx_pre_leave_smm(struct kvm_vcpu *vcpu, u64 smbase)
11993{
Ladi Prosek72e9cbd2017-10-11 16:54:43 +020011994 struct vcpu_vmx *vmx = to_vmx(vcpu);
11995 int ret;
11996
11997 if (vmx->nested.smm.vmxon) {
11998 vmx->nested.vmxon = true;
11999 vmx->nested.smm.vmxon = false;
12000 }
12001
12002 if (vmx->nested.smm.guest_mode) {
12003 vcpu->arch.hflags &= ~HF_SMM_MASK;
12004 ret = enter_vmx_non_root_mode(vcpu, false);
12005 vcpu->arch.hflags |= HF_SMM_MASK;
12006 if (ret)
12007 return ret;
12008
12009 vmx->nested.smm.guest_mode = false;
12010 }
Ladi Prosek0234bf82017-10-11 16:54:40 +020012011 return 0;
12012}
12013
Ladi Prosekcc3d9672017-10-17 16:02:39 +020012014static int enable_smi_window(struct kvm_vcpu *vcpu)
12015{
12016 return 0;
12017}
12018
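/*
 * Editorial sketch (not part of vmx.c): the SMM hooks above stash the
 * nested-virtualization state (VMXON, L2 guest mode) on SMM entry and
 * restore it on SMM exit. The structure and functions below are a
 * simplified, hypothetical model of that save/restore flow, not the
 * kernel code itself.
 */
#include <assert.h>
#include <stdbool.h>

struct nested_state {
	bool vmxon;		/* VMXON has been executed by L1 */
	bool guest_mode;	/* currently running L2 */
	struct {
		bool vmxon;
		bool guest_mode;
	} smm;			/* state parked while inside SMM */
};

/* Mirrors vmx_pre_enter_smm(): leave L2 and hide VMX state from SMM code. */
static void model_pre_enter_smm(struct nested_state *n)
{
	n->smm.guest_mode = n->guest_mode;
	n->guest_mode = false;		/* the real code performs a nested vmexit */
	n->smm.vmxon = n->vmxon;
	n->vmxon = false;
}

/* Mirrors vmx_pre_leave_smm(): restore VMX state, re-enter L2 if needed. */
static void model_pre_leave_smm(struct nested_state *n)
{
	if (n->smm.vmxon) {
		n->vmxon = true;
		n->smm.vmxon = false;
	}
	if (n->smm.guest_mode) {
		n->guest_mode = true;	/* the real code re-enters non-root mode */
		n->smm.guest_mode = false;
	}
}

int main(void)
{
	struct nested_state n = { .vmxon = true, .guest_mode = true };

	model_pre_enter_smm(&n);
	assert(!n.vmxon && !n.guest_mode);
	model_pre_leave_smm(&n);
	assert(n.vmxon && n.guest_mode);
	return 0;
}
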
Kees Cook404f6aa2016-08-08 16:29:06 -070012019static struct kvm_x86_ops vmx_x86_ops __ro_after_init = {
Avi Kivity6aa8b732006-12-10 02:21:36 -080012020 .cpu_has_kvm_support = cpu_has_kvm_support,
12021 .disabled_by_bios = vmx_disabled_by_bios,
12022 .hardware_setup = hardware_setup,
12023 .hardware_unsetup = hardware_unsetup,
Yang, Sheng002c7f72007-07-31 14:23:01 +030012024 .check_processor_compatibility = vmx_check_processor_compat,
Avi Kivity6aa8b732006-12-10 02:21:36 -080012025 .hardware_enable = hardware_enable,
12026 .hardware_disable = hardware_disable,
Sheng Yang04547152009-04-01 15:52:31 +080012027 .cpu_has_accelerated_tpr = report_flexpriority,
Paolo Bonzini6d396b52015-04-01 14:25:33 +020012028 .cpu_has_high_real_mode_segbase = vmx_has_high_real_mode_segbase,
Avi Kivity6aa8b732006-12-10 02:21:36 -080012029
12030 .vcpu_create = vmx_create_vcpu,
12031 .vcpu_free = vmx_free_vcpu,
Avi Kivity04d2cc72007-09-10 18:10:54 +030012032 .vcpu_reset = vmx_vcpu_reset,
Avi Kivity6aa8b732006-12-10 02:21:36 -080012033
Avi Kivity04d2cc72007-09-10 18:10:54 +030012034 .prepare_guest_switch = vmx_save_host_state,
Avi Kivity6aa8b732006-12-10 02:21:36 -080012035 .vcpu_load = vmx_vcpu_load,
12036 .vcpu_put = vmx_vcpu_put,
12037
Paolo Bonzinia96036b2015-11-10 11:55:36 +010012038 .update_bp_intercept = update_exception_bitmap,
Avi Kivity6aa8b732006-12-10 02:21:36 -080012039 .get_msr = vmx_get_msr,
12040 .set_msr = vmx_set_msr,
12041 .get_segment_base = vmx_get_segment_base,
12042 .get_segment = vmx_get_segment,
12043 .set_segment = vmx_set_segment,
Izik Eidus2e4d2652008-03-24 19:38:34 +020012044 .get_cpl = vmx_get_cpl,
Avi Kivity6aa8b732006-12-10 02:21:36 -080012045 .get_cs_db_l_bits = vmx_get_cs_db_l_bits,
Avi Kivitye8467fd2009-12-29 18:43:06 +020012046 .decache_cr0_guest_bits = vmx_decache_cr0_guest_bits,
Avi Kivityaff48ba2010-12-05 18:56:11 +020012047 .decache_cr3 = vmx_decache_cr3,
Anthony Liguori25c4c272007-04-27 09:29:21 +030012048 .decache_cr4_guest_bits = vmx_decache_cr4_guest_bits,
Avi Kivity6aa8b732006-12-10 02:21:36 -080012049 .set_cr0 = vmx_set_cr0,
Avi Kivity6aa8b732006-12-10 02:21:36 -080012050 .set_cr3 = vmx_set_cr3,
12051 .set_cr4 = vmx_set_cr4,
Avi Kivity6aa8b732006-12-10 02:21:36 -080012052 .set_efer = vmx_set_efer,
Avi Kivity6aa8b732006-12-10 02:21:36 -080012053 .get_idt = vmx_get_idt,
12054 .set_idt = vmx_set_idt,
12055 .get_gdt = vmx_get_gdt,
12056 .set_gdt = vmx_set_gdt,
Jan Kiszka73aaf249e2014-01-04 18:47:16 +010012057 .get_dr6 = vmx_get_dr6,
12058 .set_dr6 = vmx_set_dr6,
Gleb Natapov020df072010-04-13 10:05:23 +030012059 .set_dr7 = vmx_set_dr7,
Paolo Bonzini81908bf2014-02-21 10:32:27 +010012060 .sync_dirty_debug_regs = vmx_sync_dirty_debug_regs,
Marcelo Tosatti5fdbf972008-06-27 14:58:02 -030012061 .cache_reg = vmx_cache_reg,
Avi Kivity6aa8b732006-12-10 02:21:36 -080012062 .get_rflags = vmx_get_rflags,
12063 .set_rflags = vmx_set_rflags,
Huaitong Hanbe94f6b2016-03-22 16:51:20 +080012064
Avi Kivity6aa8b732006-12-10 02:21:36 -080012065 .tlb_flush = vmx_flush_tlb,
Avi Kivity6aa8b732006-12-10 02:21:36 -080012066
Avi Kivity6aa8b732006-12-10 02:21:36 -080012067 .run = vmx_vcpu_run,
Avi Kivity6062d012009-03-23 17:35:17 +020012068 .handle_exit = vmx_handle_exit,
Avi Kivity6aa8b732006-12-10 02:21:36 -080012069 .skip_emulated_instruction = skip_emulated_instruction,
Glauber Costa2809f5d2009-05-12 16:21:05 -040012070 .set_interrupt_shadow = vmx_set_interrupt_shadow,
12071 .get_interrupt_shadow = vmx_get_interrupt_shadow,
Ingo Molnar102d8322007-02-19 14:37:47 +020012072 .patch_hypercall = vmx_patch_hypercall,
Eddie Dong2a8067f2007-08-06 16:29:07 +030012073 .set_irq = vmx_inject_irq,
Gleb Natapov95ba8273132009-04-21 17:45:08 +030012074 .set_nmi = vmx_inject_nmi,
Avi Kivity298101d2007-11-25 13:41:11 +020012075 .queue_exception = vmx_queue_exception,
Avi Kivityb463a6f2010-07-20 15:06:17 +030012076 .cancel_injection = vmx_cancel_injection,
Gleb Natapov78646122009-03-23 12:12:11 +020012077 .interrupt_allowed = vmx_interrupt_allowed,
Gleb Natapov95ba8273132009-04-21 17:45:08 +030012078 .nmi_allowed = vmx_nmi_allowed,
Jan Kiszka3cfc3092009-11-12 01:04:25 +010012079 .get_nmi_mask = vmx_get_nmi_mask,
12080 .set_nmi_mask = vmx_set_nmi_mask,
Gleb Natapov95ba8273132009-04-21 17:45:08 +030012081 .enable_nmi_window = enable_nmi_window,
12082 .enable_irq_window = enable_irq_window,
12083 .update_cr8_intercept = update_cr8_intercept,
Yang Zhang8d146952013-01-25 10:18:50 +080012084 .set_virtual_x2apic_mode = vmx_set_virtual_x2apic_mode,
Tang Chen38b99172014-09-24 15:57:54 +080012085 .set_apic_access_page_addr = vmx_set_apic_access_page_addr,
Andrey Smetanind62caab2015-11-10 15:36:33 +030012086 .get_enable_apicv = vmx_get_enable_apicv,
12087 .refresh_apicv_exec_ctrl = vmx_refresh_apicv_exec_ctrl,
Yang Zhangc7c9c562013-01-25 10:18:51 +080012088 .load_eoi_exitmap = vmx_load_eoi_exitmap,
Paolo Bonzini967235d2016-12-19 14:03:45 +010012089 .apicv_post_state_restore = vmx_apicv_post_state_restore,
Yang Zhangc7c9c562013-01-25 10:18:51 +080012090 .hwapic_irr_update = vmx_hwapic_irr_update,
12091 .hwapic_isr_update = vmx_hwapic_isr_update,
Yang Zhanga20ed542013-04-11 19:25:15 +080012092 .sync_pir_to_irr = vmx_sync_pir_to_irr,
12093 .deliver_posted_interrupt = vmx_deliver_posted_interrupt,
Gleb Natapov95ba8273132009-04-21 17:45:08 +030012094
Izik Eiduscbc94022007-10-25 00:29:55 +020012095 .set_tss_addr = vmx_set_tss_addr,
Sheng Yang67253af2008-04-25 10:20:22 +080012096 .get_tdp_level = get_ept_level,
Sheng Yang4b12f0d2009-04-27 20:35:42 +080012097 .get_mt_mask = vmx_get_mt_mask,
Marcelo Tosatti229456f2009-06-17 09:22:14 -030012098
Avi Kivity586f9602010-11-18 13:09:54 +020012099 .get_exit_info = vmx_get_exit_info,
Avi Kivity586f9602010-11-18 13:09:54 +020012100
Sheng Yang17cc3932010-01-05 19:02:27 +080012101 .get_lpage_level = vmx_get_lpage_level,
Sheng Yang0e851882009-12-18 16:48:46 +080012102
12103 .cpuid_update = vmx_cpuid_update,
Sheng Yang4e47c7a2009-12-18 16:48:47 +080012104
12105 .rdtscp_supported = vmx_rdtscp_supported,
Mao, Junjiead756a12012-07-02 01:18:48 +000012106 .invpcid_supported = vmx_invpcid_supported,
Joerg Roedeld4330ef2010-04-22 12:33:11 +020012107
12108 .set_supported_cpuid = vmx_set_supported_cpuid,
Sheng Yangf5f48ee2010-06-30 12:25:15 +080012109
12110 .has_wbinvd_exit = cpu_has_vmx_wbinvd_exit,
Zachary Amsden99e3e302010-08-19 22:07:17 -100012111
12112 .write_tsc_offset = vmx_write_tsc_offset,
Joerg Roedel1c97f0a2010-09-10 17:30:41 +020012113
12114 .set_tdp_cr3 = vmx_set_cr3,
Joerg Roedel8a76d7f2011-04-04 12:39:27 +020012115
12116 .check_intercept = vmx_check_intercept,
Yang Zhanga547c6d2013-04-11 19:25:10 +080012117 .handle_external_intr = vmx_handle_external_intr,
Liu, Jinsongda8999d2014-02-24 10:55:46 +000012118 .mpx_supported = vmx_mpx_supported,
Wanpeng Li55412b22014-12-02 19:21:30 +080012119 .xsaves_supported = vmx_xsaves_supported,
Paolo Bonzini66336ca2016-07-12 10:36:41 +020012120 .umip_emulated = vmx_umip_emulated,
Jan Kiszkab6b8a142014-03-07 20:03:12 +010012121
12122 .check_nested_events = vmx_check_nested_events,
Radim Krčmářae97a3b2014-08-21 18:08:06 +020012123
12124 .sched_in = vmx_sched_in,
Kai Huang843e4332015-01-28 10:54:28 +080012125
12126 .slot_enable_log_dirty = vmx_slot_enable_log_dirty,
12127 .slot_disable_log_dirty = vmx_slot_disable_log_dirty,
12128 .flush_log_dirty = vmx_flush_log_dirty,
12129 .enable_log_dirty_pt_masked = vmx_enable_log_dirty_pt_masked,
Bandan Dasc5f983f2017-05-05 15:25:14 -040012130 .write_log_dirty = vmx_write_pml_buffer,
Wei Huang25462f72015-06-19 15:45:05 +020012131
Feng Wubf9f6ac2015-09-18 22:29:55 +080012132 .pre_block = vmx_pre_block,
12133 .post_block = vmx_post_block,
12134
Wei Huang25462f72015-06-19 15:45:05 +020012135 .pmu_ops = &intel_pmu_ops,
Feng Wuefc64402015-09-18 22:29:51 +080012136
12137 .update_pi_irte = vmx_update_pi_irte,
Yunhong Jiang64672c92016-06-13 14:19:59 -070012138
12139#ifdef CONFIG_X86_64
12140 .set_hv_timer = vmx_set_hv_timer,
12141 .cancel_hv_timer = vmx_cancel_hv_timer,
12142#endif
Ashok Rajc45dcc72016-06-22 14:59:56 +080012143
12144 .setup_mce = vmx_setup_mce,
Ladi Prosek0234bf82017-10-11 16:54:40 +020012145
Ladi Prosek72d7b372017-10-11 16:54:41 +020012146 .smi_allowed = vmx_smi_allowed,
Ladi Prosek0234bf82017-10-11 16:54:40 +020012147 .pre_enter_smm = vmx_pre_enter_smm,
12148 .pre_leave_smm = vmx_pre_leave_smm,
Ladi Prosekcc3d9672017-10-17 16:02:39 +020012149 .enable_smi_window = enable_smi_window,
Avi Kivity6aa8b732006-12-10 02:21:36 -080012150};
12151
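/*
 * Editorial sketch (not part of vmx.c): vmx_x86_ops above is a table of
 * function pointers that vmx_init() hands to kvm_init(), so generic x86
 * KVM code can dispatch into the VMX backend without compile-time
 * coupling. The miniature ops table below is a hypothetical illustration
 * of that pattern; none of these names exist in KVM.
 */
#include <stdio.h>

struct vcpu;				/* opaque to the generic layer */

struct hv_ops {
	const char *name;
	int (*run)(struct vcpu *v);		/* e.g. vmx_vcpu_run */
	int (*smi_allowed)(struct vcpu *v);	/* e.g. vmx_smi_allowed */
};

static int demo_run(struct vcpu *v)		{ (void)v; return 0; }
static int demo_smi_allowed(struct vcpu *v)	{ (void)v; return 1; }

static const struct hv_ops demo_ops = {
	.name		= "demo",
	.run		= demo_run,
	.smi_allowed	= demo_smi_allowed,
};

/* Generic code only ever sees the ops pointer handed in at init time. */
static void generic_loop(const struct hv_ops *ops, struct vcpu *v)
{
	if (ops->smi_allowed(v))
		printf("%s: SMI may be injected before the next run\n", ops->name);
	ops->run(v);
}

int main(void)
{
	generic_loop(&demo_ops, NULL);
	return 0;
}
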
12152static int __init vmx_init(void)
12153{
Tiejun Chen34a1cd62014-10-28 10:14:48 +080012154 int r = kvm_init(&vmx_x86_ops, sizeof(struct vcpu_vmx),
12155 __alignof__(struct vcpu_vmx), THIS_MODULE);
He, Qingfdef3ad2007-04-30 09:45:24 +030012156 if (r)
Tiejun Chen34a1cd62014-10-28 10:14:48 +080012157 return r;
Sheng Yang25c5f222008-03-28 13:18:56 +080012158
Dave Young2965faa2015-09-09 15:38:55 -070012159#ifdef CONFIG_KEXEC_CORE
Zhang Yanfei8f536b72012-12-06 23:43:34 +080012160 rcu_assign_pointer(crash_vmclear_loaded_vmcss,
12161 crash_vmclear_local_loaded_vmcss);
12162#endif
12163
He, Qingfdef3ad2007-04-30 09:45:24 +030012164 return 0;
Avi Kivity6aa8b732006-12-10 02:21:36 -080012165}
12166
12167static void __exit vmx_exit(void)
12168{
Dave Young2965faa2015-09-09 15:38:55 -070012169#ifdef CONFIG_KEXEC_CORE
Monam Agarwal3b63a432014-03-22 12:28:10 +053012170 RCU_INIT_POINTER(crash_vmclear_loaded_vmcss, NULL);
Zhang Yanfei8f536b72012-12-06 23:43:34 +080012171 synchronize_rcu();
12172#endif
12173
Zhang Xiantaocb498ea2007-11-14 20:39:31 +080012174 kvm_exit();
Avi Kivity6aa8b732006-12-10 02:21:36 -080012175}
12176
12177module_init(vmx_init)
12178module_exit(vmx_exit)