/*
 * Kernel-based Virtual Machine driver for Linux
 *
 * This module enables machines with Intel VT-x extensions to run virtual
 * machines without emulation or binary translation.
 *
 * Copyright (C) 2006 Qumranet, Inc.
 * Copyright 2010 Red Hat, Inc. and/or its affiliates.
 *
 * Authors:
 *   Avi Kivity   <avi@qumranet.com>
 *   Yaniv Kamay  <yaniv@qumranet.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2.  See
 * the COPYING file in the top-level directory.
 *
 */

#include "irq.h"
#include "mmu.h"
#include "cpuid.h"
#include "lapic.h"
#include "hyperv.h"

#include <linux/kvm_host.h>
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/highmem.h>
#include <linux/sched.h>
#include <linux/moduleparam.h>
#include <linux/mod_devicetable.h>
#include <linux/trace_events.h>
#include <linux/slab.h>
#include <linux/tboot.h>
#include <linux/hrtimer.h>
#include <linux/frame.h>
#include <linux/nospec.h>
#include "kvm_cache_regs.h"
#include "x86.h"

#include <asm/asm.h>
#include <asm/cpu.h>
#include <asm/io.h>
#include <asm/desc.h>
#include <asm/vmx.h>
#include <asm/virtext.h>
#include <asm/mce.h>
#include <asm/fpu/internal.h>
#include <asm/perf_event.h>
#include <asm/debugreg.h>
#include <asm/kexec.h>
#include <asm/apic.h>
#include <asm/irq_remapping.h>
#include <asm/mmu_context.h>
#include <asm/spec-ctrl.h>
#include <asm/mshyperv.h>

#include "trace.h"
#include "pmu.h"
#include "vmx_evmcs.h"

#define __ex(x) __kvm_handle_fault_on_reboot(x)
#define __ex_clear(x, reg) \
	____kvm_handle_fault_on_reboot(x, "xor " reg ", " reg)

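/*
 * __ex() and __ex_clear() wrap a VMX instruction with an exception-table
 * fixup so that a fault raised after VMX has been disabled (e.g. during an
 * emergency reboot or kexec) is handled gracefully instead of crashing the
 * host; __ex_clear() additionally runs "xor reg, reg" on the fixup path so
 * the output register reads as zero after such a fault.
 */
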
MODULE_AUTHOR("Qumranet");
MODULE_LICENSE("GPL");

static const struct x86_cpu_id vmx_cpu_id[] = {
	X86_FEATURE_MATCH(X86_FEATURE_VMX),
	{}
};
MODULE_DEVICE_TABLE(x86cpu, vmx_cpu_id);

static bool __read_mostly enable_vpid = 1;
module_param_named(vpid, enable_vpid, bool, 0444);

static bool __read_mostly enable_vnmi = 1;
module_param_named(vnmi, enable_vnmi, bool, S_IRUGO);

static bool __read_mostly flexpriority_enabled = 1;
module_param_named(flexpriority, flexpriority_enabled, bool, S_IRUGO);

static bool __read_mostly enable_ept = 1;
module_param_named(ept, enable_ept, bool, S_IRUGO);

static bool __read_mostly enable_unrestricted_guest = 1;
module_param_named(unrestricted_guest,
			enable_unrestricted_guest, bool, S_IRUGO);

static bool __read_mostly enable_ept_ad_bits = 1;
module_param_named(eptad, enable_ept_ad_bits, bool, S_IRUGO);

static bool __read_mostly emulate_invalid_guest_state = true;
module_param(emulate_invalid_guest_state, bool, S_IRUGO);

static bool __read_mostly fasteoi = 1;
module_param(fasteoi, bool, S_IRUGO);

static bool __read_mostly enable_apicv = 1;
module_param(enable_apicv, bool, S_IRUGO);

static bool __read_mostly enable_shadow_vmcs = 1;
module_param_named(enable_shadow_vmcs, enable_shadow_vmcs, bool, S_IRUGO);
/*
 * If nested=1, nested virtualization is supported, i.e., guests may use
 * VMX and act as hypervisors for their own guests. If nested=0, guests may
 * not use VMX instructions.
 */
static bool __read_mostly nested = 1;
module_param(nested, bool, S_IRUGO);

static bool __read_mostly nested_early_check = 0;
module_param(nested_early_check, bool, S_IRUGO);

static u64 __read_mostly host_xss;

static bool __read_mostly enable_pml = 1;
module_param_named(pml, enable_pml, bool, S_IRUGO);

#define MSR_TYPE_R	1
#define MSR_TYPE_W	2
#define MSR_TYPE_RW	3

#define MSR_BITMAP_MODE_X2APIC		1
#define MSR_BITMAP_MODE_X2APIC_APICV	2

#define KVM_VMX_TSC_MULTIPLIER_MAX     0xffffffffffffffffULL

/* Guest_tsc -> host_tsc conversion requires 64-bit division.  */
static int __read_mostly cpu_preemption_timer_multi;
static bool __read_mostly enable_preemption_timer = 1;
#ifdef CONFIG_X86_64
module_param_named(preemption_timer, enable_preemption_timer, bool, S_IRUGO);
#endif

#define KVM_VM_CR0_ALWAYS_OFF (X86_CR0_NW | X86_CR0_CD)
#define KVM_VM_CR0_ALWAYS_ON_UNRESTRICTED_GUEST X86_CR0_NE
#define KVM_VM_CR0_ALWAYS_ON				\
	(KVM_VM_CR0_ALWAYS_ON_UNRESTRICTED_GUEST |	\
	 X86_CR0_WP | X86_CR0_PG | X86_CR0_PE)
#define KVM_CR4_GUEST_OWNED_BITS				      \
	(X86_CR4_PVI | X86_CR4_DE | X86_CR4_PCE | X86_CR4_OSFXSR      \
	 | X86_CR4_OSXMMEXCPT | X86_CR4_LA57 | X86_CR4_TSD)

#define KVM_VM_CR4_ALWAYS_ON_UNRESTRICTED_GUEST X86_CR4_VMXE
#define KVM_PMODE_VM_CR4_ALWAYS_ON (X86_CR4_PAE | X86_CR4_VMXE)
#define KVM_RMODE_VM_CR4_ALWAYS_ON (X86_CR4_VME | X86_CR4_PAE | X86_CR4_VMXE)

#define RMODE_GUEST_OWNED_EFLAGS_BITS (~(X86_EFLAGS_IOPL | X86_EFLAGS_VM))

#define VMX_MISC_EMULATED_PREEMPTION_TIMER_RATE 5

/*
 * Hyper-V requires all of these, so mark them as supported even though
 * they are just treated the same as all-context.
 */
#define VMX_VPID_EXTENT_SUPPORTED_MASK		\
	(VMX_VPID_EXTENT_INDIVIDUAL_ADDR_BIT |	\
	VMX_VPID_EXTENT_SINGLE_CONTEXT_BIT |	\
	VMX_VPID_EXTENT_GLOBAL_CONTEXT_BIT |	\
	VMX_VPID_EXTENT_SINGLE_NON_GLOBAL_BIT)

/*
 * These two parameters configure the controls for Pause-Loop Exiting:
 * ple_gap:    upper bound on the amount of time between two successive
 *             executions of PAUSE in a loop. Also indicates whether PLE is
 *             enabled. Testing shows this time is usually smaller than 128
 *             cycles.
 * ple_window: upper bound on the amount of time a guest is allowed to
 *             execute in a PAUSE loop. Tests indicate that most spinlocks
 *             are held for less than 2^12 cycles.
 * Time is measured on a counter that runs at the same rate as the TSC;
 * refer to SDM volume 3B, sections 21.6.13 and 22.1.3.
 */
static unsigned int ple_gap = KVM_DEFAULT_PLE_GAP;

static unsigned int ple_window = KVM_VMX_DEFAULT_PLE_WINDOW;
module_param(ple_window, uint, 0444);

/* Default doubles per-vcpu window every exit. */
static unsigned int ple_window_grow = KVM_DEFAULT_PLE_WINDOW_GROW;
module_param(ple_window_grow, uint, 0444);

/* Default resets per-vcpu window every exit to ple_window. */
static unsigned int ple_window_shrink = KVM_DEFAULT_PLE_WINDOW_SHRINK;
module_param(ple_window_shrink, uint, 0444);

/* Default is to compute the maximum so we can never overflow. */
static unsigned int ple_window_max        = KVM_VMX_DEFAULT_PLE_WINDOW_MAX;
module_param(ple_window_max, uint, 0444);

extern const ulong vmx_return;
extern const ulong vmx_early_consistency_check_return;

static DEFINE_STATIC_KEY_FALSE(vmx_l1d_should_flush);
static DEFINE_STATIC_KEY_FALSE(vmx_l1d_flush_cond);
static DEFINE_MUTEX(vmx_l1d_flush_mutex);

/* Storage for pre module init parameter parsing */
static enum vmx_l1d_flush_state __read_mostly vmentry_l1d_flush_param = VMENTER_L1D_FLUSH_AUTO;

static const struct {
	const char *option;
	bool for_parse;
} vmentry_l1d_param[] = {
	[VMENTER_L1D_FLUSH_AUTO]	 = {"auto", true},
	[VMENTER_L1D_FLUSH_NEVER]	 = {"never", true},
	[VMENTER_L1D_FLUSH_COND]	 = {"cond", true},
	[VMENTER_L1D_FLUSH_ALWAYS]	 = {"always", true},
	[VMENTER_L1D_FLUSH_EPT_DISABLED] = {"EPT disabled", false},
	[VMENTER_L1D_FLUSH_NOT_REQUIRED] = {"not required", false},
};

#define L1D_CACHE_ORDER 4
static void *vmx_l1d_flush_pages;

static int vmx_setup_l1d_flush(enum vmx_l1d_flush_state l1tf)
{
	struct page *page;
	unsigned int i;

	if (!enable_ept) {
		l1tf_vmx_mitigation = VMENTER_L1D_FLUSH_EPT_DISABLED;
		return 0;
	}

	if (boot_cpu_has(X86_FEATURE_ARCH_CAPABILITIES)) {
		u64 msr;

		rdmsrl(MSR_IA32_ARCH_CAPABILITIES, msr);
		if (msr & ARCH_CAP_SKIP_VMENTRY_L1DFLUSH) {
			l1tf_vmx_mitigation = VMENTER_L1D_FLUSH_NOT_REQUIRED;
			return 0;
		}
	}

	/* If set to auto use the default l1tf mitigation method */
	if (l1tf == VMENTER_L1D_FLUSH_AUTO) {
		switch (l1tf_mitigation) {
		case L1TF_MITIGATION_OFF:
			l1tf = VMENTER_L1D_FLUSH_NEVER;
			break;
		case L1TF_MITIGATION_FLUSH_NOWARN:
		case L1TF_MITIGATION_FLUSH:
		case L1TF_MITIGATION_FLUSH_NOSMT:
			l1tf = VMENTER_L1D_FLUSH_COND;
			break;
		case L1TF_MITIGATION_FULL:
		case L1TF_MITIGATION_FULL_FORCE:
			l1tf = VMENTER_L1D_FLUSH_ALWAYS;
			break;
		}
	} else if (l1tf_mitigation == L1TF_MITIGATION_FULL_FORCE) {
		l1tf = VMENTER_L1D_FLUSH_ALWAYS;
	}

	if (l1tf != VMENTER_L1D_FLUSH_NEVER && !vmx_l1d_flush_pages &&
	    !boot_cpu_has(X86_FEATURE_FLUSH_L1D)) {
		page = alloc_pages(GFP_KERNEL, L1D_CACHE_ORDER);
		if (!page)
			return -ENOMEM;
		vmx_l1d_flush_pages = page_address(page);

		/*
		 * Initialize each page with a different pattern in
		 * order to protect against KSM in the nested
		 * virtualization case.
		 */
		for (i = 0; i < 1u << L1D_CACHE_ORDER; ++i) {
			memset(vmx_l1d_flush_pages + i * PAGE_SIZE, i + 1,
			       PAGE_SIZE);
		}
	}

	l1tf_vmx_mitigation = l1tf;

	if (l1tf != VMENTER_L1D_FLUSH_NEVER)
		static_branch_enable(&vmx_l1d_should_flush);
	else
		static_branch_disable(&vmx_l1d_should_flush);

	if (l1tf == VMENTER_L1D_FLUSH_COND)
		static_branch_enable(&vmx_l1d_flush_cond);
	else
		static_branch_disable(&vmx_l1d_flush_cond);
	return 0;
}
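
/*
 * The static keys defined above give the VM-entry path a patched-in, near
 * zero-cost decision: vmx_l1d_should_flush selects whether an L1D flush is
 * considered at all, and vmx_l1d_flush_cond selects the conditional mode in
 * which the flush is skipped unless the vCPU may have touched sensitive
 * kernel data since the last flush.  The 16 pattern-filled pages allocated
 * here (64KB with 4KB pages) are the software fallback used when the CPU
 * does not advertise the MSR-based L1D_FLUSH command (X86_FEATURE_FLUSH_L1D).
 */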

static int vmentry_l1d_flush_parse(const char *s)
{
	unsigned int i;

	if (s) {
		for (i = 0; i < ARRAY_SIZE(vmentry_l1d_param); i++) {
			if (vmentry_l1d_param[i].for_parse &&
			    sysfs_streq(s, vmentry_l1d_param[i].option))
				return i;
		}
	}
	return -EINVAL;
}

static int vmentry_l1d_flush_set(const char *s, const struct kernel_param *kp)
{
	int l1tf, ret;

	l1tf = vmentry_l1d_flush_parse(s);
	if (l1tf < 0)
		return l1tf;

	if (!boot_cpu_has(X86_BUG_L1TF))
		return 0;

	/*
	 * Has vmx_init() run already? If not then this is the pre init
	 * parameter parsing. In that case just store the value and let
	 * vmx_init() do the proper setup after enable_ept has been
	 * established.
	 */
	if (l1tf_vmx_mitigation == VMENTER_L1D_FLUSH_AUTO) {
		vmentry_l1d_flush_param = l1tf;
		return 0;
	}

	mutex_lock(&vmx_l1d_flush_mutex);
	ret = vmx_setup_l1d_flush(l1tf);
	mutex_unlock(&vmx_l1d_flush_mutex);
	return ret;
}

static int vmentry_l1d_flush_get(char *s, const struct kernel_param *kp)
{
	if (WARN_ON_ONCE(l1tf_vmx_mitigation >= ARRAY_SIZE(vmentry_l1d_param)))
		return sprintf(s, "???\n");

	return sprintf(s, "%s\n", vmentry_l1d_param[l1tf_vmx_mitigation].option);
}

static const struct kernel_param_ops vmentry_l1d_flush_ops = {
	.set = vmentry_l1d_flush_set,
	.get = vmentry_l1d_flush_get,
};
module_param_cb(vmentry_l1d_flush, &vmentry_l1d_flush_ops, NULL, 0644);
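
/*
 * Registering the parameter with custom ops and mode 0644 makes the
 * mitigation mode switchable at runtime, e.g.:
 *
 *   echo always > /sys/module/kvm_intel/parameters/vmentry_l1d_flush
 *   cat /sys/module/kvm_intel/parameters/vmentry_l1d_flush
 *
 * Reads report the state actually in effect ("EPT disabled" and "not
 * required" included), which is not necessarily the string last written.
 */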

enum ept_pointers_status {
	EPT_POINTERS_CHECK = 0,
	EPT_POINTERS_MATCH = 1,
	EPT_POINTERS_MISMATCH = 2
};

struct kvm_vmx {
	struct kvm kvm;

	unsigned int tss_addr;
	bool ept_identity_pagetable_done;
	gpa_t ept_identity_map_addr;

	enum ept_pointers_status ept_pointers_match;
	spinlock_t ept_pointer_lock;
};

#define NR_AUTOLOAD_MSRS 8

struct vmcs_hdr {
	u32 revision_id:31;
	u32 shadow_vmcs:1;
};

struct vmcs {
	struct vmcs_hdr hdr;
	u32 abort;
	char data[0];
};

/*
 * vmcs_host_state tracks registers that are loaded from the VMCS on VMEXIT
 * and whose values change infrequently, but are not constant.  I.e. this is
 * used as a write-through cache of the corresponding VMCS fields.
 */
struct vmcs_host_state {
	unsigned long cr3;	/* May not match real cr3 */
	unsigned long cr4;	/* May not match real cr4 */
	unsigned long gs_base;
	unsigned long fs_base;

	u16           fs_sel, gs_sel, ldt_sel;
#ifdef CONFIG_X86_64
	u16           ds_sel, es_sel;
#endif
};

/*
 * Track a VMCS that may be loaded on a certain CPU. If it is (cpu!=-1), also
 * remember whether it was VMLAUNCHed, and maintain a linked list of all VMCSs
 * loaded on this CPU (so we can clear them if the CPU goes down).
 */
struct loaded_vmcs {
	struct vmcs *vmcs;
	struct vmcs *shadow_vmcs;
	int cpu;
	bool launched;
	bool nmi_known_unmasked;
	bool hv_timer_armed;
	/* Support for vnmi-less CPUs */
	int soft_vnmi_blocked;
	ktime_t entry_time;
	s64 vnmi_blocked_time;
	unsigned long *msr_bitmap;
	struct list_head loaded_vmcss_on_cpu_link;
	struct vmcs_host_state host_state;
};

struct shared_msr_entry {
	unsigned index;
	u64 data;
	u64 mask;
};

/*
 * struct vmcs12 describes the state that our guest hypervisor (L1) keeps for a
 * single nested guest (L2), hence the name vmcs12. Any VMX implementation has
 * a VMCS structure, and vmcs12 is our emulated VMX's VMCS. This structure is
 * stored in guest memory specified by VMPTRLD, but is opaque to the guest,
 * which must access it using VMREAD/VMWRITE/VMCLEAR instructions.
 * More than one of these structures may exist, if L1 runs multiple L2 guests.
 * nested_vmx_run() will use the data here to build the vmcs02: a VMCS for the
 * underlying hardware which will be used to run L2.
 * This structure is packed to ensure that its layout is identical across
 * machines (necessary for live migration).
 *
 * IMPORTANT: Changing the layout of existing fields in this structure
 * will break save/restore compatibility with older kvm releases. When
 * adding new fields, either use space in the reserved padding* arrays
 * or add the new fields to the end of the structure.
 */
typedef u64 natural_width;
struct __packed vmcs12 {
	/* According to the Intel spec, a VMCS region must start with the
	 * following two fields. Then follow implementation-specific data.
	 */
	struct vmcs_hdr hdr;
	u32 abort;

	u32 launch_state; /* set to 0 by VMCLEAR, to 1 by VMLAUNCH */
	u32 padding[7]; /* room for future expansion */

	u64 io_bitmap_a;
	u64 io_bitmap_b;
	u64 msr_bitmap;
	u64 vm_exit_msr_store_addr;
	u64 vm_exit_msr_load_addr;
	u64 vm_entry_msr_load_addr;
	u64 tsc_offset;
	u64 virtual_apic_page_addr;
	u64 apic_access_addr;
	u64 posted_intr_desc_addr;
	u64 ept_pointer;
	u64 eoi_exit_bitmap0;
	u64 eoi_exit_bitmap1;
	u64 eoi_exit_bitmap2;
	u64 eoi_exit_bitmap3;
	u64 xss_exit_bitmap;
	u64 guest_physical_address;
	u64 vmcs_link_pointer;
	u64 guest_ia32_debugctl;
	u64 guest_ia32_pat;
	u64 guest_ia32_efer;
	u64 guest_ia32_perf_global_ctrl;
	u64 guest_pdptr0;
	u64 guest_pdptr1;
	u64 guest_pdptr2;
	u64 guest_pdptr3;
	u64 guest_bndcfgs;
	u64 host_ia32_pat;
	u64 host_ia32_efer;
	u64 host_ia32_perf_global_ctrl;
	u64 vmread_bitmap;
	u64 vmwrite_bitmap;
	u64 vm_function_control;
	u64 eptp_list_address;
	u64 pml_address;
	u64 padding64[3]; /* room for future expansion */
	/*
	 * To allow migration of L1 (complete with its L2 guests) between
	 * machines of different natural widths (32 or 64 bit), we cannot have
	 * unsigned long fields with no explicit size. We use u64 (aliased
	 * natural_width) instead. Luckily, x86 is little-endian.
	 */
	natural_width cr0_guest_host_mask;
	natural_width cr4_guest_host_mask;
	natural_width cr0_read_shadow;
	natural_width cr4_read_shadow;
	natural_width cr3_target_value0;
	natural_width cr3_target_value1;
	natural_width cr3_target_value2;
	natural_width cr3_target_value3;
	natural_width exit_qualification;
	natural_width guest_linear_address;
	natural_width guest_cr0;
	natural_width guest_cr3;
	natural_width guest_cr4;
	natural_width guest_es_base;
	natural_width guest_cs_base;
	natural_width guest_ss_base;
	natural_width guest_ds_base;
	natural_width guest_fs_base;
	natural_width guest_gs_base;
	natural_width guest_ldtr_base;
	natural_width guest_tr_base;
	natural_width guest_gdtr_base;
	natural_width guest_idtr_base;
	natural_width guest_dr7;
	natural_width guest_rsp;
	natural_width guest_rip;
	natural_width guest_rflags;
	natural_width guest_pending_dbg_exceptions;
	natural_width guest_sysenter_esp;
	natural_width guest_sysenter_eip;
	natural_width host_cr0;
	natural_width host_cr3;
	natural_width host_cr4;
	natural_width host_fs_base;
	natural_width host_gs_base;
	natural_width host_tr_base;
	natural_width host_gdtr_base;
	natural_width host_idtr_base;
	natural_width host_ia32_sysenter_esp;
	natural_width host_ia32_sysenter_eip;
	natural_width host_rsp;
	natural_width host_rip;
	natural_width paddingl[8]; /* room for future expansion */
	u32 pin_based_vm_exec_control;
	u32 cpu_based_vm_exec_control;
	u32 exception_bitmap;
	u32 page_fault_error_code_mask;
	u32 page_fault_error_code_match;
	u32 cr3_target_count;
	u32 vm_exit_controls;
	u32 vm_exit_msr_store_count;
	u32 vm_exit_msr_load_count;
	u32 vm_entry_controls;
	u32 vm_entry_msr_load_count;
	u32 vm_entry_intr_info_field;
	u32 vm_entry_exception_error_code;
	u32 vm_entry_instruction_len;
	u32 tpr_threshold;
	u32 secondary_vm_exec_control;
	u32 vm_instruction_error;
	u32 vm_exit_reason;
	u32 vm_exit_intr_info;
	u32 vm_exit_intr_error_code;
	u32 idt_vectoring_info_field;
	u32 idt_vectoring_error_code;
	u32 vm_exit_instruction_len;
	u32 vmx_instruction_info;
	u32 guest_es_limit;
	u32 guest_cs_limit;
	u32 guest_ss_limit;
	u32 guest_ds_limit;
	u32 guest_fs_limit;
	u32 guest_gs_limit;
	u32 guest_ldtr_limit;
	u32 guest_tr_limit;
	u32 guest_gdtr_limit;
	u32 guest_idtr_limit;
	u32 guest_es_ar_bytes;
	u32 guest_cs_ar_bytes;
	u32 guest_ss_ar_bytes;
	u32 guest_ds_ar_bytes;
	u32 guest_fs_ar_bytes;
	u32 guest_gs_ar_bytes;
	u32 guest_ldtr_ar_bytes;
	u32 guest_tr_ar_bytes;
	u32 guest_interruptibility_info;
	u32 guest_activity_state;
	u32 guest_sysenter_cs;
	u32 host_ia32_sysenter_cs;
	u32 vmx_preemption_timer_value;
	u32 padding32[7]; /* room for future expansion */
	u16 virtual_processor_id;
	u16 posted_intr_nv;
	u16 guest_es_selector;
	u16 guest_cs_selector;
	u16 guest_ss_selector;
	u16 guest_ds_selector;
	u16 guest_fs_selector;
	u16 guest_gs_selector;
	u16 guest_ldtr_selector;
	u16 guest_tr_selector;
	u16 guest_intr_status;
	u16 host_es_selector;
	u16 host_cs_selector;
	u16 host_ss_selector;
	u16 host_ds_selector;
	u16 host_fs_selector;
	u16 host_gs_selector;
	u16 host_tr_selector;
	u16 guest_pml_index;
};

/*
 * For save/restore compatibility, the vmcs12 field offsets must not change.
 */
#define CHECK_OFFSET(field, loc)				\
	BUILD_BUG_ON_MSG(offsetof(struct vmcs12, field) != (loc),	\
		"Offset of " #field " in struct vmcs12 has changed.")

static inline void vmx_check_vmcs12_offsets(void) {
	CHECK_OFFSET(hdr, 0);
	CHECK_OFFSET(abort, 4);
	CHECK_OFFSET(launch_state, 8);
	CHECK_OFFSET(io_bitmap_a, 40);
	CHECK_OFFSET(io_bitmap_b, 48);
	CHECK_OFFSET(msr_bitmap, 56);
	CHECK_OFFSET(vm_exit_msr_store_addr, 64);
	CHECK_OFFSET(vm_exit_msr_load_addr, 72);
	CHECK_OFFSET(vm_entry_msr_load_addr, 80);
	CHECK_OFFSET(tsc_offset, 88);
	CHECK_OFFSET(virtual_apic_page_addr, 96);
	CHECK_OFFSET(apic_access_addr, 104);
	CHECK_OFFSET(posted_intr_desc_addr, 112);
	CHECK_OFFSET(ept_pointer, 120);
	CHECK_OFFSET(eoi_exit_bitmap0, 128);
	CHECK_OFFSET(eoi_exit_bitmap1, 136);
	CHECK_OFFSET(eoi_exit_bitmap2, 144);
	CHECK_OFFSET(eoi_exit_bitmap3, 152);
	CHECK_OFFSET(xss_exit_bitmap, 160);
	CHECK_OFFSET(guest_physical_address, 168);
	CHECK_OFFSET(vmcs_link_pointer, 176);
	CHECK_OFFSET(guest_ia32_debugctl, 184);
	CHECK_OFFSET(guest_ia32_pat, 192);
	CHECK_OFFSET(guest_ia32_efer, 200);
	CHECK_OFFSET(guest_ia32_perf_global_ctrl, 208);
	CHECK_OFFSET(guest_pdptr0, 216);
	CHECK_OFFSET(guest_pdptr1, 224);
	CHECK_OFFSET(guest_pdptr2, 232);
	CHECK_OFFSET(guest_pdptr3, 240);
	CHECK_OFFSET(guest_bndcfgs, 248);
	CHECK_OFFSET(host_ia32_pat, 256);
	CHECK_OFFSET(host_ia32_efer, 264);
	CHECK_OFFSET(host_ia32_perf_global_ctrl, 272);
	CHECK_OFFSET(vmread_bitmap, 280);
	CHECK_OFFSET(vmwrite_bitmap, 288);
	CHECK_OFFSET(vm_function_control, 296);
	CHECK_OFFSET(eptp_list_address, 304);
	CHECK_OFFSET(pml_address, 312);
	CHECK_OFFSET(cr0_guest_host_mask, 344);
	CHECK_OFFSET(cr4_guest_host_mask, 352);
	CHECK_OFFSET(cr0_read_shadow, 360);
	CHECK_OFFSET(cr4_read_shadow, 368);
	CHECK_OFFSET(cr3_target_value0, 376);
	CHECK_OFFSET(cr3_target_value1, 384);
	CHECK_OFFSET(cr3_target_value2, 392);
	CHECK_OFFSET(cr3_target_value3, 400);
	CHECK_OFFSET(exit_qualification, 408);
	CHECK_OFFSET(guest_linear_address, 416);
	CHECK_OFFSET(guest_cr0, 424);
	CHECK_OFFSET(guest_cr3, 432);
	CHECK_OFFSET(guest_cr4, 440);
	CHECK_OFFSET(guest_es_base, 448);
	CHECK_OFFSET(guest_cs_base, 456);
	CHECK_OFFSET(guest_ss_base, 464);
	CHECK_OFFSET(guest_ds_base, 472);
	CHECK_OFFSET(guest_fs_base, 480);
	CHECK_OFFSET(guest_gs_base, 488);
	CHECK_OFFSET(guest_ldtr_base, 496);
	CHECK_OFFSET(guest_tr_base, 504);
	CHECK_OFFSET(guest_gdtr_base, 512);
	CHECK_OFFSET(guest_idtr_base, 520);
	CHECK_OFFSET(guest_dr7, 528);
	CHECK_OFFSET(guest_rsp, 536);
	CHECK_OFFSET(guest_rip, 544);
	CHECK_OFFSET(guest_rflags, 552);
	CHECK_OFFSET(guest_pending_dbg_exceptions, 560);
	CHECK_OFFSET(guest_sysenter_esp, 568);
	CHECK_OFFSET(guest_sysenter_eip, 576);
	CHECK_OFFSET(host_cr0, 584);
	CHECK_OFFSET(host_cr3, 592);
	CHECK_OFFSET(host_cr4, 600);
	CHECK_OFFSET(host_fs_base, 608);
	CHECK_OFFSET(host_gs_base, 616);
	CHECK_OFFSET(host_tr_base, 624);
	CHECK_OFFSET(host_gdtr_base, 632);
	CHECK_OFFSET(host_idtr_base, 640);
	CHECK_OFFSET(host_ia32_sysenter_esp, 648);
	CHECK_OFFSET(host_ia32_sysenter_eip, 656);
	CHECK_OFFSET(host_rsp, 664);
	CHECK_OFFSET(host_rip, 672);
	CHECK_OFFSET(pin_based_vm_exec_control, 744);
	CHECK_OFFSET(cpu_based_vm_exec_control, 748);
	CHECK_OFFSET(exception_bitmap, 752);
	CHECK_OFFSET(page_fault_error_code_mask, 756);
	CHECK_OFFSET(page_fault_error_code_match, 760);
	CHECK_OFFSET(cr3_target_count, 764);
	CHECK_OFFSET(vm_exit_controls, 768);
	CHECK_OFFSET(vm_exit_msr_store_count, 772);
	CHECK_OFFSET(vm_exit_msr_load_count, 776);
	CHECK_OFFSET(vm_entry_controls, 780);
	CHECK_OFFSET(vm_entry_msr_load_count, 784);
	CHECK_OFFSET(vm_entry_intr_info_field, 788);
	CHECK_OFFSET(vm_entry_exception_error_code, 792);
	CHECK_OFFSET(vm_entry_instruction_len, 796);
	CHECK_OFFSET(tpr_threshold, 800);
	CHECK_OFFSET(secondary_vm_exec_control, 804);
	CHECK_OFFSET(vm_instruction_error, 808);
	CHECK_OFFSET(vm_exit_reason, 812);
	CHECK_OFFSET(vm_exit_intr_info, 816);
	CHECK_OFFSET(vm_exit_intr_error_code, 820);
	CHECK_OFFSET(idt_vectoring_info_field, 824);
	CHECK_OFFSET(idt_vectoring_error_code, 828);
	CHECK_OFFSET(vm_exit_instruction_len, 832);
	CHECK_OFFSET(vmx_instruction_info, 836);
	CHECK_OFFSET(guest_es_limit, 840);
	CHECK_OFFSET(guest_cs_limit, 844);
	CHECK_OFFSET(guest_ss_limit, 848);
	CHECK_OFFSET(guest_ds_limit, 852);
	CHECK_OFFSET(guest_fs_limit, 856);
	CHECK_OFFSET(guest_gs_limit, 860);
	CHECK_OFFSET(guest_ldtr_limit, 864);
	CHECK_OFFSET(guest_tr_limit, 868);
	CHECK_OFFSET(guest_gdtr_limit, 872);
	CHECK_OFFSET(guest_idtr_limit, 876);
	CHECK_OFFSET(guest_es_ar_bytes, 880);
	CHECK_OFFSET(guest_cs_ar_bytes, 884);
	CHECK_OFFSET(guest_ss_ar_bytes, 888);
	CHECK_OFFSET(guest_ds_ar_bytes, 892);
	CHECK_OFFSET(guest_fs_ar_bytes, 896);
	CHECK_OFFSET(guest_gs_ar_bytes, 900);
	CHECK_OFFSET(guest_ldtr_ar_bytes, 904);
	CHECK_OFFSET(guest_tr_ar_bytes, 908);
	CHECK_OFFSET(guest_interruptibility_info, 912);
	CHECK_OFFSET(guest_activity_state, 916);
	CHECK_OFFSET(guest_sysenter_cs, 920);
	CHECK_OFFSET(host_ia32_sysenter_cs, 924);
	CHECK_OFFSET(vmx_preemption_timer_value, 928);
	CHECK_OFFSET(virtual_processor_id, 960);
	CHECK_OFFSET(posted_intr_nv, 962);
	CHECK_OFFSET(guest_es_selector, 964);
	CHECK_OFFSET(guest_cs_selector, 966);
	CHECK_OFFSET(guest_ss_selector, 968);
	CHECK_OFFSET(guest_ds_selector, 970);
	CHECK_OFFSET(guest_fs_selector, 972);
	CHECK_OFFSET(guest_gs_selector, 974);
	CHECK_OFFSET(guest_ldtr_selector, 976);
	CHECK_OFFSET(guest_tr_selector, 978);
	CHECK_OFFSET(guest_intr_status, 980);
	CHECK_OFFSET(host_es_selector, 982);
	CHECK_OFFSET(host_cs_selector, 984);
	CHECK_OFFSET(host_ss_selector, 986);
	CHECK_OFFSET(host_ds_selector, 988);
	CHECK_OFFSET(host_fs_selector, 990);
	CHECK_OFFSET(host_gs_selector, 992);
	CHECK_OFFSET(host_tr_selector, 994);
	CHECK_OFFSET(guest_pml_index, 996);
}
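
/*
 * CHECK_OFFSET() expands to BUILD_BUG_ON_MSG(), so any accidental change to
 * the vmcs12 layout is caught at compile time instead of surfacing later as
 * a broken nested-state save/restore stream.  For example, inserting a new
 * u64 ahead of tsc_offset would move it away from offset 88 and abort the
 * build with "Offset of tsc_offset in struct vmcs12 has changed."
 */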

/*
 * VMCS12_REVISION is an arbitrary id that should be changed if the content or
 * layout of struct vmcs12 is changed. MSR_IA32_VMX_BASIC returns this id, and
 * VMPTRLD verifies that the VMCS region that L1 is loading contains this id.
 *
 * IMPORTANT: Changing this value will break save/restore compatibility with
 * older kvm releases.
 */
#define VMCS12_REVISION 0x11e57ed0

/*
 * VMCS12_SIZE is the number of bytes L1 should allocate for the VMXON region
 * and any VMCS region. Although only sizeof(struct vmcs12) is used by the
 * current implementation, 4K is reserved to avoid future complications.
 */
#define VMCS12_SIZE 0x1000

/*
 * VMCS12_MAX_FIELD_INDEX is the highest index value used in any
 * supported VMCS12 field encoding.
 */
#define VMCS12_MAX_FIELD_INDEX 0x17

struct nested_vmx_msrs {
	/*
	 * We only store the "true" versions of the VMX capability MSRs. We
	 * generate the "non-true" versions by setting the must-be-1 bits
	 * according to the SDM.
	 */
	u32 procbased_ctls_low;
	u32 procbased_ctls_high;
	u32 secondary_ctls_low;
	u32 secondary_ctls_high;
	u32 pinbased_ctls_low;
	u32 pinbased_ctls_high;
	u32 exit_ctls_low;
	u32 exit_ctls_high;
	u32 entry_ctls_low;
	u32 entry_ctls_high;
	u32 misc_low;
	u32 misc_high;
	u32 ept_caps;
	u32 vpid_caps;
	u64 basic;
	u64 cr0_fixed0;
	u64 cr0_fixed1;
	u64 cr4_fixed0;
	u64 cr4_fixed1;
	u64 vmcs_enum;
	u64 vmfunc_controls;
};

/*
 * The nested_vmx structure is part of vcpu_vmx, and holds information we need
 * for correct emulation of VMX (i.e., nested VMX) on this vcpu.
 */
struct nested_vmx {
	/* Has the level1 guest done vmxon? */
	bool vmxon;
	gpa_t vmxon_ptr;
	bool pml_full;

	/* The guest-physical address of the current VMCS L1 keeps for L2 */
	gpa_t current_vmptr;
	/*
	 * Cache of the guest's VMCS, existing outside of guest memory.
	 * Loaded from guest memory during VMPTRLD. Flushed to guest
	 * memory during VMCLEAR and VMPTRLD.
	 */
	struct vmcs12 *cached_vmcs12;
	/*
	 * Cache of the guest's shadow VMCS, existing outside of guest
	 * memory. Loaded from guest memory during VM entry. Flushed
	 * to guest memory during VM exit.
	 */
	struct vmcs12 *cached_shadow_vmcs12;
	/*
	 * Indicates if the shadow vmcs or enlightened vmcs must be updated
	 * with the data held by struct vmcs12.
	 */
	bool need_vmcs12_sync;
	bool dirty_vmcs12;

	/*
	 * vmcs02 has been initialized, i.e. state that is constant for
	 * vmcs02 has been written to the backing VMCS.  Initialization
	 * is delayed until L1 actually attempts to run a nested VM.
	 */
	bool vmcs02_initialized;

	bool change_vmcs01_virtual_apic_mode;

	/*
	 * Enlightened VMCS has been enabled. It does not mean that L1 has to
	 * use it. However, VMX features available to L1 will be limited based
	 * on what the enlightened VMCS supports.
	 */
	bool enlightened_vmcs_enabled;

	/* L2 must run next, and mustn't decide to exit to L1. */
	bool nested_run_pending;

	struct loaded_vmcs vmcs02;

	/*
	 * Guest pages referred to in the vmcs02 with host-physical
	 * pointers, so we must keep them pinned while L2 runs.
	 */
	struct page *apic_access_page;
	struct page *virtual_apic_page;
	struct page *pi_desc_page;
	struct pi_desc *pi_desc;
	bool pi_pending;
	u16 posted_intr_nv;

	struct hrtimer preemption_timer;
	bool preemption_timer_expired;

	/* to migrate it to L2 if VM_ENTRY_LOAD_DEBUG_CONTROLS is off */
	u64 vmcs01_debugctl;
	u64 vmcs01_guest_bndcfgs;

	u16 vpid02;
	u16 last_vpid;

	struct nested_vmx_msrs msrs;

	/* SMM related state */
	struct {
		/* in VMX operation on SMM entry? */
		bool vmxon;
		/* in guest mode on SMM entry? */
		bool guest_mode;
	} smm;

	gpa_t hv_evmcs_vmptr;
	struct page *hv_evmcs_page;
	struct hv_enlightened_vmcs *hv_evmcs;
};

#define POSTED_INTR_ON  0
#define POSTED_INTR_SN  1

/* Posted-Interrupt Descriptor */
struct pi_desc {
	u32 pir[8];     /* Posted interrupt requested */
	union {
		struct {
				/* bit 256 - Outstanding Notification */
			u16	on	: 1,
				/* bit 257 - Suppress Notification */
				sn	: 1,
				/* bit 271:258 - Reserved */
				rsvd_1	: 14;
				/* bit 279:272 - Notification Vector */
			u8	nv;
				/* bit 287:280 - Reserved */
			u8	rsvd_2;
				/* bit 319:288 - Notification Destination */
			u32	ndst;
		};
		u64 control;
	};
	u32 rsvd[6];
} __aligned(64);
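
/*
 * Rough delivery protocol for a posted interrupt (see the SDM chapter on
 * APIC virtualization): the sender sets the vector's bit in pir[], then
 * sets ON, then sends the notification vector (nv) as a physical IPI to the
 * destination encoded in ndst; a CPU that is running the guest merges pir
 * into the virtual APIC's IRR without causing a VM exit.  SN (suppress
 * notification) is set while the vCPU is not running so that senders do not
 * bother notifying a CPU that cannot process the posting anyway.
 */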
923
Yang Zhanga20ed542013-04-11 19:25:15 +0800924static bool pi_test_and_set_on(struct pi_desc *pi_desc)
925{
926 return test_and_set_bit(POSTED_INTR_ON,
927 (unsigned long *)&pi_desc->control);
928}
929
930static bool pi_test_and_clear_on(struct pi_desc *pi_desc)
931{
932 return test_and_clear_bit(POSTED_INTR_ON,
933 (unsigned long *)&pi_desc->control);
934}
935
936static int pi_test_and_set_pir(int vector, struct pi_desc *pi_desc)
937{
938 return test_and_set_bit(vector, (unsigned long *)pi_desc->pir);
939}
940
Feng Wuebbfc762015-09-18 22:29:46 +0800941static inline void pi_clear_sn(struct pi_desc *pi_desc)
942{
943 return clear_bit(POSTED_INTR_SN,
944 (unsigned long *)&pi_desc->control);
945}
946
947static inline void pi_set_sn(struct pi_desc *pi_desc)
948{
949 return set_bit(POSTED_INTR_SN,
950 (unsigned long *)&pi_desc->control);
951}
952
Paolo Bonziniad361092016-09-20 16:15:05 +0200953static inline void pi_clear_on(struct pi_desc *pi_desc)
954{
955 clear_bit(POSTED_INTR_ON,
956 (unsigned long *)&pi_desc->control);
957}
958
Feng Wuebbfc762015-09-18 22:29:46 +0800959static inline int pi_test_on(struct pi_desc *pi_desc)
960{
961 return test_bit(POSTED_INTR_ON,
962 (unsigned long *)&pi_desc->control);
963}
964
965static inline int pi_test_sn(struct pi_desc *pi_desc)
966{
967 return test_bit(POSTED_INTR_SN,
968 (unsigned long *)&pi_desc->control);
969}
970
Konrad Rzeszutek Wilk33966dd62018-06-20 13:58:37 -0400971struct vmx_msrs {
972 unsigned int nr;
973 struct vmx_msr_entry val[NR_AUTOLOAD_MSRS];
974};
975
Gregory Haskinsa2fa3e92007-07-27 08:13:10 -0400976struct vcpu_vmx {
Rusty Russellfb3f0f52007-07-27 17:16:56 +1000977 struct kvm_vcpu vcpu;
Avi Kivity313dbd42008-07-17 18:04:30 +0300978 unsigned long host_rsp;
Avi Kivity29bd8a72007-09-10 17:27:03 +0300979 u8 fail;
Paolo Bonzini904e14f2018-01-16 16:51:18 +0100980 u8 msr_bitmap_mode;
Avi Kivity51aa01d2010-07-20 14:31:20 +0300981 u32 exit_intr_info;
Avi Kivity1155f762007-11-22 11:30:47 +0200982 u32 idt_vectoring_info;
Avi Kivity6de12732011-03-07 12:51:22 +0200983 ulong rflags;
Avi Kivity26bb0982009-09-07 11:14:12 +0300984 struct shared_msr_entry *guest_msrs;
Gregory Haskinsa2fa3e92007-07-27 08:13:10 -0400985 int nmsrs;
986 int save_nmsrs;
Yang Zhanga547c6d2013-04-11 19:25:10 +0800987 unsigned long host_idt_base;
Gregory Haskinsa2fa3e92007-07-27 08:13:10 -0400988#ifdef CONFIG_X86_64
Avi Kivity44ea2b12009-09-06 15:55:37 +0300989 u64 msr_host_kernel_gs_base;
990 u64 msr_guest_kernel_gs_base;
Gregory Haskinsa2fa3e92007-07-27 08:13:10 -0400991#endif
Ashok Raj15d45072018-02-01 22:59:43 +0100992
KarimAllah Ahmed28c1c9f2018-02-01 22:59:44 +0100993 u64 arch_capabilities;
KarimAllah Ahmedd28b3872018-02-01 22:59:45 +0100994 u64 spec_ctrl;
KarimAllah Ahmed28c1c9f2018-02-01 22:59:44 +0100995
Gleb Natapov2961e8762013-11-25 15:37:13 +0200996 u32 vm_entry_controls_shadow;
997 u32 vm_exit_controls_shadow;
Paolo Bonzini80154d72017-08-24 13:55:35 +0200998 u32 secondary_exec_control;
999
Nadav Har'Eld462b812011-05-24 15:26:10 +03001000 /*
1001 * loaded_vmcs points to the VMCS currently used in this vcpu. For a
1002 * non-nested (L1) guest, it always points to vmcs01. For a nested
Sean Christophersonbd9966d2018-07-23 12:32:42 -07001003 * guest (L2), it points to a different VMCS. loaded_cpu_state points
1004 * to the VMCS whose state is loaded into the CPU registers that only
1005 * need to be switched when transitioning to/from the kernel; a NULL
1006 * value indicates that host state is loaded.
Nadav Har'Eld462b812011-05-24 15:26:10 +03001007 */
1008 struct loaded_vmcs vmcs01;
1009 struct loaded_vmcs *loaded_vmcs;
Sean Christophersonbd9966d2018-07-23 12:32:42 -07001010 struct loaded_vmcs *loaded_cpu_state;
Nadav Har'Eld462b812011-05-24 15:26:10 +03001011 bool __launched; /* temporary, used in vmx_vcpu_run */
Avi Kivity61d2ef22010-04-28 16:40:38 +03001012 struct msr_autoload {
Konrad Rzeszutek Wilk33966dd62018-06-20 13:58:37 -04001013 struct vmx_msrs guest;
1014 struct vmx_msrs host;
Avi Kivity61d2ef22010-04-28 16:40:38 +03001015 } msr_autoload;
Sean Christophersonbd9966d2018-07-23 12:32:42 -07001016
Avi Kivity9c8cba32007-11-22 11:42:59 +02001017 struct {
Avi Kivity7ffd92c2009-06-09 14:10:45 +03001018 int vm86_active;
Avi Kivity78ac8b42010-04-08 18:19:35 +03001019 ulong save_rflags;
Avi Kivityf5f7b2f2012-08-21 17:07:00 +03001020 struct kvm_segment segs[8];
1021 } rmode;
1022 struct {
1023 u32 bitmask; /* 4 bits per segment (1 bit per field) */
Avi Kivity7ffd92c2009-06-09 14:10:45 +03001024 struct kvm_save_segment {
1025 u16 selector;
1026 unsigned long base;
1027 u32 limit;
1028 u32 ar;
Avi Kivityf5f7b2f2012-08-21 17:07:00 +03001029 } seg[8];
Avi Kivity2fb92db2011-04-27 19:42:18 +03001030 } segment_cache;
Sheng Yang2384d2b2008-01-17 15:14:33 +08001031 int vpid;
Mohammed Gamal04fa4d32008-08-17 16:39:48 +03001032 bool emulation_required;
Jan Kiszka3b86cd92008-09-26 09:30:57 +02001033
Andi Kleena0861c02009-06-08 17:37:09 +08001034 u32 exit_reason;
Sheng Yang4e47c7a2009-12-18 16:48:47 +08001035
Yang Zhang01e439b2013-04-11 19:25:12 +08001036 /* Posted interrupt descriptor */
1037 struct pi_desc pi_desc;
1038
Nadav Har'Elec378ae2011-05-25 23:02:54 +03001039 /* Support for a guest hypervisor (nested VMX) */
1040 struct nested_vmx nested;
Radim Krčmářa7653ec2014-08-21 18:08:07 +02001041
1042 /* Dynamic PLE window. */
1043 int ple_window;
1044 bool ple_window_dirty;
Kai Huang843e4332015-01-28 10:54:28 +08001045
Sean Christophersond264ee02018-08-27 15:21:12 -07001046 bool req_immediate_exit;
1047
Kai Huang843e4332015-01-28 10:54:28 +08001048 /* Support for PML */
1049#define PML_ENTITY_NUM 512
1050 struct page *pml_pg;
Owen Hofmann2680d6d2016-03-01 13:36:13 -08001051
Yunhong Jiang64672c92016-06-13 14:19:59 -07001052 /* apic deadline value in host tsc */
1053 u64 hv_deadline_tsc;
1054
Owen Hofmann2680d6d2016-03-01 13:36:13 -08001055 u64 current_tsc_ratio;
Xiao Guangrong1be0e612016-03-22 16:51:18 +08001056
Xiao Guangrong1be0e612016-03-22 16:51:18 +08001057 u32 host_pkru;
Haozhong Zhang3b840802016-06-22 14:59:54 +08001058
Wanpeng Li74c55932017-11-29 01:31:20 -08001059 unsigned long host_debugctlmsr;
1060
Haozhong Zhang37e4c992016-06-22 14:59:55 +08001061 /*
1062 * Only bits masked by msr_ia32_feature_control_valid_bits can be set in
1063 * msr_ia32_feature_control. FEATURE_CONTROL_LOCKED is always included
1064 * in msr_ia32_feature_control_valid_bits.
1065 */
Haozhong Zhang3b840802016-06-22 14:59:54 +08001066 u64 msr_ia32_feature_control;
Haozhong Zhang37e4c992016-06-22 14:59:55 +08001067 u64 msr_ia32_feature_control_valid_bits;
Tianyu Lan877ad952018-07-19 08:40:23 +00001068 u64 ept_pointer;
Gregory Haskinsa2fa3e92007-07-27 08:13:10 -04001069};
1070
Avi Kivity2fb92db2011-04-27 19:42:18 +03001071enum segment_cache_field {
1072 SEG_FIELD_SEL = 0,
1073 SEG_FIELD_BASE = 1,
1074 SEG_FIELD_LIMIT = 2,
1075 SEG_FIELD_AR = 3,
1076
1077 SEG_FIELD_NR = 4
1078};
1079
Sean Christopherson40bbb9d2018-03-20 12:17:20 -07001080static inline struct kvm_vmx *to_kvm_vmx(struct kvm *kvm)
1081{
1082 return container_of(kvm, struct kvm_vmx, kvm);
1083}
1084
Gregory Haskinsa2fa3e92007-07-27 08:13:10 -04001085static inline struct vcpu_vmx *to_vmx(struct kvm_vcpu *vcpu)
1086{
Rusty Russellfb3f0f52007-07-27 17:16:56 +10001087 return container_of(vcpu, struct vcpu_vmx, vcpu);
Gregory Haskinsa2fa3e92007-07-27 08:13:10 -04001088}
1089
Feng Wuefc64402015-09-18 22:29:51 +08001090static struct pi_desc *vcpu_to_pi_desc(struct kvm_vcpu *vcpu)
1091{
1092 return &(to_vmx(vcpu)->pi_desc);
1093}
1094
Jim Mattson58e9ffa2017-12-22 12:13:13 -08001095#define ROL16(val, n) ((u16)(((u16)(val) << (n)) | ((u16)(val) >> (16 - (n)))))
Nadav Har'El22bd0352011-05-25 23:05:57 +03001096#define VMCS12_OFFSET(x) offsetof(struct vmcs12, x)
Jim Mattson58e9ffa2017-12-22 12:13:13 -08001097#define FIELD(number, name) [ROL16(number, 6)] = VMCS12_OFFSET(name)
1098#define FIELD64(number, name) \
1099 FIELD(number, name), \
1100 [ROL16(number##_HIGH, 6)] = VMCS12_OFFSET(name) + sizeof(u32)
Nadav Har'El22bd0352011-05-25 23:05:57 +03001101
Abel Gordon4607c2d2013-04-18 14:35:55 +03001102
Paolo Bonzini44900ba2017-12-13 12:58:02 +01001103static u16 shadow_read_only_fields[] = {
Paolo Bonzinic9e9dea2017-12-20 13:16:29 +01001104#define SHADOW_FIELD_RO(x) x,
1105#include "vmx_shadow_fields.h"
Abel Gordon4607c2d2013-04-18 14:35:55 +03001106};
Bandan Dasfe2b2012014-04-21 15:20:14 -04001107static int max_shadow_read_only_fields =
Abel Gordon4607c2d2013-04-18 14:35:55 +03001108 ARRAY_SIZE(shadow_read_only_fields);
1109
Paolo Bonzini44900ba2017-12-13 12:58:02 +01001110static u16 shadow_read_write_fields[] = {
Paolo Bonzinic9e9dea2017-12-20 13:16:29 +01001111#define SHADOW_FIELD_RW(x) x,
1112#include "vmx_shadow_fields.h"
Abel Gordon4607c2d2013-04-18 14:35:55 +03001113};
Bandan Dasfe2b2012014-04-21 15:20:14 -04001114static int max_shadow_read_write_fields =
Abel Gordon4607c2d2013-04-18 14:35:55 +03001115 ARRAY_SIZE(shadow_read_write_fields);
1116
Mathias Krause772e0312012-08-30 01:30:19 +02001117static const unsigned short vmcs_field_to_offset_table[] = {
Nadav Har'El22bd0352011-05-25 23:05:57 +03001118 FIELD(VIRTUAL_PROCESSOR_ID, virtual_processor_id),
Wincy Van705699a2015-02-03 23:58:17 +08001119 FIELD(POSTED_INTR_NV, posted_intr_nv),
Nadav Har'El22bd0352011-05-25 23:05:57 +03001120 FIELD(GUEST_ES_SELECTOR, guest_es_selector),
1121 FIELD(GUEST_CS_SELECTOR, guest_cs_selector),
1122 FIELD(GUEST_SS_SELECTOR, guest_ss_selector),
1123 FIELD(GUEST_DS_SELECTOR, guest_ds_selector),
1124 FIELD(GUEST_FS_SELECTOR, guest_fs_selector),
1125 FIELD(GUEST_GS_SELECTOR, guest_gs_selector),
1126 FIELD(GUEST_LDTR_SELECTOR, guest_ldtr_selector),
1127 FIELD(GUEST_TR_SELECTOR, guest_tr_selector),
Wincy Van608406e2015-02-03 23:57:51 +08001128 FIELD(GUEST_INTR_STATUS, guest_intr_status),
Bandan Dasc5f983f2017-05-05 15:25:14 -04001129 FIELD(GUEST_PML_INDEX, guest_pml_index),
Nadav Har'El22bd0352011-05-25 23:05:57 +03001130 FIELD(HOST_ES_SELECTOR, host_es_selector),
1131 FIELD(HOST_CS_SELECTOR, host_cs_selector),
1132 FIELD(HOST_SS_SELECTOR, host_ss_selector),
1133 FIELD(HOST_DS_SELECTOR, host_ds_selector),
1134 FIELD(HOST_FS_SELECTOR, host_fs_selector),
1135 FIELD(HOST_GS_SELECTOR, host_gs_selector),
1136 FIELD(HOST_TR_SELECTOR, host_tr_selector),
1137 FIELD64(IO_BITMAP_A, io_bitmap_a),
1138 FIELD64(IO_BITMAP_B, io_bitmap_b),
1139 FIELD64(MSR_BITMAP, msr_bitmap),
1140 FIELD64(VM_EXIT_MSR_STORE_ADDR, vm_exit_msr_store_addr),
1141 FIELD64(VM_EXIT_MSR_LOAD_ADDR, vm_exit_msr_load_addr),
1142 FIELD64(VM_ENTRY_MSR_LOAD_ADDR, vm_entry_msr_load_addr),
Jim Mattsonb348e792018-05-01 15:40:27 -07001143 FIELD64(PML_ADDRESS, pml_address),
Nadav Har'El22bd0352011-05-25 23:05:57 +03001144 FIELD64(TSC_OFFSET, tsc_offset),
1145 FIELD64(VIRTUAL_APIC_PAGE_ADDR, virtual_apic_page_addr),
1146 FIELD64(APIC_ACCESS_ADDR, apic_access_addr),
Wincy Van705699a2015-02-03 23:58:17 +08001147 FIELD64(POSTED_INTR_DESC_ADDR, posted_intr_desc_addr),
Bandan Das27c42a12017-08-03 15:54:42 -04001148 FIELD64(VM_FUNCTION_CONTROL, vm_function_control),
Nadav Har'El22bd0352011-05-25 23:05:57 +03001149 FIELD64(EPT_POINTER, ept_pointer),
Wincy Van608406e2015-02-03 23:57:51 +08001150 FIELD64(EOI_EXIT_BITMAP0, eoi_exit_bitmap0),
1151 FIELD64(EOI_EXIT_BITMAP1, eoi_exit_bitmap1),
1152 FIELD64(EOI_EXIT_BITMAP2, eoi_exit_bitmap2),
1153 FIELD64(EOI_EXIT_BITMAP3, eoi_exit_bitmap3),
Bandan Das41ab9372017-08-03 15:54:43 -04001154 FIELD64(EPTP_LIST_ADDRESS, eptp_list_address),
Jim Mattsonb348e792018-05-01 15:40:27 -07001155 FIELD64(VMREAD_BITMAP, vmread_bitmap),
1156 FIELD64(VMWRITE_BITMAP, vmwrite_bitmap),
Wanpeng Li81dc01f2014-12-04 19:11:07 +08001157 FIELD64(XSS_EXIT_BITMAP, xss_exit_bitmap),
Nadav Har'El22bd0352011-05-25 23:05:57 +03001158 FIELD64(GUEST_PHYSICAL_ADDRESS, guest_physical_address),
1159 FIELD64(VMCS_LINK_POINTER, vmcs_link_pointer),
1160 FIELD64(GUEST_IA32_DEBUGCTL, guest_ia32_debugctl),
1161 FIELD64(GUEST_IA32_PAT, guest_ia32_pat),
1162 FIELD64(GUEST_IA32_EFER, guest_ia32_efer),
1163 FIELD64(GUEST_IA32_PERF_GLOBAL_CTRL, guest_ia32_perf_global_ctrl),
1164 FIELD64(GUEST_PDPTR0, guest_pdptr0),
1165 FIELD64(GUEST_PDPTR1, guest_pdptr1),
1166 FIELD64(GUEST_PDPTR2, guest_pdptr2),
1167 FIELD64(GUEST_PDPTR3, guest_pdptr3),
Paolo Bonzini36be0b92014-02-24 12:30:04 +01001168 FIELD64(GUEST_BNDCFGS, guest_bndcfgs),
Nadav Har'El22bd0352011-05-25 23:05:57 +03001169 FIELD64(HOST_IA32_PAT, host_ia32_pat),
1170 FIELD64(HOST_IA32_EFER, host_ia32_efer),
1171 FIELD64(HOST_IA32_PERF_GLOBAL_CTRL, host_ia32_perf_global_ctrl),
1172 FIELD(PIN_BASED_VM_EXEC_CONTROL, pin_based_vm_exec_control),
1173 FIELD(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control),
1174 FIELD(EXCEPTION_BITMAP, exception_bitmap),
1175 FIELD(PAGE_FAULT_ERROR_CODE_MASK, page_fault_error_code_mask),
1176 FIELD(PAGE_FAULT_ERROR_CODE_MATCH, page_fault_error_code_match),
1177 FIELD(CR3_TARGET_COUNT, cr3_target_count),
1178 FIELD(VM_EXIT_CONTROLS, vm_exit_controls),
1179 FIELD(VM_EXIT_MSR_STORE_COUNT, vm_exit_msr_store_count),
1180 FIELD(VM_EXIT_MSR_LOAD_COUNT, vm_exit_msr_load_count),
1181 FIELD(VM_ENTRY_CONTROLS, vm_entry_controls),
1182 FIELD(VM_ENTRY_MSR_LOAD_COUNT, vm_entry_msr_load_count),
1183 FIELD(VM_ENTRY_INTR_INFO_FIELD, vm_entry_intr_info_field),
1184 FIELD(VM_ENTRY_EXCEPTION_ERROR_CODE, vm_entry_exception_error_code),
1185 FIELD(VM_ENTRY_INSTRUCTION_LEN, vm_entry_instruction_len),
1186 FIELD(TPR_THRESHOLD, tpr_threshold),
1187 FIELD(SECONDARY_VM_EXEC_CONTROL, secondary_vm_exec_control),
1188 FIELD(VM_INSTRUCTION_ERROR, vm_instruction_error),
1189 FIELD(VM_EXIT_REASON, vm_exit_reason),
1190 FIELD(VM_EXIT_INTR_INFO, vm_exit_intr_info),
1191 FIELD(VM_EXIT_INTR_ERROR_CODE, vm_exit_intr_error_code),
1192 FIELD(IDT_VECTORING_INFO_FIELD, idt_vectoring_info_field),
1193 FIELD(IDT_VECTORING_ERROR_CODE, idt_vectoring_error_code),
1194 FIELD(VM_EXIT_INSTRUCTION_LEN, vm_exit_instruction_len),
1195 FIELD(VMX_INSTRUCTION_INFO, vmx_instruction_info),
1196 FIELD(GUEST_ES_LIMIT, guest_es_limit),
1197 FIELD(GUEST_CS_LIMIT, guest_cs_limit),
1198 FIELD(GUEST_SS_LIMIT, guest_ss_limit),
1199 FIELD(GUEST_DS_LIMIT, guest_ds_limit),
1200 FIELD(GUEST_FS_LIMIT, guest_fs_limit),
1201 FIELD(GUEST_GS_LIMIT, guest_gs_limit),
1202 FIELD(GUEST_LDTR_LIMIT, guest_ldtr_limit),
1203 FIELD(GUEST_TR_LIMIT, guest_tr_limit),
1204 FIELD(GUEST_GDTR_LIMIT, guest_gdtr_limit),
1205 FIELD(GUEST_IDTR_LIMIT, guest_idtr_limit),
1206 FIELD(GUEST_ES_AR_BYTES, guest_es_ar_bytes),
1207 FIELD(GUEST_CS_AR_BYTES, guest_cs_ar_bytes),
1208 FIELD(GUEST_SS_AR_BYTES, guest_ss_ar_bytes),
1209 FIELD(GUEST_DS_AR_BYTES, guest_ds_ar_bytes),
1210 FIELD(GUEST_FS_AR_BYTES, guest_fs_ar_bytes),
1211 FIELD(GUEST_GS_AR_BYTES, guest_gs_ar_bytes),
1212 FIELD(GUEST_LDTR_AR_BYTES, guest_ldtr_ar_bytes),
1213 FIELD(GUEST_TR_AR_BYTES, guest_tr_ar_bytes),
1214 FIELD(GUEST_INTERRUPTIBILITY_INFO, guest_interruptibility_info),
1215 FIELD(GUEST_ACTIVITY_STATE, guest_activity_state),
1216 FIELD(GUEST_SYSENTER_CS, guest_sysenter_cs),
1217 FIELD(HOST_IA32_SYSENTER_CS, host_ia32_sysenter_cs),
Jan Kiszka0238ea92013-03-13 11:31:24 +01001218 FIELD(VMX_PREEMPTION_TIMER_VALUE, vmx_preemption_timer_value),
Nadav Har'El22bd0352011-05-25 23:05:57 +03001219 FIELD(CR0_GUEST_HOST_MASK, cr0_guest_host_mask),
1220 FIELD(CR4_GUEST_HOST_MASK, cr4_guest_host_mask),
1221 FIELD(CR0_READ_SHADOW, cr0_read_shadow),
1222 FIELD(CR4_READ_SHADOW, cr4_read_shadow),
1223 FIELD(CR3_TARGET_VALUE0, cr3_target_value0),
1224 FIELD(CR3_TARGET_VALUE1, cr3_target_value1),
1225 FIELD(CR3_TARGET_VALUE2, cr3_target_value2),
1226 FIELD(CR3_TARGET_VALUE3, cr3_target_value3),
1227 FIELD(EXIT_QUALIFICATION, exit_qualification),
1228 FIELD(GUEST_LINEAR_ADDRESS, guest_linear_address),
1229 FIELD(GUEST_CR0, guest_cr0),
1230 FIELD(GUEST_CR3, guest_cr3),
1231 FIELD(GUEST_CR4, guest_cr4),
1232 FIELD(GUEST_ES_BASE, guest_es_base),
1233 FIELD(GUEST_CS_BASE, guest_cs_base),
1234 FIELD(GUEST_SS_BASE, guest_ss_base),
1235 FIELD(GUEST_DS_BASE, guest_ds_base),
1236 FIELD(GUEST_FS_BASE, guest_fs_base),
1237 FIELD(GUEST_GS_BASE, guest_gs_base),
1238 FIELD(GUEST_LDTR_BASE, guest_ldtr_base),
1239 FIELD(GUEST_TR_BASE, guest_tr_base),
1240 FIELD(GUEST_GDTR_BASE, guest_gdtr_base),
1241 FIELD(GUEST_IDTR_BASE, guest_idtr_base),
1242 FIELD(GUEST_DR7, guest_dr7),
1243 FIELD(GUEST_RSP, guest_rsp),
1244 FIELD(GUEST_RIP, guest_rip),
1245 FIELD(GUEST_RFLAGS, guest_rflags),
1246 FIELD(GUEST_PENDING_DBG_EXCEPTIONS, guest_pending_dbg_exceptions),
1247 FIELD(GUEST_SYSENTER_ESP, guest_sysenter_esp),
1248 FIELD(GUEST_SYSENTER_EIP, guest_sysenter_eip),
1249 FIELD(HOST_CR0, host_cr0),
1250 FIELD(HOST_CR3, host_cr3),
1251 FIELD(HOST_CR4, host_cr4),
1252 FIELD(HOST_FS_BASE, host_fs_base),
1253 FIELD(HOST_GS_BASE, host_gs_base),
1254 FIELD(HOST_TR_BASE, host_tr_base),
1255 FIELD(HOST_GDTR_BASE, host_gdtr_base),
1256 FIELD(HOST_IDTR_BASE, host_idtr_base),
1257 FIELD(HOST_IA32_SYSENTER_ESP, host_ia32_sysenter_esp),
1258 FIELD(HOST_IA32_SYSENTER_EIP, host_ia32_sysenter_eip),
1259 FIELD(HOST_RSP, host_rsp),
1260 FIELD(HOST_RIP, host_rip),
1261};
Nadav Har'El22bd0352011-05-25 23:05:57 +03001262
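/*
 * Note on the lookup below (an explanatory sketch, not authoritative
 * documentation): a VMCS field encoding keeps its width/type bits in the
 * upper part of the low word, so the raw encodings are sparse.  Rotating
 * the 16-bit encoding left by 6 packs those bits into the low-order bits
 * and yields a reasonably dense index into vmcs_field_to_offset_table.
 * For example, using encodings from the FIELD*() table above:
 *
 *	VM_EXIT_REASON = 0x4402  ->  ROL16(0x4402, 6) = 0x0091
 *	GUEST_RIP      = 0x681e  ->  ROL16(0x681e, 6) = 0x079a
 *
 * The "field >> 15" and "index >= size" checks reject encodings outside
 * the table, array_index_nospec() clamps the index under speculation
 * (Spectre v1 hardening), and offset 0 doubles as the "field not mapped"
 * marker since no mapped field lives at the very start of struct vmcs12.
 */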
1263static inline short vmcs_field_to_offset(unsigned long field)
1264{
Dan Williams085331d2018-01-31 17:47:03 -08001265 const size_t size = ARRAY_SIZE(vmcs_field_to_offset_table);
1266 unsigned short offset;
Jim Mattson58e9ffa2017-12-22 12:13:13 -08001267 unsigned index;
Paolo Bonzinia2ae9df2014-11-04 18:31:19 +01001268
Jim Mattson58e9ffa2017-12-22 12:13:13 -08001269 if (field >> 15)
Andrew Honig75f139a2018-01-10 10:12:03 -08001270 return -ENOENT;
1271
Jim Mattson58e9ffa2017-12-22 12:13:13 -08001272 index = ROL16(field, 6);
Linus Torvalds15303ba2018-02-10 13:16:35 -08001273 if (index >= size)
Andrew Honig75f139a2018-01-10 10:12:03 -08001274 return -ENOENT;
1275
Linus Torvalds15303ba2018-02-10 13:16:35 -08001276 index = array_index_nospec(index, size);
1277 offset = vmcs_field_to_offset_table[index];
Dan Williams085331d2018-01-31 17:47:03 -08001278 if (offset == 0)
Paolo Bonzinia2ae9df2014-11-04 18:31:19 +01001279 return -ENOENT;
Dan Williams085331d2018-01-31 17:47:03 -08001280 return offset;
Nadav Har'El22bd0352011-05-25 23:05:57 +03001281}
1282
Nadav Har'Ela9d30f32011-05-25 23:03:55 +03001283static inline struct vmcs12 *get_vmcs12(struct kvm_vcpu *vcpu)
1284{
David Matlack4f2777b2016-07-13 17:16:37 -07001285 return to_vmx(vcpu)->nested.cached_vmcs12;
Nadav Har'Ela9d30f32011-05-25 23:03:55 +03001286}
1287
Liran Alon61ada742018-06-23 02:35:08 +03001288static inline struct vmcs12 *get_shadow_vmcs12(struct kvm_vcpu *vcpu)
1289{
1290 return to_vmx(vcpu)->nested.cached_shadow_vmcs12;
1291}
1292
Peter Feiner995f00a2017-06-30 17:26:32 -07001293static bool nested_ept_ad_enabled(struct kvm_vcpu *vcpu);
Nadav Har'Elbfd0a562013-08-05 11:07:17 +03001294static unsigned long nested_ept_get_cr3(struct kvm_vcpu *vcpu);
Peter Feiner995f00a2017-06-30 17:26:32 -07001295static u64 construct_eptp(struct kvm_vcpu *vcpu, unsigned long root_hpa);
Wanpeng Lif53cd632014-12-02 19:14:58 +08001296static bool vmx_xsaves_supported(void);
Orit Wassermanb246dd52012-05-31 14:49:22 +03001297static void vmx_set_segment(struct kvm_vcpu *vcpu,
1298 struct kvm_segment *var, int seg);
1299static void vmx_get_segment(struct kvm_vcpu *vcpu,
1300 struct kvm_segment *var, int seg);
Gleb Natapovd99e4152012-12-20 16:57:45 +02001301static bool guest_state_valid(struct kvm_vcpu *vcpu);
1302static u32 vmx_segment_access_rights(struct kvm_segment *var);
Abel Gordon16f5b902013-04-18 14:38:25 +03001303static void copy_shadow_to_vmcs12(struct vcpu_vmx *vmx);
Paolo Bonzinib96fb432017-07-27 12:29:32 +02001304static bool vmx_get_nmi_mask(struct kvm_vcpu *vcpu);
1305static void vmx_set_nmi_mask(struct kvm_vcpu *vcpu, bool masked);
1306static bool nested_vmx_is_page_fault_vmexit(struct vmcs12 *vmcs12,
1307 u16 error_code);
Paolo Bonzini904e14f2018-01-16 16:51:18 +01001308static void vmx_update_msr_bitmap(struct kvm_vcpu *vcpu);
Ashok Raj15d45072018-02-01 22:59:43 +01001309static __always_inline void vmx_disable_intercept_for_msr(unsigned long *msr_bitmap,
	1310							  u32 msr, int type);
Avi Kivity75880a02007-06-20 11:20:04 +03001311
Avi Kivity6aa8b732006-12-10 02:21:36 -08001312static DEFINE_PER_CPU(struct vmcs *, vmxarea);
1313static DEFINE_PER_CPU(struct vmcs *, current_vmcs);
Nadav Har'Eld462b812011-05-24 15:26:10 +03001314/*
	1315 * We maintain a per-CPU linked-list of VMCSs loaded on that CPU. This is needed
1316 * when a CPU is brought down, and we need to VMCLEAR all VMCSs loaded on it.
1317 */
1318static DEFINE_PER_CPU(struct list_head, loaded_vmcss_on_cpu);
Avi Kivity6aa8b732006-12-10 02:21:36 -08001319
Feng Wubf9f6ac2015-09-18 22:29:55 +08001320/*
	1321 * We maintain a per-CPU linked-list of vCPUs, so in wakeup_handler() we
	1322 * can find which vCPU should be woken up.
1323 */
1324static DEFINE_PER_CPU(struct list_head, blocked_vcpu_on_cpu);
1325static DEFINE_PER_CPU(spinlock_t, blocked_vcpu_on_cpu_lock);
1326
Radim Krčmář23611332016-09-29 22:41:33 +02001327enum {
Radim Krčmář23611332016-09-29 22:41:33 +02001328 VMX_VMREAD_BITMAP,
1329 VMX_VMWRITE_BITMAP,
1330 VMX_BITMAP_NR
1331};
1332
1333static unsigned long *vmx_bitmap[VMX_BITMAP_NR];
1334
Radim Krčmář23611332016-09-29 22:41:33 +02001335#define vmx_vmread_bitmap (vmx_bitmap[VMX_VMREAD_BITMAP])
1336#define vmx_vmwrite_bitmap (vmx_bitmap[VMX_VMWRITE_BITMAP])
He, Qingfdef3ad2007-04-30 09:45:24 +03001337
Avi Kivity110312c2010-12-21 12:54:20 +02001338static bool cpu_has_load_ia32_efer;
Gleb Natapov8bf00a52011-10-05 14:01:22 +02001339static bool cpu_has_load_perf_global_ctrl;
Avi Kivity110312c2010-12-21 12:54:20 +02001340
Sheng Yang2384d2b2008-01-17 15:14:33 +08001341static DECLARE_BITMAP(vmx_vpid_bitmap, VMX_NR_VPIDS);
1342static DEFINE_SPINLOCK(vmx_vpid_lock);
1343
Yang, Sheng1c3d14fe2007-07-29 11:07:42 +03001344static struct vmcs_config {
Avi Kivity6aa8b732006-12-10 02:21:36 -08001345 int size;
1346 int order;
Jan Dakinevich9ac7e3e2016-09-04 21:23:15 +03001347 u32 basic_cap;
Avi Kivity6aa8b732006-12-10 02:21:36 -08001348 u32 revision_id;
Yang, Sheng1c3d14fe2007-07-29 11:07:42 +03001349 u32 pin_based_exec_ctrl;
1350 u32 cpu_based_exec_ctrl;
Sheng Yangf78e0e22007-10-29 09:40:42 +08001351 u32 cpu_based_2nd_exec_ctrl;
Yang, Sheng1c3d14fe2007-07-29 11:07:42 +03001352 u32 vmexit_ctrl;
1353 u32 vmentry_ctrl;
Paolo Bonzini13893092018-02-26 13:40:09 +01001354 struct nested_vmx_msrs nested;
Yang, Sheng1c3d14fe2007-07-29 11:07:42 +03001355} vmcs_config;
Avi Kivity6aa8b732006-12-10 02:21:36 -08001356
Hannes Ederefff9e52008-11-28 17:02:06 +01001357static struct vmx_capability {
Sheng Yangd56f5462008-04-25 10:13:16 +08001358 u32 ept;
1359 u32 vpid;
1360} vmx_capability;
1361
Avi Kivity6aa8b732006-12-10 02:21:36 -08001362#define VMX_SEGMENT_FIELD(seg) \
1363 [VCPU_SREG_##seg] = { \
1364 .selector = GUEST_##seg##_SELECTOR, \
1365 .base = GUEST_##seg##_BASE, \
1366 .limit = GUEST_##seg##_LIMIT, \
1367 .ar_bytes = GUEST_##seg##_AR_BYTES, \
1368 }
1369
Mathias Krause772e0312012-08-30 01:30:19 +02001370static const struct kvm_vmx_segment_field {
Avi Kivity6aa8b732006-12-10 02:21:36 -08001371 unsigned selector;
1372 unsigned base;
1373 unsigned limit;
1374 unsigned ar_bytes;
1375} kvm_vmx_segment_fields[] = {
1376 VMX_SEGMENT_FIELD(CS),
1377 VMX_SEGMENT_FIELD(DS),
1378 VMX_SEGMENT_FIELD(ES),
1379 VMX_SEGMENT_FIELD(FS),
1380 VMX_SEGMENT_FIELD(GS),
1381 VMX_SEGMENT_FIELD(SS),
1382 VMX_SEGMENT_FIELD(TR),
1383 VMX_SEGMENT_FIELD(LDTR),
1384};
1385
Avi Kivity26bb0982009-09-07 11:14:12 +03001386static u64 host_efer;
1387
Avi Kivity6de4f3a2009-05-31 22:58:47 +03001388static void ept_save_pdptrs(struct kvm_vcpu *vcpu);
1389
Avi Kivity4d56c8a2007-04-19 14:28:44 +03001390/*
Brian Gerst8c065852010-07-17 09:03:26 -04001391 * Keep MSR_STAR at the end, as setup_msrs() will try to optimize it
Avi Kivity4d56c8a2007-04-19 14:28:44 +03001392 * away by decrementing the array size.
1393 */
Avi Kivity6aa8b732006-12-10 02:21:36 -08001394static const u32 vmx_msr_index[] = {
Avi Kivity05b3e0c2006-12-13 00:33:45 -08001395#ifdef CONFIG_X86_64
Avi Kivity44ea2b12009-09-06 15:55:37 +03001396 MSR_SYSCALL_MASK, MSR_LSTAR, MSR_CSTAR,
Avi Kivity6aa8b732006-12-10 02:21:36 -08001397#endif
Brian Gerst8c065852010-07-17 09:03:26 -04001398 MSR_EFER, MSR_TSC_AUX, MSR_STAR,
Avi Kivity6aa8b732006-12-10 02:21:36 -08001399};
Avi Kivity6aa8b732006-12-10 02:21:36 -08001400
Vitaly Kuznetsov773e8a02018-03-20 15:02:11 +01001401DEFINE_STATIC_KEY_FALSE(enable_evmcs);
1402
1403#define current_evmcs ((struct hv_enlightened_vmcs *)this_cpu_read(current_vmcs))
1404
1405#define KVM_EVMCS_VERSION 1
1406
Vitaly Kuznetsov5d7a6442018-10-16 18:50:00 +02001407/*
1408 * Enlightened VMCSv1 doesn't support these:
1409 *
1410 * POSTED_INTR_NV = 0x00000002,
1411 * GUEST_INTR_STATUS = 0x00000810,
1412 * APIC_ACCESS_ADDR = 0x00002014,
1413 * POSTED_INTR_DESC_ADDR = 0x00002016,
1414 * EOI_EXIT_BITMAP0 = 0x0000201c,
1415 * EOI_EXIT_BITMAP1 = 0x0000201e,
1416 * EOI_EXIT_BITMAP2 = 0x00002020,
1417 * EOI_EXIT_BITMAP3 = 0x00002022,
1418 * GUEST_PML_INDEX = 0x00000812,
1419 * PML_ADDRESS = 0x0000200e,
1420 * VM_FUNCTION_CONTROL = 0x00002018,
1421 * EPTP_LIST_ADDRESS = 0x00002024,
1422 * VMREAD_BITMAP = 0x00002026,
1423 * VMWRITE_BITMAP = 0x00002028,
1424 *
1425 * TSC_MULTIPLIER = 0x00002032,
1426 * PLE_GAP = 0x00004020,
1427 * PLE_WINDOW = 0x00004022,
1428 * VMX_PREEMPTION_TIMER_VALUE = 0x0000482E,
1429 * GUEST_IA32_PERF_GLOBAL_CTRL = 0x00002808,
1430 * HOST_IA32_PERF_GLOBAL_CTRL = 0x00002c04,
1431 *
1432 * Currently unsupported in KVM:
1433 * GUEST_IA32_RTIT_CTL = 0x00002814,
1434 */
1435#define EVMCS1_UNSUPPORTED_PINCTRL (PIN_BASED_POSTED_INTR | \
1436 PIN_BASED_VMX_PREEMPTION_TIMER)
1437#define EVMCS1_UNSUPPORTED_2NDEXEC \
1438 (SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY | \
1439 SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES | \
1440 SECONDARY_EXEC_APIC_REGISTER_VIRT | \
1441 SECONDARY_EXEC_ENABLE_PML | \
1442 SECONDARY_EXEC_ENABLE_VMFUNC | \
1443 SECONDARY_EXEC_SHADOW_VMCS | \
1444 SECONDARY_EXEC_TSC_SCALING | \
1445 SECONDARY_EXEC_PAUSE_LOOP_EXITING)
1446#define EVMCS1_UNSUPPORTED_VMEXIT_CTRL (VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL)
1447#define EVMCS1_UNSUPPORTED_VMENTRY_CTRL (VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL)
1448#define EVMCS1_UNSUPPORTED_VMFUNC (VMX_VMFUNC_EPTP_SWITCHING)
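/*
 * A quick sketch of how the masks above are consumed (both uses are
 * visible further down in this file): when Enlightened VMCS is active,
 * evmcs_sanitize_exec_ctrls() strips the unsupported bits from the
 * host's vmcs_config, and nested_enable_evmcs() strips them from the
 * nested VMX capability MSRs exposed to L1, e.g.:
 *
 *	vmx->nested.msrs.pinbased_ctls_high &= ~EVMCS1_UNSUPPORTED_PINCTRL;
 *
 * so a guest using eVMCSv1 never sees controls the format cannot carry.
 */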
1449
Vitaly Kuznetsov773e8a02018-03-20 15:02:11 +01001450#if IS_ENABLED(CONFIG_HYPERV)
1451static bool __read_mostly enlightened_vmcs = true;
1452module_param(enlightened_vmcs, bool, 0444);
1453
1454static inline void evmcs_write64(unsigned long field, u64 value)
1455{
1456 u16 clean_field;
1457 int offset = get_evmcs_offset(field, &clean_field);
1458
1459 if (offset < 0)
1460 return;
1461
1462 *(u64 *)((char *)current_evmcs + offset) = value;
1463
1464 current_evmcs->hv_clean_fields &= ~clean_field;
1465}
1466
1467static inline void evmcs_write32(unsigned long field, u32 value)
1468{
1469 u16 clean_field;
1470 int offset = get_evmcs_offset(field, &clean_field);
1471
1472 if (offset < 0)
1473 return;
1474
1475 *(u32 *)((char *)current_evmcs + offset) = value;
1476 current_evmcs->hv_clean_fields &= ~clean_field;
1477}
1478
1479static inline void evmcs_write16(unsigned long field, u16 value)
1480{
1481 u16 clean_field;
1482 int offset = get_evmcs_offset(field, &clean_field);
1483
1484 if (offset < 0)
1485 return;
1486
1487 *(u16 *)((char *)current_evmcs + offset) = value;
1488 current_evmcs->hv_clean_fields &= ~clean_field;
1489}
1490
1491static inline u64 evmcs_read64(unsigned long field)
1492{
1493 int offset = get_evmcs_offset(field, NULL);
1494
1495 if (offset < 0)
1496 return 0;
1497
1498 return *(u64 *)((char *)current_evmcs + offset);
1499}
1500
1501static inline u32 evmcs_read32(unsigned long field)
1502{
1503 int offset = get_evmcs_offset(field, NULL);
1504
1505 if (offset < 0)
1506 return 0;
1507
1508 return *(u32 *)((char *)current_evmcs + offset);
1509}
1510
1511static inline u16 evmcs_read16(unsigned long field)
1512{
1513 int offset = get_evmcs_offset(field, NULL);
1514
1515 if (offset < 0)
1516 return 0;
1517
1518 return *(u16 *)((char *)current_evmcs + offset);
1519}
1520
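/*
 * Explanatory note (a sketch of intent, inferred from the code below):
 * the enlightened VMCS tracks which groups of fields have changed via
 * hv_clean_fields.  When KVM modifies the MSR bitmap of a loaded eVMCS
 * and the MSR-bitmap enlightenment is in use, the corresponding clean
 * bit must be dropped so the underlying hypervisor reloads the bitmap on
 * the next VM entry; that is what evmcs_touch_msr_bitmap() does.
 */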
Vitaly Kuznetsovceef7d12018-04-16 12:50:33 +02001521static inline void evmcs_touch_msr_bitmap(void)
1522{
1523 if (unlikely(!current_evmcs))
1524 return;
1525
1526 if (current_evmcs->hv_enlightenments_control.msr_bitmap)
1527 current_evmcs->hv_clean_fields &=
1528 ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_MSR_BITMAP;
1529}
1530
Vitaly Kuznetsov773e8a02018-03-20 15:02:11 +01001531static void evmcs_load(u64 phys_addr)
1532{
1533 struct hv_vp_assist_page *vp_ap =
1534 hv_get_vp_assist_page(smp_processor_id());
1535
1536 vp_ap->current_nested_vmcs = phys_addr;
1537 vp_ap->enlighten_vmentry = 1;
1538}
1539
1540static void evmcs_sanitize_exec_ctrls(struct vmcs_config *vmcs_conf)
1541{
Vitaly Kuznetsov5d7a6442018-10-16 18:50:00 +02001542 vmcs_conf->pin_based_exec_ctrl &= ~EVMCS1_UNSUPPORTED_PINCTRL;
1543 vmcs_conf->cpu_based_2nd_exec_ctrl &= ~EVMCS1_UNSUPPORTED_2NDEXEC;
Vitaly Kuznetsov773e8a02018-03-20 15:02:11 +01001544
Vitaly Kuznetsov5d7a6442018-10-16 18:50:00 +02001545 vmcs_conf->vmexit_ctrl &= ~EVMCS1_UNSUPPORTED_VMEXIT_CTRL;
1546 vmcs_conf->vmentry_ctrl &= ~EVMCS1_UNSUPPORTED_VMENTRY_CTRL;
Vitaly Kuznetsov773e8a02018-03-20 15:02:11 +01001547
Vitaly Kuznetsov773e8a02018-03-20 15:02:11 +01001548}
Tianyu Lan877ad952018-07-19 08:40:23 +00001549
	1550/* check_ept_pointer_match() must be called with ept_pointer_lock held. */
1551static void check_ept_pointer_match(struct kvm *kvm)
1552{
1553 struct kvm_vcpu *vcpu;
1554 u64 tmp_eptp = INVALID_PAGE;
1555 int i;
1556
1557 kvm_for_each_vcpu(i, vcpu, kvm) {
1558 if (!VALID_PAGE(tmp_eptp)) {
1559 tmp_eptp = to_vmx(vcpu)->ept_pointer;
1560 } else if (tmp_eptp != to_vmx(vcpu)->ept_pointer) {
1561 to_kvm_vmx(kvm)->ept_pointers_match
1562 = EPT_POINTERS_MISMATCH;
1563 return;
1564 }
1565 }
1566
1567 to_kvm_vmx(kvm)->ept_pointers_match = EPT_POINTERS_MATCH;
1568}
1569
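/*
 * Rough outline of the remote-flush path below: as long as every vCPU in
 * the VM uses the same EPT pointer (EPT_POINTERS_MATCH), a single
 * hyperv_flush_guest_mapping() call covers the whole VM; once a mismatch
 * has been observed, the flush is issued per vCPU instead.
 */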
1570static int vmx_hv_remote_flush_tlb(struct kvm *kvm)
1571{
Lan Tianyua5c214d2018-10-13 22:54:05 +08001572 struct kvm_vcpu *vcpu;
1573 int ret = -ENOTSUPP, i;
Tianyu Lan877ad952018-07-19 08:40:23 +00001574
1575 spin_lock(&to_kvm_vmx(kvm)->ept_pointer_lock);
1576
1577 if (to_kvm_vmx(kvm)->ept_pointers_match == EPT_POINTERS_CHECK)
1578 check_ept_pointer_match(kvm);
1579
1580 if (to_kvm_vmx(kvm)->ept_pointers_match != EPT_POINTERS_MATCH) {
Lan Tianyua5c214d2018-10-13 22:54:05 +08001581 kvm_for_each_vcpu(i, vcpu, kvm)
1582 ret |= hyperv_flush_guest_mapping(
1583 to_vmx(kvm_get_vcpu(kvm, i))->ept_pointer);
1584 } else {
1585 ret = hyperv_flush_guest_mapping(
1586 to_vmx(kvm_get_vcpu(kvm, 0))->ept_pointer);
Tianyu Lan877ad952018-07-19 08:40:23 +00001587 }
1588
Tianyu Lan877ad952018-07-19 08:40:23 +00001589 spin_unlock(&to_kvm_vmx(kvm)->ept_pointer_lock);
1590 return ret;
1591}
Vitaly Kuznetsov773e8a02018-03-20 15:02:11 +01001592#else /* !IS_ENABLED(CONFIG_HYPERV) */
1593static inline void evmcs_write64(unsigned long field, u64 value) {}
1594static inline void evmcs_write32(unsigned long field, u32 value) {}
1595static inline void evmcs_write16(unsigned long field, u16 value) {}
1596static inline u64 evmcs_read64(unsigned long field) { return 0; }
1597static inline u32 evmcs_read32(unsigned long field) { return 0; }
1598static inline u16 evmcs_read16(unsigned long field) { return 0; }
1599static inline void evmcs_load(u64 phys_addr) {}
1600static inline void evmcs_sanitize_exec_ctrls(struct vmcs_config *vmcs_conf) {}
Vitaly Kuznetsovceef7d12018-04-16 12:50:33 +02001601static inline void evmcs_touch_msr_bitmap(void) {}
Vitaly Kuznetsov773e8a02018-03-20 15:02:11 +01001602#endif /* IS_ENABLED(CONFIG_HYPERV) */
1603
Vitaly Kuznetsov57b119d2018-10-16 18:50:01 +02001604static int nested_enable_evmcs(struct kvm_vcpu *vcpu,
1605 uint16_t *vmcs_version)
1606{
1607 struct vcpu_vmx *vmx = to_vmx(vcpu);
1608
	1609	/* For simplicity, we do not support disabling the feature once it is enabled. */
1610 if (vmx->nested.enlightened_vmcs_enabled)
1611 return 0;
1612
1613 vmx->nested.enlightened_vmcs_enabled = true;
1614
1615 /*
1616 * vmcs_version represents the range of supported Enlightened VMCS
	1617	 * versions: the low 8 bits hold the minimal version, the high 8 bits the
	1618	 * maximum supported version. KVM supports versions from 1 to
1619 * KVM_EVMCS_VERSION.
1620 */
Vitaly Kuznetsov8cab6502018-10-16 18:50:09 +02001621 if (vmcs_version)
1622 *vmcs_version = (KVM_EVMCS_VERSION << 8) | 1;
Vitaly Kuznetsov57b119d2018-10-16 18:50:01 +02001623
1624 vmx->nested.msrs.pinbased_ctls_high &= ~EVMCS1_UNSUPPORTED_PINCTRL;
1625 vmx->nested.msrs.entry_ctls_high &= ~EVMCS1_UNSUPPORTED_VMENTRY_CTRL;
1626 vmx->nested.msrs.exit_ctls_high &= ~EVMCS1_UNSUPPORTED_VMEXIT_CTRL;
1627 vmx->nested.msrs.secondary_ctls_high &= ~EVMCS1_UNSUPPORTED_2NDEXEC;
1628 vmx->nested.msrs.vmfunc_controls &= ~EVMCS1_UNSUPPORTED_VMFUNC;
1629
1630 return 0;
1631}
1632
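/*
 * The predicates below decode the VM-exit interruption-information
 * field.  As a quick reference (layout per the VMX spec): bits 7:0 hold
 * the vector, bits 10:8 the event type (e.g. INTR_TYPE_HARD_EXCEPTION),
 * bit 11 the error-code-valid flag and bit 31 the valid bit.  A worked
 * example: a guest #PF arrives as
 *
 *	intr_info = INTR_INFO_VALID_MASK | INTR_TYPE_HARD_EXCEPTION |
 *		    PF_VECTOR	(plus bit 11 if an error code is pushed)
 *
 * and is_page_fault() matches it because the comparison masks out bit 11.
 */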
Jan Kiszka5bb16012016-02-09 20:14:21 +01001633static inline bool is_exception_n(u32 intr_info, u8 vector)
Avi Kivity6aa8b732006-12-10 02:21:36 -08001634{
1635 return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VECTOR_MASK |
1636 INTR_INFO_VALID_MASK)) ==
Jan Kiszka5bb16012016-02-09 20:14:21 +01001637 (INTR_TYPE_HARD_EXCEPTION | vector | INTR_INFO_VALID_MASK);
1638}
1639
Jan Kiszka6f054852016-02-09 20:15:18 +01001640static inline bool is_debug(u32 intr_info)
1641{
1642 return is_exception_n(intr_info, DB_VECTOR);
1643}
1644
1645static inline bool is_breakpoint(u32 intr_info)
1646{
1647 return is_exception_n(intr_info, BP_VECTOR);
1648}
1649
Jan Kiszka5bb16012016-02-09 20:14:21 +01001650static inline bool is_page_fault(u32 intr_info)
1651{
1652 return is_exception_n(intr_info, PF_VECTOR);
Avi Kivity6aa8b732006-12-10 02:21:36 -08001653}
1654
Gui Jianfeng31299942010-03-15 17:29:09 +08001655static inline bool is_invalid_opcode(u32 intr_info)
Anthony Liguori7aa81cc2007-09-17 14:57:50 -05001656{
Jan Kiszka5bb16012016-02-09 20:14:21 +01001657 return is_exception_n(intr_info, UD_VECTOR);
Anthony Liguori7aa81cc2007-09-17 14:57:50 -05001658}
1659
Liran Alon9e869482018-03-12 13:12:51 +02001660static inline bool is_gp_fault(u32 intr_info)
1661{
1662 return is_exception_n(intr_info, GP_VECTOR);
1663}
1664
Gui Jianfeng31299942010-03-15 17:29:09 +08001665static inline bool is_machine_check(u32 intr_info)
Andi Kleena0861c02009-06-08 17:37:09 +08001666{
1667 return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VECTOR_MASK |
1668 INTR_INFO_VALID_MASK)) ==
1669 (INTR_TYPE_HARD_EXCEPTION | MC_VECTOR | INTR_INFO_VALID_MASK);
1670}
1671
Linus Torvalds32d43cd2018-03-20 12:16:59 -07001672/* Undocumented: icebp/int1 */
1673static inline bool is_icebp(u32 intr_info)
1674{
1675 return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VALID_MASK))
1676 == (INTR_TYPE_PRIV_SW_EXCEPTION | INTR_INFO_VALID_MASK);
1677}
1678
Gui Jianfeng31299942010-03-15 17:29:09 +08001679static inline bool cpu_has_vmx_msr_bitmap(void)
Sheng Yang25c5f222008-03-28 13:18:56 +08001680{
Sheng Yang04547152009-04-01 15:52:31 +08001681 return vmcs_config.cpu_based_exec_ctrl & CPU_BASED_USE_MSR_BITMAPS;
Sheng Yang25c5f222008-03-28 13:18:56 +08001682}
1683
Gui Jianfeng31299942010-03-15 17:29:09 +08001684static inline bool cpu_has_vmx_tpr_shadow(void)
Yang, Sheng6e5d8652007-09-12 18:03:11 +08001685{
Sheng Yang04547152009-04-01 15:52:31 +08001686 return vmcs_config.cpu_based_exec_ctrl & CPU_BASED_TPR_SHADOW;
Yang, Sheng6e5d8652007-09-12 18:03:11 +08001687}
1688
Paolo Bonzini35754c92015-07-29 12:05:37 +02001689static inline bool cpu_need_tpr_shadow(struct kvm_vcpu *vcpu)
Yang, Sheng6e5d8652007-09-12 18:03:11 +08001690{
Paolo Bonzini35754c92015-07-29 12:05:37 +02001691 return cpu_has_vmx_tpr_shadow() && lapic_in_kernel(vcpu);
Yang, Sheng6e5d8652007-09-12 18:03:11 +08001692}
1693
Gui Jianfeng31299942010-03-15 17:29:09 +08001694static inline bool cpu_has_secondary_exec_ctrls(void)
Sheng Yangf78e0e22007-10-29 09:40:42 +08001695{
Sheng Yang04547152009-04-01 15:52:31 +08001696 return vmcs_config.cpu_based_exec_ctrl &
1697 CPU_BASED_ACTIVATE_SECONDARY_CONTROLS;
Sheng Yangf78e0e22007-10-29 09:40:42 +08001698}
1699
Avi Kivity774ead32007-12-26 13:57:04 +02001700static inline bool cpu_has_vmx_virtualize_apic_accesses(void)
Sheng Yangf78e0e22007-10-29 09:40:42 +08001701{
Sheng Yang04547152009-04-01 15:52:31 +08001702 return vmcs_config.cpu_based_2nd_exec_ctrl &
1703 SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES;
1704}
1705
Yang Zhang8d146952013-01-25 10:18:50 +08001706static inline bool cpu_has_vmx_virtualize_x2apic_mode(void)
1707{
1708 return vmcs_config.cpu_based_2nd_exec_ctrl &
1709 SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE;
1710}
1711
Yang Zhang83d4c282013-01-25 10:18:49 +08001712static inline bool cpu_has_vmx_apic_register_virt(void)
1713{
1714 return vmcs_config.cpu_based_2nd_exec_ctrl &
1715 SECONDARY_EXEC_APIC_REGISTER_VIRT;
1716}
1717
Yang Zhangc7c9c562013-01-25 10:18:51 +08001718static inline bool cpu_has_vmx_virtual_intr_delivery(void)
1719{
1720 return vmcs_config.cpu_based_2nd_exec_ctrl &
1721 SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY;
1722}
1723
Sean Christopherson0b665d32018-08-14 09:33:34 -07001724static inline bool cpu_has_vmx_encls_vmexit(void)
1725{
1726 return vmcs_config.cpu_based_2nd_exec_ctrl &
1727 SECONDARY_EXEC_ENCLS_EXITING;
1728}
1729
Yunhong Jiang64672c92016-06-13 14:19:59 -07001730/*
1731 * Comment's format: document - errata name - stepping - processor name.
1732 * Refer from
1733 * https://www.virtualbox.org/svn/vbox/trunk/src/VBox/VMM/VMMR0/HMR0.cpp
1734 */
1735static u32 vmx_preemption_cpu_tfms[] = {
1736/* 323344.pdf - BA86 - D0 - Xeon 7500 Series */
17370x000206E6,
1738/* 323056.pdf - AAX65 - C2 - Xeon L3406 */
1739/* 322814.pdf - AAT59 - C2 - i7-600, i5-500, i5-400 and i3-300 Mobile */
1740/* 322911.pdf - AAU65 - C2 - i5-600, i3-500 Desktop and Pentium G6950 */
17410x00020652,
1742/* 322911.pdf - AAU65 - K0 - i5-600, i3-500 Desktop and Pentium G6950 */
17430x00020655,
1744/* 322373.pdf - AAO95 - B1 - Xeon 3400 Series */
1745/* 322166.pdf - AAN92 - B1 - i7-800 and i5-700 Desktop */
1746/*
1747 * 320767.pdf - AAP86 - B1 -
1748 * i7-900 Mobile Extreme, i7-800 and i7-700 Mobile
1749 */
17500x000106E5,
1751/* 321333.pdf - AAM126 - C0 - Xeon 3500 */
17520x000106A0,
1753/* 321333.pdf - AAM126 - C1 - Xeon 3500 */
17540x000106A1,
1755/* 320836.pdf - AAJ124 - C0 - i7-900 Desktop Extreme and i7-900 Desktop */
17560x000106A4,
1757 /* 321333.pdf - AAM126 - D0 - Xeon 3500 */
1758 /* 321324.pdf - AAK139 - D0 - Xeon 5500 */
1759 /* 320836.pdf - AAJ124 - D0 - i7-900 Extreme and i7-900 Desktop */
17600x000106A5,
1761};
1762
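/*
 * Illustration of the signature match done below (not an exhaustive
 * description): CPUID.01H:EAX is the family/model/stepping signature,
 * and masking off bits 15:14 and 31:28 drops the reserved bits before
 * the comparison.  For instance, the first entry above, 0x000206E6,
 * decodes to family 6, model 0x2E (extended model 2, model 0xE),
 * stepping 6 - the Xeon 7500 series named in its comment.
 */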
1763static inline bool cpu_has_broken_vmx_preemption_timer(void)
1764{
1765 u32 eax = cpuid_eax(0x00000001), i;
1766
1767 /* Clear the reserved bits */
1768 eax &= ~(0x3U << 14 | 0xfU << 28);
Wei Yongjun03f6a222016-07-04 15:13:07 +00001769 for (i = 0; i < ARRAY_SIZE(vmx_preemption_cpu_tfms); i++)
Yunhong Jiang64672c92016-06-13 14:19:59 -07001770 if (eax == vmx_preemption_cpu_tfms[i])
1771 return true;
1772
1773 return false;
1774}
1775
1776static inline bool cpu_has_vmx_preemption_timer(void)
1777{
Yunhong Jiang64672c92016-06-13 14:19:59 -07001778 return vmcs_config.pin_based_exec_ctrl &
1779 PIN_BASED_VMX_PREEMPTION_TIMER;
1780}
1781
Yang Zhang01e439b2013-04-11 19:25:12 +08001782static inline bool cpu_has_vmx_posted_intr(void)
1783{
Paolo Bonzinid6a858d2015-09-28 11:58:14 +02001784 return IS_ENABLED(CONFIG_X86_LOCAL_APIC) &&
1785 vmcs_config.pin_based_exec_ctrl & PIN_BASED_POSTED_INTR;
Yang Zhang01e439b2013-04-11 19:25:12 +08001786}
1787
1788static inline bool cpu_has_vmx_apicv(void)
1789{
1790 return cpu_has_vmx_apic_register_virt() &&
1791 cpu_has_vmx_virtual_intr_delivery() &&
1792 cpu_has_vmx_posted_intr();
1793}
1794
Sheng Yang04547152009-04-01 15:52:31 +08001795static inline bool cpu_has_vmx_flexpriority(void)
1796{
1797 return cpu_has_vmx_tpr_shadow() &&
1798 cpu_has_vmx_virtualize_apic_accesses();
Sheng Yangf78e0e22007-10-29 09:40:42 +08001799}
1800
Marcelo Tosattie7997942009-06-11 12:07:40 -03001801static inline bool cpu_has_vmx_ept_execute_only(void)
1802{
Gui Jianfeng31299942010-03-15 17:29:09 +08001803 return vmx_capability.ept & VMX_EPT_EXECUTE_ONLY_BIT;
Marcelo Tosattie7997942009-06-11 12:07:40 -03001804}
1805
Marcelo Tosattie7997942009-06-11 12:07:40 -03001806static inline bool cpu_has_vmx_ept_2m_page(void)
1807{
Gui Jianfeng31299942010-03-15 17:29:09 +08001808 return vmx_capability.ept & VMX_EPT_2MB_PAGE_BIT;
Marcelo Tosattie7997942009-06-11 12:07:40 -03001809}
1810
Sheng Yang878403b2010-01-05 19:02:29 +08001811static inline bool cpu_has_vmx_ept_1g_page(void)
1812{
Gui Jianfeng31299942010-03-15 17:29:09 +08001813 return vmx_capability.ept & VMX_EPT_1GB_PAGE_BIT;
Sheng Yang878403b2010-01-05 19:02:29 +08001814}
1815
Sheng Yang4bc9b982010-06-02 14:05:24 +08001816static inline bool cpu_has_vmx_ept_4levels(void)
1817{
1818 return vmx_capability.ept & VMX_EPT_PAGE_WALK_4_BIT;
1819}
1820
David Hildenbrand42aa53b2017-08-10 23:15:29 +02001821static inline bool cpu_has_vmx_ept_mt_wb(void)
1822{
1823 return vmx_capability.ept & VMX_EPTP_WB_BIT;
1824}
1825
Yu Zhang855feb62017-08-24 20:27:55 +08001826static inline bool cpu_has_vmx_ept_5levels(void)
1827{
1828 return vmx_capability.ept & VMX_EPT_PAGE_WALK_5_BIT;
1829}
1830
Xudong Hao83c3a332012-05-28 19:33:35 +08001831static inline bool cpu_has_vmx_ept_ad_bits(void)
1832{
1833 return vmx_capability.ept & VMX_EPT_AD_BIT;
1834}
1835
Gui Jianfeng31299942010-03-15 17:29:09 +08001836static inline bool cpu_has_vmx_invept_context(void)
Sheng Yangd56f5462008-04-25 10:13:16 +08001837{
Gui Jianfeng31299942010-03-15 17:29:09 +08001838 return vmx_capability.ept & VMX_EPT_EXTENT_CONTEXT_BIT;
Sheng Yangd56f5462008-04-25 10:13:16 +08001839}
1840
Gui Jianfeng31299942010-03-15 17:29:09 +08001841static inline bool cpu_has_vmx_invept_global(void)
Sheng Yangd56f5462008-04-25 10:13:16 +08001842{
Gui Jianfeng31299942010-03-15 17:29:09 +08001843 return vmx_capability.ept & VMX_EPT_EXTENT_GLOBAL_BIT;
Sheng Yangd56f5462008-04-25 10:13:16 +08001844}
1845
Liran Aloncd9a4912018-05-22 17:16:15 +03001846static inline bool cpu_has_vmx_invvpid_individual_addr(void)
1847{
1848 return vmx_capability.vpid & VMX_VPID_EXTENT_INDIVIDUAL_ADDR_BIT;
1849}
1850
Gui Jianfeng518c8ae2010-06-04 08:51:39 +08001851static inline bool cpu_has_vmx_invvpid_single(void)
1852{
1853 return vmx_capability.vpid & VMX_VPID_EXTENT_SINGLE_CONTEXT_BIT;
1854}
1855
Gui Jianfengb9d762f2010-06-07 10:32:29 +08001856static inline bool cpu_has_vmx_invvpid_global(void)
1857{
1858 return vmx_capability.vpid & VMX_VPID_EXTENT_GLOBAL_CONTEXT_BIT;
1859}
1860
Wanpeng Li08d839c2017-03-23 05:30:08 -07001861static inline bool cpu_has_vmx_invvpid(void)
1862{
1863 return vmx_capability.vpid & VMX_VPID_INVVPID_BIT;
1864}
1865
Gui Jianfeng31299942010-03-15 17:29:09 +08001866static inline bool cpu_has_vmx_ept(void)
Sheng Yangd56f5462008-04-25 10:13:16 +08001867{
Sheng Yang04547152009-04-01 15:52:31 +08001868 return vmcs_config.cpu_based_2nd_exec_ctrl &
1869 SECONDARY_EXEC_ENABLE_EPT;
Sheng Yangd56f5462008-04-25 10:13:16 +08001870}
1871
Gui Jianfeng31299942010-03-15 17:29:09 +08001872static inline bool cpu_has_vmx_unrestricted_guest(void)
Nitin A Kamble3a624e22009-06-08 11:34:16 -07001873{
1874 return vmcs_config.cpu_based_2nd_exec_ctrl &
1875 SECONDARY_EXEC_UNRESTRICTED_GUEST;
1876}
1877
Gui Jianfeng31299942010-03-15 17:29:09 +08001878static inline bool cpu_has_vmx_ple(void)
Zhai, Edwin4b8d54f2009-10-09 18:03:20 +08001879{
1880 return vmcs_config.cpu_based_2nd_exec_ctrl &
1881 SECONDARY_EXEC_PAUSE_LOOP_EXITING;
1882}
1883
Jan Dakinevich9ac7e3e2016-09-04 21:23:15 +03001884static inline bool cpu_has_vmx_basic_inout(void)
1885{
1886 return (((u64)vmcs_config.basic_cap << 32) & VMX_BASIC_INOUT);
1887}
1888
Paolo Bonzini35754c92015-07-29 12:05:37 +02001889static inline bool cpu_need_virtualize_apic_accesses(struct kvm_vcpu *vcpu)
Sheng Yangf78e0e22007-10-29 09:40:42 +08001890{
Paolo Bonzini35754c92015-07-29 12:05:37 +02001891 return flexpriority_enabled && lapic_in_kernel(vcpu);
Sheng Yangf78e0e22007-10-29 09:40:42 +08001892}
1893
Gui Jianfeng31299942010-03-15 17:29:09 +08001894static inline bool cpu_has_vmx_vpid(void)
Sheng Yang2384d2b2008-01-17 15:14:33 +08001895{
Sheng Yang04547152009-04-01 15:52:31 +08001896 return vmcs_config.cpu_based_2nd_exec_ctrl &
1897 SECONDARY_EXEC_ENABLE_VPID;
Sheng Yang2384d2b2008-01-17 15:14:33 +08001898}
1899
Gui Jianfeng31299942010-03-15 17:29:09 +08001900static inline bool cpu_has_vmx_rdtscp(void)
Sheng Yang4e47c7a2009-12-18 16:48:47 +08001901{
1902 return vmcs_config.cpu_based_2nd_exec_ctrl &
1903 SECONDARY_EXEC_RDTSCP;
1904}
1905
Mao, Junjiead756a12012-07-02 01:18:48 +00001906static inline bool cpu_has_vmx_invpcid(void)
1907{
1908 return vmcs_config.cpu_based_2nd_exec_ctrl &
1909 SECONDARY_EXEC_ENABLE_INVPCID;
1910}
1911
Paolo Bonzini8a1b4392017-11-06 13:31:12 +01001912static inline bool cpu_has_virtual_nmis(void)
1913{
1914 return vmcs_config.pin_based_exec_ctrl & PIN_BASED_VIRTUAL_NMIS;
1915}
1916
Sheng Yangf5f48ee2010-06-30 12:25:15 +08001917static inline bool cpu_has_vmx_wbinvd_exit(void)
1918{
1919 return vmcs_config.cpu_based_2nd_exec_ctrl &
1920 SECONDARY_EXEC_WBINVD_EXITING;
1921}
1922
Abel Gordonabc4fc52013-04-18 14:35:25 +03001923static inline bool cpu_has_vmx_shadow_vmcs(void)
1924{
1925 u64 vmx_msr;
1926 rdmsrl(MSR_IA32_VMX_MISC, vmx_msr);
1927 /* check if the cpu supports writing r/o exit information fields */
1928 if (!(vmx_msr & MSR_IA32_VMX_MISC_VMWRITE_SHADOW_RO_FIELDS))
1929 return false;
1930
1931 return vmcs_config.cpu_based_2nd_exec_ctrl &
1932 SECONDARY_EXEC_SHADOW_VMCS;
1933}
1934
Kai Huang843e4332015-01-28 10:54:28 +08001935static inline bool cpu_has_vmx_pml(void)
1936{
1937 return vmcs_config.cpu_based_2nd_exec_ctrl & SECONDARY_EXEC_ENABLE_PML;
1938}
1939
Haozhong Zhang64903d62015-10-20 15:39:09 +08001940static inline bool cpu_has_vmx_tsc_scaling(void)
1941{
1942 return vmcs_config.cpu_based_2nd_exec_ctrl &
1943 SECONDARY_EXEC_TSC_SCALING;
1944}
1945
Bandan Das2a499e42017-08-03 15:54:41 -04001946static inline bool cpu_has_vmx_vmfunc(void)
1947{
1948 return vmcs_config.cpu_based_2nd_exec_ctrl &
1949 SECONDARY_EXEC_ENABLE_VMFUNC;
1950}
1951
Sean Christopherson64f7a112018-04-30 10:01:06 -07001952static bool vmx_umip_emulated(void)
1953{
1954 return vmcs_config.cpu_based_2nd_exec_ctrl &
1955 SECONDARY_EXEC_DESC;
1956}
1957
Sheng Yang04547152009-04-01 15:52:31 +08001958static inline bool report_flexpriority(void)
1959{
1960 return flexpriority_enabled;
1961}
1962
Jim Mattsonc7c2c7092017-05-05 11:28:09 -07001963static inline unsigned nested_cpu_vmx_misc_cr3_count(struct kvm_vcpu *vcpu)
1964{
Paolo Bonzini6677f3d2018-02-26 13:40:08 +01001965 return vmx_misc_cr3_count(to_vmx(vcpu)->nested.msrs.misc_low);
Jim Mattsonc7c2c7092017-05-05 11:28:09 -07001966}
1967
Jim Mattsonf4160e42018-05-29 09:11:33 -07001968/*
1969 * Do the virtual VMX capability MSRs specify that L1 can use VMWRITE
1970 * to modify any valid field of the VMCS, or are the VM-exit
1971 * information fields read-only?
1972 */
1973static inline bool nested_cpu_has_vmwrite_any_field(struct kvm_vcpu *vcpu)
1974{
1975 return to_vmx(vcpu)->nested.msrs.misc_low &
1976 MSR_IA32_VMX_MISC_VMWRITE_SHADOW_RO_FIELDS;
1977}
1978
Marc Orr04473782018-06-20 17:21:29 -07001979static inline bool nested_cpu_has_zero_length_injection(struct kvm_vcpu *vcpu)
1980{
1981 return to_vmx(vcpu)->nested.msrs.misc_low & VMX_MISC_ZERO_LEN_INS;
1982}
1983
1984static inline bool nested_cpu_supports_monitor_trap_flag(struct kvm_vcpu *vcpu)
1985{
1986 return to_vmx(vcpu)->nested.msrs.procbased_ctls_high &
1987 CPU_BASED_MONITOR_TRAP_FLAG;
1988}
1989
Liran Alonfa97d7d2018-07-18 14:07:59 +02001990static inline bool nested_cpu_has_vmx_shadow_vmcs(struct kvm_vcpu *vcpu)
1991{
1992 return to_vmx(vcpu)->nested.msrs.secondary_ctls_high &
1993 SECONDARY_EXEC_SHADOW_VMCS;
1994}
1995
Nadav Har'Elfe3ef052011-05-25 23:10:02 +03001996static inline bool nested_cpu_has(struct vmcs12 *vmcs12, u32 bit)
1997{
1998 return vmcs12->cpu_based_vm_exec_control & bit;
1999}
2000
2001static inline bool nested_cpu_has2(struct vmcs12 *vmcs12, u32 bit)
2002{
2003 return (vmcs12->cpu_based_vm_exec_control &
2004 CPU_BASED_ACTIVATE_SECONDARY_CONTROLS) &&
2005 (vmcs12->secondary_vm_exec_control & bit);
2006}
2007
Jan Kiszkaf41245002014-03-07 20:03:13 +01002008static inline bool nested_cpu_has_preemption_timer(struct vmcs12 *vmcs12)
2009{
2010 return vmcs12->pin_based_vm_exec_control &
2011 PIN_BASED_VMX_PREEMPTION_TIMER;
2012}
2013
Krish Sadhukhan0c7f6502018-02-20 21:24:39 -05002014static inline bool nested_cpu_has_nmi_exiting(struct vmcs12 *vmcs12)
2015{
2016 return vmcs12->pin_based_vm_exec_control & PIN_BASED_NMI_EXITING;
2017}
2018
2019static inline bool nested_cpu_has_virtual_nmis(struct vmcs12 *vmcs12)
2020{
2021 return vmcs12->pin_based_vm_exec_control & PIN_BASED_VIRTUAL_NMIS;
2022}
2023
Nadav Har'El155a97a2013-08-05 11:07:16 +03002024static inline int nested_cpu_has_ept(struct vmcs12 *vmcs12)
2025{
2026 return nested_cpu_has2(vmcs12, SECONDARY_EXEC_ENABLE_EPT);
2027}
2028
Wanpeng Li81dc01f2014-12-04 19:11:07 +08002029static inline bool nested_cpu_has_xsaves(struct vmcs12 *vmcs12)
2030{
Paolo Bonzini3db13482017-08-24 14:48:03 +02002031 return nested_cpu_has2(vmcs12, SECONDARY_EXEC_XSAVES);
Wanpeng Li81dc01f2014-12-04 19:11:07 +08002032}
2033
Bandan Dasc5f983f2017-05-05 15:25:14 -04002034static inline bool nested_cpu_has_pml(struct vmcs12 *vmcs12)
2035{
2036 return nested_cpu_has2(vmcs12, SECONDARY_EXEC_ENABLE_PML);
2037}
2038
Wincy Vanf2b93282015-02-03 23:56:03 +08002039static inline bool nested_cpu_has_virt_x2apic_mode(struct vmcs12 *vmcs12)
2040{
2041 return nested_cpu_has2(vmcs12, SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE);
2042}
2043
Wanpeng Li5c614b32015-10-13 09:18:36 -07002044static inline bool nested_cpu_has_vpid(struct vmcs12 *vmcs12)
2045{
2046 return nested_cpu_has2(vmcs12, SECONDARY_EXEC_ENABLE_VPID);
2047}
2048
Wincy Van82f0dd42015-02-03 23:57:18 +08002049static inline bool nested_cpu_has_apic_reg_virt(struct vmcs12 *vmcs12)
2050{
2051 return nested_cpu_has2(vmcs12, SECONDARY_EXEC_APIC_REGISTER_VIRT);
2052}
2053
Wincy Van608406e2015-02-03 23:57:51 +08002054static inline bool nested_cpu_has_vid(struct vmcs12 *vmcs12)
2055{
2056 return nested_cpu_has2(vmcs12, SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY);
2057}
2058
Wincy Van705699a2015-02-03 23:58:17 +08002059static inline bool nested_cpu_has_posted_intr(struct vmcs12 *vmcs12)
2060{
2061 return vmcs12->pin_based_vm_exec_control & PIN_BASED_POSTED_INTR;
2062}
2063
Bandan Das27c42a12017-08-03 15:54:42 -04002064static inline bool nested_cpu_has_vmfunc(struct vmcs12 *vmcs12)
2065{
2066 return nested_cpu_has2(vmcs12, SECONDARY_EXEC_ENABLE_VMFUNC);
2067}
2068
Bandan Das41ab9372017-08-03 15:54:43 -04002069static inline bool nested_cpu_has_eptp_switching(struct vmcs12 *vmcs12)
2070{
2071 return nested_cpu_has_vmfunc(vmcs12) &&
2072 (vmcs12->vm_function_control &
2073 VMX_VMFUNC_EPTP_SWITCHING);
2074}
2075
Liran Alonf792d272018-06-23 02:35:05 +03002076static inline bool nested_cpu_has_shadow_vmcs(struct vmcs12 *vmcs12)
2077{
2078 return nested_cpu_has2(vmcs12, SECONDARY_EXEC_SHADOW_VMCS);
2079}
2080
Jim Mattsonef85b672016-12-12 11:01:37 -08002081static inline bool is_nmi(u32 intr_info)
Nadav Har'El644d7112011-05-25 23:12:35 +03002082{
2083 return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VALID_MASK))
Jim Mattsonef85b672016-12-12 11:01:37 -08002084 == (INTR_TYPE_NMI_INTR | INTR_INFO_VALID_MASK);
Nadav Har'El644d7112011-05-25 23:12:35 +03002085}
2086
Jan Kiszka533558b2014-01-04 18:47:20 +01002087static void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason,
2088 u32 exit_intr_info,
2089 unsigned long exit_qualification);
Nadav Har'El7c177932011-05-25 23:12:04 +03002090
Rusty Russell8b9cf982007-07-30 16:31:43 +10002091static int __find_msr_index(struct vcpu_vmx *vmx, u32 msr)
Avi Kivity7725f0b2006-12-13 00:34:01 -08002092{
2093 int i;
2094
Gregory Haskinsa2fa3e92007-07-27 08:13:10 -04002095 for (i = 0; i < vmx->nmsrs; ++i)
Avi Kivity26bb0982009-09-07 11:14:12 +03002096 if (vmx_msr_index[vmx->guest_msrs[i].index] == msr)
Eddie Donga75beee2007-05-17 18:55:15 +03002097 return i;
2098 return -1;
2099}
2100
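/*
 * Sketch of the descriptor layout used by the two helpers below: both
 * INVVPID and INVEPT take the invalidation type in a register and a
 * 128-bit descriptor in memory.  For INVVPID the descriptor is
 * { vpid[15:0], reserved[63:16], linear address[127:64] }, which is
 * exactly what the anonymous struct in __invvpid() lays out; for INVEPT
 * it is { EPTP[63:0], reserved[127:64] }, with the callers in this file
 * passing 0 for the reserved half.
 */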
Uros Bizjak5ebb2722018-10-11 19:40:43 +02002101static inline void __invvpid(unsigned long ext, u16 vpid, gva_t gva)
Sheng Yang2384d2b2008-01-17 15:14:33 +08002102{
2103 struct {
2104 u64 vpid : 16;
2105 u64 rsvd : 48;
2106 u64 gva;
2107 } operand = { vpid, 0, gva };
Uros Bizjakfd8ca6d2018-08-06 16:42:49 +02002108 bool error;
Sheng Yang2384d2b2008-01-17 15:14:33 +08002109
Uros Bizjak4b1e5472018-10-11 19:40:44 +02002110 asm volatile (__ex("invvpid %2, %1") CC_SET(na)
2111 : CC_OUT(na) (error) : "r"(ext), "m"(operand));
Uros Bizjakfd8ca6d2018-08-06 16:42:49 +02002112 BUG_ON(error);
Sheng Yang2384d2b2008-01-17 15:14:33 +08002113}
2114
Uros Bizjak5ebb2722018-10-11 19:40:43 +02002115static inline void __invept(unsigned long ext, u64 eptp, gpa_t gpa)
Sheng Yang14394422008-04-28 12:24:45 +08002116{
2117 struct {
2118 u64 eptp, gpa;
2119 } operand = {eptp, gpa};
Uros Bizjakfd8ca6d2018-08-06 16:42:49 +02002120 bool error;
Sheng Yang14394422008-04-28 12:24:45 +08002121
Uros Bizjak4b1e5472018-10-11 19:40:44 +02002122 asm volatile (__ex("invept %2, %1") CC_SET(na)
2123 : CC_OUT(na) (error) : "r"(ext), "m"(operand));
Uros Bizjakfd8ca6d2018-08-06 16:42:49 +02002124 BUG_ON(error);
Sheng Yang14394422008-04-28 12:24:45 +08002125}
2126
Avi Kivity26bb0982009-09-07 11:14:12 +03002127static struct shared_msr_entry *find_msr_entry(struct vcpu_vmx *vmx, u32 msr)
Eddie Donga75beee2007-05-17 18:55:15 +03002128{
2129 int i;
2130
Rusty Russell8b9cf982007-07-30 16:31:43 +10002131 i = __find_msr_index(vmx, msr);
Eddie Donga75beee2007-05-17 18:55:15 +03002132 if (i >= 0)
Gregory Haskinsa2fa3e92007-07-27 08:13:10 -04002133 return &vmx->guest_msrs[i];
Al Viro8b6d44c2007-02-09 16:38:40 +00002134 return NULL;
Avi Kivity7725f0b2006-12-13 00:34:01 -08002135}
2136
Avi Kivity6aa8b732006-12-10 02:21:36 -08002137static void vmcs_clear(struct vmcs *vmcs)
2138{
2139 u64 phys_addr = __pa(vmcs);
Uros Bizjakfd8ca6d2018-08-06 16:42:49 +02002140 bool error;
Avi Kivity6aa8b732006-12-10 02:21:36 -08002141
Uros Bizjak4b1e5472018-10-11 19:40:44 +02002142 asm volatile (__ex("vmclear %1") CC_SET(na)
2143 : CC_OUT(na) (error) : "m"(phys_addr));
Uros Bizjakfd8ca6d2018-08-06 16:42:49 +02002144 if (unlikely(error))
Avi Kivity6aa8b732006-12-10 02:21:36 -08002145 printk(KERN_ERR "kvm: vmclear fail: %p/%llx\n",
2146 vmcs, phys_addr);
2147}
2148
Nadav Har'Eld462b812011-05-24 15:26:10 +03002149static inline void loaded_vmcs_init(struct loaded_vmcs *loaded_vmcs)
2150{
2151 vmcs_clear(loaded_vmcs->vmcs);
Jim Mattson355f4fb2016-10-28 08:29:39 -07002152 if (loaded_vmcs->shadow_vmcs && loaded_vmcs->launched)
2153 vmcs_clear(loaded_vmcs->shadow_vmcs);
Nadav Har'Eld462b812011-05-24 15:26:10 +03002154 loaded_vmcs->cpu = -1;
2155 loaded_vmcs->launched = 0;
2156}
2157
Dongxiao Xu7725b892010-05-11 18:29:38 +08002158static void vmcs_load(struct vmcs *vmcs)
2159{
2160 u64 phys_addr = __pa(vmcs);
Uros Bizjakfd8ca6d2018-08-06 16:42:49 +02002161 bool error;
Dongxiao Xu7725b892010-05-11 18:29:38 +08002162
Vitaly Kuznetsov773e8a02018-03-20 15:02:11 +01002163 if (static_branch_unlikely(&enable_evmcs))
2164 return evmcs_load(phys_addr);
2165
Uros Bizjak4b1e5472018-10-11 19:40:44 +02002166 asm volatile (__ex("vmptrld %1") CC_SET(na)
2167 : CC_OUT(na) (error) : "m"(phys_addr));
Uros Bizjakfd8ca6d2018-08-06 16:42:49 +02002168 if (unlikely(error))
Nadav Har'El2844d842011-05-25 23:16:40 +03002169 printk(KERN_ERR "kvm: vmptrld %p/%llx failed\n",
Dongxiao Xu7725b892010-05-11 18:29:38 +08002170 vmcs, phys_addr);
2171}
2172
Dave Young2965faa2015-09-09 15:38:55 -07002173#ifdef CONFIG_KEXEC_CORE
Zhang Yanfei8f536b72012-12-06 23:43:34 +08002174/*
	2175 * This bitmap indicates, for each CPU, whether the vmclear
	2176 * operation is enabled in the crash/kexec path. It is disabled
	2177 * for all CPUs by default.
2178 */
2179static cpumask_t crash_vmclear_enabled_bitmap = CPU_MASK_NONE;
2180
2181static inline void crash_enable_local_vmclear(int cpu)
2182{
2183 cpumask_set_cpu(cpu, &crash_vmclear_enabled_bitmap);
2184}
2185
2186static inline void crash_disable_local_vmclear(int cpu)
2187{
2188 cpumask_clear_cpu(cpu, &crash_vmclear_enabled_bitmap);
2189}
2190
2191static inline int crash_local_vmclear_enabled(int cpu)
2192{
2193 return cpumask_test_cpu(cpu, &crash_vmclear_enabled_bitmap);
2194}
2195
2196static void crash_vmclear_local_loaded_vmcss(void)
2197{
2198 int cpu = raw_smp_processor_id();
2199 struct loaded_vmcs *v;
2200
2201 if (!crash_local_vmclear_enabled(cpu))
2202 return;
2203
2204 list_for_each_entry(v, &per_cpu(loaded_vmcss_on_cpu, cpu),
2205 loaded_vmcss_on_cpu_link)
2206 vmcs_clear(v->vmcs);
2207}
2208#else
2209static inline void crash_enable_local_vmclear(int cpu) { }
2210static inline void crash_disable_local_vmclear(int cpu) { }
Dave Young2965faa2015-09-09 15:38:55 -07002211#endif /* CONFIG_KEXEC_CORE */
Zhang Yanfei8f536b72012-12-06 23:43:34 +08002212
Nadav Har'Eld462b812011-05-24 15:26:10 +03002213static void __loaded_vmcs_clear(void *arg)
Avi Kivity6aa8b732006-12-10 02:21:36 -08002214{
Nadav Har'Eld462b812011-05-24 15:26:10 +03002215 struct loaded_vmcs *loaded_vmcs = arg;
Ingo Molnard3b2c332007-01-05 16:36:23 -08002216 int cpu = raw_smp_processor_id();
Avi Kivity6aa8b732006-12-10 02:21:36 -08002217
Nadav Har'Eld462b812011-05-24 15:26:10 +03002218 if (loaded_vmcs->cpu != cpu)
2219 return; /* vcpu migration can race with cpu offline */
2220 if (per_cpu(current_vmcs, cpu) == loaded_vmcs->vmcs)
Avi Kivity6aa8b732006-12-10 02:21:36 -08002221 per_cpu(current_vmcs, cpu) = NULL;
Zhang Yanfei8f536b72012-12-06 23:43:34 +08002222 crash_disable_local_vmclear(cpu);
Nadav Har'Eld462b812011-05-24 15:26:10 +03002223 list_del(&loaded_vmcs->loaded_vmcss_on_cpu_link);
Xiao Guangrong5a560f82012-11-28 20:54:14 +08002224
2225 /*
	2226	 * We should ensure that updating loaded_vmcs->loaded_vmcss_on_cpu_link
	2227	 * happens before setting loaded_vmcs->cpu to -1, which is done in
	2228	 * loaded_vmcs_init. Otherwise another CPU could see cpu == -1 first
	2229	 * and then add the vmcs to the per-CPU list before it is deleted.
2230 */
2231 smp_wmb();
2232
Nadav Har'Eld462b812011-05-24 15:26:10 +03002233 loaded_vmcs_init(loaded_vmcs);
Zhang Yanfei8f536b72012-12-06 23:43:34 +08002234 crash_enable_local_vmclear(cpu);
Avi Kivity6aa8b732006-12-10 02:21:36 -08002235}
2236
Nadav Har'Eld462b812011-05-24 15:26:10 +03002237static void loaded_vmcs_clear(struct loaded_vmcs *loaded_vmcs)
Avi Kivity8d0be2b2007-02-12 00:54:46 -08002238{
Xiao Guangronge6c7d322012-11-28 20:53:15 +08002239 int cpu = loaded_vmcs->cpu;
2240
2241 if (cpu != -1)
2242 smp_call_function_single(cpu,
2243 __loaded_vmcs_clear, loaded_vmcs, 1);
Avi Kivity8d0be2b2007-02-12 00:54:46 -08002244}
2245
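/*
 * Rough map of the INVVPID helpers below: vpid_sync_vcpu_addr() tries an
 * individual-address invalidation and returns false when the CPU lacks
 * that variant, so the caller must fall back to a wider flush;
 * vpid_sync_context() prefers single-context invalidation and otherwise
 * degrades to a global flush.  A vpid of 0 (no dedicated VPID in use) is
 * treated as "nothing to do here" by the single/addr variants.
 */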
Junaid Shahidfaff8752018-06-29 13:10:05 -07002246static inline bool vpid_sync_vcpu_addr(int vpid, gva_t addr)
2247{
2248 if (vpid == 0)
2249 return true;
2250
2251 if (cpu_has_vmx_invvpid_individual_addr()) {
2252 __invvpid(VMX_VPID_EXTENT_INDIVIDUAL_ADDR, vpid, addr);
2253 return true;
2254 }
2255
2256 return false;
2257}
2258
Wanpeng Lidd5f5342015-09-23 18:26:57 +08002259static inline void vpid_sync_vcpu_single(int vpid)
Sheng Yang2384d2b2008-01-17 15:14:33 +08002260{
Wanpeng Lidd5f5342015-09-23 18:26:57 +08002261 if (vpid == 0)
Sheng Yang2384d2b2008-01-17 15:14:33 +08002262 return;
2263
Gui Jianfeng518c8ae2010-06-04 08:51:39 +08002264 if (cpu_has_vmx_invvpid_single())
Wanpeng Lidd5f5342015-09-23 18:26:57 +08002265 __invvpid(VMX_VPID_EXTENT_SINGLE_CONTEXT, vpid, 0);
Sheng Yang2384d2b2008-01-17 15:14:33 +08002266}
2267
Gui Jianfengb9d762f2010-06-07 10:32:29 +08002268static inline void vpid_sync_vcpu_global(void)
2269{
2270 if (cpu_has_vmx_invvpid_global())
2271 __invvpid(VMX_VPID_EXTENT_ALL_CONTEXT, 0, 0);
2272}
2273
Wanpeng Lidd5f5342015-09-23 18:26:57 +08002274static inline void vpid_sync_context(int vpid)
Gui Jianfengb9d762f2010-06-07 10:32:29 +08002275{
2276 if (cpu_has_vmx_invvpid_single())
Wanpeng Lidd5f5342015-09-23 18:26:57 +08002277 vpid_sync_vcpu_single(vpid);
Gui Jianfengb9d762f2010-06-07 10:32:29 +08002278 else
2279 vpid_sync_vcpu_global();
2280}
2281
Sheng Yang14394422008-04-28 12:24:45 +08002282static inline void ept_sync_global(void)
2283{
David Hildenbrandf5f51582017-08-24 20:51:30 +02002284 __invept(VMX_EPT_EXTENT_GLOBAL, 0, 0);
Sheng Yang14394422008-04-28 12:24:45 +08002285}
2286
2287static inline void ept_sync_context(u64 eptp)
2288{
David Hildenbrand0e1252d2017-08-24 20:51:28 +02002289 if (cpu_has_vmx_invept_context())
2290 __invept(VMX_EPT_EXTENT_CONTEXT, eptp, 0);
2291 else
2292 ept_sync_global();
Sheng Yang14394422008-04-28 12:24:45 +08002293}
2294
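/*
 * The vmcs_check*() helpers below are compile-time sanity checks on the
 * VMCS field encoding.  Bits 14:13 of an encoding give the field width
 * (00 = 16-bit, 01 = 64-bit, 10 = 32-bit, 11 = natural width) and bit 0
 * selects the high half of a 64-bit field, hence the 0x6000/0x6001 masks
 * in the checks.  Example encodings (as defined in the VMX spec):
 *
 *	VMCS_LINK_POINTER = 0x2800  (64-bit)
 *	VM_EXIT_REASON    = 0x4402  (32-bit)
 *	GUEST_CR0         = 0x6800  (natural width)
 *
 * so e.g. vmcs_read16(GUEST_CR0) on a constant field would trip the
 * "16-bit accessor invalid for natural width field" build error.
 */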
Paolo Bonzini8a86aea92015-12-03 15:56:55 +01002295static __always_inline void vmcs_check16(unsigned long field)
2296{
2297 BUILD_BUG_ON_MSG(__builtin_constant_p(field) && ((field) & 0x6001) == 0x2000,
2298 "16-bit accessor invalid for 64-bit field");
2299 BUILD_BUG_ON_MSG(__builtin_constant_p(field) && ((field) & 0x6001) == 0x2001,
2300 "16-bit accessor invalid for 64-bit high field");
2301 BUILD_BUG_ON_MSG(__builtin_constant_p(field) && ((field) & 0x6000) == 0x4000,
2302 "16-bit accessor invalid for 32-bit high field");
2303 BUILD_BUG_ON_MSG(__builtin_constant_p(field) && ((field) & 0x6000) == 0x6000,
2304 "16-bit accessor invalid for natural width field");
2305}
2306
2307static __always_inline void vmcs_check32(unsigned long field)
2308{
2309 BUILD_BUG_ON_MSG(__builtin_constant_p(field) && ((field) & 0x6000) == 0,
2310 "32-bit accessor invalid for 16-bit field");
2311 BUILD_BUG_ON_MSG(__builtin_constant_p(field) && ((field) & 0x6000) == 0x6000,
2312 "32-bit accessor invalid for natural width field");
2313}
2314
2315static __always_inline void vmcs_check64(unsigned long field)
2316{
2317 BUILD_BUG_ON_MSG(__builtin_constant_p(field) && ((field) & 0x6000) == 0,
2318 "64-bit accessor invalid for 16-bit field");
2319 BUILD_BUG_ON_MSG(__builtin_constant_p(field) && ((field) & 0x6001) == 0x2001,
2320 "64-bit accessor invalid for 64-bit high field");
2321 BUILD_BUG_ON_MSG(__builtin_constant_p(field) && ((field) & 0x6000) == 0x4000,
2322 "64-bit accessor invalid for 32-bit field");
2323 BUILD_BUG_ON_MSG(__builtin_constant_p(field) && ((field) & 0x6000) == 0x6000,
2324 "64-bit accessor invalid for natural width field");
2325}
2326
2327static __always_inline void vmcs_checkl(unsigned long field)
2328{
2329 BUILD_BUG_ON_MSG(__builtin_constant_p(field) && ((field) & 0x6000) == 0,
2330 "Natural width accessor invalid for 16-bit field");
2331 BUILD_BUG_ON_MSG(__builtin_constant_p(field) && ((field) & 0x6001) == 0x2000,
2332 "Natural width accessor invalid for 64-bit field");
2333 BUILD_BUG_ON_MSG(__builtin_constant_p(field) && ((field) & 0x6001) == 0x2001,
2334 "Natural width accessor invalid for 64-bit high field");
2335 BUILD_BUG_ON_MSG(__builtin_constant_p(field) && ((field) & 0x6000) == 0x4000,
2336 "Natural width accessor invalid for 32-bit field");
2337}
2338
2339static __always_inline unsigned long __vmcs_readl(unsigned long field)
Avi Kivity6aa8b732006-12-10 02:21:36 -08002340{
Avi Kivity5e520e62011-05-15 10:13:12 -04002341 unsigned long value;
Avi Kivity6aa8b732006-12-10 02:21:36 -08002342
Uros Bizjak44c2d662018-10-11 19:40:45 +02002343 asm volatile (__ex_clear("vmread %1, %0", "%k0")
Uros Bizjak4b1e5472018-10-11 19:40:44 +02002344 : "=r"(value) : "r"(field));
Avi Kivity6aa8b732006-12-10 02:21:36 -08002345 return value;
2346}
2347
Avi Kivity96304212011-05-15 10:13:13 -04002348static __always_inline u16 vmcs_read16(unsigned long field)
Avi Kivity6aa8b732006-12-10 02:21:36 -08002349{
Paolo Bonzini8a86aea92015-12-03 15:56:55 +01002350 vmcs_check16(field);
Vitaly Kuznetsov773e8a02018-03-20 15:02:11 +01002351 if (static_branch_unlikely(&enable_evmcs))
2352 return evmcs_read16(field);
Paolo Bonzini8a86aea92015-12-03 15:56:55 +01002353 return __vmcs_readl(field);
Avi Kivity6aa8b732006-12-10 02:21:36 -08002354}
2355
Avi Kivity96304212011-05-15 10:13:13 -04002356static __always_inline u32 vmcs_read32(unsigned long field)
Avi Kivity6aa8b732006-12-10 02:21:36 -08002357{
Paolo Bonzini8a86aea92015-12-03 15:56:55 +01002358 vmcs_check32(field);
Vitaly Kuznetsov773e8a02018-03-20 15:02:11 +01002359 if (static_branch_unlikely(&enable_evmcs))
2360 return evmcs_read32(field);
Paolo Bonzini8a86aea92015-12-03 15:56:55 +01002361 return __vmcs_readl(field);
Avi Kivity6aa8b732006-12-10 02:21:36 -08002362}
2363
Avi Kivity96304212011-05-15 10:13:13 -04002364static __always_inline u64 vmcs_read64(unsigned long field)
Avi Kivity6aa8b732006-12-10 02:21:36 -08002365{
Paolo Bonzini8a86aea92015-12-03 15:56:55 +01002366 vmcs_check64(field);
Vitaly Kuznetsov773e8a02018-03-20 15:02:11 +01002367 if (static_branch_unlikely(&enable_evmcs))
2368 return evmcs_read64(field);
Avi Kivity05b3e0c2006-12-13 00:33:45 -08002369#ifdef CONFIG_X86_64
Paolo Bonzini8a86aea92015-12-03 15:56:55 +01002370 return __vmcs_readl(field);
Avi Kivity6aa8b732006-12-10 02:21:36 -08002371#else
Paolo Bonzini8a86aea92015-12-03 15:56:55 +01002372 return __vmcs_readl(field) | ((u64)__vmcs_readl(field+1) << 32);
Avi Kivity6aa8b732006-12-10 02:21:36 -08002373#endif
2374}
2375
Paolo Bonzini8a86aea92015-12-03 15:56:55 +01002376static __always_inline unsigned long vmcs_readl(unsigned long field)
2377{
2378 vmcs_checkl(field);
Vitaly Kuznetsov773e8a02018-03-20 15:02:11 +01002379 if (static_branch_unlikely(&enable_evmcs))
2380 return evmcs_read64(field);
Paolo Bonzini8a86aea92015-12-03 15:56:55 +01002381 return __vmcs_readl(field);
2382}
2383
Avi Kivitye52de1b2007-01-05 16:36:56 -08002384static noinline void vmwrite_error(unsigned long field, unsigned long value)
2385{
2386 printk(KERN_ERR "vmwrite error: reg %lx value %lx (err %d)\n",
2387 field, value, vmcs_read32(VM_INSTRUCTION_ERROR));
2388 dump_stack();
2389}
2390
Paolo Bonzini8a86aea92015-12-03 15:56:55 +01002391static __always_inline void __vmcs_writel(unsigned long field, unsigned long value)
Avi Kivity6aa8b732006-12-10 02:21:36 -08002392{
Uros Bizjakfd8ca6d2018-08-06 16:42:49 +02002393 bool error;
Avi Kivity6aa8b732006-12-10 02:21:36 -08002394
Uros Bizjak4b1e5472018-10-11 19:40:44 +02002395 asm volatile (__ex("vmwrite %2, %1") CC_SET(na)
2396 : CC_OUT(na) (error) : "r"(field), "rm"(value));
Avi Kivitye52de1b2007-01-05 16:36:56 -08002397 if (unlikely(error))
2398 vmwrite_error(field, value);
Avi Kivity6aa8b732006-12-10 02:21:36 -08002399}
2400
Paolo Bonzini8a86aea92015-12-03 15:56:55 +01002401static __always_inline void vmcs_write16(unsigned long field, u16 value)
Avi Kivity6aa8b732006-12-10 02:21:36 -08002402{
Paolo Bonzini8a86aea92015-12-03 15:56:55 +01002403 vmcs_check16(field);
Vitaly Kuznetsov773e8a02018-03-20 15:02:11 +01002404 if (static_branch_unlikely(&enable_evmcs))
2405 return evmcs_write16(field, value);
2406
Paolo Bonzini8a86aea92015-12-03 15:56:55 +01002407 __vmcs_writel(field, value);
Avi Kivity6aa8b732006-12-10 02:21:36 -08002408}
2409
Paolo Bonzini8a86aea92015-12-03 15:56:55 +01002410static __always_inline void vmcs_write32(unsigned long field, u32 value)
Avi Kivity6aa8b732006-12-10 02:21:36 -08002411{
Paolo Bonzini8a86aea92015-12-03 15:56:55 +01002412 vmcs_check32(field);
Vitaly Kuznetsov773e8a02018-03-20 15:02:11 +01002413 if (static_branch_unlikely(&enable_evmcs))
2414 return evmcs_write32(field, value);
2415
Paolo Bonzini8a86aea92015-12-03 15:56:55 +01002416 __vmcs_writel(field, value);
Avi Kivity6aa8b732006-12-10 02:21:36 -08002417}
2418
Paolo Bonzini8a86aea92015-12-03 15:56:55 +01002419static __always_inline void vmcs_write64(unsigned long field, u64 value)
Avi Kivity6aa8b732006-12-10 02:21:36 -08002420{
Paolo Bonzini8a86aea92015-12-03 15:56:55 +01002421 vmcs_check64(field);
Vitaly Kuznetsov773e8a02018-03-20 15:02:11 +01002422 if (static_branch_unlikely(&enable_evmcs))
2423 return evmcs_write64(field, value);
2424
Paolo Bonzini8a86aea92015-12-03 15:56:55 +01002425 __vmcs_writel(field, value);
Avi Kivity7682f2d2008-05-12 19:25:43 +03002426#ifndef CONFIG_X86_64
Avi Kivity6aa8b732006-12-10 02:21:36 -08002427 asm volatile ("");
Paolo Bonzini8a86aea92015-12-03 15:56:55 +01002428 __vmcs_writel(field+1, value >> 32);
Avi Kivity6aa8b732006-12-10 02:21:36 -08002429#endif
2430}
2431
Paolo Bonzini8a86aea92015-12-03 15:56:55 +01002432static __always_inline void vmcs_writel(unsigned long field, unsigned long value)
Anthony Liguori2ab455c2007-04-27 09:29:49 +03002433{
Paolo Bonzini8a86aea92015-12-03 15:56:55 +01002434 vmcs_checkl(field);
Vitaly Kuznetsov773e8a02018-03-20 15:02:11 +01002435 if (static_branch_unlikely(&enable_evmcs))
2436 return evmcs_write64(field, value);
2437
Paolo Bonzini8a86aea92015-12-03 15:56:55 +01002438 __vmcs_writel(field, value);
Anthony Liguori2ab455c2007-04-27 09:29:49 +03002439}
2440
Paolo Bonzini8a86aea92015-12-03 15:56:55 +01002441static __always_inline void vmcs_clear_bits(unsigned long field, u32 mask)
Anthony Liguori2ab455c2007-04-27 09:29:49 +03002442{
Paolo Bonzini8a86aea92015-12-03 15:56:55 +01002443 BUILD_BUG_ON_MSG(__builtin_constant_p(field) && ((field) & 0x6000) == 0x2000,
2444 "vmcs_clear_bits does not support 64-bit fields");
Vitaly Kuznetsov773e8a02018-03-20 15:02:11 +01002445 if (static_branch_unlikely(&enable_evmcs))
2446 return evmcs_write32(field, evmcs_read32(field) & ~mask);
2447
Paolo Bonzini8a86aea92015-12-03 15:56:55 +01002448 __vmcs_writel(field, __vmcs_readl(field) & ~mask);
2449}
2450
2451static __always_inline void vmcs_set_bits(unsigned long field, u32 mask)
2452{
2453 BUILD_BUG_ON_MSG(__builtin_constant_p(field) && ((field) & 0x6000) == 0x2000,
2454 "vmcs_set_bits does not support 64-bit fields");
Vitaly Kuznetsov773e8a02018-03-20 15:02:11 +01002455 if (static_branch_unlikely(&enable_evmcs))
2456 return evmcs_write32(field, evmcs_read32(field) | mask);
2457
Paolo Bonzini8a86aea92015-12-03 15:56:55 +01002458 __vmcs_writel(field, __vmcs_readl(field) | mask);
Anthony Liguori2ab455c2007-04-27 09:29:49 +03002459}
2460
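/*
 * Brief note on the accessors below: VM_ENTRY_CONTROLS and
 * VM_EXIT_CONTROLS are mirrored in vmx->vm_entry_controls_shadow /
 * vm_exit_controls_shadow so that toggling individual bits does not
 * require a VMREAD, and a VMWRITE is issued only when the cached value
 * actually changes.  Typical use is via the setbit/clearbit wrappers,
 * e.g. vm_entry_controls_setbit(vmx, VM_ENTRY_IA32E_MODE) - the constant
 * here is only meant to illustrate the call shape.
 */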
Paolo Bonzini8391ce42016-07-07 14:58:33 +02002461static inline void vm_entry_controls_reset_shadow(struct vcpu_vmx *vmx)
2462{
2463 vmx->vm_entry_controls_shadow = vmcs_read32(VM_ENTRY_CONTROLS);
2464}
2465
Gleb Natapov2961e8762013-11-25 15:37:13 +02002466static inline void vm_entry_controls_init(struct vcpu_vmx *vmx, u32 val)
2467{
2468 vmcs_write32(VM_ENTRY_CONTROLS, val);
2469 vmx->vm_entry_controls_shadow = val;
2470}
2471
2472static inline void vm_entry_controls_set(struct vcpu_vmx *vmx, u32 val)
2473{
2474 if (vmx->vm_entry_controls_shadow != val)
2475 vm_entry_controls_init(vmx, val);
2476}
2477
2478static inline u32 vm_entry_controls_get(struct vcpu_vmx *vmx)
2479{
2480 return vmx->vm_entry_controls_shadow;
2481}
2482
2483
2484static inline void vm_entry_controls_setbit(struct vcpu_vmx *vmx, u32 val)
2485{
2486 vm_entry_controls_set(vmx, vm_entry_controls_get(vmx) | val);
2487}
2488
2489static inline void vm_entry_controls_clearbit(struct vcpu_vmx *vmx, u32 val)
2490{
2491 vm_entry_controls_set(vmx, vm_entry_controls_get(vmx) & ~val);
2492}
2493
Paolo Bonzini8391ce42016-07-07 14:58:33 +02002494static inline void vm_exit_controls_reset_shadow(struct vcpu_vmx *vmx)
2495{
2496 vmx->vm_exit_controls_shadow = vmcs_read32(VM_EXIT_CONTROLS);
2497}
2498
Gleb Natapov2961e8762013-11-25 15:37:13 +02002499static inline void vm_exit_controls_init(struct vcpu_vmx *vmx, u32 val)
2500{
2501 vmcs_write32(VM_EXIT_CONTROLS, val);
2502 vmx->vm_exit_controls_shadow = val;
2503}
2504
2505static inline void vm_exit_controls_set(struct vcpu_vmx *vmx, u32 val)
2506{
2507 if (vmx->vm_exit_controls_shadow != val)
2508 vm_exit_controls_init(vmx, val);
2509}
2510
2511static inline u32 vm_exit_controls_get(struct vcpu_vmx *vmx)
2512{
2513 return vmx->vm_exit_controls_shadow;
2514}
2515
2516
2517static inline void vm_exit_controls_setbit(struct vcpu_vmx *vmx, u32 val)
2518{
2519 vm_exit_controls_set(vmx, vm_exit_controls_get(vmx) | val);
2520}
2521
2522static inline void vm_exit_controls_clearbit(struct vcpu_vmx *vmx, u32 val)
2523{
2524 vm_exit_controls_set(vmx, vm_exit_controls_get(vmx) & ~val);
2525}
2526
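/*
 * Illustrative sketch (not part of the driver) of the shadowing pattern
 * used by the vm_{entry,exit}_controls_* helpers above: cache the last
 * value written to an expensive field and only perform the write when
 * the value actually changes.  'sketch_ctl_cache' and 'sketch_write_hw'
 * are hypothetical stand-ins for the vcpu_vmx shadow field and
 * vmcs_write32().
 */
struct sketch_ctl_cache {
	u32 shadow;
};

static inline void sketch_write_hw(u32 val)
{
	/* stands in for vmcs_write32(<CONTROLS field>, val) */
}

static inline void sketch_ctl_set(struct sketch_ctl_cache *c, u32 val)
{
	if (c->shadow != val) {
		sketch_write_hw(val);
		c->shadow = val;
	}
}

static inline void sketch_ctl_setbit(struct sketch_ctl_cache *c, u32 mask)
{
	sketch_ctl_set(c, c->shadow | mask);
}

static inline void sketch_ctl_clearbit(struct sketch_ctl_cache *c, u32 mask)
{
	sketch_ctl_set(c, c->shadow & ~mask);
}
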
Avi Kivity2fb92db2011-04-27 19:42:18 +03002527static void vmx_segment_cache_clear(struct vcpu_vmx *vmx)
2528{
2529 vmx->segment_cache.bitmask = 0;
2530}
2531
2532static bool vmx_segment_cache_test_set(struct vcpu_vmx *vmx, unsigned seg,
2533 unsigned field)
2534{
2535 bool ret;
2536 u32 mask = 1 << (seg * SEG_FIELD_NR + field);
2537
2538 if (!(vmx->vcpu.arch.regs_avail & (1 << VCPU_EXREG_SEGMENTS))) {
2539 vmx->vcpu.arch.regs_avail |= (1 << VCPU_EXREG_SEGMENTS);
2540 vmx->segment_cache.bitmask = 0;
2541 }
2542 ret = vmx->segment_cache.bitmask & mask;
2543 vmx->segment_cache.bitmask |= mask;
2544 return ret;
2545}
2546
2547static u16 vmx_read_guest_seg_selector(struct vcpu_vmx *vmx, unsigned seg)
2548{
2549 u16 *p = &vmx->segment_cache.seg[seg].selector;
2550
2551 if (!vmx_segment_cache_test_set(vmx, seg, SEG_FIELD_SEL))
2552 *p = vmcs_read16(kvm_vmx_segment_fields[seg].selector);
2553 return *p;
2554}
2555
2556static ulong vmx_read_guest_seg_base(struct vcpu_vmx *vmx, unsigned seg)
2557{
2558 ulong *p = &vmx->segment_cache.seg[seg].base;
2559
2560 if (!vmx_segment_cache_test_set(vmx, seg, SEG_FIELD_BASE))
2561 *p = vmcs_readl(kvm_vmx_segment_fields[seg].base);
2562 return *p;
2563}
2564
2565static u32 vmx_read_guest_seg_limit(struct vcpu_vmx *vmx, unsigned seg)
2566{
2567 u32 *p = &vmx->segment_cache.seg[seg].limit;
2568
2569 if (!vmx_segment_cache_test_set(vmx, seg, SEG_FIELD_LIMIT))
2570 *p = vmcs_read32(kvm_vmx_segment_fields[seg].limit);
2571 return *p;
2572}
2573
2574static u32 vmx_read_guest_seg_ar(struct vcpu_vmx *vmx, unsigned seg)
2575{
2576 u32 *p = &vmx->segment_cache.seg[seg].ar;
2577
2578 if (!vmx_segment_cache_test_set(vmx, seg, SEG_FIELD_AR))
2579 *p = vmcs_read32(kvm_vmx_segment_fields[seg].ar_bytes);
2580 return *p;
2581}
2582
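/*
 * Illustrative sketch (not part of the driver) of the segment-cache
 * bookkeeping used above: the validity of each cached (segment, field)
 * pair is tracked by a single bit at position seg * SEG_FIELD_NR + field,
 * so the whole cache is invalidated by zeroing one word.  Names below are
 * hypothetical; 'nr_fields' plays the role of SEG_FIELD_NR.
 */
struct sketch_seg_cache {
	u32 bitmask;
};

static inline bool sketch_seg_cached(struct sketch_seg_cache *c,
				     unsigned seg, unsigned field,
				     unsigned nr_fields)
{
	u32 mask = 1u << (seg * nr_fields + field);
	bool hit = c->bitmask & mask;

	/* Mark the entry valid; the caller fills it on a miss. */
	c->bitmask |= mask;
	return hit;
}
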
Avi Kivityabd3f2d2007-05-02 17:57:40 +03002583static void update_exception_bitmap(struct kvm_vcpu *vcpu)
2584{
2585 u32 eb;
2586
Jan Kiszkafd7373c2010-01-20 18:20:20 +01002587 eb = (1u << PF_VECTOR) | (1u << UD_VECTOR) | (1u << MC_VECTOR) |
Paolo Bonzinibd7e5b02017-02-03 21:18:52 -08002588 (1u << DB_VECTOR) | (1u << AC_VECTOR);
Liran Alon9e869482018-03-12 13:12:51 +02002589 /*
2590 * Guest access to VMware backdoor ports could legitimately
2591 * trigger #GP because of TSS I/O permission bitmap.
 2592	 * We intercept those #GPs and allow access to the ports anyway,
2593 * as VMware does.
2594 */
2595 if (enable_vmware_backdoor)
2596 eb |= (1u << GP_VECTOR);
Jan Kiszkafd7373c2010-01-20 18:20:20 +01002597 if ((vcpu->guest_debug &
2598 (KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP)) ==
2599 (KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP))
2600 eb |= 1u << BP_VECTOR;
Avi Kivity7ffd92c2009-06-09 14:10:45 +03002601 if (to_vmx(vcpu)->rmode.vm86_active)
Avi Kivityabd3f2d2007-05-02 17:57:40 +03002602 eb = ~0;
Avi Kivity089d0342009-03-23 18:26:32 +02002603 if (enable_ept)
Sheng Yang14394422008-04-28 12:24:45 +08002604 eb &= ~(1u << PF_VECTOR); /* bypass_guest_pf = 0 */
Nadav Har'El36cf24e2011-05-25 23:15:08 +03002605
2606 /* When we are running a nested L2 guest and L1 specified for it a
2607 * certain exception bitmap, we must trap the same exceptions and pass
2608 * them to L1. When running L2, we will only handle the exceptions
2609 * specified above if L1 did not want them.
2610 */
2611 if (is_guest_mode(vcpu))
2612 eb |= get_vmcs12(vcpu)->exception_bitmap;
2613
Avi Kivityabd3f2d2007-05-02 17:57:40 +03002614 vmcs_write32(EXCEPTION_BITMAP, eb);
2615}
2616
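/*
 * Illustrative sketch (not part of the driver): EXCEPTION_BITMAP, written
 * by update_exception_bitmap() above, is a 32-bit mask with one bit per
 * exception vector; a set bit makes the corresponding guest exception
 * cause a VM exit.  The hypothetical helper below just builds such a mask
 * from a list of vectors (0..31).
 */
static inline u32 sketch_exception_mask(const u8 *vectors, int n)
{
	u32 eb = 0;
	int i;

	for (i = 0; i < n; i++)
		eb |= 1u << vectors[i];
	return eb;
}
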
Ashok Raj15d45072018-02-01 22:59:43 +01002617/*
KarimAllah Ahmedd28b3872018-02-01 22:59:45 +01002618 * Check whether writes to the MSR are intercepted by the currently loaded MSR bitmap.
2619 */
2620static bool msr_write_intercepted(struct kvm_vcpu *vcpu, u32 msr)
2621{
2622 unsigned long *msr_bitmap;
2623 int f = sizeof(unsigned long);
2624
2625 if (!cpu_has_vmx_msr_bitmap())
2626 return true;
2627
2628 msr_bitmap = to_vmx(vcpu)->loaded_vmcs->msr_bitmap;
2629
2630 if (msr <= 0x1fff) {
2631 return !!test_bit(msr, msr_bitmap + 0x800 / f);
2632 } else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff)) {
2633 msr &= 0x1fff;
2634 return !!test_bit(msr, msr_bitmap + 0xc00 / f);
2635 }
2636
2637 return true;
2638}
2639
2640/*
Ashok Raj15d45072018-02-01 22:59:43 +01002641 * Check whether writes to the MSR are intercepted by the L01 (vmcs01) MSR bitmap.
2642 */
2643static bool msr_write_intercepted_l01(struct kvm_vcpu *vcpu, u32 msr)
2644{
2645 unsigned long *msr_bitmap;
2646 int f = sizeof(unsigned long);
2647
2648 if (!cpu_has_vmx_msr_bitmap())
2649 return true;
2650
2651 msr_bitmap = to_vmx(vcpu)->vmcs01.msr_bitmap;
2652
2653 if (msr <= 0x1fff) {
2654 return !!test_bit(msr, msr_bitmap + 0x800 / f);
2655 } else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff)) {
2656 msr &= 0x1fff;
2657 return !!test_bit(msr, msr_bitmap + 0xc00 / f);
2658 }
2659
2660 return true;
2661}
2662
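/*
 * Illustrative sketch (not part of the driver) of the MSR-bitmap layout
 * that the two helpers above index into: the 4-KByte bitmap holds four
 * 1-KByte regions - read-low at offset 0x000, read-high at 0x400,
 * write-low at 0x800 and write-high at 0xc00 - where "low" covers MSRs
 * 0x00000000-0x00001fff and "high" covers 0xc0000000-0xc0001fff.  The
 * hypothetical helper returns the absolute bit index of the *write*
 * intercept for an MSR, or -1 for MSRs outside both ranges (which are
 * always intercepted).
 */
static inline long sketch_msr_write_bit(u32 msr)
{
	if (msr <= 0x1fff)
		return 0x800 * 8 + msr;
	if (msr >= 0xc0000000 && msr <= 0xc0001fff)
		return 0xc00 * 8 + (msr & 0x1fff);
	return -1;
}
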
Gleb Natapov2961e8762013-11-25 15:37:13 +02002663static void clear_atomic_switch_msr_special(struct vcpu_vmx *vmx,
2664 unsigned long entry, unsigned long exit)
Gleb Natapov8bf00a52011-10-05 14:01:22 +02002665{
Gleb Natapov2961e8762013-11-25 15:37:13 +02002666 vm_entry_controls_clearbit(vmx, entry);
2667 vm_exit_controls_clearbit(vmx, exit);
Gleb Natapov8bf00a52011-10-05 14:01:22 +02002668}
2669
Konrad Rzeszutek Wilkca83b4a2018-06-20 20:11:39 -04002670static int find_msr(struct vmx_msrs *m, unsigned int msr)
2671{
2672 unsigned int i;
2673
2674 for (i = 0; i < m->nr; ++i) {
2675 if (m->val[i].index == msr)
2676 return i;
2677 }
2678 return -ENOENT;
2679}
2680
Avi Kivity61d2ef22010-04-28 16:40:38 +03002681static void clear_atomic_switch_msr(struct vcpu_vmx *vmx, unsigned msr)
2682{
Konrad Rzeszutek Wilkca83b4a2018-06-20 20:11:39 -04002683 int i;
Avi Kivity61d2ef22010-04-28 16:40:38 +03002684 struct msr_autoload *m = &vmx->msr_autoload;
2685
Gleb Natapov8bf00a52011-10-05 14:01:22 +02002686 switch (msr) {
2687 case MSR_EFER:
2688 if (cpu_has_load_ia32_efer) {
Gleb Natapov2961e8762013-11-25 15:37:13 +02002689 clear_atomic_switch_msr_special(vmx,
2690 VM_ENTRY_LOAD_IA32_EFER,
Gleb Natapov8bf00a52011-10-05 14:01:22 +02002691 VM_EXIT_LOAD_IA32_EFER);
2692 return;
2693 }
2694 break;
2695 case MSR_CORE_PERF_GLOBAL_CTRL:
2696 if (cpu_has_load_perf_global_ctrl) {
Gleb Natapov2961e8762013-11-25 15:37:13 +02002697 clear_atomic_switch_msr_special(vmx,
Gleb Natapov8bf00a52011-10-05 14:01:22 +02002698 VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL,
2699 VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL);
2700 return;
2701 }
2702 break;
Avi Kivity110312c2010-12-21 12:54:20 +02002703 }
Konrad Rzeszutek Wilkca83b4a2018-06-20 20:11:39 -04002704 i = find_msr(&m->guest, msr);
2705 if (i < 0)
Konrad Rzeszutek Wilk31907092018-06-20 22:00:47 -04002706 goto skip_guest;
Konrad Rzeszutek Wilk33966dd62018-06-20 13:58:37 -04002707 --m->guest.nr;
Konrad Rzeszutek Wilk33966dd62018-06-20 13:58:37 -04002708 m->guest.val[i] = m->guest.val[m->guest.nr];
Konrad Rzeszutek Wilk33966dd62018-06-20 13:58:37 -04002709 vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, m->guest.nr);
Avi Kivity110312c2010-12-21 12:54:20 +02002710
Konrad Rzeszutek Wilk31907092018-06-20 22:00:47 -04002711skip_guest:
2712 i = find_msr(&m->host, msr);
2713 if (i < 0)
Avi Kivity61d2ef22010-04-28 16:40:38 +03002714 return;
Konrad Rzeszutek Wilk31907092018-06-20 22:00:47 -04002715
2716 --m->host.nr;
2717 m->host.val[i] = m->host.val[m->host.nr];
Konrad Rzeszutek Wilk33966dd62018-06-20 13:58:37 -04002718 vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, m->host.nr);
Avi Kivity61d2ef22010-04-28 16:40:38 +03002719}
2720
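/*
 * Illustrative sketch (not part of the driver) of the removal pattern in
 * clear_atomic_switch_msr() above: the autoload lists are unordered, so
 * an entry is deleted by copying the last entry over it and shrinking the
 * count - O(1), no memmove.  'sketch_msr_list' is a hypothetical stand-in
 * for struct vmx_msrs.
 */
struct sketch_msr_list {
	unsigned int nr;
	struct { u32 index; u64 value; } val[8];
};

static inline void sketch_msr_list_remove(struct sketch_msr_list *m, int i)
{
	--m->nr;
	m->val[i] = m->val[m->nr];	/* move the last entry into the hole */
}
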
Gleb Natapov2961e8762013-11-25 15:37:13 +02002721static void add_atomic_switch_msr_special(struct vcpu_vmx *vmx,
2722 unsigned long entry, unsigned long exit,
2723 unsigned long guest_val_vmcs, unsigned long host_val_vmcs,
2724 u64 guest_val, u64 host_val)
Gleb Natapov8bf00a52011-10-05 14:01:22 +02002725{
2726 vmcs_write64(guest_val_vmcs, guest_val);
Sean Christopherson5a5e8a12018-09-26 09:23:56 -07002727 if (host_val_vmcs != HOST_IA32_EFER)
2728 vmcs_write64(host_val_vmcs, host_val);
Gleb Natapov2961e8762013-11-25 15:37:13 +02002729 vm_entry_controls_setbit(vmx, entry);
2730 vm_exit_controls_setbit(vmx, exit);
Gleb Natapov8bf00a52011-10-05 14:01:22 +02002731}
2732
Avi Kivity61d2ef22010-04-28 16:40:38 +03002733static void add_atomic_switch_msr(struct vcpu_vmx *vmx, unsigned msr,
Konrad Rzeszutek Wilk989e3992018-06-20 22:01:22 -04002734 u64 guest_val, u64 host_val, bool entry_only)
Avi Kivity61d2ef22010-04-28 16:40:38 +03002735{
Konrad Rzeszutek Wilk989e3992018-06-20 22:01:22 -04002736 int i, j = 0;
Avi Kivity61d2ef22010-04-28 16:40:38 +03002737 struct msr_autoload *m = &vmx->msr_autoload;
2738
Gleb Natapov8bf00a52011-10-05 14:01:22 +02002739 switch (msr) {
2740 case MSR_EFER:
2741 if (cpu_has_load_ia32_efer) {
Gleb Natapov2961e8762013-11-25 15:37:13 +02002742 add_atomic_switch_msr_special(vmx,
2743 VM_ENTRY_LOAD_IA32_EFER,
Gleb Natapov8bf00a52011-10-05 14:01:22 +02002744 VM_EXIT_LOAD_IA32_EFER,
2745 GUEST_IA32_EFER,
2746 HOST_IA32_EFER,
2747 guest_val, host_val);
2748 return;
2749 }
2750 break;
2751 case MSR_CORE_PERF_GLOBAL_CTRL:
2752 if (cpu_has_load_perf_global_ctrl) {
Gleb Natapov2961e8762013-11-25 15:37:13 +02002753 add_atomic_switch_msr_special(vmx,
Gleb Natapov8bf00a52011-10-05 14:01:22 +02002754 VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL,
2755 VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL,
2756 GUEST_IA32_PERF_GLOBAL_CTRL,
2757 HOST_IA32_PERF_GLOBAL_CTRL,
2758 guest_val, host_val);
2759 return;
2760 }
2761 break;
Radim Krčmář7099e2e2016-03-04 15:08:42 +01002762 case MSR_IA32_PEBS_ENABLE:
2763 /* PEBS needs a quiescent period after being disabled (to write
2764 * a record). Disabling PEBS through VMX MSR swapping doesn't
2765 * provide that period, so a CPU could write host's record into
2766 * guest's memory.
2767 */
2768 wrmsrl(MSR_IA32_PEBS_ENABLE, 0);
Avi Kivity110312c2010-12-21 12:54:20 +02002769 }
2770
Konrad Rzeszutek Wilkca83b4a2018-06-20 20:11:39 -04002771 i = find_msr(&m->guest, msr);
Konrad Rzeszutek Wilk989e3992018-06-20 22:01:22 -04002772 if (!entry_only)
2773 j = find_msr(&m->host, msr);
Avi Kivity61d2ef22010-04-28 16:40:38 +03002774
Konrad Rzeszutek Wilk31907092018-06-20 22:00:47 -04002775 if (i == NR_AUTOLOAD_MSRS || j == NR_AUTOLOAD_MSRS) {
Michael S. Tsirkin60266202013-10-31 00:34:56 +02002776 printk_once(KERN_WARNING "Not enough msr switch entries. "
Gleb Natapove7fc6f93b2011-10-05 14:01:24 +02002777 "Can't add msr %x\n", msr);
2778 return;
Avi Kivity61d2ef22010-04-28 16:40:38 +03002779 }
Konrad Rzeszutek Wilk31907092018-06-20 22:00:47 -04002780 if (i < 0) {
Konrad Rzeszutek Wilkca83b4a2018-06-20 20:11:39 -04002781 i = m->guest.nr++;
Konrad Rzeszutek Wilk33966dd62018-06-20 13:58:37 -04002782 vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, m->guest.nr);
Konrad Rzeszutek Wilk31907092018-06-20 22:00:47 -04002783 }
Konrad Rzeszutek Wilk989e3992018-06-20 22:01:22 -04002784 m->guest.val[i].index = msr;
2785 m->guest.val[i].value = guest_val;
Avi Kivity61d2ef22010-04-28 16:40:38 +03002786
Konrad Rzeszutek Wilk989e3992018-06-20 22:01:22 -04002787 if (entry_only)
2788 return;
2789
Konrad Rzeszutek Wilk31907092018-06-20 22:00:47 -04002790 if (j < 0) {
2791 j = m->host.nr++;
Konrad Rzeszutek Wilk33966dd62018-06-20 13:58:37 -04002792 vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, m->host.nr);
Avi Kivity61d2ef22010-04-28 16:40:38 +03002793 }
Konrad Rzeszutek Wilk31907092018-06-20 22:00:47 -04002794 m->host.val[j].index = msr;
2795 m->host.val[j].value = host_val;
Avi Kivity61d2ef22010-04-28 16:40:38 +03002796}
2797
Avi Kivity92c0d902009-10-29 11:00:16 +02002798static bool update_transition_efer(struct vcpu_vmx *vmx, int efer_offset)
Eddie Dong2cc51562007-05-21 07:28:09 +03002799{
Paolo Bonzini844a5fe2016-03-08 12:13:39 +01002800 u64 guest_efer = vmx->vcpu.arch.efer;
2801 u64 ignore_bits = 0;
Eddie Dong2cc51562007-05-21 07:28:09 +03002802
Paolo Bonzini844a5fe2016-03-08 12:13:39 +01002803 if (!enable_ept) {
2804 /*
2805 * NX is needed to handle CR0.WP=1, CR4.SMEP=1. Testing
2806 * host CPUID is more efficient than testing guest CPUID
2807 * or CR4. Host SMEP is anyway a requirement for guest SMEP.
2808 */
2809 if (boot_cpu_has(X86_FEATURE_SMEP))
2810 guest_efer |= EFER_NX;
2811 else if (!(guest_efer & EFER_NX))
2812 ignore_bits |= EFER_NX;
2813 }
Roel Kluin3a34a882009-08-04 02:08:45 -07002814
Avi Kivity51c6cf62007-08-29 03:48:05 +03002815 /*
Paolo Bonzini844a5fe2016-03-08 12:13:39 +01002816 * LMA and LME handled by hardware; SCE meaningless outside long mode.
Avi Kivity51c6cf62007-08-29 03:48:05 +03002817 */
Paolo Bonzini844a5fe2016-03-08 12:13:39 +01002818 ignore_bits |= EFER_SCE;
Avi Kivity51c6cf62007-08-29 03:48:05 +03002819#ifdef CONFIG_X86_64
2820 ignore_bits |= EFER_LMA | EFER_LME;
2821 /* SCE is meaningful only in long mode on Intel */
2822 if (guest_efer & EFER_LMA)
2823 ignore_bits &= ~(u64)EFER_SCE;
2824#endif
Avi Kivity84ad33e2010-04-28 16:42:29 +03002825
Andy Lutomirskif6577a5f2014-11-07 18:25:18 -08002826 /*
2827 * On EPT, we can't emulate NX, so we must switch EFER atomically.
2828 * On CPUs that support "load IA32_EFER", always switch EFER
2829 * atomically, since it's faster than switching it manually.
2830 */
2831 if (cpu_has_load_ia32_efer ||
2832 (enable_ept && ((vmx->vcpu.arch.efer ^ host_efer) & EFER_NX))) {
Avi Kivity84ad33e2010-04-28 16:42:29 +03002833 if (!(guest_efer & EFER_LMA))
2834 guest_efer &= ~EFER_LME;
Andy Lutomirski54b98bf2014-11-10 11:19:15 -08002835 if (guest_efer != host_efer)
2836 add_atomic_switch_msr(vmx, MSR_EFER,
Konrad Rzeszutek Wilk989e3992018-06-20 22:01:22 -04002837 guest_efer, host_efer, false);
Sean Christopherson02343cf2018-09-26 09:23:43 -07002838 else
2839 clear_atomic_switch_msr(vmx, MSR_EFER);
Avi Kivity84ad33e2010-04-28 16:42:29 +03002840 return false;
Paolo Bonzini844a5fe2016-03-08 12:13:39 +01002841 } else {
Sean Christopherson02343cf2018-09-26 09:23:43 -07002842 clear_atomic_switch_msr(vmx, MSR_EFER);
2843
Paolo Bonzini844a5fe2016-03-08 12:13:39 +01002844 guest_efer &= ~ignore_bits;
2845 guest_efer |= host_efer & ignore_bits;
Avi Kivity84ad33e2010-04-28 16:42:29 +03002846
Paolo Bonzini844a5fe2016-03-08 12:13:39 +01002847 vmx->guest_msrs[efer_offset].data = guest_efer;
2848 vmx->guest_msrs[efer_offset].mask = ~ignore_bits;
2849
2850 return true;
2851 }
Avi Kivity51c6cf62007-08-29 03:48:05 +03002852}
2853
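/*
 * Illustrative sketch (not part of the driver) of the merge performed in
 * update_transition_efer() when EFER goes through the shared-MSR path:
 * bits listed in 'ignore' keep the host's value (LMA/LME are managed by
 * hardware, SCE may not matter outside long mode), every other bit comes
 * from the guest.  Hypothetical helper name.
 */
static inline u64 sketch_merge_efer(u64 guest_efer, u64 host_efer, u64 ignore)
{
	return (guest_efer & ~ignore) | (host_efer & ignore);
}
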
Andy Lutomirskie28baea2017-02-20 08:56:11 -08002854#ifdef CONFIG_X86_32
2855/*
2856 * On 32-bit kernels, VM exits still load the FS and GS bases from the
2857 * VMCS rather than the segment table. KVM uses this helper to figure
2858 * out the current bases to poke them into the VMCS before entry.
2859 */
Gleb Natapov2d49ec72010-02-25 12:43:09 +02002860static unsigned long segment_base(u16 selector)
2861{
Andy Lutomirski8c2e41f2017-02-20 08:56:12 -08002862 struct desc_struct *table;
Gleb Natapov2d49ec72010-02-25 12:43:09 +02002863 unsigned long v;
2864
Andy Lutomirski8c2e41f2017-02-20 08:56:12 -08002865 if (!(selector & ~SEGMENT_RPL_MASK))
Gleb Natapov2d49ec72010-02-25 12:43:09 +02002866 return 0;
2867
Thomas Garnier45fc8752017-03-14 10:05:08 -07002868 table = get_current_gdt_ro();
Gleb Natapov2d49ec72010-02-25 12:43:09 +02002869
Andy Lutomirski8c2e41f2017-02-20 08:56:12 -08002870 if ((selector & SEGMENT_TI_MASK) == SEGMENT_LDT) {
Gleb Natapov2d49ec72010-02-25 12:43:09 +02002871 u16 ldt_selector = kvm_read_ldt();
2872
Andy Lutomirski8c2e41f2017-02-20 08:56:12 -08002873 if (!(ldt_selector & ~SEGMENT_RPL_MASK))
Gleb Natapov2d49ec72010-02-25 12:43:09 +02002874 return 0;
2875
Andy Lutomirski8c2e41f2017-02-20 08:56:12 -08002876 table = (struct desc_struct *)segment_base(ldt_selector);
Gleb Natapov2d49ec72010-02-25 12:43:09 +02002877 }
Andy Lutomirski8c2e41f2017-02-20 08:56:12 -08002878 v = get_desc_base(&table[selector >> 3]);
Gleb Natapov2d49ec72010-02-25 12:43:09 +02002879 return v;
2880}
Andy Lutomirskie28baea2017-02-20 08:56:11 -08002881#endif
Gleb Natapov2d49ec72010-02-25 12:43:09 +02002882
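/*
 * Illustrative sketch (not part of the driver) of what get_desc_base()
 * does for the lookup in segment_base() above: in a legacy 8-byte segment
 * descriptor the base address is split across bits 39:16 (base 23:0) and
 * bits 63:56 (base 31:24).  'raw' is the descriptor read from the GDT/LDT
 * slot selector >> 3.  Hypothetical helper, for documentation only.
 */
static inline unsigned long sketch_desc_base(u64 raw)
{
	return (unsigned long)(((raw >> 16) & 0xffffff) |
			       (((raw >> 56) & 0xff) << 24));
}
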
Sean Christopherson6d6095b2018-07-23 12:32:44 -07002883static void vmx_prepare_switch_to_guest(struct kvm_vcpu *vcpu)
Avi Kivity33ed6322007-05-02 16:54:03 +03002884{
Avi Kivity04d2cc72007-09-10 18:10:54 +03002885 struct vcpu_vmx *vmx = to_vmx(vcpu);
Sean Christophersond7ee0392018-07-23 12:32:47 -07002886 struct vmcs_host_state *host_state;
Arnd Bergmann51e8a8c2018-04-04 12:44:14 +02002887#ifdef CONFIG_X86_64
Vitaly Kuznetsov35060ed2018-03-13 18:48:05 +01002888 int cpu = raw_smp_processor_id();
Arnd Bergmann51e8a8c2018-04-04 12:44:14 +02002889#endif
Sean Christophersone368b872018-07-23 12:32:41 -07002890 unsigned long fs_base, gs_base;
2891 u16 fs_sel, gs_sel;
Avi Kivity26bb0982009-09-07 11:14:12 +03002892 int i;
Avi Kivity04d2cc72007-09-10 18:10:54 +03002893
Sean Christophersond264ee02018-08-27 15:21:12 -07002894 vmx->req_immediate_exit = false;
2895
Sean Christophersonbd9966d2018-07-23 12:32:42 -07002896 if (vmx->loaded_cpu_state)
Avi Kivity33ed6322007-05-02 16:54:03 +03002897 return;
2898
Sean Christophersonbd9966d2018-07-23 12:32:42 -07002899 vmx->loaded_cpu_state = vmx->loaded_vmcs;
Sean Christophersond7ee0392018-07-23 12:32:47 -07002900 host_state = &vmx->loaded_cpu_state->host_state;
Sean Christophersonbd9966d2018-07-23 12:32:42 -07002901
Avi Kivity33ed6322007-05-02 16:54:03 +03002902 /*
2903 * Set host fs and gs selectors. Unfortunately, 22.2.3 does not
2904 * allow segment selectors with cpl > 0 or ti == 1.
2905 */
Sean Christophersond7ee0392018-07-23 12:32:47 -07002906 host_state->ldt_sel = kvm_read_ldt();
Vitaly Kuznetsov42b933b2018-03-13 18:48:04 +01002907
2908#ifdef CONFIG_X86_64
Sean Christophersond7ee0392018-07-23 12:32:47 -07002909 savesegment(ds, host_state->ds_sel);
2910 savesegment(es, host_state->es_sel);
Sean Christophersone368b872018-07-23 12:32:41 -07002911
2912 gs_base = cpu_kernelmode_gs_base(cpu);
Vitaly Kuznetsovb062b792018-07-11 19:37:18 +02002913 if (likely(is_64bit_mm(current->mm))) {
2914 save_fsgs_for_kvm();
Sean Christophersone368b872018-07-23 12:32:41 -07002915 fs_sel = current->thread.fsindex;
2916 gs_sel = current->thread.gsindex;
Vitaly Kuznetsovb062b792018-07-11 19:37:18 +02002917 fs_base = current->thread.fsbase;
Sean Christophersone368b872018-07-23 12:32:41 -07002918 vmx->msr_host_kernel_gs_base = current->thread.gsbase;
Vitaly Kuznetsovb062b792018-07-11 19:37:18 +02002919 } else {
Sean Christophersone368b872018-07-23 12:32:41 -07002920 savesegment(fs, fs_sel);
2921 savesegment(gs, gs_sel);
Vitaly Kuznetsovb062b792018-07-11 19:37:18 +02002922 fs_base = read_msr(MSR_FS_BASE);
Sean Christophersone368b872018-07-23 12:32:41 -07002923 vmx->msr_host_kernel_gs_base = read_msr(MSR_KERNEL_GS_BASE);
Avi Kivity33ed6322007-05-02 16:54:03 +03002924 }
2925
Paolo Bonzini4679b612018-09-24 17:23:01 +02002926 wrmsrl(MSR_KERNEL_GS_BASE, vmx->msr_guest_kernel_gs_base);
Avi Kivity33ed6322007-05-02 16:54:03 +03002927#else
Sean Christophersone368b872018-07-23 12:32:41 -07002928 savesegment(fs, fs_sel);
2929 savesegment(gs, gs_sel);
2930 fs_base = segment_base(fs_sel);
2931 gs_base = segment_base(gs_sel);
Avi Kivity33ed6322007-05-02 16:54:03 +03002932#endif
Sean Christophersone368b872018-07-23 12:32:41 -07002933
Sean Christopherson8f21a0b2018-07-23 12:32:49 -07002934 if (unlikely(fs_sel != host_state->fs_sel)) {
2935 if (!(fs_sel & 7))
2936 vmcs_write16(HOST_FS_SELECTOR, fs_sel);
2937 else
2938 vmcs_write16(HOST_FS_SELECTOR, 0);
2939 host_state->fs_sel = fs_sel;
2940 }
2941 if (unlikely(gs_sel != host_state->gs_sel)) {
2942 if (!(gs_sel & 7))
2943 vmcs_write16(HOST_GS_SELECTOR, gs_sel);
2944 else
2945 vmcs_write16(HOST_GS_SELECTOR, 0);
2946 host_state->gs_sel = gs_sel;
2947 }
Sean Christopherson5e079c72018-07-23 12:32:50 -07002948 if (unlikely(fs_base != host_state->fs_base)) {
2949 vmcs_writel(HOST_FS_BASE, fs_base);
2950 host_state->fs_base = fs_base;
2951 }
2952 if (unlikely(gs_base != host_state->gs_base)) {
2953 vmcs_writel(HOST_GS_BASE, gs_base);
2954 host_state->gs_base = gs_base;
2955 }
Avi Kivity33ed6322007-05-02 16:54:03 +03002956
Avi Kivity26bb0982009-09-07 11:14:12 +03002957 for (i = 0; i < vmx->save_nmsrs; ++i)
2958 kvm_set_shared_msr(vmx->guest_msrs[i].index,
Avi Kivityd5696722009-12-02 12:28:47 +02002959 vmx->guest_msrs[i].data,
2960 vmx->guest_msrs[i].mask);
Avi Kivity33ed6322007-05-02 16:54:03 +03002961}
2962
Sean Christopherson6d6095b2018-07-23 12:32:44 -07002963static void vmx_prepare_switch_to_host(struct vcpu_vmx *vmx)
Avi Kivity33ed6322007-05-02 16:54:03 +03002964{
Sean Christophersond7ee0392018-07-23 12:32:47 -07002965 struct vmcs_host_state *host_state;
2966
Sean Christophersonbd9966d2018-07-23 12:32:42 -07002967 if (!vmx->loaded_cpu_state)
Avi Kivity33ed6322007-05-02 16:54:03 +03002968 return;
2969
Sean Christophersonbd9966d2018-07-23 12:32:42 -07002970 WARN_ON_ONCE(vmx->loaded_cpu_state != vmx->loaded_vmcs);
Sean Christophersond7ee0392018-07-23 12:32:47 -07002971 host_state = &vmx->loaded_cpu_state->host_state;
Sean Christophersonbd9966d2018-07-23 12:32:42 -07002972
Avi Kivitye1beb1d2007-11-18 13:50:24 +02002973 ++vmx->vcpu.stat.host_state_reload;
Sean Christophersonbd9966d2018-07-23 12:32:42 -07002974 vmx->loaded_cpu_state = NULL;
2975
Avi Kivityc8770e72010-11-11 12:37:26 +02002976#ifdef CONFIG_X86_64
Paolo Bonzini4679b612018-09-24 17:23:01 +02002977 rdmsrl(MSR_KERNEL_GS_BASE, vmx->msr_guest_kernel_gs_base);
Avi Kivityc8770e72010-11-11 12:37:26 +02002978#endif
Sean Christophersond7ee0392018-07-23 12:32:47 -07002979 if (host_state->ldt_sel || (host_state->gs_sel & 7)) {
2980 kvm_load_ldt(host_state->ldt_sel);
Avi Kivity33ed6322007-05-02 16:54:03 +03002981#ifdef CONFIG_X86_64
Sean Christophersond7ee0392018-07-23 12:32:47 -07002982 load_gs_index(host_state->gs_sel);
Avi Kivity9581d442010-10-19 16:46:55 +02002983#else
Sean Christophersond7ee0392018-07-23 12:32:47 -07002984 loadsegment(gs, host_state->gs_sel);
Avi Kivity33ed6322007-05-02 16:54:03 +03002985#endif
Avi Kivity33ed6322007-05-02 16:54:03 +03002986 }
Sean Christophersond7ee0392018-07-23 12:32:47 -07002987 if (host_state->fs_sel & 7)
2988 loadsegment(fs, host_state->fs_sel);
Avi Kivityb2da15a2012-05-13 19:53:24 +03002989#ifdef CONFIG_X86_64
Sean Christophersond7ee0392018-07-23 12:32:47 -07002990 if (unlikely(host_state->ds_sel | host_state->es_sel)) {
2991 loadsegment(ds, host_state->ds_sel);
2992 loadsegment(es, host_state->es_sel);
Avi Kivityb2da15a2012-05-13 19:53:24 +03002993 }
Avi Kivityb2da15a2012-05-13 19:53:24 +03002994#endif
Andy Lutomirskib7ffc442017-02-20 08:56:14 -08002995 invalidate_tss_limit();
Avi Kivity44ea2b12009-09-06 15:55:37 +03002996#ifdef CONFIG_X86_64
Avi Kivityc8770e72010-11-11 12:37:26 +02002997 wrmsrl(MSR_KERNEL_GS_BASE, vmx->msr_host_kernel_gs_base);
Avi Kivity44ea2b12009-09-06 15:55:37 +03002998#endif
Thomas Garnier45fc8752017-03-14 10:05:08 -07002999 load_fixmap_gdt(raw_smp_processor_id());
Avi Kivity33ed6322007-05-02 16:54:03 +03003000}
3001
Sean Christopherson678e3152018-07-23 12:32:43 -07003002#ifdef CONFIG_X86_64
3003static u64 vmx_read_guest_kernel_gs_base(struct vcpu_vmx *vmx)
Avi Kivitya9b21b62008-06-24 11:48:49 +03003004{
Paolo Bonzini4679b612018-09-24 17:23:01 +02003005 preempt_disable();
3006 if (vmx->loaded_cpu_state)
3007 rdmsrl(MSR_KERNEL_GS_BASE, vmx->msr_guest_kernel_gs_base);
3008 preempt_enable();
Sean Christopherson678e3152018-07-23 12:32:43 -07003009 return vmx->msr_guest_kernel_gs_base;
Avi Kivitya9b21b62008-06-24 11:48:49 +03003010}
3011
Sean Christopherson678e3152018-07-23 12:32:43 -07003012static void vmx_write_guest_kernel_gs_base(struct vcpu_vmx *vmx, u64 data)
3013{
Paolo Bonzini4679b612018-09-24 17:23:01 +02003014 preempt_disable();
3015 if (vmx->loaded_cpu_state)
3016 wrmsrl(MSR_KERNEL_GS_BASE, data);
3017 preempt_enable();
Sean Christopherson678e3152018-07-23 12:32:43 -07003018 vmx->msr_guest_kernel_gs_base = data;
3019}
3020#endif
3021
Feng Wu28b835d2015-09-18 22:29:54 +08003022static void vmx_vcpu_pi_load(struct kvm_vcpu *vcpu, int cpu)
3023{
3024 struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu);
3025 struct pi_desc old, new;
3026 unsigned int dest;
3027
Paolo Bonzini31afb2e2017-06-06 12:57:06 +02003028 /*
3029 * In case of hot-plug or hot-unplug, we may have to undo
3030 * vmx_vcpu_pi_put even if there is no assigned device. And we
3031 * always keep PI.NDST up to date for simplicity: it makes the
3032 * code easier, and CPU migration is not a fast path.
3033 */
3034 if (!pi_test_sn(pi_desc) && vcpu->cpu == cpu)
Feng Wu28b835d2015-09-18 22:29:54 +08003035 return;
3036
Paolo Bonzini31afb2e2017-06-06 12:57:06 +02003037 /*
3038 * First handle the simple case where no cmpxchg is necessary; just
3039 * allow posting non-urgent interrupts.
3040 *
3041 * If the 'nv' field is POSTED_INTR_WAKEUP_VECTOR, do not change
3042 * PI.NDST: pi_post_block will do it for us and the wakeup_handler
3043 * expects the VCPU to be on the blocked_vcpu_list that matches
3044 * PI.NDST.
3045 */
3046 if (pi_desc->nv == POSTED_INTR_WAKEUP_VECTOR ||
3047 vcpu->cpu == cpu) {
3048 pi_clear_sn(pi_desc);
3049 return;
3050 }
3051
3052 /* The full case. */
Feng Wu28b835d2015-09-18 22:29:54 +08003053 do {
3054 old.control = new.control = pi_desc->control;
3055
Paolo Bonzini31afb2e2017-06-06 12:57:06 +02003056 dest = cpu_physical_id(cpu);
Feng Wu28b835d2015-09-18 22:29:54 +08003057
Paolo Bonzini31afb2e2017-06-06 12:57:06 +02003058 if (x2apic_enabled())
3059 new.ndst = dest;
3060 else
3061 new.ndst = (dest << 8) & 0xFF00;
Feng Wu28b835d2015-09-18 22:29:54 +08003062
Feng Wu28b835d2015-09-18 22:29:54 +08003063 new.sn = 0;
Paolo Bonzinic0a16662017-09-28 17:58:41 +02003064 } while (cmpxchg64(&pi_desc->control, old.control,
3065 new.control) != old.control);
Feng Wu28b835d2015-09-18 22:29:54 +08003066}
Xiao Guangrong1be0e612016-03-22 16:51:18 +08003067
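/*
 * Illustrative sketch (not part of the driver) of the lock-free update
 * loop used by vmx_vcpu_pi_load() above: snapshot the 64-bit control
 * word, build the new value, and retry when the compare-and-swap sees a
 * concurrent writer.  __sync_val_compare_and_swap() is used here only as
 * a stand-in for the kernel's cmpxchg64(); the helper and its arguments
 * are hypothetical.
 */
static inline void sketch_update_ndst(volatile u64 *control, u64 ndst_mask,
				      u64 new_ndst)
{
	u64 old, new;

	do {
		old = *control;
		new = (old & ~ndst_mask) | new_ndst;
	} while (__sync_val_compare_and_swap(control, old, new) != old);
}
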
Peter Feinerc95ba922016-08-17 09:36:47 -07003068static void decache_tsc_multiplier(struct vcpu_vmx *vmx)
3069{
3070 vmx->current_tsc_ratio = vmx->vcpu.arch.tsc_scaling_ratio;
3071 vmcs_write64(TSC_MULTIPLIER, vmx->current_tsc_ratio);
3072}
3073
Avi Kivity6aa8b732006-12-10 02:21:36 -08003074/*
3075 * Switches to specified vcpu, until a matching vcpu_put(), but assumes
3076 * vcpu mutex is already taken.
3077 */
Avi Kivity15ad7142007-07-11 18:17:21 +03003078static void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
Avi Kivity6aa8b732006-12-10 02:21:36 -08003079{
Gregory Haskinsa2fa3e92007-07-27 08:13:10 -04003080 struct vcpu_vmx *vmx = to_vmx(vcpu);
Jim Mattsonb80c76e2016-07-29 18:56:53 -07003081 bool already_loaded = vmx->loaded_vmcs->cpu == cpu;
Avi Kivity6aa8b732006-12-10 02:21:36 -08003082
Jim Mattsonb80c76e2016-07-29 18:56:53 -07003083 if (!already_loaded) {
David Hildenbrandfe0e80b2017-03-10 12:47:13 +01003084 loaded_vmcs_clear(vmx->loaded_vmcs);
Dongxiao Xu92fe13b2010-05-11 18:29:42 +08003085 local_irq_disable();
Zhang Yanfei8f536b72012-12-06 23:43:34 +08003086 crash_disable_local_vmclear(cpu);
Xiao Guangrong5a560f82012-11-28 20:54:14 +08003087
3088 /*
3089 * Read loaded_vmcs->cpu should be before fetching
3090 * loaded_vmcs->loaded_vmcss_on_cpu_link.
3091 * See the comments in __loaded_vmcs_clear().
3092 */
3093 smp_rmb();
3094
Nadav Har'Eld462b812011-05-24 15:26:10 +03003095 list_add(&vmx->loaded_vmcs->loaded_vmcss_on_cpu_link,
3096 &per_cpu(loaded_vmcss_on_cpu, cpu));
Zhang Yanfei8f536b72012-12-06 23:43:34 +08003097 crash_enable_local_vmclear(cpu);
Dongxiao Xu92fe13b2010-05-11 18:29:42 +08003098 local_irq_enable();
Jim Mattsonb80c76e2016-07-29 18:56:53 -07003099 }
3100
3101 if (per_cpu(current_vmcs, cpu) != vmx->loaded_vmcs->vmcs) {
3102 per_cpu(current_vmcs, cpu) = vmx->loaded_vmcs->vmcs;
3103 vmcs_load(vmx->loaded_vmcs->vmcs);
Ashok Raj15d45072018-02-01 22:59:43 +01003104 indirect_branch_prediction_barrier();
Jim Mattsonb80c76e2016-07-29 18:56:53 -07003105 }
3106
3107 if (!already_loaded) {
Andy Lutomirski59c58ceb2017-03-22 14:32:33 -07003108 void *gdt = get_current_gdt_ro();
Jim Mattsonb80c76e2016-07-29 18:56:53 -07003109 unsigned long sysenter_esp;
3110
3111 kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
Dongxiao Xu92fe13b2010-05-11 18:29:42 +08003112
Avi Kivity6aa8b732006-12-10 02:21:36 -08003113 /*
3114 * Linux uses per-cpu TSS and GDT, so set these when switching
Andy Lutomirskie0c23062017-02-20 08:56:10 -08003115 * processors. See 22.2.4.
Avi Kivity6aa8b732006-12-10 02:21:36 -08003116 */
Andy Lutomirskie0c23062017-02-20 08:56:10 -08003117 vmcs_writel(HOST_TR_BASE,
Andy Lutomirski72f5e082017-12-04 15:07:20 +01003118 (unsigned long)&get_cpu_entry_area(cpu)->tss.x86_tss);
Andy Lutomirski59c58ceb2017-03-22 14:32:33 -07003119 vmcs_writel(HOST_GDTR_BASE, (unsigned long)gdt); /* 22.2.4 */
Avi Kivity6aa8b732006-12-10 02:21:36 -08003120
Andy Lutomirskib7ffc442017-02-20 08:56:14 -08003121 /*
3122 * VM exits change the host TR limit to 0x67 after a VM
3123 * exit. This is okay, since 0x67 covers everything except
 3124		 * the IO bitmap and we have code to handle the IO bitmap
3125 * being lost after a VM exit.
3126 */
3127 BUILD_BUG_ON(IO_BITMAP_OFFSET - 1 != 0x67);
3128
Avi Kivity6aa8b732006-12-10 02:21:36 -08003129 rdmsrl(MSR_IA32_SYSENTER_ESP, sysenter_esp);
3130 vmcs_writel(HOST_IA32_SYSENTER_ESP, sysenter_esp); /* 22.2.3 */
Haozhong Zhangff2c3a12015-10-20 15:39:10 +08003131
Nadav Har'Eld462b812011-05-24 15:26:10 +03003132 vmx->loaded_vmcs->cpu = cpu;
Avi Kivity6aa8b732006-12-10 02:21:36 -08003133 }
Feng Wu28b835d2015-09-18 22:29:54 +08003134
Owen Hofmann2680d6d2016-03-01 13:36:13 -08003135 /* Setup TSC multiplier */
3136 if (kvm_has_tsc_control &&
Peter Feinerc95ba922016-08-17 09:36:47 -07003137 vmx->current_tsc_ratio != vcpu->arch.tsc_scaling_ratio)
3138 decache_tsc_multiplier(vmx);
Owen Hofmann2680d6d2016-03-01 13:36:13 -08003139
Feng Wu28b835d2015-09-18 22:29:54 +08003140 vmx_vcpu_pi_load(vcpu, cpu);
Xiao Guangrong1be0e612016-03-22 16:51:18 +08003141 vmx->host_pkru = read_pkru();
Wanpeng Li74c55932017-11-29 01:31:20 -08003142 vmx->host_debugctlmsr = get_debugctlmsr();
Feng Wu28b835d2015-09-18 22:29:54 +08003143}
3144
3145static void vmx_vcpu_pi_put(struct kvm_vcpu *vcpu)
3146{
3147 struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu);
3148
3149 if (!kvm_arch_has_assigned_device(vcpu->kvm) ||
Yang Zhanga0052192016-06-13 09:56:56 +08003150 !irq_remapping_cap(IRQ_POSTING_CAP) ||
3151 !kvm_vcpu_apicv_active(vcpu))
Feng Wu28b835d2015-09-18 22:29:54 +08003152 return;
3153
3154 /* Set SN when the vCPU is preempted */
3155 if (vcpu->preempted)
3156 pi_set_sn(pi_desc);
Avi Kivity6aa8b732006-12-10 02:21:36 -08003157}
3158
3159static void vmx_vcpu_put(struct kvm_vcpu *vcpu)
3160{
Feng Wu28b835d2015-09-18 22:29:54 +08003161 vmx_vcpu_pi_put(vcpu);
3162
Sean Christopherson6d6095b2018-07-23 12:32:44 -07003163 vmx_prepare_switch_to_host(to_vmx(vcpu));
Avi Kivity6aa8b732006-12-10 02:21:36 -08003164}
3165
Wanpeng Lif244dee2017-07-20 01:11:54 -07003166static bool emulation_required(struct kvm_vcpu *vcpu)
3167{
3168 return emulate_invalid_guest_state && !guest_state_valid(vcpu);
3169}
3170
Avi Kivityedcafe32009-12-30 18:07:40 +02003171static void vmx_decache_cr0_guest_bits(struct kvm_vcpu *vcpu);
3172
Nadav Har'Elfe3ef052011-05-25 23:10:02 +03003173/*
3174 * Return the cr0 value that a nested guest would read. This is a combination
3175 * of the real cr0 used to run the guest (guest_cr0), and the bits shadowed by
3176 * its hypervisor (cr0_read_shadow).
3177 */
3178static inline unsigned long nested_read_cr0(struct vmcs12 *fields)
3179{
3180 return (fields->guest_cr0 & ~fields->cr0_guest_host_mask) |
3181 (fields->cr0_read_shadow & fields->cr0_guest_host_mask);
3182}
3183static inline unsigned long nested_read_cr4(struct vmcs12 *fields)
3184{
3185 return (fields->guest_cr4 & ~fields->cr4_guest_host_mask) |
3186 (fields->cr4_read_shadow & fields->cr4_guest_host_mask);
3187}
3188
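/*
 * Illustrative sketch (not part of the driver) of the rule that
 * nested_read_cr0()/nested_read_cr4() implement: for a control register
 * with a guest/host mask, bits owned by the hypervisor (mask bit set) are
 * read from the read shadow, all other bits from the real register.
 * Hypothetical helper name.
 */
static inline unsigned long sketch_shadowed_cr(unsigned long real,
					       unsigned long shadow,
					       unsigned long owner_mask)
{
	return (real & ~owner_mask) | (shadow & owner_mask);
}
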
Avi Kivity6aa8b732006-12-10 02:21:36 -08003189static unsigned long vmx_get_rflags(struct kvm_vcpu *vcpu)
3190{
Avi Kivity78ac8b42010-04-08 18:19:35 +03003191 unsigned long rflags, save_rflags;
Avi Kivity345dcaa2009-08-12 15:29:37 +03003192
Avi Kivity6de12732011-03-07 12:51:22 +02003193 if (!test_bit(VCPU_EXREG_RFLAGS, (ulong *)&vcpu->arch.regs_avail)) {
3194 __set_bit(VCPU_EXREG_RFLAGS, (ulong *)&vcpu->arch.regs_avail);
3195 rflags = vmcs_readl(GUEST_RFLAGS);
3196 if (to_vmx(vcpu)->rmode.vm86_active) {
3197 rflags &= RMODE_GUEST_OWNED_EFLAGS_BITS;
3198 save_rflags = to_vmx(vcpu)->rmode.save_rflags;
3199 rflags |= save_rflags & ~RMODE_GUEST_OWNED_EFLAGS_BITS;
3200 }
3201 to_vmx(vcpu)->rflags = rflags;
Avi Kivity78ac8b42010-04-08 18:19:35 +03003202 }
Avi Kivity6de12732011-03-07 12:51:22 +02003203 return to_vmx(vcpu)->rflags;
Avi Kivity6aa8b732006-12-10 02:21:36 -08003204}
3205
3206static void vmx_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags)
3207{
Wanpeng Lif244dee2017-07-20 01:11:54 -07003208 unsigned long old_rflags = vmx_get_rflags(vcpu);
3209
Avi Kivity6de12732011-03-07 12:51:22 +02003210 __set_bit(VCPU_EXREG_RFLAGS, (ulong *)&vcpu->arch.regs_avail);
3211 to_vmx(vcpu)->rflags = rflags;
Avi Kivity78ac8b42010-04-08 18:19:35 +03003212 if (to_vmx(vcpu)->rmode.vm86_active) {
3213 to_vmx(vcpu)->rmode.save_rflags = rflags;
Glauber de Oliveira Costa053de042008-01-30 13:31:27 +01003214 rflags |= X86_EFLAGS_IOPL | X86_EFLAGS_VM;
Avi Kivity78ac8b42010-04-08 18:19:35 +03003215 }
Avi Kivity6aa8b732006-12-10 02:21:36 -08003216 vmcs_writel(GUEST_RFLAGS, rflags);
Wanpeng Lif244dee2017-07-20 01:11:54 -07003217
3218 if ((old_rflags ^ to_vmx(vcpu)->rflags) & X86_EFLAGS_VM)
3219 to_vmx(vcpu)->emulation_required = emulation_required(vcpu);
Avi Kivity6aa8b732006-12-10 02:21:36 -08003220}
3221
Paolo Bonzini37ccdcb2014-05-20 14:29:47 +02003222static u32 vmx_get_interrupt_shadow(struct kvm_vcpu *vcpu)
Glauber Costa2809f5d2009-05-12 16:21:05 -04003223{
3224 u32 interruptibility = vmcs_read32(GUEST_INTERRUPTIBILITY_INFO);
3225 int ret = 0;
3226
3227 if (interruptibility & GUEST_INTR_STATE_STI)
Jan Kiszka48005f62010-02-19 19:38:07 +01003228 ret |= KVM_X86_SHADOW_INT_STI;
Glauber Costa2809f5d2009-05-12 16:21:05 -04003229 if (interruptibility & GUEST_INTR_STATE_MOV_SS)
Jan Kiszka48005f62010-02-19 19:38:07 +01003230 ret |= KVM_X86_SHADOW_INT_MOV_SS;
Glauber Costa2809f5d2009-05-12 16:21:05 -04003231
Paolo Bonzini37ccdcb2014-05-20 14:29:47 +02003232 return ret;
Glauber Costa2809f5d2009-05-12 16:21:05 -04003233}
3234
3235static void vmx_set_interrupt_shadow(struct kvm_vcpu *vcpu, int mask)
3236{
3237 u32 interruptibility_old = vmcs_read32(GUEST_INTERRUPTIBILITY_INFO);
3238 u32 interruptibility = interruptibility_old;
3239
3240 interruptibility &= ~(GUEST_INTR_STATE_STI | GUEST_INTR_STATE_MOV_SS);
3241
Jan Kiszka48005f62010-02-19 19:38:07 +01003242 if (mask & KVM_X86_SHADOW_INT_MOV_SS)
Glauber Costa2809f5d2009-05-12 16:21:05 -04003243 interruptibility |= GUEST_INTR_STATE_MOV_SS;
Jan Kiszka48005f62010-02-19 19:38:07 +01003244 else if (mask & KVM_X86_SHADOW_INT_STI)
Glauber Costa2809f5d2009-05-12 16:21:05 -04003245 interruptibility |= GUEST_INTR_STATE_STI;
3246
3247 if ((interruptibility != interruptibility_old))
3248 vmcs_write32(GUEST_INTERRUPTIBILITY_INFO, interruptibility);
3249}
3250
Avi Kivity6aa8b732006-12-10 02:21:36 -08003251static void skip_emulated_instruction(struct kvm_vcpu *vcpu)
3252{
3253 unsigned long rip;
Avi Kivity6aa8b732006-12-10 02:21:36 -08003254
Marcelo Tosatti5fdbf972008-06-27 14:58:02 -03003255 rip = kvm_rip_read(vcpu);
Avi Kivity6aa8b732006-12-10 02:21:36 -08003256 rip += vmcs_read32(VM_EXIT_INSTRUCTION_LEN);
Marcelo Tosatti5fdbf972008-06-27 14:58:02 -03003257 kvm_rip_write(vcpu, rip);
Avi Kivity6aa8b732006-12-10 02:21:36 -08003258
Glauber Costa2809f5d2009-05-12 16:21:05 -04003259 /* skipping an emulated instruction also counts */
3260 vmx_set_interrupt_shadow(vcpu, 0);
Avi Kivity6aa8b732006-12-10 02:21:36 -08003261}
3262
Paolo Bonzinib96fb432017-07-27 12:29:32 +02003263static void nested_vmx_inject_exception_vmexit(struct kvm_vcpu *vcpu,
3264 unsigned long exit_qual)
3265{
3266 struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
3267 unsigned int nr = vcpu->arch.exception.nr;
3268 u32 intr_info = nr | INTR_INFO_VALID_MASK;
3269
3270 if (vcpu->arch.exception.has_error_code) {
3271 vmcs12->vm_exit_intr_error_code = vcpu->arch.exception.error_code;
3272 intr_info |= INTR_INFO_DELIVER_CODE_MASK;
3273 }
3274
3275 if (kvm_exception_is_soft(nr))
3276 intr_info |= INTR_TYPE_SOFT_EXCEPTION;
3277 else
3278 intr_info |= INTR_TYPE_HARD_EXCEPTION;
3279
3280 if (!(vmcs12->idt_vectoring_info_field & VECTORING_INFO_VALID_MASK) &&
3281 vmx_get_nmi_mask(vcpu))
3282 intr_info |= INTR_INFO_UNBLOCK_NMI;
3283
3284 nested_vmx_vmexit(vcpu, EXIT_REASON_EXCEPTION_NMI, intr_info, exit_qual);
3285}
3286
Nadav Har'El0b6ac342011-05-25 23:13:36 +03003287/*
 3288 * KVM wants to inject page faults that it received into the guest. This function
 3289 * checks whether, for a nested guest, they need to be injected to L1 or L2.
Nadav Har'El0b6ac342011-05-25 23:13:36 +03003290 */
Wanpeng Libfcf83b2017-08-24 03:35:11 -07003291static int nested_vmx_check_exception(struct kvm_vcpu *vcpu, unsigned long *exit_qual)
Nadav Har'El0b6ac342011-05-25 23:13:36 +03003292{
3293 struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
Wanpeng Liadfe20f2017-07-13 18:30:41 -07003294 unsigned int nr = vcpu->arch.exception.nr;
Jim Mattsonda998b42018-10-16 14:29:22 -07003295 bool has_payload = vcpu->arch.exception.has_payload;
3296 unsigned long payload = vcpu->arch.exception.payload;
Nadav Har'El0b6ac342011-05-25 23:13:36 +03003297
Paolo Bonzinib96fb432017-07-27 12:29:32 +02003298 if (nr == PF_VECTOR) {
3299 if (vcpu->arch.exception.nested_apf) {
Wanpeng Libfcf83b2017-08-24 03:35:11 -07003300 *exit_qual = vcpu->arch.apf.nested_apf_token;
Paolo Bonzinib96fb432017-07-27 12:29:32 +02003301 return 1;
3302 }
Paolo Bonzinib96fb432017-07-27 12:29:32 +02003303 if (nested_vmx_is_page_fault_vmexit(vmcs12,
3304 vcpu->arch.exception.error_code)) {
Jim Mattsonda998b42018-10-16 14:29:22 -07003305 *exit_qual = has_payload ? payload : vcpu->arch.cr2;
Paolo Bonzinib96fb432017-07-27 12:29:32 +02003306 return 1;
3307 }
Jim Mattsonf10c7292018-10-16 14:29:23 -07003308 } else if (vmcs12->exception_bitmap & (1u << nr)) {
3309 if (nr == DB_VECTOR) {
3310 if (!has_payload) {
3311 payload = vcpu->arch.dr6;
3312 payload &= ~(DR6_FIXED_1 | DR6_BT);
3313 payload ^= DR6_RTM;
Jim Mattsoncfb634f2018-09-21 10:36:17 -07003314 }
Jim Mattsonf10c7292018-10-16 14:29:23 -07003315 *exit_qual = payload;
3316 } else
3317 *exit_qual = 0;
3318 return 1;
Wanpeng Liadfe20f2017-07-13 18:30:41 -07003319 }
3320
Paolo Bonzinib96fb432017-07-27 12:29:32 +02003321 return 0;
Nadav Har'El0b6ac342011-05-25 23:13:36 +03003322}
3323
Wanpeng Licaa057a2018-03-12 04:53:03 -07003324static void vmx_clear_hlt(struct kvm_vcpu *vcpu)
3325{
3326 /*
3327 * Ensure that we clear the HLT state in the VMCS. We don't need to
3328 * explicitly skip the instruction because if the HLT state is set,
3329 * then the instruction is already executing and RIP has already been
3330 * advanced.
3331 */
3332 if (kvm_hlt_in_guest(vcpu->kvm) &&
3333 vmcs_read32(GUEST_ACTIVITY_STATE) == GUEST_ACTIVITY_HLT)
3334 vmcs_write32(GUEST_ACTIVITY_STATE, GUEST_ACTIVITY_ACTIVE);
3335}
3336
Wanpeng Licfcd20e2017-07-13 18:30:39 -07003337static void vmx_queue_exception(struct kvm_vcpu *vcpu)
Avi Kivity298101d2007-11-25 13:41:11 +02003338{
Jan Kiszka77ab6db2008-07-14 12:28:51 +02003339 struct vcpu_vmx *vmx = to_vmx(vcpu);
Wanpeng Licfcd20e2017-07-13 18:30:39 -07003340 unsigned nr = vcpu->arch.exception.nr;
3341 bool has_error_code = vcpu->arch.exception.has_error_code;
Wanpeng Licfcd20e2017-07-13 18:30:39 -07003342 u32 error_code = vcpu->arch.exception.error_code;
Jan Kiszka8ab2d2e2008-12-15 13:52:10 +01003343 u32 intr_info = nr | INTR_INFO_VALID_MASK;
Jan Kiszka77ab6db2008-07-14 12:28:51 +02003344
Jim Mattsonda998b42018-10-16 14:29:22 -07003345 kvm_deliver_exception_payload(vcpu);
3346
Jan Kiszka8ab2d2e2008-12-15 13:52:10 +01003347 if (has_error_code) {
Jan Kiszka77ab6db2008-07-14 12:28:51 +02003348 vmcs_write32(VM_ENTRY_EXCEPTION_ERROR_CODE, error_code);
Jan Kiszka8ab2d2e2008-12-15 13:52:10 +01003349 intr_info |= INTR_INFO_DELIVER_CODE_MASK;
3350 }
Jan Kiszka77ab6db2008-07-14 12:28:51 +02003351
Avi Kivity7ffd92c2009-06-09 14:10:45 +03003352 if (vmx->rmode.vm86_active) {
Serge E. Hallyn71f98332011-04-13 09:12:54 -05003353 int inc_eip = 0;
3354 if (kvm_exception_is_soft(nr))
3355 inc_eip = vcpu->arch.event_exit_inst_len;
3356 if (kvm_inject_realmode_interrupt(vcpu, nr, inc_eip) != EMULATE_DONE)
Mohammed Gamala92601b2010-09-19 14:34:07 +02003357 kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu);
Jan Kiszka77ab6db2008-07-14 12:28:51 +02003358 return;
3359 }
3360
Sean Christophersonadd5ff72018-03-23 09:34:00 -07003361 WARN_ON_ONCE(vmx->emulation_required);
3362
Gleb Natapov66fd3f72009-05-11 13:35:50 +03003363 if (kvm_exception_is_soft(nr)) {
3364 vmcs_write32(VM_ENTRY_INSTRUCTION_LEN,
3365 vmx->vcpu.arch.event_exit_inst_len);
Jan Kiszka8ab2d2e2008-12-15 13:52:10 +01003366 intr_info |= INTR_TYPE_SOFT_EXCEPTION;
3367 } else
3368 intr_info |= INTR_TYPE_HARD_EXCEPTION;
3369
3370 vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, intr_info);
Wanpeng Licaa057a2018-03-12 04:53:03 -07003371
3372 vmx_clear_hlt(vcpu);
Avi Kivity298101d2007-11-25 13:41:11 +02003373}
3374
Sheng Yang4e47c7a2009-12-18 16:48:47 +08003375static bool vmx_rdtscp_supported(void)
3376{
3377 return cpu_has_vmx_rdtscp();
3378}
3379
Mao, Junjiead756a12012-07-02 01:18:48 +00003380static bool vmx_invpcid_supported(void)
3381{
Junaid Shahideb4b2482018-06-27 14:59:14 -07003382 return cpu_has_vmx_invpcid();
Mao, Junjiead756a12012-07-02 01:18:48 +00003383}
3384
Avi Kivity6aa8b732006-12-10 02:21:36 -08003385/*
Eddie Donga75beee2007-05-17 18:55:15 +03003386 * Swap MSR entry in host/guest MSR entry array.
3387 */
Rusty Russell8b9cf982007-07-30 16:31:43 +10003388static void move_msr_up(struct vcpu_vmx *vmx, int from, int to)
Eddie Donga75beee2007-05-17 18:55:15 +03003389{
Avi Kivity26bb0982009-09-07 11:14:12 +03003390 struct shared_msr_entry tmp;
Gregory Haskinsa2fa3e92007-07-27 08:13:10 -04003391
3392 tmp = vmx->guest_msrs[to];
3393 vmx->guest_msrs[to] = vmx->guest_msrs[from];
3394 vmx->guest_msrs[from] = tmp;
Eddie Donga75beee2007-05-17 18:55:15 +03003395}
3396
3397/*
Avi Kivitye38aea32007-04-19 13:22:48 +03003398 * Set up the vmcs to automatically save and restore system
3399 * msrs. Don't touch the 64-bit msrs if the guest is in legacy
3400 * mode, as fiddling with msrs is very expensive.
3401 */
Rusty Russell8b9cf982007-07-30 16:31:43 +10003402static void setup_msrs(struct vcpu_vmx *vmx)
Avi Kivitye38aea32007-04-19 13:22:48 +03003403{
Avi Kivity26bb0982009-09-07 11:14:12 +03003404 int save_nmsrs, index;
Avi Kivitye38aea32007-04-19 13:22:48 +03003405
Eddie Donga75beee2007-05-17 18:55:15 +03003406 save_nmsrs = 0;
Avi Kivity4d56c8a2007-04-19 14:28:44 +03003407#ifdef CONFIG_X86_64
Rusty Russell8b9cf982007-07-30 16:31:43 +10003408 if (is_long_mode(&vmx->vcpu)) {
Rusty Russell8b9cf982007-07-30 16:31:43 +10003409 index = __find_msr_index(vmx, MSR_SYSCALL_MASK);
Eddie Donga75beee2007-05-17 18:55:15 +03003410 if (index >= 0)
Rusty Russell8b9cf982007-07-30 16:31:43 +10003411 move_msr_up(vmx, index, save_nmsrs++);
3412 index = __find_msr_index(vmx, MSR_LSTAR);
Eddie Donga75beee2007-05-17 18:55:15 +03003413 if (index >= 0)
Rusty Russell8b9cf982007-07-30 16:31:43 +10003414 move_msr_up(vmx, index, save_nmsrs++);
3415 index = __find_msr_index(vmx, MSR_CSTAR);
Eddie Donga75beee2007-05-17 18:55:15 +03003416 if (index >= 0)
Rusty Russell8b9cf982007-07-30 16:31:43 +10003417 move_msr_up(vmx, index, save_nmsrs++);
Sheng Yang4e47c7a2009-12-18 16:48:47 +08003418 index = __find_msr_index(vmx, MSR_TSC_AUX);
Radim Krčmářd6321d42017-08-05 00:12:49 +02003419 if (index >= 0 && guest_cpuid_has(&vmx->vcpu, X86_FEATURE_RDTSCP))
Sheng Yang4e47c7a2009-12-18 16:48:47 +08003420 move_msr_up(vmx, index, save_nmsrs++);
Eddie Donga75beee2007-05-17 18:55:15 +03003421 /*
Brian Gerst8c065852010-07-17 09:03:26 -04003422 * MSR_STAR is only needed on long mode guests, and only
Eddie Donga75beee2007-05-17 18:55:15 +03003423 * if efer.sce is enabled.
3424 */
Brian Gerst8c065852010-07-17 09:03:26 -04003425 index = __find_msr_index(vmx, MSR_STAR);
Avi Kivityf6801df2010-01-21 15:31:50 +02003426 if ((index >= 0) && (vmx->vcpu.arch.efer & EFER_SCE))
Rusty Russell8b9cf982007-07-30 16:31:43 +10003427 move_msr_up(vmx, index, save_nmsrs++);
Avi Kivity4d56c8a2007-04-19 14:28:44 +03003428 }
Eddie Donga75beee2007-05-17 18:55:15 +03003429#endif
Avi Kivity92c0d902009-10-29 11:00:16 +02003430 index = __find_msr_index(vmx, MSR_EFER);
3431 if (index >= 0 && update_transition_efer(vmx, index))
Avi Kivity26bb0982009-09-07 11:14:12 +03003432 move_msr_up(vmx, index, save_nmsrs++);
Avi Kivity4d56c8a2007-04-19 14:28:44 +03003433
Avi Kivity26bb0982009-09-07 11:14:12 +03003434 vmx->save_nmsrs = save_nmsrs;
Avi Kivity58972972009-02-24 22:26:47 +02003435
Yang Zhang8d146952013-01-25 10:18:50 +08003436 if (cpu_has_vmx_msr_bitmap())
Paolo Bonzini904e14f2018-01-16 16:51:18 +01003437 vmx_update_msr_bitmap(&vmx->vcpu);
Avi Kivitye38aea32007-04-19 13:22:48 +03003438}
3439
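/*
 * Illustrative sketch (not part of the driver) of the partitioning done
 * by setup_msrs()/move_msr_up() above: entries that must be switched on
 * entry/exit are swapped into the leading "active" slots of the array, so
 * later code only walks the first save_nmsrs entries.  Types and names
 * below are hypothetical.
 */
struct sketch_shared_msr {
	u32 index;
	u64 data;
};

static inline int sketch_activate_msr(struct sketch_shared_msr *arr, int nr,
				      int active, u32 index)
{
	int i;

	for (i = 0; i < nr; i++) {
		if (arr[i].index == index) {
			struct sketch_shared_msr tmp = arr[active];

			arr[active] = arr[i];
			arr[i] = tmp;
			return active + 1;	/* new number of active slots */
		}
	}
	return active;	/* not found, partition unchanged */
}
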
KarimAllah Ahmede79f2452018-04-14 05:10:52 +02003440static u64 vmx_read_l1_tsc_offset(struct kvm_vcpu *vcpu)
Avi Kivity6aa8b732006-12-10 02:21:36 -08003441{
KarimAllah Ahmede79f2452018-04-14 05:10:52 +02003442 struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
Avi Kivity6aa8b732006-12-10 02:21:36 -08003443
KarimAllah Ahmede79f2452018-04-14 05:10:52 +02003444 if (is_guest_mode(vcpu) &&
3445 (vmcs12->cpu_based_vm_exec_control & CPU_BASED_USE_TSC_OFFSETING))
3446 return vcpu->arch.tsc_offset - vmcs12->tsc_offset;
3447
3448 return vcpu->arch.tsc_offset;
Avi Kivity6aa8b732006-12-10 02:21:36 -08003449}
3450
3451/*
Zachary Amsden99e3e302010-08-19 22:07:17 -10003452 * Writes 'offset' into the guest's timestamp counter offset register.
Avi Kivity6aa8b732006-12-10 02:21:36 -08003453 */
Zachary Amsden99e3e302010-08-19 22:07:17 -10003454static void vmx_write_tsc_offset(struct kvm_vcpu *vcpu, u64 offset)
Avi Kivity6aa8b732006-12-10 02:21:36 -08003455{
Nadav Har'El27fc51b2011-08-02 15:54:52 +03003456 if (is_guest_mode(vcpu)) {
Nadav Har'El79918252011-05-25 23:15:39 +03003457 /*
Nadav Har'El27fc51b2011-08-02 15:54:52 +03003458 * We're here if L1 chose not to trap WRMSR to TSC. According
 3459		 * to the spec, this should set L1's TSC; the offset that L1
3460 * set for L2 remains unchanged, and still needs to be added
3461 * to the newly set TSC to get L2's TSC.
Nadav Har'El79918252011-05-25 23:15:39 +03003462 */
Nadav Har'El27fc51b2011-08-02 15:54:52 +03003463 struct vmcs12 *vmcs12;
Nadav Har'El27fc51b2011-08-02 15:54:52 +03003464 /* recalculate vmcs02.TSC_OFFSET: */
3465 vmcs12 = get_vmcs12(vcpu);
3466 vmcs_write64(TSC_OFFSET, offset +
3467 (nested_cpu_has(vmcs12, CPU_BASED_USE_TSC_OFFSETING) ?
3468 vmcs12->tsc_offset : 0));
3469 } else {
Yoshihiro YUNOMAE489223e2013-06-12 16:43:44 +09003470 trace_kvm_write_tsc_offset(vcpu->vcpu_id,
3471 vmcs_read64(TSC_OFFSET), offset);
Nadav Har'El27fc51b2011-08-02 15:54:52 +03003472 vmcs_write64(TSC_OFFSET, offset);
3473 }
Avi Kivity6aa8b732006-12-10 02:21:36 -08003474}
3475
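/*
 * Illustrative sketch (not part of the driver) of the arithmetic in
 * vmx_write_tsc_offset() above: with nested TSC offsetting enabled, L2
 * observes TSC(L2) = TSC(host) + offset(L0->L1) + offset(L1->L2), so
 * vmcs02's TSC_OFFSET is the sum of L1's offset and the offset L1
 * programmed for L2 in vmcs12.  Hypothetical helper name.
 */
static inline u64 sketch_vmcs02_tsc_offset(u64 l1_offset, u64 l2_offset,
					   bool nested_tsc_offsetting)
{
	return l1_offset + (nested_tsc_offsetting ? l2_offset : 0);
}
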
Nadav Har'El801d3422011-05-25 23:02:23 +03003476/*
3477 * nested_vmx_allowed() checks whether a guest should be allowed to use VMX
3478 * instructions and MSRs (i.e., nested VMX). Nested VMX is disabled for
3479 * all guests if the "nested" module option is off, and can also be disabled
3480 * for a single guest by disabling its VMX cpuid bit.
3481 */
3482static inline bool nested_vmx_allowed(struct kvm_vcpu *vcpu)
3483{
Radim Krčmářd6321d42017-08-05 00:12:49 +02003484 return nested && guest_cpuid_has(vcpu, X86_FEATURE_VMX);
Nadav Har'El801d3422011-05-25 23:02:23 +03003485}
3486
Avi Kivity6aa8b732006-12-10 02:21:36 -08003487/*
Nadav Har'Elb87a51a2011-05-25 23:04:25 +03003488 * nested_vmx_setup_ctls_msrs() sets up variables containing the values to be
3489 * returned for the various VMX controls MSRs when nested VMX is enabled.
3490 * The same values should also be used to verify that vmcs12 control fields are
3491 * valid during nested entry from L1 to L2.
3492 * Each of these control msrs has a low and high 32-bit half: A low bit is on
3493 * if the corresponding bit in the (32-bit) control field *must* be on, and a
3494 * bit in the high half is on if the corresponding bit in the control field
3495 * may be on. See also vmx_control_verify().
Nadav Har'Elb87a51a2011-05-25 23:04:25 +03003496 */
Paolo Bonzini6677f3d2018-02-26 13:40:08 +01003497static void nested_vmx_setup_ctls_msrs(struct nested_vmx_msrs *msrs, bool apicv)
Nadav Har'Elb87a51a2011-05-25 23:04:25 +03003498{
Paolo Bonzini13893092018-02-26 13:40:09 +01003499 if (!nested) {
3500 memset(msrs, 0, sizeof(*msrs));
3501 return;
3502 }
3503
Nadav Har'Elb87a51a2011-05-25 23:04:25 +03003504 /*
3505 * Note that as a general rule, the high half of the MSRs (bits in
3506 * the control fields which may be 1) should be initialized by the
3507 * intersection of the underlying hardware's MSR (i.e., features which
3508 * can be supported) and the list of features we want to expose -
3509 * because they are known to be properly supported in our code.
3510 * Also, usually, the low half of the MSRs (bits which must be 1) can
3511 * be set to 0, meaning that L1 may turn off any of these bits. The
3512 * reason is that if one of these bits is necessary, it will appear
3513 * in vmcs01 and prepare_vmcs02, when it bitwise-or's the control
3514 * fields of vmcs01 and vmcs02, will turn these bits off - and
Paolo Bonzini7313c692017-07-27 10:31:25 +02003515 * nested_vmx_exit_reflected() will not pass related exits to L1.
Nadav Har'Elb87a51a2011-05-25 23:04:25 +03003516 * These rules have exceptions below.
3517 */
3518
3519 /* pin-based controls */
Jan Kiszkaeabeaac2013-03-13 11:30:50 +01003520 rdmsr(MSR_IA32_VMX_PINBASED_CTLS,
Paolo Bonzini6677f3d2018-02-26 13:40:08 +01003521 msrs->pinbased_ctls_low,
3522 msrs->pinbased_ctls_high);
3523 msrs->pinbased_ctls_low |=
Wincy Vanb9c237b2015-02-03 23:56:30 +08003524 PIN_BASED_ALWAYSON_WITHOUT_TRUE_MSR;
Paolo Bonzini6677f3d2018-02-26 13:40:08 +01003525 msrs->pinbased_ctls_high &=
Wincy Vanb9c237b2015-02-03 23:56:30 +08003526 PIN_BASED_EXT_INTR_MASK |
3527 PIN_BASED_NMI_EXITING |
Paolo Bonzini13893092018-02-26 13:40:09 +01003528 PIN_BASED_VIRTUAL_NMIS |
3529 (apicv ? PIN_BASED_POSTED_INTR : 0);
Paolo Bonzini6677f3d2018-02-26 13:40:08 +01003530 msrs->pinbased_ctls_high |=
Wincy Vanb9c237b2015-02-03 23:56:30 +08003531 PIN_BASED_ALWAYSON_WITHOUT_TRUE_MSR |
Jan Kiszka0238ea92013-03-13 11:31:24 +01003532 PIN_BASED_VMX_PREEMPTION_TIMER;
Nadav Har'Elb87a51a2011-05-25 23:04:25 +03003533
Jan Kiszka3dbcd8d2014-06-16 13:59:40 +02003534 /* exit controls */
Arthur Chunqi Lic0dfee52013-08-06 18:41:45 +08003535 rdmsr(MSR_IA32_VMX_EXIT_CTLS,
Paolo Bonzini6677f3d2018-02-26 13:40:08 +01003536 msrs->exit_ctls_low,
3537 msrs->exit_ctls_high);
3538 msrs->exit_ctls_low =
Wincy Vanb9c237b2015-02-03 23:56:30 +08003539 VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR;
Bandan Dase0ba1a62014-04-19 18:17:46 -04003540
Paolo Bonzini6677f3d2018-02-26 13:40:08 +01003541 msrs->exit_ctls_high &=
Nadav Har'Elb87a51a2011-05-25 23:04:25 +03003542#ifdef CONFIG_X86_64
Arthur Chunqi Lic0dfee52013-08-06 18:41:45 +08003543 VM_EXIT_HOST_ADDR_SPACE_SIZE |
Nadav Har'Elb87a51a2011-05-25 23:04:25 +03003544#endif
Jan Kiszkaf41245002014-03-07 20:03:13 +01003545 VM_EXIT_LOAD_IA32_PAT | VM_EXIT_SAVE_IA32_PAT;
Paolo Bonzini6677f3d2018-02-26 13:40:08 +01003546 msrs->exit_ctls_high |=
Wincy Vanb9c237b2015-02-03 23:56:30 +08003547 VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR |
Jan Kiszkaf41245002014-03-07 20:03:13 +01003548 VM_EXIT_LOAD_IA32_EFER | VM_EXIT_SAVE_IA32_EFER |
Bandan Dase0ba1a62014-04-19 18:17:46 -04003549 VM_EXIT_SAVE_VMX_PREEMPTION_TIMER | VM_EXIT_ACK_INTR_ON_EXIT;
3550
Jan Kiszka2996fca2014-06-16 13:59:43 +02003551 /* We support free control of debug control saving. */
Paolo Bonzini6677f3d2018-02-26 13:40:08 +01003552 msrs->exit_ctls_low &= ~VM_EXIT_SAVE_DEBUG_CONTROLS;
Jan Kiszka2996fca2014-06-16 13:59:43 +02003553
Nadav Har'Elb87a51a2011-05-25 23:04:25 +03003554 /* entry controls */
3555 rdmsr(MSR_IA32_VMX_ENTRY_CTLS,
Paolo Bonzini6677f3d2018-02-26 13:40:08 +01003556 msrs->entry_ctls_low,
3557 msrs->entry_ctls_high);
3558 msrs->entry_ctls_low =
Wincy Vanb9c237b2015-02-03 23:56:30 +08003559 VM_ENTRY_ALWAYSON_WITHOUT_TRUE_MSR;
Paolo Bonzini6677f3d2018-02-26 13:40:08 +01003560 msrs->entry_ctls_high &=
Jan Kiszka57435342013-08-06 10:39:56 +02003561#ifdef CONFIG_X86_64
3562 VM_ENTRY_IA32E_MODE |
3563#endif
3564 VM_ENTRY_LOAD_IA32_PAT;
Paolo Bonzini6677f3d2018-02-26 13:40:08 +01003565 msrs->entry_ctls_high |=
Wincy Vanb9c237b2015-02-03 23:56:30 +08003566 (VM_ENTRY_ALWAYSON_WITHOUT_TRUE_MSR | VM_ENTRY_LOAD_IA32_EFER);
Jan Kiszka57435342013-08-06 10:39:56 +02003567
Jan Kiszka2996fca2014-06-16 13:59:43 +02003568 /* We support free control of debug control loading. */
Paolo Bonzini6677f3d2018-02-26 13:40:08 +01003569 msrs->entry_ctls_low &= ~VM_ENTRY_LOAD_DEBUG_CONTROLS;
Jan Kiszka2996fca2014-06-16 13:59:43 +02003570
Nadav Har'Elb87a51a2011-05-25 23:04:25 +03003571 /* cpu-based controls */
3572 rdmsr(MSR_IA32_VMX_PROCBASED_CTLS,
Paolo Bonzini6677f3d2018-02-26 13:40:08 +01003573 msrs->procbased_ctls_low,
3574 msrs->procbased_ctls_high);
3575 msrs->procbased_ctls_low =
Wincy Vanb9c237b2015-02-03 23:56:30 +08003576 CPU_BASED_ALWAYSON_WITHOUT_TRUE_MSR;
Paolo Bonzini6677f3d2018-02-26 13:40:08 +01003577 msrs->procbased_ctls_high &=
Jan Kiszkaa294c9b2013-10-23 17:43:09 +01003578 CPU_BASED_VIRTUAL_INTR_PENDING |
3579 CPU_BASED_VIRTUAL_NMI_PENDING | CPU_BASED_USE_TSC_OFFSETING |
Nadav Har'Elb87a51a2011-05-25 23:04:25 +03003580 CPU_BASED_HLT_EXITING | CPU_BASED_INVLPG_EXITING |
3581 CPU_BASED_MWAIT_EXITING | CPU_BASED_CR3_LOAD_EXITING |
3582 CPU_BASED_CR3_STORE_EXITING |
3583#ifdef CONFIG_X86_64
3584 CPU_BASED_CR8_LOAD_EXITING | CPU_BASED_CR8_STORE_EXITING |
3585#endif
3586 CPU_BASED_MOV_DR_EXITING | CPU_BASED_UNCOND_IO_EXITING |
Mihai Donțu5f3d45e2015-07-05 20:08:57 +03003587 CPU_BASED_USE_IO_BITMAPS | CPU_BASED_MONITOR_TRAP_FLAG |
3588 CPU_BASED_MONITOR_EXITING | CPU_BASED_RDPMC_EXITING |
3589 CPU_BASED_RDTSC_EXITING | CPU_BASED_PAUSE_EXITING |
3590 CPU_BASED_TPR_SHADOW | CPU_BASED_ACTIVATE_SECONDARY_CONTROLS;
Nadav Har'Elb87a51a2011-05-25 23:04:25 +03003591 /*
3592 * We can allow some features even when not supported by the
3593 * hardware. For example, L1 can specify an MSR bitmap - and we
3594 * can use it to avoid exits to L1 - even when L0 runs L2
3595 * without MSR bitmaps.
3596 */
Paolo Bonzini6677f3d2018-02-26 13:40:08 +01003597 msrs->procbased_ctls_high |=
Wincy Vanb9c237b2015-02-03 23:56:30 +08003598 CPU_BASED_ALWAYSON_WITHOUT_TRUE_MSR |
Jan Kiszka560b7ee2014-06-16 13:59:42 +02003599 CPU_BASED_USE_MSR_BITMAPS;
Nadav Har'Elb87a51a2011-05-25 23:04:25 +03003600
Jan Kiszka3dcdf3ec2014-06-16 13:59:41 +02003601 /* We support free control of CR3 access interception. */
Paolo Bonzini6677f3d2018-02-26 13:40:08 +01003602 msrs->procbased_ctls_low &=
Jan Kiszka3dcdf3ec2014-06-16 13:59:41 +02003603 ~(CPU_BASED_CR3_LOAD_EXITING | CPU_BASED_CR3_STORE_EXITING);
3604
Paolo Bonzini80154d72017-08-24 13:55:35 +02003605 /*
3606 * secondary cpu-based controls. Do not include those that
 3607	 * depend on CPUID bits; they are added later by vmx_cpuid_update.
3608 */
Nadav Har'Elb87a51a2011-05-25 23:04:25 +03003609 rdmsr(MSR_IA32_VMX_PROCBASED_CTLS2,
Paolo Bonzini6677f3d2018-02-26 13:40:08 +01003610 msrs->secondary_ctls_low,
3611 msrs->secondary_ctls_high);
3612 msrs->secondary_ctls_low = 0;
3613 msrs->secondary_ctls_high &=
Paolo Bonzini1b073042016-10-25 16:06:30 +02003614 SECONDARY_EXEC_DESC |
Wincy Vanf2b93282015-02-03 23:56:03 +08003615 SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE |
Wincy Van82f0dd42015-02-03 23:57:18 +08003616 SECONDARY_EXEC_APIC_REGISTER_VIRT |
Wincy Van608406e2015-02-03 23:57:51 +08003617 SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY |
Paolo Bonzini3db13482017-08-24 14:48:03 +02003618 SECONDARY_EXEC_WBINVD_EXITING;
Paolo Bonzini2cf7ea92018-10-03 10:34:00 +02003619
Liran Alon32c7acf2018-06-23 02:35:11 +03003620 /*
3621 * We can emulate "VMCS shadowing," even if the hardware
3622 * doesn't support it.
3623 */
3624 msrs->secondary_ctls_high |=
3625 SECONDARY_EXEC_SHADOW_VMCS;
Jan Kiszkac18911a2013-03-13 16:06:41 +01003626
Nadav Har'Elafa61f7522013-08-07 14:59:22 +02003627 if (enable_ept) {
3628 /* nested EPT: emulate EPT also to L1 */
Paolo Bonzini6677f3d2018-02-26 13:40:08 +01003629 msrs->secondary_ctls_high |=
Radim Krčmář0790ec12015-03-17 14:02:32 +01003630 SECONDARY_EXEC_ENABLE_EPT;
Paolo Bonzini6677f3d2018-02-26 13:40:08 +01003631 msrs->ept_caps = VMX_EPT_PAGE_WALK_4_BIT |
Paolo Bonzini7db74262017-03-08 10:49:19 +01003632 VMX_EPTP_WB_BIT | VMX_EPT_INVEPT_BIT;
Bandan Das02120c42016-07-12 18:18:52 -04003633 if (cpu_has_vmx_ept_execute_only())
Paolo Bonzini6677f3d2018-02-26 13:40:08 +01003634 msrs->ept_caps |=
Bandan Das02120c42016-07-12 18:18:52 -04003635 VMX_EPT_EXECUTE_ONLY_BIT;
Paolo Bonzini6677f3d2018-02-26 13:40:08 +01003636 msrs->ept_caps &= vmx_capability.ept;
3637 msrs->ept_caps |= VMX_EPT_EXTENT_GLOBAL_BIT |
Paolo Bonzini7db74262017-03-08 10:49:19 +01003638 VMX_EPT_EXTENT_CONTEXT_BIT | VMX_EPT_2MB_PAGE_BIT |
3639 VMX_EPT_1GB_PAGE_BIT;
Bandan Das03efce62017-05-05 15:25:15 -04003640 if (enable_ept_ad_bits) {
Paolo Bonzini6677f3d2018-02-26 13:40:08 +01003641 msrs->secondary_ctls_high |=
Bandan Das03efce62017-05-05 15:25:15 -04003642 SECONDARY_EXEC_ENABLE_PML;
Paolo Bonzini6677f3d2018-02-26 13:40:08 +01003643 msrs->ept_caps |= VMX_EPT_AD_BIT;
Bandan Das03efce62017-05-05 15:25:15 -04003644 }
David Hildenbrand1c13bff2017-08-24 20:51:33 +02003645 }
Nadav Har'Elafa61f7522013-08-07 14:59:22 +02003646
Bandan Das27c42a12017-08-03 15:54:42 -04003647 if (cpu_has_vmx_vmfunc()) {
Paolo Bonzini6677f3d2018-02-26 13:40:08 +01003648 msrs->secondary_ctls_high |=
Bandan Das27c42a12017-08-03 15:54:42 -04003649 SECONDARY_EXEC_ENABLE_VMFUNC;
Bandan Das41ab9372017-08-03 15:54:43 -04003650 /*
3651 * Advertise EPTP switching unconditionally
3652 * since we emulate it
3653 */
Wanpeng Li575b3a22017-10-19 07:00:34 +08003654 if (enable_ept)
Paolo Bonzini6677f3d2018-02-26 13:40:08 +01003655 msrs->vmfunc_controls =
Wanpeng Li575b3a22017-10-19 07:00:34 +08003656 VMX_VMFUNC_EPTP_SWITCHING;
Bandan Das27c42a12017-08-03 15:54:42 -04003657 }
3658
Paolo Bonzinief697a72016-03-18 16:58:38 +01003659 /*
3660 * Old versions of KVM use the single-context version without
3661 * checking for support, so declare that it is supported even
3662 * though it is treated as global context. The alternative is
3663 * not failing the single-context invvpid, and it is worse.
3664 */
Wanpeng Li63cb6d52017-03-20 21:18:53 -07003665 if (enable_vpid) {
Paolo Bonzini6677f3d2018-02-26 13:40:08 +01003666 msrs->secondary_ctls_high |=
Wanpeng Li63cb6d52017-03-20 21:18:53 -07003667 SECONDARY_EXEC_ENABLE_VPID;
Paolo Bonzini6677f3d2018-02-26 13:40:08 +01003668 msrs->vpid_caps = VMX_VPID_INVVPID_BIT |
Jan Dakinevichbcdde302016-10-28 07:00:30 +03003669 VMX_VPID_EXTENT_SUPPORTED_MASK;
David Hildenbrand1c13bff2017-08-24 20:51:33 +02003670 }
Wanpeng Li99b83ac2015-10-13 09:12:21 -07003671
Radim Krčmář0790ec12015-03-17 14:02:32 +01003672 if (enable_unrestricted_guest)
Paolo Bonzini6677f3d2018-02-26 13:40:08 +01003673 msrs->secondary_ctls_high |=
Radim Krčmář0790ec12015-03-17 14:02:32 +01003674 SECONDARY_EXEC_UNRESTRICTED_GUEST;
3675
Paolo Bonzini2cf7ea92018-10-03 10:34:00 +02003676 if (flexpriority_enabled)
3677 msrs->secondary_ctls_high |=
3678 SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES;
3679
Jan Kiszkac18911a2013-03-13 16:06:41 +01003680 /* miscellaneous data */
Wincy Vanb9c237b2015-02-03 23:56:30 +08003681 rdmsr(MSR_IA32_VMX_MISC,
Paolo Bonzini6677f3d2018-02-26 13:40:08 +01003682 msrs->misc_low,
3683 msrs->misc_high);
3684 msrs->misc_low &= VMX_MISC_SAVE_EFER_LMA;
3685 msrs->misc_low |=
Jim Mattsonf4160e42018-05-29 09:11:33 -07003686 MSR_IA32_VMX_MISC_VMWRITE_SHADOW_RO_FIELDS |
Wincy Vanb9c237b2015-02-03 23:56:30 +08003687 VMX_MISC_EMULATED_PREEMPTION_TIMER_RATE |
Jan Kiszkaf41245002014-03-07 20:03:13 +01003688 VMX_MISC_ACTIVITY_HLT;
Paolo Bonzini6677f3d2018-02-26 13:40:08 +01003689 msrs->misc_high = 0;
David Matlack62cc6b9d2016-11-29 18:14:07 -08003690
3691 /*
3692 * This MSR reports some information about VMX support. We
3693 * should return information about the VMX we emulate for the
3694 * guest, and the VMCS structure we give it - not about the
3695 * VMX support of the underlying hardware.
3696 */
Paolo Bonzini6677f3d2018-02-26 13:40:08 +01003697 msrs->basic =
David Matlack62cc6b9d2016-11-29 18:14:07 -08003698 VMCS12_REVISION |
3699 VMX_BASIC_TRUE_CTLS |
3700 ((u64)VMCS12_SIZE << VMX_BASIC_VMCS_SIZE_SHIFT) |
3701 (VMX_BASIC_MEM_TYPE_WB << VMX_BASIC_MEM_TYPE_SHIFT);
3702
3703 if (cpu_has_vmx_basic_inout())
Paolo Bonzini6677f3d2018-02-26 13:40:08 +01003704 msrs->basic |= VMX_BASIC_INOUT;
David Matlack62cc6b9d2016-11-29 18:14:07 -08003705
3706 /*
David Matlack8322ebb2016-11-29 18:14:09 -08003707 * These MSRs specify bits which the guest must keep fixed on
David Matlack62cc6b9d2016-11-29 18:14:07 -08003708 * while L1 is in VMXON mode (in L1's root mode, or running an L2).
3709 * We picked the standard core2 setting.
3710 */
3711#define VMXON_CR0_ALWAYSON (X86_CR0_PE | X86_CR0_PG | X86_CR0_NE)
3712#define VMXON_CR4_ALWAYSON X86_CR4_VMXE
Paolo Bonzini6677f3d2018-02-26 13:40:08 +01003713 msrs->cr0_fixed0 = VMXON_CR0_ALWAYSON;
3714 msrs->cr4_fixed0 = VMXON_CR4_ALWAYSON;
David Matlack8322ebb2016-11-29 18:14:09 -08003715
3716 /* These MSRs specify bits which the guest must keep fixed off. */
Paolo Bonzini6677f3d2018-02-26 13:40:08 +01003717 rdmsrl(MSR_IA32_VMX_CR0_FIXED1, msrs->cr0_fixed1);
3718 rdmsrl(MSR_IA32_VMX_CR4_FIXED1, msrs->cr4_fixed1);
David Matlack62cc6b9d2016-11-29 18:14:07 -08003719
3720 /* highest index: VMX_PREEMPTION_TIMER_VALUE */
Paolo Bonzini6677f3d2018-02-26 13:40:08 +01003721 msrs->vmcs_enum = VMCS12_MAX_FIELD_INDEX << 1;
Nadav Har'Elb87a51a2011-05-25 23:04:25 +03003722}
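
/*
 * A stand-alone sketch (not part of the kernel build) of how the synthesized
 * IA32_VMX_BASIC value built above decomposes. The field positions mirror
 * the shifts used in this file (revision id in bits 30:0, VMCS size in bits
 * 44:32, memory type in bits 53:50, "true controls" in bit 55); the value
 * decoded in main() is constructed here, not read from hardware.
 */
#include <stdint.h>
#include <stdio.h>

static void decode_vmx_basic(uint64_t basic)
{
	printf("revision id : %#x\n", (unsigned int)(basic & 0x7fffffff));
	printf("VMCS size   : %u bytes\n", (unsigned int)((basic >> 32) & 0x1fff));
	printf("memory type : %u (6 == write-back)\n",
	       (unsigned int)((basic >> 50) & 0xf));
	printf("true ctls   : %s\n", ((basic >> 55) & 1) ? "yes" : "no");
}

int main(void)
{
	uint64_t basic = 0x12ULL | (4096ULL << 32) | (6ULL << 50) | (1ULL << 55);

	decode_vmx_basic(basic);
	return 0;
}
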
3723
David Matlack38991522016-11-29 18:14:08 -08003724/*
3725 * if fixed0[i] == 1: val[i] must be 1
3726 * if fixed1[i] == 0: val[i] must be 0
3727 */
3728static inline bool fixed_bits_valid(u64 val, u64 fixed0, u64 fixed1)
3729{
3730 return ((val & fixed1) | fixed0) == val;
Nadav Har'Elb87a51a2011-05-25 23:04:25 +03003731}
3732
3733static inline bool vmx_control_verify(u32 control, u32 low, u32 high)
3734{
David Matlack38991522016-11-29 18:14:08 -08003735 return fixed_bits_valid(control, low, high);
Nadav Har'Elb87a51a2011-05-25 23:04:25 +03003736}
3737
3738static inline u64 vmx_control_msr(u32 low, u32 high)
3739{
3740 return low | ((u64)high << 32);
3741}
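
/*
 * A stand-alone sketch (not part of the kernel build) of the check the
 * helpers above implement: the low word of a VMX control capability MSR
 * holds the bits that must be 1, the high word the bits that may be 1, and
 * fixed_bits_valid()/vmx_control_verify() accept a control value only if it
 * honours both. ctl_valid() and the masks below are made-up names and
 * values for demonstration.
 */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

static bool ctl_valid(uint32_t ctl, uint32_t must_be_1, uint32_t may_be_1)
{
	return ((ctl & may_be_1) | must_be_1) == ctl;
}

int main(void)
{
	uint32_t must_be_1 = 0x1, may_be_1 = 0xf;

	printf("%d\n", ctl_valid(0x05, must_be_1, may_be_1)); /* 1: ok */
	printf("%d\n", ctl_valid(0x04, must_be_1, may_be_1)); /* 0: required bit 0 clear */
	printf("%d\n", ctl_valid(0x15, must_be_1, may_be_1)); /* 0: disallowed bit 4 set */
	return 0;
}
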
3742
David Matlack62cc6b9d2016-11-29 18:14:07 -08003743static bool is_bitwise_subset(u64 superset, u64 subset, u64 mask)
3744{
3745 superset &= mask;
3746 subset &= mask;
3747
3748 return (superset | subset) == superset;
3749}
3750
3751static int vmx_restore_vmx_basic(struct vcpu_vmx *vmx, u64 data)
3752{
3753 const u64 feature_and_reserved =
3754 /* feature (except bit 48; see below) */
3755 BIT_ULL(49) | BIT_ULL(54) | BIT_ULL(55) |
3756 /* reserved */
3757 BIT_ULL(31) | GENMASK_ULL(47, 45) | GENMASK_ULL(63, 56);
Paolo Bonzini6677f3d2018-02-26 13:40:08 +01003758 u64 vmx_basic = vmx->nested.msrs.basic;
David Matlack62cc6b9d2016-11-29 18:14:07 -08003759
3760 if (!is_bitwise_subset(vmx_basic, data, feature_and_reserved))
3761 return -EINVAL;
3762
3763 /*
3764 * KVM does not emulate a version of VMX that constrains physical
3765 * addresses of VMX structures (e.g. VMCS) to 32-bits.
3766 */
3767 if (data & BIT_ULL(48))
3768 return -EINVAL;
3769
3770 if (vmx_basic_vmcs_revision_id(vmx_basic) !=
3771 vmx_basic_vmcs_revision_id(data))
3772 return -EINVAL;
3773
3774 if (vmx_basic_vmcs_size(vmx_basic) > vmx_basic_vmcs_size(data))
3775 return -EINVAL;
3776
Paolo Bonzini6677f3d2018-02-26 13:40:08 +01003777 vmx->nested.msrs.basic = data;
David Matlack62cc6b9d2016-11-29 18:14:07 -08003778 return 0;
3779}
3780
3781static int
3782vmx_restore_control_msr(struct vcpu_vmx *vmx, u32 msr_index, u64 data)
3783{
3784 u64 supported;
3785 u32 *lowp, *highp;
3786
3787 switch (msr_index) {
3788 case MSR_IA32_VMX_TRUE_PINBASED_CTLS:
Paolo Bonzini6677f3d2018-02-26 13:40:08 +01003789 lowp = &vmx->nested.msrs.pinbased_ctls_low;
3790 highp = &vmx->nested.msrs.pinbased_ctls_high;
David Matlack62cc6b9d2016-11-29 18:14:07 -08003791 break;
3792 case MSR_IA32_VMX_TRUE_PROCBASED_CTLS:
Paolo Bonzini6677f3d2018-02-26 13:40:08 +01003793 lowp = &vmx->nested.msrs.procbased_ctls_low;
3794 highp = &vmx->nested.msrs.procbased_ctls_high;
David Matlack62cc6b9d2016-11-29 18:14:07 -08003795 break;
3796 case MSR_IA32_VMX_TRUE_EXIT_CTLS:
Paolo Bonzini6677f3d2018-02-26 13:40:08 +01003797 lowp = &vmx->nested.msrs.exit_ctls_low;
3798 highp = &vmx->nested.msrs.exit_ctls_high;
David Matlack62cc6b9d2016-11-29 18:14:07 -08003799 break;
3800 case MSR_IA32_VMX_TRUE_ENTRY_CTLS:
Paolo Bonzini6677f3d2018-02-26 13:40:08 +01003801 lowp = &vmx->nested.msrs.entry_ctls_low;
3802 highp = &vmx->nested.msrs.entry_ctls_high;
David Matlack62cc6b9d2016-11-29 18:14:07 -08003803 break;
3804 case MSR_IA32_VMX_PROCBASED_CTLS2:
Paolo Bonzini6677f3d2018-02-26 13:40:08 +01003805 lowp = &vmx->nested.msrs.secondary_ctls_low;
3806 highp = &vmx->nested.msrs.secondary_ctls_high;
David Matlack62cc6b9d2016-11-29 18:14:07 -08003807 break;
3808 default:
3809 BUG();
3810 }
3811
3812 supported = vmx_control_msr(*lowp, *highp);
3813
3814 /* Check must-be-1 bits are still 1. */
3815 if (!is_bitwise_subset(data, supported, GENMASK_ULL(31, 0)))
3816 return -EINVAL;
3817
3818 /* Check must-be-0 bits are still 0. */
3819 if (!is_bitwise_subset(supported, data, GENMASK_ULL(63, 32)))
3820 return -EINVAL;
3821
3822 *lowp = data;
3823 *highp = data >> 32;
3824 return 0;
3825}
3826
3827static int vmx_restore_vmx_misc(struct vcpu_vmx *vmx, u64 data)
3828{
3829 const u64 feature_and_reserved_bits =
3830 /* feature */
3831 BIT_ULL(5) | GENMASK_ULL(8, 6) | BIT_ULL(14) | BIT_ULL(15) |
3832 BIT_ULL(28) | BIT_ULL(29) | BIT_ULL(30) |
3833 /* reserved */
3834 GENMASK_ULL(13, 9) | BIT_ULL(31);
3835 u64 vmx_misc;
3836
Paolo Bonzini6677f3d2018-02-26 13:40:08 +01003837 vmx_misc = vmx_control_msr(vmx->nested.msrs.misc_low,
3838 vmx->nested.msrs.misc_high);
David Matlack62cc6b9d2016-11-29 18:14:07 -08003839
3840 if (!is_bitwise_subset(vmx_misc, data, feature_and_reserved_bits))
3841 return -EINVAL;
3842
Paolo Bonzini6677f3d2018-02-26 13:40:08 +01003843 if ((vmx->nested.msrs.pinbased_ctls_high &
David Matlack62cc6b9d2016-11-29 18:14:07 -08003844 PIN_BASED_VMX_PREEMPTION_TIMER) &&
3845 vmx_misc_preemption_timer_rate(data) !=
3846 vmx_misc_preemption_timer_rate(vmx_misc))
3847 return -EINVAL;
3848
3849 if (vmx_misc_cr3_count(data) > vmx_misc_cr3_count(vmx_misc))
3850 return -EINVAL;
3851
3852 if (vmx_misc_max_msr(data) > vmx_misc_max_msr(vmx_misc))
3853 return -EINVAL;
3854
3855 if (vmx_misc_mseg_revid(data) != vmx_misc_mseg_revid(vmx_misc))
3856 return -EINVAL;
3857
Paolo Bonzini6677f3d2018-02-26 13:40:08 +01003858 vmx->nested.msrs.misc_low = data;
3859 vmx->nested.msrs.misc_high = data >> 32;
Jim Mattsonf4160e42018-05-29 09:11:33 -07003860
3861 /*
3862 * If L1 has read-only VM-exit information fields, use the
3863 * less permissive vmx_vmwrite_bitmap to specify write
3864 * permissions for the shadow VMCS.
3865 */
3866 if (enable_shadow_vmcs && !nested_cpu_has_vmwrite_any_field(&vmx->vcpu))
3867 vmcs_write64(VMWRITE_BITMAP, __pa(vmx_vmwrite_bitmap));
3868
David Matlack62cc6b9d2016-11-29 18:14:07 -08003869 return 0;
3870}
3871
3872static int vmx_restore_vmx_ept_vpid_cap(struct vcpu_vmx *vmx, u64 data)
3873{
3874 u64 vmx_ept_vpid_cap;
3875
Paolo Bonzini6677f3d2018-02-26 13:40:08 +01003876 vmx_ept_vpid_cap = vmx_control_msr(vmx->nested.msrs.ept_caps,
3877 vmx->nested.msrs.vpid_caps);
David Matlack62cc6b9d2016-11-29 18:14:07 -08003878
3879 /* Every bit is either reserved or a feature bit. */
3880 if (!is_bitwise_subset(vmx_ept_vpid_cap, data, -1ULL))
3881 return -EINVAL;
3882
Paolo Bonzini6677f3d2018-02-26 13:40:08 +01003883 vmx->nested.msrs.ept_caps = data;
3884 vmx->nested.msrs.vpid_caps = data >> 32;
David Matlack62cc6b9d2016-11-29 18:14:07 -08003885 return 0;
3886}
3887
3888static int vmx_restore_fixed0_msr(struct vcpu_vmx *vmx, u32 msr_index, u64 data)
3889{
3890 u64 *msr;
3891
3892 switch (msr_index) {
3893 case MSR_IA32_VMX_CR0_FIXED0:
Paolo Bonzini6677f3d2018-02-26 13:40:08 +01003894 msr = &vmx->nested.msrs.cr0_fixed0;
David Matlack62cc6b9d2016-11-29 18:14:07 -08003895 break;
3896 case MSR_IA32_VMX_CR4_FIXED0:
Paolo Bonzini6677f3d2018-02-26 13:40:08 +01003897 msr = &vmx->nested.msrs.cr4_fixed0;
David Matlack62cc6b9d2016-11-29 18:14:07 -08003898 break;
3899 default:
3900 BUG();
3901 }
3902
3903 /*
 3904	 * 1 bits (which indicate bits that "must-be-1" during VMX operation)
3905 * must be 1 in the restored value.
3906 */
3907 if (!is_bitwise_subset(data, *msr, -1ULL))
3908 return -EINVAL;
3909
3910 *msr = data;
3911 return 0;
3912}
3913
3914/*
3915 * Called when userspace is restoring VMX MSRs.
3916 *
3917 * Returns 0 on success, non-0 otherwise.
3918 */
3919static int vmx_set_vmx_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data)
3920{
3921 struct vcpu_vmx *vmx = to_vmx(vcpu);
3922
Jim Mattsona943ac52018-05-29 09:11:32 -07003923 /*
3924 * Don't allow changes to the VMX capability MSRs while the vCPU
3925 * is in VMX operation.
3926 */
3927 if (vmx->nested.vmxon)
3928 return -EBUSY;
3929
David Matlack62cc6b9d2016-11-29 18:14:07 -08003930 switch (msr_index) {
3931 case MSR_IA32_VMX_BASIC:
3932 return vmx_restore_vmx_basic(vmx, data);
3933 case MSR_IA32_VMX_PINBASED_CTLS:
3934 case MSR_IA32_VMX_PROCBASED_CTLS:
3935 case MSR_IA32_VMX_EXIT_CTLS:
3936 case MSR_IA32_VMX_ENTRY_CTLS:
3937 /*
3938 * The "non-true" VMX capability MSRs are generated from the
3939 * "true" MSRs, so we do not support restoring them directly.
3940 *
3941 * If userspace wants to emulate VMX_BASIC[55]=0, userspace
3942 * should restore the "true" MSRs with the must-be-1 bits
3943 * set according to the SDM Vol 3. A.2 "RESERVED CONTROLS AND
3944 * DEFAULT SETTINGS".
3945 */
3946 return -EINVAL;
3947 case MSR_IA32_VMX_TRUE_PINBASED_CTLS:
3948 case MSR_IA32_VMX_TRUE_PROCBASED_CTLS:
3949 case MSR_IA32_VMX_TRUE_EXIT_CTLS:
3950 case MSR_IA32_VMX_TRUE_ENTRY_CTLS:
3951 case MSR_IA32_VMX_PROCBASED_CTLS2:
3952 return vmx_restore_control_msr(vmx, msr_index, data);
3953 case MSR_IA32_VMX_MISC:
3954 return vmx_restore_vmx_misc(vmx, data);
3955 case MSR_IA32_VMX_CR0_FIXED0:
3956 case MSR_IA32_VMX_CR4_FIXED0:
3957 return vmx_restore_fixed0_msr(vmx, msr_index, data);
3958 case MSR_IA32_VMX_CR0_FIXED1:
3959 case MSR_IA32_VMX_CR4_FIXED1:
3960 /*
3961 * These MSRs are generated based on the vCPU's CPUID, so we
3962 * do not support restoring them directly.
3963 */
3964 return -EINVAL;
3965 case MSR_IA32_VMX_EPT_VPID_CAP:
3966 return vmx_restore_vmx_ept_vpid_cap(vmx, data);
3967 case MSR_IA32_VMX_VMCS_ENUM:
Paolo Bonzini6677f3d2018-02-26 13:40:08 +01003968 vmx->nested.msrs.vmcs_enum = data;
David Matlack62cc6b9d2016-11-29 18:14:07 -08003969 return 0;
3970 default:
3971 /*
3972 * The rest of the VMX capability MSRs do not support restore.
3973 */
3974 return -EINVAL;
3975 }
3976}
3977
Jan Kiszkacae50132014-01-04 18:47:22 +01003978/* Returns 0 on success, non-0 otherwise. */
Paolo Bonzini6677f3d2018-02-26 13:40:08 +01003979static int vmx_get_vmx_msr(struct nested_vmx_msrs *msrs, u32 msr_index, u64 *pdata)
Nadav Har'Elb87a51a2011-05-25 23:04:25 +03003980{
Nadav Har'Elb87a51a2011-05-25 23:04:25 +03003981 switch (msr_index) {
Nadav Har'Elb87a51a2011-05-25 23:04:25 +03003982 case MSR_IA32_VMX_BASIC:
Paolo Bonzini6677f3d2018-02-26 13:40:08 +01003983 *pdata = msrs->basic;
Nadav Har'Elb87a51a2011-05-25 23:04:25 +03003984 break;
3985 case MSR_IA32_VMX_TRUE_PINBASED_CTLS:
3986 case MSR_IA32_VMX_PINBASED_CTLS:
Wincy Vanb9c237b2015-02-03 23:56:30 +08003987 *pdata = vmx_control_msr(
Paolo Bonzini6677f3d2018-02-26 13:40:08 +01003988 msrs->pinbased_ctls_low,
3989 msrs->pinbased_ctls_high);
David Matlack0115f9c2016-11-29 18:14:06 -08003990 if (msr_index == MSR_IA32_VMX_PINBASED_CTLS)
3991 *pdata |= PIN_BASED_ALWAYSON_WITHOUT_TRUE_MSR;
Nadav Har'Elb87a51a2011-05-25 23:04:25 +03003992 break;
3993 case MSR_IA32_VMX_TRUE_PROCBASED_CTLS:
3994 case MSR_IA32_VMX_PROCBASED_CTLS:
Wincy Vanb9c237b2015-02-03 23:56:30 +08003995 *pdata = vmx_control_msr(
Paolo Bonzini6677f3d2018-02-26 13:40:08 +01003996 msrs->procbased_ctls_low,
3997 msrs->procbased_ctls_high);
David Matlack0115f9c2016-11-29 18:14:06 -08003998 if (msr_index == MSR_IA32_VMX_PROCBASED_CTLS)
3999 *pdata |= CPU_BASED_ALWAYSON_WITHOUT_TRUE_MSR;
Nadav Har'Elb87a51a2011-05-25 23:04:25 +03004000 break;
4001 case MSR_IA32_VMX_TRUE_EXIT_CTLS:
4002 case MSR_IA32_VMX_EXIT_CTLS:
Wincy Vanb9c237b2015-02-03 23:56:30 +08004003 *pdata = vmx_control_msr(
Paolo Bonzini6677f3d2018-02-26 13:40:08 +01004004 msrs->exit_ctls_low,
4005 msrs->exit_ctls_high);
David Matlack0115f9c2016-11-29 18:14:06 -08004006 if (msr_index == MSR_IA32_VMX_EXIT_CTLS)
4007 *pdata |= VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR;
Nadav Har'Elb87a51a2011-05-25 23:04:25 +03004008 break;
4009 case MSR_IA32_VMX_TRUE_ENTRY_CTLS:
4010 case MSR_IA32_VMX_ENTRY_CTLS:
Wincy Vanb9c237b2015-02-03 23:56:30 +08004011 *pdata = vmx_control_msr(
Paolo Bonzini6677f3d2018-02-26 13:40:08 +01004012 msrs->entry_ctls_low,
4013 msrs->entry_ctls_high);
David Matlack0115f9c2016-11-29 18:14:06 -08004014 if (msr_index == MSR_IA32_VMX_ENTRY_CTLS)
4015 *pdata |= VM_ENTRY_ALWAYSON_WITHOUT_TRUE_MSR;
Nadav Har'Elb87a51a2011-05-25 23:04:25 +03004016 break;
4017 case MSR_IA32_VMX_MISC:
Wincy Vanb9c237b2015-02-03 23:56:30 +08004018 *pdata = vmx_control_msr(
Paolo Bonzini6677f3d2018-02-26 13:40:08 +01004019 msrs->misc_low,
4020 msrs->misc_high);
Nadav Har'Elb87a51a2011-05-25 23:04:25 +03004021 break;
Nadav Har'Elb87a51a2011-05-25 23:04:25 +03004022 case MSR_IA32_VMX_CR0_FIXED0:
Paolo Bonzini6677f3d2018-02-26 13:40:08 +01004023 *pdata = msrs->cr0_fixed0;
Nadav Har'Elb87a51a2011-05-25 23:04:25 +03004024 break;
4025 case MSR_IA32_VMX_CR0_FIXED1:
Paolo Bonzini6677f3d2018-02-26 13:40:08 +01004026 *pdata = msrs->cr0_fixed1;
Nadav Har'Elb87a51a2011-05-25 23:04:25 +03004027 break;
4028 case MSR_IA32_VMX_CR4_FIXED0:
Paolo Bonzini6677f3d2018-02-26 13:40:08 +01004029 *pdata = msrs->cr4_fixed0;
Nadav Har'Elb87a51a2011-05-25 23:04:25 +03004030 break;
4031 case MSR_IA32_VMX_CR4_FIXED1:
Paolo Bonzini6677f3d2018-02-26 13:40:08 +01004032 *pdata = msrs->cr4_fixed1;
Nadav Har'Elb87a51a2011-05-25 23:04:25 +03004033 break;
4034 case MSR_IA32_VMX_VMCS_ENUM:
Paolo Bonzini6677f3d2018-02-26 13:40:08 +01004035 *pdata = msrs->vmcs_enum;
Nadav Har'Elb87a51a2011-05-25 23:04:25 +03004036 break;
4037 case MSR_IA32_VMX_PROCBASED_CTLS2:
Wincy Vanb9c237b2015-02-03 23:56:30 +08004038 *pdata = vmx_control_msr(
Paolo Bonzini6677f3d2018-02-26 13:40:08 +01004039 msrs->secondary_ctls_low,
4040 msrs->secondary_ctls_high);
Nadav Har'Elb87a51a2011-05-25 23:04:25 +03004041 break;
4042 case MSR_IA32_VMX_EPT_VPID_CAP:
Paolo Bonzini6677f3d2018-02-26 13:40:08 +01004043 *pdata = msrs->ept_caps |
4044 ((u64)msrs->vpid_caps << 32);
Nadav Har'Elb87a51a2011-05-25 23:04:25 +03004045 break;
Bandan Das27c42a12017-08-03 15:54:42 -04004046 case MSR_IA32_VMX_VMFUNC:
Paolo Bonzini6677f3d2018-02-26 13:40:08 +01004047 *pdata = msrs->vmfunc_controls;
Bandan Das27c42a12017-08-03 15:54:42 -04004048 break;
Nadav Har'Elb87a51a2011-05-25 23:04:25 +03004049 default:
Nadav Har'Elb87a51a2011-05-25 23:04:25 +03004050 return 1;
Nadav Har'Elb3897a42013-07-08 19:12:35 +08004051 }
4052
Nadav Har'Elb87a51a2011-05-25 23:04:25 +03004053 return 0;
4054}
4055
Haozhong Zhang37e4c992016-06-22 14:59:55 +08004056static inline bool vmx_feature_control_msr_valid(struct kvm_vcpu *vcpu,
4057 uint64_t val)
4058{
4059 uint64_t valid_bits = to_vmx(vcpu)->msr_ia32_feature_control_valid_bits;
4060
4061 return !(val & ~valid_bits);
4062}
4063
Tom Lendacky801e4592018-02-21 13:39:51 -06004064static int vmx_get_msr_feature(struct kvm_msr_entry *msr)
4065{
Paolo Bonzini13893092018-02-26 13:40:09 +01004066 switch (msr->index) {
4067 case MSR_IA32_VMX_BASIC ... MSR_IA32_VMX_VMFUNC:
4068 if (!nested)
4069 return 1;
4070 return vmx_get_vmx_msr(&vmcs_config.nested, msr->index, &msr->data);
4071 default:
4072 return 1;
4073 }
4074
4075 return 0;
Tom Lendacky801e4592018-02-21 13:39:51 -06004076}
4077
Nadav Har'Elb87a51a2011-05-25 23:04:25 +03004078/*
Avi Kivity6aa8b732006-12-10 02:21:36 -08004079 * Reads an msr value (of 'msr_index') into 'pdata'.
4080 * Returns 0 on success, non-0 otherwise.
4081 * Assumes vcpu_load() was already called.
4082 */
Paolo Bonzini609e36d2015-04-08 15:30:38 +02004083static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
Avi Kivity6aa8b732006-12-10 02:21:36 -08004084{
Borislav Petkova6cb0992017-12-20 12:50:28 +01004085 struct vcpu_vmx *vmx = to_vmx(vcpu);
Avi Kivity26bb0982009-09-07 11:14:12 +03004086 struct shared_msr_entry *msr;
Avi Kivity6aa8b732006-12-10 02:21:36 -08004087
Paolo Bonzini609e36d2015-04-08 15:30:38 +02004088 switch (msr_info->index) {
Avi Kivity05b3e0c2006-12-13 00:33:45 -08004089#ifdef CONFIG_X86_64
Avi Kivity6aa8b732006-12-10 02:21:36 -08004090 case MSR_FS_BASE:
Paolo Bonzini609e36d2015-04-08 15:30:38 +02004091 msr_info->data = vmcs_readl(GUEST_FS_BASE);
Avi Kivity6aa8b732006-12-10 02:21:36 -08004092 break;
4093 case MSR_GS_BASE:
Paolo Bonzini609e36d2015-04-08 15:30:38 +02004094 msr_info->data = vmcs_readl(GUEST_GS_BASE);
Avi Kivity6aa8b732006-12-10 02:21:36 -08004095 break;
Avi Kivity44ea2b12009-09-06 15:55:37 +03004096 case MSR_KERNEL_GS_BASE:
Sean Christopherson678e3152018-07-23 12:32:43 -07004097 msr_info->data = vmx_read_guest_kernel_gs_base(vmx);
Avi Kivity44ea2b12009-09-06 15:55:37 +03004098 break;
Avi Kivity26bb0982009-09-07 11:14:12 +03004099#endif
Avi Kivity6aa8b732006-12-10 02:21:36 -08004100 case MSR_EFER:
Paolo Bonzini609e36d2015-04-08 15:30:38 +02004101 return kvm_get_msr_common(vcpu, msr_info);
KarimAllah Ahmedd28b3872018-02-01 22:59:45 +01004102 case MSR_IA32_SPEC_CTRL:
4103 if (!msr_info->host_initiated &&
KarimAllah Ahmedd28b3872018-02-01 22:59:45 +01004104 !guest_cpuid_has(vcpu, X86_FEATURE_SPEC_CTRL))
4105 return 1;
4106
4107 msr_info->data = to_vmx(vcpu)->spec_ctrl;
4108 break;
KarimAllah Ahmed28c1c9f2018-02-01 22:59:44 +01004109 case MSR_IA32_ARCH_CAPABILITIES:
4110 if (!msr_info->host_initiated &&
4111 !guest_cpuid_has(vcpu, X86_FEATURE_ARCH_CAPABILITIES))
4112 return 1;
4113 msr_info->data = to_vmx(vcpu)->arch_capabilities;
4114 break;
Avi Kivity6aa8b732006-12-10 02:21:36 -08004115 case MSR_IA32_SYSENTER_CS:
Paolo Bonzini609e36d2015-04-08 15:30:38 +02004116 msr_info->data = vmcs_read32(GUEST_SYSENTER_CS);
Avi Kivity6aa8b732006-12-10 02:21:36 -08004117 break;
4118 case MSR_IA32_SYSENTER_EIP:
Paolo Bonzini609e36d2015-04-08 15:30:38 +02004119 msr_info->data = vmcs_readl(GUEST_SYSENTER_EIP);
Avi Kivity6aa8b732006-12-10 02:21:36 -08004120 break;
4121 case MSR_IA32_SYSENTER_ESP:
Paolo Bonzini609e36d2015-04-08 15:30:38 +02004122 msr_info->data = vmcs_readl(GUEST_SYSENTER_ESP);
Avi Kivity6aa8b732006-12-10 02:21:36 -08004123 break;
Liu, Jinsong0dd376e2014-02-24 10:56:53 +00004124 case MSR_IA32_BNDCFGS:
Haozhong Zhang691bd432017-07-04 10:27:41 +08004125 if (!kvm_mpx_supported() ||
Radim Krčmářd6321d42017-08-05 00:12:49 +02004126 (!msr_info->host_initiated &&
4127 !guest_cpuid_has(vcpu, X86_FEATURE_MPX)))
Paolo Bonzini93c4adc2014-03-05 23:19:52 +01004128 return 1;
Paolo Bonzini609e36d2015-04-08 15:30:38 +02004129 msr_info->data = vmcs_read64(GUEST_BNDCFGS);
Liu, Jinsong0dd376e2014-02-24 10:56:53 +00004130 break;
Ashok Rajc45dcc72016-06-22 14:59:56 +08004131 case MSR_IA32_MCG_EXT_CTL:
4132 if (!msr_info->host_initiated &&
Borislav Petkova6cb0992017-12-20 12:50:28 +01004133 !(vmx->msr_ia32_feature_control &
Ashok Rajc45dcc72016-06-22 14:59:56 +08004134 FEATURE_CONTROL_LMCE))
Jan Kiszkacae50132014-01-04 18:47:22 +01004135 return 1;
Ashok Rajc45dcc72016-06-22 14:59:56 +08004136 msr_info->data = vcpu->arch.mcg_ext_ctl;
4137 break;
Jan Kiszkacae50132014-01-04 18:47:22 +01004138 case MSR_IA32_FEATURE_CONTROL:
Borislav Petkova6cb0992017-12-20 12:50:28 +01004139 msr_info->data = vmx->msr_ia32_feature_control;
Jan Kiszkacae50132014-01-04 18:47:22 +01004140 break;
4141 case MSR_IA32_VMX_BASIC ... MSR_IA32_VMX_VMFUNC:
4142 if (!nested_vmx_allowed(vcpu))
4143 return 1;
Paolo Bonzini6677f3d2018-02-26 13:40:08 +01004144 return vmx_get_vmx_msr(&vmx->nested.msrs, msr_info->index,
4145 &msr_info->data);
Wanpeng Li20300092014-12-02 19:14:59 +08004146 case MSR_IA32_XSS:
4147 if (!vmx_xsaves_supported())
4148 return 1;
Paolo Bonzini609e36d2015-04-08 15:30:38 +02004149 msr_info->data = vcpu->arch.ia32_xss;
Wanpeng Li20300092014-12-02 19:14:59 +08004150 break;
Sheng Yang4e47c7a2009-12-18 16:48:47 +08004151 case MSR_TSC_AUX:
Radim Krčmářd6321d42017-08-05 00:12:49 +02004152 if (!msr_info->host_initiated &&
4153 !guest_cpuid_has(vcpu, X86_FEATURE_RDTSCP))
Sheng Yang4e47c7a2009-12-18 16:48:47 +08004154 return 1;
4155 /* Otherwise falls through */
Avi Kivity6aa8b732006-12-10 02:21:36 -08004156 default:
Borislav Petkova6cb0992017-12-20 12:50:28 +01004157 msr = find_msr_entry(vmx, msr_info->index);
Avi Kivity3bab1f52006-12-29 16:49:48 -08004158 if (msr) {
Paolo Bonzini609e36d2015-04-08 15:30:38 +02004159 msr_info->data = msr->data;
Avi Kivity3bab1f52006-12-29 16:49:48 -08004160 break;
Avi Kivity6aa8b732006-12-10 02:21:36 -08004161 }
Paolo Bonzini609e36d2015-04-08 15:30:38 +02004162 return kvm_get_msr_common(vcpu, msr_info);
Avi Kivity6aa8b732006-12-10 02:21:36 -08004163 }
4164
Avi Kivity6aa8b732006-12-10 02:21:36 -08004165 return 0;
4166}
4167
Jan Kiszkacae50132014-01-04 18:47:22 +01004168static void vmx_leave_nested(struct kvm_vcpu *vcpu);
4169
Avi Kivity6aa8b732006-12-10 02:21:36 -08004170/*
 4171 * Writes the msr value into the appropriate "register".
4172 * Returns 0 on success, non-0 otherwise.
4173 * Assumes vcpu_load() was already called.
4174 */
Will Auld8fe8ab42012-11-29 12:42:12 -08004175static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
Avi Kivity6aa8b732006-12-10 02:21:36 -08004176{
Gregory Haskinsa2fa3e92007-07-27 08:13:10 -04004177 struct vcpu_vmx *vmx = to_vmx(vcpu);
Avi Kivity26bb0982009-09-07 11:14:12 +03004178 struct shared_msr_entry *msr;
Eddie Dong2cc51562007-05-21 07:28:09 +03004179 int ret = 0;
Will Auld8fe8ab42012-11-29 12:42:12 -08004180 u32 msr_index = msr_info->index;
4181 u64 data = msr_info->data;
Eddie Dong2cc51562007-05-21 07:28:09 +03004182
Avi Kivity6aa8b732006-12-10 02:21:36 -08004183 switch (msr_index) {
Avi Kivity3bab1f52006-12-29 16:49:48 -08004184 case MSR_EFER:
Will Auld8fe8ab42012-11-29 12:42:12 -08004185 ret = kvm_set_msr_common(vcpu, msr_info);
Eddie Dong2cc51562007-05-21 07:28:09 +03004186 break;
Avi Kivity16175a72009-03-23 22:13:44 +02004187#ifdef CONFIG_X86_64
Avi Kivity6aa8b732006-12-10 02:21:36 -08004188 case MSR_FS_BASE:
Avi Kivity2fb92db2011-04-27 19:42:18 +03004189 vmx_segment_cache_clear(vmx);
Avi Kivity6aa8b732006-12-10 02:21:36 -08004190 vmcs_writel(GUEST_FS_BASE, data);
4191 break;
4192 case MSR_GS_BASE:
Avi Kivity2fb92db2011-04-27 19:42:18 +03004193 vmx_segment_cache_clear(vmx);
Avi Kivity6aa8b732006-12-10 02:21:36 -08004194 vmcs_writel(GUEST_GS_BASE, data);
4195 break;
Avi Kivity44ea2b12009-09-06 15:55:37 +03004196 case MSR_KERNEL_GS_BASE:
Sean Christopherson678e3152018-07-23 12:32:43 -07004197 vmx_write_guest_kernel_gs_base(vmx, data);
Avi Kivity44ea2b12009-09-06 15:55:37 +03004198 break;
Avi Kivity6aa8b732006-12-10 02:21:36 -08004199#endif
4200 case MSR_IA32_SYSENTER_CS:
4201 vmcs_write32(GUEST_SYSENTER_CS, data);
4202 break;
4203 case MSR_IA32_SYSENTER_EIP:
Avi Kivityf5b42c32007-03-06 12:05:53 +02004204 vmcs_writel(GUEST_SYSENTER_EIP, data);
Avi Kivity6aa8b732006-12-10 02:21:36 -08004205 break;
4206 case MSR_IA32_SYSENTER_ESP:
Avi Kivityf5b42c32007-03-06 12:05:53 +02004207 vmcs_writel(GUEST_SYSENTER_ESP, data);
Avi Kivity6aa8b732006-12-10 02:21:36 -08004208 break;
Liu, Jinsong0dd376e2014-02-24 10:56:53 +00004209 case MSR_IA32_BNDCFGS:
Haozhong Zhang691bd432017-07-04 10:27:41 +08004210 if (!kvm_mpx_supported() ||
Radim Krčmářd6321d42017-08-05 00:12:49 +02004211 (!msr_info->host_initiated &&
4212 !guest_cpuid_has(vcpu, X86_FEATURE_MPX)))
Paolo Bonzini93c4adc2014-03-05 23:19:52 +01004213 return 1;
Yu Zhangfd8cb432017-08-24 20:27:56 +08004214 if (is_noncanonical_address(data & PAGE_MASK, vcpu) ||
Jim Mattson45316622017-05-23 11:52:54 -07004215 (data & MSR_IA32_BNDCFGS_RSVD))
Avi Kivity6aa8b732006-12-10 02:21:36 -08004216 return 1;
Sheng Yang468d4722008-10-09 16:01:55 +08004217 vmcs_write64(GUEST_BNDCFGS, data);
4218 break;
KarimAllah Ahmedd28b3872018-02-01 22:59:45 +01004219 case MSR_IA32_SPEC_CTRL:
4220 if (!msr_info->host_initiated &&
KarimAllah Ahmedd28b3872018-02-01 22:59:45 +01004221 !guest_cpuid_has(vcpu, X86_FEATURE_SPEC_CTRL))
4222 return 1;
4223
4224 /* The STIBP bit doesn't fault even if it's not advertised */
Konrad Rzeszutek Wilk9f65fb22018-05-09 21:41:38 +02004225 if (data & ~(SPEC_CTRL_IBRS | SPEC_CTRL_STIBP | SPEC_CTRL_SSBD))
KarimAllah Ahmedd28b3872018-02-01 22:59:45 +01004226 return 1;
4227
4228 vmx->spec_ctrl = data;
4229
4230 if (!data)
4231 break;
4232
4233 /*
4234 * For non-nested:
4235 * When it's written (to non-zero) for the first time, pass
4236 * it through.
4237 *
4238 * For nested:
4239 * The handling of the MSR bitmap for L2 guests is done in
4240 * nested_vmx_merge_msr_bitmap. We should not touch the
4241 * vmcs02.msr_bitmap here since it gets completely overwritten
4242 * in the merging. We update the vmcs01 here for L1 as well
4243 * since it will end up touching the MSR anyway now.
4244 */
4245 vmx_disable_intercept_for_msr(vmx->vmcs01.msr_bitmap,
4246 MSR_IA32_SPEC_CTRL,
4247 MSR_TYPE_RW);
4248 break;
Ashok Raj15d45072018-02-01 22:59:43 +01004249 case MSR_IA32_PRED_CMD:
4250 if (!msr_info->host_initiated &&
Ashok Raj15d45072018-02-01 22:59:43 +01004251 !guest_cpuid_has(vcpu, X86_FEATURE_SPEC_CTRL))
4252 return 1;
4253
4254 if (data & ~PRED_CMD_IBPB)
4255 return 1;
4256
4257 if (!data)
4258 break;
4259
4260 wrmsrl(MSR_IA32_PRED_CMD, PRED_CMD_IBPB);
4261
4262 /*
4263 * For non-nested:
4264 * When it's written (to non-zero) for the first time, pass
4265 * it through.
4266 *
4267 * For nested:
4268 * The handling of the MSR bitmap for L2 guests is done in
4269 * nested_vmx_merge_msr_bitmap. We should not touch the
4270 * vmcs02.msr_bitmap here since it gets completely overwritten
4271 * in the merging.
4272 */
4273 vmx_disable_intercept_for_msr(vmx->vmcs01.msr_bitmap, MSR_IA32_PRED_CMD,
4274 MSR_TYPE_W);
4275 break;
KarimAllah Ahmed28c1c9f2018-02-01 22:59:44 +01004276 case MSR_IA32_ARCH_CAPABILITIES:
4277 if (!msr_info->host_initiated)
4278 return 1;
4279 vmx->arch_capabilities = data;
4280 break;
Sheng Yang468d4722008-10-09 16:01:55 +08004281 case MSR_IA32_CR_PAT:
4282 if (vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_IA32_PAT) {
Nadav Amit45666542014-09-18 22:39:44 +03004283 if (!kvm_mtrr_valid(vcpu, MSR_IA32_CR_PAT, data))
4284 return 1;
Sheng Yang468d4722008-10-09 16:01:55 +08004285 vmcs_write64(GUEST_IA32_PAT, data);
4286 vcpu->arch.pat = data;
4287 break;
4288 }
Will Auld8fe8ab42012-11-29 12:42:12 -08004289 ret = kvm_set_msr_common(vcpu, msr_info);
Sheng Yang4e47c7a2009-12-18 16:48:47 +08004290 break;
Will Auldba904632012-11-29 12:42:50 -08004291 case MSR_IA32_TSC_ADJUST:
4292 ret = kvm_set_msr_common(vcpu, msr_info);
Sheng Yang4e47c7a2009-12-18 16:48:47 +08004293 break;
Ashok Rajc45dcc72016-06-22 14:59:56 +08004294 case MSR_IA32_MCG_EXT_CTL:
4295 if ((!msr_info->host_initiated &&
4296 !(to_vmx(vcpu)->msr_ia32_feature_control &
4297 FEATURE_CONTROL_LMCE)) ||
4298 (data & ~MCG_EXT_CTL_LMCE_EN))
4299 return 1;
4300 vcpu->arch.mcg_ext_ctl = data;
4301 break;
Jan Kiszkacae50132014-01-04 18:47:22 +01004302 case MSR_IA32_FEATURE_CONTROL:
Haozhong Zhang37e4c992016-06-22 14:59:55 +08004303 if (!vmx_feature_control_msr_valid(vcpu, data) ||
Haozhong Zhang3b840802016-06-22 14:59:54 +08004304 (to_vmx(vcpu)->msr_ia32_feature_control &
Jan Kiszkacae50132014-01-04 18:47:22 +01004305 FEATURE_CONTROL_LOCKED && !msr_info->host_initiated))
4306 return 1;
Haozhong Zhang3b840802016-06-22 14:59:54 +08004307 vmx->msr_ia32_feature_control = data;
Jan Kiszkacae50132014-01-04 18:47:22 +01004308 if (msr_info->host_initiated && data == 0)
4309 vmx_leave_nested(vcpu);
4310 break;
4311 case MSR_IA32_VMX_BASIC ... MSR_IA32_VMX_VMFUNC:
David Matlack62cc6b9d2016-11-29 18:14:07 -08004312 if (!msr_info->host_initiated)
4313 return 1; /* they are read-only */
4314 if (!nested_vmx_allowed(vcpu))
4315 return 1;
4316 return vmx_set_vmx_msr(vcpu, msr_index, data);
Wanpeng Li20300092014-12-02 19:14:59 +08004317 case MSR_IA32_XSS:
4318 if (!vmx_xsaves_supported())
4319 return 1;
4320 /*
4321 * The only supported bit as of Skylake is bit 8, but
 4322		 * it is not supported by KVM.
4323 */
4324 if (data != 0)
4325 return 1;
4326 vcpu->arch.ia32_xss = data;
4327 if (vcpu->arch.ia32_xss != host_xss)
4328 add_atomic_switch_msr(vmx, MSR_IA32_XSS,
Konrad Rzeszutek Wilk989e3992018-06-20 22:01:22 -04004329 vcpu->arch.ia32_xss, host_xss, false);
Wanpeng Li20300092014-12-02 19:14:59 +08004330 else
4331 clear_atomic_switch_msr(vmx, MSR_IA32_XSS);
4332 break;
Sheng Yang4e47c7a2009-12-18 16:48:47 +08004333 case MSR_TSC_AUX:
Radim Krčmářd6321d42017-08-05 00:12:49 +02004334 if (!msr_info->host_initiated &&
4335 !guest_cpuid_has(vcpu, X86_FEATURE_RDTSCP))
Sheng Yang4e47c7a2009-12-18 16:48:47 +08004336 return 1;
4337 /* Check reserved bit, higher 32 bits should be zero */
4338 if ((data >> 32) != 0)
4339 return 1;
4340 /* Otherwise falls through */
Avi Kivity6aa8b732006-12-10 02:21:36 -08004341 default:
Rusty Russell8b9cf982007-07-30 16:31:43 +10004342 msr = find_msr_entry(vmx, msr_index);
Avi Kivity3bab1f52006-12-29 16:49:48 -08004343 if (msr) {
Andy Honig8b3c3102014-08-27 11:16:44 -07004344 u64 old_msr_data = msr->data;
Avi Kivity3bab1f52006-12-29 16:49:48 -08004345 msr->data = data;
Avi Kivity2225fd52012-04-18 15:03:04 +03004346 if (msr - vmx->guest_msrs < vmx->save_nmsrs) {
4347 preempt_disable();
Andy Honig8b3c3102014-08-27 11:16:44 -07004348 ret = kvm_set_shared_msr(msr->index, msr->data,
4349 msr->mask);
Avi Kivity2225fd52012-04-18 15:03:04 +03004350 preempt_enable();
Andy Honig8b3c3102014-08-27 11:16:44 -07004351 if (ret)
4352 msr->data = old_msr_data;
Avi Kivity2225fd52012-04-18 15:03:04 +03004353 }
Avi Kivity3bab1f52006-12-29 16:49:48 -08004354 break;
Avi Kivity6aa8b732006-12-10 02:21:36 -08004355 }
Will Auld8fe8ab42012-11-29 12:42:12 -08004356 ret = kvm_set_msr_common(vcpu, msr_info);
Avi Kivity6aa8b732006-12-10 02:21:36 -08004357 }
4358
Eddie Dong2cc51562007-05-21 07:28:09 +03004359 return ret;
Avi Kivity6aa8b732006-12-10 02:21:36 -08004360}
4361
Marcelo Tosatti5fdbf972008-06-27 14:58:02 -03004362static void vmx_cache_reg(struct kvm_vcpu *vcpu, enum kvm_reg reg)
Avi Kivity6aa8b732006-12-10 02:21:36 -08004363{
Marcelo Tosatti5fdbf972008-06-27 14:58:02 -03004364 __set_bit(reg, (unsigned long *)&vcpu->arch.regs_avail);
4365 switch (reg) {
4366 case VCPU_REGS_RSP:
4367 vcpu->arch.regs[VCPU_REGS_RSP] = vmcs_readl(GUEST_RSP);
4368 break;
4369 case VCPU_REGS_RIP:
4370 vcpu->arch.regs[VCPU_REGS_RIP] = vmcs_readl(GUEST_RIP);
4371 break;
Avi Kivity6de4f3a2009-05-31 22:58:47 +03004372 case VCPU_EXREG_PDPTR:
4373 if (enable_ept)
4374 ept_save_pdptrs(vcpu);
4375 break;
Marcelo Tosatti5fdbf972008-06-27 14:58:02 -03004376 default:
4377 break;
4378 }
Avi Kivity6aa8b732006-12-10 02:21:36 -08004379}
4380
Avi Kivity6aa8b732006-12-10 02:21:36 -08004381static __init int cpu_has_kvm_support(void)
4382{
Eduardo Habkost6210e372008-11-17 19:03:16 -02004383 return cpu_has_vmx();
Avi Kivity6aa8b732006-12-10 02:21:36 -08004384}
4385
4386static __init int vmx_disabled_by_bios(void)
4387{
4388 u64 msr;
4389
4390 rdmsrl(MSR_IA32_FEATURE_CONTROL, msr);
Shane Wangcafd6652010-04-29 12:09:01 -04004391 if (msr & FEATURE_CONTROL_LOCKED) {
Joseph Cihula23f3e992011-02-08 11:45:56 -08004392 /* launched w/ TXT and VMX disabled */
Shane Wangcafd6652010-04-29 12:09:01 -04004393 if (!(msr & FEATURE_CONTROL_VMXON_ENABLED_INSIDE_SMX)
4394 && tboot_enabled())
4395 return 1;
Joseph Cihula23f3e992011-02-08 11:45:56 -08004396 /* launched w/o TXT and VMX only enabled w/ TXT */
Shane Wangcafd6652010-04-29 12:09:01 -04004397 if (!(msr & FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX)
Joseph Cihula23f3e992011-02-08 11:45:56 -08004398 && (msr & FEATURE_CONTROL_VMXON_ENABLED_INSIDE_SMX)
Shane Wangf9335af2010-11-17 11:40:17 +08004399 && !tboot_enabled()) {
4400 printk(KERN_WARNING "kvm: disable TXT in the BIOS or "
Joseph Cihula23f3e992011-02-08 11:45:56 -08004401 "activate TXT before enabling KVM\n");
Shane Wangcafd6652010-04-29 12:09:01 -04004402 return 1;
Shane Wangf9335af2010-11-17 11:40:17 +08004403 }
Joseph Cihula23f3e992011-02-08 11:45:56 -08004404 /* launched w/o TXT and VMX disabled */
4405 if (!(msr & FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX)
4406 && !tboot_enabled())
4407 return 1;
Shane Wangcafd6652010-04-29 12:09:01 -04004408 }
4409
4410 return 0;
Avi Kivity6aa8b732006-12-10 02:21:36 -08004411}
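
/*
 * A stand-alone sketch (not part of the kernel build) of the decision table
 * vmx_disabled_by_bios() encodes for IA32_FEATURE_CONTROL: once the lock
 * bit is set, VMXON needs the inside-SMX enable bit when launched with
 * tboot/TXT and the outside-SMX enable bit otherwise; while unlocked,
 * hardware_enable() can still set and lock the bits itself. vmx_usable()
 * is a made-up helper that takes the relevant bits directly instead of
 * reading the MSR.
 */
#include <stdbool.h>
#include <stdio.h>

static bool vmx_usable(bool locked, bool enabled_inside_smx,
		       bool enabled_outside_smx, bool tboot)
{
	if (!locked)
		return true;	/* hardware_enable() will set and lock the bits */
	return tboot ? enabled_inside_smx : enabled_outside_smx;
}

int main(void)
{
	/* Locked with only the SMX enable bit but launched w/o TXT: unusable. */
	printf("%d\n", vmx_usable(true, true, false, false));	/* 0 */
	/* Locked with the outside-SMX enable bit, launched w/o TXT: usable. */
	printf("%d\n", vmx_usable(true, false, true, false));	/* 1 */
	return 0;
}
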
4412
Dongxiao Xu7725b892010-05-11 18:29:38 +08004413static void kvm_cpu_vmxon(u64 addr)
4414{
David Hildenbrandfe0e80b2017-03-10 12:47:13 +01004415 cr4_set_bits(X86_CR4_VMXE);
Alexander Shishkin1c5ac212016-03-29 17:43:10 +03004416 intel_pt_handle_vmx(1);
4417
Uros Bizjak4b1e5472018-10-11 19:40:44 +02004418 asm volatile ("vmxon %0" : : "m"(addr));
Dongxiao Xu7725b892010-05-11 18:29:38 +08004419}
4420
Radim Krčmář13a34e02014-08-28 15:13:03 +02004421static int hardware_enable(void)
Avi Kivity6aa8b732006-12-10 02:21:36 -08004422{
4423 int cpu = raw_smp_processor_id();
4424 u64 phys_addr = __pa(per_cpu(vmxarea, cpu));
Shane Wangcafd6652010-04-29 12:09:01 -04004425 u64 old, test_bits;
Avi Kivity6aa8b732006-12-10 02:21:36 -08004426
Andy Lutomirski1e02ce42014-10-24 15:58:08 -07004427 if (cr4_read_shadow() & X86_CR4_VMXE)
Alexander Graf10474ae2009-09-15 11:37:46 +02004428 return -EBUSY;
4429
Vitaly Kuznetsov773e8a02018-03-20 15:02:11 +01004430 /*
4431 * This can happen if we hot-added a CPU but failed to allocate
4432 * VP assist page for it.
4433 */
4434 if (static_branch_unlikely(&enable_evmcs) &&
4435 !hv_get_vp_assist_page(cpu))
4436 return -EFAULT;
4437
Nadav Har'Eld462b812011-05-24 15:26:10 +03004438 INIT_LIST_HEAD(&per_cpu(loaded_vmcss_on_cpu, cpu));
Feng Wubf9f6ac2015-09-18 22:29:55 +08004439 INIT_LIST_HEAD(&per_cpu(blocked_vcpu_on_cpu, cpu));
4440 spin_lock_init(&per_cpu(blocked_vcpu_on_cpu_lock, cpu));
Zhang Yanfei8f536b72012-12-06 23:43:34 +08004441
4442 /*
4443 * Now we can enable the vmclear operation in kdump
4444 * since the loaded_vmcss_on_cpu list on this cpu
4445 * has been initialized.
4446 *
 4447	 * Though the cpu is not in VMX operation now, it is
 4448	 * safe to enable the vmclear operation because the
 4449	 * loaded_vmcss_on_cpu list is empty.
4450 */
4451 crash_enable_local_vmclear(cpu);
4452
Avi Kivity6aa8b732006-12-10 02:21:36 -08004453 rdmsrl(MSR_IA32_FEATURE_CONTROL, old);
Shane Wangcafd6652010-04-29 12:09:01 -04004454
4455 test_bits = FEATURE_CONTROL_LOCKED;
4456 test_bits |= FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX;
4457 if (tboot_enabled())
4458 test_bits |= FEATURE_CONTROL_VMXON_ENABLED_INSIDE_SMX;
4459
4460 if ((old & test_bits) != test_bits) {
Avi Kivity6aa8b732006-12-10 02:21:36 -08004461 /* enable and lock */
Shane Wangcafd6652010-04-29 12:09:01 -04004462 wrmsrl(MSR_IA32_FEATURE_CONTROL, old | test_bits);
4463 }
David Hildenbrandfe0e80b2017-03-10 12:47:13 +01004464 kvm_cpu_vmxon(phys_addr);
David Hildenbrandfdf288b2017-08-24 20:51:29 +02004465 if (enable_ept)
4466 ept_sync_global();
Alexander Graf10474ae2009-09-15 11:37:46 +02004467
4468 return 0;
Avi Kivity6aa8b732006-12-10 02:21:36 -08004469}
4470
Nadav Har'Eld462b812011-05-24 15:26:10 +03004471static void vmclear_local_loaded_vmcss(void)
Avi Kivity543e4242008-05-13 16:22:47 +03004472{
4473 int cpu = raw_smp_processor_id();
Nadav Har'Eld462b812011-05-24 15:26:10 +03004474 struct loaded_vmcs *v, *n;
Avi Kivity543e4242008-05-13 16:22:47 +03004475
Nadav Har'Eld462b812011-05-24 15:26:10 +03004476 list_for_each_entry_safe(v, n, &per_cpu(loaded_vmcss_on_cpu, cpu),
4477 loaded_vmcss_on_cpu_link)
4478 __loaded_vmcs_clear(v);
Avi Kivity543e4242008-05-13 16:22:47 +03004479}
4480
Eduardo Habkost710ff4a2008-11-17 19:03:18 -02004481
4482/* Just like cpu_vmxoff(), but with the __kvm_handle_fault_on_reboot()
4483 * tricks.
4484 */
4485static void kvm_cpu_vmxoff(void)
4486{
Uros Bizjak4b1e5472018-10-11 19:40:44 +02004487 asm volatile (__ex("vmxoff"));
Alexander Shishkin1c5ac212016-03-29 17:43:10 +03004488
4489 intel_pt_handle_vmx(0);
David Hildenbrandfe0e80b2017-03-10 12:47:13 +01004490 cr4_clear_bits(X86_CR4_VMXE);
Eduardo Habkost710ff4a2008-11-17 19:03:18 -02004491}
4492
Radim Krčmář13a34e02014-08-28 15:13:03 +02004493static void hardware_disable(void)
Avi Kivity6aa8b732006-12-10 02:21:36 -08004494{
David Hildenbrandfe0e80b2017-03-10 12:47:13 +01004495 vmclear_local_loaded_vmcss();
4496 kvm_cpu_vmxoff();
Avi Kivity6aa8b732006-12-10 02:21:36 -08004497}
4498
Yang, Sheng1c3d14fe2007-07-29 11:07:42 +03004499static __init int adjust_vmx_controls(u32 ctl_min, u32 ctl_opt,
Mike Dayd77c26f2007-10-08 09:02:08 -04004500 u32 msr, u32 *result)
Avi Kivity6aa8b732006-12-10 02:21:36 -08004501{
4502 u32 vmx_msr_low, vmx_msr_high;
Yang, Sheng1c3d14fe2007-07-29 11:07:42 +03004503 u32 ctl = ctl_min | ctl_opt;
4504
4505 rdmsr(msr, vmx_msr_low, vmx_msr_high);
4506
4507 ctl &= vmx_msr_high; /* bit == 0 in high word ==> must be zero */
4508 ctl |= vmx_msr_low; /* bit == 1 in low word ==> must be one */
4509
4510 /* Ensure minimum (required) set of control bits are supported. */
4511 if (ctl_min & ~ctl)
Yang, Sheng002c7f72007-07-31 14:23:01 +03004512 return -EIO;
Yang, Sheng1c3d14fe2007-07-29 11:07:42 +03004513
4514 *result = ctl;
4515 return 0;
4516}
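
/*
 * A stand-alone sketch (not part of the kernel build) of the fixup
 * adjust_vmx_controls() applies, with the capability MSR words passed in
 * directly instead of read via rdmsr(). adjust_controls() and the values
 * in main() are made up for demonstration.
 */
#include <stdint.h>
#include <stdio.h>

static int adjust_controls(uint32_t min, uint32_t opt,
			   uint32_t msr_low, uint32_t msr_high,
			   uint32_t *result)
{
	uint32_t ctl = min | opt;

	ctl &= msr_high;	/* bit clear in the high word => must be zero */
	ctl |= msr_low;		/* bit set in the low word    => must be one  */

	if (min & ~ctl)		/* a required bit could not be kept */
		return -1;

	*result = ctl;
	return 0;
}

int main(void)
{
	uint32_t res;

	/* Optional bit 3 is not allowed by the high word, so it is dropped. */
	if (!adjust_controls(0x1, 0x8, 0x2, 0x7, &res))
		printf("ctl = %#x\n", res);	/* ctl = 0x3 */
	return 0;
}
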
4517
Avi Kivity110312c2010-12-21 12:54:20 +02004518static __init bool allow_1_setting(u32 msr, u32 ctl)
4519{
4520 u32 vmx_msr_low, vmx_msr_high;
4521
4522 rdmsr(msr, vmx_msr_low, vmx_msr_high);
4523 return vmx_msr_high & ctl;
4524}
4525
Yang, Sheng002c7f72007-07-31 14:23:01 +03004526static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf)
Yang, Sheng1c3d14fe2007-07-29 11:07:42 +03004527{
4528 u32 vmx_msr_low, vmx_msr_high;
Sheng Yangd56f5462008-04-25 10:13:16 +08004529 u32 min, opt, min2, opt2;
Yang, Sheng1c3d14fe2007-07-29 11:07:42 +03004530 u32 _pin_based_exec_control = 0;
4531 u32 _cpu_based_exec_control = 0;
Sheng Yangf78e0e22007-10-29 09:40:42 +08004532 u32 _cpu_based_2nd_exec_control = 0;
Yang, Sheng1c3d14fe2007-07-29 11:07:42 +03004533 u32 _vmexit_control = 0;
4534 u32 _vmentry_control = 0;
4535
Paolo Bonzini13893092018-02-26 13:40:09 +01004536 memset(vmcs_conf, 0, sizeof(*vmcs_conf));
Raghavendra K T10166742012-02-07 23:19:20 +05304537 min = CPU_BASED_HLT_EXITING |
Yang, Sheng1c3d14fe2007-07-29 11:07:42 +03004538#ifdef CONFIG_X86_64
4539 CPU_BASED_CR8_LOAD_EXITING |
4540 CPU_BASED_CR8_STORE_EXITING |
4541#endif
Sheng Yangd56f5462008-04-25 10:13:16 +08004542 CPU_BASED_CR3_LOAD_EXITING |
4543 CPU_BASED_CR3_STORE_EXITING |
Quan Xu8eb73e2d2017-12-12 16:44:21 +08004544 CPU_BASED_UNCOND_IO_EXITING |
Yang, Sheng1c3d14fe2007-07-29 11:07:42 +03004545 CPU_BASED_MOV_DR_EXITING |
Marcelo Tosattia7052892008-09-23 13:18:35 -03004546 CPU_BASED_USE_TSC_OFFSETING |
Wanpeng Li4d5422c2018-03-12 04:53:02 -07004547 CPU_BASED_MWAIT_EXITING |
4548 CPU_BASED_MONITOR_EXITING |
Avi Kivityfee84b02011-11-10 14:57:25 +02004549 CPU_BASED_INVLPG_EXITING |
4550 CPU_BASED_RDPMC_EXITING;
Anthony Liguori443381a2010-12-06 10:53:38 -06004551
Sheng Yangf78e0e22007-10-29 09:40:42 +08004552 opt = CPU_BASED_TPR_SHADOW |
Sheng Yang25c5f222008-03-28 13:18:56 +08004553 CPU_BASED_USE_MSR_BITMAPS |
Sheng Yangf78e0e22007-10-29 09:40:42 +08004554 CPU_BASED_ACTIVATE_SECONDARY_CONTROLS;
Yang, Sheng1c3d14fe2007-07-29 11:07:42 +03004555 if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_PROCBASED_CTLS,
4556 &_cpu_based_exec_control) < 0)
Yang, Sheng002c7f72007-07-31 14:23:01 +03004557 return -EIO;
Yang, Sheng6e5d8652007-09-12 18:03:11 +08004558#ifdef CONFIG_X86_64
4559 if ((_cpu_based_exec_control & CPU_BASED_TPR_SHADOW))
4560 _cpu_based_exec_control &= ~CPU_BASED_CR8_LOAD_EXITING &
4561 ~CPU_BASED_CR8_STORE_EXITING;
4562#endif
Sheng Yangf78e0e22007-10-29 09:40:42 +08004563 if (_cpu_based_exec_control & CPU_BASED_ACTIVATE_SECONDARY_CONTROLS) {
Sheng Yangd56f5462008-04-25 10:13:16 +08004564 min2 = 0;
4565 opt2 = SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES |
Yang Zhang8d146952013-01-25 10:18:50 +08004566 SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE |
Sheng Yang2384d2b2008-01-17 15:14:33 +08004567 SECONDARY_EXEC_WBINVD_EXITING |
Sheng Yangd56f5462008-04-25 10:13:16 +08004568 SECONDARY_EXEC_ENABLE_VPID |
Nitin A Kamble3a624e22009-06-08 11:34:16 -07004569 SECONDARY_EXEC_ENABLE_EPT |
Zhai, Edwin4b8d54f2009-10-09 18:03:20 +08004570 SECONDARY_EXEC_UNRESTRICTED_GUEST |
Sheng Yang4e47c7a2009-12-18 16:48:47 +08004571 SECONDARY_EXEC_PAUSE_LOOP_EXITING |
Paolo Bonzini0367f202016-07-12 10:44:55 +02004572 SECONDARY_EXEC_DESC |
Mao, Junjiead756a12012-07-02 01:18:48 +00004573 SECONDARY_EXEC_RDTSCP |
Yang Zhang83d4c282013-01-25 10:18:49 +08004574 SECONDARY_EXEC_ENABLE_INVPCID |
Yang Zhangc7c9c562013-01-25 10:18:51 +08004575 SECONDARY_EXEC_APIC_REGISTER_VIRT |
Abel Gordonabc4fc52013-04-18 14:35:25 +03004576 SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY |
Wanpeng Li20300092014-12-02 19:14:59 +08004577 SECONDARY_EXEC_SHADOW_VMCS |
Kai Huang843e4332015-01-28 10:54:28 +08004578 SECONDARY_EXEC_XSAVES |
David Hildenbrand736fdf72017-08-24 20:51:37 +02004579 SECONDARY_EXEC_RDSEED_EXITING |
4580 SECONDARY_EXEC_RDRAND_EXITING |
Xiao Guangrong8b3e34e2015-09-09 14:05:51 +08004581 SECONDARY_EXEC_ENABLE_PML |
Bandan Das2a499e42017-08-03 15:54:41 -04004582 SECONDARY_EXEC_TSC_SCALING |
Sean Christopherson0b665d32018-08-14 09:33:34 -07004583 SECONDARY_EXEC_ENABLE_VMFUNC |
4584 SECONDARY_EXEC_ENCLS_EXITING;
Sheng Yangd56f5462008-04-25 10:13:16 +08004585 if (adjust_vmx_controls(min2, opt2,
4586 MSR_IA32_VMX_PROCBASED_CTLS2,
Sheng Yangf78e0e22007-10-29 09:40:42 +08004587 &_cpu_based_2nd_exec_control) < 0)
4588 return -EIO;
4589 }
4590#ifndef CONFIG_X86_64
4591 if (!(_cpu_based_2nd_exec_control &
4592 SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES))
4593 _cpu_based_exec_control &= ~CPU_BASED_TPR_SHADOW;
4594#endif
Yang Zhang83d4c282013-01-25 10:18:49 +08004595
4596 if (!(_cpu_based_exec_control & CPU_BASED_TPR_SHADOW))
4597 _cpu_based_2nd_exec_control &= ~(
Yang Zhang8d146952013-01-25 10:18:50 +08004598 SECONDARY_EXEC_APIC_REGISTER_VIRT |
Yang Zhangc7c9c562013-01-25 10:18:51 +08004599 SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE |
4600 SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY);
Yang Zhang83d4c282013-01-25 10:18:49 +08004601
Wanpeng Li61f1dd92017-10-18 16:02:19 -07004602 rdmsr_safe(MSR_IA32_VMX_EPT_VPID_CAP,
4603 &vmx_capability.ept, &vmx_capability.vpid);
4604
Sheng Yangd56f5462008-04-25 10:13:16 +08004605 if (_cpu_based_2nd_exec_control & SECONDARY_EXEC_ENABLE_EPT) {
Marcelo Tosattia7052892008-09-23 13:18:35 -03004606 /* CR3 accesses and invlpg don't need to cause VM Exits when EPT
 4607		   is enabled */
Gleb Natapov5fff7d22009-08-27 18:41:30 +03004608 _cpu_based_exec_control &= ~(CPU_BASED_CR3_LOAD_EXITING |
4609 CPU_BASED_CR3_STORE_EXITING |
4610 CPU_BASED_INVLPG_EXITING);
Wanpeng Li61f1dd92017-10-18 16:02:19 -07004611 } else if (vmx_capability.ept) {
4612 vmx_capability.ept = 0;
 4613		pr_warn_once("EPT CAP should not exist when the 1-setting of the "
 4614			"enable EPT VM-execution control is not supported\n");
4615 }
4616 if (!(_cpu_based_2nd_exec_control & SECONDARY_EXEC_ENABLE_VPID) &&
4617 vmx_capability.vpid) {
4618 vmx_capability.vpid = 0;
 4619		pr_warn_once("VPID CAP should not exist when the 1-setting of the "
 4620			"enable VPID VM-execution control is not supported\n");
Sheng Yangd56f5462008-04-25 10:13:16 +08004621 }
Yang, Sheng1c3d14fe2007-07-29 11:07:42 +03004622
Paolo Bonzini91fa0f82016-06-15 20:55:08 +02004623 min = VM_EXIT_SAVE_DEBUG_CONTROLS | VM_EXIT_ACK_INTR_ON_EXIT;
Yang, Sheng1c3d14fe2007-07-29 11:07:42 +03004624#ifdef CONFIG_X86_64
4625 min |= VM_EXIT_HOST_ADDR_SPACE_SIZE;
4626#endif
Yang Zhanga547c6d2013-04-11 19:25:10 +08004627 opt = VM_EXIT_SAVE_IA32_PAT | VM_EXIT_LOAD_IA32_PAT |
Paolo Bonzini91fa0f82016-06-15 20:55:08 +02004628 VM_EXIT_CLEAR_BNDCFGS;
Yang, Sheng1c3d14fe2007-07-29 11:07:42 +03004629 if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_EXIT_CTLS,
4630 &_vmexit_control) < 0)
Yang, Sheng002c7f72007-07-31 14:23:01 +03004631 return -EIO;
Yang, Sheng1c3d14fe2007-07-29 11:07:42 +03004632
Paolo Bonzini8a1b4392017-11-06 13:31:12 +01004633 min = PIN_BASED_EXT_INTR_MASK | PIN_BASED_NMI_EXITING;
4634 opt = PIN_BASED_VIRTUAL_NMIS | PIN_BASED_POSTED_INTR |
4635 PIN_BASED_VMX_PREEMPTION_TIMER;
Yang Zhang01e439b2013-04-11 19:25:12 +08004636 if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_PINBASED_CTLS,
4637 &_pin_based_exec_control) < 0)
4638 return -EIO;
4639
Paolo Bonzini1c17c3e2016-07-08 11:53:38 +02004640 if (cpu_has_broken_vmx_preemption_timer())
4641 _pin_based_exec_control &= ~PIN_BASED_VMX_PREEMPTION_TIMER;
Yang Zhang01e439b2013-04-11 19:25:12 +08004642 if (!(_cpu_based_2nd_exec_control &
Paolo Bonzini91fa0f82016-06-15 20:55:08 +02004643 SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY))
Yang Zhang01e439b2013-04-11 19:25:12 +08004644 _pin_based_exec_control &= ~PIN_BASED_POSTED_INTR;
4645
Paolo Bonzinic845f9c2014-02-21 10:55:44 +01004646 min = VM_ENTRY_LOAD_DEBUG_CONTROLS;
Liu, Jinsongda8999d2014-02-24 10:55:46 +00004647 opt = VM_ENTRY_LOAD_IA32_PAT | VM_ENTRY_LOAD_BNDCFGS;
Yang, Sheng1c3d14fe2007-07-29 11:07:42 +03004648 if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_ENTRY_CTLS,
4649 &_vmentry_control) < 0)
Yang, Sheng002c7f72007-07-31 14:23:01 +03004650 return -EIO;
Avi Kivity6aa8b732006-12-10 02:21:36 -08004651
Nguyen Anh Quynhc68876f2006-12-29 16:49:54 -08004652 rdmsr(MSR_IA32_VMX_BASIC, vmx_msr_low, vmx_msr_high);
Yang, Sheng1c3d14fe2007-07-29 11:07:42 +03004653
4654 /* IA-32 SDM Vol 3B: VMCS size is never greater than 4kB. */
4655 if ((vmx_msr_high & 0x1fff) > PAGE_SIZE)
Yang, Sheng002c7f72007-07-31 14:23:01 +03004656 return -EIO;
Yang, Sheng1c3d14fe2007-07-29 11:07:42 +03004657
4658#ifdef CONFIG_X86_64
4659 /* IA-32 SDM Vol 3B: 64-bit CPUs always have VMX_BASIC_MSR[48]==0. */
4660 if (vmx_msr_high & (1u<<16))
Yang, Sheng002c7f72007-07-31 14:23:01 +03004661 return -EIO;
Yang, Sheng1c3d14fe2007-07-29 11:07:42 +03004662#endif
4663
4664 /* Require Write-Back (WB) memory type for VMCS accesses. */
4665 if (((vmx_msr_high >> 18) & 15) != 6)
Yang, Sheng002c7f72007-07-31 14:23:01 +03004666 return -EIO;
Yang, Sheng1c3d14fe2007-07-29 11:07:42 +03004667
Yang, Sheng002c7f72007-07-31 14:23:01 +03004668 vmcs_conf->size = vmx_msr_high & 0x1fff;
Paolo Bonzini16cb0252016-09-05 15:57:00 +02004669 vmcs_conf->order = get_order(vmcs_conf->size);
Jan Dakinevich9ac7e3e2016-09-04 21:23:15 +03004670 vmcs_conf->basic_cap = vmx_msr_high & ~0x1fff;
Vitaly Kuznetsov773e8a02018-03-20 15:02:11 +01004671
Liran Alon2307af12018-06-29 22:59:04 +03004672 vmcs_conf->revision_id = vmx_msr_low;
Yang, Sheng1c3d14fe2007-07-29 11:07:42 +03004673
Yang, Sheng002c7f72007-07-31 14:23:01 +03004674 vmcs_conf->pin_based_exec_ctrl = _pin_based_exec_control;
4675 vmcs_conf->cpu_based_exec_ctrl = _cpu_based_exec_control;
Sheng Yangf78e0e22007-10-29 09:40:42 +08004676 vmcs_conf->cpu_based_2nd_exec_ctrl = _cpu_based_2nd_exec_control;
Yang, Sheng002c7f72007-07-31 14:23:01 +03004677 vmcs_conf->vmexit_ctrl = _vmexit_control;
4678 vmcs_conf->vmentry_ctrl = _vmentry_control;
Yang, Sheng1c3d14fe2007-07-29 11:07:42 +03004679
Vitaly Kuznetsov773e8a02018-03-20 15:02:11 +01004680 if (static_branch_unlikely(&enable_evmcs))
4681 evmcs_sanitize_exec_ctrls(vmcs_conf);
4682
Avi Kivity110312c2010-12-21 12:54:20 +02004683 cpu_has_load_ia32_efer =
4684 allow_1_setting(MSR_IA32_VMX_ENTRY_CTLS,
4685 VM_ENTRY_LOAD_IA32_EFER)
4686 && allow_1_setting(MSR_IA32_VMX_EXIT_CTLS,
4687 VM_EXIT_LOAD_IA32_EFER);
4688
Gleb Natapov8bf00a52011-10-05 14:01:22 +02004689 cpu_has_load_perf_global_ctrl =
4690 allow_1_setting(MSR_IA32_VMX_ENTRY_CTLS,
4691 VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL)
4692 && allow_1_setting(MSR_IA32_VMX_EXIT_CTLS,
4693 VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL);
4694
4695 /*
4696 * Some cpus support VM_ENTRY_(LOAD|SAVE)_IA32_PERF_GLOBAL_CTRL
Andrea Gelminibb3541f2016-05-21 14:14:44 +02004697	 * but due to the errata below it can't be used. The workaround is to
Gleb Natapov8bf00a52011-10-05 14:01:22 +02004698	 * use the MSR load mechanism to switch IA32_PERF_GLOBAL_CTRL.
4699 *
4700 * VM Exit May Incorrectly Clear IA32_PERF_GLOBAL_CTRL [34:32]
4701 *
4702 * AAK155 (model 26)
4703 * AAP115 (model 30)
4704 * AAT100 (model 37)
4705 * BC86,AAY89,BD102 (model 44)
4706 * BA97 (model 46)
4707 *
4708 */
4709 if (cpu_has_load_perf_global_ctrl && boot_cpu_data.x86 == 0x6) {
4710 switch (boot_cpu_data.x86_model) {
4711 case 26:
4712 case 30:
4713 case 37:
4714 case 44:
4715 case 46:
4716 cpu_has_load_perf_global_ctrl = false;
4717 printk_once(KERN_WARNING"kvm: VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL "
4718 "does not work properly. Using workaround\n");
4719 break;
4720 default:
4721 break;
4722 }
4723 }
4724
Borislav Petkov782511b2016-04-04 22:25:03 +02004725 if (boot_cpu_has(X86_FEATURE_XSAVES))
Wanpeng Li20300092014-12-02 19:14:59 +08004726 rdmsrl(MSR_IA32_XSS, host_xss);
4727
Yang, Sheng1c3d14fe2007-07-29 11:07:42 +03004728 return 0;
Nguyen Anh Quynhc68876f2006-12-29 16:49:54 -08004729}
Avi Kivity6aa8b732006-12-10 02:21:36 -08004730
Liran Alon491a6032018-06-23 02:35:12 +03004731static struct vmcs *alloc_vmcs_cpu(bool shadow, int cpu)
Avi Kivity6aa8b732006-12-10 02:21:36 -08004732{
4733 int node = cpu_to_node(cpu);
4734 struct page *pages;
4735 struct vmcs *vmcs;
4736
Vlastimil Babka96db8002015-09-08 15:03:50 -07004737 pages = __alloc_pages_node(node, GFP_KERNEL, vmcs_config.order);
Avi Kivity6aa8b732006-12-10 02:21:36 -08004738 if (!pages)
4739 return NULL;
4740 vmcs = page_address(pages);
Yang, Sheng1c3d14fe2007-07-29 11:07:42 +03004741 memset(vmcs, 0, vmcs_config.size);
Liran Alon2307af12018-06-29 22:59:04 +03004742
4743 /* KVM supports Enlightened VMCS v1 only */
4744 if (static_branch_unlikely(&enable_evmcs))
Liran Alon392b2f22018-06-23 02:35:01 +03004745 vmcs->hdr.revision_id = KVM_EVMCS_VERSION;
Liran Alon2307af12018-06-29 22:59:04 +03004746 else
Liran Alon392b2f22018-06-23 02:35:01 +03004747 vmcs->hdr.revision_id = vmcs_config.revision_id;
Liran Alon2307af12018-06-29 22:59:04 +03004748
Liran Alon491a6032018-06-23 02:35:12 +03004749 if (shadow)
4750 vmcs->hdr.shadow_vmcs = 1;
Avi Kivity6aa8b732006-12-10 02:21:36 -08004751 return vmcs;
4752}
4753
Avi Kivity6aa8b732006-12-10 02:21:36 -08004754static void free_vmcs(struct vmcs *vmcs)
4755{
Yang, Sheng1c3d14fe2007-07-29 11:07:42 +03004756 free_pages((unsigned long)vmcs, vmcs_config.order);
Avi Kivity6aa8b732006-12-10 02:21:36 -08004757}
4758
Nadav Har'Eld462b812011-05-24 15:26:10 +03004759/*
4760 * Free a VMCS, but before that VMCLEAR it on the CPU where it was last loaded
4761 */
4762static void free_loaded_vmcs(struct loaded_vmcs *loaded_vmcs)
4763{
4764 if (!loaded_vmcs->vmcs)
4765 return;
4766 loaded_vmcs_clear(loaded_vmcs);
4767 free_vmcs(loaded_vmcs->vmcs);
4768 loaded_vmcs->vmcs = NULL;
Paolo Bonzini904e14f2018-01-16 16:51:18 +01004769 if (loaded_vmcs->msr_bitmap)
4770 free_page((unsigned long)loaded_vmcs->msr_bitmap);
Jim Mattson355f4fb2016-10-28 08:29:39 -07004771 WARN_ON(loaded_vmcs->shadow_vmcs != NULL);
Nadav Har'Eld462b812011-05-24 15:26:10 +03004772}
4773
Liran Alon491a6032018-06-23 02:35:12 +03004774static struct vmcs *alloc_vmcs(bool shadow)
Paolo Bonzinif21f1652018-01-11 12:16:15 +01004775{
Liran Alon491a6032018-06-23 02:35:12 +03004776 return alloc_vmcs_cpu(shadow, raw_smp_processor_id());
Paolo Bonzinif21f1652018-01-11 12:16:15 +01004777}
4778
4779static int alloc_loaded_vmcs(struct loaded_vmcs *loaded_vmcs)
4780{
Liran Alon491a6032018-06-23 02:35:12 +03004781 loaded_vmcs->vmcs = alloc_vmcs(false);
Paolo Bonzinif21f1652018-01-11 12:16:15 +01004782 if (!loaded_vmcs->vmcs)
4783 return -ENOMEM;
4784
4785 loaded_vmcs->shadow_vmcs = NULL;
4786 loaded_vmcs_init(loaded_vmcs);
Paolo Bonzini904e14f2018-01-16 16:51:18 +01004787
4788 if (cpu_has_vmx_msr_bitmap()) {
4789 loaded_vmcs->msr_bitmap = (unsigned long *)__get_free_page(GFP_KERNEL);
4790 if (!loaded_vmcs->msr_bitmap)
4791 goto out_vmcs;
4792 memset(loaded_vmcs->msr_bitmap, 0xff, PAGE_SIZE);
Vitaly Kuznetsovceef7d12018-04-16 12:50:33 +02004793
Arnd Bergmann1f008e12018-05-25 17:36:17 +02004794 if (IS_ENABLED(CONFIG_HYPERV) &&
4795 static_branch_unlikely(&enable_evmcs) &&
Vitaly Kuznetsovceef7d12018-04-16 12:50:33 +02004796 (ms_hyperv.nested_features & HV_X64_NESTED_MSR_BITMAP)) {
4797 struct hv_enlightened_vmcs *evmcs =
4798 (struct hv_enlightened_vmcs *)loaded_vmcs->vmcs;
4799
4800 evmcs->hv_enlightenments_control.msr_bitmap = 1;
4801 }
Paolo Bonzini904e14f2018-01-16 16:51:18 +01004802 }
Sean Christophersond7ee0392018-07-23 12:32:47 -07004803
4804 memset(&loaded_vmcs->host_state, 0, sizeof(struct vmcs_host_state));
4805
Paolo Bonzinif21f1652018-01-11 12:16:15 +01004806 return 0;
Paolo Bonzini904e14f2018-01-16 16:51:18 +01004807
4808out_vmcs:
4809 free_loaded_vmcs(loaded_vmcs);
4810 return -ENOMEM;
Paolo Bonzinif21f1652018-01-11 12:16:15 +01004811}
4812
Sam Ravnborg39959582007-06-01 00:47:13 -07004813static void free_kvm_area(void)
Avi Kivity6aa8b732006-12-10 02:21:36 -08004814{
4815 int cpu;
4816
Zachary Amsden3230bb42009-09-29 11:38:37 -10004817 for_each_possible_cpu(cpu) {
Avi Kivity6aa8b732006-12-10 02:21:36 -08004818 free_vmcs(per_cpu(vmxarea, cpu));
Zachary Amsden3230bb42009-09-29 11:38:37 -10004819 per_cpu(vmxarea, cpu) = NULL;
4820 }
Avi Kivity6aa8b732006-12-10 02:21:36 -08004821}
4822
Jim Mattsond37f4262017-12-22 12:12:16 -08004823enum vmcs_field_width {
4824 VMCS_FIELD_WIDTH_U16 = 0,
4825 VMCS_FIELD_WIDTH_U64 = 1,
4826 VMCS_FIELD_WIDTH_U32 = 2,
4827 VMCS_FIELD_WIDTH_NATURAL_WIDTH = 3
Jim Mattson85fd5142017-07-07 12:51:41 -07004828};
4829
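/*
 * A VMCS field encoding packs several attributes into the field number:
 * bit 0 selects the high half of a 64-bit field, bits 11:10 give the
 * field type (1 denotes a read-only VM-exit information field) and
 * bits 14:13 give the width decoded by vmcs_field_width() below.
 */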
Jim Mattsond37f4262017-12-22 12:12:16 -08004830static inline int vmcs_field_width(unsigned long field)
Jim Mattson85fd5142017-07-07 12:51:41 -07004831{
4832 if (0x1 & field) /* the *_HIGH fields are all 32 bit */
Jim Mattsond37f4262017-12-22 12:12:16 -08004833 return VMCS_FIELD_WIDTH_U32;
Jim Mattson85fd5142017-07-07 12:51:41 -07004834	return (field >> 13) & 0x3;
4835}
4836
4837static inline int vmcs_field_readonly(unsigned long field)
4838{
4839 return (((field >> 10) & 0x3) == 1);
4840}
4841
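/*
 * Trim the shadow VMCS field tables to what can actually be shadowed:
 * warn when the HIGH companion of a 64-bit field is missing from a
 * table, skip read/write fields the hardware does not support (PML
 * index, preemption timer value, guest interrupt status), and on
 * 64-bit hosts drop the *_HIGH encodings since full-width accesses
 * are used.  Shadowed fields are cleared in the vmread/vmwrite
 * intercept bitmaps.
 */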
Bandan Dasfe2b2012014-04-21 15:20:14 -04004842static void init_vmcs_shadow_fields(void)
4843{
4844 int i, j;
4845
Paolo Bonzini44900ba2017-12-13 12:58:02 +01004846 for (i = j = 0; i < max_shadow_read_only_fields; i++) {
4847 u16 field = shadow_read_only_fields[i];
Jim Mattsond37f4262017-12-22 12:12:16 -08004848 if (vmcs_field_width(field) == VMCS_FIELD_WIDTH_U64 &&
Paolo Bonzini44900ba2017-12-13 12:58:02 +01004849 (i + 1 == max_shadow_read_only_fields ||
4850 shadow_read_only_fields[i + 1] != field + 1))
4851 pr_err("Missing field from shadow_read_only_field %x\n",
4852 field + 1);
4853
4854 clear_bit(field, vmx_vmread_bitmap);
4855#ifdef CONFIG_X86_64
4856 if (field & 1)
4857 continue;
4858#endif
4859 if (j < i)
4860 shadow_read_only_fields[j] = field;
4861 j++;
4862 }
4863 max_shadow_read_only_fields = j;
Bandan Dasfe2b2012014-04-21 15:20:14 -04004864
4865 for (i = j = 0; i < max_shadow_read_write_fields; i++) {
Paolo Bonzini44900ba2017-12-13 12:58:02 +01004866 u16 field = shadow_read_write_fields[i];
Jim Mattsond37f4262017-12-22 12:12:16 -08004867 if (vmcs_field_width(field) == VMCS_FIELD_WIDTH_U64 &&
Paolo Bonzini44900ba2017-12-13 12:58:02 +01004868 (i + 1 == max_shadow_read_write_fields ||
4869 shadow_read_write_fields[i + 1] != field + 1))
4870 pr_err("Missing field from shadow_read_write_field %x\n",
4871 field + 1);
4872
Paolo Bonzinic5d167b2017-12-13 11:05:19 +01004873 /*
4874 * PML and the preemption timer can be emulated, but the
4875 * processor cannot vmwrite to fields that don't exist
4876 * on bare metal.
4877 */
Paolo Bonzini44900ba2017-12-13 12:58:02 +01004878 switch (field) {
Paolo Bonzinic5d167b2017-12-13 11:05:19 +01004879 case GUEST_PML_INDEX:
4880 if (!cpu_has_vmx_pml())
4881 continue;
4882 break;
4883 case VMX_PREEMPTION_TIMER_VALUE:
4884 if (!cpu_has_vmx_preemption_timer())
4885 continue;
4886 break;
4887 case GUEST_INTR_STATUS:
4888 if (!cpu_has_vmx_apicv())
Bandan Dasfe2b2012014-04-21 15:20:14 -04004889 continue;
4890 break;
4891 default:
4892 break;
4893 }
4894
Paolo Bonzini44900ba2017-12-13 12:58:02 +01004895 clear_bit(field, vmx_vmwrite_bitmap);
4896 clear_bit(field, vmx_vmread_bitmap);
4897#ifdef CONFIG_X86_64
4898 if (field & 1)
4899 continue;
4900#endif
Bandan Dasfe2b2012014-04-21 15:20:14 -04004901 if (j < i)
Paolo Bonzini44900ba2017-12-13 12:58:02 +01004902 shadow_read_write_fields[j] = field;
Bandan Dasfe2b2012014-04-21 15:20:14 -04004903 j++;
4904 }
4905 max_shadow_read_write_fields = j;
Bandan Dasfe2b2012014-04-21 15:20:14 -04004906}
4907
Avi Kivity6aa8b732006-12-10 02:21:36 -08004908static __init int alloc_kvm_area(void)
4909{
4910 int cpu;
4911
Zachary Amsden3230bb42009-09-29 11:38:37 -10004912 for_each_possible_cpu(cpu) {
Avi Kivity6aa8b732006-12-10 02:21:36 -08004913 struct vmcs *vmcs;
4914
Liran Alon491a6032018-06-23 02:35:12 +03004915 vmcs = alloc_vmcs_cpu(false, cpu);
Avi Kivity6aa8b732006-12-10 02:21:36 -08004916 if (!vmcs) {
4917 free_kvm_area();
4918 return -ENOMEM;
4919 }
4920
Liran Alon2307af12018-06-29 22:59:04 +03004921 /*
4922 * When eVMCS is enabled, alloc_vmcs_cpu() sets
4923 * vmcs->revision_id to KVM_EVMCS_VERSION instead of
4924 * revision_id reported by MSR_IA32_VMX_BASIC.
4925 *
 4926		 * However, even though not explicitly documented by
 4927		 * TLFS, the VMXArea passed as the VMXON argument should
 4928		 * still be marked with the revision_id reported by the
 4929		 * physical CPU.
4930 */
4931 if (static_branch_unlikely(&enable_evmcs))
Liran Alon392b2f22018-06-23 02:35:01 +03004932 vmcs->hdr.revision_id = vmcs_config.revision_id;
Liran Alon2307af12018-06-29 22:59:04 +03004933
Avi Kivity6aa8b732006-12-10 02:21:36 -08004934 per_cpu(vmxarea, cpu) = vmcs;
4935 }
4936 return 0;
4937}
4938
Gleb Natapov91b0aa22013-01-21 15:36:47 +02004939static void fix_pmode_seg(struct kvm_vcpu *vcpu, int seg,
Gleb Natapovd99e4152012-12-20 16:57:45 +02004940 struct kvm_segment *save)
Avi Kivity6aa8b732006-12-10 02:21:36 -08004941{
Gleb Natapovd99e4152012-12-20 16:57:45 +02004942 if (!emulate_invalid_guest_state) {
4943 /*
 4944		 * CS and SS RPL should be equal during guest entry according
 4945		 * to the VMX spec, but in reality it is not always so. Since the
 4946		 * vcpu is in the middle of the transition from real mode to
 4947		 * protected mode, it is safe to assume that RPL 0 is a good
 4948		 * default value.
4949 */
4950 if (seg == VCPU_SREG_CS || seg == VCPU_SREG_SS)
Nadav Amitb32a9912015-03-29 16:33:04 +03004951 save->selector &= ~SEGMENT_RPL_MASK;
4952 save->dpl = save->selector & SEGMENT_RPL_MASK;
Gleb Natapovd99e4152012-12-20 16:57:45 +02004953 save->s = 1;
Avi Kivity6aa8b732006-12-10 02:21:36 -08004954 }
Gleb Natapovd99e4152012-12-20 16:57:45 +02004955 vmx_set_segment(vcpu, save, seg);
Avi Kivity6aa8b732006-12-10 02:21:36 -08004956}
4957
4958static void enter_pmode(struct kvm_vcpu *vcpu)
4959{
4960 unsigned long flags;
Mohammed Gamala89a8fb2008-08-17 16:42:16 +03004961 struct vcpu_vmx *vmx = to_vmx(vcpu);
Avi Kivity6aa8b732006-12-10 02:21:36 -08004962
Gleb Natapovd99e4152012-12-20 16:57:45 +02004963 /*
 4964	 * Update the real mode segment cache. It may not be up to date if a segment
 4965	 * register was written while the vcpu was in guest mode.
4966 */
4967 vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_ES], VCPU_SREG_ES);
4968 vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_DS], VCPU_SREG_DS);
4969 vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_FS], VCPU_SREG_FS);
4970 vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_GS], VCPU_SREG_GS);
4971 vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_SS], VCPU_SREG_SS);
4972 vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_CS], VCPU_SREG_CS);
4973
Avi Kivity7ffd92c2009-06-09 14:10:45 +03004974 vmx->rmode.vm86_active = 0;
Avi Kivity6aa8b732006-12-10 02:21:36 -08004975
Avi Kivity2fb92db2011-04-27 19:42:18 +03004976 vmx_segment_cache_clear(vmx);
4977
Avi Kivityf5f7b2f2012-08-21 17:07:00 +03004978 vmx_set_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_TR], VCPU_SREG_TR);
Avi Kivity6aa8b732006-12-10 02:21:36 -08004979
4980 flags = vmcs_readl(GUEST_RFLAGS);
Avi Kivity78ac8b42010-04-08 18:19:35 +03004981 flags &= RMODE_GUEST_OWNED_EFLAGS_BITS;
4982 flags |= vmx->rmode.save_rflags & ~RMODE_GUEST_OWNED_EFLAGS_BITS;
Avi Kivity6aa8b732006-12-10 02:21:36 -08004983 vmcs_writel(GUEST_RFLAGS, flags);
4984
Rusty Russell66aee912007-07-17 23:34:16 +10004985 vmcs_writel(GUEST_CR4, (vmcs_readl(GUEST_CR4) & ~X86_CR4_VME) |
4986 (vmcs_readl(CR4_READ_SHADOW) & X86_CR4_VME));
Avi Kivity6aa8b732006-12-10 02:21:36 -08004987
4988 update_exception_bitmap(vcpu);
4989
Gleb Natapov91b0aa22013-01-21 15:36:47 +02004990 fix_pmode_seg(vcpu, VCPU_SREG_CS, &vmx->rmode.segs[VCPU_SREG_CS]);
4991 fix_pmode_seg(vcpu, VCPU_SREG_SS, &vmx->rmode.segs[VCPU_SREG_SS]);
4992 fix_pmode_seg(vcpu, VCPU_SREG_ES, &vmx->rmode.segs[VCPU_SREG_ES]);
4993 fix_pmode_seg(vcpu, VCPU_SREG_DS, &vmx->rmode.segs[VCPU_SREG_DS]);
4994 fix_pmode_seg(vcpu, VCPU_SREG_FS, &vmx->rmode.segs[VCPU_SREG_FS]);
4995 fix_pmode_seg(vcpu, VCPU_SREG_GS, &vmx->rmode.segs[VCPU_SREG_GS]);
Avi Kivity6aa8b732006-12-10 02:21:36 -08004996}
4997
Avi Kivityf5f7b2f2012-08-21 17:07:00 +03004998static void fix_rmode_seg(int seg, struct kvm_segment *save)
Avi Kivity6aa8b732006-12-10 02:21:36 -08004999{
Mathias Krause772e0312012-08-30 01:30:19 +02005000 const struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg];
Gleb Natapovd99e4152012-12-20 16:57:45 +02005001 struct kvm_segment var = *save;
Avi Kivity6aa8b732006-12-10 02:21:36 -08005002
Gleb Natapovd99e4152012-12-20 16:57:45 +02005003 var.dpl = 0x3;
5004 if (seg == VCPU_SREG_CS)
5005 var.type = 0x3;
5006
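	/*
	 * Without invalid-guest-state emulation, synthesize a vm86-style
	 * segment: derive the selector from the base (selector = base >> 4,
	 * base truncated to a paragraph boundary), with a 64 KiB
	 * byte-granular limit and a present, DPL 3, read/write accessed
	 * segment type.
	 */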
5007 if (!emulate_invalid_guest_state) {
5008 var.selector = var.base >> 4;
5009 var.base = var.base & 0xffff0;
5010 var.limit = 0xffff;
5011 var.g = 0;
5012 var.db = 0;
5013 var.present = 1;
5014 var.s = 1;
5015 var.l = 0;
5016 var.unusable = 0;
5017 var.type = 0x3;
5018 var.avl = 0;
5019 if (save->base & 0xf)
5020 printk_once(KERN_WARNING "kvm: segment base is not "
5021 "paragraph aligned when entering "
5022 "protected mode (seg=%d)", seg);
5023 }
5024
5025 vmcs_write16(sf->selector, var.selector);
Chao Peng96794e42017-02-21 03:50:01 -05005026 vmcs_writel(sf->base, var.base);
Gleb Natapovd99e4152012-12-20 16:57:45 +02005027 vmcs_write32(sf->limit, var.limit);
5028 vmcs_write32(sf->ar_bytes, vmx_segment_access_rights(&var));
Avi Kivity6aa8b732006-12-10 02:21:36 -08005029}
5030
5031static void enter_rmode(struct kvm_vcpu *vcpu)
5032{
5033 unsigned long flags;
Mohammed Gamala89a8fb2008-08-17 16:42:16 +03005034 struct vcpu_vmx *vmx = to_vmx(vcpu);
Sean Christopherson40bbb9d2018-03-20 12:17:20 -07005035 struct kvm_vmx *kvm_vmx = to_kvm_vmx(vcpu->kvm);
Avi Kivity6aa8b732006-12-10 02:21:36 -08005036
Avi Kivityf5f7b2f2012-08-21 17:07:00 +03005037 vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_TR], VCPU_SREG_TR);
5038 vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_ES], VCPU_SREG_ES);
5039 vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_DS], VCPU_SREG_DS);
5040 vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_FS], VCPU_SREG_FS);
5041 vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_GS], VCPU_SREG_GS);
Gleb Natapovc6ad11532012-12-12 19:10:51 +02005042 vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_SS], VCPU_SREG_SS);
5043 vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_CS], VCPU_SREG_CS);
Avi Kivityf5f7b2f2012-08-21 17:07:00 +03005044
Avi Kivity7ffd92c2009-06-09 14:10:45 +03005045 vmx->rmode.vm86_active = 1;
Avi Kivity6aa8b732006-12-10 02:21:36 -08005046
Gleb Natapov776e58e2011-03-13 12:34:27 +02005047 /*
5048 * Very old userspace does not call KVM_SET_TSS_ADDR before entering
Jan Kiszka4918c6c2013-03-15 08:38:56 +01005049 * vcpu. Warn the user that an update is overdue.
Gleb Natapov776e58e2011-03-13 12:34:27 +02005050 */
Sean Christopherson40bbb9d2018-03-20 12:17:20 -07005051 if (!kvm_vmx->tss_addr)
Gleb Natapov776e58e2011-03-13 12:34:27 +02005052		printk_once(KERN_WARNING "kvm: KVM_SET_TSS_ADDR needs to be "
5053 "called before entering vcpu\n");
Gleb Natapov776e58e2011-03-13 12:34:27 +02005054
Avi Kivity2fb92db2011-04-27 19:42:18 +03005055 vmx_segment_cache_clear(vmx);
5056
Sean Christopherson40bbb9d2018-03-20 12:17:20 -07005057 vmcs_writel(GUEST_TR_BASE, kvm_vmx->tss_addr);
Avi Kivity6aa8b732006-12-10 02:21:36 -08005058 vmcs_write32(GUEST_TR_LIMIT, RMODE_TSS_SIZE - 1);
Avi Kivity6aa8b732006-12-10 02:21:36 -08005059 vmcs_write32(GUEST_TR_AR_BYTES, 0x008b);
5060
5061 flags = vmcs_readl(GUEST_RFLAGS);
Avi Kivity78ac8b42010-04-08 18:19:35 +03005062 vmx->rmode.save_rflags = flags;
Avi Kivity6aa8b732006-12-10 02:21:36 -08005063
Glauber de Oliveira Costa053de042008-01-30 13:31:27 +01005064 flags |= X86_EFLAGS_IOPL | X86_EFLAGS_VM;
Avi Kivity6aa8b732006-12-10 02:21:36 -08005065
5066 vmcs_writel(GUEST_RFLAGS, flags);
Rusty Russell66aee912007-07-17 23:34:16 +10005067 vmcs_writel(GUEST_CR4, vmcs_readl(GUEST_CR4) | X86_CR4_VME);
Avi Kivity6aa8b732006-12-10 02:21:36 -08005068 update_exception_bitmap(vcpu);
5069
Gleb Natapovd99e4152012-12-20 16:57:45 +02005070 fix_rmode_seg(VCPU_SREG_SS, &vmx->rmode.segs[VCPU_SREG_SS]);
5071 fix_rmode_seg(VCPU_SREG_CS, &vmx->rmode.segs[VCPU_SREG_CS]);
5072 fix_rmode_seg(VCPU_SREG_ES, &vmx->rmode.segs[VCPU_SREG_ES]);
5073 fix_rmode_seg(VCPU_SREG_DS, &vmx->rmode.segs[VCPU_SREG_DS]);
5074 fix_rmode_seg(VCPU_SREG_GS, &vmx->rmode.segs[VCPU_SREG_GS]);
5075 fix_rmode_seg(VCPU_SREG_FS, &vmx->rmode.segs[VCPU_SREG_FS]);
Mohammed Gamala89a8fb2008-08-17 16:42:16 +03005076
Eddie Dong8668a3c2007-10-10 14:26:45 +08005077 kvm_mmu_reset_context(vcpu);
Avi Kivity6aa8b732006-12-10 02:21:36 -08005078}
5079
Amit Shah401d10d2009-02-20 22:53:37 +05305080static void vmx_set_efer(struct kvm_vcpu *vcpu, u64 efer)
5081{
5082 struct vcpu_vmx *vmx = to_vmx(vcpu);
Avi Kivity26bb0982009-09-07 11:14:12 +03005083 struct shared_msr_entry *msr = find_msr_entry(vmx, MSR_EFER);
5084
5085 if (!msr)
5086 return;
Amit Shah401d10d2009-02-20 22:53:37 +05305087
Avi Kivityf6801df2010-01-21 15:31:50 +02005088 vcpu->arch.efer = efer;
Amit Shah401d10d2009-02-20 22:53:37 +05305089 if (efer & EFER_LMA) {
Gleb Natapov2961e8762013-11-25 15:37:13 +02005090 vm_entry_controls_setbit(to_vmx(vcpu), VM_ENTRY_IA32E_MODE);
Amit Shah401d10d2009-02-20 22:53:37 +05305091 msr->data = efer;
5092 } else {
Gleb Natapov2961e8762013-11-25 15:37:13 +02005093 vm_entry_controls_clearbit(to_vmx(vcpu), VM_ENTRY_IA32E_MODE);
Amit Shah401d10d2009-02-20 22:53:37 +05305094
5095 msr->data = efer & ~EFER_LME;
5096 }
5097 setup_msrs(vmx);
5098}
5099
Avi Kivity05b3e0c2006-12-13 00:33:45 -08005100#ifdef CONFIG_X86_64
Avi Kivity6aa8b732006-12-10 02:21:36 -08005101
5102static void enter_lmode(struct kvm_vcpu *vcpu)
5103{
5104 u32 guest_tr_ar;
5105
Avi Kivity2fb92db2011-04-27 19:42:18 +03005106 vmx_segment_cache_clear(to_vmx(vcpu));
5107
Avi Kivity6aa8b732006-12-10 02:21:36 -08005108 guest_tr_ar = vmcs_read32(GUEST_TR_AR_BYTES);
Andy Lutomirski4d283ec2015-08-13 13:18:48 -07005109 if ((guest_tr_ar & VMX_AR_TYPE_MASK) != VMX_AR_TYPE_BUSY_64_TSS) {
Jan Kiszkabd801582011-09-12 11:26:22 +02005110 pr_debug_ratelimited("%s: tss fixup for long mode. \n",
5111 __func__);
Avi Kivity6aa8b732006-12-10 02:21:36 -08005112 vmcs_write32(GUEST_TR_AR_BYTES,
Andy Lutomirski4d283ec2015-08-13 13:18:48 -07005113 (guest_tr_ar & ~VMX_AR_TYPE_MASK)
5114 | VMX_AR_TYPE_BUSY_64_TSS);
Avi Kivity6aa8b732006-12-10 02:21:36 -08005115 }
Avi Kivityda38f432010-07-06 11:30:49 +03005116 vmx_set_efer(vcpu, vcpu->arch.efer | EFER_LMA);
Avi Kivity6aa8b732006-12-10 02:21:36 -08005117}
5118
5119static void exit_lmode(struct kvm_vcpu *vcpu)
5120{
Gleb Natapov2961e8762013-11-25 15:37:13 +02005121 vm_entry_controls_clearbit(to_vmx(vcpu), VM_ENTRY_IA32E_MODE);
Avi Kivityda38f432010-07-06 11:30:49 +03005122 vmx_set_efer(vcpu, vcpu->arch.efer & ~EFER_LMA);
Avi Kivity6aa8b732006-12-10 02:21:36 -08005123}
5124
5125#endif
5126
Wanpeng Lic2ba05c2017-12-12 17:33:03 -08005127static inline void __vmx_flush_tlb(struct kvm_vcpu *vcpu, int vpid,
5128 bool invalidate_gpa)
Sheng Yang2384d2b2008-01-17 15:14:33 +08005129{
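	/*
	 * When EPT is in use and guest-physical mappings must be dropped
	 * (or no VPID is available to scope a linear flush), invalidate
	 * the EPT context for the current root; otherwise an INVVPID on
	 * this vCPU's VPID is sufficient.
	 */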
Wanpeng Lic2ba05c2017-12-12 17:33:03 -08005130 if (enable_ept && (invalidate_gpa || !enable_vpid)) {
Vitaly Kuznetsov44dd3ff2018-10-08 21:28:05 +02005131 if (!VALID_PAGE(vcpu->arch.mmu->root_hpa))
Xiao Guangrongdd180b32010-07-03 16:02:42 +08005132 return;
Vitaly Kuznetsov44dd3ff2018-10-08 21:28:05 +02005133 ept_sync_context(construct_eptp(vcpu,
5134 vcpu->arch.mmu->root_hpa));
Jim Mattsonf0b98c02017-03-15 07:56:11 -07005135 } else {
5136 vpid_sync_context(vpid);
Xiao Guangrongdd180b32010-07-03 16:02:42 +08005137 }
Sheng Yang2384d2b2008-01-17 15:14:33 +08005138}
5139
Wanpeng Lic2ba05c2017-12-12 17:33:03 -08005140static void vmx_flush_tlb(struct kvm_vcpu *vcpu, bool invalidate_gpa)
Wanpeng Lidd5f5342015-09-23 18:26:57 +08005141{
Wanpeng Lic2ba05c2017-12-12 17:33:03 -08005142 __vmx_flush_tlb(vcpu, to_vmx(vcpu)->vpid, invalidate_gpa);
Wanpeng Lidd5f5342015-09-23 18:26:57 +08005143}
5144
Junaid Shahidfaff8752018-06-29 13:10:05 -07005145static void vmx_flush_tlb_gva(struct kvm_vcpu *vcpu, gva_t addr)
5146{
5147 int vpid = to_vmx(vcpu)->vpid;
5148
5149 if (!vpid_sync_vcpu_addr(vpid, addr))
5150 vpid_sync_context(vpid);
5151
5152 /*
5153 * If VPIDs are not supported or enabled, then the above is a no-op.
5154 * But we don't really need a TLB flush in that case anyway, because
5155 * each VM entry/exit includes an implicit flush when VPID is 0.
5156 */
5157}
5158
Avi Kivitye8467fd2009-12-29 18:43:06 +02005159static void vmx_decache_cr0_guest_bits(struct kvm_vcpu *vcpu)
5160{
5161 ulong cr0_guest_owned_bits = vcpu->arch.cr0_guest_owned_bits;
5162
5163 vcpu->arch.cr0 &= ~cr0_guest_owned_bits;
5164 vcpu->arch.cr0 |= vmcs_readl(GUEST_CR0) & cr0_guest_owned_bits;
5165}
5166
Avi Kivityaff48ba2010-12-05 18:56:11 +02005167static void vmx_decache_cr3(struct kvm_vcpu *vcpu)
5168{
Sean Christophersonb4d18512018-03-05 12:04:40 -08005169 if (enable_unrestricted_guest || (enable_ept && is_paging(vcpu)))
Avi Kivityaff48ba2010-12-05 18:56:11 +02005170 vcpu->arch.cr3 = vmcs_readl(GUEST_CR3);
5171 __set_bit(VCPU_EXREG_CR3, (ulong *)&vcpu->arch.regs_avail);
5172}
5173
Anthony Liguori25c4c272007-04-27 09:29:21 +03005174static void vmx_decache_cr4_guest_bits(struct kvm_vcpu *vcpu)
Avi Kivity399badf2007-01-05 16:36:38 -08005175{
Avi Kivityfc78f512009-12-07 12:16:48 +02005176 ulong cr4_guest_owned_bits = vcpu->arch.cr4_guest_owned_bits;
5177
5178 vcpu->arch.cr4 &= ~cr4_guest_owned_bits;
5179 vcpu->arch.cr4 |= vmcs_readl(GUEST_CR4) & cr4_guest_owned_bits;
Avi Kivity399badf2007-01-05 16:36:38 -08005180}
5181
Sheng Yang14394422008-04-28 12:24:45 +08005182static void ept_load_pdptrs(struct kvm_vcpu *vcpu)
5183{
Gleb Natapovd0d538b2013-10-09 19:13:19 +03005184 struct kvm_mmu *mmu = vcpu->arch.walk_mmu;
5185
Avi Kivity6de4f3a2009-05-31 22:58:47 +03005186 if (!test_bit(VCPU_EXREG_PDPTR,
5187 (unsigned long *)&vcpu->arch.regs_dirty))
5188 return;
5189
Sheng Yang14394422008-04-28 12:24:45 +08005190 if (is_paging(vcpu) && is_pae(vcpu) && !is_long_mode(vcpu)) {
Gleb Natapovd0d538b2013-10-09 19:13:19 +03005191 vmcs_write64(GUEST_PDPTR0, mmu->pdptrs[0]);
5192 vmcs_write64(GUEST_PDPTR1, mmu->pdptrs[1]);
5193 vmcs_write64(GUEST_PDPTR2, mmu->pdptrs[2]);
5194 vmcs_write64(GUEST_PDPTR3, mmu->pdptrs[3]);
Sheng Yang14394422008-04-28 12:24:45 +08005195 }
5196}
5197
Avi Kivity8f5d5492009-05-31 18:41:29 +03005198static void ept_save_pdptrs(struct kvm_vcpu *vcpu)
5199{
Gleb Natapovd0d538b2013-10-09 19:13:19 +03005200 struct kvm_mmu *mmu = vcpu->arch.walk_mmu;
5201
Avi Kivity8f5d5492009-05-31 18:41:29 +03005202 if (is_paging(vcpu) && is_pae(vcpu) && !is_long_mode(vcpu)) {
Gleb Natapovd0d538b2013-10-09 19:13:19 +03005203 mmu->pdptrs[0] = vmcs_read64(GUEST_PDPTR0);
5204 mmu->pdptrs[1] = vmcs_read64(GUEST_PDPTR1);
5205 mmu->pdptrs[2] = vmcs_read64(GUEST_PDPTR2);
5206 mmu->pdptrs[3] = vmcs_read64(GUEST_PDPTR3);
Avi Kivity8f5d5492009-05-31 18:41:29 +03005207 }
Avi Kivity6de4f3a2009-05-31 22:58:47 +03005208
5209 __set_bit(VCPU_EXREG_PDPTR,
5210 (unsigned long *)&vcpu->arch.regs_avail);
5211 __set_bit(VCPU_EXREG_PDPTR,
5212 (unsigned long *)&vcpu->arch.regs_dirty);
Avi Kivity8f5d5492009-05-31 18:41:29 +03005213}
5214
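/*
 * CR0/CR4 values that L1 may use while in VMX operation are constrained
 * by the VMX fixed-bit MSRs: bits set in ..._FIXED0 must be 1 and bits
 * clear in ..._FIXED1 must be 0 (unrestricted guest relaxes PE/PG).
 */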
David Matlack38991522016-11-29 18:14:08 -08005215static bool nested_guest_cr0_valid(struct kvm_vcpu *vcpu, unsigned long val)
5216{
Paolo Bonzini6677f3d2018-02-26 13:40:08 +01005217 u64 fixed0 = to_vmx(vcpu)->nested.msrs.cr0_fixed0;
5218 u64 fixed1 = to_vmx(vcpu)->nested.msrs.cr0_fixed1;
David Matlack38991522016-11-29 18:14:08 -08005219 struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
5220
Paolo Bonzini6677f3d2018-02-26 13:40:08 +01005221 if (to_vmx(vcpu)->nested.msrs.secondary_ctls_high &
David Matlack38991522016-11-29 18:14:08 -08005222 SECONDARY_EXEC_UNRESTRICTED_GUEST &&
5223 nested_cpu_has2(vmcs12, SECONDARY_EXEC_UNRESTRICTED_GUEST))
5224 fixed0 &= ~(X86_CR0_PE | X86_CR0_PG);
5225
5226 return fixed_bits_valid(val, fixed0, fixed1);
5227}
5228
5229static bool nested_host_cr0_valid(struct kvm_vcpu *vcpu, unsigned long val)
5230{
Paolo Bonzini6677f3d2018-02-26 13:40:08 +01005231 u64 fixed0 = to_vmx(vcpu)->nested.msrs.cr0_fixed0;
5232 u64 fixed1 = to_vmx(vcpu)->nested.msrs.cr0_fixed1;
David Matlack38991522016-11-29 18:14:08 -08005233
5234 return fixed_bits_valid(val, fixed0, fixed1);
5235}
5236
5237static bool nested_cr4_valid(struct kvm_vcpu *vcpu, unsigned long val)
5238{
Paolo Bonzini6677f3d2018-02-26 13:40:08 +01005239 u64 fixed0 = to_vmx(vcpu)->nested.msrs.cr4_fixed0;
5240 u64 fixed1 = to_vmx(vcpu)->nested.msrs.cr4_fixed1;
David Matlack38991522016-11-29 18:14:08 -08005241
5242 return fixed_bits_valid(val, fixed0, fixed1);
5243}
5244
5245/* No difference in the restrictions on guest and host CR4 in VMX operation. */
5246#define nested_guest_cr4_valid nested_cr4_valid
5247#define nested_host_cr4_valid nested_cr4_valid
5248
Nadav Har'El5e1746d2011-05-25 23:03:24 +03005249static int vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4);
Sheng Yang14394422008-04-28 12:24:45 +08005250
5251static void ept_update_paging_mode_cr0(unsigned long *hw_cr0,
5252 unsigned long cr0,
5253 struct kvm_vcpu *vcpu)
5254{
Marcelo Tosatti5233dd52011-06-06 14:27:47 -03005255 if (!test_bit(VCPU_EXREG_CR3, (ulong *)&vcpu->arch.regs_avail))
5256 vmx_decache_cr3(vcpu);
Sheng Yang14394422008-04-28 12:24:45 +08005257 if (!(cr0 & X86_CR0_PG)) {
5258 /* From paging/starting to nonpaging */
5259 vmcs_write32(CPU_BASED_VM_EXEC_CONTROL,
Sheng Yang65267ea2008-06-18 14:43:38 +08005260 vmcs_read32(CPU_BASED_VM_EXEC_CONTROL) |
Sheng Yang14394422008-04-28 12:24:45 +08005261 (CPU_BASED_CR3_LOAD_EXITING |
5262 CPU_BASED_CR3_STORE_EXITING));
5263 vcpu->arch.cr0 = cr0;
Avi Kivityfc78f512009-12-07 12:16:48 +02005264 vmx_set_cr4(vcpu, kvm_read_cr4(vcpu));
Sheng Yang14394422008-04-28 12:24:45 +08005265 } else if (!is_paging(vcpu)) {
5266 /* From nonpaging to paging */
5267 vmcs_write32(CPU_BASED_VM_EXEC_CONTROL,
Sheng Yang65267ea2008-06-18 14:43:38 +08005268 vmcs_read32(CPU_BASED_VM_EXEC_CONTROL) &
Sheng Yang14394422008-04-28 12:24:45 +08005269 ~(CPU_BASED_CR3_LOAD_EXITING |
5270 CPU_BASED_CR3_STORE_EXITING));
5271 vcpu->arch.cr0 = cr0;
Avi Kivityfc78f512009-12-07 12:16:48 +02005272 vmx_set_cr4(vcpu, kvm_read_cr4(vcpu));
Sheng Yang14394422008-04-28 12:24:45 +08005273 }
Sheng Yang95eb84a2009-08-19 09:52:18 +08005274
5275 if (!(cr0 & X86_CR0_WP))
5276 *hw_cr0 &= ~X86_CR0_WP;
Sheng Yang14394422008-04-28 12:24:45 +08005277}
5278
Avi Kivity6aa8b732006-12-10 02:21:36 -08005279static void vmx_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
5280{
Avi Kivity7ffd92c2009-06-09 14:10:45 +03005281 struct vcpu_vmx *vmx = to_vmx(vcpu);
Nitin A Kamble3a624e22009-06-08 11:34:16 -07005282 unsigned long hw_cr0;
5283
Sean Christopherson3de63472018-07-13 08:42:30 -07005284 hw_cr0 = (cr0 & ~KVM_VM_CR0_ALWAYS_OFF);
Nitin A Kamble3a624e22009-06-08 11:34:16 -07005285 if (enable_unrestricted_guest)
Gleb Natapov50378782013-02-04 16:00:28 +02005286 hw_cr0 |= KVM_VM_CR0_ALWAYS_ON_UNRESTRICTED_GUEST;
Gleb Natapov218e7632013-01-21 15:36:45 +02005287 else {
Gleb Natapov50378782013-02-04 16:00:28 +02005288 hw_cr0 |= KVM_VM_CR0_ALWAYS_ON;
Sheng Yang14394422008-04-28 12:24:45 +08005289
Gleb Natapov218e7632013-01-21 15:36:45 +02005290 if (vmx->rmode.vm86_active && (cr0 & X86_CR0_PE))
5291 enter_pmode(vcpu);
Avi Kivity6aa8b732006-12-10 02:21:36 -08005292
Gleb Natapov218e7632013-01-21 15:36:45 +02005293 if (!vmx->rmode.vm86_active && !(cr0 & X86_CR0_PE))
5294 enter_rmode(vcpu);
5295 }
Avi Kivity6aa8b732006-12-10 02:21:36 -08005296
Avi Kivity05b3e0c2006-12-13 00:33:45 -08005297#ifdef CONFIG_X86_64
Avi Kivityf6801df2010-01-21 15:31:50 +02005298 if (vcpu->arch.efer & EFER_LME) {
Rusty Russell707d92fa2007-07-17 23:19:08 +10005299 if (!is_paging(vcpu) && (cr0 & X86_CR0_PG))
Avi Kivity6aa8b732006-12-10 02:21:36 -08005300 enter_lmode(vcpu);
Rusty Russell707d92fa2007-07-17 23:19:08 +10005301 if (is_paging(vcpu) && !(cr0 & X86_CR0_PG))
Avi Kivity6aa8b732006-12-10 02:21:36 -08005302 exit_lmode(vcpu);
5303 }
5304#endif
5305
Sean Christophersonb4d18512018-03-05 12:04:40 -08005306 if (enable_ept && !enable_unrestricted_guest)
Sheng Yang14394422008-04-28 12:24:45 +08005307 ept_update_paging_mode_cr0(&hw_cr0, cr0, vcpu);
5308
Avi Kivity6aa8b732006-12-10 02:21:36 -08005309 vmcs_writel(CR0_READ_SHADOW, cr0);
Sheng Yang14394422008-04-28 12:24:45 +08005310 vmcs_writel(GUEST_CR0, hw_cr0);
Zhang Xiantaoad312c72007-12-13 23:50:52 +08005311 vcpu->arch.cr0 = cr0;
Gleb Natapov14168782013-01-21 15:36:49 +02005312
5313 /* depends on vcpu->arch.cr0 to be set to a new value */
5314 vmx->emulation_required = emulation_required(vcpu);
Avi Kivity6aa8b732006-12-10 02:21:36 -08005315}
5316
Yu Zhang855feb62017-08-24 20:27:55 +08005317static int get_ept_level(struct kvm_vcpu *vcpu)
5318{
5319 if (cpu_has_vmx_ept_5levels() && (cpuid_maxphyaddr(vcpu) > 48))
5320 return 5;
5321 return 4;
5322}
5323
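/*
 * Build the EPT pointer: bits 2:0 encode the memory type used to access
 * the paging structures (write-back here), bits 5:3 the page-walk length
 * minus one (4 or 5 levels), bit 6 enables accessed/dirty flags, and the
 * upper bits hold the physical address of the root table.
 */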
Peter Feiner995f00a2017-06-30 17:26:32 -07005324static u64 construct_eptp(struct kvm_vcpu *vcpu, unsigned long root_hpa)
Sheng Yang14394422008-04-28 12:24:45 +08005325{
Yu Zhang855feb62017-08-24 20:27:55 +08005326 u64 eptp = VMX_EPTP_MT_WB;
Sheng Yang14394422008-04-28 12:24:45 +08005327
Yu Zhang855feb62017-08-24 20:27:55 +08005328 eptp |= (get_ept_level(vcpu) == 5) ? VMX_EPTP_PWL_5 : VMX_EPTP_PWL_4;
Sheng Yang14394422008-04-28 12:24:45 +08005329
Peter Feiner995f00a2017-06-30 17:26:32 -07005330 if (enable_ept_ad_bits &&
5331 (!is_guest_mode(vcpu) || nested_ept_ad_enabled(vcpu)))
David Hildenbrandbb97a012017-08-10 23:15:28 +02005332 eptp |= VMX_EPTP_AD_ENABLE_BIT;
Sheng Yang14394422008-04-28 12:24:45 +08005333 eptp |= (root_hpa & PAGE_MASK);
5334
5335 return eptp;
5336}
5337
Avi Kivity6aa8b732006-12-10 02:21:36 -08005338static void vmx_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
5339{
Tianyu Lan877ad952018-07-19 08:40:23 +00005340 struct kvm *kvm = vcpu->kvm;
Sheng Yang14394422008-04-28 12:24:45 +08005341 unsigned long guest_cr3;
5342 u64 eptp;
5343
5344 guest_cr3 = cr3;
Avi Kivity089d0342009-03-23 18:26:32 +02005345 if (enable_ept) {
Peter Feiner995f00a2017-06-30 17:26:32 -07005346 eptp = construct_eptp(vcpu, cr3);
Sheng Yang14394422008-04-28 12:24:45 +08005347 vmcs_write64(EPT_POINTER, eptp);
Tianyu Lan877ad952018-07-19 08:40:23 +00005348
5349 if (kvm_x86_ops->tlb_remote_flush) {
5350 spin_lock(&to_kvm_vmx(kvm)->ept_pointer_lock);
5351 to_vmx(vcpu)->ept_pointer = eptp;
5352 to_kvm_vmx(kvm)->ept_pointers_match
5353 = EPT_POINTERS_CHECK;
5354 spin_unlock(&to_kvm_vmx(kvm)->ept_pointer_lock);
5355 }
5356
Sean Christophersone90008d2018-03-05 12:04:37 -08005357 if (enable_unrestricted_guest || is_paging(vcpu) ||
5358 is_guest_mode(vcpu))
Jan Kiszka59ab5a82013-08-08 16:26:29 +02005359 guest_cr3 = kvm_read_cr3(vcpu);
5360 else
Tianyu Lan877ad952018-07-19 08:40:23 +00005361 guest_cr3 = to_kvm_vmx(kvm)->ept_identity_map_addr;
Marcelo Tosatti7c93be442009-10-26 16:48:33 -02005362 ept_load_pdptrs(vcpu);
Sheng Yang14394422008-04-28 12:24:45 +08005363 }
5364
Sheng Yang14394422008-04-28 12:24:45 +08005365 vmcs_writel(GUEST_CR3, guest_cr3);
Avi Kivity6aa8b732006-12-10 02:21:36 -08005366}
5367
Nadav Har'El5e1746d2011-05-25 23:03:24 +03005368static int vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
Avi Kivity6aa8b732006-12-10 02:21:36 -08005369{
Ben Serebrin085e68e2015-04-16 11:58:05 -07005370 /*
5371 * Pass through host's Machine Check Enable value to hw_cr4, which
5372 * is in force while we are in guest mode. Do not let guests control
5373 * this bit, even if host CR4.MCE == 0.
5374 */
Sean Christopherson5dc1f042018-03-05 12:04:39 -08005375 unsigned long hw_cr4;
5376
5377 hw_cr4 = (cr4_read_shadow() & X86_CR4_MCE) | (cr4 & ~X86_CR4_MCE);
5378 if (enable_unrestricted_guest)
5379 hw_cr4 |= KVM_VM_CR4_ALWAYS_ON_UNRESTRICTED_GUEST;
5380 else if (to_vmx(vcpu)->rmode.vm86_active)
5381 hw_cr4 |= KVM_RMODE_VM_CR4_ALWAYS_ON;
5382 else
5383 hw_cr4 |= KVM_PMODE_VM_CR4_ALWAYS_ON;
Sheng Yang14394422008-04-28 12:24:45 +08005384
Sean Christopherson64f7a112018-04-30 10:01:06 -07005385 if (!boot_cpu_has(X86_FEATURE_UMIP) && vmx_umip_emulated()) {
5386 if (cr4 & X86_CR4_UMIP) {
5387 vmcs_set_bits(SECONDARY_VM_EXEC_CONTROL,
Paolo Bonzini0367f202016-07-12 10:44:55 +02005388 SECONDARY_EXEC_DESC);
Sean Christopherson64f7a112018-04-30 10:01:06 -07005389 hw_cr4 &= ~X86_CR4_UMIP;
5390 } else if (!is_guest_mode(vcpu) ||
5391 !nested_cpu_has2(get_vmcs12(vcpu), SECONDARY_EXEC_DESC))
5392 vmcs_clear_bits(SECONDARY_VM_EXEC_CONTROL,
5393 SECONDARY_EXEC_DESC);
5394 }
Paolo Bonzini0367f202016-07-12 10:44:55 +02005395
Nadav Har'El5e1746d2011-05-25 23:03:24 +03005396 if (cr4 & X86_CR4_VMXE) {
5397 /*
5398 * To use VMXON (and later other VMX instructions), a guest
5399 * must first be able to turn on cr4.VMXE (see handle_vmon()).
5400 * So basically the check on whether to allow nested VMX
Paolo Bonzini5bea5122018-09-18 15:19:17 +02005401 * is here. We operate under the default treatment of SMM,
5402 * so VMX cannot be enabled under SMM.
Nadav Har'El5e1746d2011-05-25 23:03:24 +03005403 */
Paolo Bonzini5bea5122018-09-18 15:19:17 +02005404 if (!nested_vmx_allowed(vcpu) || is_smm(vcpu))
Nadav Har'El5e1746d2011-05-25 23:03:24 +03005405 return 1;
Jan Kiszka1a0d74e2013-03-07 14:08:07 +01005406 }
David Matlack38991522016-11-29 18:14:08 -08005407
5408 if (to_vmx(vcpu)->nested.vmxon && !nested_cr4_valid(vcpu, cr4))
Nadav Har'El5e1746d2011-05-25 23:03:24 +03005409 return 1;
5410
Zhang Xiantaoad312c72007-12-13 23:50:52 +08005411 vcpu->arch.cr4 = cr4;
Sheng Yang14394422008-04-28 12:24:45 +08005412
Sean Christopherson5dc1f042018-03-05 12:04:39 -08005413 if (!enable_unrestricted_guest) {
5414 if (enable_ept) {
5415 if (!is_paging(vcpu)) {
5416 hw_cr4 &= ~X86_CR4_PAE;
5417 hw_cr4 |= X86_CR4_PSE;
5418 } else if (!(cr4 & X86_CR4_PAE)) {
5419 hw_cr4 &= ~X86_CR4_PAE;
5420 }
5421 }
5422
Radim Krčmář656ec4a2015-11-02 22:20:00 +01005423 /*
Huaitong Handdba2622016-03-22 16:51:15 +08005424	 * SMEP/SMAP/PKU are disabled by hardware when the CPU is in
 5425		 * non-paging mode. To emulate this behavior, SMEP/SMAP/PKU need
 5426		 * to be manually disabled when the guest switches to non-paging
 5427		 * mode.
5428 *
5429 * If !enable_unrestricted_guest, the CPU is always running
5430 * with CR0.PG=1 and CR4 needs to be modified.
5431 * If enable_unrestricted_guest, the CPU automatically
5432 * disables SMEP/SMAP/PKU when the guest sets CR0.PG=0.
Radim Krčmář656ec4a2015-11-02 22:20:00 +01005433 */
Sean Christopherson5dc1f042018-03-05 12:04:39 -08005434 if (!is_paging(vcpu))
5435 hw_cr4 &= ~(X86_CR4_SMEP | X86_CR4_SMAP | X86_CR4_PKE);
5436 }
Radim Krčmář656ec4a2015-11-02 22:20:00 +01005437
Sheng Yang14394422008-04-28 12:24:45 +08005438 vmcs_writel(CR4_READ_SHADOW, cr4);
5439 vmcs_writel(GUEST_CR4, hw_cr4);
Nadav Har'El5e1746d2011-05-25 23:03:24 +03005440 return 0;
Avi Kivity6aa8b732006-12-10 02:21:36 -08005441}
5442
Avi Kivity6aa8b732006-12-10 02:21:36 -08005443static void vmx_get_segment(struct kvm_vcpu *vcpu,
5444 struct kvm_segment *var, int seg)
5445{
Avi Kivitya9179492011-01-03 14:28:52 +02005446 struct vcpu_vmx *vmx = to_vmx(vcpu);
Avi Kivity6aa8b732006-12-10 02:21:36 -08005447 u32 ar;
5448
Gleb Natapovc6ad11532012-12-12 19:10:51 +02005449 if (vmx->rmode.vm86_active && seg != VCPU_SREG_LDTR) {
Avi Kivityf5f7b2f2012-08-21 17:07:00 +03005450 *var = vmx->rmode.segs[seg];
Avi Kivitya9179492011-01-03 14:28:52 +02005451 if (seg == VCPU_SREG_TR
Avi Kivity2fb92db2011-04-27 19:42:18 +03005452 || var->selector == vmx_read_guest_seg_selector(vmx, seg))
Avi Kivityf5f7b2f2012-08-21 17:07:00 +03005453 return;
Avi Kivity1390a282012-08-21 17:07:08 +03005454 var->base = vmx_read_guest_seg_base(vmx, seg);
5455 var->selector = vmx_read_guest_seg_selector(vmx, seg);
5456 return;
Avi Kivitya9179492011-01-03 14:28:52 +02005457 }
Avi Kivity2fb92db2011-04-27 19:42:18 +03005458 var->base = vmx_read_guest_seg_base(vmx, seg);
5459 var->limit = vmx_read_guest_seg_limit(vmx, seg);
5460 var->selector = vmx_read_guest_seg_selector(vmx, seg);
5461 ar = vmx_read_guest_seg_ar(vmx, seg);
Gleb Natapov03617c12013-06-28 13:17:18 +03005462 var->unusable = (ar >> 16) & 1;
Avi Kivity6aa8b732006-12-10 02:21:36 -08005463 var->type = ar & 15;
5464 var->s = (ar >> 4) & 1;
5465 var->dpl = (ar >> 5) & 3;
Gleb Natapov03617c12013-06-28 13:17:18 +03005466 /*
 5467	 * Some userspaces do not preserve the unusable property. Since a usable
 5468	 * segment has to be present according to the VMX spec, we can use the
 5469	 * present property to work around the userspace bug by making an
 5470	 * unusable segment always nonpresent. vmx_segment_access_rights()
 5471	 * already marks a nonpresent segment as unusable.
5472 */
5473 var->present = !var->unusable;
Avi Kivity6aa8b732006-12-10 02:21:36 -08005474 var->avl = (ar >> 12) & 1;
5475 var->l = (ar >> 13) & 1;
5476 var->db = (ar >> 14) & 1;
5477 var->g = (ar >> 15) & 1;
Avi Kivity6aa8b732006-12-10 02:21:36 -08005478}
5479
Avi Kivitya9179492011-01-03 14:28:52 +02005480static u64 vmx_get_segment_base(struct kvm_vcpu *vcpu, int seg)
5481{
Avi Kivitya9179492011-01-03 14:28:52 +02005482 struct kvm_segment s;
5483
5484 if (to_vmx(vcpu)->rmode.vm86_active) {
5485 vmx_get_segment(vcpu, &s, seg);
5486 return s.base;
5487 }
Avi Kivity2fb92db2011-04-27 19:42:18 +03005488 return vmx_read_guest_seg_base(to_vmx(vcpu), seg);
Avi Kivitya9179492011-01-03 14:28:52 +02005489}
5490
Marcelo Tosattib09408d2013-01-07 19:27:06 -02005491static int vmx_get_cpl(struct kvm_vcpu *vcpu)
Izik Eidus2e4d2652008-03-24 19:38:34 +02005492{
Marcelo Tosattib09408d2013-01-07 19:27:06 -02005493 struct vcpu_vmx *vmx = to_vmx(vcpu);
5494
Paolo Bonziniae9fedc2014-05-14 09:39:49 +02005495 if (unlikely(vmx->rmode.vm86_active))
Izik Eidus2e4d2652008-03-24 19:38:34 +02005496 return 0;
Paolo Bonziniae9fedc2014-05-14 09:39:49 +02005497 else {
5498 int ar = vmx_read_guest_seg_ar(vmx, VCPU_SREG_SS);
Andy Lutomirski4d283ec2015-08-13 13:18:48 -07005499 return VMX_AR_DPL(ar);
Avi Kivity69c73022011-03-07 15:26:44 +02005500 }
Avi Kivity69c73022011-03-07 15:26:44 +02005501}
5502
Avi Kivity653e3102007-05-07 10:55:37 +03005503static u32 vmx_segment_access_rights(struct kvm_segment *var)
Avi Kivity6aa8b732006-12-10 02:21:36 -08005504{
Avi Kivity6aa8b732006-12-10 02:21:36 -08005505 u32 ar;
5506
Avi Kivityf0495f92012-06-07 17:06:10 +03005507 if (var->unusable || !var->present)
Avi Kivity6aa8b732006-12-10 02:21:36 -08005508 ar = 1 << 16;
5509 else {
5510 ar = var->type & 15;
5511 ar |= (var->s & 1) << 4;
5512 ar |= (var->dpl & 3) << 5;
5513 ar |= (var->present & 1) << 7;
5514 ar |= (var->avl & 1) << 12;
5515 ar |= (var->l & 1) << 13;
5516 ar |= (var->db & 1) << 14;
5517 ar |= (var->g & 1) << 15;
5518 }
Avi Kivity653e3102007-05-07 10:55:37 +03005519
5520 return ar;
5521}
5522
5523static void vmx_set_segment(struct kvm_vcpu *vcpu,
5524 struct kvm_segment *var, int seg)
5525{
Avi Kivity7ffd92c2009-06-09 14:10:45 +03005526 struct vcpu_vmx *vmx = to_vmx(vcpu);
Mathias Krause772e0312012-08-30 01:30:19 +02005527 const struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg];
Avi Kivity653e3102007-05-07 10:55:37 +03005528
Avi Kivity2fb92db2011-04-27 19:42:18 +03005529 vmx_segment_cache_clear(vmx);
5530
Gleb Natapov1ecd50a2012-12-12 19:10:54 +02005531 if (vmx->rmode.vm86_active && seg != VCPU_SREG_LDTR) {
5532 vmx->rmode.segs[seg] = *var;
5533 if (seg == VCPU_SREG_TR)
5534 vmcs_write16(sf->selector, var->selector);
5535 else if (var->s)
5536 fix_rmode_seg(seg, &vmx->rmode.segs[seg]);
Gleb Natapovd99e4152012-12-20 16:57:45 +02005537 goto out;
Avi Kivity653e3102007-05-07 10:55:37 +03005538 }
Gleb Natapov1ecd50a2012-12-12 19:10:54 +02005539
Avi Kivity653e3102007-05-07 10:55:37 +03005540 vmcs_writel(sf->base, var->base);
5541 vmcs_write32(sf->limit, var->limit);
5542 vmcs_write16(sf->selector, var->selector);
Nitin A Kamble3a624e22009-06-08 11:34:16 -07005543
5544 /*
5545 * Fix the "Accessed" bit in AR field of segment registers for older
5546 * qemu binaries.
5547 * IA32 arch specifies that at the time of processor reset the
 5548	 * "Accessed" bit in the AR field of segment registers is 1, but qemu
Guo Chao0fa06072012-06-28 15:16:19 +08005549	 * sets it to 0 in its userland code. This causes an invalid guest
Nitin A Kamble3a624e22009-06-08 11:34:16 -07005550	 * state vmexit when "unrestricted guest" mode is turned on.
5551 * Fix for this setup issue in cpu_reset is being pushed in the qemu
5552 * tree. Newer qemu binaries with that qemu fix would not need this
5553 * kvm hack.
5554 */
5555 if (enable_unrestricted_guest && (seg != VCPU_SREG_LDTR))
Gleb Natapovf924d662012-12-12 19:10:55 +02005556 var->type |= 0x1; /* Accessed */
Nitin A Kamble3a624e22009-06-08 11:34:16 -07005557
Gleb Natapovf924d662012-12-12 19:10:55 +02005558 vmcs_write32(sf->ar_bytes, vmx_segment_access_rights(var));
Gleb Natapovd99e4152012-12-20 16:57:45 +02005559
5560out:
Paolo Bonzini98eb2f82014-03-27 09:51:52 +01005561 vmx->emulation_required = emulation_required(vcpu);
Avi Kivity6aa8b732006-12-10 02:21:36 -08005562}
5563
Avi Kivity6aa8b732006-12-10 02:21:36 -08005564static void vmx_get_cs_db_l_bits(struct kvm_vcpu *vcpu, int *db, int *l)
5565{
Avi Kivity2fb92db2011-04-27 19:42:18 +03005566 u32 ar = vmx_read_guest_seg_ar(to_vmx(vcpu), VCPU_SREG_CS);
Avi Kivity6aa8b732006-12-10 02:21:36 -08005567
5568 *db = (ar >> 14) & 1;
5569 *l = (ar >> 13) & 1;
5570}
5571
Gleb Natapov89a27f42010-02-16 10:51:48 +02005572static void vmx_get_idt(struct kvm_vcpu *vcpu, struct desc_ptr *dt)
Avi Kivity6aa8b732006-12-10 02:21:36 -08005573{
Gleb Natapov89a27f42010-02-16 10:51:48 +02005574 dt->size = vmcs_read32(GUEST_IDTR_LIMIT);
5575 dt->address = vmcs_readl(GUEST_IDTR_BASE);
Avi Kivity6aa8b732006-12-10 02:21:36 -08005576}
5577
Gleb Natapov89a27f42010-02-16 10:51:48 +02005578static void vmx_set_idt(struct kvm_vcpu *vcpu, struct desc_ptr *dt)
Avi Kivity6aa8b732006-12-10 02:21:36 -08005579{
Gleb Natapov89a27f42010-02-16 10:51:48 +02005580 vmcs_write32(GUEST_IDTR_LIMIT, dt->size);
5581 vmcs_writel(GUEST_IDTR_BASE, dt->address);
Avi Kivity6aa8b732006-12-10 02:21:36 -08005582}
5583
Gleb Natapov89a27f42010-02-16 10:51:48 +02005584static void vmx_get_gdt(struct kvm_vcpu *vcpu, struct desc_ptr *dt)
Avi Kivity6aa8b732006-12-10 02:21:36 -08005585{
Gleb Natapov89a27f42010-02-16 10:51:48 +02005586 dt->size = vmcs_read32(GUEST_GDTR_LIMIT);
5587 dt->address = vmcs_readl(GUEST_GDTR_BASE);
Avi Kivity6aa8b732006-12-10 02:21:36 -08005588}
5589
Gleb Natapov89a27f42010-02-16 10:51:48 +02005590static void vmx_set_gdt(struct kvm_vcpu *vcpu, struct desc_ptr *dt)
Avi Kivity6aa8b732006-12-10 02:21:36 -08005591{
Gleb Natapov89a27f42010-02-16 10:51:48 +02005592 vmcs_write32(GUEST_GDTR_LIMIT, dt->size);
5593 vmcs_writel(GUEST_GDTR_BASE, dt->address);
Avi Kivity6aa8b732006-12-10 02:21:36 -08005594}
5595
Mohammed Gamal648dfaa2008-08-17 16:38:32 +03005596static bool rmode_segment_valid(struct kvm_vcpu *vcpu, int seg)
5597{
5598 struct kvm_segment var;
5599 u32 ar;
5600
5601 vmx_get_segment(vcpu, &var, seg);
Gleb Natapov07f42f52012-12-12 19:10:49 +02005602 var.dpl = 0x3;
Gleb Natapov0647f4a2012-12-12 19:10:50 +02005603 if (seg == VCPU_SREG_CS)
5604 var.type = 0x3;
Mohammed Gamal648dfaa2008-08-17 16:38:32 +03005605 ar = vmx_segment_access_rights(&var);
5606
5607 if (var.base != (var.selector << 4))
5608 return false;
Gleb Natapov89efbed2012-12-20 16:57:44 +02005609 if (var.limit != 0xffff)
Mohammed Gamal648dfaa2008-08-17 16:38:32 +03005610 return false;
Gleb Natapov07f42f52012-12-12 19:10:49 +02005611 if (ar != 0xf3)
Mohammed Gamal648dfaa2008-08-17 16:38:32 +03005612 return false;
5613
5614 return true;
5615}
5616
5617static bool code_segment_valid(struct kvm_vcpu *vcpu)
5618{
5619 struct kvm_segment cs;
5620 unsigned int cs_rpl;
5621
5622 vmx_get_segment(vcpu, &cs, VCPU_SREG_CS);
Nadav Amitb32a9912015-03-29 16:33:04 +03005623 cs_rpl = cs.selector & SEGMENT_RPL_MASK;
Mohammed Gamal648dfaa2008-08-17 16:38:32 +03005624
Avi Kivity1872a3f2009-01-04 23:26:52 +02005625 if (cs.unusable)
5626 return false;
Andy Lutomirski4d283ec2015-08-13 13:18:48 -07005627 if (~cs.type & (VMX_AR_TYPE_CODE_MASK|VMX_AR_TYPE_ACCESSES_MASK))
Mohammed Gamal648dfaa2008-08-17 16:38:32 +03005628 return false;
5629 if (!cs.s)
5630 return false;
Andy Lutomirski4d283ec2015-08-13 13:18:48 -07005631 if (cs.type & VMX_AR_TYPE_WRITEABLE_MASK) {
Mohammed Gamal648dfaa2008-08-17 16:38:32 +03005632 if (cs.dpl > cs_rpl)
5633 return false;
Avi Kivity1872a3f2009-01-04 23:26:52 +02005634 } else {
Mohammed Gamal648dfaa2008-08-17 16:38:32 +03005635 if (cs.dpl != cs_rpl)
5636 return false;
5637 }
5638 if (!cs.present)
5639 return false;
5640
5641 /* TODO: Add Reserved field check, this'll require a new member in the kvm_segment_field structure */
5642 return true;
5643}
5644
5645static bool stack_segment_valid(struct kvm_vcpu *vcpu)
5646{
5647 struct kvm_segment ss;
5648 unsigned int ss_rpl;
5649
5650 vmx_get_segment(vcpu, &ss, VCPU_SREG_SS);
Nadav Amitb32a9912015-03-29 16:33:04 +03005651 ss_rpl = ss.selector & SEGMENT_RPL_MASK;
Mohammed Gamal648dfaa2008-08-17 16:38:32 +03005652
Avi Kivity1872a3f2009-01-04 23:26:52 +02005653 if (ss.unusable)
5654 return true;
5655 if (ss.type != 3 && ss.type != 7)
Mohammed Gamal648dfaa2008-08-17 16:38:32 +03005656 return false;
5657 if (!ss.s)
5658 return false;
5659 if (ss.dpl != ss_rpl) /* DPL != RPL */
5660 return false;
5661 if (!ss.present)
5662 return false;
5663
5664 return true;
5665}
5666
5667static bool data_segment_valid(struct kvm_vcpu *vcpu, int seg)
5668{
5669 struct kvm_segment var;
5670 unsigned int rpl;
5671
5672 vmx_get_segment(vcpu, &var, seg);
Nadav Amitb32a9912015-03-29 16:33:04 +03005673 rpl = var.selector & SEGMENT_RPL_MASK;
Mohammed Gamal648dfaa2008-08-17 16:38:32 +03005674
Avi Kivity1872a3f2009-01-04 23:26:52 +02005675 if (var.unusable)
5676 return true;
Mohammed Gamal648dfaa2008-08-17 16:38:32 +03005677 if (!var.s)
5678 return false;
5679 if (!var.present)
5680 return false;
Andy Lutomirski4d283ec2015-08-13 13:18:48 -07005681 if (~var.type & (VMX_AR_TYPE_CODE_MASK|VMX_AR_TYPE_WRITEABLE_MASK)) {
Mohammed Gamal648dfaa2008-08-17 16:38:32 +03005682 if (var.dpl < rpl) /* DPL < RPL */
5683 return false;
5684 }
5685
5686 /* TODO: Add other members to kvm_segment_field to allow checking for other access
5687 * rights flags
5688 */
5689 return true;
5690}
5691
5692static bool tr_valid(struct kvm_vcpu *vcpu)
5693{
5694 struct kvm_segment tr;
5695
5696 vmx_get_segment(vcpu, &tr, VCPU_SREG_TR);
5697
Avi Kivity1872a3f2009-01-04 23:26:52 +02005698 if (tr.unusable)
5699 return false;
Nadav Amitb32a9912015-03-29 16:33:04 +03005700 if (tr.selector & SEGMENT_TI_MASK) /* TI = 1 */
Mohammed Gamal648dfaa2008-08-17 16:38:32 +03005701 return false;
Avi Kivity1872a3f2009-01-04 23:26:52 +02005702 if (tr.type != 3 && tr.type != 11) /* TODO: Check if guest is in IA32e mode */
Mohammed Gamal648dfaa2008-08-17 16:38:32 +03005703 return false;
5704 if (!tr.present)
5705 return false;
5706
5707 return true;
5708}
5709
5710static bool ldtr_valid(struct kvm_vcpu *vcpu)
5711{
5712 struct kvm_segment ldtr;
5713
5714 vmx_get_segment(vcpu, &ldtr, VCPU_SREG_LDTR);
5715
Avi Kivity1872a3f2009-01-04 23:26:52 +02005716 if (ldtr.unusable)
5717 return true;
Nadav Amitb32a9912015-03-29 16:33:04 +03005718 if (ldtr.selector & SEGMENT_TI_MASK) /* TI = 1 */
Mohammed Gamal648dfaa2008-08-17 16:38:32 +03005719 return false;
5720 if (ldtr.type != 2)
5721 return false;
5722 if (!ldtr.present)
5723 return false;
5724
5725 return true;
5726}
5727
5728static bool cs_ss_rpl_check(struct kvm_vcpu *vcpu)
5729{
5730 struct kvm_segment cs, ss;
5731
5732 vmx_get_segment(vcpu, &cs, VCPU_SREG_CS);
5733 vmx_get_segment(vcpu, &ss, VCPU_SREG_SS);
5734
Nadav Amitb32a9912015-03-29 16:33:04 +03005735 return ((cs.selector & SEGMENT_RPL_MASK) ==
5736 (ss.selector & SEGMENT_RPL_MASK));
Mohammed Gamal648dfaa2008-08-17 16:38:32 +03005737}
5738
5739/*
 5740 * Check if guest state is valid. Returns true if valid, false if
 5741 * not.
 5742 * We assume that registers are always usable.
5743 */
5744static bool guest_state_valid(struct kvm_vcpu *vcpu)
5745{
Gleb Natapovc5e97c82013-01-21 15:36:43 +02005746 if (enable_unrestricted_guest)
5747 return true;
5748
Mohammed Gamal648dfaa2008-08-17 16:38:32 +03005749 /* real mode guest state checks */
Gleb Natapovf13882d2013-04-14 16:07:37 +03005750 if (!is_protmode(vcpu) || (vmx_get_rflags(vcpu) & X86_EFLAGS_VM)) {
Mohammed Gamal648dfaa2008-08-17 16:38:32 +03005751 if (!rmode_segment_valid(vcpu, VCPU_SREG_CS))
5752 return false;
5753 if (!rmode_segment_valid(vcpu, VCPU_SREG_SS))
5754 return false;
5755 if (!rmode_segment_valid(vcpu, VCPU_SREG_DS))
5756 return false;
5757 if (!rmode_segment_valid(vcpu, VCPU_SREG_ES))
5758 return false;
5759 if (!rmode_segment_valid(vcpu, VCPU_SREG_FS))
5760 return false;
5761 if (!rmode_segment_valid(vcpu, VCPU_SREG_GS))
5762 return false;
5763 } else {
5764 /* protected mode guest state checks */
5765 if (!cs_ss_rpl_check(vcpu))
5766 return false;
5767 if (!code_segment_valid(vcpu))
5768 return false;
5769 if (!stack_segment_valid(vcpu))
5770 return false;
5771 if (!data_segment_valid(vcpu, VCPU_SREG_DS))
5772 return false;
5773 if (!data_segment_valid(vcpu, VCPU_SREG_ES))
5774 return false;
5775 if (!data_segment_valid(vcpu, VCPU_SREG_FS))
5776 return false;
5777 if (!data_segment_valid(vcpu, VCPU_SREG_GS))
5778 return false;
5779 if (!tr_valid(vcpu))
5780 return false;
5781 if (!ldtr_valid(vcpu))
5782 return false;
5783 }
5784 /* TODO:
5785 * - Add checks on RIP
5786 * - Add checks on RFLAGS
5787 */
5788
5789 return true;
5790}
5791
Jim Mattson5fa99cb2017-07-06 16:33:07 -07005792static bool page_address_valid(struct kvm_vcpu *vcpu, gpa_t gpa)
5793{
5794 return PAGE_ALIGNED(gpa) && !(gpa >> cpuid_maxphyaddr(vcpu));
5795}
5796
Mike Dayd77c26f2007-10-08 09:02:08 -04005797static int init_rmode_tss(struct kvm *kvm)
Avi Kivity6aa8b732006-12-10 02:21:36 -08005798{
Xiao Guangrong40dcaa92011-03-09 15:41:04 +08005799 gfn_t fn;
Izik Eidus195aefd2007-10-01 22:14:18 +02005800 u16 data = 0;
Paolo Bonzini1f755a82014-09-16 13:37:40 +02005801 int idx, r;
Avi Kivity6aa8b732006-12-10 02:21:36 -08005802
Xiao Guangrong40dcaa92011-03-09 15:41:04 +08005803 idx = srcu_read_lock(&kvm->srcu);
Sean Christopherson40bbb9d2018-03-20 12:17:20 -07005804 fn = to_kvm_vmx(kvm)->tss_addr >> PAGE_SHIFT;
Izik Eidus195aefd2007-10-01 22:14:18 +02005805 r = kvm_clear_guest_page(kvm, fn, 0, PAGE_SIZE);
5806 if (r < 0)
Marcelo Tosatti10589a42007-12-20 19:18:22 -05005807 goto out;
Izik Eidus195aefd2007-10-01 22:14:18 +02005808 data = TSS_BASE_SIZE + TSS_REDIRECTION_SIZE;
Sheng Yang464d17c2008-08-13 14:10:33 +08005809 r = kvm_write_guest_page(kvm, fn++, &data,
5810 TSS_IOPB_BASE_OFFSET, sizeof(u16));
Izik Eidus195aefd2007-10-01 22:14:18 +02005811 if (r < 0)
Marcelo Tosatti10589a42007-12-20 19:18:22 -05005812 goto out;
Izik Eidus195aefd2007-10-01 22:14:18 +02005813 r = kvm_clear_guest_page(kvm, fn++, 0, PAGE_SIZE);
5814 if (r < 0)
Marcelo Tosatti10589a42007-12-20 19:18:22 -05005815 goto out;
Izik Eidus195aefd2007-10-01 22:14:18 +02005816 r = kvm_clear_guest_page(kvm, fn, 0, PAGE_SIZE);
5817 if (r < 0)
Marcelo Tosatti10589a42007-12-20 19:18:22 -05005818 goto out;
Izik Eidus195aefd2007-10-01 22:14:18 +02005819 data = ~0;
Marcelo Tosatti10589a42007-12-20 19:18:22 -05005820 r = kvm_write_guest_page(kvm, fn, &data,
5821 RMODE_TSS_SIZE - 2 * PAGE_SIZE - 1,
5822 sizeof(u8));
Marcelo Tosatti10589a42007-12-20 19:18:22 -05005823out:
Xiao Guangrong40dcaa92011-03-09 15:41:04 +08005824 srcu_read_unlock(&kvm->srcu, idx);
Paolo Bonzini1f755a82014-09-16 13:37:40 +02005825 return r;
Avi Kivity6aa8b732006-12-10 02:21:36 -08005826}
5827
Sheng Yangb7ebfb02008-04-25 21:44:52 +08005828static int init_rmode_identity_map(struct kvm *kvm)
5829{
Sean Christopherson40bbb9d2018-03-20 12:17:20 -07005830 struct kvm_vmx *kvm_vmx = to_kvm_vmx(kvm);
Tang Chenf51770e2014-09-16 18:41:59 +08005831 int i, idx, r = 0;
Dan Williamsba049e92016-01-15 16:56:11 -08005832 kvm_pfn_t identity_map_pfn;
Sheng Yangb7ebfb02008-04-25 21:44:52 +08005833 u32 tmp;
5834
Sean Christopherson40bbb9d2018-03-20 12:17:20 -07005835 /* Protect kvm_vmx->ept_identity_pagetable_done. */
Tang Chena255d472014-09-16 18:41:58 +08005836 mutex_lock(&kvm->slots_lock);
5837
Sean Christopherson40bbb9d2018-03-20 12:17:20 -07005838 if (likely(kvm_vmx->ept_identity_pagetable_done))
Tang Chena255d472014-09-16 18:41:58 +08005839 goto out2;
Tang Chena255d472014-09-16 18:41:58 +08005840
Sean Christopherson40bbb9d2018-03-20 12:17:20 -07005841 if (!kvm_vmx->ept_identity_map_addr)
5842 kvm_vmx->ept_identity_map_addr = VMX_EPT_IDENTITY_PAGETABLE_ADDR;
5843 identity_map_pfn = kvm_vmx->ept_identity_map_addr >> PAGE_SHIFT;
Tang Chena255d472014-09-16 18:41:58 +08005844
David Hildenbrandd8a6e362017-08-24 20:51:34 +02005845 r = __x86_set_memory_region(kvm, IDENTITY_PAGETABLE_PRIVATE_MEMSLOT,
Sean Christopherson40bbb9d2018-03-20 12:17:20 -07005846 kvm_vmx->ept_identity_map_addr, PAGE_SIZE);
Tang Chenf51770e2014-09-16 18:41:59 +08005847 if (r < 0)
Tang Chena255d472014-09-16 18:41:58 +08005848 goto out2;
5849
Xiao Guangrong40dcaa92011-03-09 15:41:04 +08005850 idx = srcu_read_lock(&kvm->srcu);
Sheng Yangb7ebfb02008-04-25 21:44:52 +08005851 r = kvm_clear_guest_page(kvm, identity_map_pfn, 0, PAGE_SIZE);
5852 if (r < 0)
5853 goto out;
5854 /* Set up identity-mapping pagetable for EPT in real mode */
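	/* 1024 PSE entries, each mapping a 4 MB page onto itself (i << 22). */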
5855 for (i = 0; i < PT32_ENT_PER_PAGE; i++) {
5856 tmp = (i << 22) + (_PAGE_PRESENT | _PAGE_RW | _PAGE_USER |
5857 _PAGE_ACCESSED | _PAGE_DIRTY | _PAGE_PSE);
5858 r = kvm_write_guest_page(kvm, identity_map_pfn,
5859 &tmp, i * sizeof(tmp), sizeof(tmp));
5860 if (r < 0)
5861 goto out;
5862 }
Sean Christopherson40bbb9d2018-03-20 12:17:20 -07005863 kvm_vmx->ept_identity_pagetable_done = true;
Tang Chenf51770e2014-09-16 18:41:59 +08005864
Sheng Yangb7ebfb02008-04-25 21:44:52 +08005865out:
Xiao Guangrong40dcaa92011-03-09 15:41:04 +08005866 srcu_read_unlock(&kvm->srcu, idx);
Tang Chena255d472014-09-16 18:41:58 +08005867
5868out2:
5869 mutex_unlock(&kvm->slots_lock);
Tang Chenf51770e2014-09-16 18:41:59 +08005870 return r;
Sheng Yangb7ebfb02008-04-25 21:44:52 +08005871}
5872
Avi Kivity6aa8b732006-12-10 02:21:36 -08005873static void seg_setup(int seg)
5874{
Mathias Krause772e0312012-08-30 01:30:19 +02005875 const struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg];
Nitin A Kamble3a624e22009-06-08 11:34:16 -07005876 unsigned int ar;
Avi Kivity6aa8b732006-12-10 02:21:36 -08005877
5878 vmcs_write16(sf->selector, 0);
5879 vmcs_writel(sf->base, 0);
5880 vmcs_write32(sf->limit, 0xffff);
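	/* 0x93: present, DPL 0, S=1, type 3 (read/write, accessed data). */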
Gleb Natapovd54d07b2012-12-20 16:57:46 +02005881 ar = 0x93;
5882 if (seg == VCPU_SREG_CS)
5883 ar |= 0x08; /* code segment */
Nitin A Kamble3a624e22009-06-08 11:34:16 -07005884
5885 vmcs_write32(sf->ar_bytes, ar);
Avi Kivity6aa8b732006-12-10 02:21:36 -08005886}
5887
Sheng Yangf78e0e22007-10-29 09:40:42 +08005888static int alloc_apic_access_page(struct kvm *kvm)
5889{
Xiao Guangrong44841412012-09-07 14:14:20 +08005890 struct page *page;
Sheng Yangf78e0e22007-10-29 09:40:42 +08005891 int r = 0;
5892
Marcelo Tosatti79fac952009-12-23 14:35:26 -02005893 mutex_lock(&kvm->slots_lock);
Tang Chenc24ae0d2014-09-24 15:57:58 +08005894 if (kvm->arch.apic_access_page_done)
Sheng Yangf78e0e22007-10-29 09:40:42 +08005895 goto out;
Paolo Bonzini1d8007b2015-10-12 13:38:32 +02005896 r = __x86_set_memory_region(kvm, APIC_ACCESS_PAGE_PRIVATE_MEMSLOT,
5897 APIC_DEFAULT_PHYS_BASE, PAGE_SIZE);
Sheng Yangf78e0e22007-10-29 09:40:42 +08005898 if (r)
5899 goto out;
Izik Eidus72dc67a2008-02-10 18:04:15 +02005900
Tang Chen73a6d942014-09-11 13:38:00 +08005901 page = gfn_to_page(kvm, APIC_DEFAULT_PHYS_BASE >> PAGE_SHIFT);
Xiao Guangrong44841412012-09-07 14:14:20 +08005902 if (is_error_page(page)) {
5903 r = -EFAULT;
5904 goto out;
5905 }
5906
Tang Chenc24ae0d2014-09-24 15:57:58 +08005907 /*
5908 * Do not pin the page in memory, so that memory hot-unplug
5909 * is able to migrate it.
5910 */
5911 put_page(page);
5912 kvm->arch.apic_access_page_done = true;
Sheng Yangf78e0e22007-10-29 09:40:42 +08005913out:
Marcelo Tosatti79fac952009-12-23 14:35:26 -02005914 mutex_unlock(&kvm->slots_lock);
Sheng Yangf78e0e22007-10-29 09:40:42 +08005915 return r;
5916}
5917
Wanpeng Li991e7a02015-09-16 17:30:05 +08005918static int allocate_vpid(void)
Sheng Yang2384d2b2008-01-17 15:14:33 +08005919{
5920 int vpid;
5921
Avi Kivity919818a2009-03-23 18:01:29 +02005922 if (!enable_vpid)
Wanpeng Li991e7a02015-09-16 17:30:05 +08005923 return 0;
Sheng Yang2384d2b2008-01-17 15:14:33 +08005924 spin_lock(&vmx_vpid_lock);
5925 vpid = find_first_zero_bit(vmx_vpid_bitmap, VMX_NR_VPIDS);
Wanpeng Li991e7a02015-09-16 17:30:05 +08005926 if (vpid < VMX_NR_VPIDS)
Sheng Yang2384d2b2008-01-17 15:14:33 +08005927 __set_bit(vpid, vmx_vpid_bitmap);
Wanpeng Li991e7a02015-09-16 17:30:05 +08005928 else
5929 vpid = 0;
Sheng Yang2384d2b2008-01-17 15:14:33 +08005930 spin_unlock(&vmx_vpid_lock);
Wanpeng Li991e7a02015-09-16 17:30:05 +08005931 return vpid;
Sheng Yang2384d2b2008-01-17 15:14:33 +08005932}
5933
Wanpeng Li991e7a02015-09-16 17:30:05 +08005934static void free_vpid(int vpid)
Lai Jiangshancdbecfc2010-04-17 16:41:47 +08005935{
Wanpeng Li991e7a02015-09-16 17:30:05 +08005936 if (!enable_vpid || vpid == 0)
Lai Jiangshancdbecfc2010-04-17 16:41:47 +08005937 return;
5938 spin_lock(&vmx_vpid_lock);
Wanpeng Li991e7a02015-09-16 17:30:05 +08005939 __clear_bit(vpid, vmx_vpid_bitmap);
Lai Jiangshancdbecfc2010-04-17 16:41:47 +08005940 spin_unlock(&vmx_vpid_lock);
5941}
5942
Paolo Bonzini904e14f2018-01-16 16:51:18 +01005943static void __always_inline vmx_disable_intercept_for_msr(unsigned long *msr_bitmap,
5944 u32 msr, int type)
Sheng Yang25c5f222008-03-28 13:18:56 +08005945{
Avi Kivity3e7c73e2009-02-24 21:46:19 +02005946 int f = sizeof(unsigned long);
Sheng Yang25c5f222008-03-28 13:18:56 +08005947
5948 if (!cpu_has_vmx_msr_bitmap())
5949 return;
5950
Vitaly Kuznetsovceef7d12018-04-16 12:50:33 +02005951 if (static_branch_unlikely(&enable_evmcs))
5952 evmcs_touch_msr_bitmap();
5953
Sheng Yang25c5f222008-03-28 13:18:56 +08005954 /*
5955 * See Intel PRM Vol. 3, 20.6.9 (MSR-Bitmap Address). Early manuals
5956 * have the write-low and read-high bitmap offsets the wrong way round.
5957 * We can control MSRs 0x00000000-0x00001fff and 0xc0000000-0xc0001fff.
5958 */
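	/*
	 * The bitmap is split into four 1 KiB blocks: read intercepts for
	 * MSRs 0x00000000-0x00001fff at byte offset 0x000, reads for
	 * 0xc0000000-0xc0001fff at 0x400, and the corresponding write
	 * intercepts at 0x800 and 0xc00.  For example, the read intercept
	 * for MSR_FS_BASE (0xc0000100) is bit 0x100 of the block at offset
	 * 0x400, and its write intercept is bit 0x100 of the block at
	 * offset 0xc00.
	 */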
Sheng Yang25c5f222008-03-28 13:18:56 +08005959 if (msr <= 0x1fff) {
Yang Zhang8d146952013-01-25 10:18:50 +08005960 if (type & MSR_TYPE_R)
5961 /* read-low */
5962 __clear_bit(msr, msr_bitmap + 0x000 / f);
5963
5964 if (type & MSR_TYPE_W)
5965 /* write-low */
5966 __clear_bit(msr, msr_bitmap + 0x800 / f);
5967
Sheng Yang25c5f222008-03-28 13:18:56 +08005968 } else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff)) {
5969 msr &= 0x1fff;
Yang Zhang8d146952013-01-25 10:18:50 +08005970 if (type & MSR_TYPE_R)
5971 /* read-high */
5972 __clear_bit(msr, msr_bitmap + 0x400 / f);
5973
5974 if (type & MSR_TYPE_W)
5975 /* write-high */
5976 __clear_bit(msr, msr_bitmap + 0xc00 / f);
5977
5978 }
5979}
5980
Paolo Bonzini904e14f2018-01-16 16:51:18 +01005981static void __always_inline vmx_enable_intercept_for_msr(unsigned long *msr_bitmap,
5982 u32 msr, int type)
5983{
5984 int f = sizeof(unsigned long);
5985
5986 if (!cpu_has_vmx_msr_bitmap())
5987 return;
5988
Vitaly Kuznetsovceef7d12018-04-16 12:50:33 +02005989 if (static_branch_unlikely(&enable_evmcs))
5990 evmcs_touch_msr_bitmap();
5991
Paolo Bonzini904e14f2018-01-16 16:51:18 +01005992 /*
5993 * See Intel PRM Vol. 3, 20.6.9 (MSR-Bitmap Address). Early manuals
5994 * have the write-low and read-high bitmap offsets the wrong way round.
5995 * We can control MSRs 0x00000000-0x00001fff and 0xc0000000-0xc0001fff.
5996 */
5997 if (msr <= 0x1fff) {
5998 if (type & MSR_TYPE_R)
5999 /* read-low */
6000 __set_bit(msr, msr_bitmap + 0x000 / f);
6001
6002 if (type & MSR_TYPE_W)
6003 /* write-low */
6004 __set_bit(msr, msr_bitmap + 0x800 / f);
6005
6006 } else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff)) {
6007 msr &= 0x1fff;
6008 if (type & MSR_TYPE_R)
6009 /* read-high */
6010 __set_bit(msr, msr_bitmap + 0x400 / f);
6011
6012 if (type & MSR_TYPE_W)
6013 /* write-high */
6014 __set_bit(msr, msr_bitmap + 0xc00 / f);
6015
6016 }
6017}
6018
6019static void __always_inline vmx_set_intercept_for_msr(unsigned long *msr_bitmap,
6020 u32 msr, int type, bool value)
6021{
6022 if (value)
6023 vmx_enable_intercept_for_msr(msr_bitmap, msr, type);
6024 else
6025 vmx_disable_intercept_for_msr(msr_bitmap, msr, type);
6026}
6027
Wincy Vanf2b93282015-02-03 23:56:03 +08006028/*
 6029 * If an MSR is allowed by L0, check whether it is also allowed by L1.
 6030 * The corresponding bit is cleared only if both L0 and L1 allow it.
6031 */
6032static void nested_vmx_disable_intercept_for_msr(unsigned long *msr_bitmap_l1,
6033 unsigned long *msr_bitmap_nested,
6034 u32 msr, int type)
6035{
6036 int f = sizeof(unsigned long);
6037
Wincy Vanf2b93282015-02-03 23:56:03 +08006038 /*
6039 * See Intel PRM Vol. 3, 20.6.9 (MSR-Bitmap Address). Early manuals
6040 * have the write-low and read-high bitmap offsets the wrong way round.
6041 * We can control MSRs 0x00000000-0x00001fff and 0xc0000000-0xc0001fff.
6042 */
6043 if (msr <= 0x1fff) {
6044 if (type & MSR_TYPE_R &&
6045 !test_bit(msr, msr_bitmap_l1 + 0x000 / f))
6046 /* read-low */
6047 __clear_bit(msr, msr_bitmap_nested + 0x000 / f);
6048
6049 if (type & MSR_TYPE_W &&
6050 !test_bit(msr, msr_bitmap_l1 + 0x800 / f))
6051 /* write-low */
6052 __clear_bit(msr, msr_bitmap_nested + 0x800 / f);
6053
6054 } else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff)) {
6055 msr &= 0x1fff;
6056 if (type & MSR_TYPE_R &&
6057 !test_bit(msr, msr_bitmap_l1 + 0x400 / f))
6058 /* read-high */
6059 __clear_bit(msr, msr_bitmap_nested + 0x400 / f);
6060
6061 if (type & MSR_TYPE_W &&
6062 !test_bit(msr, msr_bitmap_l1 + 0xc00 / f))
6063 /* write-high */
6064 __clear_bit(msr, msr_bitmap_nested + 0xc00 / f);
6065
6066 }
6067}
6068
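/*
 * Conceptual sketch, not part of the original file, of the merge rule
 * implemented by nested_vmx_disable_intercept_for_msr() above: a set bit
 * means "intercept the access", and an access reaches L2 without a VM exit
 * only if both L0 and L1 leave its bit clear.  The real code clears
 * individual bits in a destination bitmap that defaults to intercepting;
 * the hypothetical helper below expresses the same rule per word.
 */
static inline unsigned long
example_merge_msr_bitmap_word(unsigned long l0_word, unsigned long l1_word)
{
	/* intercept whenever either L0 or L1 wants to intercept */
	return l0_word | l1_word;
}
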
Paolo Bonzini904e14f2018-01-16 16:51:18 +01006069static u8 vmx_msr_bitmap_mode(struct kvm_vcpu *vcpu)
Avi Kivity58972972009-02-24 22:26:47 +02006070{
Paolo Bonzini904e14f2018-01-16 16:51:18 +01006071 u8 mode = 0;
6072
6073 if (cpu_has_secondary_exec_ctrls() &&
6074 (vmcs_read32(SECONDARY_VM_EXEC_CONTROL) &
6075 SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE)) {
6076 mode |= MSR_BITMAP_MODE_X2APIC;
6077 if (enable_apicv && kvm_vcpu_apicv_active(vcpu))
6078 mode |= MSR_BITMAP_MODE_X2APIC_APICV;
6079 }
6080
Paolo Bonzini904e14f2018-01-16 16:51:18 +01006081 return mode;
Yang Zhang8d146952013-01-25 10:18:50 +08006082}
6083
Paolo Bonzini904e14f2018-01-16 16:51:18 +01006084#define X2APIC_MSR(r) (APIC_BASE_MSR + ((r) >> 4))
6085
6086static void vmx_update_msr_bitmap_x2apic(unsigned long *msr_bitmap,
6087 u8 mode)
Yang Zhang8d146952013-01-25 10:18:50 +08006088{
Paolo Bonzini904e14f2018-01-16 16:51:18 +01006089 int msr;
6090
6091 for (msr = 0x800; msr <= 0x8ff; msr += BITS_PER_LONG) {
6092 unsigned word = msr / BITS_PER_LONG;
6093 msr_bitmap[word] = (mode & MSR_BITMAP_MODE_X2APIC_APICV) ? 0 : ~0;
6094 msr_bitmap[word + (0x800 / sizeof(long))] = ~0;
Wanpeng Lif6e90f92016-09-22 07:43:25 +08006095 }
Paolo Bonzini904e14f2018-01-16 16:51:18 +01006096
6097 if (mode & MSR_BITMAP_MODE_X2APIC) {
6098 /*
6099 * TPR reads and writes can be virtualized even if virtual interrupt
6100 * delivery is not in use.
6101 */
6102 vmx_disable_intercept_for_msr(msr_bitmap, X2APIC_MSR(APIC_TASKPRI), MSR_TYPE_RW);
6103 if (mode & MSR_BITMAP_MODE_X2APIC_APICV) {
6104 vmx_enable_intercept_for_msr(msr_bitmap, X2APIC_MSR(APIC_TMCCT), MSR_TYPE_R);
6105 vmx_disable_intercept_for_msr(msr_bitmap, X2APIC_MSR(APIC_EOI), MSR_TYPE_W);
6106 vmx_disable_intercept_for_msr(msr_bitmap, X2APIC_MSR(APIC_SELF_IPI), MSR_TYPE_W);
6107 }
6108 }
6109}
6110
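/*
 * Worked example, not part of the original file: X2APIC_MSR() above maps an
 * xAPIC MMIO register offset to its x2APIC MSR number by dividing the offset
 * by 16 and adding APIC_BASE_MSR (0x800, from apicdef.h).  The example_*
 * helper is hypothetical and just restates the macro.
 */
static inline u32 example_x2apic_msr_for_reg(unsigned int reg)
{
	/*
	 * APIC_TASKPRI  (0x80)  -> 0x808, the x2APIC TPR MSR
	 * APIC_EOI      (0xb0)  -> 0x80b
	 * APIC_SELF_IPI (0x3f0) -> 0x83f
	 * which is why the loop above walks MSRs 0x800-0x8ff.
	 */
	return 0x800 + (reg >> 4);
}
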
6111static void vmx_update_msr_bitmap(struct kvm_vcpu *vcpu)
6112{
6113 struct vcpu_vmx *vmx = to_vmx(vcpu);
6114 unsigned long *msr_bitmap = vmx->vmcs01.msr_bitmap;
6115 u8 mode = vmx_msr_bitmap_mode(vcpu);
6116 u8 changed = mode ^ vmx->msr_bitmap_mode;
6117
6118 if (!changed)
6119 return;
6120
Paolo Bonzini904e14f2018-01-16 16:51:18 +01006121 if (changed & (MSR_BITMAP_MODE_X2APIC | MSR_BITMAP_MODE_X2APIC_APICV))
6122 vmx_update_msr_bitmap_x2apic(msr_bitmap, mode);
6123
6124 vmx->msr_bitmap_mode = mode;
Avi Kivity58972972009-02-24 22:26:47 +02006125}
6126
Suravee Suthikulpanitb2a05fe2017-09-12 10:42:41 -05006127static bool vmx_get_enable_apicv(struct kvm_vcpu *vcpu)
Paolo Bonzinid50ab6c2015-07-29 11:49:59 +02006128{
Andrey Smetanind62caab2015-11-10 15:36:33 +03006129 return enable_apicv;
Paolo Bonzinid50ab6c2015-07-29 11:49:59 +02006130}
6131
David Matlackc9f04402017-08-01 14:00:40 -07006132static void nested_mark_vmcs12_pages_dirty(struct kvm_vcpu *vcpu)
6133{
6134 struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
6135 gfn_t gfn;
6136
6137 /*
6138 * Don't need to mark the APIC access page dirty; it is never
6139 * written to by the CPU during APIC virtualization.
6140 */
6141
6142 if (nested_cpu_has(vmcs12, CPU_BASED_TPR_SHADOW)) {
6143 gfn = vmcs12->virtual_apic_page_addr >> PAGE_SHIFT;
6144 kvm_vcpu_mark_page_dirty(vcpu, gfn);
6145 }
6146
6147 if (nested_cpu_has_posted_intr(vmcs12)) {
6148 gfn = vmcs12->posted_intr_desc_addr >> PAGE_SHIFT;
6149 kvm_vcpu_mark_page_dirty(vcpu, gfn);
6150 }
6151}
6152
6153
David Hildenbrand6342c502017-01-25 11:58:58 +01006154static void vmx_complete_nested_posted_interrupt(struct kvm_vcpu *vcpu)
Wincy Van705699a2015-02-03 23:58:17 +08006155{
6156 struct vcpu_vmx *vmx = to_vmx(vcpu);
6157 int max_irr;
6158 void *vapic_page;
6159 u16 status;
6160
David Matlackc9f04402017-08-01 14:00:40 -07006161 if (!vmx->nested.pi_desc || !vmx->nested.pi_pending)
6162 return;
Wincy Van705699a2015-02-03 23:58:17 +08006163
David Matlackc9f04402017-08-01 14:00:40 -07006164 vmx->nested.pi_pending = false;
6165 if (!pi_test_and_clear_on(vmx->nested.pi_desc))
6166 return;
Wincy Van705699a2015-02-03 23:58:17 +08006167
David Matlackc9f04402017-08-01 14:00:40 -07006168 max_irr = find_last_bit((unsigned long *)vmx->nested.pi_desc->pir, 256);
6169 if (max_irr != 256) {
Wincy Van705699a2015-02-03 23:58:17 +08006170 vapic_page = kmap(vmx->nested.virtual_apic_page);
Liran Alone7387b02017-12-24 18:12:54 +02006171 __kvm_apic_update_irr(vmx->nested.pi_desc->pir,
6172 vapic_page, &max_irr);
Wincy Van705699a2015-02-03 23:58:17 +08006173 kunmap(vmx->nested.virtual_apic_page);
6174
6175 status = vmcs_read16(GUEST_INTR_STATUS);
6176 if ((u8)max_irr > ((u8)status & 0xff)) {
6177 status &= ~0xff;
6178 status |= (u8)max_irr;
6179 vmcs_write16(GUEST_INTR_STATUS, status);
6180 }
6181 }
David Matlackc9f04402017-08-01 14:00:40 -07006182
6183 nested_mark_vmcs12_pages_dirty(vcpu);
Wincy Van705699a2015-02-03 23:58:17 +08006184}
6185
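/*
 * Illustrative sketch, not part of the original file: the 16-bit
 * GUEST_INTR_STATUS field used above packs RVI (requesting virtual
 * interrupt) in the low byte and SVI (servicing virtual interrupt) in the
 * high byte.  vmx_complete_nested_posted_interrupt() only raises the RVI
 * byte when a higher vector is pending in the PIR, and vmx_get_rvi() below
 * simply masks out that low byte.
 */
static inline u16 example_pack_guest_intr_status(u8 svi, u8 rvi)
{
	return ((u16)svi << 8) | rvi;	/* high byte: SVI, low byte: RVI */
}
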
Paolo Bonzini7e712682018-10-03 13:44:26 +02006186static u8 vmx_get_rvi(void)
6187{
6188 return vmcs_read16(GUEST_INTR_STATUS) & 0xff;
6189}
6190
Liran Alone6c67d82018-09-04 10:56:52 +03006191static bool vmx_guest_apic_has_interrupt(struct kvm_vcpu *vcpu)
6192{
6193 struct vcpu_vmx *vmx = to_vmx(vcpu);
6194 void *vapic_page;
6195 u32 vppr;
6196 int rvi;
6197
6198 if (WARN_ON_ONCE(!is_guest_mode(vcpu)) ||
6199 !nested_cpu_has_vid(get_vmcs12(vcpu)) ||
6200 WARN_ON_ONCE(!vmx->nested.virtual_apic_page))
6201 return false;
6202
Paolo Bonzini7e712682018-10-03 13:44:26 +02006203 rvi = vmx_get_rvi();
Liran Alone6c67d82018-09-04 10:56:52 +03006204
6205 vapic_page = kmap(vmx->nested.virtual_apic_page);
6206 vppr = *((u32 *)(vapic_page + APIC_PROCPRI));
6207 kunmap(vmx->nested.virtual_apic_page);
6208
6209 return ((rvi & 0xf0) > (vppr & 0xf0));
6210}
6211
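/*
 * Worked example, not part of the original file:
 * vmx_guest_apic_has_interrupt() above compares only the priority classes
 * (bits 7:4) of RVI and the virtual PPR, mirroring the APIC rule that an
 * interrupt is deliverable only when its class is strictly higher than the
 * processor-priority class.  The helper name is hypothetical.
 */
static inline bool example_vector_deliverable(u8 rvi, u8 vppr)
{
	/*
	 * rvi = 0x61 (class 6), vppr = 0x50 (class 5) -> deliverable
	 * rvi = 0x61 (class 6), vppr = 0x6f (class 6) -> not deliverable
	 */
	return (rvi & 0xf0) > (vppr & 0xf0);
}
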
Wincy Van06a55242017-04-28 13:13:59 +08006212static inline bool kvm_vcpu_trigger_posted_interrupt(struct kvm_vcpu *vcpu,
6213 bool nested)
Radim Krčmář21bc8dc2015-02-16 15:36:33 +01006214{
6215#ifdef CONFIG_SMP
Wincy Van06a55242017-04-28 13:13:59 +08006216 int pi_vec = nested ? POSTED_INTR_NESTED_VECTOR : POSTED_INTR_VECTOR;
6217
Radim Krčmář21bc8dc2015-02-16 15:36:33 +01006218 if (vcpu->mode == IN_GUEST_MODE) {
Feng Wu28b835d2015-09-18 22:29:54 +08006219 /*
Haozhong Zhang5753743f2017-09-18 09:56:50 +08006220 * The vector of the interrupt to be delivered to the vcpu has
6221 * already been set in the PIR before this function is called.
Feng Wu28b835d2015-09-18 22:29:54 +08006222 *
Haozhong Zhang5753743f2017-09-18 09:56:50 +08006223 * The following cases can be reached in this block, and
6224 * we always send a notification event in all of them, as
6225 * explained below.
6226 *
6227 * Case 1: the vcpu stays in non-root mode. Sending a
6228 * notification event posts the interrupt to the vcpu.
6229 *
6230 * Case 2: the vcpu exits to root mode and is still
6231 * runnable. The PIR will be synced to the vIRR before
6232 * the next vcpu entry. Sending a notification event in
6233 * this case has no effect, as the vcpu is no longer in
6234 * non-root mode, so it is not processed as a posted interrupt.
6235 *
6236 * Case 3: the vcpu exits to root mode and is blocked.
6237 * vcpu_block() has already synced the PIR to the vIRR and
6238 * never blocks the vcpu if the vIRR is not cleared. Therefore,
6239 * a blocked vcpu here is not waiting for any requested
6240 * interrupts in the PIR, and sending a notification event,
6241 * which has no effect, is safe here.
Feng Wu28b835d2015-09-18 22:29:54 +08006242 */
Feng Wu28b835d2015-09-18 22:29:54 +08006243
Wincy Van06a55242017-04-28 13:13:59 +08006244 apic->send_IPI_mask(get_cpu_mask(vcpu->cpu), pi_vec);
Radim Krčmář21bc8dc2015-02-16 15:36:33 +01006245 return true;
6246 }
6247#endif
6248 return false;
6249}
6250
Wincy Van705699a2015-02-03 23:58:17 +08006251static int vmx_deliver_nested_posted_interrupt(struct kvm_vcpu *vcpu,
6252 int vector)
6253{
6254 struct vcpu_vmx *vmx = to_vmx(vcpu);
6255
6256 if (is_guest_mode(vcpu) &&
6257 vector == vmx->nested.posted_intr_nv) {
Wincy Van705699a2015-02-03 23:58:17 +08006258 /*
6259 * If a posted interrupt is not recognized by hardware,
6260 * we will deliver it from the PIR on the next VM entry.
6261 */
6262 vmx->nested.pi_pending = true;
6263 kvm_make_request(KVM_REQ_EVENT, vcpu);
Liran Alon6b697712017-11-09 20:27:20 +02006264 /* the PIR and ON have been set by L1. */
6265 if (!kvm_vcpu_trigger_posted_interrupt(vcpu, true))
6266 kvm_vcpu_kick(vcpu);
Wincy Van705699a2015-02-03 23:58:17 +08006267 return 0;
6268 }
6269 return -1;
6270}
Avi Kivity6aa8b732006-12-10 02:21:36 -08006271/*
Yang Zhanga20ed542013-04-11 19:25:15 +08006272 * Send an interrupt to the vcpu via the posted-interrupt mechanism.
6273 * 1. If the target vcpu is running (non-root mode), send a posted-interrupt
6274 * notification and hardware will sync the PIR to the vIRR atomically.
6275 * 2. If the target vcpu isn't running (root mode), kick it so that it picks
6276 * up the interrupt from the PIR on the next VM entry.
6277 */
6278static void vmx_deliver_posted_interrupt(struct kvm_vcpu *vcpu, int vector)
6279{
6280 struct vcpu_vmx *vmx = to_vmx(vcpu);
6281 int r;
6282
Wincy Van705699a2015-02-03 23:58:17 +08006283 r = vmx_deliver_nested_posted_interrupt(vcpu, vector);
6284 if (!r)
6285 return;
6286
Yang Zhanga20ed542013-04-11 19:25:15 +08006287 if (pi_test_and_set_pir(vector, &vmx->pi_desc))
6288 return;
6289
Paolo Bonzinib95234c2016-12-19 13:57:33 +01006290 /* If a previous notification has sent the IPI, nothing to do. */
6291 if (pi_test_and_set_on(&vmx->pi_desc))
6292 return;
6293
Wincy Van06a55242017-04-28 13:13:59 +08006294 if (!kvm_vcpu_trigger_posted_interrupt(vcpu, false))
Yang Zhanga20ed542013-04-11 19:25:15 +08006295 kvm_vcpu_kick(vcpu);
6296}
6297
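/*
 * Simplified view, not part of the original file, of the posted-interrupt
 * descriptor touched by pi_test_and_set_pir()/pi_test_and_set_on() above.
 * The real struct pi_desc is defined earlier in this file and carries more
 * control bits; this sketch only shows the fields the delivery path relies
 * on: a 256-bit PIR with one bit per vector, and the ON ("outstanding
 * notification") bit that tells senders a notification IPI is already due.
 */
struct example_pi_desc {
	u32 pir[8];	/* posted-interrupt requests, one bit per vector   */
	u32 control;	/* bit 0: ON (outstanding), bit 1: SN (suppress)   */
	u32 rsvd[7];	/* pad out to the architectural 64-byte descriptor */
} __aligned(64);
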
Avi Kivity6aa8b732006-12-10 02:21:36 -08006298/*
Nadav Har'Ela3a8ff82011-05-25 23:09:01 +03006299 * Set up the vmcs's constant host-state fields, i.e., host-state fields that
6300 * will not change in the lifetime of the guest.
6301 * Note that host-state that does change is set elsewhere. E.g., host-state
6302 * that is set differently for each CPU is set in vmx_vcpu_load(), not here.
6303 */
Yang Zhanga547c6d2013-04-11 19:25:10 +08006304static void vmx_set_constant_host_state(struct vcpu_vmx *vmx)
Nadav Har'Ela3a8ff82011-05-25 23:09:01 +03006305{
6306 u32 low32, high32;
6307 unsigned long tmpl;
6308 struct desc_ptr dt;
Andy Lutomirskid6e41f12017-05-28 10:00:17 -07006309 unsigned long cr0, cr3, cr4;
Nadav Har'Ela3a8ff82011-05-25 23:09:01 +03006310
Andy Lutomirski04ac88a2016-10-31 15:18:45 -07006311 cr0 = read_cr0();
6312 WARN_ON(cr0 & X86_CR0_TS);
6313 vmcs_writel(HOST_CR0, cr0); /* 22.2.3 */
Andy Lutomirskid6e41f12017-05-28 10:00:17 -07006314
6315 /*
6316 * Save the most likely value for this task's CR3 in the VMCS.
6317 * We can't use __get_current_cr3_fast() because we're not atomic.
6318 */
Andy Lutomirski6c690ee2017-06-12 10:26:14 -07006319 cr3 = __read_cr3();
Andy Lutomirskid6e41f12017-05-28 10:00:17 -07006320 vmcs_writel(HOST_CR3, cr3); /* 22.2.3 FIXME: shadow tables */
Sean Christophersond7ee0392018-07-23 12:32:47 -07006321 vmx->loaded_vmcs->host_state.cr3 = cr3;
Nadav Har'Ela3a8ff82011-05-25 23:09:01 +03006322
Andy Lutomirskid974baa2014-10-08 09:02:13 -07006323 /* Save the most likely value for this task's CR4 in the VMCS. */
Andy Lutomirski1e02ce42014-10-24 15:58:08 -07006324 cr4 = cr4_read_shadow();
Andy Lutomirskid974baa2014-10-08 09:02:13 -07006325 vmcs_writel(HOST_CR4, cr4); /* 22.2.3, 22.2.5 */
Sean Christophersond7ee0392018-07-23 12:32:47 -07006326 vmx->loaded_vmcs->host_state.cr4 = cr4;
Andy Lutomirskid974baa2014-10-08 09:02:13 -07006327
Nadav Har'Ela3a8ff82011-05-25 23:09:01 +03006328 vmcs_write16(HOST_CS_SELECTOR, __KERNEL_CS); /* 22.2.4 */
Avi Kivityb2da15a2012-05-13 19:53:24 +03006329#ifdef CONFIG_X86_64
6330 /*
6331 * Load null selectors, so we can avoid reloading them in
Sean Christopherson6d6095b2018-07-23 12:32:44 -07006332 * vmx_prepare_switch_to_host(), in case userspace uses
6333 * the null selectors too (the expected case).
Avi Kivityb2da15a2012-05-13 19:53:24 +03006334 */
6335 vmcs_write16(HOST_DS_SELECTOR, 0);
6336 vmcs_write16(HOST_ES_SELECTOR, 0);
6337#else
Nadav Har'Ela3a8ff82011-05-25 23:09:01 +03006338 vmcs_write16(HOST_DS_SELECTOR, __KERNEL_DS); /* 22.2.4 */
6339 vmcs_write16(HOST_ES_SELECTOR, __KERNEL_DS); /* 22.2.4 */
Avi Kivityb2da15a2012-05-13 19:53:24 +03006340#endif
Nadav Har'Ela3a8ff82011-05-25 23:09:01 +03006341 vmcs_write16(HOST_SS_SELECTOR, __KERNEL_DS); /* 22.2.4 */
6342 vmcs_write16(HOST_TR_SELECTOR, GDT_ENTRY_TSS*8); /* 22.2.4 */
6343
Juergen Gross87930012017-09-04 12:25:27 +02006344 store_idt(&dt);
Nadav Har'Ela3a8ff82011-05-25 23:09:01 +03006345 vmcs_writel(HOST_IDTR_BASE, dt.address); /* 22.2.4 */
Yang Zhanga547c6d2013-04-11 19:25:10 +08006346 vmx->host_idt_base = dt.address;
Nadav Har'Ela3a8ff82011-05-25 23:09:01 +03006347
Avi Kivity83287ea422012-09-16 15:10:57 +03006348 vmcs_writel(HOST_RIP, vmx_return); /* 22.2.5 */
Nadav Har'Ela3a8ff82011-05-25 23:09:01 +03006349
6350 rdmsr(MSR_IA32_SYSENTER_CS, low32, high32);
6351 vmcs_write32(HOST_IA32_SYSENTER_CS, low32);
6352 rdmsrl(MSR_IA32_SYSENTER_EIP, tmpl);
6353 vmcs_writel(HOST_IA32_SYSENTER_EIP, tmpl); /* 22.2.3 */
6354
6355 if (vmcs_config.vmexit_ctrl & VM_EXIT_LOAD_IA32_PAT) {
6356 rdmsr(MSR_IA32_CR_PAT, low32, high32);
6357 vmcs_write64(HOST_IA32_PAT, low32 | ((u64) high32 << 32));
6358 }
Sean Christopherson5a5e8a12018-09-26 09:23:56 -07006359
6360 if (cpu_has_load_ia32_efer)
6361 vmcs_write64(HOST_IA32_EFER, host_efer);
Nadav Har'Ela3a8ff82011-05-25 23:09:01 +03006362}
6363
Nadav Har'Elbf8179a2011-05-25 23:09:31 +03006364static void set_cr4_guest_host_mask(struct vcpu_vmx *vmx)
6365{
6366 vmx->vcpu.arch.cr4_guest_owned_bits = KVM_CR4_GUEST_OWNED_BITS;
6367 if (enable_ept)
6368 vmx->vcpu.arch.cr4_guest_owned_bits |= X86_CR4_PGE;
Nadav Har'Elfe3ef052011-05-25 23:10:02 +03006369 if (is_guest_mode(&vmx->vcpu))
6370 vmx->vcpu.arch.cr4_guest_owned_bits &=
6371 ~get_vmcs12(&vmx->vcpu)->cr4_guest_host_mask;
Nadav Har'Elbf8179a2011-05-25 23:09:31 +03006372 vmcs_writel(CR4_GUEST_HOST_MASK, ~vmx->vcpu.arch.cr4_guest_owned_bits);
6373}
6374
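/*
 * Illustrative note, not part of the original file: CR4_GUEST_HOST_MASK
 * selects which CR4 bits the guest "owns".  A clear bit is guest-owned
 * (reads and writes go straight to GUEST_CR4); a set bit is host-owned
 * (guest reads are served from CR4_READ_SHADOW and writes that would change
 * it cause a VM exit), which is why set_cr4_guest_host_mask() above writes
 * the complement of cr4_guest_owned_bits.  The hypothetical helper shows
 * the value a guest read of CR4 returns.
 */
static inline unsigned long example_guest_cr4_read(unsigned long guest_cr4,
						   unsigned long read_shadow,
						   unsigned long guest_host_mask)
{
	return (guest_cr4 & ~guest_host_mask) | (read_shadow & guest_host_mask);
}
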
Yang Zhang01e439b2013-04-11 19:25:12 +08006375static u32 vmx_pin_based_exec_ctrl(struct vcpu_vmx *vmx)
6376{
6377 u32 pin_based_exec_ctrl = vmcs_config.pin_based_exec_ctrl;
6378
Andrey Smetanind62caab2015-11-10 15:36:33 +03006379 if (!kvm_vcpu_apicv_active(&vmx->vcpu))
Yang Zhang01e439b2013-04-11 19:25:12 +08006380 pin_based_exec_ctrl &= ~PIN_BASED_POSTED_INTR;
Paolo Bonzinid02fcf52017-11-06 13:31:13 +01006381
6382 if (!enable_vnmi)
6383 pin_based_exec_ctrl &= ~PIN_BASED_VIRTUAL_NMIS;
6384
Yunhong Jiang64672c92016-06-13 14:19:59 -07006385 /* Enable the preemption timer dynamically */
6386 pin_based_exec_ctrl &= ~PIN_BASED_VMX_PREEMPTION_TIMER;
Yang Zhang01e439b2013-04-11 19:25:12 +08006387 return pin_based_exec_ctrl;
6388}
6389
Andrey Smetanind62caab2015-11-10 15:36:33 +03006390static void vmx_refresh_apicv_exec_ctrl(struct kvm_vcpu *vcpu)
6391{
6392 struct vcpu_vmx *vmx = to_vmx(vcpu);
6393
6394 vmcs_write32(PIN_BASED_VM_EXEC_CONTROL, vmx_pin_based_exec_ctrl(vmx));
Roman Kagan3ce424e2016-05-18 17:48:20 +03006395 if (cpu_has_secondary_exec_ctrls()) {
6396 if (kvm_vcpu_apicv_active(vcpu))
6397 vmcs_set_bits(SECONDARY_VM_EXEC_CONTROL,
6398 SECONDARY_EXEC_APIC_REGISTER_VIRT |
6399 SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY);
6400 else
6401 vmcs_clear_bits(SECONDARY_VM_EXEC_CONTROL,
6402 SECONDARY_EXEC_APIC_REGISTER_VIRT |
6403 SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY);
6404 }
6405
6406 if (cpu_has_vmx_msr_bitmap())
Paolo Bonzini904e14f2018-01-16 16:51:18 +01006407 vmx_update_msr_bitmap(vcpu);
Andrey Smetanind62caab2015-11-10 15:36:33 +03006408}
6409
Nadav Har'Elbf8179a2011-05-25 23:09:31 +03006410static u32 vmx_exec_control(struct vcpu_vmx *vmx)
6411{
6412 u32 exec_control = vmcs_config.cpu_based_exec_ctrl;
Paolo Bonzinid16c2932014-02-21 10:36:37 +01006413
6414 if (vmx->vcpu.arch.switch_db_regs & KVM_DEBUGREG_WONT_EXIT)
6415 exec_control &= ~CPU_BASED_MOV_DR_EXITING;
6416
Paolo Bonzini35754c92015-07-29 12:05:37 +02006417 if (!cpu_need_tpr_shadow(&vmx->vcpu)) {
Nadav Har'Elbf8179a2011-05-25 23:09:31 +03006418 exec_control &= ~CPU_BASED_TPR_SHADOW;
6419#ifdef CONFIG_X86_64
6420 exec_control |= CPU_BASED_CR8_STORE_EXITING |
6421 CPU_BASED_CR8_LOAD_EXITING;
6422#endif
6423 }
6424 if (!enable_ept)
6425 exec_control |= CPU_BASED_CR3_STORE_EXITING |
6426 CPU_BASED_CR3_LOAD_EXITING |
6427 CPU_BASED_INVLPG_EXITING;
Wanpeng Li4d5422c2018-03-12 04:53:02 -07006428 if (kvm_mwait_in_guest(vmx->vcpu.kvm))
6429 exec_control &= ~(CPU_BASED_MWAIT_EXITING |
6430 CPU_BASED_MONITOR_EXITING);
Wanpeng Licaa057a2018-03-12 04:53:03 -07006431 if (kvm_hlt_in_guest(vmx->vcpu.kvm))
6432 exec_control &= ~CPU_BASED_HLT_EXITING;
Nadav Har'Elbf8179a2011-05-25 23:09:31 +03006433 return exec_control;
6434}
6435
Jim Mattson45ec3682017-08-23 16:32:04 -07006436static bool vmx_rdrand_supported(void)
Nadav Har'Elbf8179a2011-05-25 23:09:31 +03006437{
Jim Mattson45ec3682017-08-23 16:32:04 -07006438 return vmcs_config.cpu_based_2nd_exec_ctrl &
David Hildenbrand736fdf72017-08-24 20:51:37 +02006439 SECONDARY_EXEC_RDRAND_EXITING;
Jim Mattson45ec3682017-08-23 16:32:04 -07006440}
6441
Jim Mattson75f4fc82017-08-23 16:32:03 -07006442static bool vmx_rdseed_supported(void)
6443{
6444 return vmcs_config.cpu_based_2nd_exec_ctrl &
David Hildenbrand736fdf72017-08-24 20:51:37 +02006445 SECONDARY_EXEC_RDSEED_EXITING;
Jim Mattson75f4fc82017-08-23 16:32:03 -07006446}
6447
Paolo Bonzini80154d72017-08-24 13:55:35 +02006448static void vmx_compute_secondary_exec_control(struct vcpu_vmx *vmx)
Nadav Har'Elbf8179a2011-05-25 23:09:31 +03006449{
Paolo Bonzini80154d72017-08-24 13:55:35 +02006450 struct kvm_vcpu *vcpu = &vmx->vcpu;
6451
Nadav Har'Elbf8179a2011-05-25 23:09:31 +03006452 u32 exec_control = vmcs_config.cpu_based_2nd_exec_ctrl;
Paolo Bonzini0367f202016-07-12 10:44:55 +02006453
Paolo Bonzini80154d72017-08-24 13:55:35 +02006454 if (!cpu_need_virtualize_apic_accesses(vcpu))
Nadav Har'Elbf8179a2011-05-25 23:09:31 +03006455 exec_control &= ~SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES;
6456 if (vmx->vpid == 0)
6457 exec_control &= ~SECONDARY_EXEC_ENABLE_VPID;
6458 if (!enable_ept) {
6459 exec_control &= ~SECONDARY_EXEC_ENABLE_EPT;
6460 enable_unrestricted_guest = 0;
6461 }
6462 if (!enable_unrestricted_guest)
6463 exec_control &= ~SECONDARY_EXEC_UNRESTRICTED_GUEST;
Wanpeng Lib31c1142018-03-12 04:53:04 -07006464 if (kvm_pause_in_guest(vmx->vcpu.kvm))
Nadav Har'Elbf8179a2011-05-25 23:09:31 +03006465 exec_control &= ~SECONDARY_EXEC_PAUSE_LOOP_EXITING;
Paolo Bonzini80154d72017-08-24 13:55:35 +02006466 if (!kvm_vcpu_apicv_active(vcpu))
Yang Zhangc7c9c562013-01-25 10:18:51 +08006467 exec_control &= ~(SECONDARY_EXEC_APIC_REGISTER_VIRT |
6468 SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY);
Yang Zhang8d146952013-01-25 10:18:50 +08006469 exec_control &= ~SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE;
Paolo Bonzini0367f202016-07-12 10:44:55 +02006470
6471 /* SECONDARY_EXEC_DESC is enabled/disabled on writes to CR4.UMIP,
6472 * in vmx_set_cr4. */
6473 exec_control &= ~SECONDARY_EXEC_DESC;
6474
Abel Gordonabc4fc52013-04-18 14:35:25 +03006475 /* SECONDARY_EXEC_SHADOW_VMCS is enabled when L1 executes VMPTRLD
6476 * (handle_vmptrld).
6477 * We can NOT enable shadow_vmcs here because we don't yet have
6478 * a current VMCS12.
6479 */
6480 exec_control &= ~SECONDARY_EXEC_SHADOW_VMCS;
Kai Huanga3eaa862015-11-04 13:46:05 +08006481
6482 if (!enable_pml)
6483 exec_control &= ~SECONDARY_EXEC_ENABLE_PML;
Kai Huang843e4332015-01-28 10:54:28 +08006484
Paolo Bonzini3db13482017-08-24 14:48:03 +02006485 if (vmx_xsaves_supported()) {
6486 /* Exposing XSAVES only when XSAVE is exposed */
6487 bool xsaves_enabled =
6488 guest_cpuid_has(vcpu, X86_FEATURE_XSAVE) &&
6489 guest_cpuid_has(vcpu, X86_FEATURE_XSAVES);
6490
6491 if (!xsaves_enabled)
6492 exec_control &= ~SECONDARY_EXEC_XSAVES;
6493
6494 if (nested) {
6495 if (xsaves_enabled)
Paolo Bonzini6677f3d2018-02-26 13:40:08 +01006496 vmx->nested.msrs.secondary_ctls_high |=
Paolo Bonzini3db13482017-08-24 14:48:03 +02006497 SECONDARY_EXEC_XSAVES;
6498 else
Paolo Bonzini6677f3d2018-02-26 13:40:08 +01006499 vmx->nested.msrs.secondary_ctls_high &=
Paolo Bonzini3db13482017-08-24 14:48:03 +02006500 ~SECONDARY_EXEC_XSAVES;
6501 }
6502 }
6503
Paolo Bonzini80154d72017-08-24 13:55:35 +02006504 if (vmx_rdtscp_supported()) {
6505 bool rdtscp_enabled = guest_cpuid_has(vcpu, X86_FEATURE_RDTSCP);
6506 if (!rdtscp_enabled)
6507 exec_control &= ~SECONDARY_EXEC_RDTSCP;
6508
6509 if (nested) {
6510 if (rdtscp_enabled)
Paolo Bonzini6677f3d2018-02-26 13:40:08 +01006511 vmx->nested.msrs.secondary_ctls_high |=
Paolo Bonzini80154d72017-08-24 13:55:35 +02006512 SECONDARY_EXEC_RDTSCP;
6513 else
Paolo Bonzini6677f3d2018-02-26 13:40:08 +01006514 vmx->nested.msrs.secondary_ctls_high &=
Paolo Bonzini80154d72017-08-24 13:55:35 +02006515 ~SECONDARY_EXEC_RDTSCP;
6516 }
6517 }
6518
6519 if (vmx_invpcid_supported()) {
6520 /* Exposing INVPCID only when PCID is exposed */
6521 bool invpcid_enabled =
6522 guest_cpuid_has(vcpu, X86_FEATURE_INVPCID) &&
6523 guest_cpuid_has(vcpu, X86_FEATURE_PCID);
6524
6525 if (!invpcid_enabled) {
6526 exec_control &= ~SECONDARY_EXEC_ENABLE_INVPCID;
6527 guest_cpuid_clear(vcpu, X86_FEATURE_INVPCID);
6528 }
6529
6530 if (nested) {
6531 if (invpcid_enabled)
Paolo Bonzini6677f3d2018-02-26 13:40:08 +01006532 vmx->nested.msrs.secondary_ctls_high |=
Paolo Bonzini80154d72017-08-24 13:55:35 +02006533 SECONDARY_EXEC_ENABLE_INVPCID;
6534 else
Paolo Bonzini6677f3d2018-02-26 13:40:08 +01006535 vmx->nested.msrs.secondary_ctls_high &=
Paolo Bonzini80154d72017-08-24 13:55:35 +02006536 ~SECONDARY_EXEC_ENABLE_INVPCID;
6537 }
6538 }
6539
Jim Mattson45ec3682017-08-23 16:32:04 -07006540 if (vmx_rdrand_supported()) {
6541 bool rdrand_enabled = guest_cpuid_has(vcpu, X86_FEATURE_RDRAND);
6542 if (rdrand_enabled)
David Hildenbrand736fdf72017-08-24 20:51:37 +02006543 exec_control &= ~SECONDARY_EXEC_RDRAND_EXITING;
Jim Mattson45ec3682017-08-23 16:32:04 -07006544
6545 if (nested) {
6546 if (rdrand_enabled)
Paolo Bonzini6677f3d2018-02-26 13:40:08 +01006547 vmx->nested.msrs.secondary_ctls_high |=
David Hildenbrand736fdf72017-08-24 20:51:37 +02006548 SECONDARY_EXEC_RDRAND_EXITING;
Jim Mattson45ec3682017-08-23 16:32:04 -07006549 else
Paolo Bonzini6677f3d2018-02-26 13:40:08 +01006550 vmx->nested.msrs.secondary_ctls_high &=
David Hildenbrand736fdf72017-08-24 20:51:37 +02006551 ~SECONDARY_EXEC_RDRAND_EXITING;
Jim Mattson45ec3682017-08-23 16:32:04 -07006552 }
6553 }
6554
Jim Mattson75f4fc82017-08-23 16:32:03 -07006555 if (vmx_rdseed_supported()) {
6556 bool rdseed_enabled = guest_cpuid_has(vcpu, X86_FEATURE_RDSEED);
6557 if (rdseed_enabled)
David Hildenbrand736fdf72017-08-24 20:51:37 +02006558 exec_control &= ~SECONDARY_EXEC_RDSEED_EXITING;
Jim Mattson75f4fc82017-08-23 16:32:03 -07006559
6560 if (nested) {
6561 if (rdseed_enabled)
Paolo Bonzini6677f3d2018-02-26 13:40:08 +01006562 vmx->nested.msrs.secondary_ctls_high |=
David Hildenbrand736fdf72017-08-24 20:51:37 +02006563 SECONDARY_EXEC_RDSEED_EXITING;
Jim Mattson75f4fc82017-08-23 16:32:03 -07006564 else
Paolo Bonzini6677f3d2018-02-26 13:40:08 +01006565 vmx->nested.msrs.secondary_ctls_high &=
David Hildenbrand736fdf72017-08-24 20:51:37 +02006566 ~SECONDARY_EXEC_RDSEED_EXITING;
Jim Mattson75f4fc82017-08-23 16:32:03 -07006567 }
6568 }
6569
Paolo Bonzini80154d72017-08-24 13:55:35 +02006570 vmx->secondary_exec_control = exec_control;
Nadav Har'Elbf8179a2011-05-25 23:09:31 +03006571}
6572
Xiao Guangrongce88dec2011-07-12 03:33:44 +08006573static void ept_set_mmio_spte_mask(void)
6574{
6575 /*
6576 * EPT Misconfigurations can be generated if the value of bits 2:0
6577 * of an EPT paging-structure entry is 110b (write/execute).
Xiao Guangrongce88dec2011-07-12 03:33:44 +08006578 */
Peter Feinerdcdca5f2017-06-30 17:26:30 -07006579 kvm_mmu_set_mmio_spte_mask(VMX_EPT_RWX_MASK,
6580 VMX_EPT_MISCONFIG_WX_VALUE);
Xiao Guangrongce88dec2011-07-12 03:33:44 +08006581}
6582
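/*
 * Brief sketch, not part of the original file: an EPT entry whose permission
 * bits 2:0 are 110b (writable and executable but not readable) is
 * architecturally invalid, so ept_set_mmio_spte_mask() above installs exactly
 * that pattern for MMIO pages and the resulting EPT-misconfiguration exits
 * are recognized as MMIO accesses.  The helper name is hypothetical; 0x7 and
 * 0x6 correspond to VMX_EPT_RWX_MASK and VMX_EPT_MISCONFIG_WX_VALUE.
 */
static inline bool example_is_mmio_misconfig_spte(u64 spte)
{
	return (spte & 0x7) == 0x6;	/* write + exec, no read */
}
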
Wanpeng Lif53cd632014-12-02 19:14:58 +08006583#define VMX_XSS_EXIT_BITMAP 0
Nadav Har'Ela3a8ff82011-05-25 23:09:01 +03006584/*
Avi Kivity6aa8b732006-12-10 02:21:36 -08006585 * Sets up the vmcs for emulated real mode.
6586 */
David Hildenbrand12d79912017-08-24 20:51:26 +02006587static void vmx_vcpu_setup(struct vcpu_vmx *vmx)
Avi Kivity6aa8b732006-12-10 02:21:36 -08006588{
Avi Kivity6aa8b732006-12-10 02:21:36 -08006589 int i;
Avi Kivity6aa8b732006-12-10 02:21:36 -08006590
Abel Gordon4607c2d2013-04-18 14:35:55 +03006591 if (enable_shadow_vmcs) {
Jim Mattsonf4160e42018-05-29 09:11:33 -07006592 /*
6593 * At vCPU creation, "VMWRITE to any supported field
6594 * in the VMCS" is supported, so use the more
6595 * permissive vmx_vmread_bitmap to specify both read
6596 * and write permissions for the shadow VMCS.
6597 */
Abel Gordon4607c2d2013-04-18 14:35:55 +03006598 vmcs_write64(VMREAD_BITMAP, __pa(vmx_vmread_bitmap));
Jim Mattsonf4160e42018-05-29 09:11:33 -07006599 vmcs_write64(VMWRITE_BITMAP, __pa(vmx_vmread_bitmap));
Abel Gordon4607c2d2013-04-18 14:35:55 +03006600 }
Sheng Yang25c5f222008-03-28 13:18:56 +08006601 if (cpu_has_vmx_msr_bitmap())
Paolo Bonzini904e14f2018-01-16 16:51:18 +01006602 vmcs_write64(MSR_BITMAP, __pa(vmx->vmcs01.msr_bitmap));
Sheng Yang25c5f222008-03-28 13:18:56 +08006603
Avi Kivity6aa8b732006-12-10 02:21:36 -08006604 vmcs_write64(VMCS_LINK_POINTER, -1ull); /* 22.3.1.5 */
6605
Avi Kivity6aa8b732006-12-10 02:21:36 -08006606 /* Control */
Yang Zhang01e439b2013-04-11 19:25:12 +08006607 vmcs_write32(PIN_BASED_VM_EXEC_CONTROL, vmx_pin_based_exec_ctrl(vmx));
Yunhong Jiang64672c92016-06-13 14:19:59 -07006608 vmx->hv_deadline_tsc = -1;
Yang, Sheng6e5d8652007-09-12 18:03:11 +08006609
Nadav Har'Elbf8179a2011-05-25 23:09:31 +03006610 vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, vmx_exec_control(vmx));
Avi Kivity6aa8b732006-12-10 02:21:36 -08006611
Dan Williamsdfa169b2016-06-02 11:17:24 -07006612 if (cpu_has_secondary_exec_ctrls()) {
Paolo Bonzini80154d72017-08-24 13:55:35 +02006613 vmx_compute_secondary_exec_control(vmx);
Nadav Har'Elbf8179a2011-05-25 23:09:31 +03006614 vmcs_write32(SECONDARY_VM_EXEC_CONTROL,
Paolo Bonzini80154d72017-08-24 13:55:35 +02006615 vmx->secondary_exec_control);
Dan Williamsdfa169b2016-06-02 11:17:24 -07006616 }
Sheng Yangf78e0e22007-10-29 09:40:42 +08006617
Andrey Smetanind62caab2015-11-10 15:36:33 +03006618 if (kvm_vcpu_apicv_active(&vmx->vcpu)) {
Yang Zhangc7c9c562013-01-25 10:18:51 +08006619 vmcs_write64(EOI_EXIT_BITMAP0, 0);
6620 vmcs_write64(EOI_EXIT_BITMAP1, 0);
6621 vmcs_write64(EOI_EXIT_BITMAP2, 0);
6622 vmcs_write64(EOI_EXIT_BITMAP3, 0);
6623
6624 vmcs_write16(GUEST_INTR_STATUS, 0);
Yang Zhang01e439b2013-04-11 19:25:12 +08006625
Li RongQing0bcf2612015-12-03 13:29:34 +08006626 vmcs_write16(POSTED_INTR_NV, POSTED_INTR_VECTOR);
Yang Zhang01e439b2013-04-11 19:25:12 +08006627 vmcs_write64(POSTED_INTR_DESC_ADDR, __pa((&vmx->pi_desc)));
Yang Zhangc7c9c562013-01-25 10:18:51 +08006628 }
6629
Wanpeng Lib31c1142018-03-12 04:53:04 -07006630 if (!kvm_pause_in_guest(vmx->vcpu.kvm)) {
Zhai, Edwin4b8d54f2009-10-09 18:03:20 +08006631 vmcs_write32(PLE_GAP, ple_gap);
Radim Krčmářa7653ec2014-08-21 18:08:07 +02006632 vmx->ple_window = ple_window;
6633 vmx->ple_window_dirty = true;
Zhai, Edwin4b8d54f2009-10-09 18:03:20 +08006634 }
6635
Xiao Guangrongc3707952011-07-12 03:28:04 +08006636 vmcs_write32(PAGE_FAULT_ERROR_CODE_MASK, 0);
6637 vmcs_write32(PAGE_FAULT_ERROR_CODE_MATCH, 0);
Avi Kivity6aa8b732006-12-10 02:21:36 -08006638 vmcs_write32(CR3_TARGET_COUNT, 0); /* 22.2.1 */
6639
Avi Kivity9581d442010-10-19 16:46:55 +02006640 vmcs_write16(HOST_FS_SELECTOR, 0); /* 22.2.4 */
6641 vmcs_write16(HOST_GS_SELECTOR, 0); /* 22.2.4 */
Yang Zhanga547c6d2013-04-11 19:25:10 +08006642 vmx_set_constant_host_state(vmx);
Avi Kivity6aa8b732006-12-10 02:21:36 -08006643 vmcs_writel(HOST_FS_BASE, 0); /* 22.2.4 */
6644 vmcs_writel(HOST_GS_BASE, 0); /* 22.2.4 */
Avi Kivity6aa8b732006-12-10 02:21:36 -08006645
Bandan Das2a499e42017-08-03 15:54:41 -04006646 if (cpu_has_vmx_vmfunc())
6647 vmcs_write64(VM_FUNCTION_CONTROL, 0);
6648
Eddie Dong2cc51562007-05-21 07:28:09 +03006649 vmcs_write32(VM_EXIT_MSR_STORE_COUNT, 0);
6650 vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, 0);
Konrad Rzeszutek Wilk33966dd62018-06-20 13:58:37 -04006651 vmcs_write64(VM_EXIT_MSR_LOAD_ADDR, __pa(vmx->msr_autoload.host.val));
Eddie Dong2cc51562007-05-21 07:28:09 +03006652 vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, 0);
Konrad Rzeszutek Wilk33966dd62018-06-20 13:58:37 -04006653 vmcs_write64(VM_ENTRY_MSR_LOAD_ADDR, __pa(vmx->msr_autoload.guest.val));
Avi Kivity6aa8b732006-12-10 02:21:36 -08006654
Radim Krčmář74545702015-04-27 15:11:25 +02006655 if (vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_IA32_PAT)
6656 vmcs_write64(GUEST_IA32_PAT, vmx->vcpu.arch.pat);
Sheng Yang468d4722008-10-09 16:01:55 +08006657
Paolo Bonzini03916db2014-07-24 14:21:57 +02006658 for (i = 0; i < ARRAY_SIZE(vmx_msr_index); ++i) {
Avi Kivity6aa8b732006-12-10 02:21:36 -08006659 u32 index = vmx_msr_index[i];
6660 u32 data_low, data_high;
Gregory Haskinsa2fa3e92007-07-27 08:13:10 -04006661 int j = vmx->nmsrs;
Avi Kivity6aa8b732006-12-10 02:21:36 -08006662
6663 if (rdmsr_safe(index, &data_low, &data_high) < 0)
6664 continue;
Avi Kivity432bd6c2007-01-31 23:48:13 -08006665 if (wrmsr_safe(index, data_low, data_high) < 0)
6666 continue;
Avi Kivity26bb0982009-09-07 11:14:12 +03006667 vmx->guest_msrs[j].index = i;
6668 vmx->guest_msrs[j].data = 0;
Avi Kivityd5696722009-12-02 12:28:47 +02006669 vmx->guest_msrs[j].mask = -1ull;
Gregory Haskinsa2fa3e92007-07-27 08:13:10 -04006670 ++vmx->nmsrs;
Avi Kivity6aa8b732006-12-10 02:21:36 -08006671 }
Avi Kivity6aa8b732006-12-10 02:21:36 -08006672
Paolo Bonzini5b76a3c2018-08-05 16:07:47 +02006673 vmx->arch_capabilities = kvm_get_arch_capabilities();
Gleb Natapov2961e8762013-11-25 15:37:13 +02006674
6675 vm_exit_controls_init(vmx, vmcs_config.vmexit_ctrl);
Avi Kivity6aa8b732006-12-10 02:21:36 -08006676
6677 /* 22.2.1, 20.8.1 */
Gleb Natapov2961e8762013-11-25 15:37:13 +02006678 vm_entry_controls_init(vmx, vmcs_config.vmentry_ctrl);
Yang, Sheng1c3d14fe2007-07-29 11:07:42 +03006679
Paolo Bonzinibd7e5b02017-02-03 21:18:52 -08006680 vmx->vcpu.arch.cr0_guest_owned_bits = X86_CR0_TS;
6681 vmcs_writel(CR0_GUEST_HOST_MASK, ~X86_CR0_TS);
6682
Nadav Har'Elbf8179a2011-05-25 23:09:31 +03006683 set_cr4_guest_host_mask(vmx);
Avi Kivitye00c8cf2007-10-21 11:00:39 +02006684
Wanpeng Lif53cd632014-12-02 19:14:58 +08006685 if (vmx_xsaves_supported())
6686 vmcs_write64(XSS_EXIT_BITMAP, VMX_XSS_EXIT_BITMAP);
6687
Peter Feiner4e595162016-07-07 14:49:58 -07006688 if (enable_pml) {
Peter Feiner4e595162016-07-07 14:49:58 -07006689 vmcs_write64(PML_ADDRESS, page_to_phys(vmx->pml_pg));
6690 vmcs_write16(GUEST_PML_INDEX, PML_ENTITY_NUM - 1);
6691 }
Sean Christopherson0b665d32018-08-14 09:33:34 -07006692
6693 if (cpu_has_vmx_encls_vmexit())
6694 vmcs_write64(ENCLS_EXITING_BITMAP, -1ull);
Avi Kivitye00c8cf2007-10-21 11:00:39 +02006695}
6696
Nadav Amitd28bc9d2015-04-13 14:34:08 +03006697static void vmx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
Avi Kivitye00c8cf2007-10-21 11:00:39 +02006698{
6699 struct vcpu_vmx *vmx = to_vmx(vcpu);
Jan Kiszka58cb6282014-01-24 16:48:44 +01006700 struct msr_data apic_base_msr;
Nadav Amitd28bc9d2015-04-13 14:34:08 +03006701 u64 cr0;
Avi Kivitye00c8cf2007-10-21 11:00:39 +02006702
Avi Kivity7ffd92c2009-06-09 14:10:45 +03006703 vmx->rmode.vm86_active = 0;
KarimAllah Ahmedd28b3872018-02-01 22:59:45 +01006704 vmx->spec_ctrl = 0;
Avi Kivitye00c8cf2007-10-21 11:00:39 +02006705
Wanpeng Li518e7b92018-02-28 14:03:31 +08006706 vcpu->arch.microcode_version = 0x100000000ULL;
Zhang Xiantaoad312c72007-12-13 23:50:52 +08006707 vmx->vcpu.arch.regs[VCPU_REGS_RDX] = get_rdx_init_val();
Nadav Amitd28bc9d2015-04-13 14:34:08 +03006708 kvm_set_cr8(vcpu, 0);
6709
6710 if (!init_event) {
6711 apic_base_msr.data = APIC_DEFAULT_PHYS_BASE |
6712 MSR_IA32_APICBASE_ENABLE;
6713 if (kvm_vcpu_is_reset_bsp(vcpu))
6714 apic_base_msr.data |= MSR_IA32_APICBASE_BSP;
6715 apic_base_msr.host_initiated = true;
6716 kvm_set_apic_base(vcpu, &apic_base_msr);
6717 }
Avi Kivitye00c8cf2007-10-21 11:00:39 +02006718
Avi Kivity2fb92db2011-04-27 19:42:18 +03006719 vmx_segment_cache_clear(vmx);
6720
Avi Kivity5706be02008-08-20 15:07:31 +03006721 seg_setup(VCPU_SREG_CS);
Jan Kiszka66450a22013-03-13 12:42:34 +01006722 vmcs_write16(GUEST_CS_SELECTOR, 0xf000);
Paolo Bonzinif3531052015-12-03 15:49:56 +01006723 vmcs_writel(GUEST_CS_BASE, 0xffff0000ul);
Avi Kivitye00c8cf2007-10-21 11:00:39 +02006724
6725 seg_setup(VCPU_SREG_DS);
6726 seg_setup(VCPU_SREG_ES);
6727 seg_setup(VCPU_SREG_FS);
6728 seg_setup(VCPU_SREG_GS);
6729 seg_setup(VCPU_SREG_SS);
6730
6731 vmcs_write16(GUEST_TR_SELECTOR, 0);
6732 vmcs_writel(GUEST_TR_BASE, 0);
6733 vmcs_write32(GUEST_TR_LIMIT, 0xffff);
6734 vmcs_write32(GUEST_TR_AR_BYTES, 0x008b);
6735
6736 vmcs_write16(GUEST_LDTR_SELECTOR, 0);
6737 vmcs_writel(GUEST_LDTR_BASE, 0);
6738 vmcs_write32(GUEST_LDTR_LIMIT, 0xffff);
6739 vmcs_write32(GUEST_LDTR_AR_BYTES, 0x00082);
6740
Nadav Amitd28bc9d2015-04-13 14:34:08 +03006741 if (!init_event) {
6742 vmcs_write32(GUEST_SYSENTER_CS, 0);
6743 vmcs_writel(GUEST_SYSENTER_ESP, 0);
6744 vmcs_writel(GUEST_SYSENTER_EIP, 0);
6745 vmcs_write64(GUEST_IA32_DEBUGCTL, 0);
6746 }
Avi Kivitye00c8cf2007-10-21 11:00:39 +02006747
Wanpeng Lic37c2872017-11-20 14:52:21 -08006748 kvm_set_rflags(vcpu, X86_EFLAGS_FIXED);
Jan Kiszka66450a22013-03-13 12:42:34 +01006749 kvm_rip_write(vcpu, 0xfff0);
Avi Kivitye00c8cf2007-10-21 11:00:39 +02006750
Avi Kivitye00c8cf2007-10-21 11:00:39 +02006751 vmcs_writel(GUEST_GDTR_BASE, 0);
6752 vmcs_write32(GUEST_GDTR_LIMIT, 0xffff);
6753
6754 vmcs_writel(GUEST_IDTR_BASE, 0);
6755 vmcs_write32(GUEST_IDTR_LIMIT, 0xffff);
6756
Anthony Liguori443381a2010-12-06 10:53:38 -06006757 vmcs_write32(GUEST_ACTIVITY_STATE, GUEST_ACTIVITY_ACTIVE);
Avi Kivitye00c8cf2007-10-21 11:00:39 +02006758 vmcs_write32(GUEST_INTERRUPTIBILITY_INFO, 0);
Paolo Bonzinif3531052015-12-03 15:49:56 +01006759 vmcs_writel(GUEST_PENDING_DBG_EXCEPTIONS, 0);
Wanpeng Lia554d202017-10-11 05:10:19 -07006760 if (kvm_mpx_supported())
6761 vmcs_write64(GUEST_BNDCFGS, 0);
Avi Kivitye00c8cf2007-10-21 11:00:39 +02006762
Avi Kivitye00c8cf2007-10-21 11:00:39 +02006763 setup_msrs(vmx);
6764
Avi Kivity6aa8b732006-12-10 02:21:36 -08006765 vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, 0); /* 22.2.1 */
6766
Nadav Amitd28bc9d2015-04-13 14:34:08 +03006767 if (cpu_has_vmx_tpr_shadow() && !init_event) {
Sheng Yangf78e0e22007-10-29 09:40:42 +08006768 vmcs_write64(VIRTUAL_APIC_PAGE_ADDR, 0);
Paolo Bonzini35754c92015-07-29 12:05:37 +02006769 if (cpu_need_tpr_shadow(vcpu))
Sheng Yangf78e0e22007-10-29 09:40:42 +08006770 vmcs_write64(VIRTUAL_APIC_PAGE_ADDR,
Nadav Amitd28bc9d2015-04-13 14:34:08 +03006771 __pa(vcpu->arch.apic->regs));
Sheng Yangf78e0e22007-10-29 09:40:42 +08006772 vmcs_write32(TPR_THRESHOLD, 0);
6773 }
6774
Paolo Bonzinia73896c2014-11-02 07:54:30 +01006775 kvm_make_request(KVM_REQ_APIC_PAGE_RELOAD, vcpu);
Avi Kivity6aa8b732006-12-10 02:21:36 -08006776
Sheng Yang2384d2b2008-01-17 15:14:33 +08006777 if (vmx->vpid != 0)
6778 vmcs_write16(VIRTUAL_PROCESSOR_ID, vmx->vpid);
6779
Nadav Amitd28bc9d2015-04-13 14:34:08 +03006780 cr0 = X86_CR0_NW | X86_CR0_CD | X86_CR0_ET;
Nadav Amitd28bc9d2015-04-13 14:34:08 +03006781 vmx->vcpu.arch.cr0 = cr0;
Bruce Rogersf2463242016-04-28 14:49:21 -06006782 vmx_set_cr0(vcpu, cr0); /* enter rmode */
Nadav Amitd28bc9d2015-04-13 14:34:08 +03006783 vmx_set_cr4(vcpu, 0);
Paolo Bonzini56908912015-10-19 11:30:19 +02006784 vmx_set_efer(vcpu, 0);
Paolo Bonzinibd7e5b02017-02-03 21:18:52 -08006785
Nadav Amitd28bc9d2015-04-13 14:34:08 +03006786 update_exception_bitmap(vcpu);
Avi Kivity6aa8b732006-12-10 02:21:36 -08006787
Wanpeng Lidd5f5342015-09-23 18:26:57 +08006788 vpid_sync_context(vmx->vpid);
Wanpeng Licaa057a2018-03-12 04:53:03 -07006789 if (init_event)
6790 vmx_clear_hlt(vcpu);
Avi Kivity6aa8b732006-12-10 02:21:36 -08006791}
6792
Nadav Har'Elb6f12502011-05-25 23:13:06 +03006793/*
6794 * In nested virtualization, check if L1 asked to exit on external interrupts.
6795 * For most existing hypervisors, this will always return true.
6796 */
6797static bool nested_exit_on_intr(struct kvm_vcpu *vcpu)
6798{
6799 return get_vmcs12(vcpu)->pin_based_vm_exec_control &
6800 PIN_BASED_EXT_INTR_MASK;
6801}
6802
Bandan Das77b0f5d2014-04-19 18:17:45 -04006803/*
6804 * In nested virtualization, check if L1 has set
6805 * VM_EXIT_ACK_INTR_ON_EXIT
6806 */
6807static bool nested_exit_intr_ack_set(struct kvm_vcpu *vcpu)
6808{
6809 return get_vmcs12(vcpu)->vm_exit_controls &
6810 VM_EXIT_ACK_INTR_ON_EXIT;
6811}
6812
Jan Kiszkaea8ceb82013-04-14 21:04:26 +02006813static bool nested_exit_on_nmi(struct kvm_vcpu *vcpu)
6814{
Krish Sadhukhan0c7f6502018-02-20 21:24:39 -05006815 return nested_cpu_has_nmi_exiting(get_vmcs12(vcpu));
Jan Kiszkaea8ceb82013-04-14 21:04:26 +02006816}
6817
Jan Kiszkac9a79532014-03-07 20:03:15 +01006818static void enable_irq_window(struct kvm_vcpu *vcpu)
Jan Kiszka3b86cd92008-09-26 09:30:57 +02006819{
Paolo Bonzini47c01522016-12-19 11:44:07 +01006820 vmcs_set_bits(CPU_BASED_VM_EXEC_CONTROL,
6821 CPU_BASED_VIRTUAL_INTR_PENDING);
Jan Kiszka3b86cd92008-09-26 09:30:57 +02006822}
6823
Jan Kiszkac9a79532014-03-07 20:03:15 +01006824static void enable_nmi_window(struct kvm_vcpu *vcpu)
Jan Kiszka3b86cd92008-09-26 09:30:57 +02006825{
Paolo Bonzinid02fcf52017-11-06 13:31:13 +01006826 if (!enable_vnmi ||
Paolo Bonzini8a1b4392017-11-06 13:31:12 +01006827 vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & GUEST_INTR_STATE_STI) {
Jan Kiszkac9a79532014-03-07 20:03:15 +01006828 enable_irq_window(vcpu);
6829 return;
6830 }
Jan Kiszka03b28f82013-04-29 16:46:42 +02006831
Paolo Bonzini47c01522016-12-19 11:44:07 +01006832 vmcs_set_bits(CPU_BASED_VM_EXEC_CONTROL,
6833 CPU_BASED_VIRTUAL_NMI_PENDING);
Jan Kiszka3b86cd92008-09-26 09:30:57 +02006834}
6835
Gleb Natapov66fd3f72009-05-11 13:35:50 +03006836static void vmx_inject_irq(struct kvm_vcpu *vcpu)
Eddie Dong85f455f2007-07-06 12:20:49 +03006837{
Avi Kivity9c8cba32007-11-22 11:42:59 +02006838 struct vcpu_vmx *vmx = to_vmx(vcpu);
Gleb Natapov66fd3f72009-05-11 13:35:50 +03006839 uint32_t intr;
6840 int irq = vcpu->arch.interrupt.nr;
Avi Kivity9c8cba32007-11-22 11:42:59 +02006841
Marcelo Tosatti229456f2009-06-17 09:22:14 -03006842 trace_kvm_inj_virq(irq);
Feng (Eric) Liu2714d1d2008-04-10 15:31:10 -04006843
Avi Kivityfa89a812008-09-01 15:57:51 +03006844 ++vcpu->stat.irq_injections;
Avi Kivity7ffd92c2009-06-09 14:10:45 +03006845 if (vmx->rmode.vm86_active) {
Serge E. Hallyn71f98332011-04-13 09:12:54 -05006846 int inc_eip = 0;
6847 if (vcpu->arch.interrupt.soft)
6848 inc_eip = vcpu->arch.event_exit_inst_len;
6849 if (kvm_inject_realmode_interrupt(vcpu, irq, inc_eip) != EMULATE_DONE)
Mohammed Gamala92601b2010-09-19 14:34:07 +02006850 kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu);
Eddie Dong85f455f2007-07-06 12:20:49 +03006851 return;
6852 }
Gleb Natapov66fd3f72009-05-11 13:35:50 +03006853 intr = irq | INTR_INFO_VALID_MASK;
6854 if (vcpu->arch.interrupt.soft) {
6855 intr |= INTR_TYPE_SOFT_INTR;
6856 vmcs_write32(VM_ENTRY_INSTRUCTION_LEN,
6857 vmx->vcpu.arch.event_exit_inst_len);
6858 } else
6859 intr |= INTR_TYPE_EXT_INTR;
6860 vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, intr);
Wanpeng Licaa057a2018-03-12 04:53:03 -07006861
6862 vmx_clear_hlt(vcpu);
Eddie Dong85f455f2007-07-06 12:20:49 +03006863}
6864
Sheng Yangf08864b2008-05-15 18:23:25 +08006865static void vmx_inject_nmi(struct kvm_vcpu *vcpu)
6866{
Jan Kiszka66a5a342008-09-26 09:30:51 +02006867 struct vcpu_vmx *vmx = to_vmx(vcpu);
6868
Paolo Bonzinid02fcf52017-11-06 13:31:13 +01006869 if (!enable_vnmi) {
Paolo Bonzini8a1b4392017-11-06 13:31:12 +01006870 /*
6871 * Tracking the NMI-blocked state in software is built upon
6872 * finding the next open IRQ window. This, in turn, depends on
6873 * well-behaving guests: They have to keep IRQs disabled at
6874 * least as long as the NMI handler runs. Otherwise we may
6875 * cause NMI nesting, maybe breaking the guest. But as this is
6876 * highly unlikely, we can live with the residual risk.
6877 */
6878 vmx->loaded_vmcs->soft_vnmi_blocked = 1;
6879 vmx->loaded_vmcs->vnmi_blocked_time = 0;
6880 }
6881
Paolo Bonzini4c4a6f72017-07-14 13:36:11 +02006882 ++vcpu->stat.nmi_injections;
6883 vmx->loaded_vmcs->nmi_known_unmasked = false;
Jan Kiszka3b86cd92008-09-26 09:30:57 +02006884
Avi Kivity7ffd92c2009-06-09 14:10:45 +03006885 if (vmx->rmode.vm86_active) {
Serge E. Hallyn71f98332011-04-13 09:12:54 -05006886 if (kvm_inject_realmode_interrupt(vcpu, NMI_VECTOR, 0) != EMULATE_DONE)
Mohammed Gamala92601b2010-09-19 14:34:07 +02006887 kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu);
Jan Kiszka66a5a342008-09-26 09:30:51 +02006888 return;
6889 }
Wanpeng Lic5a6d5f2016-09-22 17:55:54 +08006890
Sheng Yangf08864b2008-05-15 18:23:25 +08006891 vmcs_write32(VM_ENTRY_INTR_INFO_FIELD,
6892 INTR_TYPE_NMI_INTR | INTR_INFO_VALID_MASK | NMI_VECTOR);
Wanpeng Licaa057a2018-03-12 04:53:03 -07006893
6894 vmx_clear_hlt(vcpu);
Sheng Yangf08864b2008-05-15 18:23:25 +08006895}
6896
Jan Kiszka3cfc3092009-11-12 01:04:25 +01006897static bool vmx_get_nmi_mask(struct kvm_vcpu *vcpu)
6898{
Paolo Bonzini4c4a6f72017-07-14 13:36:11 +02006899 struct vcpu_vmx *vmx = to_vmx(vcpu);
6900 bool masked;
6901
Paolo Bonzinid02fcf52017-11-06 13:31:13 +01006902 if (!enable_vnmi)
Paolo Bonzini8a1b4392017-11-06 13:31:12 +01006903 return vmx->loaded_vmcs->soft_vnmi_blocked;
Paolo Bonzini4c4a6f72017-07-14 13:36:11 +02006904 if (vmx->loaded_vmcs->nmi_known_unmasked)
Avi Kivity9d58b932011-03-07 16:52:07 +02006905 return false;
Paolo Bonzini4c4a6f72017-07-14 13:36:11 +02006906 masked = vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & GUEST_INTR_STATE_NMI;
6907 vmx->loaded_vmcs->nmi_known_unmasked = !masked;
6908 return masked;
Jan Kiszka3cfc3092009-11-12 01:04:25 +01006909}
6910
6911static void vmx_set_nmi_mask(struct kvm_vcpu *vcpu, bool masked)
6912{
6913 struct vcpu_vmx *vmx = to_vmx(vcpu);
6914
Paolo Bonzinid02fcf52017-11-06 13:31:13 +01006915 if (!enable_vnmi) {
Paolo Bonzini8a1b4392017-11-06 13:31:12 +01006916 if (vmx->loaded_vmcs->soft_vnmi_blocked != masked) {
6917 vmx->loaded_vmcs->soft_vnmi_blocked = masked;
6918 vmx->loaded_vmcs->vnmi_blocked_time = 0;
6919 }
6920 } else {
6921 vmx->loaded_vmcs->nmi_known_unmasked = !masked;
6922 if (masked)
6923 vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO,
6924 GUEST_INTR_STATE_NMI);
6925 else
6926 vmcs_clear_bits(GUEST_INTERRUPTIBILITY_INFO,
6927 GUEST_INTR_STATE_NMI);
6928 }
Jan Kiszka3cfc3092009-11-12 01:04:25 +01006929}
6930
Jan Kiszka2505dc92013-04-14 12:12:47 +02006931static int vmx_nmi_allowed(struct kvm_vcpu *vcpu)
6932{
Jan Kiszkab6b8a142014-03-07 20:03:12 +01006933 if (to_vmx(vcpu)->nested.nested_run_pending)
6934 return 0;
Jan Kiszkaea8ceb82013-04-14 21:04:26 +02006935
Paolo Bonzinid02fcf52017-11-06 13:31:13 +01006936 if (!enable_vnmi &&
Paolo Bonzini8a1b4392017-11-06 13:31:12 +01006937 to_vmx(vcpu)->loaded_vmcs->soft_vnmi_blocked)
6938 return 0;
6939
Jan Kiszka2505dc92013-04-14 12:12:47 +02006940 return !(vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) &
6941 (GUEST_INTR_STATE_MOV_SS | GUEST_INTR_STATE_STI
6942 | GUEST_INTR_STATE_NMI));
6943}
6944
Gleb Natapov78646122009-03-23 12:12:11 +02006945static int vmx_interrupt_allowed(struct kvm_vcpu *vcpu)
6946{
Jan Kiszkab6b8a142014-03-07 20:03:12 +01006947 return (!to_vmx(vcpu)->nested.nested_run_pending &&
6948 vmcs_readl(GUEST_RFLAGS) & X86_EFLAGS_IF) &&
Gleb Natapovc4282df2009-04-21 17:45:07 +03006949 !(vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) &
6950 (GUEST_INTR_STATE_STI | GUEST_INTR_STATE_MOV_SS));
Gleb Natapov78646122009-03-23 12:12:11 +02006951}
6952
Izik Eiduscbc94022007-10-25 00:29:55 +02006953static int vmx_set_tss_addr(struct kvm *kvm, unsigned int addr)
6954{
6955 int ret;
Izik Eiduscbc94022007-10-25 00:29:55 +02006956
Sean Christophersonf7eaeb02018-03-05 12:04:36 -08006957 if (enable_unrestricted_guest)
6958 return 0;
6959
Paolo Bonzini1d8007b2015-10-12 13:38:32 +02006960 ret = x86_set_memory_region(kvm, TSS_PRIVATE_MEMSLOT, addr,
6961 PAGE_SIZE * 3);
Izik Eiduscbc94022007-10-25 00:29:55 +02006962 if (ret)
6963 return ret;
Sean Christopherson40bbb9d2018-03-20 12:17:20 -07006964 to_kvm_vmx(kvm)->tss_addr = addr;
Paolo Bonzini1f755a82014-09-16 13:37:40 +02006965 return init_rmode_tss(kvm);
Izik Eiduscbc94022007-10-25 00:29:55 +02006966}
6967
Sean Christopherson2ac52ab2018-03-20 12:17:19 -07006968static int vmx_set_identity_map_addr(struct kvm *kvm, u64 ident_addr)
6969{
Sean Christopherson40bbb9d2018-03-20 12:17:20 -07006970 to_kvm_vmx(kvm)->ept_identity_map_addr = ident_addr;
Sean Christopherson2ac52ab2018-03-20 12:17:19 -07006971 return 0;
6972}
6973
Gleb Natapov0ca1b4f2012-12-20 16:57:47 +02006974static bool rmode_exception(struct kvm_vcpu *vcpu, int vec)
Avi Kivity6aa8b732006-12-10 02:21:36 -08006975{
Jan Kiszka77ab6db2008-07-14 12:28:51 +02006976 switch (vec) {
Jan Kiszka77ab6db2008-07-14 12:28:51 +02006977 case BP_VECTOR:
Jan Kiszkac573cd222010-02-23 17:47:53 +01006978 /*
6979 * Update instruction length as we may reinject the exception
6980 * from user space while in guest debugging mode.
6981 */
6982 to_vmx(vcpu)->vcpu.arch.event_exit_inst_len =
6983 vmcs_read32(VM_EXIT_INSTRUCTION_LEN);
Jan Kiszkad0bfb942008-12-15 13:52:10 +01006984 if (vcpu->guest_debug & KVM_GUESTDBG_USE_SW_BP)
Gleb Natapov0ca1b4f2012-12-20 16:57:47 +02006985 return false;
6986 /* fall through */
6987 case DB_VECTOR:
6988 if (vcpu->guest_debug &
6989 (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP))
6990 return false;
Jan Kiszkad0bfb942008-12-15 13:52:10 +01006991 /* fall through */
6992 case DE_VECTOR:
Jan Kiszka77ab6db2008-07-14 12:28:51 +02006993 case OF_VECTOR:
6994 case BR_VECTOR:
6995 case UD_VECTOR:
6996 case DF_VECTOR:
6997 case SS_VECTOR:
6998 case GP_VECTOR:
6999 case MF_VECTOR:
Gleb Natapov0ca1b4f2012-12-20 16:57:47 +02007000 return true;
7001 break;
Jan Kiszka77ab6db2008-07-14 12:28:51 +02007002 }
Gleb Natapov0ca1b4f2012-12-20 16:57:47 +02007003 return false;
7004}
7005
7006static int handle_rmode_exception(struct kvm_vcpu *vcpu,
7007 int vec, u32 err_code)
7008{
7009 /*
7010 * An instruction with the address-size override prefix (opcode 0x67)
7011 * causes a #SS fault with error code 0 in VM86 mode.
7012 */
7013 if (((vec == GP_VECTOR) || (vec == SS_VECTOR)) && err_code == 0) {
Sean Christopherson0ce97a22018-08-23 13:56:52 -07007014 if (kvm_emulate_instruction(vcpu, 0) == EMULATE_DONE) {
Gleb Natapov0ca1b4f2012-12-20 16:57:47 +02007015 if (vcpu->arch.halt_request) {
7016 vcpu->arch.halt_request = 0;
Joel Schopp5cb56052015-03-02 13:43:31 -06007017 return kvm_vcpu_halt(vcpu);
Gleb Natapov0ca1b4f2012-12-20 16:57:47 +02007018 }
7019 return 1;
7020 }
7021 return 0;
7022 }
7023
7024 /*
7025 * Forward all other exceptions that are valid in real mode.
7026 * FIXME: Breaks guest debugging in real mode, needs to be fixed with
7027 * the required debugging infrastructure rework.
7028 */
7029 kvm_queue_exception(vcpu, vec);
7030 return 1;
Avi Kivity6aa8b732006-12-10 02:21:36 -08007031}
7032
Andi Kleena0861c02009-06-08 17:37:09 +08007033/*
7034 * Trigger machine check on the host. We assume all the MSRs are already set up
7035 * by the CPU and that we still run on the same CPU as the MCE occurred on.
7036 * We pass a fake environment to the machine check handler because we want
7037 * the guest to be always treated like user space, no matter what context
7038 * it used internally.
7039 */
7040static void kvm_machine_check(void)
7041{
7042#if defined(CONFIG_X86_MCE) && defined(CONFIG_X86_64)
7043 struct pt_regs regs = {
7044 .cs = 3, /* Fake ring 3 no matter what the guest ran on */
7045 .flags = X86_EFLAGS_IF,
7046 };
7047
7048 do_machine_check(&regs, 0);
7049#endif
7050}
7051
Avi Kivity851ba692009-08-24 11:10:17 +03007052static int handle_machine_check(struct kvm_vcpu *vcpu)
Andi Kleena0861c02009-06-08 17:37:09 +08007053{
7054 /* already handled by vcpu_run */
7055 return 1;
7056}
7057
Avi Kivity851ba692009-08-24 11:10:17 +03007058static int handle_exception(struct kvm_vcpu *vcpu)
Avi Kivity6aa8b732006-12-10 02:21:36 -08007059{
Avi Kivity1155f762007-11-22 11:30:47 +02007060 struct vcpu_vmx *vmx = to_vmx(vcpu);
Avi Kivity851ba692009-08-24 11:10:17 +03007061 struct kvm_run *kvm_run = vcpu->run;
Jan Kiszkad0bfb942008-12-15 13:52:10 +01007062 u32 intr_info, ex_no, error_code;
Jan Kiszka42dbaa52008-12-15 13:52:10 +01007063 unsigned long cr2, rip, dr6;
Avi Kivity6aa8b732006-12-10 02:21:36 -08007064 u32 vect_info;
7065 enum emulation_result er;
7066
Avi Kivity1155f762007-11-22 11:30:47 +02007067 vect_info = vmx->idt_vectoring_info;
Avi Kivity88786472011-03-07 17:39:45 +02007068 intr_info = vmx->exit_intr_info;
Avi Kivity6aa8b732006-12-10 02:21:36 -08007069
Andi Kleena0861c02009-06-08 17:37:09 +08007070 if (is_machine_check(intr_info))
Avi Kivity851ba692009-08-24 11:10:17 +03007071 return handle_machine_check(vcpu);
Andi Kleena0861c02009-06-08 17:37:09 +08007072
Jim Mattsonef85b672016-12-12 11:01:37 -08007073 if (is_nmi(intr_info))
Avi Kivity1b6269d2007-10-09 12:12:19 +02007074 return 1; /* already handled by vmx_vcpu_run() */
Anthony Liguori2ab455c2007-04-27 09:29:49 +03007075
Wanpeng Li082d06e2018-04-03 16:28:48 -07007076 if (is_invalid_opcode(intr_info))
7077 return handle_ud(vcpu);
Anthony Liguori7aa81cc2007-09-17 14:57:50 -05007078
Avi Kivity6aa8b732006-12-10 02:21:36 -08007079 error_code = 0;
Ryan Harper2e113842008-02-11 10:26:38 -06007080 if (intr_info & INTR_INFO_DELIVER_CODE_MASK)
Avi Kivity6aa8b732006-12-10 02:21:36 -08007081 error_code = vmcs_read32(VM_EXIT_INTR_ERROR_CODE);
Xiao Guangrongbf4ca232012-10-17 13:48:06 +08007082
Liran Alon9e869482018-03-12 13:12:51 +02007083 if (!vmx->rmode.vm86_active && is_gp_fault(intr_info)) {
7084 WARN_ON_ONCE(!enable_vmware_backdoor);
Sean Christopherson0ce97a22018-08-23 13:56:52 -07007085 er = kvm_emulate_instruction(vcpu,
Liran Alon9e869482018-03-12 13:12:51 +02007086 EMULTYPE_VMWARE | EMULTYPE_NO_UD_ON_FAIL);
7087 if (er == EMULATE_USER_EXIT)
7088 return 0;
7089 else if (er != EMULATE_DONE)
7090 kvm_queue_exception_e(vcpu, GP_VECTOR, error_code);
7091 return 1;
7092 }
7093
Xiao Guangrongbf4ca232012-10-17 13:48:06 +08007094 /*
7095 * A #PF with PFEC.RSVD = 1 indicates that the guest is accessing
7096 * MMIO; it is better to report an internal error.
7097 * See the comments in vmx_handle_exit.
7098 */
7099 if ((vect_info & VECTORING_INFO_VALID_MASK) &&
7100 !(is_page_fault(intr_info) && !(error_code & PFERR_RSVD_MASK))) {
7101 vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
7102 vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_SIMUL_EX;
Radim Krčmář80f0e952015-04-02 21:11:05 +02007103 vcpu->run->internal.ndata = 3;
Xiao Guangrongbf4ca232012-10-17 13:48:06 +08007104 vcpu->run->internal.data[0] = vect_info;
7105 vcpu->run->internal.data[1] = intr_info;
Radim Krčmář80f0e952015-04-02 21:11:05 +02007106 vcpu->run->internal.data[2] = error_code;
Xiao Guangrongbf4ca232012-10-17 13:48:06 +08007107 return 0;
7108 }
7109
Avi Kivity6aa8b732006-12-10 02:21:36 -08007110 if (is_page_fault(intr_info)) {
7111 cr2 = vmcs_readl(EXIT_QUALIFICATION);
Wanpeng Li1261bfa2017-07-13 18:30:40 -07007112 /* EPT won't cause page fault directly */
7113 WARN_ON_ONCE(!vcpu->arch.apf.host_apf_reason && enable_ept);
Paolo Bonzinid0006532017-08-11 18:36:43 +02007114 return kvm_handle_page_fault(vcpu, error_code, cr2, NULL, 0);
Avi Kivity6aa8b732006-12-10 02:21:36 -08007115 }
7116
Jan Kiszkad0bfb942008-12-15 13:52:10 +01007117 ex_no = intr_info & INTR_INFO_VECTOR_MASK;
Gleb Natapov0ca1b4f2012-12-20 16:57:47 +02007118
7119 if (vmx->rmode.vm86_active && rmode_exception(vcpu, ex_no))
7120 return handle_rmode_exception(vcpu, ex_no, error_code);
7121
Jan Kiszka42dbaa52008-12-15 13:52:10 +01007122 switch (ex_no) {
Eric Northup54a20552015-11-03 18:03:53 +01007123 case AC_VECTOR:
7124 kvm_queue_exception_e(vcpu, AC_VECTOR, error_code);
7125 return 1;
Jan Kiszka42dbaa52008-12-15 13:52:10 +01007126 case DB_VECTOR:
7127 dr6 = vmcs_readl(EXIT_QUALIFICATION);
7128 if (!(vcpu->guest_debug &
7129 (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP))) {
Jan Kiszka8246bf52014-01-04 18:47:17 +01007130 vcpu->arch.dr6 &= ~15;
Nadav Amit6f43ed02014-07-15 17:37:46 +03007131 vcpu->arch.dr6 |= dr6 | DR6_RTM;
Linus Torvalds32d43cd2018-03-20 12:16:59 -07007132 if (is_icebp(intr_info))
Huw Daviesfd2a4452014-04-16 10:02:51 +01007133 skip_emulated_instruction(vcpu);
7134
Jan Kiszka42dbaa52008-12-15 13:52:10 +01007135 kvm_queue_exception(vcpu, DB_VECTOR);
7136 return 1;
7137 }
7138 kvm_run->debug.arch.dr6 = dr6 | DR6_FIXED_1;
7139 kvm_run->debug.arch.dr7 = vmcs_readl(GUEST_DR7);
7140 /* fall through */
7141 case BP_VECTOR:
Jan Kiszkac573cd222010-02-23 17:47:53 +01007142 /*
7143 * Update instruction length as we may reinject #BP from
7144 * user space while in guest debugging mode. Reading it for
7145 * #DB as well causes no harm; it is not used in that case.
7146 */
7147 vmx->vcpu.arch.event_exit_inst_len =
7148 vmcs_read32(VM_EXIT_INSTRUCTION_LEN);
Avi Kivity6aa8b732006-12-10 02:21:36 -08007149 kvm_run->exit_reason = KVM_EXIT_DEBUG;
Avi Kivity0a434bb2011-04-28 15:59:33 +03007150 rip = kvm_rip_read(vcpu);
Jan Kiszkad0bfb942008-12-15 13:52:10 +01007151 kvm_run->debug.arch.pc = vmcs_readl(GUEST_CS_BASE) + rip;
7152 kvm_run->debug.arch.exception = ex_no;
Jan Kiszka42dbaa52008-12-15 13:52:10 +01007153 break;
7154 default:
Jan Kiszkad0bfb942008-12-15 13:52:10 +01007155 kvm_run->exit_reason = KVM_EXIT_EXCEPTION;
7156 kvm_run->ex.exception = ex_no;
7157 kvm_run->ex.error_code = error_code;
Jan Kiszka42dbaa52008-12-15 13:52:10 +01007158 break;
Avi Kivity6aa8b732006-12-10 02:21:36 -08007159 }
Avi Kivity6aa8b732006-12-10 02:21:36 -08007160 return 0;
7161}
7162
Avi Kivity851ba692009-08-24 11:10:17 +03007163static int handle_external_interrupt(struct kvm_vcpu *vcpu)
Avi Kivity6aa8b732006-12-10 02:21:36 -08007164{
Avi Kivity1165f5f2007-04-19 17:27:43 +03007165 ++vcpu->stat.irq_exits;
Avi Kivity6aa8b732006-12-10 02:21:36 -08007166 return 1;
7167}
7168
Avi Kivity851ba692009-08-24 11:10:17 +03007169static int handle_triple_fault(struct kvm_vcpu *vcpu)
Avi Kivity988ad742007-02-12 00:54:36 -08007170{
Avi Kivity851ba692009-08-24 11:10:17 +03007171 vcpu->run->exit_reason = KVM_EXIT_SHUTDOWN;
Wanpeng Libbeac282017-08-09 22:33:12 -07007172 vcpu->mmio_needed = 0;
Avi Kivity988ad742007-02-12 00:54:36 -08007173 return 0;
7174}
Avi Kivity6aa8b732006-12-10 02:21:36 -08007175
Avi Kivity851ba692009-08-24 11:10:17 +03007176static int handle_io(struct kvm_vcpu *vcpu)
Avi Kivity6aa8b732006-12-10 02:21:36 -08007177{
He, Qingbfdaab02007-09-12 14:18:28 +08007178 unsigned long exit_qualification;
Sean Christophersondca7f122018-03-08 08:57:27 -08007179 int size, in, string;
Avi Kivity039576c2007-03-20 12:46:50 +02007180 unsigned port;
Avi Kivity6aa8b732006-12-10 02:21:36 -08007181
He, Qingbfdaab02007-09-12 14:18:28 +08007182 exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
Avi Kivity039576c2007-03-20 12:46:50 +02007183 string = (exit_qualification & 16) != 0;
Laurent Viviere70669a2007-08-05 10:36:40 +03007184
Gleb Natapovcf8f70b2010-03-18 15:20:23 +02007185 ++vcpu->stat.io_exits;
7186
Sean Christopherson432baf62018-03-08 08:57:26 -08007187 if (string)
Sean Christopherson0ce97a22018-08-23 13:56:52 -07007188 return kvm_emulate_instruction(vcpu, 0) == EMULATE_DONE;
Gleb Natapovcf8f70b2010-03-18 15:20:23 +02007189
7190 port = exit_qualification >> 16;
7191 size = (exit_qualification & 7) + 1;
Sean Christopherson432baf62018-03-08 08:57:26 -08007192 in = (exit_qualification & 8) != 0;
Gleb Natapovcf8f70b2010-03-18 15:20:23 +02007193
Sean Christophersondca7f122018-03-08 08:57:27 -08007194 return kvm_fast_pio(vcpu, size, port, in);
Avi Kivity6aa8b732006-12-10 02:21:36 -08007195}
7196
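/*
 * Worked example, not part of the original file: how handle_io() above
 * decodes the I/O-instruction exit qualification.  Bits 2:0 hold the access
 * size minus one, bit 3 the direction (1 = IN), bit 4 the string flag and
 * bits 31:16 the port number.  The helper name is hypothetical.
 */
static inline void example_decode_io_exit_qual(unsigned long qual, int *size,
					       bool *in, bool *string, u16 *port)
{
	*size   = (qual & 7) + 1;		/* 1, 2 or 4 bytes     */
	*in     = qual & 8;			/* 1 = IN, 0 = OUT     */
	*string = qual & 16;			/* INS/OUTS            */
	*port   = (qual >> 16) & 0xffff;	/* e.g. 0x3f8 for COM1 */
}
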
Ingo Molnar102d8322007-02-19 14:37:47 +02007197static void
7198vmx_patch_hypercall(struct kvm_vcpu *vcpu, unsigned char *hypercall)
7199{
7200 /*
7201 * Patch in the VMCALL instruction:
7202 */
7203 hypercall[0] = 0x0f;
7204 hypercall[1] = 0x01;
7205 hypercall[2] = 0xc1;
Ingo Molnar102d8322007-02-19 14:37:47 +02007206}
7207
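/*
 * Brief note, not part of the original file: the three bytes written by
 * vmx_patch_hypercall() above are the encoding of the VMX-native VMCALL
 * instruction.  The hypercall-patching path in x86.c uses this callback to
 * rewrite a guest hypercall site that used the other vendor's instruction
 * (AMD's VMMCALL, 0f 01 d9) so that subsequent hypercalls do not fault.
 */
static const u8 example_vmcall_insn[3]  = { 0x0f, 0x01, 0xc1 };	/* VMCALL  */
static const u8 example_vmmcall_insn[3] = { 0x0f, 0x01, 0xd9 };	/* VMMCALL */
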
Guo Chao0fa06072012-06-28 15:16:19 +08007208/* called to set cr0 as appropriate for a mov-to-cr0 exit. */
Nadav Har'Eleeadf9e2011-05-25 23:14:38 +03007209static int handle_set_cr0(struct kvm_vcpu *vcpu, unsigned long val)
7210{
Nadav Har'Eleeadf9e2011-05-25 23:14:38 +03007211 if (is_guest_mode(vcpu)) {
Jan Kiszka1a0d74e2013-03-07 14:08:07 +01007212 struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
7213 unsigned long orig_val = val;
7214
Nadav Har'Eleeadf9e2011-05-25 23:14:38 +03007215 /*
7216 * We get here when L2 changed cr0 in a way that did not change
7217 * any of L1's shadowed bits (see nested_vmx_exit_handled_cr),
Jan Kiszka1a0d74e2013-03-07 14:08:07 +01007218 * but did change L0 shadowed bits. So we first calculate the
7219 * effective cr0 value that L1 would like to write into the
7220 * hardware. It consists of the L2-owned bits from the new
7221 * value combined with the L1-owned bits from L1's guest_cr0.
Nadav Har'Eleeadf9e2011-05-25 23:14:38 +03007222 */
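	/*
	 * Illustrative example (hypothetical mask): if cr0_guest_host_mask
	 * were X86_CR0_TS only, the TS bit would be taken from
	 * vmcs12->guest_cr0 and every other bit from the value L2 just
	 * tried to write.
	 */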
Jan Kiszka1a0d74e2013-03-07 14:08:07 +01007223 val = (val & ~vmcs12->cr0_guest_host_mask) |
7224 (vmcs12->guest_cr0 & vmcs12->cr0_guest_host_mask);
7225
David Matlack38991522016-11-29 18:14:08 -08007226 if (!nested_guest_cr0_valid(vcpu, val))
Nadav Har'Eleeadf9e2011-05-25 23:14:38 +03007227 return 1;
Jan Kiszka1a0d74e2013-03-07 14:08:07 +01007228
7229 if (kvm_set_cr0(vcpu, val))
7230 return 1;
7231 vmcs_writel(CR0_READ_SHADOW, orig_val);
Nadav Har'Eleeadf9e2011-05-25 23:14:38 +03007232 return 0;
Jan Kiszka1a0d74e2013-03-07 14:08:07 +01007233 } else {
7234 if (to_vmx(vcpu)->nested.vmxon &&
David Matlack38991522016-11-29 18:14:08 -08007235 !nested_host_cr0_valid(vcpu, val))
Jan Kiszka1a0d74e2013-03-07 14:08:07 +01007236 return 1;
David Matlack38991522016-11-29 18:14:08 -08007237
Nadav Har'Eleeadf9e2011-05-25 23:14:38 +03007238 return kvm_set_cr0(vcpu, val);
Jan Kiszka1a0d74e2013-03-07 14:08:07 +01007239 }
Nadav Har'Eleeadf9e2011-05-25 23:14:38 +03007240}
7241
7242static int handle_set_cr4(struct kvm_vcpu *vcpu, unsigned long val)
7243{
7244 if (is_guest_mode(vcpu)) {
Jan Kiszka1a0d74e2013-03-07 14:08:07 +01007245 struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
7246 unsigned long orig_val = val;
7247
7248 /* analogously to handle_set_cr0 */
7249 val = (val & ~vmcs12->cr4_guest_host_mask) |
7250 (vmcs12->guest_cr4 & vmcs12->cr4_guest_host_mask);
7251 if (kvm_set_cr4(vcpu, val))
Nadav Har'Eleeadf9e2011-05-25 23:14:38 +03007252 return 1;
Jan Kiszka1a0d74e2013-03-07 14:08:07 +01007253 vmcs_writel(CR4_READ_SHADOW, orig_val);
Nadav Har'Eleeadf9e2011-05-25 23:14:38 +03007254 return 0;
7255 } else
7256 return kvm_set_cr4(vcpu, val);
7257}
7258
Paolo Bonzini0367f202016-07-12 10:44:55 +02007259static int handle_desc(struct kvm_vcpu *vcpu)
7260{
7261 WARN_ON(!(vcpu->arch.cr4 & X86_CR4_UMIP));
Sean Christopherson0ce97a22018-08-23 13:56:52 -07007262 return kvm_emulate_instruction(vcpu, 0) == EMULATE_DONE;
Paolo Bonzini0367f202016-07-12 10:44:55 +02007263}
7264
Avi Kivity851ba692009-08-24 11:10:17 +03007265static int handle_cr(struct kvm_vcpu *vcpu)
Avi Kivity6aa8b732006-12-10 02:21:36 -08007266{
Marcelo Tosatti229456f2009-06-17 09:22:14 -03007267 unsigned long exit_qualification, val;
Avi Kivity6aa8b732006-12-10 02:21:36 -08007268 int cr;
7269 int reg;
Avi Kivity49a9b072010-06-10 17:02:14 +03007270 int err;
Kyle Huey6affcbe2016-11-29 12:40:40 -08007271 int ret;
Avi Kivity6aa8b732006-12-10 02:21:36 -08007272
He, Qingbfdaab02007-09-12 14:18:28 +08007273 exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
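	/*
	 * Exit qualification layout for control-register accesses, as
	 * decoded below (SDM Vol. 3):
	 *   bits  3:0  - number of the control register
	 *   bits  5:4  - access type (0 = MOV to CR, 1 = MOV from CR,
	 *                2 = CLTS, 3 = LMSW)
	 *   bits 11:8  - GPR operand for MOV
	 *   bits 31:16 - LMSW source data
	 */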
Avi Kivity6aa8b732006-12-10 02:21:36 -08007274 cr = exit_qualification & 15;
7275 reg = (exit_qualification >> 8) & 15;
7276 switch ((exit_qualification >> 4) & 3) {
7277 case 0: /* mov to cr */
Nadav Amit1e32c072014-06-18 17:19:25 +03007278 val = kvm_register_readl(vcpu, reg);
Marcelo Tosatti229456f2009-06-17 09:22:14 -03007279 trace_kvm_cr_write(cr, val);
Avi Kivity6aa8b732006-12-10 02:21:36 -08007280 switch (cr) {
7281 case 0:
Nadav Har'Eleeadf9e2011-05-25 23:14:38 +03007282 err = handle_set_cr0(vcpu, val);
Kyle Huey6affcbe2016-11-29 12:40:40 -08007283 return kvm_complete_insn_gp(vcpu, err);
Avi Kivity6aa8b732006-12-10 02:21:36 -08007284 case 3:
Sean Christophersone1de91c2018-03-05 12:04:41 -08007285 WARN_ON_ONCE(enable_unrestricted_guest);
Avi Kivity23902182010-06-10 17:02:16 +03007286 err = kvm_set_cr3(vcpu, val);
Kyle Huey6affcbe2016-11-29 12:40:40 -08007287 return kvm_complete_insn_gp(vcpu, err);
Avi Kivity6aa8b732006-12-10 02:21:36 -08007288 case 4:
Nadav Har'Eleeadf9e2011-05-25 23:14:38 +03007289 err = handle_set_cr4(vcpu, val);
Kyle Huey6affcbe2016-11-29 12:40:40 -08007290 return kvm_complete_insn_gp(vcpu, err);
Gleb Natapov0a5fff192009-04-21 17:45:06 +03007291 case 8: {
7292 u8 cr8_prev = kvm_get_cr8(vcpu);
Nadav Amit1e32c072014-06-18 17:19:25 +03007293 u8 cr8 = (u8)val;
Andre Przywaraeea1cff2010-12-21 11:12:00 +01007294 err = kvm_set_cr8(vcpu, cr8);
Kyle Huey6affcbe2016-11-29 12:40:40 -08007295 ret = kvm_complete_insn_gp(vcpu, err);
Paolo Bonzini35754c92015-07-29 12:05:37 +02007296 if (lapic_in_kernel(vcpu))
Kyle Huey6affcbe2016-11-29 12:40:40 -08007297 return ret;
Gleb Natapov0a5fff192009-04-21 17:45:06 +03007298 if (cr8_prev <= cr8)
Kyle Huey6affcbe2016-11-29 12:40:40 -08007299 return ret;
7300 /*
7301 * TODO: we might be squashing a
7302 * KVM_GUESTDBG_SINGLESTEP-triggered
7303 * KVM_EXIT_DEBUG here.
7304 */
Avi Kivity851ba692009-08-24 11:10:17 +03007305 vcpu->run->exit_reason = KVM_EXIT_SET_TPR;
Gleb Natapov0a5fff192009-04-21 17:45:06 +03007306 return 0;
7307 }
Peter Senna Tschudin4b8073e2012-09-18 18:36:14 +02007308 }
Avi Kivity6aa8b732006-12-10 02:21:36 -08007309 break;
Anthony Liguori25c4c272007-04-27 09:29:21 +03007310 case 2: /* clts */
Paolo Bonzinibd7e5b02017-02-03 21:18:52 -08007311 WARN_ONCE(1, "Guest should always own CR0.TS");
7312 vmx_set_cr0(vcpu, kvm_read_cr0_bits(vcpu, ~X86_CR0_TS));
Avi Kivity4d4ec082009-12-29 18:07:30 +02007313 trace_kvm_cr_write(0, kvm_read_cr0(vcpu));
Kyle Huey6affcbe2016-11-29 12:40:40 -08007314 return kvm_skip_emulated_instruction(vcpu);
Avi Kivity6aa8b732006-12-10 02:21:36 -08007315 case 1: /*mov from cr*/
7316 switch (cr) {
7317 case 3:
Sean Christophersone1de91c2018-03-05 12:04:41 -08007318 WARN_ON_ONCE(enable_unrestricted_guest);
Avi Kivity9f8fe502010-12-05 17:30:00 +02007319 val = kvm_read_cr3(vcpu);
7320 kvm_register_write(vcpu, reg, val);
7321 trace_kvm_cr_read(cr, val);
Kyle Huey6affcbe2016-11-29 12:40:40 -08007322 return kvm_skip_emulated_instruction(vcpu);
Avi Kivity6aa8b732006-12-10 02:21:36 -08007323 case 8:
Marcelo Tosatti229456f2009-06-17 09:22:14 -03007324 val = kvm_get_cr8(vcpu);
7325 kvm_register_write(vcpu, reg, val);
7326 trace_kvm_cr_read(cr, val);
Kyle Huey6affcbe2016-11-29 12:40:40 -08007327 return kvm_skip_emulated_instruction(vcpu);
Avi Kivity6aa8b732006-12-10 02:21:36 -08007328 }
7329 break;
7330 case 3: /* lmsw */
Avi Kivitya1f83a72009-12-29 17:33:58 +02007331 val = (exit_qualification >> LMSW_SOURCE_DATA_SHIFT) & 0x0f;
Avi Kivity4d4ec082009-12-29 18:07:30 +02007332 trace_kvm_cr_write(0, (kvm_read_cr0(vcpu) & ~0xful) | val);
Avi Kivitya1f83a72009-12-29 17:33:58 +02007333 kvm_lmsw(vcpu, val);
Avi Kivity6aa8b732006-12-10 02:21:36 -08007334
Kyle Huey6affcbe2016-11-29 12:40:40 -08007335 return kvm_skip_emulated_instruction(vcpu);
Avi Kivity6aa8b732006-12-10 02:21:36 -08007336 default:
7337 break;
7338 }
Avi Kivity851ba692009-08-24 11:10:17 +03007339 vcpu->run->exit_reason = 0;
Christoffer Dalla737f252012-06-03 21:17:48 +03007340 vcpu_unimpl(vcpu, "unhandled control register: op %d cr %d\n",
Avi Kivity6aa8b732006-12-10 02:21:36 -08007341 (int)(exit_qualification >> 4) & 3, cr);
7342 return 0;
7343}
7344
Avi Kivity851ba692009-08-24 11:10:17 +03007345static int handle_dr(struct kvm_vcpu *vcpu)
Avi Kivity6aa8b732006-12-10 02:21:36 -08007346{
He, Qingbfdaab02007-09-12 14:18:28 +08007347 unsigned long exit_qualification;
Nadav Amit16f8a6f2014-10-03 01:10:05 +03007348 int dr, dr7, reg;
7349
7350 exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
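	/*
	 * For debug-register accesses the low bits of the exit
	 * qualification hold the DR number (DEBUG_REG_ACCESS_NUM), bit 4
	 * the direction (TYPE_MOV_FROM_DR) and bits 11:8 the GPR operand
	 * (DEBUG_REG_ACCESS_REG), as decoded below.
	 */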
7351 dr = exit_qualification & DEBUG_REG_ACCESS_NUM;
7352
7353 /* First, if DR does not exist, trigger UD */
7354 if (!kvm_require_dr(vcpu, dr))
7355 return 1;
Avi Kivity6aa8b732006-12-10 02:21:36 -08007356
Jan Kiszkaf2483412010-01-20 18:20:20 +01007357 /* Do not handle if the CPL > 0; a #GP will be injected on re-entry. */
Avi Kivity0a79b002009-09-01 12:03:25 +03007358 if (!kvm_require_cpl(vcpu, 0))
7359 return 1;
Nadav Amit16f8a6f2014-10-03 01:10:05 +03007360 dr7 = vmcs_readl(GUEST_DR7);
7361 if (dr7 & DR7_GD) {
Jan Kiszka42dbaa52008-12-15 13:52:10 +01007362 /*
7363 * As the vm-exit takes precedence over the debug trap, we
7364 * need to emulate the latter, either for the host or the
7365 * guest debugging itself.
7366 */
7367 if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP) {
Avi Kivity851ba692009-08-24 11:10:17 +03007368 vcpu->run->debug.arch.dr6 = vcpu->arch.dr6;
Nadav Amit16f8a6f2014-10-03 01:10:05 +03007369 vcpu->run->debug.arch.dr7 = dr7;
Nadav Amit82b32772014-11-02 11:54:45 +02007370 vcpu->run->debug.arch.pc = kvm_get_linear_rip(vcpu);
Avi Kivity851ba692009-08-24 11:10:17 +03007371 vcpu->run->debug.arch.exception = DB_VECTOR;
7372 vcpu->run->exit_reason = KVM_EXIT_DEBUG;
Jan Kiszka42dbaa52008-12-15 13:52:10 +01007373 return 0;
7374 } else {
Nadav Amit7305eb52014-11-02 11:54:44 +02007375 vcpu->arch.dr6 &= ~15;
Nadav Amit6f43ed02014-07-15 17:37:46 +03007376 vcpu->arch.dr6 |= DR6_BD | DR6_RTM;
Jan Kiszka42dbaa52008-12-15 13:52:10 +01007377 kvm_queue_exception(vcpu, DB_VECTOR);
7378 return 1;
7379 }
7380 }
7381
Paolo Bonzini81908bf2014-02-21 10:32:27 +01007382 if (vcpu->guest_debug == 0) {
Paolo Bonzini8f223722016-02-26 12:09:49 +01007383 vmcs_clear_bits(CPU_BASED_VM_EXEC_CONTROL,
7384 CPU_BASED_MOV_DR_EXITING);
Paolo Bonzini81908bf2014-02-21 10:32:27 +01007385
7386 /*
7387 * No more DR vmexits; force a reload of the debug registers
7388 * and reenter on this instruction. The next vmexit will
7389 * retrieve the full state of the debug registers.
7390 */
7391 vcpu->arch.switch_db_regs |= KVM_DEBUGREG_WONT_EXIT;
7392 return 1;
7393 }
7394
Jan Kiszka42dbaa52008-12-15 13:52:10 +01007395 reg = DEBUG_REG_ACCESS_REG(exit_qualification);
7396 if (exit_qualification & TYPE_MOV_FROM_DR) {
Gleb Natapov020df072010-04-13 10:05:23 +03007397 unsigned long val;
Jan Kiszka4c4d5632013-12-18 19:16:24 +01007398
7399 if (kvm_get_dr(vcpu, dr, &val))
7400 return 1;
7401 kvm_register_write(vcpu, reg, val);
Gleb Natapov020df072010-04-13 10:05:23 +03007402 } else
Nadav Amit57773922014-06-18 17:19:23 +03007403 if (kvm_set_dr(vcpu, dr, kvm_register_readl(vcpu, reg)))
Jan Kiszka4c4d5632013-12-18 19:16:24 +01007404 return 1;
7405
Kyle Huey6affcbe2016-11-29 12:40:40 -08007406 return kvm_skip_emulated_instruction(vcpu);
Avi Kivity6aa8b732006-12-10 02:21:36 -08007407}
7408
Jan Kiszka73aaf249e2014-01-04 18:47:16 +01007409static u64 vmx_get_dr6(struct kvm_vcpu *vcpu)
7410{
7411 return vcpu->arch.dr6;
7412}
7413
7414static void vmx_set_dr6(struct kvm_vcpu *vcpu, unsigned long val)
7415{
7416}
7417
Paolo Bonzini81908bf2014-02-21 10:32:27 +01007418static void vmx_sync_dirty_debug_regs(struct kvm_vcpu *vcpu)
7419{
Paolo Bonzini81908bf2014-02-21 10:32:27 +01007420 get_debugreg(vcpu->arch.db[0], 0);
7421 get_debugreg(vcpu->arch.db[1], 1);
7422 get_debugreg(vcpu->arch.db[2], 2);
7423 get_debugreg(vcpu->arch.db[3], 3);
7424 get_debugreg(vcpu->arch.dr6, 6);
7425 vcpu->arch.dr7 = vmcs_readl(GUEST_DR7);
7426
7427 vcpu->arch.switch_db_regs &= ~KVM_DEBUGREG_WONT_EXIT;
Paolo Bonzini8f223722016-02-26 12:09:49 +01007428 vmcs_set_bits(CPU_BASED_VM_EXEC_CONTROL, CPU_BASED_MOV_DR_EXITING);
Paolo Bonzini81908bf2014-02-21 10:32:27 +01007429}
7430
Gleb Natapov020df072010-04-13 10:05:23 +03007431static void vmx_set_dr7(struct kvm_vcpu *vcpu, unsigned long val)
7432{
7433 vmcs_writel(GUEST_DR7, val);
7434}
7435
Avi Kivity851ba692009-08-24 11:10:17 +03007436static int handle_cpuid(struct kvm_vcpu *vcpu)
Avi Kivity6aa8b732006-12-10 02:21:36 -08007437{
Kyle Huey6a908b62016-11-29 12:40:37 -08007438 return kvm_emulate_cpuid(vcpu);
Avi Kivity6aa8b732006-12-10 02:21:36 -08007439}
7440
Avi Kivity851ba692009-08-24 11:10:17 +03007441static int handle_rdmsr(struct kvm_vcpu *vcpu)
Avi Kivity6aa8b732006-12-10 02:21:36 -08007442{
Zhang Xiantaoad312c72007-12-13 23:50:52 +08007443 u32 ecx = vcpu->arch.regs[VCPU_REGS_RCX];
Paolo Bonzini609e36d2015-04-08 15:30:38 +02007444 struct msr_data msr_info;
Avi Kivity6aa8b732006-12-10 02:21:36 -08007445
Paolo Bonzini609e36d2015-04-08 15:30:38 +02007446 msr_info.index = ecx;
7447 msr_info.host_initiated = false;
7448 if (vmx_get_msr(vcpu, &msr_info)) {
Avi Kivity59200272010-01-25 19:47:02 +02007449 trace_kvm_msr_read_ex(ecx);
Avi Kivityc1a5d4f2007-11-25 14:12:03 +02007450 kvm_inject_gp(vcpu, 0);
Avi Kivity6aa8b732006-12-10 02:21:36 -08007451 return 1;
7452 }
7453
Paolo Bonzini609e36d2015-04-08 15:30:38 +02007454 trace_kvm_msr_read(ecx, msr_info.data);
Feng (Eric) Liu2714d1d2008-04-10 15:31:10 -04007455
Avi Kivity6aa8b732006-12-10 02:21:36 -08007456 /* FIXME: handling of bits 32:63 of rax, rdx */
Paolo Bonzini609e36d2015-04-08 15:30:38 +02007457 vcpu->arch.regs[VCPU_REGS_RAX] = msr_info.data & -1u;
7458 vcpu->arch.regs[VCPU_REGS_RDX] = (msr_info.data >> 32) & -1u;
Kyle Huey6affcbe2016-11-29 12:40:40 -08007459 return kvm_skip_emulated_instruction(vcpu);
Avi Kivity6aa8b732006-12-10 02:21:36 -08007460}
7461
Avi Kivity851ba692009-08-24 11:10:17 +03007462static int handle_wrmsr(struct kvm_vcpu *vcpu)
Avi Kivity6aa8b732006-12-10 02:21:36 -08007463{
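	/* WRMSR: ECX selects the MSR, EDX:EAX supplies the 64-bit value. */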
Will Auld8fe8ab42012-11-29 12:42:12 -08007464 struct msr_data msr;
Zhang Xiantaoad312c72007-12-13 23:50:52 +08007465 u32 ecx = vcpu->arch.regs[VCPU_REGS_RCX];
7466 u64 data = (vcpu->arch.regs[VCPU_REGS_RAX] & -1u)
7467 | ((u64)(vcpu->arch.regs[VCPU_REGS_RDX] & -1u) << 32);
Avi Kivity6aa8b732006-12-10 02:21:36 -08007468
Will Auld8fe8ab42012-11-29 12:42:12 -08007469 msr.data = data;
7470 msr.index = ecx;
7471 msr.host_initiated = false;
Nadav Amit854e8bb2014-09-16 03:24:05 +03007472 if (kvm_set_msr(vcpu, &msr) != 0) {
Avi Kivity59200272010-01-25 19:47:02 +02007473 trace_kvm_msr_write_ex(ecx, data);
Avi Kivityc1a5d4f2007-11-25 14:12:03 +02007474 kvm_inject_gp(vcpu, 0);
Avi Kivity6aa8b732006-12-10 02:21:36 -08007475 return 1;
7476 }
7477
Avi Kivity59200272010-01-25 19:47:02 +02007478 trace_kvm_msr_write(ecx, data);
Kyle Huey6affcbe2016-11-29 12:40:40 -08007479 return kvm_skip_emulated_instruction(vcpu);
Avi Kivity6aa8b732006-12-10 02:21:36 -08007480}
7481
Avi Kivity851ba692009-08-24 11:10:17 +03007482static int handle_tpr_below_threshold(struct kvm_vcpu *vcpu)
Yang, Sheng6e5d8652007-09-12 18:03:11 +08007483{
Paolo Bonzinieb90f342016-12-18 14:02:21 +01007484 kvm_apic_update_ppr(vcpu);
Yang, Sheng6e5d8652007-09-12 18:03:11 +08007485 return 1;
7486}
7487
Avi Kivity851ba692009-08-24 11:10:17 +03007488static int handle_interrupt_window(struct kvm_vcpu *vcpu)
Avi Kivity6aa8b732006-12-10 02:21:36 -08007489{
Paolo Bonzini47c01522016-12-19 11:44:07 +01007490 vmcs_clear_bits(CPU_BASED_VM_EXEC_CONTROL,
7491 CPU_BASED_VIRTUAL_INTR_PENDING);
Feng (Eric) Liu2714d1d2008-04-10 15:31:10 -04007492
Avi Kivity3842d132010-07-27 12:30:24 +03007493 kvm_make_request(KVM_REQ_EVENT, vcpu);
7494
Jan Kiszkaa26bf122008-09-26 09:30:45 +02007495 ++vcpu->stat.irq_window_exits;
Avi Kivity6aa8b732006-12-10 02:21:36 -08007496 return 1;
7497}
7498
Avi Kivity851ba692009-08-24 11:10:17 +03007499static int handle_halt(struct kvm_vcpu *vcpu)
Avi Kivity6aa8b732006-12-10 02:21:36 -08007500{
Avi Kivityd3bef152007-06-05 15:53:05 +03007501 return kvm_emulate_halt(vcpu);
Avi Kivity6aa8b732006-12-10 02:21:36 -08007502}
7503
Avi Kivity851ba692009-08-24 11:10:17 +03007504static int handle_vmcall(struct kvm_vcpu *vcpu)
Ingo Molnarc21415e2007-02-19 14:37:47 +02007505{
Andrey Smetanin0d9c0552016-02-11 16:44:59 +03007506 return kvm_emulate_hypercall(vcpu);
Ingo Molnarc21415e2007-02-19 14:37:47 +02007507}
7508
Gleb Natapovec25d5e2010-11-01 15:35:01 +02007509static int handle_invd(struct kvm_vcpu *vcpu)
7510{
Sean Christopherson0ce97a22018-08-23 13:56:52 -07007511 return kvm_emulate_instruction(vcpu, 0) == EMULATE_DONE;
Gleb Natapovec25d5e2010-11-01 15:35:01 +02007512}
7513
Avi Kivity851ba692009-08-24 11:10:17 +03007514static int handle_invlpg(struct kvm_vcpu *vcpu)
Marcelo Tosattia7052892008-09-23 13:18:35 -03007515{
Sheng Yangf9c617f2009-03-25 10:08:52 +08007516 unsigned long exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
Marcelo Tosattia7052892008-09-23 13:18:35 -03007517
7518 kvm_mmu_invlpg(vcpu, exit_qualification);
Kyle Huey6affcbe2016-11-29 12:40:40 -08007519 return kvm_skip_emulated_instruction(vcpu);
Marcelo Tosattia7052892008-09-23 13:18:35 -03007520}
7521
Avi Kivityfee84b02011-11-10 14:57:25 +02007522static int handle_rdpmc(struct kvm_vcpu *vcpu)
7523{
7524 int err;
7525
7526 err = kvm_rdpmc(vcpu);
Kyle Huey6affcbe2016-11-29 12:40:40 -08007527 return kvm_complete_insn_gp(vcpu, err);
Avi Kivityfee84b02011-11-10 14:57:25 +02007528}
7529
Avi Kivity851ba692009-08-24 11:10:17 +03007530static int handle_wbinvd(struct kvm_vcpu *vcpu)
Eddie Donge5edaa02007-11-11 12:28:35 +02007531{
Kyle Huey6affcbe2016-11-29 12:40:40 -08007532 return kvm_emulate_wbinvd(vcpu);
Eddie Donge5edaa02007-11-11 12:28:35 +02007533}
7534
Dexuan Cui2acf9232010-06-10 11:27:12 +08007535static int handle_xsetbv(struct kvm_vcpu *vcpu)
7536{
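	/* XSETBV: ECX selects the XCR, EDX:EAX supplies the new value. */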
7537 u64 new_bv = kvm_read_edx_eax(vcpu);
7538 u32 index = kvm_register_read(vcpu, VCPU_REGS_RCX);
7539
7540 if (kvm_set_xcr(vcpu, index, new_bv) == 0)
Kyle Huey6affcbe2016-11-29 12:40:40 -08007541 return kvm_skip_emulated_instruction(vcpu);
Dexuan Cui2acf9232010-06-10 11:27:12 +08007542 return 1;
7543}
7544
Wanpeng Lif53cd632014-12-02 19:14:58 +08007545static int handle_xsaves(struct kvm_vcpu *vcpu)
7546{
Kyle Huey6affcbe2016-11-29 12:40:40 -08007547 kvm_skip_emulated_instruction(vcpu);
Wanpeng Lif53cd632014-12-02 19:14:58 +08007548 WARN(1, "this should never happen\n");
7549 return 1;
7550}
7551
7552static int handle_xrstors(struct kvm_vcpu *vcpu)
7553{
Kyle Huey6affcbe2016-11-29 12:40:40 -08007554 kvm_skip_emulated_instruction(vcpu);
Wanpeng Lif53cd632014-12-02 19:14:58 +08007555 WARN(1, "this should never happen\n");
7556 return 1;
7557}
7558
Avi Kivity851ba692009-08-24 11:10:17 +03007559static int handle_apic_access(struct kvm_vcpu *vcpu)
Sheng Yangf78e0e22007-10-29 09:40:42 +08007560{
Kevin Tian58fbbf22011-08-30 13:56:17 +03007561 if (likely(fasteoi)) {
7562 unsigned long exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
7563 int access_type, offset;
7564
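		/*
		 * The exit qualification gives the page offset of the APIC
		 * access (APIC_ACCESS_OFFSET) and its type
		 * (APIC_ACCESS_TYPE), extracted below.
		 */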
7565 access_type = exit_qualification & APIC_ACCESS_TYPE;
7566 offset = exit_qualification & APIC_ACCESS_OFFSET;
7567 /*
7568 * A sane guest uses MOV to write the EOI register; the value
7569 * written does not matter. Short-circuit that case here to avoid
7570 * heavy instruction emulation.
7571 */
7572 if ((access_type == TYPE_LINEAR_APIC_INST_WRITE) &&
7573 (offset == APIC_EOI)) {
7574 kvm_lapic_set_eoi(vcpu);
Kyle Huey6affcbe2016-11-29 12:40:40 -08007575 return kvm_skip_emulated_instruction(vcpu);
Kevin Tian58fbbf22011-08-30 13:56:17 +03007576 }
7577 }
Sean Christopherson0ce97a22018-08-23 13:56:52 -07007578 return kvm_emulate_instruction(vcpu, 0) == EMULATE_DONE;
Sheng Yangf78e0e22007-10-29 09:40:42 +08007579}
7580
Yang Zhangc7c9c562013-01-25 10:18:51 +08007581static int handle_apic_eoi_induced(struct kvm_vcpu *vcpu)
7582{
7583 unsigned long exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
7584 int vector = exit_qualification & 0xff;
7585
7586 /* EOI-induced VM exit is trap-like and thus no need to adjust IP */
7587 kvm_apic_set_eoi_accelerated(vcpu, vector);
7588 return 1;
7589}
7590
Yang Zhang83d4c282013-01-25 10:18:49 +08007591static int handle_apic_write(struct kvm_vcpu *vcpu)
7592{
7593 unsigned long exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
7594 u32 offset = exit_qualification & 0xfff;
7595
7596 /* APIC-write VM exit is trap-like and thus no need to adjust IP */
7597 kvm_apic_write_nodecode(vcpu, offset);
7598 return 1;
7599}
7600
Avi Kivity851ba692009-08-24 11:10:17 +03007601static int handle_task_switch(struct kvm_vcpu *vcpu)
Izik Eidus37817f22008-03-24 23:14:53 +02007602{
Jan Kiszka60637aa2008-09-26 09:30:47 +02007603 struct vcpu_vmx *vmx = to_vmx(vcpu);
Izik Eidus37817f22008-03-24 23:14:53 +02007604 unsigned long exit_qualification;
Jan Kiszkae269fb22010-04-14 15:51:09 +02007605 bool has_error_code = false;
7606 u32 error_code = 0;
Izik Eidus37817f22008-03-24 23:14:53 +02007607 u16 tss_selector;
Kevin Wolf7f3d35f2012-02-08 14:34:38 +01007608 int reason, type, idt_v, idt_index;
Gleb Natapov64a7ec02009-03-30 16:03:29 +03007609
7610 idt_v = (vmx->idt_vectoring_info & VECTORING_INFO_VALID_MASK);
Kevin Wolf7f3d35f2012-02-08 14:34:38 +01007611 idt_index = (vmx->idt_vectoring_info & VECTORING_INFO_VECTOR_MASK);
Gleb Natapov64a7ec02009-03-30 16:03:29 +03007612 type = (vmx->idt_vectoring_info & VECTORING_INFO_TYPE_MASK);
Izik Eidus37817f22008-03-24 23:14:53 +02007613
7614 exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
7615
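	/*
	 * For task-switch exits, bits 15:0 of the exit qualification hold
	 * the selector of the new task and bits 31:30 the source of the
	 * switch (CALL, IRET, JMP or task gate in the IDT).
	 */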
7616 reason = (u32)exit_qualification >> 30;
Gleb Natapov64a7ec02009-03-30 16:03:29 +03007617 if (reason == TASK_SWITCH_GATE && idt_v) {
7618 switch (type) {
7619 case INTR_TYPE_NMI_INTR:
7620 vcpu->arch.nmi_injected = false;
Avi Kivity654f06f2011-03-23 15:02:47 +02007621 vmx_set_nmi_mask(vcpu, true);
Gleb Natapov64a7ec02009-03-30 16:03:29 +03007622 break;
7623 case INTR_TYPE_EXT_INTR:
Gleb Natapov66fd3f72009-05-11 13:35:50 +03007624 case INTR_TYPE_SOFT_INTR:
Gleb Natapov64a7ec02009-03-30 16:03:29 +03007625 kvm_clear_interrupt_queue(vcpu);
7626 break;
7627 case INTR_TYPE_HARD_EXCEPTION:
Jan Kiszkae269fb22010-04-14 15:51:09 +02007628 if (vmx->idt_vectoring_info &
7629 VECTORING_INFO_DELIVER_CODE_MASK) {
7630 has_error_code = true;
7631 error_code =
7632 vmcs_read32(IDT_VECTORING_ERROR_CODE);
7633 }
7634 /* fall through */
Gleb Natapov64a7ec02009-03-30 16:03:29 +03007635 case INTR_TYPE_SOFT_EXCEPTION:
7636 kvm_clear_exception_queue(vcpu);
7637 break;
7638 default:
7639 break;
7640 }
Jan Kiszka60637aa2008-09-26 09:30:47 +02007641 }
Izik Eidus37817f22008-03-24 23:14:53 +02007642 tss_selector = exit_qualification;
7643
Gleb Natapov64a7ec02009-03-30 16:03:29 +03007644 if (!idt_v || (type != INTR_TYPE_HARD_EXCEPTION &&
7645 type != INTR_TYPE_EXT_INTR &&
7646 type != INTR_TYPE_NMI_INTR))
7647 skip_emulated_instruction(vcpu);
7648
Kevin Wolf7f3d35f2012-02-08 14:34:38 +01007649 if (kvm_task_switch(vcpu, tss_selector,
7650 type == INTR_TYPE_SOFT_INTR ? idt_index : -1, reason,
7651 has_error_code, error_code) == EMULATE_FAIL) {
Gleb Natapovacb54512010-04-15 21:03:50 +03007652 vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
7653 vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION;
7654 vcpu->run->internal.ndata = 0;
Jan Kiszka42dbaa52008-12-15 13:52:10 +01007655 return 0;
Gleb Natapovacb54512010-04-15 21:03:50 +03007656 }
Jan Kiszka42dbaa52008-12-15 13:52:10 +01007657
Jan Kiszka42dbaa52008-12-15 13:52:10 +01007658 /*
7659 * TODO: What about debug traps on tss switch?
7660 * Are we supposed to inject them and update dr6?
7661 */
7662
7663 return 1;
Izik Eidus37817f22008-03-24 23:14:53 +02007664}
7665
Avi Kivity851ba692009-08-24 11:10:17 +03007666static int handle_ept_violation(struct kvm_vcpu *vcpu)
Sheng Yang14394422008-04-28 12:24:45 +08007667{
Sheng Yangf9c617f2009-03-25 10:08:52 +08007668 unsigned long exit_qualification;
Sheng Yang14394422008-04-28 12:24:45 +08007669 gpa_t gpa;
Paolo Bonzinieebed242016-11-28 14:39:58 +01007670 u64 error_code;
Sheng Yang14394422008-04-28 12:24:45 +08007671
Sheng Yangf9c617f2009-03-25 10:08:52 +08007672 exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
Sheng Yang14394422008-04-28 12:24:45 +08007673
Gleb Natapov0be9c7a2013-09-15 11:07:23 +03007674 /*
7675 * If the EPT violation happened while executing IRET from NMI,
7676 * the "blocked by NMI" bit has to be set before the next VM entry.
7677 * There are errata that may cause this bit to not be set:
7678 * AAK134, BY25.
7679 */
Gleb Natapovbcd1c292013-09-25 10:58:22 +03007680 if (!(to_vmx(vcpu)->idt_vectoring_info & VECTORING_INFO_VALID_MASK) &&
Paolo Bonzinid02fcf52017-11-06 13:31:13 +01007681 enable_vnmi &&
Gleb Natapovbcd1c292013-09-25 10:58:22 +03007682 (exit_qualification & INTR_INFO_UNBLOCK_NMI))
Gleb Natapov0be9c7a2013-09-15 11:07:23 +03007683 vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO, GUEST_INTR_STATE_NMI);
7684
Sheng Yang14394422008-04-28 12:24:45 +08007685 gpa = vmcs_read64(GUEST_PHYSICAL_ADDRESS);
Marcelo Tosatti229456f2009-06-17 09:22:14 -03007686 trace_kvm_page_fault(gpa, exit_qualification);
Xiao Guangrong4f5982a2012-06-20 15:58:04 +08007687
Junaid Shahid27959a42016-12-06 16:46:10 -08007688 /* Is it a read fault? */
Junaid Shahidab22a472016-12-21 20:29:28 -08007689 error_code = (exit_qualification & EPT_VIOLATION_ACC_READ)
Junaid Shahid27959a42016-12-06 16:46:10 -08007690 ? PFERR_USER_MASK : 0;
7691 /* Is it a write fault? */
Junaid Shahidab22a472016-12-21 20:29:28 -08007692 error_code |= (exit_qualification & EPT_VIOLATION_ACC_WRITE)
Junaid Shahid27959a42016-12-06 16:46:10 -08007693 ? PFERR_WRITE_MASK : 0;
7694 /* Is it a fetch fault? */
Junaid Shahidab22a472016-12-21 20:29:28 -08007695 error_code |= (exit_qualification & EPT_VIOLATION_ACC_INSTR)
Junaid Shahid27959a42016-12-06 16:46:10 -08007696 ? PFERR_FETCH_MASK : 0;
7697 /* ept page table entry is present? */
7698 error_code |= (exit_qualification &
7699 (EPT_VIOLATION_READABLE | EPT_VIOLATION_WRITABLE |
7700 EPT_VIOLATION_EXECUTABLE))
7701 ? PFERR_PRESENT_MASK : 0;
Xiao Guangrong4f5982a2012-06-20 15:58:04 +08007702
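	/*
	 * Bit 8 of the exit qualification distinguishes a violation on the
	 * final guest-physical translation from one taken while walking
	 * the guest's own paging structures.
	 */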
Paolo Bonzinieebed242016-11-28 14:39:58 +01007703 error_code |= (exit_qualification & 0x100) != 0 ?
7704 PFERR_GUEST_FINAL_MASK : PFERR_GUEST_PAGE_MASK;
Yang Zhang25d92082013-08-06 12:00:32 +03007705
Xiao Guangrong4f5982a2012-06-20 15:58:04 +08007706 vcpu->arch.exit_qualification = exit_qualification;
Xiao Guangrong4f5982a2012-06-20 15:58:04 +08007707 return kvm_mmu_page_fault(vcpu, gpa, error_code, NULL, 0);
Sheng Yang14394422008-04-28 12:24:45 +08007708}
7709
Avi Kivity851ba692009-08-24 11:10:17 +03007710static int handle_ept_misconfig(struct kvm_vcpu *vcpu)
Marcelo Tosatti68f89402009-06-11 12:07:43 -03007711{
Marcelo Tosatti68f89402009-06-11 12:07:43 -03007712 gpa_t gpa;
7713
Paolo Bonzini9034e6e2017-08-17 18:36:58 +02007714 /*
7715 * A nested guest cannot optimize MMIO vmexits, because we have an
7716 * nGPA here instead of the required GPA.
7717 */
Marcelo Tosatti68f89402009-06-11 12:07:43 -03007718 gpa = vmcs_read64(GUEST_PHYSICAL_ADDRESS);
Paolo Bonzini9034e6e2017-08-17 18:36:58 +02007719 if (!is_guest_mode(vcpu) &&
7720 !kvm_io_bus_write(vcpu, KVM_FAST_MMIO_BUS, gpa, 0, NULL)) {
Jason Wang931c33b2015-09-15 14:41:58 +08007721 trace_kvm_fast_mmio(gpa);
Vitaly Kuznetsovd391f122018-01-25 16:37:07 +01007722 /*
7723 * Doing kvm_skip_emulated_instruction() relies on undefined
7724 * behavior: Intel's manual doesn't mandate that
7725 * VM_EXIT_INSTRUCTION_LEN be set in the VMCS when an EPT misconfig
7726 * occurs. While it was observed to be set on real hardware, other
7727 * hypervisors (namely Hyper-V) don't set it, so we would end up
7728 * advancing RIP by some random value. Disable fast mmio when
7729 * running nested and keep it for real hardware in the hope that
7730 * VM_EXIT_INSTRUCTION_LEN will always be set correctly.
7731 */
7732 if (!static_cpu_has(X86_FEATURE_HYPERVISOR))
7733 return kvm_skip_emulated_instruction(vcpu);
7734 else
Sean Christopherson0ce97a22018-08-23 13:56:52 -07007735 return kvm_emulate_instruction(vcpu, EMULTYPE_SKIP) ==
Sean Christophersonc4409902018-08-23 13:56:46 -07007736 EMULATE_DONE;
Michael S. Tsirkin68c3b4d2014-03-31 21:50:44 +03007737 }
Marcelo Tosatti68f89402009-06-11 12:07:43 -03007738
Sean Christophersonc75d0edc2018-03-29 14:48:31 -07007739 return kvm_mmu_page_fault(vcpu, gpa, PFERR_RSVD_MASK, NULL, 0);
Marcelo Tosatti68f89402009-06-11 12:07:43 -03007740}
7741
Avi Kivity851ba692009-08-24 11:10:17 +03007742static int handle_nmi_window(struct kvm_vcpu *vcpu)
Sheng Yangf08864b2008-05-15 18:23:25 +08007743{
Paolo Bonzinid02fcf52017-11-06 13:31:13 +01007744 WARN_ON_ONCE(!enable_vnmi);
Paolo Bonzini47c01522016-12-19 11:44:07 +01007745 vmcs_clear_bits(CPU_BASED_VM_EXEC_CONTROL,
7746 CPU_BASED_VIRTUAL_NMI_PENDING);
Sheng Yangf08864b2008-05-15 18:23:25 +08007747 ++vcpu->stat.nmi_window_exits;
Avi Kivity3842d132010-07-27 12:30:24 +03007748 kvm_make_request(KVM_REQ_EVENT, vcpu);
Sheng Yangf08864b2008-05-15 18:23:25 +08007749
7750 return 1;
7751}
7752
Mohammed Gamal80ced182009-09-01 12:48:18 +02007753static int handle_invalid_guest_state(struct kvm_vcpu *vcpu)
Mohammed Gamalea953ef2008-08-17 16:47:05 +03007754{
Avi Kivity8b3079a2009-01-05 12:10:54 +02007755 struct vcpu_vmx *vmx = to_vmx(vcpu);
7756 enum emulation_result err = EMULATE_DONE;
Mohammed Gamal80ced182009-09-01 12:48:18 +02007757 int ret = 1;
Avi Kivity49e9d552010-09-19 14:34:08 +02007758 u32 cpu_exec_ctrl;
7759 bool intr_window_requested;
Avi Kivityb8405c12012-06-07 17:08:48 +03007760 unsigned count = 130;
Avi Kivity49e9d552010-09-19 14:34:08 +02007761
Sean Christopherson2bb8caf2018-03-12 10:56:13 -07007762 /*
7763 * We should never reach the point where we are emulating L2
7764 * due to invalid guest state as that means we incorrectly
7765 * allowed a nested VMEntry with an invalid vmcs12.
7766 */
7767 WARN_ON_ONCE(vmx->emulation_required && vmx->nested.nested_run_pending);
7768
Avi Kivity49e9d552010-09-19 14:34:08 +02007769 cpu_exec_ctrl = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL);
7770 intr_window_requested = cpu_exec_ctrl & CPU_BASED_VIRTUAL_INTR_PENDING;
Mohammed Gamalea953ef2008-08-17 16:47:05 +03007771
Paolo Bonzini98eb2f82014-03-27 09:51:52 +01007772 while (vmx->emulation_required && count-- != 0) {
Avi Kivitybdea48e2012-06-10 18:07:57 +03007773 if (intr_window_requested && vmx_interrupt_allowed(vcpu))
Avi Kivity49e9d552010-09-19 14:34:08 +02007774 return handle_interrupt_window(&vmx->vcpu);
7775
Radim Krčmář72875d82017-04-26 22:32:19 +02007776 if (kvm_test_request(KVM_REQ_EVENT, vcpu))
Avi Kivityde87dcdd2012-06-12 20:21:38 +03007777 return 1;
7778
Sean Christopherson0ce97a22018-08-23 13:56:52 -07007779 err = kvm_emulate_instruction(vcpu, 0);
Mohammed Gamalea953ef2008-08-17 16:47:05 +03007780
Paolo Bonziniac0a48c2013-06-25 18:24:41 +02007781 if (err == EMULATE_USER_EXIT) {
Paolo Bonzini94452b92013-08-27 15:41:42 +02007782 ++vcpu->stat.mmio_exits;
Mohammed Gamal80ced182009-09-01 12:48:18 +02007783 ret = 0;
7784 goto out;
7785 }
Guillaume Thouvenin1d5a4d92008-10-29 09:39:42 +01007786
Sean Christophersonadd5ff72018-03-23 09:34:00 -07007787 if (err != EMULATE_DONE)
7788 goto emulation_error;
7789
7790 if (vmx->emulation_required && !vmx->rmode.vm86_active &&
7791 vcpu->arch.exception.pending)
7792 goto emulation_error;
Mohammed Gamalea953ef2008-08-17 16:47:05 +03007793
Gleb Natapov8d76c492013-05-08 18:38:44 +03007794 if (vcpu->arch.halt_request) {
7795 vcpu->arch.halt_request = 0;
Joel Schopp5cb56052015-03-02 13:43:31 -06007796 ret = kvm_vcpu_halt(vcpu);
Gleb Natapov8d76c492013-05-08 18:38:44 +03007797 goto out;
7798 }
7799
Mohammed Gamalea953ef2008-08-17 16:47:05 +03007800 if (signal_pending(current))
Mohammed Gamal80ced182009-09-01 12:48:18 +02007801 goto out;
Mohammed Gamalea953ef2008-08-17 16:47:05 +03007802 if (need_resched())
7803 schedule();
7804 }
7805
Mohammed Gamal80ced182009-09-01 12:48:18 +02007806out:
7807 return ret;
Mohammed Gamalea953ef2008-08-17 16:47:05 +03007808
Sean Christophersonadd5ff72018-03-23 09:34:00 -07007809emulation_error:
7810 vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
7811 vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION;
7812 vcpu->run->internal.ndata = 0;
7813 return 0;
Radim Krčmářb4a2d312014-08-21 18:08:08 +02007814}
7815
7816static void grow_ple_window(struct kvm_vcpu *vcpu)
7817{
7818 struct vcpu_vmx *vmx = to_vmx(vcpu);
7819 int old = vmx->ple_window;
7820
Babu Mogerc8e88712018-03-16 16:37:24 -04007821 vmx->ple_window = __grow_ple_window(old, ple_window,
7822 ple_window_grow,
7823 ple_window_max);
Radim Krčmářb4a2d312014-08-21 18:08:08 +02007824
7825 if (vmx->ple_window != old)
7826 vmx->ple_window_dirty = true;
Radim Krčmář7b462682014-08-21 18:08:09 +02007827
7828 trace_kvm_ple_window_grow(vcpu->vcpu_id, vmx->ple_window, old);
Radim Krčmářb4a2d312014-08-21 18:08:08 +02007829}
7830
7831static void shrink_ple_window(struct kvm_vcpu *vcpu)
7832{
7833 struct vcpu_vmx *vmx = to_vmx(vcpu);
7834 int old = vmx->ple_window;
7835
Babu Mogerc8e88712018-03-16 16:37:24 -04007836 vmx->ple_window = __shrink_ple_window(old, ple_window,
7837 ple_window_shrink,
7838 ple_window);
Radim Krčmářb4a2d312014-08-21 18:08:08 +02007839
7840 if (vmx->ple_window != old)
7841 vmx->ple_window_dirty = true;
Radim Krčmář7b462682014-08-21 18:08:09 +02007842
7843 trace_kvm_ple_window_shrink(vcpu->vcpu_id, vmx->ple_window, old);
Radim Krčmářb4a2d312014-08-21 18:08:08 +02007844}
7845
7846/*
Feng Wubf9f6ac2015-09-18 22:29:55 +08007847 * Handler for POSTED_INTERRUPT_WAKEUP_VECTOR.
7848 */
7849static void wakeup_handler(void)
7850{
7851 struct kvm_vcpu *vcpu;
7852 int cpu = smp_processor_id();
7853
7854 spin_lock(&per_cpu(blocked_vcpu_on_cpu_lock, cpu));
7855 list_for_each_entry(vcpu, &per_cpu(blocked_vcpu_on_cpu, cpu),
7856 blocked_vcpu_list) {
7857 struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu);
7858
7859 if (pi_test_on(pi_desc) == 1)
7860 kvm_vcpu_kick(vcpu);
7861 }
7862 spin_unlock(&per_cpu(blocked_vcpu_on_cpu_lock, cpu));
7863}
7864
Peng Haoe01bca22018-04-07 05:47:32 +08007865static void vmx_enable_tdp(void)
Junaid Shahidf160c7b2016-12-06 16:46:16 -08007866{
7867 kvm_mmu_set_mask_ptes(VMX_EPT_READABLE_MASK,
7868 enable_ept_ad_bits ? VMX_EPT_ACCESS_BIT : 0ull,
7869 enable_ept_ad_bits ? VMX_EPT_DIRTY_BIT : 0ull,
7870 0ull, VMX_EPT_EXECUTABLE_MASK,
7871 cpu_has_vmx_ept_execute_only() ? 0ull : VMX_EPT_READABLE_MASK,
Tom Lendackyd0ec49d2017-07-17 16:10:27 -05007872 VMX_EPT_RWX_MASK, 0ull);
Junaid Shahidf160c7b2016-12-06 16:46:16 -08007873
7874 ept_set_mmio_spte_mask();
7875 kvm_enable_tdp();
7876}
7877
Tiejun Chenf2c76482014-10-28 10:14:47 +08007878static __init int hardware_setup(void)
7879{
Sean Christophersoncf81a7e2018-07-11 09:54:30 -07007880 unsigned long host_bndcfgs;
Paolo Bonzini904e14f2018-01-16 16:51:18 +01007881 int r = -ENOMEM, i;
Tiejun Chen34a1cd62014-10-28 10:14:48 +08007882
7883 rdmsrl_safe(MSR_EFER, &host_efer);
7884
7885 for (i = 0; i < ARRAY_SIZE(vmx_msr_index); ++i)
7886 kvm_define_shared_msr(i, vmx_msr_index[i]);
7887
Radim Krčmář23611332016-09-29 22:41:33 +02007888 for (i = 0; i < VMX_BITMAP_NR; i++) {
7889 vmx_bitmap[i] = (unsigned long *)__get_free_page(GFP_KERNEL);
7890 if (!vmx_bitmap[i])
7891 goto out;
7892 }
Tiejun Chen34a1cd62014-10-28 10:14:48 +08007893
Tiejun Chen34a1cd62014-10-28 10:14:48 +08007894 memset(vmx_vmread_bitmap, 0xff, PAGE_SIZE);
7895 memset(vmx_vmwrite_bitmap, 0xff, PAGE_SIZE);
7896
Tiejun Chen34a1cd62014-10-28 10:14:48 +08007897 if (setup_vmcs_config(&vmcs_config) < 0) {
7898 r = -EIO;
Radim Krčmář23611332016-09-29 22:41:33 +02007899 goto out;
Tiejun Chenbaa03522014-12-23 16:21:11 +08007900 }
Tiejun Chenf2c76482014-10-28 10:14:47 +08007901
7902 if (boot_cpu_has(X86_FEATURE_NX))
7903 kvm_enable_efer_bits(EFER_NX);
7904
Sean Christophersoncf81a7e2018-07-11 09:54:30 -07007905 if (boot_cpu_has(X86_FEATURE_MPX)) {
7906 rdmsrl(MSR_IA32_BNDCFGS, host_bndcfgs);
7907 WARN_ONCE(host_bndcfgs, "KVM: BNDCFGS in host will be lost");
7908 }
7909
Wanpeng Li08d839c2017-03-23 05:30:08 -07007910 if (!cpu_has_vmx_vpid() || !cpu_has_vmx_invvpid() ||
7911 !(cpu_has_vmx_invvpid_single() || cpu_has_vmx_invvpid_global()))
Tiejun Chenf2c76482014-10-28 10:14:47 +08007912 enable_vpid = 0;
Wanpeng Li08d839c2017-03-23 05:30:08 -07007913
Tiejun Chenf2c76482014-10-28 10:14:47 +08007914 if (!cpu_has_vmx_ept() ||
David Hildenbrand42aa53b2017-08-10 23:15:29 +02007915 !cpu_has_vmx_ept_4levels() ||
David Hildenbrandf5f51582017-08-24 20:51:30 +02007916 !cpu_has_vmx_ept_mt_wb() ||
Wanpeng Li8ad81822017-10-09 15:51:53 -07007917 !cpu_has_vmx_invept_global())
Tiejun Chenf2c76482014-10-28 10:14:47 +08007918 enable_ept = 0;
Tiejun Chenf2c76482014-10-28 10:14:47 +08007919
Wanpeng Lifce6ac42017-05-11 02:58:56 -07007920 if (!cpu_has_vmx_ept_ad_bits() || !enable_ept)
Tiejun Chenf2c76482014-10-28 10:14:47 +08007921 enable_ept_ad_bits = 0;
7922
Wanpeng Li8ad81822017-10-09 15:51:53 -07007923 if (!cpu_has_vmx_unrestricted_guest() || !enable_ept)
Tiejun Chenf2c76482014-10-28 10:14:47 +08007924 enable_unrestricted_guest = 0;
7925
Paolo Bonziniad15a292015-01-30 16:18:49 +01007926 if (!cpu_has_vmx_flexpriority())
Tiejun Chenf2c76482014-10-28 10:14:47 +08007927 flexpriority_enabled = 0;
7928
Paolo Bonzinid02fcf52017-11-06 13:31:13 +01007929 if (!cpu_has_virtual_nmis())
7930 enable_vnmi = 0;
7931
Paolo Bonziniad15a292015-01-30 16:18:49 +01007932 /*
7933 * set_apic_access_page_addr() is used to reload apic access
7934 * page upon invalidation. No need to do anything if not
7935 * using the APIC_ACCESS_ADDR VMCS field.
7936 */
7937 if (!flexpriority_enabled)
Tiejun Chenf2c76482014-10-28 10:14:47 +08007938 kvm_x86_ops->set_apic_access_page_addr = NULL;
Tiejun Chenf2c76482014-10-28 10:14:47 +08007939
7940 if (!cpu_has_vmx_tpr_shadow())
7941 kvm_x86_ops->update_cr8_intercept = NULL;
7942
7943 if (enable_ept && !cpu_has_vmx_ept_2m_page())
7944 kvm_disable_largepages();
7945
Tianyu Lan877ad952018-07-19 08:40:23 +00007946#if IS_ENABLED(CONFIG_HYPERV)
7947 if (ms_hyperv.nested_features & HV_X64_NESTED_GUEST_MAPPING_FLUSH
7948 && enable_ept)
7949 kvm_x86_ops->tlb_remote_flush = vmx_hv_remote_flush_tlb;
7950#endif
7951
Wanpeng Li0f107682017-09-28 18:06:24 -07007952 if (!cpu_has_vmx_ple()) {
Tiejun Chenf2c76482014-10-28 10:14:47 +08007953 ple_gap = 0;
Wanpeng Li0f107682017-09-28 18:06:24 -07007954 ple_window = 0;
7955 ple_window_grow = 0;
7956 ple_window_max = 0;
7957 ple_window_shrink = 0;
7958 }
Tiejun Chenf2c76482014-10-28 10:14:47 +08007959
Paolo Bonzini76dfafd52016-12-19 17:17:11 +01007960 if (!cpu_has_vmx_apicv()) {
Tiejun Chenf2c76482014-10-28 10:14:47 +08007961 enable_apicv = 0;
Paolo Bonzini76dfafd52016-12-19 17:17:11 +01007962 kvm_x86_ops->sync_pir_to_irr = NULL;
7963 }
Tiejun Chenf2c76482014-10-28 10:14:47 +08007964
Haozhong Zhang64903d62015-10-20 15:39:09 +08007965 if (cpu_has_vmx_tsc_scaling()) {
7966 kvm_has_tsc_control = true;
7967 kvm_max_tsc_scaling_ratio = KVM_VMX_TSC_MULTIPLIER_MAX;
7968 kvm_tsc_scaling_ratio_frac_bits = 48;
7969 }
7970
Wanpeng Li04bb92e2015-09-16 19:31:11 +08007971 set_bit(0, vmx_vpid_bitmap); /* 0 is reserved for host */
7972
Junaid Shahidf160c7b2016-12-06 16:46:16 -08007973 if (enable_ept)
7974 vmx_enable_tdp();
7975 else
Tiejun Chenbaa03522014-12-23 16:21:11 +08007976 kvm_disable_tdp();
7977
Jim Mattson8fcc4b52018-07-10 11:27:20 +02007978 if (!nested) {
7979 kvm_x86_ops->get_nested_state = NULL;
7980 kvm_x86_ops->set_nested_state = NULL;
7981 }
7982
Kai Huang843e4332015-01-28 10:54:28 +08007983 /*
7984 * Only enable PML when hardware supports PML feature, and both EPT
7985 * and EPT A/D bit features are enabled -- PML depends on them to work.
7986 */
7987 if (!enable_ept || !enable_ept_ad_bits || !cpu_has_vmx_pml())
7988 enable_pml = 0;
7989
7990 if (!enable_pml) {
7991 kvm_x86_ops->slot_enable_log_dirty = NULL;
7992 kvm_x86_ops->slot_disable_log_dirty = NULL;
7993 kvm_x86_ops->flush_log_dirty = NULL;
7994 kvm_x86_ops->enable_log_dirty_pt_masked = NULL;
7995 }
7996
Sean Christophersond264ee02018-08-27 15:21:12 -07007997 if (!cpu_has_vmx_preemption_timer())
7998 kvm_x86_ops->request_immediate_exit = __kvm_request_immediate_exit;
7999
Yunhong Jiang64672c92016-06-13 14:19:59 -07008000 if (cpu_has_vmx_preemption_timer() && enable_preemption_timer) {
8001 u64 vmx_msr;
8002
8003 rdmsrl(MSR_IA32_VMX_MISC, vmx_msr);
8004 cpu_preemption_timer_multi =
8005 vmx_msr & VMX_MISC_PREEMPTION_TIMER_RATE_MASK;
8006 } else {
8007 kvm_x86_ops->set_hv_timer = NULL;
8008 kvm_x86_ops->cancel_hv_timer = NULL;
8009 }
8010
Paolo Bonzinic5d167b2017-12-13 11:05:19 +01008011 if (!cpu_has_vmx_shadow_vmcs())
8012 enable_shadow_vmcs = 0;
8013 if (enable_shadow_vmcs)
8014 init_vmcs_shadow_fields();
8015
Feng Wubf9f6ac2015-09-18 22:29:55 +08008016 kvm_set_posted_intr_wakeup_handler(wakeup_handler);
Paolo Bonzini13893092018-02-26 13:40:09 +01008017 nested_vmx_setup_ctls_msrs(&vmcs_config.nested, enable_apicv);
Feng Wubf9f6ac2015-09-18 22:29:55 +08008018
Ashok Rajc45dcc72016-06-22 14:59:56 +08008019 kvm_mce_cap_supported |= MCG_LMCE_P;
8020
Tiejun Chenf2c76482014-10-28 10:14:47 +08008021 return alloc_kvm_area();
Tiejun Chen34a1cd62014-10-28 10:14:48 +08008022
Tiejun Chen34a1cd62014-10-28 10:14:48 +08008023out:
Radim Krčmář23611332016-09-29 22:41:33 +02008024 for (i = 0; i < VMX_BITMAP_NR; i++)
8025 free_page((unsigned long)vmx_bitmap[i]);
Tiejun Chen34a1cd62014-10-28 10:14:48 +08008026
8027 return r;
Tiejun Chenf2c76482014-10-28 10:14:47 +08008028}
8029
8030static __exit void hardware_unsetup(void)
8031{
Radim Krčmář23611332016-09-29 22:41:33 +02008032 int i;
8033
8034 for (i = 0; i < VMX_BITMAP_NR; i++)
8035 free_page((unsigned long)vmx_bitmap[i]);
Tiejun Chen34a1cd62014-10-28 10:14:48 +08008036
Tiejun Chenf2c76482014-10-28 10:14:47 +08008037 free_kvm_area();
8038}
8039
Avi Kivity6aa8b732006-12-10 02:21:36 -08008040/*
Zhai, Edwin4b8d54f2009-10-09 18:03:20 +08008041 * Indicate a busy-waiting vcpu in spinlock. We do not enable the PAUSE
8042 * exiting, so only get here on cpu with PAUSE-Loop-Exiting.
8043 */
Marcelo Tosatti9fb41ba2009-10-12 19:37:31 -03008044static int handle_pause(struct kvm_vcpu *vcpu)
Zhai, Edwin4b8d54f2009-10-09 18:03:20 +08008045{
Wanpeng Lib31c1142018-03-12 04:53:04 -07008046 if (!kvm_pause_in_guest(vcpu->kvm))
Radim Krčmářb4a2d312014-08-21 18:08:08 +02008047 grow_ple_window(vcpu);
8048
Longpeng(Mike)de63ad42017-08-08 12:05:33 +08008049 /*
8050 * Intel SDM Vol. 3, ch. 25.1.3 says: the "PAUSE-loop exiting"
8051 * VM-execution control is ignored if CPL > 0. OTOH, KVM
8052 * never sets PAUSE_EXITING and only sets PLE if supported,
8053 * so the vcpu must be at CPL=0 if it gets a PAUSE exit.
8054 */
8055 kvm_vcpu_on_spin(vcpu, true);
Kyle Huey6affcbe2016-11-29 12:40:40 -08008056 return kvm_skip_emulated_instruction(vcpu);
Zhai, Edwin4b8d54f2009-10-09 18:03:20 +08008057}
8058
Gabriel L. Somlo87c00572014-05-07 16:52:13 -04008059static int handle_nop(struct kvm_vcpu *vcpu)
Sheng Yang59708672009-12-15 13:29:54 +08008060{
Kyle Huey6affcbe2016-11-29 12:40:40 -08008061 return kvm_skip_emulated_instruction(vcpu);
Sheng Yang59708672009-12-15 13:29:54 +08008062}
8063
Gabriel L. Somlo87c00572014-05-07 16:52:13 -04008064static int handle_mwait(struct kvm_vcpu *vcpu)
8065{
8066 printk_once(KERN_WARNING "kvm: MWAIT instruction emulated as NOP!\n");
8067 return handle_nop(vcpu);
8068}
8069
Jim Mattson45ec3682017-08-23 16:32:04 -07008070static int handle_invalid_op(struct kvm_vcpu *vcpu)
8071{
8072 kvm_queue_exception(vcpu, UD_VECTOR);
8073 return 1;
8074}
8075
Mihai Donțu5f3d45e2015-07-05 20:08:57 +03008076static int handle_monitor_trap(struct kvm_vcpu *vcpu)
8077{
8078 return 1;
8079}
8080
Gabriel L. Somlo87c00572014-05-07 16:52:13 -04008081static int handle_monitor(struct kvm_vcpu *vcpu)
8082{
8083 printk_once(KERN_WARNING "kvm: MONITOR instruction emulated as NOP!\n");
8084 return handle_nop(vcpu);
8085}
8086
Zhai, Edwin4b8d54f2009-10-09 18:03:20 +08008087/*
Arthur Chunqi Li0658fba2013-07-04 15:03:32 +08008088 * The following 3 functions, nested_vmx_succeed()/failValid()/failInvalid(),
Sean Christopherson09abb5e2018-09-26 09:23:55 -07008089 * set the success or error code of an emulated VMX instruction (as specified
8090 * by Vol 2B, VMX Instruction Reference, "Conventions"), and skip the emulated
8091 * instruction.
Arthur Chunqi Li0658fba2013-07-04 15:03:32 +08008092 */
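/*
 * Per those conventions: VMsucceed clears CF/PF/AF/ZF/SF/OF,
 * VMfailInvalid sets only CF, and VMfailValid sets only ZF and records
 * the error number in the VM-instruction error field of the current VMCS.
 */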
Sean Christopherson09abb5e2018-09-26 09:23:55 -07008093static int nested_vmx_succeed(struct kvm_vcpu *vcpu)
Arthur Chunqi Li0658fba2013-07-04 15:03:32 +08008094{
8095 vmx_set_rflags(vcpu, vmx_get_rflags(vcpu)
8096 & ~(X86_EFLAGS_CF | X86_EFLAGS_PF | X86_EFLAGS_AF |
8097 X86_EFLAGS_ZF | X86_EFLAGS_SF | X86_EFLAGS_OF));
Sean Christopherson09abb5e2018-09-26 09:23:55 -07008098 return kvm_skip_emulated_instruction(vcpu);
Arthur Chunqi Li0658fba2013-07-04 15:03:32 +08008099}
8100
Sean Christopherson09abb5e2018-09-26 09:23:55 -07008101static int nested_vmx_failInvalid(struct kvm_vcpu *vcpu)
Arthur Chunqi Li0658fba2013-07-04 15:03:32 +08008102{
8103 vmx_set_rflags(vcpu, (vmx_get_rflags(vcpu)
8104 & ~(X86_EFLAGS_PF | X86_EFLAGS_AF | X86_EFLAGS_ZF |
8105 X86_EFLAGS_SF | X86_EFLAGS_OF))
8106 | X86_EFLAGS_CF);
Sean Christopherson09abb5e2018-09-26 09:23:55 -07008107 return kvm_skip_emulated_instruction(vcpu);
Arthur Chunqi Li0658fba2013-07-04 15:03:32 +08008108}
8109
Sean Christopherson09abb5e2018-09-26 09:23:55 -07008110static int nested_vmx_failValid(struct kvm_vcpu *vcpu,
8111 u32 vm_instruction_error)
Arthur Chunqi Li0658fba2013-07-04 15:03:32 +08008112{
Vitaly Kuznetsovb8bbab92018-10-16 18:50:03 +02008113 struct vcpu_vmx *vmx = to_vmx(vcpu);
8114
Sean Christopherson09abb5e2018-09-26 09:23:55 -07008115 /*
8116 * failValid writes the error number to the current VMCS, which
8117 * can't be done if there isn't a current VMCS.
8118 */
Vitaly Kuznetsovb8bbab92018-10-16 18:50:03 +02008119 if (vmx->nested.current_vmptr == -1ull && !vmx->nested.hv_evmcs)
Sean Christopherson09abb5e2018-09-26 09:23:55 -07008120 return nested_vmx_failInvalid(vcpu);
8121
Arthur Chunqi Li0658fba2013-07-04 15:03:32 +08008122 vmx_set_rflags(vcpu, (vmx_get_rflags(vcpu)
8123 & ~(X86_EFLAGS_CF | X86_EFLAGS_PF | X86_EFLAGS_AF |
8124 X86_EFLAGS_SF | X86_EFLAGS_OF))
8125 | X86_EFLAGS_ZF);
8126 get_vmcs12(vcpu)->vm_instruction_error = vm_instruction_error;
8127 /*
8128 * We don't need to force a shadow sync because
8129 * VM_INSTRUCTION_ERROR is not shadowed
8130 */
Sean Christopherson09abb5e2018-09-26 09:23:55 -07008131 return kvm_skip_emulated_instruction(vcpu);
Arthur Chunqi Li0658fba2013-07-04 15:03:32 +08008132}
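/*
 * Typical call pattern in the VMX instruction handlers below, sketched
 * for illustration only (the condition is a placeholder, not real code):
 *
 *	if (!nested_vmx_check_permission(vcpu))
 *		return 1;
 *	if (operand_is_invalid)		// hypothetical check
 *		return nested_vmx_failValid(vcpu, VMXERR_...);
 *	...
 *	return nested_vmx_succeed(vcpu);
 */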
Abel Gordon145c28d2013-04-18 14:36:55 +03008133
Wincy Vanff651cb2014-12-11 08:52:58 +03008134static void nested_vmx_abort(struct kvm_vcpu *vcpu, u32 indicator)
8135{
8136 /* TODO: not to reset guest simply here. */
8137 kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu);
Paolo Bonzinibbe41b92016-08-19 17:51:20 +02008138 pr_debug_ratelimited("kvm: nested vmx abort, indicator %d\n", indicator);
Wincy Vanff651cb2014-12-11 08:52:58 +03008139}
8140
Jan Kiszkaf41245002014-03-07 20:03:13 +01008141static enum hrtimer_restart vmx_preemption_timer_fn(struct hrtimer *timer)
8142{
8143 struct vcpu_vmx *vmx =
8144 container_of(timer, struct vcpu_vmx, nested.preemption_timer);
8145
8146 vmx->nested.preemption_timer_expired = true;
8147 kvm_make_request(KVM_REQ_EVENT, &vmx->vcpu);
8148 kvm_vcpu_kick(&vmx->vcpu);
8149
8150 return HRTIMER_NORESTART;
8151}
8152
Nadav Har'Elff2f6fe2011-05-25 23:05:27 +03008153/*
Bandan Das19677e32014-05-06 02:19:15 -04008154 * Decode the memory-address operand of a vmx instruction, as recorded on an
8155 * exit caused by such an instruction (run by a guest hypervisor).
8156 * On success, returns 0. When the operand is invalid, returns 1 and injects
8157 * #UD or #GP.
8158 */
8159static int get_vmx_mem_address(struct kvm_vcpu *vcpu,
8160 unsigned long exit_qualification,
Eugene Korenevskyf9eb4af2015-04-17 02:22:21 +00008161 u32 vmx_instruction_info, bool wr, gva_t *ret)
Bandan Das19677e32014-05-06 02:19:15 -04008162{
Eugene Korenevskyf9eb4af2015-04-17 02:22:21 +00008163 gva_t off;
8164 bool exn;
8165 struct kvm_segment s;
8166
Bandan Das19677e32014-05-06 02:19:15 -04008167 /*
8168 * According to Vol. 3B, "Information for VM Exits Due to Instruction
8169 * Execution", on an exit, vmx_instruction_info holds most of the
8170 * addressing components of the operand. Only the displacement part
8171 * is put in exit_qualification (see 3B, "Basic VM-Exit Information").
8172 * For how an actual address is calculated from all these components,
8173 * refer to Vol. 1, "Operand Addressing".
8174 */
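	/*
	 * Layout of the instruction-information field, as decoded below:
	 *   bits  1:0  - scaling
	 *   bits  9:7  - address size
	 *   bit  10    - Mem/Reg (operand is a register)
	 *   bits 17:15 - segment register
	 *   bits 21:18 - index register, bit 22 - index invalid
	 *   bits 26:23 - base register,  bit 27 - base invalid
	 */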
8175 int scaling = vmx_instruction_info & 3;
8176 int addr_size = (vmx_instruction_info >> 7) & 7;
8177 bool is_reg = vmx_instruction_info & (1u << 10);
8178 int seg_reg = (vmx_instruction_info >> 15) & 7;
8179 int index_reg = (vmx_instruction_info >> 18) & 0xf;
8180 bool index_is_valid = !(vmx_instruction_info & (1u << 22));
8181 int base_reg = (vmx_instruction_info >> 23) & 0xf;
8182 bool base_is_valid = !(vmx_instruction_info & (1u << 27));
8183
8184 if (is_reg) {
8185 kvm_queue_exception(vcpu, UD_VECTOR);
8186 return 1;
8187 }
8188
8189 /* Addr = segment_base + offset */
8190 /* offset = base + [index * scale] + displacement */
Eugene Korenevskyf9eb4af2015-04-17 02:22:21 +00008191 off = exit_qualification; /* holds the displacement */
Bandan Das19677e32014-05-06 02:19:15 -04008192 if (base_is_valid)
Eugene Korenevskyf9eb4af2015-04-17 02:22:21 +00008193 off += kvm_register_read(vcpu, base_reg);
Bandan Das19677e32014-05-06 02:19:15 -04008194 if (index_is_valid)
Eugene Korenevskyf9eb4af2015-04-17 02:22:21 +00008195 off += kvm_register_read(vcpu, index_reg)<<scaling;
8196 vmx_get_segment(vcpu, &s, seg_reg);
8197 *ret = s.base + off;
Bandan Das19677e32014-05-06 02:19:15 -04008198
8199 if (addr_size == 1) /* 32 bit */
8200 *ret &= 0xffffffff;
8201
Eugene Korenevskyf9eb4af2015-04-17 02:22:21 +00008202 /* Checks for #GP/#SS exceptions. */
8203 exn = false;
Quentin Casasnovasff30ef42016-06-18 11:01:05 +02008204 if (is_long_mode(vcpu)) {
8205 /* Long mode: #GP(0)/#SS(0) if the memory address is in a
8206 * non-canonical form. This is the only check on the memory
8207 * destination for long mode!
8208 */
Yu Zhangfd8cb432017-08-24 20:27:56 +08008209 exn = is_noncanonical_address(*ret, vcpu);
Quentin Casasnovasff30ef42016-06-18 11:01:05 +02008210 } else if (is_protmode(vcpu)) {
Eugene Korenevskyf9eb4af2015-04-17 02:22:21 +00008211 /* Protected mode: apply checks for segment validity in the
8212 * following order:
8213 * - segment type check (#GP(0) may be thrown)
8214 * - usability check (#GP(0)/#SS(0))
8215 * - limit check (#GP(0)/#SS(0))
8216 */
8217 if (wr)
8218 /* #GP(0) if the destination operand is located in a
8219 * read-only data segment or any code segment.
8220 */
8221 exn = ((s.type & 0xa) == 0 || (s.type & 8));
8222 else
8223 /* #GP(0) if the source operand is located in an
8224 * execute-only code segment
8225 */
8226 exn = ((s.type & 0xa) == 8);
Quentin Casasnovasff30ef42016-06-18 11:01:05 +02008227 if (exn) {
8228 kvm_queue_exception_e(vcpu, GP_VECTOR, 0);
8229 return 1;
8230 }
Eugene Korenevskyf9eb4af2015-04-17 02:22:21 +00008231 /* Protected mode: #GP(0)/#SS(0) if the segment is unusable.
8232 */
8233 exn = (s.unusable != 0);
8234 /* Protected mode: #GP(0)/#SS(0) if the memory
8235 * operand is outside the segment limit.
8236 */
8237 exn = exn || (off + sizeof(u64) > s.limit);
8238 }
8239 if (exn) {
8240 kvm_queue_exception_e(vcpu,
8241 seg_reg == VCPU_SREG_SS ?
8242 SS_VECTOR : GP_VECTOR,
8243 0);
8244 return 1;
8245 }
8246
Bandan Das19677e32014-05-06 02:19:15 -04008247 return 0;
8248}
8249
Radim Krčmářcbf71272017-05-19 15:48:51 +02008250static int nested_vmx_get_vmptr(struct kvm_vcpu *vcpu, gpa_t *vmpointer)
Bandan Das3573e222014-05-06 02:19:16 -04008251{
8252 gva_t gva;
Bandan Das3573e222014-05-06 02:19:16 -04008253 struct x86_exception e;
Bandan Das3573e222014-05-06 02:19:16 -04008254
8255 if (get_vmx_mem_address(vcpu, vmcs_readl(EXIT_QUALIFICATION),
Eugene Korenevskyf9eb4af2015-04-17 02:22:21 +00008256 vmcs_read32(VMX_INSTRUCTION_INFO), false, &gva))
Bandan Das3573e222014-05-06 02:19:16 -04008257 return 1;
8258
Paolo Bonzinice14e868a2018-06-06 17:37:49 +02008259 if (kvm_read_guest_virt(vcpu, gva, vmpointer, sizeof(*vmpointer), &e)) {
Bandan Das3573e222014-05-06 02:19:16 -04008260 kvm_inject_page_fault(vcpu, &e);
8261 return 1;
8262 }
8263
Bandan Das3573e222014-05-06 02:19:16 -04008264 return 0;
8265}
8266
Liran Alonabfc52c2018-06-23 02:35:13 +03008267/*
8268 * Allocate a shadow VMCS and associate it with the currently loaded
8269 * VMCS, unless such a shadow VMCS already exists. The newly allocated
8270 * VMCS is also VMCLEARed, so that it is ready for use.
8271 */
8272static struct vmcs *alloc_shadow_vmcs(struct kvm_vcpu *vcpu)
8273{
8274 struct vcpu_vmx *vmx = to_vmx(vcpu);
8275 struct loaded_vmcs *loaded_vmcs = vmx->loaded_vmcs;
8276
8277 /*
8278 * We should allocate a shadow vmcs for vmcs01 only when L1
8279 * executes VMXON and free it when L1 executes VMXOFF.
8280 * As it is invalid to execute VMXON twice, we shouldn't reach
8281 * here when vmcs01 already have an allocated shadow vmcs.
8282 */
8283 WARN_ON(loaded_vmcs == &vmx->vmcs01 && loaded_vmcs->shadow_vmcs);
8284
8285 if (!loaded_vmcs->shadow_vmcs) {
8286 loaded_vmcs->shadow_vmcs = alloc_vmcs(true);
8287 if (loaded_vmcs->shadow_vmcs)
8288 vmcs_clear(loaded_vmcs->shadow_vmcs);
8289 }
8290 return loaded_vmcs->shadow_vmcs;
8291}
8292
Jim Mattsone29acc52016-11-30 12:03:43 -08008293static int enter_vmx_operation(struct kvm_vcpu *vcpu)
8294{
8295 struct vcpu_vmx *vmx = to_vmx(vcpu);
Paolo Bonzinif21f1652018-01-11 12:16:15 +01008296 int r;
Jim Mattsone29acc52016-11-30 12:03:43 -08008297
Paolo Bonzinif21f1652018-01-11 12:16:15 +01008298 r = alloc_loaded_vmcs(&vmx->nested.vmcs02);
8299 if (r < 0)
Jim Mattsonde3a0022017-11-27 17:22:25 -06008300 goto out_vmcs02;
Jim Mattsone29acc52016-11-30 12:03:43 -08008301
8302 vmx->nested.cached_vmcs12 = kmalloc(VMCS12_SIZE, GFP_KERNEL);
8303 if (!vmx->nested.cached_vmcs12)
8304 goto out_cached_vmcs12;
8305
Liran Alon61ada742018-06-23 02:35:08 +03008306 vmx->nested.cached_shadow_vmcs12 = kmalloc(VMCS12_SIZE, GFP_KERNEL);
8307 if (!vmx->nested.cached_shadow_vmcs12)
8308 goto out_cached_shadow_vmcs12;
8309
Liran Alonabfc52c2018-06-23 02:35:13 +03008310 if (enable_shadow_vmcs && !alloc_shadow_vmcs(vcpu))
8311 goto out_shadow_vmcs;
Jim Mattsone29acc52016-11-30 12:03:43 -08008312
Jim Mattsone29acc52016-11-30 12:03:43 -08008313 hrtimer_init(&vmx->nested.preemption_timer, CLOCK_MONOTONIC,
8314 HRTIMER_MODE_REL_PINNED);
8315 vmx->nested.preemption_timer.function = vmx_preemption_timer_fn;
8316
Roman Kagan63aff652018-07-19 21:59:07 +03008317 vmx->nested.vpid02 = allocate_vpid();
8318
Sean Christopherson9d6105b2018-09-26 09:23:51 -07008319 vmx->nested.vmcs02_initialized = false;
Jim Mattsone29acc52016-11-30 12:03:43 -08008320 vmx->nested.vmxon = true;
8321 return 0;
8322
8323out_shadow_vmcs:
Liran Alon61ada742018-06-23 02:35:08 +03008324 kfree(vmx->nested.cached_shadow_vmcs12);
8325
8326out_cached_shadow_vmcs12:
Jim Mattsone29acc52016-11-30 12:03:43 -08008327 kfree(vmx->nested.cached_vmcs12);
8328
8329out_cached_vmcs12:
Jim Mattsonde3a0022017-11-27 17:22:25 -06008330 free_loaded_vmcs(&vmx->nested.vmcs02);
Jim Mattsone29acc52016-11-30 12:03:43 -08008331
Jim Mattsonde3a0022017-11-27 17:22:25 -06008332out_vmcs02:
Jim Mattsone29acc52016-11-30 12:03:43 -08008333 return -ENOMEM;
8334}
8335
Bandan Das3573e222014-05-06 02:19:16 -04008336/*
Nadav Har'Elec378ae2011-05-25 23:02:54 +03008337 * Emulate the VMXON instruction.
8338 * Currently, we just remember that VMX is active, and do not save or even
8339 * inspect the argument to VMXON (the so-called "VMXON pointer") because we
8340 * do not currently need to store anything in that guest-allocated memory
8341 * region. Consequently, VMCLEAR and VMPTRLD also do not verify that their
8342 * argument is different from the VMXON pointer (which the spec says they do).
8343 */
8344static int handle_vmon(struct kvm_vcpu *vcpu)
8345{
Jim Mattsone29acc52016-11-30 12:03:43 -08008346 int ret;
Radim Krčmářcbf71272017-05-19 15:48:51 +02008347 gpa_t vmptr;
8348 struct page *page;
Nadav Har'Elec378ae2011-05-25 23:02:54 +03008349 struct vcpu_vmx *vmx = to_vmx(vcpu);
Nadav Har'Elb3897a42013-07-08 19:12:35 +08008350 const u64 VMXON_NEEDED_FEATURES = FEATURE_CONTROL_LOCKED
8351 | FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX;
Nadav Har'Elec378ae2011-05-25 23:02:54 +03008352
Jim Mattson70f3aac2017-04-26 08:53:46 -07008353 /*
8354 * The Intel VMX Instruction Reference lists a bunch of bits that are
8355 * prerequisite to running VMXON, most notably cr4.VMXE must be set to
8356 * 1 (see vmx_set_cr4() for when we allow the guest to set this).
8357 * Otherwise, we should fail with #UD. But most faulting conditions
8358 * have already been checked by hardware, prior to the VM-exit for
8359 * VMXON. We do test guest cr4.VMXE because processor CR4 always has
8360 * that bit set to 1 in non-root mode.
Nadav Har'Elec378ae2011-05-25 23:02:54 +03008361 */
Jim Mattson70f3aac2017-04-26 08:53:46 -07008362 if (!kvm_read_cr4_bits(vcpu, X86_CR4_VMXE)) {
Nadav Har'Elec378ae2011-05-25 23:02:54 +03008363 kvm_queue_exception(vcpu, UD_VECTOR);
8364 return 1;
8365 }
8366
Felix Wilhelm727ba742018-06-11 09:43:44 +02008367 /* CPL=0 must be checked manually. */
8368 if (vmx_get_cpl(vcpu)) {
Jim Mattson36090bf2018-07-27 09:18:50 -07008369 kvm_inject_gp(vcpu, 0);
Felix Wilhelm727ba742018-06-11 09:43:44 +02008370 return 1;
8371 }
8372
Sean Christopherson09abb5e2018-09-26 09:23:55 -07008373 if (vmx->nested.vmxon)
8374 return nested_vmx_failValid(vcpu,
8375 VMXERR_VMXON_IN_VMX_ROOT_OPERATION);
Nadav Har'Elb3897a42013-07-08 19:12:35 +08008376
Haozhong Zhang3b840802016-06-22 14:59:54 +08008377 if ((vmx->msr_ia32_feature_control & VMXON_NEEDED_FEATURES)
Nadav Har'Elb3897a42013-07-08 19:12:35 +08008378 != VMXON_NEEDED_FEATURES) {
8379 kvm_inject_gp(vcpu, 0);
8380 return 1;
8381 }
8382
Radim Krčmářcbf71272017-05-19 15:48:51 +02008383 if (nested_vmx_get_vmptr(vcpu, &vmptr))
Jim Mattson21e7fbe2016-12-22 15:49:55 -08008384 return 1;
Radim Krčmářcbf71272017-05-19 15:48:51 +02008385
8386 /*
8387 * SDM 3: 24.11.5
8388 * The first 4 bytes of the VMXON region contain the supported
8389 * VMCS revision identifier
8390 *
8391 * Note - IA32_VMX_BASIC[48] will never be 1 for the nested case;
8392 * if it were, the physical address width checked below would be 32
8393 */
Sean Christopherson09abb5e2018-09-26 09:23:55 -07008394 if (!PAGE_ALIGNED(vmptr) || (vmptr >> cpuid_maxphyaddr(vcpu)))
8395 return nested_vmx_failInvalid(vcpu);
Radim Krčmářcbf71272017-05-19 15:48:51 +02008396
David Hildenbrand5e2f30b2017-08-03 18:11:04 +02008397 page = kvm_vcpu_gpa_to_page(vcpu, vmptr);
Sean Christopherson09abb5e2018-09-26 09:23:55 -07008398 if (is_error_page(page))
8399 return nested_vmx_failInvalid(vcpu);
8400
Radim Krčmářcbf71272017-05-19 15:48:51 +02008401 if (*(u32 *)kmap(page) != VMCS12_REVISION) {
8402 kunmap(page);
David Hildenbrand53a70da2017-08-03 18:11:05 +02008403 kvm_release_page_clean(page);
Sean Christopherson09abb5e2018-09-26 09:23:55 -07008404 return nested_vmx_failInvalid(vcpu);
Radim Krčmářcbf71272017-05-19 15:48:51 +02008405 }
8406 kunmap(page);
David Hildenbrand53a70da2017-08-03 18:11:05 +02008407 kvm_release_page_clean(page);
Radim Krčmářcbf71272017-05-19 15:48:51 +02008408
8409 vmx->nested.vmxon_ptr = vmptr;
Jim Mattsone29acc52016-11-30 12:03:43 -08008410 ret = enter_vmx_operation(vcpu);
8411 if (ret)
8412 return ret;
Nadav Har'Elec378ae2011-05-25 23:02:54 +03008413
Sean Christopherson09abb5e2018-09-26 09:23:55 -07008414 return nested_vmx_succeed(vcpu);
Nadav Har'Elec378ae2011-05-25 23:02:54 +03008415}
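/*
 * Check ordering above: #UD if CR4.VMXE is clear, #GP(0) if CPL > 0,
 * VMfailValid(VMXERR_VMXON_IN_VMX_ROOT_OPERATION) if VMXON was already
 * executed, #GP(0) if IA32_FEATURE_CONTROL disallows VMXON, and
 * VMfailInvalid for a misaligned or out-of-range VMXON pointer, an
 * unmappable page, or a revision-id mismatch.  Only after all of that
 * does enter_vmx_operation() commit any state.
 */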
8416
8417/*
8418 * Intel's VMX Instruction Reference specifies a common set of prerequisites
8419 * for running VMX instructions (except VMXON, whose prerequisites are
8420 * slightly different). It also specifies what exception to inject otherwise.
Jim Mattson70f3aac2017-04-26 08:53:46 -07008421 * Note that many of these exceptions have priority over VM exits, so they
8422 * don't have to be checked again here.
Nadav Har'Elec378ae2011-05-25 23:02:54 +03008423 */
8424static int nested_vmx_check_permission(struct kvm_vcpu *vcpu)
8425{
Jim Mattson70f3aac2017-04-26 08:53:46 -07008426 if (!to_vmx(vcpu)->nested.vmxon) {
Nadav Har'Elec378ae2011-05-25 23:02:54 +03008427 kvm_queue_exception(vcpu, UD_VECTOR);
8428 return 0;
8429 }
Jim Mattsone49fcb82018-07-27 13:44:45 -07008430
Nadav Har'Elec378ae2011-05-25 23:02:54 +03008431 if (vmx_get_cpl(vcpu)) {
Jim Mattson36090bf2018-07-27 09:18:50 -07008432 kvm_inject_gp(vcpu, 0);
Nadav Har'Elec378ae2011-05-25 23:02:54 +03008433 return 0;
8434 }
8435
Nadav Har'Elec378ae2011-05-25 23:02:54 +03008436 return 1;
8437}
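/*
 * Typical caller pattern (as used by the VMX instruction handlers below):
 *
 *	if (!nested_vmx_check_permission(vcpu))
 *		return 1;
 *
 * A zero return means the exception (#UD or #GP) has already been
 * queued; returning 1 from the exit handler then simply resumes the
 * guest so that the exception is delivered.
 */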
8438
David Matlack8ca44e82017-08-01 14:00:39 -07008439static void vmx_disable_shadow_vmcs(struct vcpu_vmx *vmx)
8440{
8441 vmcs_clear_bits(SECONDARY_VM_EXEC_CONTROL, SECONDARY_EXEC_SHADOW_VMCS);
8442 vmcs_write64(VMCS_LINK_POINTER, -1ull);
8443}
8444
Vitaly Kuznetsovb8bbab92018-10-16 18:50:03 +02008445static inline void nested_release_evmcs(struct kvm_vcpu *vcpu)
8446{
8447 struct vcpu_vmx *vmx = to_vmx(vcpu);
8448
8449 if (!vmx->nested.hv_evmcs)
8450 return;
8451
8452 kunmap(vmx->nested.hv_evmcs_page);
8453 kvm_release_page_dirty(vmx->nested.hv_evmcs_page);
8454 vmx->nested.hv_evmcs_vmptr = -1ull;
8455 vmx->nested.hv_evmcs_page = NULL;
8456 vmx->nested.hv_evmcs = NULL;
8457}
8458
Vitaly Kuznetsov14c07ad2018-10-08 21:28:08 +02008459static inline void nested_release_vmcs12(struct kvm_vcpu *vcpu)
Abel Gordone7953d72013-04-18 14:37:55 +03008460{
Vitaly Kuznetsov14c07ad2018-10-08 21:28:08 +02008461 struct vcpu_vmx *vmx = to_vmx(vcpu);
8462
Paolo Bonzini9a2a05b2014-07-17 11:55:46 +02008463 if (vmx->nested.current_vmptr == -1ull)
8464 return;
8465
Abel Gordon012f83c2013-04-18 14:39:25 +03008466 if (enable_shadow_vmcs) {
Paolo Bonzini9a2a05b2014-07-17 11:55:46 +02008467 /* copy to memory all shadowed fields in case
8468 they were modified */
8469 copy_shadow_to_vmcs12(vmx);
Vitaly Kuznetsov945679e2018-10-16 18:50:02 +02008470 vmx->nested.need_vmcs12_sync = false;
David Matlack8ca44e82017-08-01 14:00:39 -07008471 vmx_disable_shadow_vmcs(vmx);
Abel Gordon012f83c2013-04-18 14:39:25 +03008472 }
Wincy Van705699a2015-02-03 23:58:17 +08008473 vmx->nested.posted_intr_nv = -1;
David Matlack4f2777b2016-07-13 17:16:37 -07008474
8475 /* Flush VMCS12 to guest memory */
Vitaly Kuznetsov14c07ad2018-10-08 21:28:08 +02008476 kvm_vcpu_write_guest_page(vcpu,
Paolo Bonzini9f744c52017-07-27 15:54:46 +02008477 vmx->nested.current_vmptr >> PAGE_SHIFT,
8478 vmx->nested.cached_vmcs12, 0, VMCS12_SIZE);
David Matlack4f2777b2016-07-13 17:16:37 -07008479
Vitaly Kuznetsov14c07ad2018-10-08 21:28:08 +02008480 kvm_mmu_free_roots(vcpu, &vcpu->arch.guest_mmu, KVM_MMU_ROOTS_ALL);
8481
Paolo Bonzini9a2a05b2014-07-17 11:55:46 +02008482 vmx->nested.current_vmptr = -1ull;
Abel Gordone7953d72013-04-18 14:37:55 +03008483}
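/*
 * Ordering in nested_release_vmcs12(): shadow-VMCS contents are copied
 * back into the cached vmcs12 first, the cache is then flushed to guest
 * memory, and only afterwards is current_vmptr reset.  The guest_mmu
 * roots are freed as well, since they were built from the vmcs12 that
 * is being released.
 */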
8484
Nadav Har'Elec378ae2011-05-25 23:02:54 +03008485/*
8486 * Free whatever needs to be freed from vmx->nested when L1 goes down, or
8487 * just stops using VMX.
8488 */
Vitaly Kuznetsov14c07ad2018-10-08 21:28:08 +02008489static void free_nested(struct kvm_vcpu *vcpu)
Nadav Har'Elec378ae2011-05-25 23:02:54 +03008490{
Vitaly Kuznetsov14c07ad2018-10-08 21:28:08 +02008491 struct vcpu_vmx *vmx = to_vmx(vcpu);
8492
Wanpeng Lib7455822017-11-22 14:04:00 -08008493 if (!vmx->nested.vmxon && !vmx->nested.smm.vmxon)
Nadav Har'Elec378ae2011-05-25 23:02:54 +03008494 return;
Paolo Bonzini9a2a05b2014-07-17 11:55:46 +02008495
Nadav Har'Elec378ae2011-05-25 23:02:54 +03008496 vmx->nested.vmxon = false;
Wanpeng Lib7455822017-11-22 14:04:00 -08008497 vmx->nested.smm.vmxon = false;
Wanpeng Li5c614b32015-10-13 09:18:36 -07008498 free_vpid(vmx->nested.vpid02);
David Matlack8ca44e82017-08-01 14:00:39 -07008499 vmx->nested.posted_intr_nv = -1;
8500 vmx->nested.current_vmptr = -1ull;
Jim Mattson355f4fb2016-10-28 08:29:39 -07008501 if (enable_shadow_vmcs) {
David Matlack8ca44e82017-08-01 14:00:39 -07008502 vmx_disable_shadow_vmcs(vmx);
Jim Mattson355f4fb2016-10-28 08:29:39 -07008503 vmcs_clear(vmx->vmcs01.shadow_vmcs);
8504 free_vmcs(vmx->vmcs01.shadow_vmcs);
8505 vmx->vmcs01.shadow_vmcs = NULL;
8506 }
David Matlack4f2777b2016-07-13 17:16:37 -07008507 kfree(vmx->nested.cached_vmcs12);
Liran Alon61ada742018-06-23 02:35:08 +03008508 kfree(vmx->nested.cached_shadow_vmcs12);
Jim Mattsonde3a0022017-11-27 17:22:25 -06008509 /* Unpin physical memory we referred to in the vmcs02 */
Nadav Har'Elfe3ef052011-05-25 23:10:02 +03008510 if (vmx->nested.apic_access_page) {
David Hildenbrand53a70da2017-08-03 18:11:05 +02008511 kvm_release_page_dirty(vmx->nested.apic_access_page);
Paolo Bonzini48d89b92014-08-26 13:27:46 +02008512 vmx->nested.apic_access_page = NULL;
Nadav Har'Elfe3ef052011-05-25 23:10:02 +03008513 }
Wanpeng Lia7c0b072014-08-21 19:46:50 +08008514 if (vmx->nested.virtual_apic_page) {
David Hildenbrand53a70da2017-08-03 18:11:05 +02008515 kvm_release_page_dirty(vmx->nested.virtual_apic_page);
Paolo Bonzini48d89b92014-08-26 13:27:46 +02008516 vmx->nested.virtual_apic_page = NULL;
Wanpeng Lia7c0b072014-08-21 19:46:50 +08008517 }
Wincy Van705699a2015-02-03 23:58:17 +08008518 if (vmx->nested.pi_desc_page) {
8519 kunmap(vmx->nested.pi_desc_page);
David Hildenbrand53a70da2017-08-03 18:11:05 +02008520 kvm_release_page_dirty(vmx->nested.pi_desc_page);
Wincy Van705699a2015-02-03 23:58:17 +08008521 vmx->nested.pi_desc_page = NULL;
8522 vmx->nested.pi_desc = NULL;
8523 }
Nadav Har'Elff2f6fe2011-05-25 23:05:27 +03008524
Vitaly Kuznetsov14c07ad2018-10-08 21:28:08 +02008525 kvm_mmu_free_roots(vcpu, &vcpu->arch.guest_mmu, KVM_MMU_ROOTS_ALL);
8526
Vitaly Kuznetsovb8bbab92018-10-16 18:50:03 +02008527 nested_release_evmcs(vcpu);
8528
Jim Mattsonde3a0022017-11-27 17:22:25 -06008529 free_loaded_vmcs(&vmx->nested.vmcs02);
Nadav Har'Elec378ae2011-05-25 23:02:54 +03008530}
8531
8532/* Emulate the VMXOFF instruction */
8533static int handle_vmoff(struct kvm_vcpu *vcpu)
8534{
8535 if (!nested_vmx_check_permission(vcpu))
8536 return 1;
Vitaly Kuznetsov14c07ad2018-10-08 21:28:08 +02008537 free_nested(vcpu);
Sean Christopherson09abb5e2018-09-26 09:23:55 -07008538 return nested_vmx_succeed(vcpu);
Nadav Har'Elec378ae2011-05-25 23:02:54 +03008539}
8540
Nadav Har'El27d6c862011-05-25 23:06:59 +03008541/* Emulate the VMCLEAR instruction */
8542static int handle_vmclear(struct kvm_vcpu *vcpu)
8543{
8544 struct vcpu_vmx *vmx = to_vmx(vcpu);
Jim Mattson587d7e722017-03-02 12:41:48 -08008545 u32 zero = 0;
Nadav Har'El27d6c862011-05-25 23:06:59 +03008546 gpa_t vmptr;
Nadav Har'El27d6c862011-05-25 23:06:59 +03008547
8548 if (!nested_vmx_check_permission(vcpu))
8549 return 1;
8550
Radim Krčmářcbf71272017-05-19 15:48:51 +02008551 if (nested_vmx_get_vmptr(vcpu, &vmptr))
Nadav Har'El27d6c862011-05-25 23:06:59 +03008552 return 1;
8553
Sean Christopherson09abb5e2018-09-26 09:23:55 -07008554 if (!PAGE_ALIGNED(vmptr) || (vmptr >> cpuid_maxphyaddr(vcpu)))
8555 return nested_vmx_failValid(vcpu,
8556 VMXERR_VMCLEAR_INVALID_ADDRESS);
Radim Krčmářcbf71272017-05-19 15:48:51 +02008557
Sean Christopherson09abb5e2018-09-26 09:23:55 -07008558 if (vmptr == vmx->nested.vmxon_ptr)
8559 return nested_vmx_failValid(vcpu,
8560 VMXERR_VMCLEAR_VMXON_POINTER);
Radim Krčmářcbf71272017-05-19 15:48:51 +02008561
Vitaly Kuznetsovb8bbab92018-10-16 18:50:03 +02008562 if (vmx->nested.hv_evmcs_page) {
8563 if (vmptr == vmx->nested.hv_evmcs_vmptr)
8564 nested_release_evmcs(vcpu);
8565 } else {
8566 if (vmptr == vmx->nested.current_vmptr)
8567 nested_release_vmcs12(vcpu);
Nadav Har'El27d6c862011-05-25 23:06:59 +03008568
Vitaly Kuznetsovb8bbab92018-10-16 18:50:03 +02008569 kvm_vcpu_write_guest(vcpu,
8570 vmptr + offsetof(struct vmcs12,
8571 launch_state),
8572 &zero, sizeof(zero));
8573 }
Nadav Har'El27d6c862011-05-25 23:06:59 +03008574
Sean Christopherson09abb5e2018-09-26 09:23:55 -07008575 return nested_vmx_succeed(vcpu);
Nadav Har'El27d6c862011-05-25 23:06:59 +03008576}
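/*
 * For a non-enlightened VMCS that is not the current one, VMCLEAR above
 * only zeroes launch_state in guest memory (the offsetof()-based partial
 * write); the cached vmcs12 or the enlightened VMCS is released only
 * when the operand matches current_vmptr or hv_evmcs_vmptr respectively.
 */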
8577
Nadav Har'Elcd232ad2011-05-25 23:10:33 +03008578static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch);
8579
8580/* Emulate the VMLAUNCH instruction */
8581static int handle_vmlaunch(struct kvm_vcpu *vcpu)
8582{
8583 return nested_vmx_run(vcpu, true);
8584}
8585
8586/* Emulate the VMRESUME instruction */
8587static int handle_vmresume(struct kvm_vcpu *vcpu)
8588{
8589
8590 return nested_vmx_run(vcpu, false);
8591}
8592
Nadav Har'El49f705c2011-05-25 23:08:30 +03008593/*
8594 * Read a vmcs12 field. Since these can have varying lengths and we return
8595 * one type, we choose the biggest type (u64) and zero-extend the return value
8596 * to that size. Note that the caller, handle_vmread, might need to use only
8597 * some of the bits we return here (e.g., on 32-bit guests, only 32 bits of
8598 * 64-bit fields are to be returned).
8599 */
Liran Alone2536742018-06-23 02:35:02 +03008600static inline int vmcs12_read_any(struct vmcs12 *vmcs12,
Paolo Bonzinia2ae9df2014-11-04 18:31:19 +01008601 unsigned long field, u64 *ret)
Nadav Har'El49f705c2011-05-25 23:08:30 +03008602{
8603 short offset = vmcs_field_to_offset(field);
8604 char *p;
8605
8606 if (offset < 0)
Paolo Bonzinia2ae9df2014-11-04 18:31:19 +01008607 return offset;
Nadav Har'El49f705c2011-05-25 23:08:30 +03008608
Liran Alone2536742018-06-23 02:35:02 +03008609 p = (char *)vmcs12 + offset;
Nadav Har'El49f705c2011-05-25 23:08:30 +03008610
Jim Mattsond37f4262017-12-22 12:12:16 -08008611 switch (vmcs_field_width(field)) {
8612 case VMCS_FIELD_WIDTH_NATURAL_WIDTH:
Nadav Har'El49f705c2011-05-25 23:08:30 +03008613 *ret = *((natural_width *)p);
Paolo Bonzinia2ae9df2014-11-04 18:31:19 +01008614 return 0;
Jim Mattsond37f4262017-12-22 12:12:16 -08008615 case VMCS_FIELD_WIDTH_U16:
Nadav Har'El49f705c2011-05-25 23:08:30 +03008616 *ret = *((u16 *)p);
Paolo Bonzinia2ae9df2014-11-04 18:31:19 +01008617 return 0;
Jim Mattsond37f4262017-12-22 12:12:16 -08008618 case VMCS_FIELD_WIDTH_U32:
Nadav Har'El49f705c2011-05-25 23:08:30 +03008619 *ret = *((u32 *)p);
Paolo Bonzinia2ae9df2014-11-04 18:31:19 +01008620 return 0;
Jim Mattsond37f4262017-12-22 12:12:16 -08008621 case VMCS_FIELD_WIDTH_U64:
Nadav Har'El49f705c2011-05-25 23:08:30 +03008622 *ret = *((u64 *)p);
Paolo Bonzinia2ae9df2014-11-04 18:31:19 +01008623 return 0;
Nadav Har'El49f705c2011-05-25 23:08:30 +03008624 default:
Paolo Bonzinia2ae9df2014-11-04 18:31:19 +01008625 WARN_ON(1);
8626 return -ENOENT;
Nadav Har'El49f705c2011-05-25 23:08:30 +03008627 }
8628}
8629
Abel Gordon20b97fe2013-04-18 14:36:25 +03008630
Liran Alone2536742018-06-23 02:35:02 +03008631static inline int vmcs12_write_any(struct vmcs12 *vmcs12,
Paolo Bonzinia2ae9df2014-11-04 18:31:19 +01008632 unsigned long field, u64 field_value)
{
Abel Gordon20b97fe2013-04-18 14:36:25 +03008633 short offset = vmcs_field_to_offset(field);
Liran Alone2536742018-06-23 02:35:02 +03008634 char *p = (char *)vmcs12 + offset;
Abel Gordon20b97fe2013-04-18 14:36:25 +03008635 if (offset < 0)
Paolo Bonzinia2ae9df2014-11-04 18:31:19 +01008636 return offset;
Abel Gordon20b97fe2013-04-18 14:36:25 +03008637
Jim Mattsond37f4262017-12-22 12:12:16 -08008638 switch (vmcs_field_width(field)) {
8639 case VMCS_FIELD_WIDTH_U16:
Abel Gordon20b97fe2013-04-18 14:36:25 +03008640 *(u16 *)p = field_value;
Paolo Bonzinia2ae9df2014-11-04 18:31:19 +01008641 return 0;
Jim Mattsond37f4262017-12-22 12:12:16 -08008642 case VMCS_FIELD_WIDTH_U32:
Abel Gordon20b97fe2013-04-18 14:36:25 +03008643 *(u32 *)p = field_value;
Paolo Bonzinia2ae9df2014-11-04 18:31:19 +01008644 return 0;
Jim Mattsond37f4262017-12-22 12:12:16 -08008645 case VMCS_FIELD_WIDTH_U64:
Abel Gordon20b97fe2013-04-18 14:36:25 +03008646 *(u64 *)p = field_value;
Paolo Bonzinia2ae9df2014-11-04 18:31:19 +01008647 return 0;
Jim Mattsond37f4262017-12-22 12:12:16 -08008648 case VMCS_FIELD_WIDTH_NATURAL_WIDTH:
Abel Gordon20b97fe2013-04-18 14:36:25 +03008649 *(natural_width *)p = field_value;
Paolo Bonzinia2ae9df2014-11-04 18:31:19 +01008650 return 0;
Abel Gordon20b97fe2013-04-18 14:36:25 +03008651 default:
Paolo Bonzinia2ae9df2014-11-04 18:31:19 +01008652 WARN_ON(1);
8653 return -ENOENT;
Abel Gordon20b97fe2013-04-18 14:36:25 +03008654 }
8655
8656}
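/*
 * Illustrative sketch (not called from anywhere in this file) of how
 * the two accessors above pair up; GUEST_RIP is a natural-width field,
 * so all 64 bits round-trip on a 64-bit host, whereas a u32 field
 * would be truncated on the write side:
 *
 *	u64 val;
 *
 *	if (vmcs12_read_any(get_vmcs12(vcpu), GUEST_RIP, &val) == 0)
 *		vmcs12_write_any(get_vmcs12(vcpu), GUEST_RIP, val);
 *
 * Both return a negative value for an unknown field encoding, which
 * handle_vmread()/handle_vmwrite() turn into
 * VMXERR_UNSUPPORTED_VMCS_COMPONENT.
 */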
8657
Vitaly Kuznetsov945679e2018-10-16 18:50:02 +02008658static int copy_enlightened_to_vmcs12(struct vcpu_vmx *vmx)
8659{
8660 struct vmcs12 *vmcs12 = vmx->nested.cached_vmcs12;
8661 struct hv_enlightened_vmcs *evmcs = vmx->nested.hv_evmcs;
8662
Vitaly Kuznetsovb8bbab92018-10-16 18:50:03 +02008663 vmcs12->hdr.revision_id = evmcs->revision_id;
8664
Vitaly Kuznetsov945679e2018-10-16 18:50:02 +02008665 /* HV_VMX_ENLIGHTENED_CLEAN_FIELD_NONE */
8666 vmcs12->tpr_threshold = evmcs->tpr_threshold;
8667 vmcs12->guest_rip = evmcs->guest_rip;
8668
8669 if (unlikely(!(evmcs->hv_clean_fields &
8670 HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_BASIC))) {
8671 vmcs12->guest_rsp = evmcs->guest_rsp;
8672 vmcs12->guest_rflags = evmcs->guest_rflags;
8673 vmcs12->guest_interruptibility_info =
8674 evmcs->guest_interruptibility_info;
8675 }
8676
8677 if (unlikely(!(evmcs->hv_clean_fields &
8678 HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_PROC))) {
8679 vmcs12->cpu_based_vm_exec_control =
8680 evmcs->cpu_based_vm_exec_control;
8681 }
8682
8683 if (unlikely(!(evmcs->hv_clean_fields &
8684 HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_PROC))) {
8685 vmcs12->exception_bitmap = evmcs->exception_bitmap;
8686 }
8687
8688 if (unlikely(!(evmcs->hv_clean_fields &
8689 HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_ENTRY))) {
8690 vmcs12->vm_entry_controls = evmcs->vm_entry_controls;
8691 }
8692
8693 if (unlikely(!(evmcs->hv_clean_fields &
8694 HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_EVENT))) {
8695 vmcs12->vm_entry_intr_info_field =
8696 evmcs->vm_entry_intr_info_field;
8697 vmcs12->vm_entry_exception_error_code =
8698 evmcs->vm_entry_exception_error_code;
8699 vmcs12->vm_entry_instruction_len =
8700 evmcs->vm_entry_instruction_len;
8701 }
8702
8703 if (unlikely(!(evmcs->hv_clean_fields &
8704 HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1))) {
8705 vmcs12->host_ia32_pat = evmcs->host_ia32_pat;
8706 vmcs12->host_ia32_efer = evmcs->host_ia32_efer;
8707 vmcs12->host_cr0 = evmcs->host_cr0;
8708 vmcs12->host_cr3 = evmcs->host_cr3;
8709 vmcs12->host_cr4 = evmcs->host_cr4;
8710 vmcs12->host_ia32_sysenter_esp = evmcs->host_ia32_sysenter_esp;
8711 vmcs12->host_ia32_sysenter_eip = evmcs->host_ia32_sysenter_eip;
8712 vmcs12->host_rip = evmcs->host_rip;
8713 vmcs12->host_ia32_sysenter_cs = evmcs->host_ia32_sysenter_cs;
8714 vmcs12->host_es_selector = evmcs->host_es_selector;
8715 vmcs12->host_cs_selector = evmcs->host_cs_selector;
8716 vmcs12->host_ss_selector = evmcs->host_ss_selector;
8717 vmcs12->host_ds_selector = evmcs->host_ds_selector;
8718 vmcs12->host_fs_selector = evmcs->host_fs_selector;
8719 vmcs12->host_gs_selector = evmcs->host_gs_selector;
8720 vmcs12->host_tr_selector = evmcs->host_tr_selector;
8721 }
8722
8723 if (unlikely(!(evmcs->hv_clean_fields &
8724 HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1))) {
8725 vmcs12->pin_based_vm_exec_control =
8726 evmcs->pin_based_vm_exec_control;
8727 vmcs12->vm_exit_controls = evmcs->vm_exit_controls;
8728 vmcs12->secondary_vm_exec_control =
8729 evmcs->secondary_vm_exec_control;
8730 }
8731
8732 if (unlikely(!(evmcs->hv_clean_fields &
8733 HV_VMX_ENLIGHTENED_CLEAN_FIELD_IO_BITMAP))) {
8734 vmcs12->io_bitmap_a = evmcs->io_bitmap_a;
8735 vmcs12->io_bitmap_b = evmcs->io_bitmap_b;
8736 }
8737
8738 if (unlikely(!(evmcs->hv_clean_fields &
8739 HV_VMX_ENLIGHTENED_CLEAN_FIELD_MSR_BITMAP))) {
8740 vmcs12->msr_bitmap = evmcs->msr_bitmap;
8741 }
8742
8743 if (unlikely(!(evmcs->hv_clean_fields &
8744 HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2))) {
8745 vmcs12->guest_es_base = evmcs->guest_es_base;
8746 vmcs12->guest_cs_base = evmcs->guest_cs_base;
8747 vmcs12->guest_ss_base = evmcs->guest_ss_base;
8748 vmcs12->guest_ds_base = evmcs->guest_ds_base;
8749 vmcs12->guest_fs_base = evmcs->guest_fs_base;
8750 vmcs12->guest_gs_base = evmcs->guest_gs_base;
8751 vmcs12->guest_ldtr_base = evmcs->guest_ldtr_base;
8752 vmcs12->guest_tr_base = evmcs->guest_tr_base;
8753 vmcs12->guest_gdtr_base = evmcs->guest_gdtr_base;
8754 vmcs12->guest_idtr_base = evmcs->guest_idtr_base;
8755 vmcs12->guest_es_limit = evmcs->guest_es_limit;
8756 vmcs12->guest_cs_limit = evmcs->guest_cs_limit;
8757 vmcs12->guest_ss_limit = evmcs->guest_ss_limit;
8758 vmcs12->guest_ds_limit = evmcs->guest_ds_limit;
8759 vmcs12->guest_fs_limit = evmcs->guest_fs_limit;
8760 vmcs12->guest_gs_limit = evmcs->guest_gs_limit;
8761 vmcs12->guest_ldtr_limit = evmcs->guest_ldtr_limit;
8762 vmcs12->guest_tr_limit = evmcs->guest_tr_limit;
8763 vmcs12->guest_gdtr_limit = evmcs->guest_gdtr_limit;
8764 vmcs12->guest_idtr_limit = evmcs->guest_idtr_limit;
8765 vmcs12->guest_es_ar_bytes = evmcs->guest_es_ar_bytes;
8766 vmcs12->guest_cs_ar_bytes = evmcs->guest_cs_ar_bytes;
8767 vmcs12->guest_ss_ar_bytes = evmcs->guest_ss_ar_bytes;
8768 vmcs12->guest_ds_ar_bytes = evmcs->guest_ds_ar_bytes;
8769 vmcs12->guest_fs_ar_bytes = evmcs->guest_fs_ar_bytes;
8770 vmcs12->guest_gs_ar_bytes = evmcs->guest_gs_ar_bytes;
8771 vmcs12->guest_ldtr_ar_bytes = evmcs->guest_ldtr_ar_bytes;
8772 vmcs12->guest_tr_ar_bytes = evmcs->guest_tr_ar_bytes;
8773 vmcs12->guest_es_selector = evmcs->guest_es_selector;
8774 vmcs12->guest_cs_selector = evmcs->guest_cs_selector;
8775 vmcs12->guest_ss_selector = evmcs->guest_ss_selector;
8776 vmcs12->guest_ds_selector = evmcs->guest_ds_selector;
8777 vmcs12->guest_fs_selector = evmcs->guest_fs_selector;
8778 vmcs12->guest_gs_selector = evmcs->guest_gs_selector;
8779 vmcs12->guest_ldtr_selector = evmcs->guest_ldtr_selector;
8780 vmcs12->guest_tr_selector = evmcs->guest_tr_selector;
8781 }
8782
8783 if (unlikely(!(evmcs->hv_clean_fields &
8784 HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_GRP2))) {
8785 vmcs12->tsc_offset = evmcs->tsc_offset;
8786 vmcs12->virtual_apic_page_addr = evmcs->virtual_apic_page_addr;
8787 vmcs12->xss_exit_bitmap = evmcs->xss_exit_bitmap;
8788 }
8789
8790 if (unlikely(!(evmcs->hv_clean_fields &
8791 HV_VMX_ENLIGHTENED_CLEAN_FIELD_CRDR))) {
8792 vmcs12->cr0_guest_host_mask = evmcs->cr0_guest_host_mask;
8793 vmcs12->cr4_guest_host_mask = evmcs->cr4_guest_host_mask;
8794 vmcs12->cr0_read_shadow = evmcs->cr0_read_shadow;
8795 vmcs12->cr4_read_shadow = evmcs->cr4_read_shadow;
8796 vmcs12->guest_cr0 = evmcs->guest_cr0;
8797 vmcs12->guest_cr3 = evmcs->guest_cr3;
8798 vmcs12->guest_cr4 = evmcs->guest_cr4;
8799 vmcs12->guest_dr7 = evmcs->guest_dr7;
8800 }
8801
8802 if (unlikely(!(evmcs->hv_clean_fields &
8803 HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_POINTER))) {
8804 vmcs12->host_fs_base = evmcs->host_fs_base;
8805 vmcs12->host_gs_base = evmcs->host_gs_base;
8806 vmcs12->host_tr_base = evmcs->host_tr_base;
8807 vmcs12->host_gdtr_base = evmcs->host_gdtr_base;
8808 vmcs12->host_idtr_base = evmcs->host_idtr_base;
8809 vmcs12->host_rsp = evmcs->host_rsp;
8810 }
8811
8812 if (unlikely(!(evmcs->hv_clean_fields &
8813 HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_XLAT))) {
8814 vmcs12->ept_pointer = evmcs->ept_pointer;
8815 vmcs12->virtual_processor_id = evmcs->virtual_processor_id;
8816 }
8817
8818 if (unlikely(!(evmcs->hv_clean_fields &
8819 HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1))) {
8820 vmcs12->vmcs_link_pointer = evmcs->vmcs_link_pointer;
8821 vmcs12->guest_ia32_debugctl = evmcs->guest_ia32_debugctl;
8822 vmcs12->guest_ia32_pat = evmcs->guest_ia32_pat;
8823 vmcs12->guest_ia32_efer = evmcs->guest_ia32_efer;
8824 vmcs12->guest_pdptr0 = evmcs->guest_pdptr0;
8825 vmcs12->guest_pdptr1 = evmcs->guest_pdptr1;
8826 vmcs12->guest_pdptr2 = evmcs->guest_pdptr2;
8827 vmcs12->guest_pdptr3 = evmcs->guest_pdptr3;
8828 vmcs12->guest_pending_dbg_exceptions =
8829 evmcs->guest_pending_dbg_exceptions;
8830 vmcs12->guest_sysenter_esp = evmcs->guest_sysenter_esp;
8831 vmcs12->guest_sysenter_eip = evmcs->guest_sysenter_eip;
8832 vmcs12->guest_bndcfgs = evmcs->guest_bndcfgs;
8833 vmcs12->guest_activity_state = evmcs->guest_activity_state;
8834 vmcs12->guest_sysenter_cs = evmcs->guest_sysenter_cs;
8835 }
8836
8837 /*
8838 * Not used?
8839 * vmcs12->vm_exit_msr_store_addr = evmcs->vm_exit_msr_store_addr;
8840 * vmcs12->vm_exit_msr_load_addr = evmcs->vm_exit_msr_load_addr;
8841 * vmcs12->vm_entry_msr_load_addr = evmcs->vm_entry_msr_load_addr;
8842 * vmcs12->cr3_target_value0 = evmcs->cr3_target_value0;
8843 * vmcs12->cr3_target_value1 = evmcs->cr3_target_value1;
8844 * vmcs12->cr3_target_value2 = evmcs->cr3_target_value2;
8845 * vmcs12->cr3_target_value3 = evmcs->cr3_target_value3;
8846 * vmcs12->page_fault_error_code_mask =
8847 * evmcs->page_fault_error_code_mask;
8848 * vmcs12->page_fault_error_code_match =
8849 * evmcs->page_fault_error_code_match;
8850 * vmcs12->cr3_target_count = evmcs->cr3_target_count;
8851 * vmcs12->vm_exit_msr_store_count = evmcs->vm_exit_msr_store_count;
8852 * vmcs12->vm_exit_msr_load_count = evmcs->vm_exit_msr_load_count;
8853 * vmcs12->vm_entry_msr_load_count = evmcs->vm_entry_msr_load_count;
8854 */
8855
8856 /*
8857 * Read only fields:
8858 * vmcs12->guest_physical_address = evmcs->guest_physical_address;
8859 * vmcs12->vm_instruction_error = evmcs->vm_instruction_error;
8860 * vmcs12->vm_exit_reason = evmcs->vm_exit_reason;
8861 * vmcs12->vm_exit_intr_info = evmcs->vm_exit_intr_info;
8862 * vmcs12->vm_exit_intr_error_code = evmcs->vm_exit_intr_error_code;
8863 * vmcs12->idt_vectoring_info_field = evmcs->idt_vectoring_info_field;
8864 * vmcs12->idt_vectoring_error_code = evmcs->idt_vectoring_error_code;
8865 * vmcs12->vm_exit_instruction_len = evmcs->vm_exit_instruction_len;
8866 * vmcs12->vmx_instruction_info = evmcs->vmx_instruction_info;
8867 * vmcs12->exit_qualification = evmcs->exit_qualification;
8868 * vmcs12->guest_linear_address = evmcs->guest_linear_address;
8869 *
8870 * Not present in struct vmcs12:
8871 * vmcs12->exit_io_instruction_ecx = evmcs->exit_io_instruction_ecx;
8872 * vmcs12->exit_io_instruction_esi = evmcs->exit_io_instruction_esi;
8873 * vmcs12->exit_io_instruction_edi = evmcs->exit_io_instruction_edi;
8874 * vmcs12->exit_io_instruction_eip = evmcs->exit_io_instruction_eip;
8875 */
8876
8877 return 0;
8878}
8879
8880static int copy_vmcs12_to_enlightened(struct vcpu_vmx *vmx)
8881{
8882 struct vmcs12 *vmcs12 = vmx->nested.cached_vmcs12;
8883 struct hv_enlightened_vmcs *evmcs = vmx->nested.hv_evmcs;
8884
8885 /*
8886 * Should not be changed by KVM:
8887 *
8888 * evmcs->host_es_selector = vmcs12->host_es_selector;
8889 * evmcs->host_cs_selector = vmcs12->host_cs_selector;
8890 * evmcs->host_ss_selector = vmcs12->host_ss_selector;
8891 * evmcs->host_ds_selector = vmcs12->host_ds_selector;
8892 * evmcs->host_fs_selector = vmcs12->host_fs_selector;
8893 * evmcs->host_gs_selector = vmcs12->host_gs_selector;
8894 * evmcs->host_tr_selector = vmcs12->host_tr_selector;
8895 * evmcs->host_ia32_pat = vmcs12->host_ia32_pat;
8896 * evmcs->host_ia32_efer = vmcs12->host_ia32_efer;
8897 * evmcs->host_cr0 = vmcs12->host_cr0;
8898 * evmcs->host_cr3 = vmcs12->host_cr3;
8899 * evmcs->host_cr4 = vmcs12->host_cr4;
8900 * evmcs->host_ia32_sysenter_esp = vmcs12->host_ia32_sysenter_esp;
8901 * evmcs->host_ia32_sysenter_eip = vmcs12->host_ia32_sysenter_eip;
8902 * evmcs->host_rip = vmcs12->host_rip;
8903 * evmcs->host_ia32_sysenter_cs = vmcs12->host_ia32_sysenter_cs;
8904 * evmcs->host_fs_base = vmcs12->host_fs_base;
8905 * evmcs->host_gs_base = vmcs12->host_gs_base;
8906 * evmcs->host_tr_base = vmcs12->host_tr_base;
8907 * evmcs->host_gdtr_base = vmcs12->host_gdtr_base;
8908 * evmcs->host_idtr_base = vmcs12->host_idtr_base;
8909 * evmcs->host_rsp = vmcs12->host_rsp;
8910 * sync_vmcs12() doesn't read these:
8911 * evmcs->io_bitmap_a = vmcs12->io_bitmap_a;
8912 * evmcs->io_bitmap_b = vmcs12->io_bitmap_b;
8913 * evmcs->msr_bitmap = vmcs12->msr_bitmap;
8914 * evmcs->ept_pointer = vmcs12->ept_pointer;
8915 * evmcs->xss_exit_bitmap = vmcs12->xss_exit_bitmap;
8916 * evmcs->vm_exit_msr_store_addr = vmcs12->vm_exit_msr_store_addr;
8917 * evmcs->vm_exit_msr_load_addr = vmcs12->vm_exit_msr_load_addr;
8918 * evmcs->vm_entry_msr_load_addr = vmcs12->vm_entry_msr_load_addr;
8919 * evmcs->cr3_target_value0 = vmcs12->cr3_target_value0;
8920 * evmcs->cr3_target_value1 = vmcs12->cr3_target_value1;
8921 * evmcs->cr3_target_value2 = vmcs12->cr3_target_value2;
8922 * evmcs->cr3_target_value3 = vmcs12->cr3_target_value3;
8923 * evmcs->tpr_threshold = vmcs12->tpr_threshold;
8924 * evmcs->virtual_processor_id = vmcs12->virtual_processor_id;
8925 * evmcs->exception_bitmap = vmcs12->exception_bitmap;
8926 * evmcs->vmcs_link_pointer = vmcs12->vmcs_link_pointer;
8927 * evmcs->pin_based_vm_exec_control = vmcs12->pin_based_vm_exec_control;
8928 * evmcs->vm_exit_controls = vmcs12->vm_exit_controls;
8929 * evmcs->secondary_vm_exec_control = vmcs12->secondary_vm_exec_control;
8930 * evmcs->page_fault_error_code_mask =
8931 * vmcs12->page_fault_error_code_mask;
8932 * evmcs->page_fault_error_code_match =
8933 * vmcs12->page_fault_error_code_match;
8934 * evmcs->cr3_target_count = vmcs12->cr3_target_count;
8935 * evmcs->virtual_apic_page_addr = vmcs12->virtual_apic_page_addr;
8936 * evmcs->tsc_offset = vmcs12->tsc_offset;
8937 * evmcs->guest_ia32_debugctl = vmcs12->guest_ia32_debugctl;
8938 * evmcs->cr0_guest_host_mask = vmcs12->cr0_guest_host_mask;
8939 * evmcs->cr4_guest_host_mask = vmcs12->cr4_guest_host_mask;
8940 * evmcs->cr0_read_shadow = vmcs12->cr0_read_shadow;
8941 * evmcs->cr4_read_shadow = vmcs12->cr4_read_shadow;
8942 * evmcs->vm_exit_msr_store_count = vmcs12->vm_exit_msr_store_count;
8943 * evmcs->vm_exit_msr_load_count = vmcs12->vm_exit_msr_load_count;
8944 * evmcs->vm_entry_msr_load_count = vmcs12->vm_entry_msr_load_count;
8945 *
8946 * Not present in struct vmcs12:
8947 * evmcs->exit_io_instruction_ecx = vmcs12->exit_io_instruction_ecx;
8948 * evmcs->exit_io_instruction_esi = vmcs12->exit_io_instruction_esi;
8949 * evmcs->exit_io_instruction_edi = vmcs12->exit_io_instruction_edi;
8950 * evmcs->exit_io_instruction_eip = vmcs12->exit_io_instruction_eip;
8951 */
8952
8953 evmcs->guest_es_selector = vmcs12->guest_es_selector;
8954 evmcs->guest_cs_selector = vmcs12->guest_cs_selector;
8955 evmcs->guest_ss_selector = vmcs12->guest_ss_selector;
8956 evmcs->guest_ds_selector = vmcs12->guest_ds_selector;
8957 evmcs->guest_fs_selector = vmcs12->guest_fs_selector;
8958 evmcs->guest_gs_selector = vmcs12->guest_gs_selector;
8959 evmcs->guest_ldtr_selector = vmcs12->guest_ldtr_selector;
8960 evmcs->guest_tr_selector = vmcs12->guest_tr_selector;
8961
8962 evmcs->guest_es_limit = vmcs12->guest_es_limit;
8963 evmcs->guest_cs_limit = vmcs12->guest_cs_limit;
8964 evmcs->guest_ss_limit = vmcs12->guest_ss_limit;
8965 evmcs->guest_ds_limit = vmcs12->guest_ds_limit;
8966 evmcs->guest_fs_limit = vmcs12->guest_fs_limit;
8967 evmcs->guest_gs_limit = vmcs12->guest_gs_limit;
8968 evmcs->guest_ldtr_limit = vmcs12->guest_ldtr_limit;
8969 evmcs->guest_tr_limit = vmcs12->guest_tr_limit;
8970 evmcs->guest_gdtr_limit = vmcs12->guest_gdtr_limit;
8971 evmcs->guest_idtr_limit = vmcs12->guest_idtr_limit;
8972
8973 evmcs->guest_es_ar_bytes = vmcs12->guest_es_ar_bytes;
8974 evmcs->guest_cs_ar_bytes = vmcs12->guest_cs_ar_bytes;
8975 evmcs->guest_ss_ar_bytes = vmcs12->guest_ss_ar_bytes;
8976 evmcs->guest_ds_ar_bytes = vmcs12->guest_ds_ar_bytes;
8977 evmcs->guest_fs_ar_bytes = vmcs12->guest_fs_ar_bytes;
8978 evmcs->guest_gs_ar_bytes = vmcs12->guest_gs_ar_bytes;
8979 evmcs->guest_ldtr_ar_bytes = vmcs12->guest_ldtr_ar_bytes;
8980 evmcs->guest_tr_ar_bytes = vmcs12->guest_tr_ar_bytes;
8981
8982 evmcs->guest_es_base = vmcs12->guest_es_base;
8983 evmcs->guest_cs_base = vmcs12->guest_cs_base;
8984 evmcs->guest_ss_base = vmcs12->guest_ss_base;
8985 evmcs->guest_ds_base = vmcs12->guest_ds_base;
8986 evmcs->guest_fs_base = vmcs12->guest_fs_base;
8987 evmcs->guest_gs_base = vmcs12->guest_gs_base;
8988 evmcs->guest_ldtr_base = vmcs12->guest_ldtr_base;
8989 evmcs->guest_tr_base = vmcs12->guest_tr_base;
8990 evmcs->guest_gdtr_base = vmcs12->guest_gdtr_base;
8991 evmcs->guest_idtr_base = vmcs12->guest_idtr_base;
8992
8993 evmcs->guest_ia32_pat = vmcs12->guest_ia32_pat;
8994 evmcs->guest_ia32_efer = vmcs12->guest_ia32_efer;
8995
8996 evmcs->guest_pdptr0 = vmcs12->guest_pdptr0;
8997 evmcs->guest_pdptr1 = vmcs12->guest_pdptr1;
8998 evmcs->guest_pdptr2 = vmcs12->guest_pdptr2;
8999 evmcs->guest_pdptr3 = vmcs12->guest_pdptr3;
9000
9001 evmcs->guest_pending_dbg_exceptions =
9002 vmcs12->guest_pending_dbg_exceptions;
9003 evmcs->guest_sysenter_esp = vmcs12->guest_sysenter_esp;
9004 evmcs->guest_sysenter_eip = vmcs12->guest_sysenter_eip;
9005
9006 evmcs->guest_activity_state = vmcs12->guest_activity_state;
9007 evmcs->guest_sysenter_cs = vmcs12->guest_sysenter_cs;
9008
9009 evmcs->guest_cr0 = vmcs12->guest_cr0;
9010 evmcs->guest_cr3 = vmcs12->guest_cr3;
9011 evmcs->guest_cr4 = vmcs12->guest_cr4;
9012 evmcs->guest_dr7 = vmcs12->guest_dr7;
9013
9014 evmcs->guest_physical_address = vmcs12->guest_physical_address;
9015
9016 evmcs->vm_instruction_error = vmcs12->vm_instruction_error;
9017 evmcs->vm_exit_reason = vmcs12->vm_exit_reason;
9018 evmcs->vm_exit_intr_info = vmcs12->vm_exit_intr_info;
9019 evmcs->vm_exit_intr_error_code = vmcs12->vm_exit_intr_error_code;
9020 evmcs->idt_vectoring_info_field = vmcs12->idt_vectoring_info_field;
9021 evmcs->idt_vectoring_error_code = vmcs12->idt_vectoring_error_code;
9022 evmcs->vm_exit_instruction_len = vmcs12->vm_exit_instruction_len;
9023 evmcs->vmx_instruction_info = vmcs12->vmx_instruction_info;
9024
9025 evmcs->exit_qualification = vmcs12->exit_qualification;
9026
9027 evmcs->guest_linear_address = vmcs12->guest_linear_address;
9028 evmcs->guest_rsp = vmcs12->guest_rsp;
9029 evmcs->guest_rflags = vmcs12->guest_rflags;
9030
9031 evmcs->guest_interruptibility_info =
9032 vmcs12->guest_interruptibility_info;
9033 evmcs->cpu_based_vm_exec_control = vmcs12->cpu_based_vm_exec_control;
9034 evmcs->vm_entry_controls = vmcs12->vm_entry_controls;
9035 evmcs->vm_entry_intr_info_field = vmcs12->vm_entry_intr_info_field;
9036 evmcs->vm_entry_exception_error_code =
9037 vmcs12->vm_entry_exception_error_code;
9038 evmcs->vm_entry_instruction_len = vmcs12->vm_entry_instruction_len;
9039
9040 evmcs->guest_rip = vmcs12->guest_rip;
9041
9042 evmcs->guest_bndcfgs = vmcs12->guest_bndcfgs;
9043
9044 return 0;
9045}
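/*
 * The two copy routines above are asymmetric on purpose: the
 * guest-to-KVM direction (copy_enlightened_to_vmcs12()) honors the
 * hv_clean_fields mask and skips groups the guest marked as unchanged,
 * while the KVM-to-guest direction copies its fields unconditionally;
 * the comment blocks inside each routine list what is deliberately
 * left untouched.
 */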
9046
Jim Mattsonf4160e42018-05-29 09:11:33 -07009047/*
9048 * Copy the writable VMCS shadow fields back to the VMCS12, in case
9049 * they have been modified by the L1 guest. Note that the "read-only"
9050 * VM-exit information fields are actually writable if the vCPU is
9051 * configured to support "VMWRITE to any supported field in the VMCS."
9052 */
Abel Gordon16f5b902013-04-18 14:38:25 +03009053static void copy_shadow_to_vmcs12(struct vcpu_vmx *vmx)
9054{
Jim Mattsonf4160e42018-05-29 09:11:33 -07009055 const u16 *fields[] = {
9056 shadow_read_write_fields,
9057 shadow_read_only_fields
9058 };
9059 const int max_fields[] = {
9060 max_shadow_read_write_fields,
9061 max_shadow_read_only_fields
9062 };
9063 int i, q;
Abel Gordon16f5b902013-04-18 14:38:25 +03009064 unsigned long field;
9065 u64 field_value;
Jim Mattson355f4fb2016-10-28 08:29:39 -07009066 struct vmcs *shadow_vmcs = vmx->vmcs01.shadow_vmcs;
Abel Gordon16f5b902013-04-18 14:38:25 +03009067
Jan Kiszka282da872014-10-08 18:05:39 +02009068 preempt_disable();
9069
Abel Gordon16f5b902013-04-18 14:38:25 +03009070 vmcs_load(shadow_vmcs);
9071
Jim Mattsonf4160e42018-05-29 09:11:33 -07009072 for (q = 0; q < ARRAY_SIZE(fields); q++) {
9073 for (i = 0; i < max_fields[q]; i++) {
9074 field = fields[q][i];
9075 field_value = __vmcs_readl(field);
Liran Alone2536742018-06-23 02:35:02 +03009076 vmcs12_write_any(get_vmcs12(&vmx->vcpu), field, field_value);
Jim Mattsonf4160e42018-05-29 09:11:33 -07009077 }
9078 /*
9079 * Skip the VM-exit information fields if they are read-only.
9080 */
9081 if (!nested_cpu_has_vmwrite_any_field(&vmx->vcpu))
9082 break;
Abel Gordon16f5b902013-04-18 14:38:25 +03009083 }
9084
9085 vmcs_clear(shadow_vmcs);
9086 vmcs_load(vmx->loaded_vmcs->vmcs);
Jan Kiszka282da872014-10-08 18:05:39 +02009087
9088 preempt_enable();
Abel Gordon16f5b902013-04-18 14:38:25 +03009089}
9090
Abel Gordonc3114422013-04-18 14:38:55 +03009091static void copy_vmcs12_to_shadow(struct vcpu_vmx *vmx)
9092{
Paolo Bonzini44900ba2017-12-13 12:58:02 +01009093 const u16 *fields[] = {
Mathias Krausec2bae892013-06-26 20:36:21 +02009094 shadow_read_write_fields,
9095 shadow_read_only_fields
Abel Gordonc3114422013-04-18 14:38:55 +03009096 };
Mathias Krausec2bae892013-06-26 20:36:21 +02009097 const int max_fields[] = {
Abel Gordonc3114422013-04-18 14:38:55 +03009098 max_shadow_read_write_fields,
9099 max_shadow_read_only_fields
9100 };
9101 int i, q;
9102 unsigned long field;
9103 u64 field_value = 0;
Jim Mattson355f4fb2016-10-28 08:29:39 -07009104 struct vmcs *shadow_vmcs = vmx->vmcs01.shadow_vmcs;
Abel Gordonc3114422013-04-18 14:38:55 +03009105
9106 vmcs_load(shadow_vmcs);
9107
Mathias Krausec2bae892013-06-26 20:36:21 +02009108 for (q = 0; q < ARRAY_SIZE(fields); q++) {
Abel Gordonc3114422013-04-18 14:38:55 +03009109 for (i = 0; i < max_fields[q]; i++) {
9110 field = fields[q][i];
Liran Alone2536742018-06-23 02:35:02 +03009111 vmcs12_read_any(get_vmcs12(&vmx->vcpu), field, &field_value);
Paolo Bonzini44900ba2017-12-13 12:58:02 +01009112 __vmcs_writel(field, field_value);
Abel Gordonc3114422013-04-18 14:38:55 +03009113 }
9114 }
9115
9116 vmcs_clear(shadow_vmcs);
9117 vmcs_load(vmx->loaded_vmcs->vmcs);
9118}
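/*
 * Both shadow-VMCS copy helpers above work by making the shadow VMCS
 * the current VMCS (vmcs_load()) so that __vmcs_readl()/__vmcs_writel()
 * operate on it, then restoring loaded_vmcs->vmcs afterwards.
 * copy_shadow_to_vmcs12() additionally disables preemption so that a
 * reschedule cannot load a different VMCS while the shadow VMCS is
 * current.
 */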
9119
Nadav Har'El49f705c2011-05-25 23:08:30 +03009120static int handle_vmread(struct kvm_vcpu *vcpu)
9121{
9122 unsigned long field;
9123 u64 field_value;
9124 unsigned long exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
9125 u32 vmx_instruction_info = vmcs_read32(VMX_INSTRUCTION_INFO);
9126 gva_t gva = 0;
Liran Alon6d894f42018-06-23 02:35:09 +03009127 struct vmcs12 *vmcs12;
Nadav Har'El49f705c2011-05-25 23:08:30 +03009128
Kyle Hueyeb277562016-11-29 12:40:39 -08009129 if (!nested_vmx_check_permission(vcpu))
Nadav Har'El49f705c2011-05-25 23:08:30 +03009130 return 1;
9131
Sean Christopherson09abb5e2018-09-26 09:23:55 -07009132 if (to_vmx(vcpu)->nested.current_vmptr == -1ull)
9133 return nested_vmx_failInvalid(vcpu);
Kyle Hueyeb277562016-11-29 12:40:39 -08009134
Liran Alon6d894f42018-06-23 02:35:09 +03009135 if (!is_guest_mode(vcpu))
9136 vmcs12 = get_vmcs12(vcpu);
9137 else {
9138 /*
9139 * When vmcs12->vmcs_link_pointer is -1ull, any VMREAD
9140 * to a shadowed field sets the ALU flags for VMfailInvalid.
9141 */
Sean Christopherson09abb5e2018-09-26 09:23:55 -07009142 if (get_vmcs12(vcpu)->vmcs_link_pointer == -1ull)
9143 return nested_vmx_failInvalid(vcpu);
Liran Alon6d894f42018-06-23 02:35:09 +03009144 vmcs12 = get_shadow_vmcs12(vcpu);
9145 }
9146
Nadav Har'El49f705c2011-05-25 23:08:30 +03009147 /* Decode instruction info and find the field to read */
Nadav Amit27e6fb52014-06-18 17:19:26 +03009148 field = kvm_register_readl(vcpu, (((vmx_instruction_info) >> 28) & 0xf));
Nadav Har'El49f705c2011-05-25 23:08:30 +03009149 /* Read the field, zero-extended to a u64 field_value */
Sean Christopherson09abb5e2018-09-26 09:23:55 -07009150 if (vmcs12_read_any(vmcs12, field, &field_value) < 0)
9151 return nested_vmx_failValid(vcpu,
9152 VMXERR_UNSUPPORTED_VMCS_COMPONENT);
9153
Nadav Har'El49f705c2011-05-25 23:08:30 +03009154 /*
9155 * Now copy part of this value to register or memory, as requested.
9156 * Note that the number of bits actually copied is 32 or 64 depending
9157 * on the guest's mode (32 or 64 bit), not on the given field's length.
9158 */
9159 if (vmx_instruction_info & (1u << 10)) {
Nadav Amit27e6fb52014-06-18 17:19:26 +03009160 kvm_register_writel(vcpu, (((vmx_instruction_info) >> 3) & 0xf),
Nadav Har'El49f705c2011-05-25 23:08:30 +03009161 field_value);
9162 } else {
9163 if (get_vmx_mem_address(vcpu, exit_qualification,
Eugene Korenevskyf9eb4af2015-04-17 02:22:21 +00009164 vmx_instruction_info, true, &gva))
Nadav Har'El49f705c2011-05-25 23:08:30 +03009165 return 1;
Felix Wilhelm727ba742018-06-11 09:43:44 +02009166 /* _system ok, nested_vmx_check_permission has verified cpl=0 */
Paolo Bonzinice14e868a2018-06-06 17:37:49 +02009167 kvm_write_guest_virt_system(vcpu, gva, &field_value,
9168 (is_long_mode(vcpu) ? 8 : 4), NULL);
Nadav Har'El49f705c2011-05-25 23:08:30 +03009169 }
9170
Sean Christopherson09abb5e2018-09-26 09:23:55 -07009171 return nested_vmx_succeed(vcpu);
Nadav Har'El49f705c2011-05-25 23:08:30 +03009172}
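/*
 * VMX_INSTRUCTION_INFO decoding used by handle_vmread() above and
 * handle_vmwrite() below: bits 31:28 name the register holding the
 * VMCS field encoding, bit 10 selects the register form (set) versus
 * the memory form (clear), and bits 6:3 name the other register
 * operand.  The amount of data actually moved is 32 or 64 bits
 * depending on the guest's mode, not on the width of the VMCS field.
 */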
9173
9174
9175static int handle_vmwrite(struct kvm_vcpu *vcpu)
9176{
9177 unsigned long field;
9178 gva_t gva;
Paolo Bonzini74a497f2017-12-20 13:55:39 +01009179 struct vcpu_vmx *vmx = to_vmx(vcpu);
Nadav Har'El49f705c2011-05-25 23:08:30 +03009180 unsigned long exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
9181 u32 vmx_instruction_info = vmcs_read32(VMX_INSTRUCTION_INFO);
Paolo Bonzini74a497f2017-12-20 13:55:39 +01009182
Nadav Har'El49f705c2011-05-25 23:08:30 +03009183 /* The value to write might be 32 or 64 bits, depending on L1's long
9184 * mode, and eventually we need to write that into a field of several
9185 * possible lengths. The code below first zero-extends the value to 64
Adam Buchbinder6a6256f2016-02-23 15:34:30 -08009186 * bit (field_value), and then copies only the appropriate number of
Nadav Har'El49f705c2011-05-25 23:08:30 +03009187 * bits into the vmcs12 field.
9188 */
9189 u64 field_value = 0;
9190 struct x86_exception e;
Liran Alon6d894f42018-06-23 02:35:09 +03009191 struct vmcs12 *vmcs12;
Nadav Har'El49f705c2011-05-25 23:08:30 +03009192
Kyle Hueyeb277562016-11-29 12:40:39 -08009193 if (!nested_vmx_check_permission(vcpu))
Nadav Har'El49f705c2011-05-25 23:08:30 +03009194 return 1;
9195
Sean Christopherson09abb5e2018-09-26 09:23:55 -07009196 if (vmx->nested.current_vmptr == -1ull)
9197 return nested_vmx_failInvalid(vcpu);
Kyle Hueyeb277562016-11-29 12:40:39 -08009198
Nadav Har'El49f705c2011-05-25 23:08:30 +03009199 if (vmx_instruction_info & (1u << 10))
Nadav Amit27e6fb52014-06-18 17:19:26 +03009200 field_value = kvm_register_readl(vcpu,
Nadav Har'El49f705c2011-05-25 23:08:30 +03009201 (((vmx_instruction_info) >> 3) & 0xf));
9202 else {
9203 if (get_vmx_mem_address(vcpu, exit_qualification,
Eugene Korenevskyf9eb4af2015-04-17 02:22:21 +00009204 vmx_instruction_info, false, &gva))
Nadav Har'El49f705c2011-05-25 23:08:30 +03009205 return 1;
Paolo Bonzinice14e868a2018-06-06 17:37:49 +02009206 if (kvm_read_guest_virt(vcpu, gva, &field_value,
9207 (is_64_bit_mode(vcpu) ? 8 : 4), &e)) {
Nadav Har'El49f705c2011-05-25 23:08:30 +03009208 kvm_inject_page_fault(vcpu, &e);
9209 return 1;
9210 }
9211 }
9212
9213
Nadav Amit27e6fb52014-06-18 17:19:26 +03009214 field = kvm_register_readl(vcpu, (((vmx_instruction_info) >> 28) & 0xf));
Jim Mattsonf4160e42018-05-29 09:11:33 -07009215 /*
9216 * If the vCPU supports "VMWRITE to any supported field in the
9217 * VMCS," then the "read-only" fields are actually read/write.
9218 */
9219 if (vmcs_field_readonly(field) &&
Sean Christopherson09abb5e2018-09-26 09:23:55 -07009220 !nested_cpu_has_vmwrite_any_field(vcpu))
9221 return nested_vmx_failValid(vcpu,
Nadav Har'El49f705c2011-05-25 23:08:30 +03009222 VMXERR_VMWRITE_READ_ONLY_VMCS_COMPONENT);
Nadav Har'El49f705c2011-05-25 23:08:30 +03009223
Liran Alon6d894f42018-06-23 02:35:09 +03009224 if (!is_guest_mode(vcpu))
9225 vmcs12 = get_vmcs12(vcpu);
9226 else {
9227 /*
9228 * When vmcs12->vmcs_link_pointer is -1ull, any VMWRITE
9229 * to a shadowed field sets the ALU flags for VMfailInvalid.
9230 */
Sean Christopherson09abb5e2018-09-26 09:23:55 -07009231 if (get_vmcs12(vcpu)->vmcs_link_pointer == -1ull)
9232 return nested_vmx_failInvalid(vcpu);
Liran Alon6d894f42018-06-23 02:35:09 +03009233 vmcs12 = get_shadow_vmcs12(vcpu);
Liran Alon6d894f42018-06-23 02:35:09 +03009234 }
9235
Sean Christopherson09abb5e2018-09-26 09:23:55 -07009236 if (vmcs12_write_any(vmcs12, field, field_value) < 0)
9237 return nested_vmx_failValid(vcpu,
9238 VMXERR_UNSUPPORTED_VMCS_COMPONENT);
Nadav Har'El49f705c2011-05-25 23:08:30 +03009239
Liran Alon6d894f42018-06-23 02:35:09 +03009240 /*
9241 * Do not track vmcs12 dirty-state if in guest-mode
9242 * as we actually dirty shadow vmcs12 instead of vmcs12.
9243 */
9244 if (!is_guest_mode(vcpu)) {
9245 switch (field) {
Paolo Bonzini74a497f2017-12-20 13:55:39 +01009246#define SHADOW_FIELD_RW(x) case x:
9247#include "vmx_shadow_fields.h"
Liran Alon6d894f42018-06-23 02:35:09 +03009248 /*
9249 * The fields that can be updated by L1 without a vmexit are
9250 * always updated in the vmcs02, the others go down the slow
9251 * path of prepare_vmcs02.
9252 */
9253 break;
9254 default:
9255 vmx->nested.dirty_vmcs12 = true;
9256 break;
9257 }
Paolo Bonzini74a497f2017-12-20 13:55:39 +01009258 }
9259
Sean Christopherson09abb5e2018-09-26 09:23:55 -07009260 return nested_vmx_succeed(vcpu);
Nadav Har'El49f705c2011-05-25 23:08:30 +03009261}
9262
Jim Mattsona8bc2842016-11-30 12:03:44 -08009263static void set_current_vmptr(struct vcpu_vmx *vmx, gpa_t vmptr)
9264{
9265 vmx->nested.current_vmptr = vmptr;
9266 if (enable_shadow_vmcs) {
9267 vmcs_set_bits(SECONDARY_VM_EXEC_CONTROL,
9268 SECONDARY_EXEC_SHADOW_VMCS);
9269 vmcs_write64(VMCS_LINK_POINTER,
9270 __pa(vmx->vmcs01.shadow_vmcs));
Vitaly Kuznetsov945679e2018-10-16 18:50:02 +02009271 vmx->nested.need_vmcs12_sync = true;
Jim Mattsona8bc2842016-11-30 12:03:44 -08009272 }
Paolo Bonzini74a497f2017-12-20 13:55:39 +01009273 vmx->nested.dirty_vmcs12 = true;
Jim Mattsona8bc2842016-11-30 12:03:44 -08009274}
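/*
 * When shadow VMCS is enabled, set_current_vmptr() points vmcs01's
 * VMCS_LINK_POINTER at the shadow VMCS so that L1's VMREAD/VMWRITE of
 * the shadowed fields are handled by hardware without an exit.
 * need_vmcs12_sync records that the shadow VMCS (or the enlightened
 * VMCS) still has to be populated from the newly cached vmcs12 before
 * L1 runs again.
 */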
9275
Nadav Har'El63846662011-05-25 23:07:29 +03009276/* Emulate the VMPTRLD instruction */
9277static int handle_vmptrld(struct kvm_vcpu *vcpu)
9278{
9279 struct vcpu_vmx *vmx = to_vmx(vcpu);
Nadav Har'El63846662011-05-25 23:07:29 +03009280 gpa_t vmptr;
Nadav Har'El63846662011-05-25 23:07:29 +03009281
9282 if (!nested_vmx_check_permission(vcpu))
9283 return 1;
9284
Radim Krčmářcbf71272017-05-19 15:48:51 +02009285 if (nested_vmx_get_vmptr(vcpu, &vmptr))
Nadav Har'El63846662011-05-25 23:07:29 +03009286 return 1;
9287
Sean Christopherson09abb5e2018-09-26 09:23:55 -07009288 if (!PAGE_ALIGNED(vmptr) || (vmptr >> cpuid_maxphyaddr(vcpu)))
9289 return nested_vmx_failValid(vcpu,
9290 VMXERR_VMPTRLD_INVALID_ADDRESS);
Radim Krčmářcbf71272017-05-19 15:48:51 +02009291
Sean Christopherson09abb5e2018-09-26 09:23:55 -07009292 if (vmptr == vmx->nested.vmxon_ptr)
9293 return nested_vmx_failValid(vcpu,
9294 VMXERR_VMPTRLD_VMXON_POINTER);
Radim Krčmářcbf71272017-05-19 15:48:51 +02009295
Vitaly Kuznetsovb8bbab92018-10-16 18:50:03 +02009296 /* Forbid normal VMPTRLD if Enlightened version was used */
9297 if (vmx->nested.hv_evmcs)
9298 return 1;
9299
Nadav Har'El63846662011-05-25 23:07:29 +03009300 if (vmx->nested.current_vmptr != vmptr) {
9301 struct vmcs12 *new_vmcs12;
9302 struct page *page;
David Hildenbrand5e2f30b2017-08-03 18:11:04 +02009303 page = kvm_vcpu_gpa_to_page(vcpu, vmptr);
Sean Christopherson09abb5e2018-09-26 09:23:55 -07009304 if (is_error_page(page))
9305 return nested_vmx_failInvalid(vcpu);
9306
Nadav Har'El63846662011-05-25 23:07:29 +03009307 new_vmcs12 = kmap(page);
Liran Alon392b2f22018-06-23 02:35:01 +03009308 if (new_vmcs12->hdr.revision_id != VMCS12_REVISION ||
Liran Alonfa97d7d2018-07-18 14:07:59 +02009309 (new_vmcs12->hdr.shadow_vmcs &&
9310 !nested_cpu_has_vmx_shadow_vmcs(vcpu))) {
Nadav Har'El63846662011-05-25 23:07:29 +03009311 kunmap(page);
David Hildenbrand53a70da2017-08-03 18:11:05 +02009312 kvm_release_page_clean(page);
Sean Christopherson09abb5e2018-09-26 09:23:55 -07009313 return nested_vmx_failValid(vcpu,
Nadav Har'El63846662011-05-25 23:07:29 +03009314 VMXERR_VMPTRLD_INCORRECT_VMCS_REVISION_ID);
Nadav Har'El63846662011-05-25 23:07:29 +03009315 }
Nadav Har'El63846662011-05-25 23:07:29 +03009316
Vitaly Kuznetsov14c07ad2018-10-08 21:28:08 +02009317 nested_release_vmcs12(vcpu);
9318
David Matlack4f2777b2016-07-13 17:16:37 -07009319 /*
9320 * Load VMCS12 from guest memory since it is not already
9321 * cached.
9322 */
Paolo Bonzini9f744c52017-07-27 15:54:46 +02009323 memcpy(vmx->nested.cached_vmcs12, new_vmcs12, VMCS12_SIZE);
9324 kunmap(page);
David Hildenbrand53a70da2017-08-03 18:11:05 +02009325 kvm_release_page_clean(page);
Paolo Bonzini9f744c52017-07-27 15:54:46 +02009326
Jim Mattsona8bc2842016-11-30 12:03:44 -08009327 set_current_vmptr(vmx, vmptr);
Nadav Har'El63846662011-05-25 23:07:29 +03009328 }
9329
Sean Christopherson09abb5e2018-09-26 09:23:55 -07009330 return nested_vmx_succeed(vcpu);
Nadav Har'El63846662011-05-25 23:07:29 +03009331}
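/*
 * handle_vmptrld() keeps a host-side copy of the VMCS12
 * (nested.cached_vmcs12) and only re-reads guest memory when a new,
 * different pointer is loaded.  The revision id must match
 * VMCS12_REVISION, and hdr.shadow_vmcs is only accepted when nested
 * shadow VMCS support is actually exposed to L1.
 */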
9332
Vitaly Kuznetsovb8bbab92018-10-16 18:50:03 +02009333/*
9334 * This is the equivalent of the nested hypervisor executing the VMPTRLD
9335 * instruction.
9336 */
Vitaly Kuznetsov8cab6502018-10-16 18:50:09 +02009337static int nested_vmx_handle_enlightened_vmptrld(struct kvm_vcpu *vcpu,
9338 bool from_launch)
Vitaly Kuznetsovb8bbab92018-10-16 18:50:03 +02009339{
9340 struct vcpu_vmx *vmx = to_vmx(vcpu);
9341 struct hv_vp_assist_page assist_page;
9342
9343 if (likely(!vmx->nested.enlightened_vmcs_enabled))
9344 return 1;
9345
9346 if (unlikely(!kvm_hv_get_assist_page(vcpu, &assist_page)))
9347 return 1;
9348
9349 if (unlikely(!assist_page.enlighten_vmentry))
9350 return 1;
9351
9352 if (unlikely(assist_page.current_nested_vmcs !=
9353 vmx->nested.hv_evmcs_vmptr)) {
9354
9355 if (!vmx->nested.hv_evmcs)
9356 vmx->nested.current_vmptr = -1ull;
9357
9358 nested_release_evmcs(vcpu);
9359
9360 vmx->nested.hv_evmcs_page = kvm_vcpu_gpa_to_page(
9361 vcpu, assist_page.current_nested_vmcs);
9362
9363 if (unlikely(is_error_page(vmx->nested.hv_evmcs_page)))
9364 return 0;
9365
9366 vmx->nested.hv_evmcs = kmap(vmx->nested.hv_evmcs_page);
9367
9368 if (vmx->nested.hv_evmcs->revision_id != VMCS12_REVISION) {
9369 nested_release_evmcs(vcpu);
9370 return 0;
9371 }
9372
9373 vmx->nested.dirty_vmcs12 = true;
9374 /*
9375 * As we keep L2 state for one guest only, the 'hv_clean_fields' mask
9376 * can't be used when we switch between them. Reset it here for
9377 * simplicity.
9378 */
9379 vmx->nested.hv_evmcs->hv_clean_fields &=
9380 ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL;
9381 vmx->nested.hv_evmcs_vmptr = assist_page.current_nested_vmcs;
9382
9383 /*
9384 * Unlike normal vmcs12, enlightened vmcs12 is not fully
9385 * reloaded from guest's memory (read only fields, fields not
9386 * present in struct hv_enlightened_vmcs, ...). Make sure there
9387 * are no leftovers.
9388 */
Vitaly Kuznetsov8cab6502018-10-16 18:50:09 +02009389 if (from_launch)
9390 memset(vmx->nested.cached_vmcs12, 0,
9391 sizeof(*vmx->nested.cached_vmcs12));
Vitaly Kuznetsovb8bbab92018-10-16 18:50:03 +02009392
9393 }
9394 return 1;
9395}
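/*
 * Return convention for nested_vmx_handle_enlightened_vmptrld(): it
 * returns 0 only when the enlightened VMCS page itself cannot be
 * mapped or has an unexpected revision id; everything else (eVMCS not
 * enabled for this vCPU, no assist page, enlightened entry not
 * requested) returns 1 and lets the emulation continue.
 */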
9396
Nadav Har'El6a4d7552011-05-25 23:08:00 +03009397/* Emulate the VMPTRST instruction */
9398static int handle_vmptrst(struct kvm_vcpu *vcpu)
9399{
Sean Christopherson0a06d422018-07-19 10:31:00 -07009400 unsigned long exit_qual = vmcs_readl(EXIT_QUALIFICATION);
9401 u32 instr_info = vmcs_read32(VMX_INSTRUCTION_INFO);
9402 gpa_t current_vmptr = to_vmx(vcpu)->nested.current_vmptr;
Nadav Har'El6a4d7552011-05-25 23:08:00 +03009403 struct x86_exception e;
Sean Christopherson0a06d422018-07-19 10:31:00 -07009404 gva_t gva;
Nadav Har'El6a4d7552011-05-25 23:08:00 +03009405
9406 if (!nested_vmx_check_permission(vcpu))
9407 return 1;
9408
Vitaly Kuznetsovb8bbab92018-10-16 18:50:03 +02009409 if (unlikely(to_vmx(vcpu)->nested.hv_evmcs))
9410 return 1;
9411
Sean Christopherson0a06d422018-07-19 10:31:00 -07009412 if (get_vmx_mem_address(vcpu, exit_qual, instr_info, true, &gva))
Nadav Har'El6a4d7552011-05-25 23:08:00 +03009413 return 1;
Felix Wilhelm727ba742018-06-11 09:43:44 +02009414 /* *_system ok, nested_vmx_check_permission has verified cpl=0 */
Sean Christopherson0a06d422018-07-19 10:31:00 -07009415 if (kvm_write_guest_virt_system(vcpu, gva, (void *)&current_vmptr,
9416 sizeof(gpa_t), &e)) {
Nadav Har'El6a4d7552011-05-25 23:08:00 +03009417 kvm_inject_page_fault(vcpu, &e);
9418 return 1;
9419 }
Sean Christopherson09abb5e2018-09-26 09:23:55 -07009420 return nested_vmx_succeed(vcpu);
Nadav Har'El6a4d7552011-05-25 23:08:00 +03009421}
9422
Nadav Har'Elbfd0a562013-08-05 11:07:17 +03009423/* Emulate the INVEPT instruction */
9424static int handle_invept(struct kvm_vcpu *vcpu)
9425{
Wincy Vanb9c237b2015-02-03 23:56:30 +08009426 struct vcpu_vmx *vmx = to_vmx(vcpu);
Nadav Har'Elbfd0a562013-08-05 11:07:17 +03009427 u32 vmx_instruction_info, types;
9428 unsigned long type;
9429 gva_t gva;
9430 struct x86_exception e;
9431 struct {
9432 u64 eptp, gpa;
9433 } operand;
Nadav Har'Elbfd0a562013-08-05 11:07:17 +03009434
Paolo Bonzini6677f3d2018-02-26 13:40:08 +01009435 if (!(vmx->nested.msrs.secondary_ctls_high &
Wincy Vanb9c237b2015-02-03 23:56:30 +08009436 SECONDARY_EXEC_ENABLE_EPT) ||
Paolo Bonzini6677f3d2018-02-26 13:40:08 +01009437 !(vmx->nested.msrs.ept_caps & VMX_EPT_INVEPT_BIT)) {
Nadav Har'Elbfd0a562013-08-05 11:07:17 +03009438 kvm_queue_exception(vcpu, UD_VECTOR);
9439 return 1;
9440 }
9441
9442 if (!nested_vmx_check_permission(vcpu))
9443 return 1;
9444
Nadav Har'Elbfd0a562013-08-05 11:07:17 +03009445 vmx_instruction_info = vmcs_read32(VMX_INSTRUCTION_INFO);
Nadav Amit27e6fb52014-06-18 17:19:26 +03009446 type = kvm_register_readl(vcpu, (vmx_instruction_info >> 28) & 0xf);
Nadav Har'Elbfd0a562013-08-05 11:07:17 +03009447
Paolo Bonzini6677f3d2018-02-26 13:40:08 +01009448 types = (vmx->nested.msrs.ept_caps >> VMX_EPT_EXTENT_SHIFT) & 6;
Nadav Har'Elbfd0a562013-08-05 11:07:17 +03009449
Sean Christopherson09abb5e2018-09-26 09:23:55 -07009450 if (type >= 32 || !(types & (1 << type)))
9451 return nested_vmx_failValid(vcpu,
Nadav Har'Elbfd0a562013-08-05 11:07:17 +03009452 VMXERR_INVALID_OPERAND_TO_INVEPT_INVVPID);
Nadav Har'Elbfd0a562013-08-05 11:07:17 +03009453
9454 /* According to the Intel VMX instruction reference, the memory
9455 * operand is read even if it isn't needed (e.g., for type==global)
9456 */
9457 if (get_vmx_mem_address(vcpu, vmcs_readl(EXIT_QUALIFICATION),
Eugene Korenevskyf9eb4af2015-04-17 02:22:21 +00009458 vmx_instruction_info, false, &gva))
Nadav Har'Elbfd0a562013-08-05 11:07:17 +03009459 return 1;
Paolo Bonzinice14e868a2018-06-06 17:37:49 +02009460 if (kvm_read_guest_virt(vcpu, gva, &operand, sizeof(operand), &e)) {
Nadav Har'Elbfd0a562013-08-05 11:07:17 +03009461 kvm_inject_page_fault(vcpu, &e);
9462 return 1;
9463 }
9464
9465 switch (type) {
Nadav Har'Elbfd0a562013-08-05 11:07:17 +03009466 case VMX_EPT_EXTENT_GLOBAL:
Bandan Das45e11812016-08-02 16:32:36 -04009467 /*
9468 * TODO: track mappings and invalidate
9469 * single context requests appropriately
9470 */
9471 case VMX_EPT_EXTENT_CONTEXT:
Nadav Har'Elbfd0a562013-08-05 11:07:17 +03009472 kvm_mmu_sync_roots(vcpu);
Liang Chen77c39132014-09-18 12:38:37 -04009473 kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
Nadav Har'Elbfd0a562013-08-05 11:07:17 +03009474 break;
9475 default:
9476 BUG_ON(1);
9477 break;
9478 }
9479
Sean Christopherson09abb5e2018-09-26 09:23:55 -07009480 return nested_vmx_succeed(vcpu);
Nadav Har'Elbfd0a562013-08-05 11:07:17 +03009481}
9482
Liran Alon3d5bdae2018-10-08 23:42:18 +03009483static u16 nested_get_vpid02(struct kvm_vcpu *vcpu)
9484{
9485 struct vcpu_vmx *vmx = to_vmx(vcpu);
9486
9487 return vmx->nested.vpid02 ? vmx->nested.vpid02 : vmx->vpid;
9488}
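/*
 * nested_get_vpid02() falls back to vmx->vpid when no dedicated nested
 * VPID was obtained (allocate_vpid(), called from enter_vmx_operation(),
 * returns 0 when VPIDs are disabled or exhausted), so the INVVPID
 * emulation below always has a tag to flush.
 */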
9489
Petr Matouseka642fc32014-09-23 20:22:30 +02009490static int handle_invvpid(struct kvm_vcpu *vcpu)
9491{
Wanpeng Li99b83ac2015-10-13 09:12:21 -07009492 struct vcpu_vmx *vmx = to_vmx(vcpu);
9493 u32 vmx_instruction_info;
9494 unsigned long type, types;
9495 gva_t gva;
9496 struct x86_exception e;
Jim Mattson40352602017-06-28 09:37:37 -07009497 struct {
9498 u64 vpid;
9499 u64 gla;
9500 } operand;
Liran Alon3d5bdae2018-10-08 23:42:18 +03009501 u16 vpid02;
Wanpeng Li99b83ac2015-10-13 09:12:21 -07009502
Paolo Bonzini6677f3d2018-02-26 13:40:08 +01009503 if (!(vmx->nested.msrs.secondary_ctls_high &
Wanpeng Li99b83ac2015-10-13 09:12:21 -07009504 SECONDARY_EXEC_ENABLE_VPID) ||
Paolo Bonzini6677f3d2018-02-26 13:40:08 +01009505 !(vmx->nested.msrs.vpid_caps & VMX_VPID_INVVPID_BIT)) {
Wanpeng Li99b83ac2015-10-13 09:12:21 -07009506 kvm_queue_exception(vcpu, UD_VECTOR);
9507 return 1;
9508 }
9509
9510 if (!nested_vmx_check_permission(vcpu))
9511 return 1;
9512
9513 vmx_instruction_info = vmcs_read32(VMX_INSTRUCTION_INFO);
9514 type = kvm_register_readl(vcpu, (vmx_instruction_info >> 28) & 0xf);
9515
Paolo Bonzini6677f3d2018-02-26 13:40:08 +01009516 types = (vmx->nested.msrs.vpid_caps &
Jan Dakinevichbcdde302016-10-28 07:00:30 +03009517 VMX_VPID_EXTENT_SUPPORTED_MASK) >> 8;
Wanpeng Li99b83ac2015-10-13 09:12:21 -07009518
Sean Christopherson09abb5e2018-09-26 09:23:55 -07009519 if (type >= 32 || !(types & (1 << type)))
9520 return nested_vmx_failValid(vcpu,
Wanpeng Li99b83ac2015-10-13 09:12:21 -07009521 VMXERR_INVALID_OPERAND_TO_INVEPT_INVVPID);
Wanpeng Li99b83ac2015-10-13 09:12:21 -07009522
9523 /* According to the Intel VMX instruction reference, the memory
9524 * operand is read even if it isn't needed (e.g., for type==global)
9525 */
9526 if (get_vmx_mem_address(vcpu, vmcs_readl(EXIT_QUALIFICATION),
9527 vmx_instruction_info, false, &gva))
9528 return 1;
Paolo Bonzinice14e868a2018-06-06 17:37:49 +02009529 if (kvm_read_guest_virt(vcpu, gva, &operand, sizeof(operand), &e)) {
Wanpeng Li99b83ac2015-10-13 09:12:21 -07009530 kvm_inject_page_fault(vcpu, &e);
9531 return 1;
9532 }
Sean Christopherson09abb5e2018-09-26 09:23:55 -07009533 if (operand.vpid >> 16)
9534 return nested_vmx_failValid(vcpu,
Jim Mattson40352602017-06-28 09:37:37 -07009535 VMXERR_INVALID_OPERAND_TO_INVEPT_INVVPID);
Wanpeng Li99b83ac2015-10-13 09:12:21 -07009536
Liran Alon3d5bdae2018-10-08 23:42:18 +03009537 vpid02 = nested_get_vpid02(vcpu);
Wanpeng Li99b83ac2015-10-13 09:12:21 -07009538 switch (type) {
Jan Dakinevichbcdde302016-10-28 07:00:30 +03009539 case VMX_VPID_EXTENT_INDIVIDUAL_ADDR:
Liran Aloncd9a4912018-05-22 17:16:15 +03009540 if (!operand.vpid ||
Sean Christopherson09abb5e2018-09-26 09:23:55 -07009541 is_noncanonical_address(operand.gla, vcpu))
9542 return nested_vmx_failValid(vcpu,
Jim Mattson40352602017-06-28 09:37:37 -07009543 VMXERR_INVALID_OPERAND_TO_INVEPT_INVVPID);
Liran Alon3d5bdae2018-10-08 23:42:18 +03009544 if (cpu_has_vmx_invvpid_individual_addr()) {
Liran Aloncd9a4912018-05-22 17:16:15 +03009545 __invvpid(VMX_VPID_EXTENT_INDIVIDUAL_ADDR,
Liran Alon3d5bdae2018-10-08 23:42:18 +03009546 vpid02, operand.gla);
Liran Aloncd9a4912018-05-22 17:16:15 +03009547 } else
Liran Alon327c0722018-10-08 23:42:19 +03009548 __vmx_flush_tlb(vcpu, vpid02, false);
Liran Aloncd9a4912018-05-22 17:16:15 +03009549 break;
Paolo Bonzinief697a72016-03-18 16:58:38 +01009550 case VMX_VPID_EXTENT_SINGLE_CONTEXT:
Jan Dakinevichbcdde302016-10-28 07:00:30 +03009551 case VMX_VPID_EXTENT_SINGLE_NON_GLOBAL:
Sean Christopherson09abb5e2018-09-26 09:23:55 -07009552 if (!operand.vpid)
9553 return nested_vmx_failValid(vcpu,
Jan Dakinevichbcdde302016-10-28 07:00:30 +03009554 VMXERR_INVALID_OPERAND_TO_INVEPT_INVVPID);
Liran Alon327c0722018-10-08 23:42:19 +03009555 __vmx_flush_tlb(vcpu, vpid02, false);
Jan Dakinevichbcdde302016-10-28 07:00:30 +03009556 break;
Wanpeng Li99b83ac2015-10-13 09:12:21 -07009557 case VMX_VPID_EXTENT_ALL_CONTEXT:
Liran Alon327c0722018-10-08 23:42:19 +03009558 __vmx_flush_tlb(vcpu, vpid02, false);
Wanpeng Li99b83ac2015-10-13 09:12:21 -07009559 break;
9560 default:
Jan Dakinevichbcdde302016-10-28 07:00:30 +03009561 WARN_ON_ONCE(1);
Kyle Huey6affcbe2016-11-29 12:40:40 -08009562 return kvm_skip_emulated_instruction(vcpu);
Wanpeng Li99b83ac2015-10-13 09:12:21 -07009563 }
9564
Sean Christopherson09abb5e2018-09-26 09:23:55 -07009565 return nested_vmx_succeed(vcpu);
Petr Matouseka642fc32014-09-23 20:22:30 +02009566}
9567
Junaid Shahideb4b2482018-06-27 14:59:14 -07009568static int handle_invpcid(struct kvm_vcpu *vcpu)
9569{
9570 u32 vmx_instruction_info;
9571 unsigned long type;
9572 bool pcid_enabled;
9573 gva_t gva;
9574 struct x86_exception e;
Junaid Shahidb94742c2018-06-27 14:59:20 -07009575 unsigned i;
9576 unsigned long roots_to_free = 0;
Junaid Shahideb4b2482018-06-27 14:59:14 -07009577 struct {
9578 u64 pcid;
9579 u64 gla;
9580 } operand;
9581
9582 if (!guest_cpuid_has(vcpu, X86_FEATURE_INVPCID)) {
9583 kvm_queue_exception(vcpu, UD_VECTOR);
9584 return 1;
9585 }
9586
9587 vmx_instruction_info = vmcs_read32(VMX_INSTRUCTION_INFO);
9588 type = kvm_register_readl(vcpu, (vmx_instruction_info >> 28) & 0xf);
9589
9590 if (type > 3) {
9591 kvm_inject_gp(vcpu, 0);
9592 return 1;
9593 }
9594
9595 /* According to the Intel instruction reference, the memory operand
9596 * is read even if it isn't needed (e.g., for type==all)
9597 */
9598 if (get_vmx_mem_address(vcpu, vmcs_readl(EXIT_QUALIFICATION),
9599 vmx_instruction_info, false, &gva))
9600 return 1;
9601
9602 if (kvm_read_guest_virt(vcpu, gva, &operand, sizeof(operand), &e)) {
9603 kvm_inject_page_fault(vcpu, &e);
9604 return 1;
9605 }
9606
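	/*
	 * A PCID is only 12 bits wide; reject descriptors with any of
	 * bits 63:12 set.
	 */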
9607 if (operand.pcid >> 12 != 0) {
9608 kvm_inject_gp(vcpu, 0);
9609 return 1;
9610 }
9611
9612 pcid_enabled = kvm_read_cr4_bits(vcpu, X86_CR4_PCIDE);
9613
9614 switch (type) {
9615 case INVPCID_TYPE_INDIV_ADDR:
9616 if ((!pcid_enabled && (operand.pcid != 0)) ||
9617 is_noncanonical_address(operand.gla, vcpu)) {
9618 kvm_inject_gp(vcpu, 0);
9619 return 1;
9620 }
9621 kvm_mmu_invpcid_gva(vcpu, operand.gla, operand.pcid);
9622 return kvm_skip_emulated_instruction(vcpu);
9623
9624 case INVPCID_TYPE_SINGLE_CTXT:
9625 if (!pcid_enabled && (operand.pcid != 0)) {
9626 kvm_inject_gp(vcpu, 0);
9627 return 1;
9628 }
9629
9630 if (kvm_get_active_pcid(vcpu) == operand.pcid) {
9631 kvm_mmu_sync_roots(vcpu);
9632 kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
9633 }
9634
Junaid Shahidb94742c2018-06-27 14:59:20 -07009635 for (i = 0; i < KVM_MMU_NUM_PREV_ROOTS; i++)
Vitaly Kuznetsov44dd3ff2018-10-08 21:28:05 +02009636 if (kvm_get_pcid(vcpu, vcpu->arch.mmu->prev_roots[i].cr3)
Junaid Shahidb94742c2018-06-27 14:59:20 -07009637 == operand.pcid)
9638 roots_to_free |= KVM_MMU_ROOT_PREVIOUS(i);
Junaid Shahidade61e22018-06-27 14:59:15 -07009639
Vitaly Kuznetsov6a82cd12018-10-08 21:28:07 +02009640 kvm_mmu_free_roots(vcpu, vcpu->arch.mmu, roots_to_free);
Junaid Shahideb4b2482018-06-27 14:59:14 -07009641 /*
Junaid Shahidb94742c2018-06-27 14:59:20 -07009642 * If neither the current cr3 nor any of the prev_roots use the
Junaid Shahidade61e22018-06-27 14:59:15 -07009643 * given PCID, then nothing needs to be done here because a
9644 * resync will happen anyway before switching to any other CR3.
Junaid Shahideb4b2482018-06-27 14:59:14 -07009645 */
9646
9647 return kvm_skip_emulated_instruction(vcpu);
9648
9649 case INVPCID_TYPE_ALL_NON_GLOBAL:
9650 /*
9651 * Currently, KVM doesn't mark global entries in the shadow
9652 * page tables, so a non-global flush just degenerates to a
9653 * global flush. If needed, we could optimize this later by
9654 * keeping track of global entries in shadow page tables.
9655 */
9656
9657 /* fall-through */
9658 case INVPCID_TYPE_ALL_INCL_GLOBAL:
9659 kvm_mmu_unload(vcpu);
9660 return kvm_skip_emulated_instruction(vcpu);
9661
9662 default:
9663 BUG(); /* We have already checked above that type <= 3 */
9664 }
9665}
9666
Kai Huang843e4332015-01-28 10:54:28 +08009667static int handle_pml_full(struct kvm_vcpu *vcpu)
9668{
9669 unsigned long exit_qualification;
9670
9671 trace_kvm_pml_full(vcpu->vcpu_id);
9672
9673 exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
9674
9675 /*
 9676	 * If the PML buffer-full exit happened while executing IRET from an
 9677	 * NMI, the "blocked by NMI" bit has to be set before the next VM entry.
9678 */
9679 if (!(to_vmx(vcpu)->idt_vectoring_info & VECTORING_INFO_VALID_MASK) &&
Paolo Bonzinid02fcf52017-11-06 13:31:13 +01009680 enable_vnmi &&
Kai Huang843e4332015-01-28 10:54:28 +08009681 (exit_qualification & INTR_INFO_UNBLOCK_NMI))
9682 vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO,
9683 GUEST_INTR_STATE_NMI);
9684
9685 /*
9686 * PML buffer already flushed at beginning of VMEXIT. Nothing to do
 9687	 * here, and there's no userspace involvement needed for PML.
9688 */
9689 return 1;
9690}
9691
Yunhong Jiang64672c92016-06-13 14:19:59 -07009692static int handle_preemption_timer(struct kvm_vcpu *vcpu)
9693{
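	/*
	 * If the preemption timer was armed with zero only to force an
	 * immediate exit back to the host (req_immediate_exit), this is
	 * not a real expiry of the guest's APIC timer.
	 */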
Sean Christophersond264ee02018-08-27 15:21:12 -07009694 if (!to_vmx(vcpu)->req_immediate_exit)
9695 kvm_lapic_expired_hv_timer(vcpu);
Yunhong Jiang64672c92016-06-13 14:19:59 -07009696 return 1;
9697}
9698
Bandan Das41ab9372017-08-03 15:54:43 -04009699static bool valid_ept_address(struct kvm_vcpu *vcpu, u64 address)
9700{
9701 struct vcpu_vmx *vmx = to_vmx(vcpu);
Bandan Das41ab9372017-08-03 15:54:43 -04009702 int maxphyaddr = cpuid_maxphyaddr(vcpu);
9703
9704 /* Check for memory type validity */
David Hildenbrandbb97a012017-08-10 23:15:28 +02009705 switch (address & VMX_EPTP_MT_MASK) {
9706 case VMX_EPTP_MT_UC:
Paolo Bonzini6677f3d2018-02-26 13:40:08 +01009707 if (!(vmx->nested.msrs.ept_caps & VMX_EPTP_UC_BIT))
Bandan Das41ab9372017-08-03 15:54:43 -04009708 return false;
9709 break;
David Hildenbrandbb97a012017-08-10 23:15:28 +02009710 case VMX_EPTP_MT_WB:
Paolo Bonzini6677f3d2018-02-26 13:40:08 +01009711 if (!(vmx->nested.msrs.ept_caps & VMX_EPTP_WB_BIT))
Bandan Das41ab9372017-08-03 15:54:43 -04009712 return false;
9713 break;
9714 default:
9715 return false;
9716 }
9717
David Hildenbrandbb97a012017-08-10 23:15:28 +02009718	/* Only a 4-level page-walk length is valid */
9719 if ((address & VMX_EPTP_PWL_MASK) != VMX_EPTP_PWL_4)
Bandan Das41ab9372017-08-03 15:54:43 -04009720 return false;
9721
9722 /* Reserved bits should not be set */
9723 if (address >> maxphyaddr || ((address >> 7) & 0x1f))
9724 return false;
9725
9726 /* AD, if set, should be supported */
David Hildenbrandbb97a012017-08-10 23:15:28 +02009727 if (address & VMX_EPTP_AD_ENABLE_BIT) {
Paolo Bonzini6677f3d2018-02-26 13:40:08 +01009728 if (!(vmx->nested.msrs.ept_caps & VMX_EPT_AD_BIT))
Bandan Das41ab9372017-08-03 15:54:43 -04009729 return false;
9730 }
9731
9732 return true;
9733}
9734
9735static int nested_vmx_eptp_switching(struct kvm_vcpu *vcpu,
9736 struct vmcs12 *vmcs12)
9737{
9738 u32 index = vcpu->arch.regs[VCPU_REGS_RCX];
9739 u64 address;
9740 bool accessed_dirty;
9741 struct kvm_mmu *mmu = vcpu->arch.walk_mmu;
9742
9743 if (!nested_cpu_has_eptp_switching(vmcs12) ||
9744 !nested_cpu_has_ept(vmcs12))
9745 return 1;
9746
9747 if (index >= VMFUNC_EPTP_ENTRIES)
9748 return 1;
9749
9750
9751 if (kvm_vcpu_read_guest_page(vcpu, vmcs12->eptp_list_address >> PAGE_SHIFT,
9752 &address, index * 8, 8))
9753 return 1;
9754
David Hildenbrandbb97a012017-08-10 23:15:28 +02009755 accessed_dirty = !!(address & VMX_EPTP_AD_ENABLE_BIT);
Bandan Das41ab9372017-08-03 15:54:43 -04009756
9757 /*
9758 * If the (L2) guest does a vmfunc to the currently
9759 * active ept pointer, we don't have to do anything else
9760 */
9761 if (vmcs12->ept_pointer != address) {
9762 if (!valid_ept_address(vcpu, address))
9763 return 1;
9764
9765 kvm_mmu_unload(vcpu);
9766 mmu->ept_ad = accessed_dirty;
Vitaly Kuznetsov36d9594d2018-10-08 21:28:10 +02009767 mmu->mmu_role.base.ad_disabled = !accessed_dirty;
Bandan Das41ab9372017-08-03 15:54:43 -04009768 vmcs12->ept_pointer = address;
9769 /*
 9770	 * TODO: Check what the correct approach is in case the
9771 * mmu reload fails. Currently, we just let the next
9772 * reload potentially fail
9773 */
9774 kvm_mmu_reload(vcpu);
9775 }
9776
9777 return 0;
9778}
9779
Bandan Das2a499e42017-08-03 15:54:41 -04009780static int handle_vmfunc(struct kvm_vcpu *vcpu)
9781{
Bandan Das27c42a12017-08-03 15:54:42 -04009782 struct vcpu_vmx *vmx = to_vmx(vcpu);
9783 struct vmcs12 *vmcs12;
9784 u32 function = vcpu->arch.regs[VCPU_REGS_RAX];
9785
9786 /*
9787 * VMFUNC is only supported for nested guests, but we always enable the
 9788	 * secondary control for simplicity; for non-nested mode, fake that it
 9789	 * isn't enabled by injecting #UD.
9790 */
9791 if (!is_guest_mode(vcpu)) {
9792 kvm_queue_exception(vcpu, UD_VECTOR);
9793 return 1;
9794 }
9795
9796 vmcs12 = get_vmcs12(vcpu);
9797 if ((vmcs12->vm_function_control & (1 << function)) == 0)
9798 goto fail;
Bandan Das41ab9372017-08-03 15:54:43 -04009799
9800 switch (function) {
9801 case 0:
9802 if (nested_vmx_eptp_switching(vcpu, vmcs12))
9803 goto fail;
9804 break;
9805 default:
9806 goto fail;
9807 }
9808 return kvm_skip_emulated_instruction(vcpu);
Bandan Das27c42a12017-08-03 15:54:42 -04009809
9810fail:
9811 nested_vmx_vmexit(vcpu, vmx->exit_reason,
9812 vmcs_read32(VM_EXIT_INTR_INFO),
9813 vmcs_readl(EXIT_QUALIFICATION));
Bandan Das2a499e42017-08-03 15:54:41 -04009814 return 1;
9815}
9816
Sean Christopherson0b665d32018-08-14 09:33:34 -07009817static int handle_encls(struct kvm_vcpu *vcpu)
9818{
9819 /*
9820 * SGX virtualization is not yet supported. There is no software
9821 * enable bit for SGX, so we have to trap ENCLS and inject a #UD
9822 * to prevent the guest from executing ENCLS.
9823 */
9824 kvm_queue_exception(vcpu, UD_VECTOR);
9825 return 1;
9826}
9827
Nadav Har'El0140cae2011-05-25 23:06:28 +03009828/*
Avi Kivity6aa8b732006-12-10 02:21:36 -08009829 * The exit handlers return 1 if the exit was handled fully and guest execution
9830 * may resume. Otherwise they set the kvm_run parameter to indicate what needs
9831 * to be done to userspace and return 0.
9832 */
Mathias Krause772e0312012-08-30 01:30:19 +02009833static int (*const kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu) = {
Avi Kivity6aa8b732006-12-10 02:21:36 -08009834 [EXIT_REASON_EXCEPTION_NMI] = handle_exception,
9835 [EXIT_REASON_EXTERNAL_INTERRUPT] = handle_external_interrupt,
Avi Kivity988ad742007-02-12 00:54:36 -08009836 [EXIT_REASON_TRIPLE_FAULT] = handle_triple_fault,
Sheng Yangf08864b2008-05-15 18:23:25 +08009837 [EXIT_REASON_NMI_WINDOW] = handle_nmi_window,
Avi Kivity6aa8b732006-12-10 02:21:36 -08009838 [EXIT_REASON_IO_INSTRUCTION] = handle_io,
Avi Kivity6aa8b732006-12-10 02:21:36 -08009839 [EXIT_REASON_CR_ACCESS] = handle_cr,
9840 [EXIT_REASON_DR_ACCESS] = handle_dr,
9841 [EXIT_REASON_CPUID] = handle_cpuid,
9842 [EXIT_REASON_MSR_READ] = handle_rdmsr,
9843 [EXIT_REASON_MSR_WRITE] = handle_wrmsr,
9844 [EXIT_REASON_PENDING_INTERRUPT] = handle_interrupt_window,
9845 [EXIT_REASON_HLT] = handle_halt,
Gleb Natapovec25d5e2010-11-01 15:35:01 +02009846 [EXIT_REASON_INVD] = handle_invd,
Marcelo Tosattia7052892008-09-23 13:18:35 -03009847 [EXIT_REASON_INVLPG] = handle_invlpg,
Avi Kivityfee84b02011-11-10 14:57:25 +02009848 [EXIT_REASON_RDPMC] = handle_rdpmc,
Ingo Molnarc21415e2007-02-19 14:37:47 +02009849 [EXIT_REASON_VMCALL] = handle_vmcall,
Nadav Har'El27d6c862011-05-25 23:06:59 +03009850 [EXIT_REASON_VMCLEAR] = handle_vmclear,
Nadav Har'Elcd232ad2011-05-25 23:10:33 +03009851 [EXIT_REASON_VMLAUNCH] = handle_vmlaunch,
Nadav Har'El63846662011-05-25 23:07:29 +03009852 [EXIT_REASON_VMPTRLD] = handle_vmptrld,
Nadav Har'El6a4d7552011-05-25 23:08:00 +03009853 [EXIT_REASON_VMPTRST] = handle_vmptrst,
Nadav Har'El49f705c2011-05-25 23:08:30 +03009854 [EXIT_REASON_VMREAD] = handle_vmread,
Nadav Har'Elcd232ad2011-05-25 23:10:33 +03009855 [EXIT_REASON_VMRESUME] = handle_vmresume,
Nadav Har'El49f705c2011-05-25 23:08:30 +03009856 [EXIT_REASON_VMWRITE] = handle_vmwrite,
Nadav Har'Elec378ae2011-05-25 23:02:54 +03009857 [EXIT_REASON_VMOFF] = handle_vmoff,
9858 [EXIT_REASON_VMON] = handle_vmon,
Sheng Yangf78e0e22007-10-29 09:40:42 +08009859 [EXIT_REASON_TPR_BELOW_THRESHOLD] = handle_tpr_below_threshold,
9860 [EXIT_REASON_APIC_ACCESS] = handle_apic_access,
Yang Zhang83d4c282013-01-25 10:18:49 +08009861 [EXIT_REASON_APIC_WRITE] = handle_apic_write,
Yang Zhangc7c9c562013-01-25 10:18:51 +08009862 [EXIT_REASON_EOI_INDUCED] = handle_apic_eoi_induced,
Eddie Donge5edaa02007-11-11 12:28:35 +02009863 [EXIT_REASON_WBINVD] = handle_wbinvd,
Dexuan Cui2acf9232010-06-10 11:27:12 +08009864 [EXIT_REASON_XSETBV] = handle_xsetbv,
Izik Eidus37817f22008-03-24 23:14:53 +02009865 [EXIT_REASON_TASK_SWITCH] = handle_task_switch,
Andi Kleena0861c02009-06-08 17:37:09 +08009866 [EXIT_REASON_MCE_DURING_VMENTRY] = handle_machine_check,
Paolo Bonzini0367f202016-07-12 10:44:55 +02009867 [EXIT_REASON_GDTR_IDTR] = handle_desc,
9868 [EXIT_REASON_LDTR_TR] = handle_desc,
Marcelo Tosatti68f89402009-06-11 12:07:43 -03009869 [EXIT_REASON_EPT_VIOLATION] = handle_ept_violation,
9870 [EXIT_REASON_EPT_MISCONFIG] = handle_ept_misconfig,
Zhai, Edwin4b8d54f2009-10-09 18:03:20 +08009871 [EXIT_REASON_PAUSE_INSTRUCTION] = handle_pause,
Gabriel L. Somlo87c00572014-05-07 16:52:13 -04009872 [EXIT_REASON_MWAIT_INSTRUCTION] = handle_mwait,
Mihai Donțu5f3d45e2015-07-05 20:08:57 +03009873 [EXIT_REASON_MONITOR_TRAP_FLAG] = handle_monitor_trap,
Gabriel L. Somlo87c00572014-05-07 16:52:13 -04009874 [EXIT_REASON_MONITOR_INSTRUCTION] = handle_monitor,
Nadav Har'Elbfd0a562013-08-05 11:07:17 +03009875 [EXIT_REASON_INVEPT] = handle_invept,
Petr Matouseka642fc32014-09-23 20:22:30 +02009876 [EXIT_REASON_INVVPID] = handle_invvpid,
Jim Mattson45ec3682017-08-23 16:32:04 -07009877 [EXIT_REASON_RDRAND] = handle_invalid_op,
Jim Mattson75f4fc82017-08-23 16:32:03 -07009878 [EXIT_REASON_RDSEED] = handle_invalid_op,
Wanpeng Lif53cd632014-12-02 19:14:58 +08009879 [EXIT_REASON_XSAVES] = handle_xsaves,
9880 [EXIT_REASON_XRSTORS] = handle_xrstors,
Kai Huang843e4332015-01-28 10:54:28 +08009881 [EXIT_REASON_PML_FULL] = handle_pml_full,
Junaid Shahideb4b2482018-06-27 14:59:14 -07009882 [EXIT_REASON_INVPCID] = handle_invpcid,
Bandan Das2a499e42017-08-03 15:54:41 -04009883 [EXIT_REASON_VMFUNC] = handle_vmfunc,
Yunhong Jiang64672c92016-06-13 14:19:59 -07009884 [EXIT_REASON_PREEMPTION_TIMER] = handle_preemption_timer,
Sean Christopherson0b665d32018-08-14 09:33:34 -07009885 [EXIT_REASON_ENCLS] = handle_encls,
Avi Kivity6aa8b732006-12-10 02:21:36 -08009886};
9887
9888static const int kvm_vmx_max_exit_handlers =
Robert P. J. Day50a34852007-06-03 13:35:29 -04009889 ARRAY_SIZE(kvm_vmx_exit_handlers);
Avi Kivity6aa8b732006-12-10 02:21:36 -08009890
Jan Kiszka908a7bd2013-02-18 11:21:16 +01009891static bool nested_vmx_exit_handled_io(struct kvm_vcpu *vcpu,
9892 struct vmcs12 *vmcs12)
9893{
9894 unsigned long exit_qualification;
9895 gpa_t bitmap, last_bitmap;
9896 unsigned int port;
9897 int size;
9898 u8 b;
9899
Jan Kiszka908a7bd2013-02-18 11:21:16 +01009900 if (!nested_cpu_has(vmcs12, CPU_BASED_USE_IO_BITMAPS))
Zhihui Zhang2f0a6392013-12-30 15:56:29 -05009901 return nested_cpu_has(vmcs12, CPU_BASED_UNCOND_IO_EXITING);
Jan Kiszka908a7bd2013-02-18 11:21:16 +01009902
9903 exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
9904
9905 port = exit_qualification >> 16;
9906 size = (exit_qualification & 7) + 1;
9907
9908 last_bitmap = (gpa_t)-1;
9909 b = -1;
9910
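	/*
	 * The access can be up to four bytes wide and may straddle the
	 * boundary between I/O bitmaps A and B, so check the intercept
	 * bit for every port it touches.
	 */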
9911 while (size > 0) {
9912 if (port < 0x8000)
9913 bitmap = vmcs12->io_bitmap_a;
9914 else if (port < 0x10000)
9915 bitmap = vmcs12->io_bitmap_b;
9916 else
Joe Perches1d804d02015-03-30 16:46:09 -07009917 return true;
Jan Kiszka908a7bd2013-02-18 11:21:16 +01009918 bitmap += (port & 0x7fff) / 8;
9919
9920 if (last_bitmap != bitmap)
Paolo Bonzini54bf36a2015-04-08 15:39:23 +02009921 if (kvm_vcpu_read_guest(vcpu, bitmap, &b, 1))
Joe Perches1d804d02015-03-30 16:46:09 -07009922 return true;
Jan Kiszka908a7bd2013-02-18 11:21:16 +01009923 if (b & (1 << (port & 7)))
Joe Perches1d804d02015-03-30 16:46:09 -07009924 return true;
Jan Kiszka908a7bd2013-02-18 11:21:16 +01009925
9926 port++;
9927 size--;
9928 last_bitmap = bitmap;
9929 }
9930
Joe Perches1d804d02015-03-30 16:46:09 -07009931 return false;
Jan Kiszka908a7bd2013-02-18 11:21:16 +01009932}
9933
Nadav Har'El644d7112011-05-25 23:12:35 +03009934/*
 9935 * Return 1 if we should exit from L2 to L1 to handle an MSR access,
9936 * rather than handle it ourselves in L0. I.e., check whether L1 expressed
9937 * disinterest in the current event (read or write a specific MSR) by using an
9938 * MSR bitmap. This may be the case even when L0 doesn't use MSR bitmaps.
9939 */
9940static bool nested_vmx_exit_handled_msr(struct kvm_vcpu *vcpu,
9941 struct vmcs12 *vmcs12, u32 exit_reason)
9942{
9943 u32 msr_index = vcpu->arch.regs[VCPU_REGS_RCX];
9944 gpa_t bitmap;
9945
Jan Kiszkacbd29cb2013-02-11 12:19:28 +01009946 if (!nested_cpu_has(vmcs12, CPU_BASED_USE_MSR_BITMAPS))
Joe Perches1d804d02015-03-30 16:46:09 -07009947 return true;
Nadav Har'El644d7112011-05-25 23:12:35 +03009948
9949 /*
9950 * The MSR_BITMAP page is divided into four 1024-byte bitmaps,
9951 * for the four combinations of read/write and low/high MSR numbers.
9952 * First we need to figure out which of the four to use:
9953 */
9954 bitmap = vmcs12->msr_bitmap;
9955 if (exit_reason == EXIT_REASON_MSR_WRITE)
9956 bitmap += 2048;
9957 if (msr_index >= 0xc0000000) {
9958 msr_index -= 0xc0000000;
9959 bitmap += 1024;
9960 }
9961
9962 /* Then read the msr_index'th bit from this bitmap: */
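	/*
	 * For example, a WRMSR to MSR 0xc0000080 (EFER) is checked at
	 * byte 2048 + 1024 + (0x80 / 8) = 3088 of the bitmap page, bit 0.
	 */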
9963 if (msr_index < 1024*8) {
9964 unsigned char b;
Paolo Bonzini54bf36a2015-04-08 15:39:23 +02009965 if (kvm_vcpu_read_guest(vcpu, bitmap + msr_index/8, &b, 1))
Joe Perches1d804d02015-03-30 16:46:09 -07009966 return true;
Nadav Har'El644d7112011-05-25 23:12:35 +03009967 return 1 & (b >> (msr_index & 7));
9968 } else
Joe Perches1d804d02015-03-30 16:46:09 -07009969 return true; /* let L1 handle the wrong parameter */
Nadav Har'El644d7112011-05-25 23:12:35 +03009970}
9971
9972/*
9973 * Return 1 if we should exit from L2 to L1 to handle a CR access exit,
9974 * rather than handle it ourselves in L0. I.e., check if L1 wanted to
9975 * intercept (via guest_host_mask etc.) the current event.
9976 */
9977static bool nested_vmx_exit_handled_cr(struct kvm_vcpu *vcpu,
9978 struct vmcs12 *vmcs12)
9979{
9980 unsigned long exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
9981 int cr = exit_qualification & 15;
Jan H. Schönherre1d39b12017-05-20 13:22:56 +02009982 int reg;
9983 unsigned long val;
Nadav Har'El644d7112011-05-25 23:12:35 +03009984
9985 switch ((exit_qualification >> 4) & 3) {
9986 case 0: /* mov to cr */
Jan H. Schönherre1d39b12017-05-20 13:22:56 +02009987 reg = (exit_qualification >> 8) & 15;
9988 val = kvm_register_readl(vcpu, reg);
Nadav Har'El644d7112011-05-25 23:12:35 +03009989 switch (cr) {
9990 case 0:
9991 if (vmcs12->cr0_guest_host_mask &
9992 (val ^ vmcs12->cr0_read_shadow))
Joe Perches1d804d02015-03-30 16:46:09 -07009993 return true;
Nadav Har'El644d7112011-05-25 23:12:35 +03009994 break;
9995 case 3:
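			/*
			 * A value matching one of the first cr3_target_count
			 * CR3-target values never causes a VM exit, even with
			 * CR3-load exiting enabled.
			 */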
9996 if ((vmcs12->cr3_target_count >= 1 &&
9997 vmcs12->cr3_target_value0 == val) ||
9998 (vmcs12->cr3_target_count >= 2 &&
9999 vmcs12->cr3_target_value1 == val) ||
10000 (vmcs12->cr3_target_count >= 3 &&
10001 vmcs12->cr3_target_value2 == val) ||
10002 (vmcs12->cr3_target_count >= 4 &&
10003 vmcs12->cr3_target_value3 == val))
Joe Perches1d804d02015-03-30 16:46:09 -070010004 return false;
Nadav Har'El644d7112011-05-25 23:12:35 +030010005 if (nested_cpu_has(vmcs12, CPU_BASED_CR3_LOAD_EXITING))
Joe Perches1d804d02015-03-30 16:46:09 -070010006 return true;
Nadav Har'El644d7112011-05-25 23:12:35 +030010007 break;
10008 case 4:
10009 if (vmcs12->cr4_guest_host_mask &
10010 (vmcs12->cr4_read_shadow ^ val))
Joe Perches1d804d02015-03-30 16:46:09 -070010011 return true;
Nadav Har'El644d7112011-05-25 23:12:35 +030010012 break;
10013 case 8:
10014 if (nested_cpu_has(vmcs12, CPU_BASED_CR8_LOAD_EXITING))
Joe Perches1d804d02015-03-30 16:46:09 -070010015 return true;
Nadav Har'El644d7112011-05-25 23:12:35 +030010016 break;
10017 }
10018 break;
10019 case 2: /* clts */
10020 if ((vmcs12->cr0_guest_host_mask & X86_CR0_TS) &&
10021 (vmcs12->cr0_read_shadow & X86_CR0_TS))
Joe Perches1d804d02015-03-30 16:46:09 -070010022 return true;
Nadav Har'El644d7112011-05-25 23:12:35 +030010023 break;
10024 case 1: /* mov from cr */
10025 switch (cr) {
10026 case 3:
10027 if (vmcs12->cpu_based_vm_exec_control &
10028 CPU_BASED_CR3_STORE_EXITING)
Joe Perches1d804d02015-03-30 16:46:09 -070010029 return true;
Nadav Har'El644d7112011-05-25 23:12:35 +030010030 break;
10031 case 8:
10032 if (vmcs12->cpu_based_vm_exec_control &
10033 CPU_BASED_CR8_STORE_EXITING)
Joe Perches1d804d02015-03-30 16:46:09 -070010034 return true;
Nadav Har'El644d7112011-05-25 23:12:35 +030010035 break;
10036 }
10037 break;
10038 case 3: /* lmsw */
10039 /*
10040 * lmsw can change bits 1..3 of cr0, and only set bit 0 of
10041 * cr0. Other attempted changes are ignored, with no exit.
10042 */
Jan H. Schönherre1d39b12017-05-20 13:22:56 +020010043 val = (exit_qualification >> LMSW_SOURCE_DATA_SHIFT) & 0x0f;
Nadav Har'El644d7112011-05-25 23:12:35 +030010044 if (vmcs12->cr0_guest_host_mask & 0xe &
10045 (val ^ vmcs12->cr0_read_shadow))
Joe Perches1d804d02015-03-30 16:46:09 -070010046 return true;
Nadav Har'El644d7112011-05-25 23:12:35 +030010047 if ((vmcs12->cr0_guest_host_mask & 0x1) &&
10048 !(vmcs12->cr0_read_shadow & 0x1) &&
10049 (val & 0x1))
Joe Perches1d804d02015-03-30 16:46:09 -070010050 return true;
Nadav Har'El644d7112011-05-25 23:12:35 +030010051 break;
10052 }
Joe Perches1d804d02015-03-30 16:46:09 -070010053 return false;
Nadav Har'El644d7112011-05-25 23:12:35 +030010054}
10055
Liran Alona7cde482018-06-23 02:35:10 +030010056static bool nested_vmx_exit_handled_vmcs_access(struct kvm_vcpu *vcpu,
10057 struct vmcs12 *vmcs12, gpa_t bitmap)
10058{
10059 u32 vmx_instruction_info;
10060 unsigned long field;
10061 u8 b;
10062
10063 if (!nested_cpu_has_shadow_vmcs(vmcs12))
10064 return true;
10065
10066 /* Decode instruction info and find the field to access */
10067 vmx_instruction_info = vmcs_read32(VMX_INSTRUCTION_INFO);
10068 field = kvm_register_read(vcpu, (((vmx_instruction_info) >> 28) & 0xf));
10069
10070 /* Out-of-range fields always cause a VM exit from L2 to L1 */
10071 if (field >> 15)
10072 return true;
10073
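	/*
	 * One bit per field encoding: a set bit in the VMREAD/VMWRITE
	 * bitmap means L1 wants to intercept accesses to that field.
	 */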
10074 if (kvm_vcpu_read_guest(vcpu, bitmap + field/8, &b, 1))
10075 return true;
10076
10077 return 1 & (b >> (field & 7));
10078}
10079
Nadav Har'El644d7112011-05-25 23:12:35 +030010080/*
10081 * Return 1 if we should exit from L2 to L1 to handle an exit, or 0 if we
10082 * should handle it ourselves in L0 (and then continue L2). Only call this
10083 * when in is_guest_mode (L2).
10084 */
Paolo Bonzini7313c692017-07-27 10:31:25 +020010085static bool nested_vmx_exit_reflected(struct kvm_vcpu *vcpu, u32 exit_reason)
Nadav Har'El644d7112011-05-25 23:12:35 +030010086{
Nadav Har'El644d7112011-05-25 23:12:35 +030010087 u32 intr_info = vmcs_read32(VM_EXIT_INTR_INFO);
10088 struct vcpu_vmx *vmx = to_vmx(vcpu);
10089 struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
10090
Jim Mattson4f350c62017-09-14 16:31:44 -070010091 if (vmx->nested.nested_run_pending)
10092 return false;
10093
10094 if (unlikely(vmx->fail)) {
10095 pr_info_ratelimited("%s failed vm entry %x\n", __func__,
10096 vmcs_read32(VM_INSTRUCTION_ERROR));
10097 return true;
10098 }
Jan Kiszka542060e2014-01-04 18:47:21 +010010099
David Matlackc9f04402017-08-01 14:00:40 -070010100 /*
10101 * The host physical addresses of some pages of guest memory
Jim Mattsonde3a0022017-11-27 17:22:25 -060010102 * are loaded into the vmcs02 (e.g. vmcs12's Virtual APIC
10103 * Page). The CPU may write to these pages via their host
10104 * physical address while L2 is running, bypassing any
10105 * address-translation-based dirty tracking (e.g. EPT write
10106 * protection).
David Matlackc9f04402017-08-01 14:00:40 -070010107 *
10108 * Mark them dirty on every exit from L2 to prevent them from
10109 * getting out of sync with dirty tracking.
10110 */
10111 nested_mark_vmcs12_pages_dirty(vcpu);
10112
Jim Mattson4f350c62017-09-14 16:31:44 -070010113 trace_kvm_nested_vmexit(kvm_rip_read(vcpu), exit_reason,
10114 vmcs_readl(EXIT_QUALIFICATION),
10115 vmx->idt_vectoring_info,
10116 intr_info,
10117 vmcs_read32(VM_EXIT_INTR_ERROR_CODE),
10118 KVM_ISA_VMX);
Nadav Har'El644d7112011-05-25 23:12:35 +030010119
10120 switch (exit_reason) {
10121 case EXIT_REASON_EXCEPTION_NMI:
Jim Mattsonef85b672016-12-12 11:01:37 -080010122 if (is_nmi(intr_info))
Joe Perches1d804d02015-03-30 16:46:09 -070010123 return false;
Nadav Har'El644d7112011-05-25 23:12:35 +030010124 else if (is_page_fault(intr_info))
Wanpeng Li52a5c152017-07-13 18:30:42 -070010125 return !vmx->vcpu.arch.apf.host_apf_reason && enable_ept;
Jan Kiszka6f054852016-02-09 20:15:18 +010010126 else if (is_debug(intr_info) &&
10127 vcpu->guest_debug &
10128 (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP))
10129 return false;
10130 else if (is_breakpoint(intr_info) &&
10131 vcpu->guest_debug & KVM_GUESTDBG_USE_SW_BP)
10132 return false;
Nadav Har'El644d7112011-05-25 23:12:35 +030010133 return vmcs12->exception_bitmap &
10134 (1u << (intr_info & INTR_INFO_VECTOR_MASK));
10135 case EXIT_REASON_EXTERNAL_INTERRUPT:
Joe Perches1d804d02015-03-30 16:46:09 -070010136 return false;
Nadav Har'El644d7112011-05-25 23:12:35 +030010137 case EXIT_REASON_TRIPLE_FAULT:
Joe Perches1d804d02015-03-30 16:46:09 -070010138 return true;
Nadav Har'El644d7112011-05-25 23:12:35 +030010139 case EXIT_REASON_PENDING_INTERRUPT:
Jan Kiszka3b656cf2013-04-14 12:12:45 +020010140 return nested_cpu_has(vmcs12, CPU_BASED_VIRTUAL_INTR_PENDING);
Nadav Har'El644d7112011-05-25 23:12:35 +030010141 case EXIT_REASON_NMI_WINDOW:
Jan Kiszka3b656cf2013-04-14 12:12:45 +020010142 return nested_cpu_has(vmcs12, CPU_BASED_VIRTUAL_NMI_PENDING);
Nadav Har'El644d7112011-05-25 23:12:35 +030010143 case EXIT_REASON_TASK_SWITCH:
Joe Perches1d804d02015-03-30 16:46:09 -070010144 return true;
Nadav Har'El644d7112011-05-25 23:12:35 +030010145 case EXIT_REASON_CPUID:
Joe Perches1d804d02015-03-30 16:46:09 -070010146 return true;
Nadav Har'El644d7112011-05-25 23:12:35 +030010147 case EXIT_REASON_HLT:
10148 return nested_cpu_has(vmcs12, CPU_BASED_HLT_EXITING);
10149 case EXIT_REASON_INVD:
Joe Perches1d804d02015-03-30 16:46:09 -070010150 return true;
Nadav Har'El644d7112011-05-25 23:12:35 +030010151 case EXIT_REASON_INVLPG:
10152 return nested_cpu_has(vmcs12, CPU_BASED_INVLPG_EXITING);
10153 case EXIT_REASON_RDPMC:
10154 return nested_cpu_has(vmcs12, CPU_BASED_RDPMC_EXITING);
Paolo Bonzinia5f46452017-03-30 11:55:32 +020010155 case EXIT_REASON_RDRAND:
David Hildenbrand736fdf72017-08-24 20:51:37 +020010156 return nested_cpu_has2(vmcs12, SECONDARY_EXEC_RDRAND_EXITING);
Paolo Bonzinia5f46452017-03-30 11:55:32 +020010157 case EXIT_REASON_RDSEED:
David Hildenbrand736fdf72017-08-24 20:51:37 +020010158 return nested_cpu_has2(vmcs12, SECONDARY_EXEC_RDSEED_EXITING);
Jan Kiszkab3a2a902015-03-23 19:27:19 +010010159 case EXIT_REASON_RDTSC: case EXIT_REASON_RDTSCP:
Nadav Har'El644d7112011-05-25 23:12:35 +030010160 return nested_cpu_has(vmcs12, CPU_BASED_RDTSC_EXITING);
Liran Alona7cde482018-06-23 02:35:10 +030010161 case EXIT_REASON_VMREAD:
10162 return nested_vmx_exit_handled_vmcs_access(vcpu, vmcs12,
10163 vmcs12->vmread_bitmap);
10164 case EXIT_REASON_VMWRITE:
10165 return nested_vmx_exit_handled_vmcs_access(vcpu, vmcs12,
10166 vmcs12->vmwrite_bitmap);
Nadav Har'El644d7112011-05-25 23:12:35 +030010167 case EXIT_REASON_VMCALL: case EXIT_REASON_VMCLEAR:
10168 case EXIT_REASON_VMLAUNCH: case EXIT_REASON_VMPTRLD:
Liran Alona7cde482018-06-23 02:35:10 +030010169 case EXIT_REASON_VMPTRST: case EXIT_REASON_VMRESUME:
Nadav Har'El644d7112011-05-25 23:12:35 +030010170 case EXIT_REASON_VMOFF: case EXIT_REASON_VMON:
Petr Matouseka642fc32014-09-23 20:22:30 +020010171 case EXIT_REASON_INVEPT: case EXIT_REASON_INVVPID:
Nadav Har'El644d7112011-05-25 23:12:35 +030010172 /*
10173 * VMX instructions trap unconditionally. This allows L1 to
10174 * emulate them for its L2 guest, i.e., allows 3-level nesting!
10175 */
Joe Perches1d804d02015-03-30 16:46:09 -070010176 return true;
Nadav Har'El644d7112011-05-25 23:12:35 +030010177 case EXIT_REASON_CR_ACCESS:
10178 return nested_vmx_exit_handled_cr(vcpu, vmcs12);
10179 case EXIT_REASON_DR_ACCESS:
10180 return nested_cpu_has(vmcs12, CPU_BASED_MOV_DR_EXITING);
10181 case EXIT_REASON_IO_INSTRUCTION:
Jan Kiszka908a7bd2013-02-18 11:21:16 +010010182 return nested_vmx_exit_handled_io(vcpu, vmcs12);
Paolo Bonzini1b073042016-10-25 16:06:30 +020010183 case EXIT_REASON_GDTR_IDTR: case EXIT_REASON_LDTR_TR:
10184 return nested_cpu_has2(vmcs12, SECONDARY_EXEC_DESC);
Nadav Har'El644d7112011-05-25 23:12:35 +030010185 case EXIT_REASON_MSR_READ:
10186 case EXIT_REASON_MSR_WRITE:
10187 return nested_vmx_exit_handled_msr(vcpu, vmcs12, exit_reason);
10188 case EXIT_REASON_INVALID_STATE:
Joe Perches1d804d02015-03-30 16:46:09 -070010189 return true;
Nadav Har'El644d7112011-05-25 23:12:35 +030010190 case EXIT_REASON_MWAIT_INSTRUCTION:
10191 return nested_cpu_has(vmcs12, CPU_BASED_MWAIT_EXITING);
Mihai Donțu5f3d45e2015-07-05 20:08:57 +030010192 case EXIT_REASON_MONITOR_TRAP_FLAG:
10193 return nested_cpu_has(vmcs12, CPU_BASED_MONITOR_TRAP_FLAG);
Nadav Har'El644d7112011-05-25 23:12:35 +030010194 case EXIT_REASON_MONITOR_INSTRUCTION:
10195 return nested_cpu_has(vmcs12, CPU_BASED_MONITOR_EXITING);
10196 case EXIT_REASON_PAUSE_INSTRUCTION:
10197 return nested_cpu_has(vmcs12, CPU_BASED_PAUSE_EXITING) ||
10198 nested_cpu_has2(vmcs12,
10199 SECONDARY_EXEC_PAUSE_LOOP_EXITING);
10200 case EXIT_REASON_MCE_DURING_VMENTRY:
Joe Perches1d804d02015-03-30 16:46:09 -070010201 return false;
Nadav Har'El644d7112011-05-25 23:12:35 +030010202 case EXIT_REASON_TPR_BELOW_THRESHOLD:
Wanpeng Lia7c0b072014-08-21 19:46:50 +080010203 return nested_cpu_has(vmcs12, CPU_BASED_TPR_SHADOW);
Nadav Har'El644d7112011-05-25 23:12:35 +030010204 case EXIT_REASON_APIC_ACCESS:
Wincy Van82f0dd42015-02-03 23:57:18 +080010205 case EXIT_REASON_APIC_WRITE:
Wincy Van608406e2015-02-03 23:57:51 +080010206 case EXIT_REASON_EOI_INDUCED:
Jim Mattsonab5df312018-05-09 17:02:03 -040010207 /*
10208 * The controls for "virtualize APIC accesses," "APIC-
10209 * register virtualization," and "virtual-interrupt
10210 * delivery" only come from vmcs12.
10211 */
Joe Perches1d804d02015-03-30 16:46:09 -070010212 return true;
Nadav Har'El644d7112011-05-25 23:12:35 +030010213 case EXIT_REASON_EPT_VIOLATION:
Nadav Har'El2b1be672013-08-05 11:07:19 +030010214 /*
10215 * L0 always deals with the EPT violation. If nested EPT is
10216 * used, and the nested mmu code discovers that the address is
10217 * missing in the guest EPT table (EPT12), the EPT violation
10218 * will be injected with nested_ept_inject_page_fault()
10219 */
Joe Perches1d804d02015-03-30 16:46:09 -070010220 return false;
Nadav Har'El644d7112011-05-25 23:12:35 +030010221 case EXIT_REASON_EPT_MISCONFIG:
Nadav Har'El2b1be672013-08-05 11:07:19 +030010222 /*
 10223	 * L2 never directly uses L1's EPT, but rather L0's own EPT
 10224	 * table (shadow on EPT) or a merged EPT table that L0 built
 10225	 * (EPT on EPT). So any problems with the structure of the
 10226	 * table are L0's fault.
10227 */
Joe Perches1d804d02015-03-30 16:46:09 -070010228 return false;
Paolo Bonzini90a2db62017-07-27 13:22:13 +020010229 case EXIT_REASON_INVPCID:
10230 return
10231 nested_cpu_has2(vmcs12, SECONDARY_EXEC_ENABLE_INVPCID) &&
10232 nested_cpu_has(vmcs12, CPU_BASED_INVLPG_EXITING);
Nadav Har'El644d7112011-05-25 23:12:35 +030010233 case EXIT_REASON_WBINVD:
10234 return nested_cpu_has2(vmcs12, SECONDARY_EXEC_WBINVD_EXITING);
10235 case EXIT_REASON_XSETBV:
Joe Perches1d804d02015-03-30 16:46:09 -070010236 return true;
Wanpeng Li81dc01f2014-12-04 19:11:07 +080010237 case EXIT_REASON_XSAVES: case EXIT_REASON_XRSTORS:
10238 /*
10239 * This should never happen, since it is not possible to
10240 * set XSS to a non-zero value---neither in L1 nor in L2.
 10241	 * If it were, XSS would have to be checked against
10242 * the XSS exit bitmap in vmcs12.
10243 */
10244 return nested_cpu_has2(vmcs12, SECONDARY_EXEC_XSAVES);
Wanpeng Li55123e32016-07-06 18:29:58 +080010245 case EXIT_REASON_PREEMPTION_TIMER:
10246 return false;
Ladi Prosekab007cc2017-03-31 10:19:26 +020010247 case EXIT_REASON_PML_FULL:
Bandan Das03efce62017-05-05 15:25:15 -040010248 /* We emulate PML support to L1. */
Ladi Prosekab007cc2017-03-31 10:19:26 +020010249 return false;
Bandan Das2a499e42017-08-03 15:54:41 -040010250 case EXIT_REASON_VMFUNC:
10251 /* VM functions are emulated through L2->L0 vmexits. */
10252 return false;
Sean Christopherson0b665d32018-08-14 09:33:34 -070010253 case EXIT_REASON_ENCLS:
10254 /* SGX is never exposed to L1 */
10255 return false;
Nadav Har'El644d7112011-05-25 23:12:35 +030010256 default:
Joe Perches1d804d02015-03-30 16:46:09 -070010257 return true;
Nadav Har'El644d7112011-05-25 23:12:35 +030010258 }
10259}
10260
Paolo Bonzini7313c692017-07-27 10:31:25 +020010261static int nested_vmx_reflect_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason)
10262{
10263 u32 exit_intr_info = vmcs_read32(VM_EXIT_INTR_INFO);
10264
10265 /*
10266 * At this point, the exit interruption info in exit_intr_info
10267 * is only valid for EXCEPTION_NMI exits. For EXTERNAL_INTERRUPT
10268 * we need to query the in-kernel LAPIC.
10269 */
10270 WARN_ON(exit_reason == EXIT_REASON_EXTERNAL_INTERRUPT);
10271 if ((exit_intr_info &
10272 (INTR_INFO_VALID_MASK | INTR_INFO_DELIVER_CODE_MASK)) ==
10273 (INTR_INFO_VALID_MASK | INTR_INFO_DELIVER_CODE_MASK)) {
10274 struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
10275 vmcs12->vm_exit_intr_error_code =
10276 vmcs_read32(VM_EXIT_INTR_ERROR_CODE);
10277 }
10278
10279 nested_vmx_vmexit(vcpu, exit_reason, exit_intr_info,
10280 vmcs_readl(EXIT_QUALIFICATION));
10281 return 1;
10282}
10283
Avi Kivity586f9602010-11-18 13:09:54 +020010284static void vmx_get_exit_info(struct kvm_vcpu *vcpu, u64 *info1, u64 *info2)
10285{
10286 *info1 = vmcs_readl(EXIT_QUALIFICATION);
10287 *info2 = vmcs_read32(VM_EXIT_INTR_INFO);
10288}
10289
Kai Huanga3eaa862015-11-04 13:46:05 +080010290static void vmx_destroy_pml_buffer(struct vcpu_vmx *vmx)
Kai Huang843e4332015-01-28 10:54:28 +080010291{
Kai Huanga3eaa862015-11-04 13:46:05 +080010292 if (vmx->pml_pg) {
10293 __free_page(vmx->pml_pg);
10294 vmx->pml_pg = NULL;
10295 }
Kai Huang843e4332015-01-28 10:54:28 +080010296}
10297
Paolo Bonzini54bf36a2015-04-08 15:39:23 +020010298static void vmx_flush_pml_buffer(struct kvm_vcpu *vcpu)
Kai Huang843e4332015-01-28 10:54:28 +080010299{
Paolo Bonzini54bf36a2015-04-08 15:39:23 +020010300 struct vcpu_vmx *vmx = to_vmx(vcpu);
Kai Huang843e4332015-01-28 10:54:28 +080010301 u64 *pml_buf;
10302 u16 pml_idx;
10303
10304 pml_idx = vmcs_read16(GUEST_PML_INDEX);
10305
10306 /* Do nothing if PML buffer is empty */
10307 if (pml_idx == (PML_ENTITY_NUM - 1))
10308 return;
10309
10310 /* PML index always points to next available PML buffer entity */
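	/*
	 * Hardware writes a GPA at the current index and then decrements
	 * it, so the valid entries are pml_idx + 1 ... PML_ENTITY_NUM - 1.
	 * A completely full buffer leaves the index at 0xffff, in which
	 * case every slot is valid.
	 */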
10311 if (pml_idx >= PML_ENTITY_NUM)
10312 pml_idx = 0;
10313 else
10314 pml_idx++;
10315
10316 pml_buf = page_address(vmx->pml_pg);
10317 for (; pml_idx < PML_ENTITY_NUM; pml_idx++) {
10318 u64 gpa;
10319
10320 gpa = pml_buf[pml_idx];
10321 WARN_ON(gpa & (PAGE_SIZE - 1));
Paolo Bonzini54bf36a2015-04-08 15:39:23 +020010322 kvm_vcpu_mark_page_dirty(vcpu, gpa >> PAGE_SHIFT);
Kai Huang843e4332015-01-28 10:54:28 +080010323 }
10324
10325 /* reset PML index */
10326 vmcs_write16(GUEST_PML_INDEX, PML_ENTITY_NUM - 1);
10327}
10328
10329/*
10330 * Flush all vcpus' PML buffer and update logged GPAs to dirty_bitmap.
10331 * Called before reporting dirty_bitmap to userspace.
10332 */
10333static void kvm_flush_pml_buffers(struct kvm *kvm)
10334{
10335 int i;
10336 struct kvm_vcpu *vcpu;
10337 /*
 10338	 * We only need to kick each vcpu out of guest mode here, as the PML
 10339	 * buffer is flushed at the beginning of every VMEXIT and only vcpus
 10340	 * currently running in guest mode can have unflushed GPAs in their
 10341	 * PML buffer.
10342 */
10343 kvm_for_each_vcpu(i, vcpu, kvm)
10344 kvm_vcpu_kick(vcpu);
10345}
10346
Paolo Bonzini4eb64dc2015-04-30 12:57:28 +020010347static void vmx_dump_sel(char *name, uint32_t sel)
10348{
10349 pr_err("%s sel=0x%04x, attr=0x%05x, limit=0x%08x, base=0x%016lx\n",
Chao Peng96794e42017-02-21 03:50:01 -050010350 name, vmcs_read16(sel),
Paolo Bonzini4eb64dc2015-04-30 12:57:28 +020010351 vmcs_read32(sel + GUEST_ES_AR_BYTES - GUEST_ES_SELECTOR),
10352 vmcs_read32(sel + GUEST_ES_LIMIT - GUEST_ES_SELECTOR),
10353 vmcs_readl(sel + GUEST_ES_BASE - GUEST_ES_SELECTOR));
10354}
10355
10356static void vmx_dump_dtsel(char *name, uint32_t limit)
10357{
10358 pr_err("%s limit=0x%08x, base=0x%016lx\n",
10359 name, vmcs_read32(limit),
10360 vmcs_readl(limit + GUEST_GDTR_BASE - GUEST_GDTR_LIMIT));
10361}
10362
10363static void dump_vmcs(void)
10364{
10365 u32 vmentry_ctl = vmcs_read32(VM_ENTRY_CONTROLS);
10366 u32 vmexit_ctl = vmcs_read32(VM_EXIT_CONTROLS);
10367 u32 cpu_based_exec_ctrl = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL);
10368 u32 pin_based_exec_ctrl = vmcs_read32(PIN_BASED_VM_EXEC_CONTROL);
10369 u32 secondary_exec_control = 0;
10370 unsigned long cr4 = vmcs_readl(GUEST_CR4);
Paolo Bonzinif3531052015-12-03 15:49:56 +010010371 u64 efer = vmcs_read64(GUEST_IA32_EFER);
Paolo Bonzini4eb64dc2015-04-30 12:57:28 +020010372 int i, n;
10373
10374 if (cpu_has_secondary_exec_ctrls())
10375 secondary_exec_control = vmcs_read32(SECONDARY_VM_EXEC_CONTROL);
10376
10377 pr_err("*** Guest State ***\n");
10378 pr_err("CR0: actual=0x%016lx, shadow=0x%016lx, gh_mask=%016lx\n",
10379 vmcs_readl(GUEST_CR0), vmcs_readl(CR0_READ_SHADOW),
10380 vmcs_readl(CR0_GUEST_HOST_MASK));
10381 pr_err("CR4: actual=0x%016lx, shadow=0x%016lx, gh_mask=%016lx\n",
10382 cr4, vmcs_readl(CR4_READ_SHADOW), vmcs_readl(CR4_GUEST_HOST_MASK));
10383 pr_err("CR3 = 0x%016lx\n", vmcs_readl(GUEST_CR3));
10384 if ((secondary_exec_control & SECONDARY_EXEC_ENABLE_EPT) &&
10385 (cr4 & X86_CR4_PAE) && !(efer & EFER_LMA))
10386 {
Paolo Bonzini845c5b402015-12-03 15:51:00 +010010387 pr_err("PDPTR0 = 0x%016llx PDPTR1 = 0x%016llx\n",
10388 vmcs_read64(GUEST_PDPTR0), vmcs_read64(GUEST_PDPTR1));
10389 pr_err("PDPTR2 = 0x%016llx PDPTR3 = 0x%016llx\n",
10390 vmcs_read64(GUEST_PDPTR2), vmcs_read64(GUEST_PDPTR3));
Paolo Bonzini4eb64dc2015-04-30 12:57:28 +020010391 }
10392 pr_err("RSP = 0x%016lx RIP = 0x%016lx\n",
10393 vmcs_readl(GUEST_RSP), vmcs_readl(GUEST_RIP));
10394 pr_err("RFLAGS=0x%08lx DR7 = 0x%016lx\n",
10395 vmcs_readl(GUEST_RFLAGS), vmcs_readl(GUEST_DR7));
10396 pr_err("Sysenter RSP=%016lx CS:RIP=%04x:%016lx\n",
10397 vmcs_readl(GUEST_SYSENTER_ESP),
10398 vmcs_read32(GUEST_SYSENTER_CS), vmcs_readl(GUEST_SYSENTER_EIP));
10399 vmx_dump_sel("CS: ", GUEST_CS_SELECTOR);
10400 vmx_dump_sel("DS: ", GUEST_DS_SELECTOR);
10401 vmx_dump_sel("SS: ", GUEST_SS_SELECTOR);
10402 vmx_dump_sel("ES: ", GUEST_ES_SELECTOR);
10403 vmx_dump_sel("FS: ", GUEST_FS_SELECTOR);
10404 vmx_dump_sel("GS: ", GUEST_GS_SELECTOR);
10405 vmx_dump_dtsel("GDTR:", GUEST_GDTR_LIMIT);
10406 vmx_dump_sel("LDTR:", GUEST_LDTR_SELECTOR);
10407 vmx_dump_dtsel("IDTR:", GUEST_IDTR_LIMIT);
10408 vmx_dump_sel("TR: ", GUEST_TR_SELECTOR);
10409 if ((vmexit_ctl & (VM_EXIT_SAVE_IA32_PAT | VM_EXIT_SAVE_IA32_EFER)) ||
10410 (vmentry_ctl & (VM_ENTRY_LOAD_IA32_PAT | VM_ENTRY_LOAD_IA32_EFER)))
Paolo Bonzini845c5b402015-12-03 15:51:00 +010010411 pr_err("EFER = 0x%016llx PAT = 0x%016llx\n",
10412 efer, vmcs_read64(GUEST_IA32_PAT));
10413 pr_err("DebugCtl = 0x%016llx DebugExceptions = 0x%016lx\n",
10414 vmcs_read64(GUEST_IA32_DEBUGCTL),
Paolo Bonzini4eb64dc2015-04-30 12:57:28 +020010415 vmcs_readl(GUEST_PENDING_DBG_EXCEPTIONS));
Vitaly Kuznetsov773e8a02018-03-20 15:02:11 +010010416 if (cpu_has_load_perf_global_ctrl &&
10417 vmentry_ctl & VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL)
Paolo Bonzini845c5b402015-12-03 15:51:00 +010010418 pr_err("PerfGlobCtl = 0x%016llx\n",
10419 vmcs_read64(GUEST_IA32_PERF_GLOBAL_CTRL));
Paolo Bonzini4eb64dc2015-04-30 12:57:28 +020010420 if (vmentry_ctl & VM_ENTRY_LOAD_BNDCFGS)
Paolo Bonzini845c5b402015-12-03 15:51:00 +010010421 pr_err("BndCfgS = 0x%016llx\n", vmcs_read64(GUEST_BNDCFGS));
Paolo Bonzini4eb64dc2015-04-30 12:57:28 +020010422 pr_err("Interruptibility = %08x ActivityState = %08x\n",
10423 vmcs_read32(GUEST_INTERRUPTIBILITY_INFO),
10424 vmcs_read32(GUEST_ACTIVITY_STATE));
10425 if (secondary_exec_control & SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY)
10426 pr_err("InterruptStatus = %04x\n",
10427 vmcs_read16(GUEST_INTR_STATUS));
10428
10429 pr_err("*** Host State ***\n");
10430 pr_err("RIP = 0x%016lx RSP = 0x%016lx\n",
10431 vmcs_readl(HOST_RIP), vmcs_readl(HOST_RSP));
10432 pr_err("CS=%04x SS=%04x DS=%04x ES=%04x FS=%04x GS=%04x TR=%04x\n",
10433 vmcs_read16(HOST_CS_SELECTOR), vmcs_read16(HOST_SS_SELECTOR),
10434 vmcs_read16(HOST_DS_SELECTOR), vmcs_read16(HOST_ES_SELECTOR),
10435 vmcs_read16(HOST_FS_SELECTOR), vmcs_read16(HOST_GS_SELECTOR),
10436 vmcs_read16(HOST_TR_SELECTOR));
10437 pr_err("FSBase=%016lx GSBase=%016lx TRBase=%016lx\n",
10438 vmcs_readl(HOST_FS_BASE), vmcs_readl(HOST_GS_BASE),
10439 vmcs_readl(HOST_TR_BASE));
10440 pr_err("GDTBase=%016lx IDTBase=%016lx\n",
10441 vmcs_readl(HOST_GDTR_BASE), vmcs_readl(HOST_IDTR_BASE));
10442 pr_err("CR0=%016lx CR3=%016lx CR4=%016lx\n",
10443 vmcs_readl(HOST_CR0), vmcs_readl(HOST_CR3),
10444 vmcs_readl(HOST_CR4));
10445 pr_err("Sysenter RSP=%016lx CS:RIP=%04x:%016lx\n",
10446 vmcs_readl(HOST_IA32_SYSENTER_ESP),
10447 vmcs_read32(HOST_IA32_SYSENTER_CS),
10448 vmcs_readl(HOST_IA32_SYSENTER_EIP));
10449 if (vmexit_ctl & (VM_EXIT_LOAD_IA32_PAT | VM_EXIT_LOAD_IA32_EFER))
Paolo Bonzini845c5b402015-12-03 15:51:00 +010010450 pr_err("EFER = 0x%016llx PAT = 0x%016llx\n",
10451 vmcs_read64(HOST_IA32_EFER),
10452 vmcs_read64(HOST_IA32_PAT));
Vitaly Kuznetsov773e8a02018-03-20 15:02:11 +010010453 if (cpu_has_load_perf_global_ctrl &&
10454 vmexit_ctl & VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL)
Paolo Bonzini845c5b402015-12-03 15:51:00 +010010455 pr_err("PerfGlobCtl = 0x%016llx\n",
10456 vmcs_read64(HOST_IA32_PERF_GLOBAL_CTRL));
Paolo Bonzini4eb64dc2015-04-30 12:57:28 +020010457
10458 pr_err("*** Control State ***\n");
10459 pr_err("PinBased=%08x CPUBased=%08x SecondaryExec=%08x\n",
10460 pin_based_exec_ctrl, cpu_based_exec_ctrl, secondary_exec_control);
10461 pr_err("EntryControls=%08x ExitControls=%08x\n", vmentry_ctl, vmexit_ctl);
10462 pr_err("ExceptionBitmap=%08x PFECmask=%08x PFECmatch=%08x\n",
10463 vmcs_read32(EXCEPTION_BITMAP),
10464 vmcs_read32(PAGE_FAULT_ERROR_CODE_MASK),
10465 vmcs_read32(PAGE_FAULT_ERROR_CODE_MATCH));
10466 pr_err("VMEntry: intr_info=%08x errcode=%08x ilen=%08x\n",
10467 vmcs_read32(VM_ENTRY_INTR_INFO_FIELD),
10468 vmcs_read32(VM_ENTRY_EXCEPTION_ERROR_CODE),
10469 vmcs_read32(VM_ENTRY_INSTRUCTION_LEN));
10470 pr_err("VMExit: intr_info=%08x errcode=%08x ilen=%08x\n",
10471 vmcs_read32(VM_EXIT_INTR_INFO),
10472 vmcs_read32(VM_EXIT_INTR_ERROR_CODE),
10473 vmcs_read32(VM_EXIT_INSTRUCTION_LEN));
10474 pr_err(" reason=%08x qualification=%016lx\n",
10475 vmcs_read32(VM_EXIT_REASON), vmcs_readl(EXIT_QUALIFICATION));
10476 pr_err("IDTVectoring: info=%08x errcode=%08x\n",
10477 vmcs_read32(IDT_VECTORING_INFO_FIELD),
10478 vmcs_read32(IDT_VECTORING_ERROR_CODE));
Paolo Bonzini845c5b402015-12-03 15:51:00 +010010479 pr_err("TSC Offset = 0x%016llx\n", vmcs_read64(TSC_OFFSET));
Haozhong Zhang8cfe9862015-10-20 15:39:12 +080010480 if (secondary_exec_control & SECONDARY_EXEC_TSC_SCALING)
Paolo Bonzini845c5b402015-12-03 15:51:00 +010010481 pr_err("TSC Multiplier = 0x%016llx\n",
10482 vmcs_read64(TSC_MULTIPLIER));
Paolo Bonzini4eb64dc2015-04-30 12:57:28 +020010483 if (cpu_based_exec_ctrl & CPU_BASED_TPR_SHADOW)
10484 pr_err("TPR Threshold = 0x%02x\n", vmcs_read32(TPR_THRESHOLD));
10485 if (pin_based_exec_ctrl & PIN_BASED_POSTED_INTR)
10486 pr_err("PostedIntrVec = 0x%02x\n", vmcs_read16(POSTED_INTR_NV));
10487 if ((secondary_exec_control & SECONDARY_EXEC_ENABLE_EPT))
Paolo Bonzini845c5b402015-12-03 15:51:00 +010010488 pr_err("EPT pointer = 0x%016llx\n", vmcs_read64(EPT_POINTER));
Paolo Bonzini4eb64dc2015-04-30 12:57:28 +020010489 n = vmcs_read32(CR3_TARGET_COUNT);
10490 for (i = 0; i + 1 < n; i += 4)
10491 pr_err("CR3 target%u=%016lx target%u=%016lx\n",
10492 i, vmcs_readl(CR3_TARGET_VALUE0 + i * 2),
10493 i + 1, vmcs_readl(CR3_TARGET_VALUE0 + i * 2 + 2));
10494 if (i < n)
10495 pr_err("CR3 target%u=%016lx\n",
10496 i, vmcs_readl(CR3_TARGET_VALUE0 + i * 2));
10497 if (secondary_exec_control & SECONDARY_EXEC_PAUSE_LOOP_EXITING)
10498 pr_err("PLE Gap=%08x Window=%08x\n",
10499 vmcs_read32(PLE_GAP), vmcs_read32(PLE_WINDOW));
10500 if (secondary_exec_control & SECONDARY_EXEC_ENABLE_VPID)
10501 pr_err("Virtual processor ID = 0x%04x\n",
10502 vmcs_read16(VIRTUAL_PROCESSOR_ID));
10503}
10504
Avi Kivity6aa8b732006-12-10 02:21:36 -080010505/*
10506 * The guest has exited. See if we can fix it or if we need userspace
10507 * assistance.
10508 */
Avi Kivity851ba692009-08-24 11:10:17 +030010509static int vmx_handle_exit(struct kvm_vcpu *vcpu)
Avi Kivity6aa8b732006-12-10 02:21:36 -080010510{
Avi Kivity29bd8a72007-09-10 17:27:03 +030010511 struct vcpu_vmx *vmx = to_vmx(vcpu);
Andi Kleena0861c02009-06-08 17:37:09 +080010512 u32 exit_reason = vmx->exit_reason;
Avi Kivity1155f762007-11-22 11:30:47 +020010513 u32 vectoring_info = vmx->idt_vectoring_info;
Avi Kivity29bd8a72007-09-10 17:27:03 +030010514
Paolo Bonzini8b89fe12015-12-10 18:37:32 +010010515 trace_kvm_exit(exit_reason, vcpu, KVM_ISA_VMX);
10516
Kai Huang843e4332015-01-28 10:54:28 +080010517 /*
 10518	 * Flush the logged GPAs out of the PML buffer so that dirty_bitmap is
 10519	 * kept up to date. Another benefit is that, in kvm_vm_ioctl_get_dirty_log,
 10520	 * we only need to kick all vcpus out of guest mode before querying
 10521	 * dirty_bitmap, since once a vcpu is back in root mode its PML buffer
 10522	 * has already been flushed.
10523 */
10524 if (enable_pml)
Paolo Bonzini54bf36a2015-04-08 15:39:23 +020010525 vmx_flush_pml_buffer(vcpu);
Kai Huang843e4332015-01-28 10:54:28 +080010526
Mohammed Gamal80ced182009-09-01 12:48:18 +020010527 /* If guest state is invalid, start emulating */
Gleb Natapov14168782013-01-21 15:36:49 +020010528 if (vmx->emulation_required)
Mohammed Gamal80ced182009-09-01 12:48:18 +020010529 return handle_invalid_guest_state(vcpu);
Guillaume Thouvenin1d5a4d92008-10-29 09:39:42 +010010530
Paolo Bonzini7313c692017-07-27 10:31:25 +020010531 if (is_guest_mode(vcpu) && nested_vmx_exit_reflected(vcpu, exit_reason))
10532 return nested_vmx_reflect_vmexit(vcpu, exit_reason);
Nadav Har'El644d7112011-05-25 23:12:35 +030010533
Mohammed Gamal51207022010-05-31 22:40:54 +030010534 if (exit_reason & VMX_EXIT_REASONS_FAILED_VMENTRY) {
Paolo Bonzini4eb64dc2015-04-30 12:57:28 +020010535 dump_vmcs();
Mohammed Gamal51207022010-05-31 22:40:54 +030010536 vcpu->run->exit_reason = KVM_EXIT_FAIL_ENTRY;
10537 vcpu->run->fail_entry.hardware_entry_failure_reason
10538 = exit_reason;
10539 return 0;
10540 }
10541
Avi Kivity29bd8a72007-09-10 17:27:03 +030010542 if (unlikely(vmx->fail)) {
Avi Kivity851ba692009-08-24 11:10:17 +030010543 vcpu->run->exit_reason = KVM_EXIT_FAIL_ENTRY;
10544 vcpu->run->fail_entry.hardware_entry_failure_reason
Avi Kivity29bd8a72007-09-10 17:27:03 +030010545 = vmcs_read32(VM_INSTRUCTION_ERROR);
10546 return 0;
10547 }
Avi Kivity6aa8b732006-12-10 02:21:36 -080010548
Xiao Guangrongb9bf6882012-10-17 13:46:52 +080010549 /*
10550 * Note:
 10551	 * Do not try to fix EXIT_REASON_EPT_MISCONFIG if it was caused by a
 10552	 * delivery event, since that indicates the guest is accessing MMIO.
 10553	 * The vm-exit could be triggered again after returning to the guest,
 10554	 * which would cause an infinite loop.
10555 */
Mike Dayd77c26f2007-10-08 09:02:08 -040010556 if ((vectoring_info & VECTORING_INFO_VALID_MASK) &&
Sheng Yang14394422008-04-28 12:24:45 +080010557 (exit_reason != EXIT_REASON_EXCEPTION_NMI &&
Jan Kiszka60637aa2008-09-26 09:30:47 +020010558 exit_reason != EXIT_REASON_EPT_VIOLATION &&
Cao, Leib244c9f2016-07-15 13:54:04 +000010559 exit_reason != EXIT_REASON_PML_FULL &&
Xiao Guangrongb9bf6882012-10-17 13:46:52 +080010560 exit_reason != EXIT_REASON_TASK_SWITCH)) {
10561 vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
10562 vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_DELIVERY_EV;
Paolo Bonzini70bcd702017-07-05 12:38:06 +020010563 vcpu->run->internal.ndata = 3;
Xiao Guangrongb9bf6882012-10-17 13:46:52 +080010564 vcpu->run->internal.data[0] = vectoring_info;
10565 vcpu->run->internal.data[1] = exit_reason;
Paolo Bonzini70bcd702017-07-05 12:38:06 +020010566 vcpu->run->internal.data[2] = vcpu->arch.exit_qualification;
10567 if (exit_reason == EXIT_REASON_EPT_MISCONFIG) {
10568 vcpu->run->internal.ndata++;
10569 vcpu->run->internal.data[3] =
10570 vmcs_read64(GUEST_PHYSICAL_ADDRESS);
10571 }
Xiao Guangrongb9bf6882012-10-17 13:46:52 +080010572 return 0;
10573 }
Jan Kiszka3b86cd92008-09-26 09:30:57 +020010574
Paolo Bonzinid02fcf52017-11-06 13:31:13 +010010575 if (unlikely(!enable_vnmi &&
Paolo Bonzini8a1b4392017-11-06 13:31:12 +010010576 vmx->loaded_vmcs->soft_vnmi_blocked)) {
10577 if (vmx_interrupt_allowed(vcpu)) {
10578 vmx->loaded_vmcs->soft_vnmi_blocked = 0;
10579 } else if (vmx->loaded_vmcs->vnmi_blocked_time > 1000000000LL &&
10580 vcpu->arch.nmi_pending) {
10581 /*
 10582			 * This CPU gives us no way to detect the end of an
10583 * NMI-blocked window if the guest runs with IRQs
10584 * disabled. So we pull the trigger after 1 s of
10585 * futile waiting, but inform the user about this.
10586 */
10587 printk(KERN_WARNING "%s: Breaking out of NMI-blocked "
10588 "state on VCPU %d after 1 s timeout\n",
10589 __func__, vcpu->vcpu_id);
10590 vmx->loaded_vmcs->soft_vnmi_blocked = 0;
10591 }
10592 }
10593
Avi Kivity6aa8b732006-12-10 02:21:36 -080010594 if (exit_reason < kvm_vmx_max_exit_handlers
10595 && kvm_vmx_exit_handlers[exit_reason])
Avi Kivity851ba692009-08-24 11:10:17 +030010596 return kvm_vmx_exit_handlers[exit_reason](vcpu);
Avi Kivity6aa8b732006-12-10 02:21:36 -080010597 else {
Radim Krčmář6c6c5e02017-01-13 18:59:04 +010010598 vcpu_unimpl(vcpu, "vmx: unexpected exit reason 0x%x\n",
10599 exit_reason);
Michael S. Tsirkin2bc19dc2014-09-18 16:21:16 +030010600 kvm_queue_exception(vcpu, UD_VECTOR);
10601 return 1;
Avi Kivity6aa8b732006-12-10 02:21:36 -080010602 }
Avi Kivity6aa8b732006-12-10 02:21:36 -080010603}
10604
Paolo Bonzinia47dd5f2018-07-02 12:47:38 +020010605/*
10606 * Software based L1D cache flush which is used when microcode providing
10607 * the cache control MSR is not loaded.
10608 *
 10609 * The L1D cache is 32 KiB on Nehalem and later microarchitectures, but
 10610 * flushing it requires reading in 64 KiB because the replacement algorithm
 10611 * is not exactly LRU. This could be sized at runtime via topology
 10612 * information, but as all relevant affected CPUs have a 32 KiB L1D cache
 10613 * there is no point in doing so.
10614 */
Paolo Bonzinic595cee2018-07-02 13:07:14 +020010615static void vmx_l1d_flush(struct kvm_vcpu *vcpu)
Paolo Bonzinia47dd5f2018-07-02 12:47:38 +020010616{
10617 int size = PAGE_SIZE << L1D_CACHE_ORDER;
Paolo Bonzinic595cee2018-07-02 13:07:14 +020010618
10619 /*
Thomas Gleixner2f055942018-07-13 16:23:17 +020010620	 * This code is only executed when the flush mode is 'cond' or
10621 * 'always'
Paolo Bonzinic595cee2018-07-02 13:07:14 +020010622 */
Nicolai Stange427362a2018-07-21 22:25:00 +020010623 if (static_branch_likely(&vmx_l1d_flush_cond)) {
Nicolai Stange45b575c2018-07-27 13:22:16 +020010624 bool flush_l1d;
Nicolai Stange5b6ccc62018-07-21 22:35:28 +020010625
Nicolai Stange379fd0c2018-07-21 22:16:56 +020010626 /*
Nicolai Stange45b575c2018-07-27 13:22:16 +020010627 * Clear the per-vcpu flush bit, it gets set again
10628 * either from vcpu_run() or from one of the unsafe
10629 * VMEXIT handlers.
Nicolai Stange379fd0c2018-07-21 22:16:56 +020010630 */
Nicolai Stange45b575c2018-07-27 13:22:16 +020010631 flush_l1d = vcpu->arch.l1tf_flush_l1d;
Thomas Gleixner4c6523e2018-07-13 16:23:20 +020010632 vcpu->arch.l1tf_flush_l1d = false;
Nicolai Stange45b575c2018-07-27 13:22:16 +020010633
10634 /*
10635 * Clear the per-cpu flush bit, it gets set again from
10636 * the interrupt handlers.
10637 */
10638 flush_l1d |= kvm_get_cpu_l1tf_flush_l1d();
10639 kvm_clear_cpu_l1tf_flush_l1d();
10640
Nicolai Stange5b6ccc62018-07-21 22:35:28 +020010641 if (!flush_l1d)
10642 return;
Nicolai Stange379fd0c2018-07-21 22:16:56 +020010643 }
Paolo Bonzinic595cee2018-07-02 13:07:14 +020010644
10645 vcpu->stat.l1d_flush++;
Paolo Bonzinia47dd5f2018-07-02 12:47:38 +020010646
Paolo Bonzini3fa045b2018-07-02 13:03:48 +020010647 if (static_cpu_has(X86_FEATURE_FLUSH_L1D)) {
10648 wrmsrl(MSR_IA32_FLUSH_CMD, L1D_FLUSH);
10649 return;
10650 }
10651
Paolo Bonzinia47dd5f2018-07-02 12:47:38 +020010652 asm volatile(
10653 /* First ensure the pages are in the TLB */
10654 "xorl %%eax, %%eax\n"
10655 ".Lpopulate_tlb:\n\t"
Nicolai Stange288d1522018-07-18 19:07:38 +020010656 "movzbl (%[flush_pages], %%" _ASM_AX "), %%ecx\n\t"
Paolo Bonzinia47dd5f2018-07-02 12:47:38 +020010657 "addl $4096, %%eax\n\t"
10658 "cmpl %%eax, %[size]\n\t"
10659 "jne .Lpopulate_tlb\n\t"
10660 "xorl %%eax, %%eax\n\t"
10661 "cpuid\n\t"
10662 /* Now fill the cache */
10663 "xorl %%eax, %%eax\n"
10664 ".Lfill_cache:\n"
Nicolai Stange288d1522018-07-18 19:07:38 +020010665 "movzbl (%[flush_pages], %%" _ASM_AX "), %%ecx\n\t"
Paolo Bonzinia47dd5f2018-07-02 12:47:38 +020010666 "addl $64, %%eax\n\t"
10667 "cmpl %%eax, %[size]\n\t"
10668 "jne .Lfill_cache\n\t"
10669 "lfence\n"
Nicolai Stange288d1522018-07-18 19:07:38 +020010670 :: [flush_pages] "r" (vmx_l1d_flush_pages),
Paolo Bonzinia47dd5f2018-07-02 12:47:38 +020010671 [size] "r" (size)
10672 : "eax", "ebx", "ecx", "edx");
10673}
10674
Gleb Natapov95ba8273132009-04-21 17:45:08 +030010675static void update_cr8_intercept(struct kvm_vcpu *vcpu, int tpr, int irr)
Yang, Sheng6e5d8652007-09-12 18:03:11 +080010676{
Wanpeng Lia7c0b072014-08-21 19:46:50 +080010677 struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
10678
10679 if (is_guest_mode(vcpu) &&
10680 nested_cpu_has(vmcs12, CPU_BASED_TPR_SHADOW))
10681 return;
10682
Gleb Natapov95ba8273132009-04-21 17:45:08 +030010683 if (irr == -1 || tpr < irr) {
Yang, Sheng6e5d8652007-09-12 18:03:11 +080010684 vmcs_write32(TPR_THRESHOLD, 0);
10685 return;
10686 }
10687
Gleb Natapov95ba8273132009-04-21 17:45:08 +030010688 vmcs_write32(TPR_THRESHOLD, irr);
Yang, Sheng6e5d8652007-09-12 18:03:11 +080010689}
10690
Jim Mattson8d860bb2018-05-09 16:56:05 -040010691static void vmx_set_virtual_apic_mode(struct kvm_vcpu *vcpu)
Yang Zhang8d146952013-01-25 10:18:50 +080010692{
10693 u32 sec_exec_control;
10694
Jim Mattson8d860bb2018-05-09 16:56:05 -040010695 if (!lapic_in_kernel(vcpu))
10696 return;
10697
Sean Christophersonfd6b6d92018-10-01 14:25:34 -070010698 if (!flexpriority_enabled &&
10699 !cpu_has_vmx_virtualize_x2apic_mode())
10700 return;
10701
Radim Krčmářdccbfcf2016-08-08 20:16:23 +020010702 /* Postpone execution until vmcs01 is the current VMCS. */
10703 if (is_guest_mode(vcpu)) {
Jim Mattson8d860bb2018-05-09 16:56:05 -040010704 to_vmx(vcpu)->nested.change_vmcs01_virtual_apic_mode = true;
Radim Krčmářdccbfcf2016-08-08 20:16:23 +020010705 return;
10706 }
10707
Yang Zhang8d146952013-01-25 10:18:50 +080010708 sec_exec_control = vmcs_read32(SECONDARY_VM_EXEC_CONTROL);
Jim Mattson8d860bb2018-05-09 16:56:05 -040010709 sec_exec_control &= ~(SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES |
10710 SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE);
Yang Zhang8d146952013-01-25 10:18:50 +080010711
Jim Mattson8d860bb2018-05-09 16:56:05 -040010712 switch (kvm_get_apic_mode(vcpu)) {
10713 case LAPIC_MODE_INVALID:
10714 WARN_ONCE(true, "Invalid local APIC state");
10715 case LAPIC_MODE_DISABLED:
10716 break;
10717 case LAPIC_MODE_XAPIC:
10718 if (flexpriority_enabled) {
10719 sec_exec_control |=
10720 SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES;
10721 vmx_flush_tlb(vcpu, true);
10722 }
10723 break;
10724 case LAPIC_MODE_X2APIC:
10725 if (cpu_has_vmx_virtualize_x2apic_mode())
10726 sec_exec_control |=
10727 SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE;
10728 break;
Yang Zhang8d146952013-01-25 10:18:50 +080010729 }
10730 vmcs_write32(SECONDARY_VM_EXEC_CONTROL, sec_exec_control);
10731
Paolo Bonzini904e14f2018-01-16 16:51:18 +010010732 vmx_update_msr_bitmap(vcpu);
Yang Zhang8d146952013-01-25 10:18:50 +080010733}
10734
Tang Chen38b99172014-09-24 15:57:54 +080010735static void vmx_set_apic_access_page_addr(struct kvm_vcpu *vcpu, hpa_t hpa)
10736{
Jim Mattsonab5df312018-05-09 17:02:03 -040010737 if (!is_guest_mode(vcpu)) {
Tang Chen38b99172014-09-24 15:57:54 +080010738 vmcs_write64(APIC_ACCESS_ADDR, hpa);
Junaid Shahida468f2d2018-04-26 13:09:50 -070010739 vmx_flush_tlb(vcpu, true);
Jim Mattsonfb6c8192017-03-16 13:53:59 -070010740 }
Tang Chen38b99172014-09-24 15:57:54 +080010741}
10742
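/*
 * The 16-bit guest interrupt status field holds RVI in its low byte and
 * SVI in its high byte.  This helper rewrites only SVI, the in-service
 * vector used by virtual-interrupt delivery.
 */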
Paolo Bonzini67c9ddd2016-05-10 17:01:23 +020010743static void vmx_hwapic_isr_update(struct kvm_vcpu *vcpu, int max_isr)
Yang Zhangc7c9c562013-01-25 10:18:51 +080010744{
10745 u16 status;
10746 u8 old;
10747
Paolo Bonzini67c9ddd2016-05-10 17:01:23 +020010748 if (max_isr == -1)
10749 max_isr = 0;
Yang Zhangc7c9c562013-01-25 10:18:51 +080010750
10751 status = vmcs_read16(GUEST_INTR_STATUS);
10752 old = status >> 8;
Paolo Bonzini67c9ddd2016-05-10 17:01:23 +020010753 if (max_isr != old) {
Yang Zhangc7c9c562013-01-25 10:18:51 +080010754 status &= 0xff;
Paolo Bonzini67c9ddd2016-05-10 17:01:23 +020010755 status |= max_isr << 8;
Yang Zhangc7c9c562013-01-25 10:18:51 +080010756 vmcs_write16(GUEST_INTR_STATUS, status);
10757 }
10758}
10759
10760static void vmx_set_rvi(int vector)
10761{
10762 u16 status;
10763 u8 old;
10764
Wei Wang4114c272014-11-05 10:53:43 +080010765 if (vector == -1)
10766 vector = 0;
10767
Yang Zhangc7c9c562013-01-25 10:18:51 +080010768 status = vmcs_read16(GUEST_INTR_STATUS);
10769 old = (u8)status & 0xff;
10770 if ((u8)vector != old) {
10771 status &= ~0xff;
10772 status |= (u8)vector;
10773 vmcs_write16(GUEST_INTR_STATUS, status);
10774 }
10775}
10776
10777static void vmx_hwapic_irr_update(struct kvm_vcpu *vcpu, int max_irr)
10778{
Liran Alon851c1a182017-12-24 18:12:56 +020010779 /*
10780 * When running L2, updating RVI is only relevant when
10781	 * vmcs12 virtual-interrupt-delivery is enabled.
10782	 * However, it can be enabled only when L1 also
10783	 * intercepts external interrupts, and in that case
10784	 * we should not update vmcs02 RVI but instead intercept
10785	 * the interrupt. Therefore, do nothing when running L2.
10786 */
10787 if (!is_guest_mode(vcpu))
Wanpeng Li963fee12014-07-17 19:03:00 +080010788 vmx_set_rvi(max_irr);
Yang Zhangc7c9c562013-01-25 10:18:51 +080010789}
10790
Paolo Bonzini76dfafd52016-12-19 17:17:11 +010010791static int vmx_sync_pir_to_irr(struct kvm_vcpu *vcpu)
Paolo Bonzini810e6de2016-12-19 13:05:46 +010010792{
10793 struct vcpu_vmx *vmx = to_vmx(vcpu);
Paolo Bonzini76dfafd52016-12-19 17:17:11 +010010794 int max_irr;
Liran Alonf27a85c2017-12-24 18:12:55 +020010795 bool max_irr_updated;
Paolo Bonzini810e6de2016-12-19 13:05:46 +010010796
Paolo Bonzini76dfafd52016-12-19 17:17:11 +010010797 WARN_ON(!vcpu->arch.apicv_active);
10798 if (pi_test_on(&vmx->pi_desc)) {
10799 pi_clear_on(&vmx->pi_desc);
10800 /*
10801 * IOMMU can write to PIR.ON, so the barrier matters even on UP.
10802 * But on x86 this is just a compiler barrier anyway.
10803 */
10804 smp_mb__after_atomic();
Liran Alonf27a85c2017-12-24 18:12:55 +020010805 max_irr_updated =
10806 kvm_apic_update_irr(vcpu, vmx->pi_desc.pir, &max_irr);
10807
10808 /*
10809 * If we are running L2 and L1 has a new pending interrupt
10810 * which can be injected, we should re-evaluate
10811 * what should be done with this new L1 interrupt.
Liran Alon851c1a182017-12-24 18:12:56 +020010812 * If L1 intercepts external-interrupts, we should
10813	 * exit from L2 to L1. Otherwise, the interrupt should be
10814 * delivered directly to L2.
Liran Alonf27a85c2017-12-24 18:12:55 +020010815 */
Liran Alon851c1a182017-12-24 18:12:56 +020010816 if (is_guest_mode(vcpu) && max_irr_updated) {
10817 if (nested_exit_on_intr(vcpu))
10818 kvm_vcpu_exiting_guest_mode(vcpu);
10819 else
10820 kvm_make_request(KVM_REQ_EVENT, vcpu);
10821 }
Paolo Bonzini76dfafd52016-12-19 17:17:11 +010010822 } else {
10823 max_irr = kvm_lapic_find_highest_irr(vcpu);
10824 }
10825 vmx_hwapic_irr_update(vcpu, max_irr);
10826 return max_irr;
Paolo Bonzini810e6de2016-12-19 13:05:46 +010010827}
10828
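/*
 * An APICv interrupt is deliverable when the priority class (upper
 * nibble) of the requesting vector, RVI, is higher than that of the
 * guest's processor priority (vPPR).
 */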
Paolo Bonzini7e712682018-10-03 13:44:26 +020010829static u8 vmx_has_apicv_interrupt(struct kvm_vcpu *vcpu)
10830{
10831 u8 rvi = vmx_get_rvi();
10832 u8 vppr = kvm_lapic_get_reg(vcpu->arch.apic, APIC_PROCPRI);
10833
10834 return ((rvi & 0xf0) > (vppr & 0xf0));
10835}
10836
Andrey Smetanin63086302015-11-10 15:36:32 +030010837static void vmx_load_eoi_exitmap(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap)
Yang Zhangc7c9c562013-01-25 10:18:51 +080010838{
Andrey Smetanind62caab2015-11-10 15:36:33 +030010839 if (!kvm_vcpu_apicv_active(vcpu))
Yang Zhang3d81bc72013-04-11 19:25:13 +080010840 return;
10841
Yang Zhangc7c9c562013-01-25 10:18:51 +080010842 vmcs_write64(EOI_EXIT_BITMAP0, eoi_exit_bitmap[0]);
10843 vmcs_write64(EOI_EXIT_BITMAP1, eoi_exit_bitmap[1]);
10844 vmcs_write64(EOI_EXIT_BITMAP2, eoi_exit_bitmap[2]);
10845 vmcs_write64(EOI_EXIT_BITMAP3, eoi_exit_bitmap[3]);
10846}
10847
Paolo Bonzini967235d2016-12-19 14:03:45 +010010848static void vmx_apicv_post_state_restore(struct kvm_vcpu *vcpu)
10849{
10850 struct vcpu_vmx *vmx = to_vmx(vcpu);
10851
10852 pi_clear_on(&vmx->pi_desc);
10853 memset(vmx->pi_desc.pir, 0, sizeof(vmx->pi_desc.pir));
10854}
10855
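/*
 * Runs right after VM exit, before interrupts are re-enabled: cache the
 * exit interruption info and handle the events that cannot wait, i.e.
 * async page faults, machine checks and NMIs.
 */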
Avi Kivity51aa01d2010-07-20 14:31:20 +030010856static void vmx_complete_atomic_exit(struct vcpu_vmx *vmx)
Avi Kivitycf393f72008-07-01 16:20:21 +030010857{
Jim Mattson48ae0fb2017-05-22 09:48:33 -070010858 u32 exit_intr_info = 0;
10859 u16 basic_exit_reason = (u16)vmx->exit_reason;
Avi Kivity00eba012011-03-07 17:24:54 +020010860
Jim Mattson48ae0fb2017-05-22 09:48:33 -070010861 if (!(basic_exit_reason == EXIT_REASON_MCE_DURING_VMENTRY
10862 || basic_exit_reason == EXIT_REASON_EXCEPTION_NMI))
Avi Kivity00eba012011-03-07 17:24:54 +020010863 return;
10864
Jim Mattson48ae0fb2017-05-22 09:48:33 -070010865 if (!(vmx->exit_reason & VMX_EXIT_REASONS_FAILED_VMENTRY))
10866 exit_intr_info = vmcs_read32(VM_EXIT_INTR_INFO);
10867 vmx->exit_intr_info = exit_intr_info;
Andi Kleena0861c02009-06-08 17:37:09 +080010868
Wanpeng Li1261bfa2017-07-13 18:30:40 -070010869	/* if exit due to PF, check for async PF */
10870 if (is_page_fault(exit_intr_info))
10871 vmx->vcpu.arch.apf.host_apf_reason = kvm_read_and_reset_pf_reason();
10872
Andi Kleena0861c02009-06-08 17:37:09 +080010873 /* Handle machine checks before interrupts are enabled */
Jim Mattson48ae0fb2017-05-22 09:48:33 -070010874 if (basic_exit_reason == EXIT_REASON_MCE_DURING_VMENTRY ||
10875 is_machine_check(exit_intr_info))
Andi Kleena0861c02009-06-08 17:37:09 +080010876 kvm_machine_check();
10877
Gleb Natapov20f65982009-05-11 13:35:55 +030010878 /* We need to handle NMIs before interrupts are enabled */
Jim Mattsonef85b672016-12-12 11:01:37 -080010879 if (is_nmi(exit_intr_info)) {
Andi Kleendd60d212017-07-25 17:20:32 -070010880 kvm_before_interrupt(&vmx->vcpu);
Gleb Natapov20f65982009-05-11 13:35:55 +030010881 asm("int $2");
Andi Kleendd60d212017-07-25 17:20:32 -070010882 kvm_after_interrupt(&vmx->vcpu);
Zhang, Yanminff9d07a2010-04-19 13:32:45 +080010883 }
Avi Kivity51aa01d2010-07-20 14:31:20 +030010884}
Gleb Natapov20f65982009-05-11 13:35:55 +030010885
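/*
 * With "acknowledge interrupt on exit" the external interrupt that
 * caused the VM exit is not delivered through the IDT by hardware; its
 * vector is latched in the exit interruption info, so dispatch the host
 * handler manually here via the IDT gate.
 */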
Yang Zhanga547c6d2013-04-11 19:25:10 +080010886static void vmx_handle_external_intr(struct kvm_vcpu *vcpu)
10887{
10888 u32 exit_intr_info = vmcs_read32(VM_EXIT_INTR_INFO);
10889
Yang Zhanga547c6d2013-04-11 19:25:10 +080010890 if ((exit_intr_info & (INTR_INFO_VALID_MASK | INTR_INFO_INTR_TYPE_MASK))
10891 == (INTR_INFO_VALID_MASK | INTR_TYPE_EXT_INTR)) {
10892 unsigned int vector;
10893 unsigned long entry;
10894 gate_desc *desc;
10895 struct vcpu_vmx *vmx = to_vmx(vcpu);
10896#ifdef CONFIG_X86_64
10897 unsigned long tmp;
10898#endif
10899
10900 vector = exit_intr_info & INTR_INFO_VECTOR_MASK;
10901 desc = (gate_desc *)vmx->host_idt_base + vector;
Thomas Gleixner64b163f2017-08-28 08:47:37 +020010902 entry = gate_offset(desc);
Yang Zhanga547c6d2013-04-11 19:25:10 +080010903 asm volatile(
10904#ifdef CONFIG_X86_64
10905 "mov %%" _ASM_SP ", %[sp]\n\t"
10906 "and $0xfffffffffffffff0, %%" _ASM_SP "\n\t"
10907 "push $%c[ss]\n\t"
10908 "push %[sp]\n\t"
10909#endif
10910 "pushf\n\t"
Yang Zhanga547c6d2013-04-11 19:25:10 +080010911 __ASM_SIZE(push) " $%c[cs]\n\t"
Peter Zijlstrac940a3f2018-01-25 10:58:14 +010010912 CALL_NOSPEC
Yang Zhanga547c6d2013-04-11 19:25:10 +080010913 :
10914#ifdef CONFIG_X86_64
Chris J Arges3f62de52016-01-22 15:44:38 -060010915 [sp]"=&r"(tmp),
Yang Zhanga547c6d2013-04-11 19:25:10 +080010916#endif
Josh Poimboeuff5caf622017-09-20 16:24:33 -050010917 ASM_CALL_CONSTRAINT
Yang Zhanga547c6d2013-04-11 19:25:10 +080010918 :
Peter Zijlstrac940a3f2018-01-25 10:58:14 +010010919 THUNK_TARGET(entry),
Yang Zhanga547c6d2013-04-11 19:25:10 +080010920 [ss]"i"(__KERNEL_DS),
10921 [cs]"i"(__KERNEL_CS)
10922 );
Paolo Bonzinif2485b32016-06-15 15:23:11 +020010923 }
Yang Zhanga547c6d2013-04-11 19:25:10 +080010924}
Josh Poimboeufc207aee2017-06-28 10:11:06 -050010925STACK_FRAME_NON_STANDARD(vmx_handle_external_intr);
Yang Zhanga547c6d2013-04-11 19:25:10 +080010926
Tom Lendackybc226f02018-05-10 22:06:39 +020010927static bool vmx_has_emulated_msr(int index)
Paolo Bonzini6d396b52015-04-01 14:25:33 +020010928{
Tom Lendackybc226f02018-05-10 22:06:39 +020010929 switch (index) {
10930 case MSR_IA32_SMBASE:
10931 /*
10932 * We cannot do SMM unless we can run the guest in big
10933 * real mode.
10934 */
10935 return enable_unrestricted_guest || emulate_invalid_guest_state;
10936 case MSR_AMD64_VIRT_SPEC_CTRL:
10937 /* This is AMD only. */
10938 return false;
10939 default:
10940 return true;
10941 }
Paolo Bonzini6d396b52015-04-01 14:25:33 +020010942}
10943
Liu, Jinsongda8999d2014-02-24 10:55:46 +000010944static bool vmx_mpx_supported(void)
10945{
10946 return (vmcs_config.vmexit_ctrl & VM_EXIT_CLEAR_BNDCFGS) &&
10947 (vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_BNDCFGS);
10948}
10949
Wanpeng Li55412b22014-12-02 19:21:30 +080010950static bool vmx_xsaves_supported(void)
10951{
10952 return vmcs_config.cpu_based_2nd_exec_ctrl &
10953 SECONDARY_EXEC_XSAVES;
10954}
10955
Avi Kivity51aa01d2010-07-20 14:31:20 +030010956static void vmx_recover_nmi_blocking(struct vcpu_vmx *vmx)
10957{
Avi Kivityc5ca8e52011-03-07 17:37:37 +020010958 u32 exit_intr_info;
Avi Kivity51aa01d2010-07-20 14:31:20 +030010959 bool unblock_nmi;
10960 u8 vector;
10961 bool idtv_info_valid;
10962
10963 idtv_info_valid = vmx->idt_vectoring_info & VECTORING_INFO_VALID_MASK;
Gleb Natapov20f65982009-05-11 13:35:55 +030010964
Paolo Bonzinid02fcf52017-11-06 13:31:13 +010010965 if (enable_vnmi) {
Paolo Bonzini8a1b4392017-11-06 13:31:12 +010010966 if (vmx->loaded_vmcs->nmi_known_unmasked)
10967 return;
10968 /*
10969 * Can't use vmx->exit_intr_info since we're not sure what
10970 * the exit reason is.
10971 */
10972 exit_intr_info = vmcs_read32(VM_EXIT_INTR_INFO);
10973 unblock_nmi = (exit_intr_info & INTR_INFO_UNBLOCK_NMI) != 0;
10974 vector = exit_intr_info & INTR_INFO_VECTOR_MASK;
10975 /*
10976 * SDM 3: 27.7.1.2 (September 2008)
10977 * Re-set bit "block by NMI" before VM entry if vmexit caused by
10978 * a guest IRET fault.
10979 * SDM 3: 23.2.2 (September 2008)
10980 * Bit 12 is undefined in any of the following cases:
10981 * If the VM exit sets the valid bit in the IDT-vectoring
10982 * information field.
10983 * If the VM exit is due to a double fault.
10984 */
10985 if ((exit_intr_info & INTR_INFO_VALID_MASK) && unblock_nmi &&
10986 vector != DF_VECTOR && !idtv_info_valid)
10987 vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO,
10988 GUEST_INTR_STATE_NMI);
10989 else
10990 vmx->loaded_vmcs->nmi_known_unmasked =
10991 !(vmcs_read32(GUEST_INTERRUPTIBILITY_INFO)
10992 & GUEST_INTR_STATE_NMI);
10993 } else if (unlikely(vmx->loaded_vmcs->soft_vnmi_blocked))
10994 vmx->loaded_vmcs->vnmi_blocked_time +=
10995 ktime_to_ns(ktime_sub(ktime_get(),
10996 vmx->loaded_vmcs->entry_time));
Avi Kivity51aa01d2010-07-20 14:31:20 +030010997}
10998
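/*
 * Re-queue an event whose delivery was interrupted by a VM exit (or by
 * a cancelled VM entry) so that it is injected again on the next entry;
 * the vectoring info says whether it was an NMI, exception or interrupt
 * and whether an error code and instruction length apply.
 */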
Jan Kiszka3ab66e82013-02-20 14:03:24 +010010999static void __vmx_complete_interrupts(struct kvm_vcpu *vcpu,
Avi Kivity83422e12010-07-20 14:43:23 +030011000 u32 idt_vectoring_info,
11001 int instr_len_field,
11002 int error_code_field)
Avi Kivity51aa01d2010-07-20 14:31:20 +030011003{
Avi Kivity51aa01d2010-07-20 14:31:20 +030011004 u8 vector;
11005 int type;
11006 bool idtv_info_valid;
11007
11008 idtv_info_valid = idt_vectoring_info & VECTORING_INFO_VALID_MASK;
Avi Kivity668f6122008-07-02 09:28:55 +030011009
Jan Kiszka3ab66e82013-02-20 14:03:24 +010011010 vcpu->arch.nmi_injected = false;
11011 kvm_clear_exception_queue(vcpu);
11012 kvm_clear_interrupt_queue(vcpu);
Gleb Natapov37b96e92009-03-30 16:03:13 +030011013
11014 if (!idtv_info_valid)
11015 return;
11016
Jan Kiszka3ab66e82013-02-20 14:03:24 +010011017 kvm_make_request(KVM_REQ_EVENT, vcpu);
Avi Kivity3842d132010-07-27 12:30:24 +030011018
Avi Kivity668f6122008-07-02 09:28:55 +030011019 vector = idt_vectoring_info & VECTORING_INFO_VECTOR_MASK;
11020 type = idt_vectoring_info & VECTORING_INFO_TYPE_MASK;
Gleb Natapov37b96e92009-03-30 16:03:13 +030011021
Gleb Natapov64a7ec02009-03-30 16:03:29 +030011022 switch (type) {
Gleb Natapov37b96e92009-03-30 16:03:13 +030011023 case INTR_TYPE_NMI_INTR:
Jan Kiszka3ab66e82013-02-20 14:03:24 +010011024 vcpu->arch.nmi_injected = true;
Avi Kivity668f6122008-07-02 09:28:55 +030011025 /*
Gleb Natapov7b4a25c2009-03-30 16:03:08 +030011026 * SDM 3: 27.7.1.2 (September 2008)
Gleb Natapov37b96e92009-03-30 16:03:13 +030011027	 * Clear bit "block by NMI" before VM entry if an NMI
11028 * delivery faulted.
Avi Kivity668f6122008-07-02 09:28:55 +030011029 */
Jan Kiszka3ab66e82013-02-20 14:03:24 +010011030 vmx_set_nmi_mask(vcpu, false);
Gleb Natapov37b96e92009-03-30 16:03:13 +030011031 break;
Gleb Natapov37b96e92009-03-30 16:03:13 +030011032 case INTR_TYPE_SOFT_EXCEPTION:
Jan Kiszka3ab66e82013-02-20 14:03:24 +010011033 vcpu->arch.event_exit_inst_len = vmcs_read32(instr_len_field);
Gleb Natapov66fd3f72009-05-11 13:35:50 +030011034 /* fall through */
11035 case INTR_TYPE_HARD_EXCEPTION:
Avi Kivity35920a32008-07-03 14:50:12 +030011036 if (idt_vectoring_info & VECTORING_INFO_DELIVER_CODE_MASK) {
Avi Kivity83422e12010-07-20 14:43:23 +030011037 u32 err = vmcs_read32(error_code_field);
Gleb Natapov851eb6672013-09-25 12:51:34 +030011038 kvm_requeue_exception_e(vcpu, vector, err);
Avi Kivity35920a32008-07-03 14:50:12 +030011039 } else
Gleb Natapov851eb6672013-09-25 12:51:34 +030011040 kvm_requeue_exception(vcpu, vector);
Gleb Natapov37b96e92009-03-30 16:03:13 +030011041 break;
Gleb Natapov66fd3f72009-05-11 13:35:50 +030011042 case INTR_TYPE_SOFT_INTR:
Jan Kiszka3ab66e82013-02-20 14:03:24 +010011043 vcpu->arch.event_exit_inst_len = vmcs_read32(instr_len_field);
Gleb Natapov66fd3f72009-05-11 13:35:50 +030011044 /* fall through */
Gleb Natapov37b96e92009-03-30 16:03:13 +030011045 case INTR_TYPE_EXT_INTR:
Jan Kiszka3ab66e82013-02-20 14:03:24 +010011046 kvm_queue_interrupt(vcpu, vector, type == INTR_TYPE_SOFT_INTR);
Gleb Natapov37b96e92009-03-30 16:03:13 +030011047 break;
11048 default:
11049 break;
Avi Kivityf7d92382008-07-03 16:14:28 +030011050 }
Avi Kivitycf393f72008-07-01 16:20:21 +030011051}
11052
Avi Kivity83422e12010-07-20 14:43:23 +030011053static void vmx_complete_interrupts(struct vcpu_vmx *vmx)
11054{
Jan Kiszka3ab66e82013-02-20 14:03:24 +010011055 __vmx_complete_interrupts(&vmx->vcpu, vmx->idt_vectoring_info,
Avi Kivity83422e12010-07-20 14:43:23 +030011056 VM_EXIT_INSTRUCTION_LEN,
11057 IDT_VECTORING_ERROR_CODE);
11058}
11059
Avi Kivityb463a6f2010-07-20 15:06:17 +030011060static void vmx_cancel_injection(struct kvm_vcpu *vcpu)
11061{
Jan Kiszka3ab66e82013-02-20 14:03:24 +010011062 __vmx_complete_interrupts(vcpu,
Avi Kivityb463a6f2010-07-20 15:06:17 +030011063 vmcs_read32(VM_ENTRY_INTR_INFO_FIELD),
11064 VM_ENTRY_INSTRUCTION_LEN,
11065 VM_ENTRY_EXCEPTION_ERROR_CODE);
11066
11067 vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, 0);
11068}
11069
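/*
 * Use the VMCS MSR load/store areas to switch the perf MSRs around VM
 * entry/exit, but only for MSRs whose guest and host values actually
 * differ; identical values need no atomic switch.
 */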
Gleb Natapovd7cd9792011-10-05 14:01:23 +020011070static void atomic_switch_perf_msrs(struct vcpu_vmx *vmx)
11071{
11072 int i, nr_msrs;
11073 struct perf_guest_switch_msr *msrs;
11074
11075 msrs = perf_guest_get_msrs(&nr_msrs);
11076
11077 if (!msrs)
11078 return;
11079
11080 for (i = 0; i < nr_msrs; i++)
11081 if (msrs[i].host == msrs[i].guest)
11082 clear_atomic_switch_msr(vmx, msrs[i].msr);
11083 else
11084 add_atomic_switch_msr(vmx, msrs[i].msr, msrs[i].guest,
Konrad Rzeszutek Wilk989e3992018-06-20 22:01:22 -040011085 msrs[i].host, false);
Gleb Natapovd7cd9792011-10-05 14:01:23 +020011086}
11087
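/*
 * The VMX-preemption timer counts down in TSC-derived units while the
 * guest runs and forces a VM exit when it reaches zero, so arming it
 * with 0 yields an exit immediately after VM entry.
 */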
Sean Christophersonf459a702018-08-27 15:21:11 -070011088static void vmx_arm_hv_timer(struct vcpu_vmx *vmx, u32 val)
11089{
11090 vmcs_write32(VMX_PREEMPTION_TIMER_VALUE, val);
11091 if (!vmx->loaded_vmcs->hv_timer_armed)
11092 vmcs_set_bits(PIN_BASED_VM_EXEC_CONTROL,
11093 PIN_BASED_VMX_PREEMPTION_TIMER);
11094 vmx->loaded_vmcs->hv_timer_armed = true;
11095}
11096
11097static void vmx_update_hv_timer(struct kvm_vcpu *vcpu)
Yunhong Jiang64672c92016-06-13 14:19:59 -070011098{
11099 struct vcpu_vmx *vmx = to_vmx(vcpu);
11100 u64 tscl;
11101 u32 delta_tsc;
11102
Sean Christophersond264ee02018-08-27 15:21:12 -070011103 if (vmx->req_immediate_exit) {
11104 vmx_arm_hv_timer(vmx, 0);
11105 return;
11106 }
11107
Sean Christophersonf459a702018-08-27 15:21:11 -070011108 if (vmx->hv_deadline_tsc != -1) {
11109 tscl = rdtsc();
11110 if (vmx->hv_deadline_tsc > tscl)
11111			/* set_hv_timer ensures the delta fits in 32 bits */
11112 delta_tsc = (u32)((vmx->hv_deadline_tsc - tscl) >>
11113 cpu_preemption_timer_multi);
11114 else
11115 delta_tsc = 0;
11116
11117 vmx_arm_hv_timer(vmx, delta_tsc);
Yunhong Jiang64672c92016-06-13 14:19:59 -070011118 return;
Sean Christophersonf459a702018-08-27 15:21:11 -070011119 }
Yunhong Jiang64672c92016-06-13 14:19:59 -070011120
Sean Christophersonf459a702018-08-27 15:21:11 -070011121 if (vmx->loaded_vmcs->hv_timer_armed)
11122 vmcs_clear_bits(PIN_BASED_VM_EXEC_CONTROL,
11123 PIN_BASED_VMX_PREEMPTION_TIMER);
11124 vmx->loaded_vmcs->hv_timer_armed = false;
Yunhong Jiang64672c92016-06-13 14:19:59 -070011125}
11126
Lai Jiangshana3b5ba42011-02-11 14:29:40 +080011127static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
Avi Kivity6aa8b732006-12-10 02:21:36 -080011128{
Gregory Haskinsa2fa3e92007-07-27 08:13:10 -040011129 struct vcpu_vmx *vmx = to_vmx(vcpu);
Vitaly Kuznetsov773e8a02018-03-20 15:02:11 +010011130 unsigned long cr3, cr4, evmcs_rsp;
Avi Kivity104f2262010-11-18 13:12:52 +020011131
Paolo Bonzini8a1b4392017-11-06 13:31:12 +010011132 /* Record the guest's net vcpu time for enforced NMI injections. */
Paolo Bonzinid02fcf52017-11-06 13:31:13 +010011133 if (unlikely(!enable_vnmi &&
Paolo Bonzini8a1b4392017-11-06 13:31:12 +010011134 vmx->loaded_vmcs->soft_vnmi_blocked))
11135 vmx->loaded_vmcs->entry_time = ktime_get();
11136
Avi Kivity104f2262010-11-18 13:12:52 +020011137	/* Don't enter VMX if guest state is invalid; let the exit handler
11138	   start emulation until we arrive back at a valid state */
Gleb Natapov14168782013-01-21 15:36:49 +020011139 if (vmx->emulation_required)
Avi Kivity104f2262010-11-18 13:12:52 +020011140 return;
11141
Radim Krčmářa7653ec2014-08-21 18:08:07 +020011142 if (vmx->ple_window_dirty) {
11143 vmx->ple_window_dirty = false;
11144 vmcs_write32(PLE_WINDOW, vmx->ple_window);
11145 }
11146
Vitaly Kuznetsov945679e2018-10-16 18:50:02 +020011147 if (vmx->nested.need_vmcs12_sync) {
Vitaly Kuznetsov8cab6502018-10-16 18:50:09 +020011148 /*
11149 * hv_evmcs may end up being not mapped after migration (when
11150 * L2 was running), map it here to make sure vmcs12 changes are
11151 * properly reflected.
11152 */
11153 if (vmx->nested.enlightened_vmcs_enabled &&
11154 !vmx->nested.hv_evmcs)
11155 nested_vmx_handle_enlightened_vmptrld(vcpu, false);
11156
Vitaly Kuznetsov945679e2018-10-16 18:50:02 +020011157 if (vmx->nested.hv_evmcs) {
11158 copy_vmcs12_to_enlightened(vmx);
11159 /* All fields are clean */
11160 vmx->nested.hv_evmcs->hv_clean_fields |=
11161 HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL;
11162 } else {
11163 copy_vmcs12_to_shadow(vmx);
11164 }
11165 vmx->nested.need_vmcs12_sync = false;
Abel Gordon012f83c2013-04-18 14:39:25 +030011166 }
11167
Avi Kivity104f2262010-11-18 13:12:52 +020011168 if (test_bit(VCPU_REGS_RSP, (unsigned long *)&vcpu->arch.regs_dirty))
11169 vmcs_writel(GUEST_RSP, vcpu->arch.regs[VCPU_REGS_RSP]);
11170 if (test_bit(VCPU_REGS_RIP, (unsigned long *)&vcpu->arch.regs_dirty))
11171 vmcs_writel(GUEST_RIP, vcpu->arch.regs[VCPU_REGS_RIP]);
11172
Andy Lutomirskid6e41f12017-05-28 10:00:17 -070011173 cr3 = __get_current_cr3_fast();
Sean Christophersond7ee0392018-07-23 12:32:47 -070011174 if (unlikely(cr3 != vmx->loaded_vmcs->host_state.cr3)) {
Andy Lutomirskid6e41f12017-05-28 10:00:17 -070011175 vmcs_writel(HOST_CR3, cr3);
Sean Christophersond7ee0392018-07-23 12:32:47 -070011176 vmx->loaded_vmcs->host_state.cr3 = cr3;
Andy Lutomirskid6e41f12017-05-28 10:00:17 -070011177 }
11178
Andy Lutomirski1e02ce42014-10-24 15:58:08 -070011179 cr4 = cr4_read_shadow();
Sean Christophersond7ee0392018-07-23 12:32:47 -070011180 if (unlikely(cr4 != vmx->loaded_vmcs->host_state.cr4)) {
Andy Lutomirskid974baa2014-10-08 09:02:13 -070011181 vmcs_writel(HOST_CR4, cr4);
Sean Christophersond7ee0392018-07-23 12:32:47 -070011182 vmx->loaded_vmcs->host_state.cr4 = cr4;
Andy Lutomirskid974baa2014-10-08 09:02:13 -070011183 }
11184
Avi Kivity104f2262010-11-18 13:12:52 +020011185 /* When single-stepping over STI and MOV SS, we must clear the
11186 * corresponding interruptibility bits in the guest state. Otherwise
11187	 * vmentry fails as it then expects bit 14 (BS) in the pending debug
11188	 * exceptions field to be set, but that's not correct for the guest
11189	 * debugging case. */
11190 if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP)
11191 vmx_set_interrupt_shadow(vcpu, 0);
11192
Paolo Bonzinib9dd21e2017-08-23 23:14:38 +020011193 if (static_cpu_has(X86_FEATURE_PKU) &&
11194 kvm_read_cr4_bits(vcpu, X86_CR4_PKE) &&
11195 vcpu->arch.pkru != vmx->host_pkru)
11196 __write_pkru(vcpu->arch.pkru);
Xiao Guangrong1be0e612016-03-22 16:51:18 +080011197
Gleb Natapovd7cd9792011-10-05 14:01:23 +020011198 atomic_switch_perf_msrs(vmx);
11199
Sean Christophersonf459a702018-08-27 15:21:11 -070011200 vmx_update_hv_timer(vcpu);
Yunhong Jiang64672c92016-06-13 14:19:59 -070011201
KarimAllah Ahmedd28b3872018-02-01 22:59:45 +010011202 /*
11203 * If this vCPU has touched SPEC_CTRL, restore the guest's value if
11204 * it's non-zero. Since vmentry is serialising on affected CPUs, there
11205 * is no need to worry about the conditional branch over the wrmsr
11206 * being speculatively taken.
11207 */
Thomas Gleixnerccbcd262018-05-09 23:01:01 +020011208 x86_spec_ctrl_set_guest(vmx->spec_ctrl, 0);
KarimAllah Ahmedd28b3872018-02-01 22:59:45 +010011209
Nadav Har'Eld462b812011-05-24 15:26:10 +030011210 vmx->__launched = vmx->loaded_vmcs->launched;
Vitaly Kuznetsov773e8a02018-03-20 15:02:11 +010011211
11212 evmcs_rsp = static_branch_unlikely(&enable_evmcs) ?
11213 (unsigned long)&current_evmcs->host_rsp : 0;
11214
Nicolai Stange5b6ccc62018-07-21 22:35:28 +020011215 if (static_branch_unlikely(&vmx_l1d_should_flush))
11216 vmx_l1d_flush(vcpu);
Paolo Bonzinic595cee2018-07-02 13:07:14 +020011217
Avi Kivity104f2262010-11-18 13:12:52 +020011218 asm(
Avi Kivity6aa8b732006-12-10 02:21:36 -080011219 /* Store host registers */
Avi Kivityb188c81f2012-09-16 15:10:58 +030011220 "push %%" _ASM_DX "; push %%" _ASM_BP ";"
11221 "push %%" _ASM_CX " \n\t" /* placeholder for guest rcx */
11222 "push %%" _ASM_CX " \n\t"
11223 "cmp %%" _ASM_SP ", %c[host_rsp](%0) \n\t"
Avi Kivity313dbd42008-07-17 18:04:30 +030011224 "je 1f \n\t"
Avi Kivityb188c81f2012-09-16 15:10:58 +030011225 "mov %%" _ASM_SP ", %c[host_rsp](%0) \n\t"
Vitaly Kuznetsov773e8a02018-03-20 15:02:11 +010011226 /* Avoid VMWRITE when Enlightened VMCS is in use */
11227 "test %%" _ASM_SI ", %%" _ASM_SI " \n\t"
11228 "jz 2f \n\t"
11229 "mov %%" _ASM_SP ", (%%" _ASM_SI ") \n\t"
11230 "jmp 1f \n\t"
11231 "2: \n\t"
Uros Bizjak4b1e5472018-10-11 19:40:44 +020011232 __ex("vmwrite %%" _ASM_SP ", %%" _ASM_DX) "\n\t"
Avi Kivity313dbd42008-07-17 18:04:30 +030011233 "1: \n\t"
Avi Kivityd3edefc2009-06-16 12:33:56 +030011234 /* Reload cr2 if changed */
Avi Kivityb188c81f2012-09-16 15:10:58 +030011235 "mov %c[cr2](%0), %%" _ASM_AX " \n\t"
11236 "mov %%cr2, %%" _ASM_DX " \n\t"
11237 "cmp %%" _ASM_AX ", %%" _ASM_DX " \n\t"
Vitaly Kuznetsov773e8a02018-03-20 15:02:11 +010011238 "je 3f \n\t"
Avi Kivityb188c81f2012-09-16 15:10:58 +030011239 "mov %%" _ASM_AX", %%cr2 \n\t"
Vitaly Kuznetsov773e8a02018-03-20 15:02:11 +010011240 "3: \n\t"
Avi Kivity6aa8b732006-12-10 02:21:36 -080011241	/* Check if vmlaunch or vmresume is needed */
Avi Kivitye08aa782007-11-15 18:06:18 +020011242 "cmpl $0, %c[launched](%0) \n\t"
Avi Kivity6aa8b732006-12-10 02:21:36 -080011243 /* Load guest registers. Don't clobber flags. */
Avi Kivityb188c81f2012-09-16 15:10:58 +030011244 "mov %c[rax](%0), %%" _ASM_AX " \n\t"
11245 "mov %c[rbx](%0), %%" _ASM_BX " \n\t"
11246 "mov %c[rdx](%0), %%" _ASM_DX " \n\t"
11247 "mov %c[rsi](%0), %%" _ASM_SI " \n\t"
11248 "mov %c[rdi](%0), %%" _ASM_DI " \n\t"
11249 "mov %c[rbp](%0), %%" _ASM_BP " \n\t"
Avi Kivity05b3e0c2006-12-13 00:33:45 -080011250#ifdef CONFIG_X86_64
Avi Kivitye08aa782007-11-15 18:06:18 +020011251 "mov %c[r8](%0), %%r8 \n\t"
11252 "mov %c[r9](%0), %%r9 \n\t"
11253 "mov %c[r10](%0), %%r10 \n\t"
11254 "mov %c[r11](%0), %%r11 \n\t"
11255 "mov %c[r12](%0), %%r12 \n\t"
11256 "mov %c[r13](%0), %%r13 \n\t"
11257 "mov %c[r14](%0), %%r14 \n\t"
11258 "mov %c[r15](%0), %%r15 \n\t"
Avi Kivity6aa8b732006-12-10 02:21:36 -080011259#endif
Avi Kivityb188c81f2012-09-16 15:10:58 +030011260 "mov %c[rcx](%0), %%" _ASM_CX " \n\t" /* kills %0 (ecx) */
Avi Kivityc8019492008-07-14 14:44:59 +030011261
Avi Kivity6aa8b732006-12-10 02:21:36 -080011262 /* Enter guest mode */
Avi Kivity83287ea422012-09-16 15:10:57 +030011263 "jne 1f \n\t"
Uros Bizjak4b1e5472018-10-11 19:40:44 +020011264 __ex("vmlaunch") "\n\t"
Avi Kivity83287ea422012-09-16 15:10:57 +030011265 "jmp 2f \n\t"
Uros Bizjak4b1e5472018-10-11 19:40:44 +020011266 "1: " __ex("vmresume") "\n\t"
Avi Kivity83287ea422012-09-16 15:10:57 +030011267 "2: "
Avi Kivity6aa8b732006-12-10 02:21:36 -080011268 /* Save guest registers, load host registers, keep flags */
Avi Kivityb188c81f2012-09-16 15:10:58 +030011269 "mov %0, %c[wordsize](%%" _ASM_SP ") \n\t"
Avi Kivity40712fa2011-01-06 18:09:12 +020011270 "pop %0 \n\t"
Jim Mattson0cb5b302018-01-03 14:31:38 -080011271 "setbe %c[fail](%0)\n\t"
Avi Kivityb188c81f2012-09-16 15:10:58 +030011272 "mov %%" _ASM_AX ", %c[rax](%0) \n\t"
11273 "mov %%" _ASM_BX ", %c[rbx](%0) \n\t"
11274 __ASM_SIZE(pop) " %c[rcx](%0) \n\t"
11275 "mov %%" _ASM_DX ", %c[rdx](%0) \n\t"
11276 "mov %%" _ASM_SI ", %c[rsi](%0) \n\t"
11277 "mov %%" _ASM_DI ", %c[rdi](%0) \n\t"
11278 "mov %%" _ASM_BP ", %c[rbp](%0) \n\t"
Avi Kivity05b3e0c2006-12-13 00:33:45 -080011279#ifdef CONFIG_X86_64
Avi Kivitye08aa782007-11-15 18:06:18 +020011280 "mov %%r8, %c[r8](%0) \n\t"
11281 "mov %%r9, %c[r9](%0) \n\t"
11282 "mov %%r10, %c[r10](%0) \n\t"
11283 "mov %%r11, %c[r11](%0) \n\t"
11284 "mov %%r12, %c[r12](%0) \n\t"
11285 "mov %%r13, %c[r13](%0) \n\t"
11286 "mov %%r14, %c[r14](%0) \n\t"
11287 "mov %%r15, %c[r15](%0) \n\t"
Uros Bizjak43ce76c2018-10-17 16:46:57 +020011288 /*
11289 * Clear host registers marked as clobbered to prevent
11290 * speculative use.
11291 */
Jim Mattson0cb5b302018-01-03 14:31:38 -080011292 "xor %%r8d, %%r8d \n\t"
11293 "xor %%r9d, %%r9d \n\t"
11294 "xor %%r10d, %%r10d \n\t"
11295 "xor %%r11d, %%r11d \n\t"
11296 "xor %%r12d, %%r12d \n\t"
11297 "xor %%r13d, %%r13d \n\t"
11298 "xor %%r14d, %%r14d \n\t"
11299 "xor %%r15d, %%r15d \n\t"
Avi Kivity6aa8b732006-12-10 02:21:36 -080011300#endif
Avi Kivityb188c81f2012-09-16 15:10:58 +030011301 "mov %%cr2, %%" _ASM_AX " \n\t"
11302 "mov %%" _ASM_AX ", %c[cr2](%0) \n\t"
Avi Kivityc8019492008-07-14 14:44:59 +030011303
Jim Mattson0cb5b302018-01-03 14:31:38 -080011304 "xor %%eax, %%eax \n\t"
11305 "xor %%ebx, %%ebx \n\t"
11306 "xor %%esi, %%esi \n\t"
11307 "xor %%edi, %%edi \n\t"
Avi Kivityb188c81f2012-09-16 15:10:58 +030011308 "pop %%" _ASM_BP "; pop %%" _ASM_DX " \n\t"
Avi Kivity83287ea422012-09-16 15:10:57 +030011309 ".pushsection .rodata \n\t"
11310 ".global vmx_return \n\t"
11311 "vmx_return: " _ASM_PTR " 2b \n\t"
11312 ".popsection"
Vitaly Kuznetsov773e8a02018-03-20 15:02:11 +010011313 : : "c"(vmx), "d"((unsigned long)HOST_RSP), "S"(evmcs_rsp),
Nadav Har'Eld462b812011-05-24 15:26:10 +030011314 [launched]"i"(offsetof(struct vcpu_vmx, __launched)),
Avi Kivitye08aa782007-11-15 18:06:18 +020011315 [fail]"i"(offsetof(struct vcpu_vmx, fail)),
Avi Kivity313dbd42008-07-17 18:04:30 +030011316 [host_rsp]"i"(offsetof(struct vcpu_vmx, host_rsp)),
Zhang Xiantaoad312c72007-12-13 23:50:52 +080011317 [rax]"i"(offsetof(struct vcpu_vmx, vcpu.arch.regs[VCPU_REGS_RAX])),
11318 [rbx]"i"(offsetof(struct vcpu_vmx, vcpu.arch.regs[VCPU_REGS_RBX])),
11319 [rcx]"i"(offsetof(struct vcpu_vmx, vcpu.arch.regs[VCPU_REGS_RCX])),
11320 [rdx]"i"(offsetof(struct vcpu_vmx, vcpu.arch.regs[VCPU_REGS_RDX])),
11321 [rsi]"i"(offsetof(struct vcpu_vmx, vcpu.arch.regs[VCPU_REGS_RSI])),
11322 [rdi]"i"(offsetof(struct vcpu_vmx, vcpu.arch.regs[VCPU_REGS_RDI])),
11323 [rbp]"i"(offsetof(struct vcpu_vmx, vcpu.arch.regs[VCPU_REGS_RBP])),
Avi Kivity05b3e0c2006-12-13 00:33:45 -080011324#ifdef CONFIG_X86_64
Zhang Xiantaoad312c72007-12-13 23:50:52 +080011325 [r8]"i"(offsetof(struct vcpu_vmx, vcpu.arch.regs[VCPU_REGS_R8])),
11326 [r9]"i"(offsetof(struct vcpu_vmx, vcpu.arch.regs[VCPU_REGS_R9])),
11327 [r10]"i"(offsetof(struct vcpu_vmx, vcpu.arch.regs[VCPU_REGS_R10])),
11328 [r11]"i"(offsetof(struct vcpu_vmx, vcpu.arch.regs[VCPU_REGS_R11])),
11329 [r12]"i"(offsetof(struct vcpu_vmx, vcpu.arch.regs[VCPU_REGS_R12])),
11330 [r13]"i"(offsetof(struct vcpu_vmx, vcpu.arch.regs[VCPU_REGS_R13])),
11331 [r14]"i"(offsetof(struct vcpu_vmx, vcpu.arch.regs[VCPU_REGS_R14])),
11332 [r15]"i"(offsetof(struct vcpu_vmx, vcpu.arch.regs[VCPU_REGS_R15])),
Avi Kivity6aa8b732006-12-10 02:21:36 -080011333#endif
Avi Kivity40712fa2011-01-06 18:09:12 +020011334 [cr2]"i"(offsetof(struct vcpu_vmx, vcpu.arch.cr2)),
11335 [wordsize]"i"(sizeof(ulong))
Laurent Vivierc2036302007-10-25 14:18:52 +020011336 : "cc", "memory"
11337#ifdef CONFIG_X86_64
Vitaly Kuznetsov773e8a02018-03-20 15:02:11 +010011338 , "rax", "rbx", "rdi"
Laurent Vivierc2036302007-10-25 14:18:52 +020011339 , "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15"
Avi Kivityb188c81f2012-09-16 15:10:58 +030011340#else
Vitaly Kuznetsov773e8a02018-03-20 15:02:11 +010011341 , "eax", "ebx", "edi"
Laurent Vivierc2036302007-10-25 14:18:52 +020011342#endif
11343 );
Avi Kivity6aa8b732006-12-10 02:21:36 -080011344
KarimAllah Ahmedd28b3872018-02-01 22:59:45 +010011345 /*
11346 * We do not use IBRS in the kernel. If this vCPU has used the
11347 * SPEC_CTRL MSR it may have left it on; save the value and
11348 * turn it off. This is much more efficient than blindly adding
11349 * it to the atomic save/restore list. Especially as the former
11350 * (Saving guest MSRs on vmexit) doesn't even exist in KVM.
11351 *
11352 * For non-nested case:
11353 * If the L01 MSR bitmap does not intercept the MSR, then we need to
11354 * save it.
11355 *
11356 * For nested case:
11357 * If the L02 MSR bitmap does not intercept the MSR, then we need to
11358 * save it.
11359 */
Paolo Bonzini946fbbc2018-02-22 16:43:18 +010011360 if (unlikely(!msr_write_intercepted(vcpu, MSR_IA32_SPEC_CTRL)))
Paolo Bonziniecb586b2018-02-22 16:43:17 +010011361 vmx->spec_ctrl = native_read_msr(MSR_IA32_SPEC_CTRL);
KarimAllah Ahmedd28b3872018-02-01 22:59:45 +010011362
Thomas Gleixnerccbcd262018-05-09 23:01:01 +020011363 x86_spec_ctrl_restore_host(vmx->spec_ctrl, 0);
KarimAllah Ahmedd28b3872018-02-01 22:59:45 +010011364
David Woodhouse117cc7a2018-01-12 11:11:27 +000011365 /* Eliminate branch target predictions from guest mode */
11366 vmexit_fill_RSB();
11367
Vitaly Kuznetsov773e8a02018-03-20 15:02:11 +010011368 /* All fields are clean at this point */
11369 if (static_branch_unlikely(&enable_evmcs))
11370 current_evmcs->hv_clean_fields |=
11371 HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL;
11372
Gleb Natapov2a7921b2012-08-12 16:12:29 +030011373 /* MSR_IA32_DEBUGCTLMSR is zeroed on vmexit. Restore it if needed */
Wanpeng Li74c55932017-11-29 01:31:20 -080011374 if (vmx->host_debugctlmsr)
11375 update_debugctlmsr(vmx->host_debugctlmsr);
Gleb Natapov2a7921b2012-08-12 16:12:29 +030011376
Avi Kivityaa67f602012-08-01 16:48:03 +030011377#ifndef CONFIG_X86_64
11378 /*
11379 * The sysexit path does not restore ds/es, so we must set them to
11380 * a reasonable value ourselves.
11381 *
Sean Christopherson6d6095b2018-07-23 12:32:44 -070011382 * We can't defer this to vmx_prepare_switch_to_host() since that
11383 * function may be executed in interrupt context, which saves and
11384	 * restores segments around it, nullifying its effect.
Avi Kivityaa67f602012-08-01 16:48:03 +030011385 */
11386 loadsegment(ds, __USER_DS);
11387 loadsegment(es, __USER_DS);
11388#endif
11389
Avi Kivity6de4f3a2009-05-31 22:58:47 +030011390 vcpu->arch.regs_avail = ~((1 << VCPU_REGS_RIP) | (1 << VCPU_REGS_RSP)
Avi Kivity6de12732011-03-07 12:51:22 +020011391 | (1 << VCPU_EXREG_RFLAGS)
Avi Kivityaff48ba2010-12-05 18:56:11 +020011392 | (1 << VCPU_EXREG_PDPTR)
Avi Kivity2fb92db2011-04-27 19:42:18 +030011393 | (1 << VCPU_EXREG_SEGMENTS)
Avi Kivityaff48ba2010-12-05 18:56:11 +020011394 | (1 << VCPU_EXREG_CR3));
Marcelo Tosatti5fdbf972008-06-27 14:58:02 -030011395 vcpu->arch.regs_dirty = 0;
11396
Gleb Natapove0b890d2013-09-25 12:51:33 +030011397 /*
Xiao Guangrong1be0e612016-03-22 16:51:18 +080011398	 * eager fpu is enabled if PKEY is supported and CR4 is switched
11399	 * back on the host, so it is safe to read the guest PKRU from the
11400	 * current XSAVE state.
11401 */
Paolo Bonzinib9dd21e2017-08-23 23:14:38 +020011402 if (static_cpu_has(X86_FEATURE_PKU) &&
11403 kvm_read_cr4_bits(vcpu, X86_CR4_PKE)) {
11404 vcpu->arch.pkru = __read_pkru();
11405 if (vcpu->arch.pkru != vmx->host_pkru)
Xiao Guangrong1be0e612016-03-22 16:51:18 +080011406 __write_pkru(vmx->host_pkru);
Xiao Guangrong1be0e612016-03-22 16:51:18 +080011407 }
11408
Gleb Natapove0b890d2013-09-25 12:51:33 +030011409 vmx->nested.nested_run_pending = 0;
Jim Mattsonb060ca32017-09-14 16:31:42 -070011410 vmx->idt_vectoring_info = 0;
11411
11412 vmx->exit_reason = vmx->fail ? 0xdead : vmcs_read32(VM_EXIT_REASON);
11413 if (vmx->fail || (vmx->exit_reason & VMX_EXIT_REASONS_FAILED_VMENTRY))
11414 return;
11415
11416 vmx->loaded_vmcs->launched = 1;
11417 vmx->idt_vectoring_info = vmcs_read32(IDT_VECTORING_INFO_FIELD);
Gleb Natapove0b890d2013-09-25 12:51:33 +030011418
Avi Kivity51aa01d2010-07-20 14:31:20 +030011419 vmx_complete_atomic_exit(vmx);
11420 vmx_recover_nmi_blocking(vmx);
Avi Kivitycf393f72008-07-01 16:20:21 +030011421 vmx_complete_interrupts(vmx);
Avi Kivity6aa8b732006-12-10 02:21:36 -080011422}
Josh Poimboeufc207aee2017-06-28 10:11:06 -050011423STACK_FRAME_NON_STANDARD(vmx_vcpu_run);
Avi Kivity6aa8b732006-12-10 02:21:36 -080011424
Sean Christopherson434a1e92018-03-20 12:17:18 -070011425static struct kvm *vmx_vm_alloc(void)
11426{
Marc Orrd1e5b0e2018-05-15 04:37:37 -070011427 struct kvm_vmx *kvm_vmx = vzalloc(sizeof(struct kvm_vmx));
Sean Christopherson40bbb9d2018-03-20 12:17:20 -070011428 return &kvm_vmx->kvm;
Sean Christopherson434a1e92018-03-20 12:17:18 -070011429}
11430
11431static void vmx_vm_free(struct kvm *kvm)
11432{
Marc Orrd1e5b0e2018-05-15 04:37:37 -070011433 vfree(to_kvm_vmx(kvm));
Sean Christopherson434a1e92018-03-20 12:17:18 -070011434}
11435
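/*
 * Switch the vcpu to a different loaded VMCS (e.g. between vmcs01 and a
 * nested vmcs02): unload the old one, swap the pointer, reload via the
 * normal vcpu-load path, and drop the shadow control and segment caches
 * that were tied to the old VMCS.
 */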
David Hildenbrand1279a6b12017-03-20 10:00:08 +010011436static void vmx_switch_vmcs(struct kvm_vcpu *vcpu, struct loaded_vmcs *vmcs)
Paolo Bonzini4fa77342014-07-17 12:25:16 +020011437{
11438 struct vcpu_vmx *vmx = to_vmx(vcpu);
11439 int cpu;
11440
David Hildenbrand1279a6b12017-03-20 10:00:08 +010011441 if (vmx->loaded_vmcs == vmcs)
Paolo Bonzini4fa77342014-07-17 12:25:16 +020011442 return;
11443
11444 cpu = get_cpu();
Paolo Bonzini4fa77342014-07-17 12:25:16 +020011445 vmx_vcpu_put(vcpu);
Sean Christophersonbd9966d2018-07-23 12:32:42 -070011446 vmx->loaded_vmcs = vmcs;
Paolo Bonzini4fa77342014-07-17 12:25:16 +020011447 vmx_vcpu_load(vcpu, cpu);
Paolo Bonzini4fa77342014-07-17 12:25:16 +020011448 put_cpu();
Sean Christophersonb7031fd2018-09-26 09:23:42 -070011449
11450 vm_entry_controls_reset_shadow(vmx);
11451 vm_exit_controls_reset_shadow(vmx);
11452 vmx_segment_cache_clear(vmx);
Paolo Bonzini4fa77342014-07-17 12:25:16 +020011453}
11454
Jim Mattson2f1fe812016-07-08 15:36:06 -070011455/*
11456 * Ensure that the current vmcs of the logical processor is the
11457 * vmcs01 of the vcpu before calling free_nested().
11458 */
11459static void vmx_free_vcpu_nested(struct kvm_vcpu *vcpu)
11460{
Vitaly Kuznetsov14c07ad2018-10-08 21:28:08 +020011461 vcpu_load(vcpu);
11462 vmx_switch_vmcs(vcpu, &to_vmx(vcpu)->vmcs01);
11463 free_nested(vcpu);
11464 vcpu_put(vcpu);
Jim Mattson2f1fe812016-07-08 15:36:06 -070011465}
11466
Avi Kivity6aa8b732006-12-10 02:21:36 -080011467static void vmx_free_vcpu(struct kvm_vcpu *vcpu)
11468{
Rusty Russellfb3f0f52007-07-27 17:16:56 +100011469 struct vcpu_vmx *vmx = to_vmx(vcpu);
11470
Kai Huang843e4332015-01-28 10:54:28 +080011471 if (enable_pml)
Kai Huanga3eaa862015-11-04 13:46:05 +080011472 vmx_destroy_pml_buffer(vmx);
Wanpeng Li991e7a02015-09-16 17:30:05 +080011473 free_vpid(vmx->vpid);
Paolo Bonzini4fa77342014-07-17 12:25:16 +020011474 leave_guest_mode(vcpu);
Jim Mattson2f1fe812016-07-08 15:36:06 -070011475 vmx_free_vcpu_nested(vcpu);
Paolo Bonzini4fa77342014-07-17 12:25:16 +020011476 free_loaded_vmcs(vmx->loaded_vmcs);
Rusty Russellfb3f0f52007-07-27 17:16:56 +100011477 kfree(vmx->guest_msrs);
11478 kvm_vcpu_uninit(vcpu);
Rusty Russella4770342007-08-01 14:46:11 +100011479 kmem_cache_free(kvm_vcpu_cache, vmx);
Avi Kivity6aa8b732006-12-10 02:21:36 -080011480}
11481
Rusty Russellfb3f0f52007-07-27 17:16:56 +100011482static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id)
Avi Kivity6aa8b732006-12-10 02:21:36 -080011483{
Rusty Russellfb3f0f52007-07-27 17:16:56 +100011484 int err;
Rusty Russellc16f8622007-07-30 21:12:19 +100011485 struct vcpu_vmx *vmx = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
Paolo Bonzini904e14f2018-01-16 16:51:18 +010011486 unsigned long *msr_bitmap;
Avi Kivity15ad7142007-07-11 18:17:21 +030011487 int cpu;
Avi Kivity6aa8b732006-12-10 02:21:36 -080011488
Gregory Haskinsa2fa3e92007-07-27 08:13:10 -040011489 if (!vmx)
Rusty Russellfb3f0f52007-07-27 17:16:56 +100011490 return ERR_PTR(-ENOMEM);
11491
Wanpeng Li991e7a02015-09-16 17:30:05 +080011492 vmx->vpid = allocate_vpid();
Sheng Yang2384d2b2008-01-17 15:14:33 +080011493
Rusty Russellfb3f0f52007-07-27 17:16:56 +100011494 err = kvm_vcpu_init(&vmx->vcpu, kvm, id);
11495 if (err)
11496 goto free_vcpu;
Ingo Molnar965b58a2007-01-05 16:36:23 -080011497
Peter Feiner4e595162016-07-07 14:49:58 -070011498 err = -ENOMEM;
11499
11500 /*
11501	 * If PML is turned on, a failure to enable PML simply results in a
11502	 * failure to create the vcpu, so we can simplify the PML logic (by
11503	 * avoiding cases such as enabling PML only partially on the vcpus
11504	 * of the guest, etc.).
11505 */
11506 if (enable_pml) {
11507 vmx->pml_pg = alloc_page(GFP_KERNEL | __GFP_ZERO);
11508 if (!vmx->pml_pg)
11509 goto uninit_vcpu;
11510 }
11511
Gregory Haskinsa2fa3e92007-07-27 08:13:10 -040011512 vmx->guest_msrs = kmalloc(PAGE_SIZE, GFP_KERNEL);
Paolo Bonzini03916db2014-07-24 14:21:57 +020011513 BUILD_BUG_ON(ARRAY_SIZE(vmx_msr_index) * sizeof(vmx->guest_msrs[0])
11514 > PAGE_SIZE);
Nadav Amit0123be42014-07-24 15:06:56 +030011515
Peter Feiner4e595162016-07-07 14:49:58 -070011516 if (!vmx->guest_msrs)
11517 goto free_pml;
Ingo Molnar965b58a2007-01-05 16:36:23 -080011518
Paolo Bonzinif21f1652018-01-11 12:16:15 +010011519 err = alloc_loaded_vmcs(&vmx->vmcs01);
11520 if (err < 0)
Rusty Russellfb3f0f52007-07-27 17:16:56 +100011521 goto free_msrs;
Gregory Haskinsa2fa3e92007-07-27 08:13:10 -040011522
Paolo Bonzini904e14f2018-01-16 16:51:18 +010011523 msr_bitmap = vmx->vmcs01.msr_bitmap;
11524 vmx_disable_intercept_for_msr(msr_bitmap, MSR_FS_BASE, MSR_TYPE_RW);
11525 vmx_disable_intercept_for_msr(msr_bitmap, MSR_GS_BASE, MSR_TYPE_RW);
11526 vmx_disable_intercept_for_msr(msr_bitmap, MSR_KERNEL_GS_BASE, MSR_TYPE_RW);
11527 vmx_disable_intercept_for_msr(msr_bitmap, MSR_IA32_SYSENTER_CS, MSR_TYPE_RW);
11528 vmx_disable_intercept_for_msr(msr_bitmap, MSR_IA32_SYSENTER_ESP, MSR_TYPE_RW);
11529 vmx_disable_intercept_for_msr(msr_bitmap, MSR_IA32_SYSENTER_EIP, MSR_TYPE_RW);
11530 vmx->msr_bitmap_mode = 0;
11531
Paolo Bonzinif21f1652018-01-11 12:16:15 +010011532 vmx->loaded_vmcs = &vmx->vmcs01;
Avi Kivity15ad7142007-07-11 18:17:21 +030011533 cpu = get_cpu();
11534 vmx_vcpu_load(&vmx->vcpu, cpu);
Zachary Amsdene48672f2010-08-19 22:07:23 -100011535 vmx->vcpu.cpu = cpu;
David Hildenbrand12d79912017-08-24 20:51:26 +020011536 vmx_vcpu_setup(vmx);
Rusty Russellfb3f0f52007-07-27 17:16:56 +100011537 vmx_vcpu_put(&vmx->vcpu);
Avi Kivity15ad7142007-07-11 18:17:21 +030011538 put_cpu();
Paolo Bonzini35754c92015-07-29 12:05:37 +020011539 if (cpu_need_virtualize_apic_accesses(&vmx->vcpu)) {
Jan Kiszkabe6d05c2011-04-13 01:27:55 +020011540 err = alloc_apic_access_page(kvm);
11541 if (err)
Marcelo Tosatti5e4a0b32008-02-14 21:21:43 -020011542 goto free_vmcs;
Jan Kiszkaa63cb562013-04-08 11:07:46 +020011543 }
Ingo Molnar965b58a2007-01-05 16:36:23 -080011544
Sean Christophersone90008d2018-03-05 12:04:37 -080011545 if (enable_ept && !enable_unrestricted_guest) {
Tang Chenf51770e2014-09-16 18:41:59 +080011546 err = init_rmode_identity_map(kvm);
11547 if (err)
Gleb Natapov93ea5382011-02-21 12:07:59 +020011548 goto free_vmcs;
Sheng Yangb927a3c2009-07-21 10:42:48 +080011549 }
Sheng Yangb7ebfb02008-04-25 21:44:52 +080011550
Roman Kagan63aff652018-07-19 21:59:07 +030011551 if (nested)
Paolo Bonzini6677f3d2018-02-26 13:40:08 +010011552 nested_vmx_setup_ctls_msrs(&vmx->nested.msrs,
11553 kvm_vcpu_apicv_active(&vmx->vcpu));
Wincy Vanb9c237b2015-02-03 23:56:30 +080011554
Wincy Van705699a2015-02-03 23:58:17 +080011555 vmx->nested.posted_intr_nv = -1;
Nadav Har'Ela9d30f32011-05-25 23:03:55 +030011556 vmx->nested.current_vmptr = -1ull;
Nadav Har'Ela9d30f32011-05-25 23:03:55 +030011557
Haozhong Zhang37e4c992016-06-22 14:59:55 +080011558 vmx->msr_ia32_feature_control_valid_bits = FEATURE_CONTROL_LOCKED;
11559
Paolo Bonzini31afb2e2017-06-06 12:57:06 +020011560 /*
11561 * Enforce invariant: pi_desc.nv is always either POSTED_INTR_VECTOR
11562 * or POSTED_INTR_WAKEUP_VECTOR.
11563 */
11564 vmx->pi_desc.nv = POSTED_INTR_VECTOR;
11565 vmx->pi_desc.sn = 1;
11566
Rusty Russellfb3f0f52007-07-27 17:16:56 +100011567 return &vmx->vcpu;
Ingo Molnar965b58a2007-01-05 16:36:23 -080011568
Rusty Russellfb3f0f52007-07-27 17:16:56 +100011569free_vmcs:
Xiao Guangrong5f3fbc32012-05-14 14:58:58 +080011570 free_loaded_vmcs(vmx->loaded_vmcs);
Rusty Russellfb3f0f52007-07-27 17:16:56 +100011571free_msrs:
Rusty Russellfb3f0f52007-07-27 17:16:56 +100011572 kfree(vmx->guest_msrs);
Peter Feiner4e595162016-07-07 14:49:58 -070011573free_pml:
11574 vmx_destroy_pml_buffer(vmx);
Rusty Russellfb3f0f52007-07-27 17:16:56 +100011575uninit_vcpu:
11576 kvm_vcpu_uninit(&vmx->vcpu);
11577free_vcpu:
Wanpeng Li991e7a02015-09-16 17:30:05 +080011578 free_vpid(vmx->vpid);
Rusty Russella4770342007-08-01 14:46:11 +100011579 kmem_cache_free(kvm_vcpu_cache, vmx);
Rusty Russellfb3f0f52007-07-27 17:16:56 +100011580 return ERR_PTR(err);
Avi Kivity6aa8b732006-12-10 02:21:36 -080011581}
11582
Jiri Kosinad90a7a02018-07-13 16:23:25 +020011583#define L1TF_MSG_SMT "L1TF CPU bug present and SMT on, data leak possible. See CVE-2018-3646 and https://www.kernel.org/doc/html/latest/admin-guide/l1tf.html for details.\n"
11584#define L1TF_MSG_L1D "L1TF CPU bug present and virtualization mitigation disabled, data leak possible. See CVE-2018-3646 and https://www.kernel.org/doc/html/latest/admin-guide/l1tf.html for details.\n"
Konrad Rzeszutek Wilk26acfb62018-06-20 11:29:53 -040011585
Wanpeng Lib31c1142018-03-12 04:53:04 -070011586static int vmx_vm_init(struct kvm *kvm)
11587{
Tianyu Lan877ad952018-07-19 08:40:23 +000011588 spin_lock_init(&to_kvm_vmx(kvm)->ept_pointer_lock);
11589
Wanpeng Lib31c1142018-03-12 04:53:04 -070011590 if (!ple_gap)
11591 kvm->arch.pause_in_guest = true;
Konrad Rzeszutek Wilk26acfb62018-06-20 11:29:53 -040011592
Jiri Kosinad90a7a02018-07-13 16:23:25 +020011593 if (boot_cpu_has(X86_BUG_L1TF) && enable_ept) {
11594 switch (l1tf_mitigation) {
11595 case L1TF_MITIGATION_OFF:
11596 case L1TF_MITIGATION_FLUSH_NOWARN:
11597 /* 'I explicitly don't care' is set */
11598 break;
11599 case L1TF_MITIGATION_FLUSH:
11600 case L1TF_MITIGATION_FLUSH_NOSMT:
11601 case L1TF_MITIGATION_FULL:
11602 /*
11603 * Warn upon starting the first VM in a potentially
11604 * insecure environment.
11605 */
11606 if (cpu_smt_control == CPU_SMT_ENABLED)
11607 pr_warn_once(L1TF_MSG_SMT);
11608 if (l1tf_vmx_mitigation == VMENTER_L1D_FLUSH_NEVER)
11609 pr_warn_once(L1TF_MSG_L1D);
11610 break;
11611 case L1TF_MITIGATION_FULL_FORCE:
11612 /* Flush is enforced */
11613 break;
Konrad Rzeszutek Wilk26acfb62018-06-20 11:29:53 -040011614 }
Konrad Rzeszutek Wilk26acfb62018-06-20 11:29:53 -040011615 }
Wanpeng Lib31c1142018-03-12 04:53:04 -070011616 return 0;
11617}
11618
Yang, Sheng002c7f72007-07-31 14:23:01 +030011619static void __init vmx_check_processor_compat(void *rtn)
11620{
11621 struct vmcs_config vmcs_conf;
11622
11623 *(int *)rtn = 0;
11624 if (setup_vmcs_config(&vmcs_conf) < 0)
11625 *(int *)rtn = -EIO;
Paolo Bonzini13893092018-02-26 13:40:09 +010011626 nested_vmx_setup_ctls_msrs(&vmcs_conf.nested, enable_apicv);
Yang, Sheng002c7f72007-07-31 14:23:01 +030011627 if (memcmp(&vmcs_config, &vmcs_conf, sizeof(struct vmcs_config)) != 0) {
11628 printk(KERN_ERR "kvm: CPU %d feature inconsistency!\n",
11629 smp_processor_id());
11630 *(int *)rtn = -EIO;
11631 }
11632}
11633
Sheng Yang4b12f0d2009-04-27 20:35:42 +080011634static u64 vmx_get_mt_mask(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio)
Sheng Yang64d4d522008-10-09 16:01:57 +080011635{
Xiao Guangrongb18d5432015-06-15 16:55:21 +080011636 u8 cache;
11637 u64 ipat = 0;
Sheng Yang4b12f0d2009-04-27 20:35:42 +080011638
Sheng Yang522c68c2009-04-27 20:35:43 +080011639 /* For VT-d and EPT combination
Paolo Bonzini606decd2015-10-01 13:12:47 +020011640 * 1. MMIO: always map as UC
Sheng Yang522c68c2009-04-27 20:35:43 +080011641 * 2. EPT with VT-d:
11642 * a. VT-d without snooping control feature: can't guarantee the
Paolo Bonzini606decd2015-10-01 13:12:47 +020011643 * result, try to trust guest.
Sheng Yang522c68c2009-04-27 20:35:43 +080011644 * b. VT-d with snooping control feature: snooping control feature of
11645 * VT-d engine can guarantee the cache correctness. Just set it
11646 * to WB to keep consistent with host. So the same as item 3.
Sheng Yanga19a6d12010-02-09 16:41:53 +080011647 * 3. EPT without VT-d: always map as WB and set IPAT=1 to keep
Sheng Yang522c68c2009-04-27 20:35:43 +080011648 * consistent with host MTRR
11649 */
Paolo Bonzini606decd2015-10-01 13:12:47 +020011650 if (is_mmio) {
11651 cache = MTRR_TYPE_UNCACHABLE;
11652 goto exit;
11653 }
11654
11655 if (!kvm_arch_has_noncoherent_dma(vcpu->kvm)) {
Xiao Guangrongb18d5432015-06-15 16:55:21 +080011656 ipat = VMX_EPT_IPAT_BIT;
11657 cache = MTRR_TYPE_WRBACK;
11658 goto exit;
11659 }
11660
11661 if (kvm_read_cr0(vcpu) & X86_CR0_CD) {
11662 ipat = VMX_EPT_IPAT_BIT;
Paolo Bonzini0da029e2015-07-23 08:24:42 +020011663 if (kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_CD_NW_CLEARED))
Xiao Guangrongfb2799502015-07-16 03:25:56 +080011664 cache = MTRR_TYPE_WRBACK;
11665 else
11666 cache = MTRR_TYPE_UNCACHABLE;
Xiao Guangrongb18d5432015-06-15 16:55:21 +080011667 goto exit;
11668 }
11669
Xiao Guangrongff536042015-06-15 16:55:22 +080011670 cache = kvm_mtrr_get_guest_memory_type(vcpu, gfn);
Xiao Guangrongb18d5432015-06-15 16:55:21 +080011671
11672exit:
11673 return (cache << VMX_EPT_MT_EPTE_SHIFT) | ipat;
Sheng Yang64d4d522008-10-09 16:01:57 +080011674}
11675
Sheng Yang17cc3932010-01-05 19:02:27 +080011676static int vmx_get_lpage_level(void)
Joerg Roedel344f4142009-07-27 16:30:48 +020011677{
Sheng Yang878403b2010-01-05 19:02:29 +080011678 if (enable_ept && !cpu_has_vmx_ept_1g_page())
11679 return PT_DIRECTORY_LEVEL;
11680 else
11681 /* For shadow and EPT supported 1GB page */
11682 return PT_PDPE_LEVEL;
Joerg Roedel344f4142009-07-27 16:30:48 +020011683}
11684
Xiao Guangrongfeda8052015-09-09 14:05:55 +080011685static void vmcs_set_secondary_exec_control(u32 new_ctl)
11686{
11687 /*
11688 * These bits in the secondary execution controls field
11689	 * are dynamic; the others are mostly based on the hypervisor
11690 * architecture and the guest's CPUID. Do not touch the
11691 * dynamic bits.
11692 */
11693 u32 mask =
11694 SECONDARY_EXEC_SHADOW_VMCS |
11695 SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE |
Paolo Bonzini0367f202016-07-12 10:44:55 +020011696 SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES |
11697 SECONDARY_EXEC_DESC;
Xiao Guangrongfeda8052015-09-09 14:05:55 +080011698
11699 u32 cur_ctl = vmcs_read32(SECONDARY_VM_EXEC_CONTROL);
11700
11701 vmcs_write32(SECONDARY_VM_EXEC_CONTROL,
11702 (new_ctl & ~mask) | (cur_ctl & mask));
11703}
11704
David Matlack8322ebb2016-11-29 18:14:09 -080011705/*
11706 * Generate MSR_IA32_VMX_CR{0,4}_FIXED1 according to CPUID. Only set bits
11707 * (indicating "allowed-1") if they are supported in the guest's CPUID.
11708 */
11709static void nested_vmx_cr_fixed1_bits_update(struct kvm_vcpu *vcpu)
11710{
11711 struct vcpu_vmx *vmx = to_vmx(vcpu);
11712 struct kvm_cpuid_entry2 *entry;
11713
Paolo Bonzini6677f3d2018-02-26 13:40:08 +010011714 vmx->nested.msrs.cr0_fixed1 = 0xffffffff;
11715 vmx->nested.msrs.cr4_fixed1 = X86_CR4_PCE;
David Matlack8322ebb2016-11-29 18:14:09 -080011716
11717#define cr4_fixed1_update(_cr4_mask, _reg, _cpuid_mask) do { \
11718 if (entry && (entry->_reg & (_cpuid_mask))) \
Paolo Bonzini6677f3d2018-02-26 13:40:08 +010011719 vmx->nested.msrs.cr4_fixed1 |= (_cr4_mask); \
David Matlack8322ebb2016-11-29 18:14:09 -080011720} while (0)
11721
11722 entry = kvm_find_cpuid_entry(vcpu, 0x1, 0);
11723 cr4_fixed1_update(X86_CR4_VME, edx, bit(X86_FEATURE_VME));
11724 cr4_fixed1_update(X86_CR4_PVI, edx, bit(X86_FEATURE_VME));
11725 cr4_fixed1_update(X86_CR4_TSD, edx, bit(X86_FEATURE_TSC));
11726 cr4_fixed1_update(X86_CR4_DE, edx, bit(X86_FEATURE_DE));
11727 cr4_fixed1_update(X86_CR4_PSE, edx, bit(X86_FEATURE_PSE));
11728 cr4_fixed1_update(X86_CR4_PAE, edx, bit(X86_FEATURE_PAE));
11729 cr4_fixed1_update(X86_CR4_MCE, edx, bit(X86_FEATURE_MCE));
11730 cr4_fixed1_update(X86_CR4_PGE, edx, bit(X86_FEATURE_PGE));
11731 cr4_fixed1_update(X86_CR4_OSFXSR, edx, bit(X86_FEATURE_FXSR));
11732 cr4_fixed1_update(X86_CR4_OSXMMEXCPT, edx, bit(X86_FEATURE_XMM));
11733 cr4_fixed1_update(X86_CR4_VMXE, ecx, bit(X86_FEATURE_VMX));
11734 cr4_fixed1_update(X86_CR4_SMXE, ecx, bit(X86_FEATURE_SMX));
11735 cr4_fixed1_update(X86_CR4_PCIDE, ecx, bit(X86_FEATURE_PCID));
11736 cr4_fixed1_update(X86_CR4_OSXSAVE, ecx, bit(X86_FEATURE_XSAVE));
11737
11738 entry = kvm_find_cpuid_entry(vcpu, 0x7, 0);
11739 cr4_fixed1_update(X86_CR4_FSGSBASE, ebx, bit(X86_FEATURE_FSGSBASE));
11740 cr4_fixed1_update(X86_CR4_SMEP, ebx, bit(X86_FEATURE_SMEP));
11741 cr4_fixed1_update(X86_CR4_SMAP, ebx, bit(X86_FEATURE_SMAP));
11742 cr4_fixed1_update(X86_CR4_PKE, ecx, bit(X86_FEATURE_PKU));
Paolo Bonzinic4ad77e2017-11-13 14:23:59 +010011743 cr4_fixed1_update(X86_CR4_UMIP, ecx, bit(X86_FEATURE_UMIP));
David Matlack8322ebb2016-11-29 18:14:09 -080011744
11745#undef cr4_fixed1_update
11746}
11747
Liran Alon5f76f6f2018-09-14 03:25:52 +030011748static void nested_vmx_entry_exit_ctls_update(struct kvm_vcpu *vcpu)
11749{
11750 struct vcpu_vmx *vmx = to_vmx(vcpu);
11751
11752 if (kvm_mpx_supported()) {
11753 bool mpx_enabled = guest_cpuid_has(vcpu, X86_FEATURE_MPX);
11754
11755 if (mpx_enabled) {
11756 vmx->nested.msrs.entry_ctls_high |= VM_ENTRY_LOAD_BNDCFGS;
11757 vmx->nested.msrs.exit_ctls_high |= VM_EXIT_CLEAR_BNDCFGS;
11758 } else {
11759 vmx->nested.msrs.entry_ctls_high &= ~VM_ENTRY_LOAD_BNDCFGS;
11760 vmx->nested.msrs.exit_ctls_high &= ~VM_EXIT_CLEAR_BNDCFGS;
11761 }
11762 }
11763}
11764
Sheng Yang0e851882009-12-18 16:48:46 +080011765static void vmx_cpuid_update(struct kvm_vcpu *vcpu)
11766{
Sheng Yang4e47c7a2009-12-18 16:48:47 +080011767 struct vcpu_vmx *vmx = to_vmx(vcpu);
Sheng Yang4e47c7a2009-12-18 16:48:47 +080011768
Paolo Bonzini80154d72017-08-24 13:55:35 +020011769 if (cpu_has_secondary_exec_ctrls()) {
11770 vmx_compute_secondary_exec_control(vmx);
11771 vmcs_set_secondary_exec_control(vmx->secondary_exec_control);
Sheng Yang4e47c7a2009-12-18 16:48:47 +080011772 }
Mao, Junjiead756a12012-07-02 01:18:48 +000011773
Haozhong Zhang37e4c992016-06-22 14:59:55 +080011774 if (nested_vmx_allowed(vcpu))
11775 to_vmx(vcpu)->msr_ia32_feature_control_valid_bits |=
11776 FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX;
11777 else
11778 to_vmx(vcpu)->msr_ia32_feature_control_valid_bits &=
11779 ~FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX;
David Matlack8322ebb2016-11-29 18:14:09 -080011780
Liran Alon5f76f6f2018-09-14 03:25:52 +030011781 if (nested_vmx_allowed(vcpu)) {
David Matlack8322ebb2016-11-29 18:14:09 -080011782 nested_vmx_cr_fixed1_bits_update(vcpu);
Liran Alon5f76f6f2018-09-14 03:25:52 +030011783 nested_vmx_entry_exit_ctls_update(vcpu);
11784 }
Sheng Yang0e851882009-12-18 16:48:46 +080011785}
11786
Joerg Roedeld4330ef2010-04-22 12:33:11 +020011787static void vmx_set_supported_cpuid(u32 func, struct kvm_cpuid_entry2 *entry)
11788{
Nadav Har'El7b8050f2011-05-25 23:16:10 +030011789 if (func == 1 && nested)
11790 entry->ecx |= bit(X86_FEATURE_VMX);
Joerg Roedeld4330ef2010-04-22 12:33:11 +020011791}
11792
Yang Zhang25d92082013-08-06 12:00:32 +030011793static void nested_ept_inject_page_fault(struct kvm_vcpu *vcpu,
11794 struct x86_exception *fault)
11795{
Jan Kiszka533558b2014-01-04 18:47:20 +010011796 struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
Bandan Dasc5f983f2017-05-05 15:25:14 -040011797 struct vcpu_vmx *vmx = to_vmx(vcpu);
Jan Kiszka533558b2014-01-04 18:47:20 +010011798 u32 exit_reason;
Bandan Dasc5f983f2017-05-05 15:25:14 -040011799 unsigned long exit_qualification = vcpu->arch.exit_qualification;
Yang Zhang25d92082013-08-06 12:00:32 +030011800
Bandan Dasc5f983f2017-05-05 15:25:14 -040011801 if (vmx->nested.pml_full) {
11802 exit_reason = EXIT_REASON_PML_FULL;
11803 vmx->nested.pml_full = false;
11804 exit_qualification &= INTR_INFO_UNBLOCK_NMI;
11805 } else if (fault->error_code & PFERR_RSVD_MASK)
Jan Kiszka533558b2014-01-04 18:47:20 +010011806 exit_reason = EXIT_REASON_EPT_MISCONFIG;
Yang Zhang25d92082013-08-06 12:00:32 +030011807 else
Jan Kiszka533558b2014-01-04 18:47:20 +010011808 exit_reason = EXIT_REASON_EPT_VIOLATION;
Bandan Dasc5f983f2017-05-05 15:25:14 -040011809
11810 nested_vmx_vmexit(vcpu, exit_reason, 0, exit_qualification);
Yang Zhang25d92082013-08-06 12:00:32 +030011811 vmcs12->guest_physical_address = fault->address;
11812}
11813
Peter Feiner995f00a2017-06-30 17:26:32 -070011814static bool nested_ept_ad_enabled(struct kvm_vcpu *vcpu)
11815{
David Hildenbrandbb97a012017-08-10 23:15:28 +020011816 return nested_ept_get_cr3(vcpu) & VMX_EPTP_AD_ENABLE_BIT;
Peter Feiner995f00a2017-06-30 17:26:32 -070011817}
11818
Nadav Har'El155a97a2013-08-05 11:07:16 +030011819/* Callbacks for nested_ept_init_mmu_context: */
11820
11821static unsigned long nested_ept_get_cr3(struct kvm_vcpu *vcpu)
11822{
11823 /* return the page table to be shadowed - in our case, EPT12 */
11824 return get_vmcs12(vcpu)->ept_pointer;
11825}
11826
Sean Christopherson5b8ba412018-09-26 09:23:40 -070011827static void nested_ept_init_mmu_context(struct kvm_vcpu *vcpu)
Nadav Har'El155a97a2013-08-05 11:07:16 +030011828{
Paolo Bonziniad896af2013-10-02 16:56:14 +020011829 WARN_ON(mmu_is_nested(vcpu));
Paolo Bonziniae1e2d12017-03-30 11:55:30 +020011830
Vitaly Kuznetsov14c07ad2018-10-08 21:28:08 +020011831 vcpu->arch.mmu = &vcpu->arch.guest_mmu;
Paolo Bonziniad896af2013-10-02 16:56:14 +020011832 kvm_init_shadow_ept_mmu(vcpu,
Paolo Bonzini6677f3d2018-02-26 13:40:08 +010011833 to_vmx(vcpu)->nested.msrs.ept_caps &
Paolo Bonziniae1e2d12017-03-30 11:55:30 +020011834 VMX_EPT_EXECUTE_ONLY_BIT,
Junaid Shahid50c28f22018-06-27 14:59:11 -070011835 nested_ept_ad_enabled(vcpu),
11836 nested_ept_get_cr3(vcpu));
Vitaly Kuznetsov44dd3ff2018-10-08 21:28:05 +020011837 vcpu->arch.mmu->set_cr3 = vmx_set_cr3;
11838 vcpu->arch.mmu->get_cr3 = nested_ept_get_cr3;
11839 vcpu->arch.mmu->inject_page_fault = nested_ept_inject_page_fault;
Vitaly Kuznetsov3dc773e2018-10-08 21:28:06 +020011840 vcpu->arch.mmu->get_pdptr = kvm_pdptr_read;
Nadav Har'El155a97a2013-08-05 11:07:16 +030011841
11842 vcpu->arch.walk_mmu = &vcpu->arch.nested_mmu;
Nadav Har'El155a97a2013-08-05 11:07:16 +030011843}
11844
11845static void nested_ept_uninit_mmu_context(struct kvm_vcpu *vcpu)
11846{
Vitaly Kuznetsov14c07ad2018-10-08 21:28:08 +020011847 vcpu->arch.mmu = &vcpu->arch.root_mmu;
Vitaly Kuznetsov44dd3ff2018-10-08 21:28:05 +020011848 vcpu->arch.walk_mmu = &vcpu->arch.root_mmu;
Nadav Har'El155a97a2013-08-05 11:07:16 +030011849}
11850
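/*
 * Per the SDM, whether a #PF exits to L1 is decided by the PF_VECTOR
 * bit of the exception bitmap, inverted whenever
 * (error_code & mask) != match; the XOR below implements exactly that.
 */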
Eugene Korenevsky19d5f102014-12-16 22:35:53 +030011851static bool nested_vmx_is_page_fault_vmexit(struct vmcs12 *vmcs12,
11852 u16 error_code)
11853{
11854 bool inequality, bit;
11855
11856 bit = (vmcs12->exception_bitmap & (1u << PF_VECTOR)) != 0;
11857 inequality =
11858 (error_code & vmcs12->page_fault_error_code_mask) !=
11859 vmcs12->page_fault_error_code_match;
11860 return inequality ^ bit;
11861}
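
/*
 * A worked example of the mask/match rule above (illustrative numbers):
 * suppose L1 sets bit 14 (#PF) in the exception bitmap and programs
 * page_fault_error_code_mask = page_fault_error_code_match =
 * PFERR_PRESENT_MASK.  For a present fault, (error_code & mask) equals
 * match, so "inequality" is false and false ^ true = true: the fault is
 * reflected to L1 as a vmexit.  For a not-present fault the comparison
 * differs, "inequality" is true, and true ^ true = false: the fault is
 * delivered straight to L2 instead of causing a nested vmexit.
 */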
11862
Gleb Natapovfeaf0c7d2013-09-25 12:51:36 +030011863static void vmx_inject_page_fault_nested(struct kvm_vcpu *vcpu,
11864 struct x86_exception *fault)
11865{
11866 struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
11867
11868 WARN_ON(!is_guest_mode(vcpu));
11869
Wanpeng Li305d0ab2017-09-28 18:16:44 -070011870 if (nested_vmx_is_page_fault_vmexit(vmcs12, fault->error_code) &&
11871 !to_vmx(vcpu)->nested.nested_run_pending) {
Paolo Bonzinib96fb432017-07-27 12:29:32 +020011872 vmcs12->vm_exit_intr_error_code = fault->error_code;
11873 nested_vmx_vmexit(vcpu, EXIT_REASON_EXCEPTION_NMI,
11874 PF_VECTOR | INTR_TYPE_HARD_EXCEPTION |
11875 INTR_INFO_DELIVER_CODE_MASK | INTR_INFO_VALID_MASK,
11876 fault->address);
Paolo Bonzini7313c692017-07-27 10:31:25 +020011877 } else {
Gleb Natapovfeaf0c7d2013-09-25 12:51:36 +030011878 kvm_inject_page_fault(vcpu, fault);
Paolo Bonzini7313c692017-07-27 10:31:25 +020011879 }
Gleb Natapovfeaf0c7d2013-09-25 12:51:36 +030011880}
11881
Paolo Bonzinic9923842017-12-13 14:16:30 +010011882static inline bool nested_vmx_prepare_msr_bitmap(struct kvm_vcpu *vcpu,
11883 struct vmcs12 *vmcs12);
Jim Mattson6beb7bd2016-11-30 12:03:45 -080011884
Paolo Bonzini7f7f1ba2018-07-18 18:49:01 +020011885static void nested_get_vmcs12_pages(struct kvm_vcpu *vcpu)
Wanpeng Lia2bcba52014-08-21 19:46:49 +080011886{
Paolo Bonzini7f7f1ba2018-07-18 18:49:01 +020011887 struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
Wanpeng Lia2bcba52014-08-21 19:46:49 +080011888 struct vcpu_vmx *vmx = to_vmx(vcpu);
David Hildenbrand5e2f30b2017-08-03 18:11:04 +020011889 struct page *page;
Jim Mattson6beb7bd2016-11-30 12:03:45 -080011890 u64 hpa;
Wanpeng Lia2bcba52014-08-21 19:46:49 +080011891
11892 if (nested_cpu_has2(vmcs12, SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES)) {
Wanpeng Lia2bcba52014-08-21 19:46:49 +080011893 /*
11894 * Translate L1 physical address to host physical
11895 * address for vmcs02. Keep the page pinned, so this
11896 * physical address remains valid. We keep a reference
11897 * to it so we can release it later.
11898 */
David Hildenbrand5e2f30b2017-08-03 18:11:04 +020011899 if (vmx->nested.apic_access_page) { /* shouldn't happen */
David Hildenbrand53a70da2017-08-03 18:11:05 +020011900 kvm_release_page_dirty(vmx->nested.apic_access_page);
David Hildenbrand5e2f30b2017-08-03 18:11:04 +020011901 vmx->nested.apic_access_page = NULL;
11902 }
11903 page = kvm_vcpu_gpa_to_page(vcpu, vmcs12->apic_access_addr);
Jim Mattson6beb7bd2016-11-30 12:03:45 -080011904 /*
11905 * If translation failed, no matter: This feature asks
11906 * to exit when accessing the given address, and if it
11907 * can never be accessed, this feature won't do
11908 * anything anyway.
11909 */
David Hildenbrand5e2f30b2017-08-03 18:11:04 +020011910 if (!is_error_page(page)) {
11911 vmx->nested.apic_access_page = page;
Jim Mattson6beb7bd2016-11-30 12:03:45 -080011912 hpa = page_to_phys(vmx->nested.apic_access_page);
11913 vmcs_write64(APIC_ACCESS_ADDR, hpa);
11914 } else {
11915 vmcs_clear_bits(SECONDARY_VM_EXEC_CONTROL,
11916 SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES);
11917 }
Wanpeng Lia2bcba52014-08-21 19:46:49 +080011918 }
Wanpeng Lia7c0b072014-08-21 19:46:50 +080011919
11920 if (nested_cpu_has(vmcs12, CPU_BASED_TPR_SHADOW)) {
David Hildenbrand5e2f30b2017-08-03 18:11:04 +020011921 if (vmx->nested.virtual_apic_page) { /* shouldn't happen */
David Hildenbrand53a70da2017-08-03 18:11:05 +020011922 kvm_release_page_dirty(vmx->nested.virtual_apic_page);
David Hildenbrand5e2f30b2017-08-03 18:11:04 +020011923 vmx->nested.virtual_apic_page = NULL;
11924 }
11925 page = kvm_vcpu_gpa_to_page(vcpu, vmcs12->virtual_apic_page_addr);
Wanpeng Lia7c0b072014-08-21 19:46:50 +080011926
11927 /*
Jim Mattson6beb7bd2016-11-30 12:03:45 -080011928 * If translation failed, VM entry will fail because
11929 * prepare_vmcs02 set VIRTUAL_APIC_PAGE_ADDR to -1ull.
11930 * Failing the vm entry is _not_ what the processor
11931 * does but it's basically the only possibility we
11932 * have. We could still enter the guest if CR8 load
11933 * exits are enabled, CR8 store exits are enabled, and
11934 * virtualize APIC access is disabled; in this case
11935 * the processor would never use the TPR shadow and we
11936 * could simply clear the bit from the execution
11937 * control. But such a configuration is useless, so
11938 * let's keep the code simple.
Wanpeng Lia7c0b072014-08-21 19:46:50 +080011939 */
David Hildenbrand5e2f30b2017-08-03 18:11:04 +020011940 if (!is_error_page(page)) {
11941 vmx->nested.virtual_apic_page = page;
Jim Mattson6beb7bd2016-11-30 12:03:45 -080011942 hpa = page_to_phys(vmx->nested.virtual_apic_page);
11943 vmcs_write64(VIRTUAL_APIC_PAGE_ADDR, hpa);
11944 }
Wanpeng Lia7c0b072014-08-21 19:46:50 +080011945 }
11946
Wincy Van705699a2015-02-03 23:58:17 +080011947 if (nested_cpu_has_posted_intr(vmcs12)) {
Wincy Van705699a2015-02-03 23:58:17 +080011948 if (vmx->nested.pi_desc_page) { /* shouldn't happen */
11949 kunmap(vmx->nested.pi_desc_page);
David Hildenbrand53a70da2017-08-03 18:11:05 +020011950 kvm_release_page_dirty(vmx->nested.pi_desc_page);
David Hildenbrand5e2f30b2017-08-03 18:11:04 +020011951 vmx->nested.pi_desc_page = NULL;
Wincy Van705699a2015-02-03 23:58:17 +080011952 }
David Hildenbrand5e2f30b2017-08-03 18:11:04 +020011953 page = kvm_vcpu_gpa_to_page(vcpu, vmcs12->posted_intr_desc_addr);
11954 if (is_error_page(page))
Jim Mattson6beb7bd2016-11-30 12:03:45 -080011955 return;
David Hildenbrand5e2f30b2017-08-03 18:11:04 +020011956 vmx->nested.pi_desc_page = page;
11957 vmx->nested.pi_desc = kmap(vmx->nested.pi_desc_page);
Wincy Van705699a2015-02-03 23:58:17 +080011958 vmx->nested.pi_desc =
11959 (struct pi_desc *)((void *)vmx->nested.pi_desc +
11960 (unsigned long)(vmcs12->posted_intr_desc_addr &
11961 (PAGE_SIZE - 1)));
Jim Mattson6beb7bd2016-11-30 12:03:45 -080011962 vmcs_write64(POSTED_INTR_DESC_ADDR,
11963 page_to_phys(vmx->nested.pi_desc_page) +
11964 (unsigned long)(vmcs12->posted_intr_desc_addr &
11965 (PAGE_SIZE - 1)));
Wincy Van705699a2015-02-03 23:58:17 +080011966 }
Linus Torvaldsd4667ca2018-02-14 17:02:15 -080011967 if (nested_vmx_prepare_msr_bitmap(vcpu, vmcs12))
KarimAllah Ahmed3712caeb2018-02-10 23:39:26 +000011968 vmcs_set_bits(CPU_BASED_VM_EXEC_CONTROL,
11969 CPU_BASED_USE_MSR_BITMAPS);
Jim Mattson6beb7bd2016-11-30 12:03:45 -080011970 else
11971 vmcs_clear_bits(CPU_BASED_VM_EXEC_CONTROL,
11972 CPU_BASED_USE_MSR_BITMAPS);
Wanpeng Lia2bcba52014-08-21 19:46:49 +080011973}
11974
Jan Kiszkaf41245002014-03-07 20:03:13 +010011975static void vmx_start_preemption_timer(struct kvm_vcpu *vcpu)
11976{
11977 u64 preemption_timeout = get_vmcs12(vcpu)->vmx_preemption_timer_value;
11978 struct vcpu_vmx *vmx = to_vmx(vcpu);
11979
Sean Christopherson4c008122018-08-27 15:21:10 -070011980 /*
11981 * A timer value of zero is architecturally guaranteed to cause
11982 * a VMExit prior to executing any instructions in the guest.
11983 */
11984 if (preemption_timeout == 0) {
Jan Kiszkaf41245002014-03-07 20:03:13 +010011985 vmx_preemption_timer_fn(&vmx->nested.preemption_timer);
11986 return;
11987 }
11988
Sean Christopherson4c008122018-08-27 15:21:10 -070011989 if (vcpu->arch.virtual_tsc_khz == 0)
11990 return;
11991
Jan Kiszkaf41245002014-03-07 20:03:13 +010011992 preemption_timeout <<= VMX_MISC_EMULATED_PREEMPTION_TIMER_RATE;
11993 preemption_timeout *= 1000000;
11994 do_div(preemption_timeout, vcpu->arch.virtual_tsc_khz);
11995 hrtimer_start(&vmx->nested.preemption_timer,
11996 ns_to_ktime(preemption_timeout), HRTIMER_MODE_REL);
11997}
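
/*
 * For reference, a minimal, otherwise-unused sketch of the arithmetic
 * above (the helper name is illustrative), assuming the emulated
 * 2^VMX_MISC_EMULATED_PREEMPTION_TIMER_RATE TSC-cycles-per-tick ratio
 * that KVM advertises in IA32_VMX_MISC: with virtual_tsc_khz == 2600000
 * (a 2.6 GHz guest TSC) and a vmcs12 timer value of 1000, the deadline
 * is 1000 << 5 == 32000 TSC cycles, i.e. roughly 12.3 us.
 */
static inline u64 nested_preemption_timer_ns_sketch(u64 timer_value,
						    u32 virtual_tsc_khz)
{
	u64 ns = timer_value << VMX_MISC_EMULATED_PREEMPTION_TIMER_RATE;

	ns *= 1000000;			/* TSC cycles -> cycles * 1e6 */
	do_div(ns, virtual_tsc_khz);	/* ... / kHz == nanoseconds */
	return ns;
}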
11998
Jim Mattson56a20512017-07-06 16:33:06 -070011999static int nested_vmx_check_io_bitmap_controls(struct kvm_vcpu *vcpu,
12000 struct vmcs12 *vmcs12)
12001{
12002 if (!nested_cpu_has(vmcs12, CPU_BASED_USE_IO_BITMAPS))
12003 return 0;
12004
12005 if (!page_address_valid(vcpu, vmcs12->io_bitmap_a) ||
12006 !page_address_valid(vcpu, vmcs12->io_bitmap_b))
12007 return -EINVAL;
12008
12009 return 0;
12010}
12011
Wincy Van3af18d92015-02-03 23:49:31 +080012012static int nested_vmx_check_msr_bitmap_controls(struct kvm_vcpu *vcpu,
12013 struct vmcs12 *vmcs12)
12014{
Wincy Van3af18d92015-02-03 23:49:31 +080012015 if (!nested_cpu_has(vmcs12, CPU_BASED_USE_MSR_BITMAPS))
12016 return 0;
12017
Jim Mattson5fa99cb2017-07-06 16:33:07 -070012018 if (!page_address_valid(vcpu, vmcs12->msr_bitmap))
Wincy Van3af18d92015-02-03 23:49:31 +080012019 return -EINVAL;
12020
12021 return 0;
12022}
12023
Jim Mattson712b12d2017-08-24 13:24:47 -070012024static int nested_vmx_check_tpr_shadow_controls(struct kvm_vcpu *vcpu,
12025 struct vmcs12 *vmcs12)
12026{
12027 if (!nested_cpu_has(vmcs12, CPU_BASED_TPR_SHADOW))
12028 return 0;
12029
12030 if (!page_address_valid(vcpu, vmcs12->virtual_apic_page_addr))
12031 return -EINVAL;
12032
12033 return 0;
12034}
12035
Wincy Van3af18d92015-02-03 23:49:31 +080012036/*
12037 * Merge L0's and L1's MSR bitmaps; return false to indicate that
12038 * we do not use the hardware.
12039 */
Paolo Bonzinic9923842017-12-13 14:16:30 +010012040static inline bool nested_vmx_prepare_msr_bitmap(struct kvm_vcpu *vcpu,
12041 struct vmcs12 *vmcs12)
Wincy Van3af18d92015-02-03 23:49:31 +080012042{
Wincy Van82f0dd42015-02-03 23:57:18 +080012043 int msr;
Wincy Vanf2b93282015-02-03 23:56:03 +080012044 struct page *page;
Radim Krčmářd048c092016-08-08 20:16:22 +020012045 unsigned long *msr_bitmap_l1;
Paolo Bonzini904e14f2018-01-16 16:51:18 +010012046 unsigned long *msr_bitmap_l0 = to_vmx(vcpu)->nested.vmcs02.msr_bitmap;
Ashok Raj15d45072018-02-01 22:59:43 +010012047 /*
KarimAllah Ahmedd28b3872018-02-01 22:59:45 +010012048 * pred_cmd & spec_ctrl are trying to verify two things:
Ashok Raj15d45072018-02-01 22:59:43 +010012049 *
12050 * 1. L0 gave a permission to L1 to actually passthrough the MSR. This
12051 * ensures that we do not accidentally generate an L02 MSR bitmap
12052 * from the L12 MSR bitmap that is too permissive.
12053 * 2. That L1 or its L2s have actually used the MSR. This avoids
12054 * unnecessary merging of the bitmap if the MSR is unused. This
12055 * works properly because we only update the L01 MSR bitmap lazily.
12056 * So even if L0 should pass L1 these MSRs, the L01 bitmap is only
12057 * updated to reflect this when L1 (or its L2s) actually write to
12058 * the MSR.
12059 */
KarimAllah Ahmed206587a2018-02-10 23:39:25 +000012060 bool pred_cmd = !msr_write_intercepted_l01(vcpu, MSR_IA32_PRED_CMD);
12061 bool spec_ctrl = !msr_write_intercepted_l01(vcpu, MSR_IA32_SPEC_CTRL);
Wincy Vanf2b93282015-02-03 23:56:03 +080012062
Paolo Bonzinic9923842017-12-13 14:16:30 +010012063 /* Nothing to do if the MSR bitmap is not in use. */
12064 if (!cpu_has_vmx_msr_bitmap() ||
12065 !nested_cpu_has(vmcs12, CPU_BASED_USE_MSR_BITMAPS))
12066 return false;
12067
Ashok Raj15d45072018-02-01 22:59:43 +010012068 if (!nested_cpu_has_virt_x2apic_mode(vmcs12) &&
KarimAllah Ahmedd28b3872018-02-01 22:59:45 +010012069 !pred_cmd && !spec_ctrl)
Wincy Vanf2b93282015-02-03 23:56:03 +080012070 return false;
12071
David Hildenbrand5e2f30b2017-08-03 18:11:04 +020012072 page = kvm_vcpu_gpa_to_page(vcpu, vmcs12->msr_bitmap);
12073 if (is_error_page(page))
Wincy Vanf2b93282015-02-03 23:56:03 +080012074 return false;
Paolo Bonzinic9923842017-12-13 14:16:30 +010012075
Radim Krčmářd048c092016-08-08 20:16:22 +020012076 msr_bitmap_l1 = (unsigned long *)kmap(page);
Paolo Bonzinic9923842017-12-13 14:16:30 +010012077 if (nested_cpu_has_apic_reg_virt(vmcs12)) {
12078 /*
12079 * L0 need not intercept reads for MSRs between 0x800 and 0x8ff; it
12080 * just lets the processor take the value from the virtual-APIC page,
12081 * so take those 256 bits directly from the L1 bitmap.
12082 */
12083 for (msr = 0x800; msr <= 0x8ff; msr += BITS_PER_LONG) {
12084 unsigned word = msr / BITS_PER_LONG;
12085 msr_bitmap_l0[word] = msr_bitmap_l1[word];
12086 msr_bitmap_l0[word + (0x800 / sizeof(long))] = ~0;
Wincy Van608406e2015-02-03 23:57:51 +080012087 }
Paolo Bonzinic9923842017-12-13 14:16:30 +010012088 } else {
12089 for (msr = 0x800; msr <= 0x8ff; msr += BITS_PER_LONG) {
12090 unsigned word = msr / BITS_PER_LONG;
12091 msr_bitmap_l0[word] = ~0;
12092 msr_bitmap_l0[word + (0x800 / sizeof(long))] = ~0;
12093 }
12094 }
12095
12096 nested_vmx_disable_intercept_for_msr(
12097 msr_bitmap_l1, msr_bitmap_l0,
Paolo Bonzinid7231e72017-12-21 00:47:55 +010012098 X2APIC_MSR(APIC_TASKPRI),
Paolo Bonzinic9923842017-12-13 14:16:30 +010012099 MSR_TYPE_W);
12100
12101 if (nested_cpu_has_vid(vmcs12)) {
12102 nested_vmx_disable_intercept_for_msr(
12103 msr_bitmap_l1, msr_bitmap_l0,
Paolo Bonzinid7231e72017-12-21 00:47:55 +010012104 X2APIC_MSR(APIC_EOI),
Paolo Bonzinic9923842017-12-13 14:16:30 +010012105 MSR_TYPE_W);
12106 nested_vmx_disable_intercept_for_msr(
12107 msr_bitmap_l1, msr_bitmap_l0,
Paolo Bonzinid7231e72017-12-21 00:47:55 +010012108 X2APIC_MSR(APIC_SELF_IPI),
Paolo Bonzinic9923842017-12-13 14:16:30 +010012109 MSR_TYPE_W);
Wincy Van82f0dd42015-02-03 23:57:18 +080012110 }
Ashok Raj15d45072018-02-01 22:59:43 +010012111
KarimAllah Ahmedd28b3872018-02-01 22:59:45 +010012112 if (spec_ctrl)
12113 nested_vmx_disable_intercept_for_msr(
12114 msr_bitmap_l1, msr_bitmap_l0,
12115 MSR_IA32_SPEC_CTRL,
12116 MSR_TYPE_R | MSR_TYPE_W);
12117
Ashok Raj15d45072018-02-01 22:59:43 +010012118 if (pred_cmd)
12119 nested_vmx_disable_intercept_for_msr(
12120 msr_bitmap_l1, msr_bitmap_l0,
12121 MSR_IA32_PRED_CMD,
12122 MSR_TYPE_W);
12123
Wincy Vanf2b93282015-02-03 23:56:03 +080012124 kunmap(page);
David Hildenbrand53a70da2017-08-03 18:11:05 +020012125 kvm_release_page_clean(page);
Wincy Vanf2b93282015-02-03 23:56:03 +080012126
12127 return true;
12128}
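
/*
 * Layout reminder for the merge above (descriptive): the 4K MSR bitmap
 * is made up of four 1K regions -- read-low (MSRs 0x0-0x1fff),
 * read-high (0xc0000000-0xc0001fff), write-low and write-high.  For the
 * x2APIC range 0x800-0x8ff, msr_bitmap_l0[word] therefore lands in the
 * read-low region, while the same word at byte offset 0x800, i.e.
 * msr_bitmap_l0[word + 0x800 / sizeof(long)], lands in the write-low
 * region.  That is why reads can be taken verbatim from L1's bitmap
 * when APIC-register virtualization is on, while writes are first
 * intercepted and then selectively re-allowed (TPR always, EOI and
 * SELF_IPI only with virtual-interrupt delivery).
 */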
12129
Liran Alon61ada742018-06-23 02:35:08 +030012130static void nested_cache_shadow_vmcs12(struct kvm_vcpu *vcpu,
12131 struct vmcs12 *vmcs12)
12132{
12133 struct vmcs12 *shadow;
12134 struct page *page;
12135
12136 if (!nested_cpu_has_shadow_vmcs(vmcs12) ||
12137 vmcs12->vmcs_link_pointer == -1ull)
12138 return;
12139
12140 shadow = get_shadow_vmcs12(vcpu);
12141 page = kvm_vcpu_gpa_to_page(vcpu, vmcs12->vmcs_link_pointer);
12142
12143 memcpy(shadow, kmap(page), VMCS12_SIZE);
12144
12145 kunmap(page);
12146 kvm_release_page_clean(page);
12147}
12148
12149static void nested_flush_cached_shadow_vmcs12(struct kvm_vcpu *vcpu,
12150 struct vmcs12 *vmcs12)
12151{
12152 struct vcpu_vmx *vmx = to_vmx(vcpu);
12153
12154 if (!nested_cpu_has_shadow_vmcs(vmcs12) ||
12155 vmcs12->vmcs_link_pointer == -1ull)
12156 return;
12157
12158 kvm_write_guest(vmx->vcpu.kvm, vmcs12->vmcs_link_pointer,
12159 get_shadow_vmcs12(vcpu), VMCS12_SIZE);
12160}
12161
Krish Sadhukhanf0f4cf52018-04-11 01:10:16 -040012162static int nested_vmx_check_apic_access_controls(struct kvm_vcpu *vcpu,
12163 struct vmcs12 *vmcs12)
12164{
12165 if (nested_cpu_has2(vmcs12, SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES) &&
12166 !page_address_valid(vcpu, vmcs12->apic_access_addr))
12167 return -EINVAL;
12168 else
12169 return 0;
12170}
12171
Wincy Vanf2b93282015-02-03 23:56:03 +080012172static int nested_vmx_check_apicv_controls(struct kvm_vcpu *vcpu,
12173 struct vmcs12 *vmcs12)
12174{
Wincy Van82f0dd42015-02-03 23:57:18 +080012175 if (!nested_cpu_has_virt_x2apic_mode(vmcs12) &&
Wincy Van608406e2015-02-03 23:57:51 +080012176 !nested_cpu_has_apic_reg_virt(vmcs12) &&
Wincy Van705699a2015-02-03 23:58:17 +080012177 !nested_cpu_has_vid(vmcs12) &&
12178 !nested_cpu_has_posted_intr(vmcs12))
Wincy Vanf2b93282015-02-03 23:56:03 +080012179 return 0;
12180
12181 /*
12182 * If virtualize x2APIC mode is enabled,
12183 * virtualize APIC accesses must be disabled.
12184 */
Wincy Van82f0dd42015-02-03 23:57:18 +080012185 if (nested_cpu_has_virt_x2apic_mode(vmcs12) &&
12186 nested_cpu_has2(vmcs12, SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES))
Wincy Vanf2b93282015-02-03 23:56:03 +080012187 return -EINVAL;
12188
Wincy Van608406e2015-02-03 23:57:51 +080012189 /*
12190 * If virtual interrupt delivery is enabled,
12191 * we must exit on external interrupts.
12192 */
12193 if (nested_cpu_has_vid(vmcs12) &&
12194 !nested_exit_on_intr(vcpu))
12195 return -EINVAL;
12196
Wincy Van705699a2015-02-03 23:58:17 +080012197 /*
12198 * bits 15:8 should be zero in posted_intr_nv;
12199 * the descriptor address has already been checked
12200 * in nested_get_vmcs12_pages.
Krish Sadhukhan6de84e52018-08-23 20:03:03 -040012201 *
12202 * bits 5:0 of posted_intr_desc_addr should be zero.
Wincy Van705699a2015-02-03 23:58:17 +080012203 */
12204 if (nested_cpu_has_posted_intr(vmcs12) &&
12205 (!nested_cpu_has_vid(vmcs12) ||
12206 !nested_exit_intr_ack_set(vcpu) ||
Krish Sadhukhan6de84e52018-08-23 20:03:03 -040012207 (vmcs12->posted_intr_nv & 0xff00) ||
12208 (vmcs12->posted_intr_desc_addr & 0x3f) ||
12209 (!page_address_valid(vcpu, vmcs12->posted_intr_desc_addr))))
Wincy Van705699a2015-02-03 23:58:17 +080012210 return -EINVAL;
12211
Wincy Vanf2b93282015-02-03 23:56:03 +080012212 /* TPR shadow is needed by all APICv features. */
12213 if (!nested_cpu_has(vmcs12, CPU_BASED_TPR_SHADOW))
12214 return -EINVAL;
12215
12216 return 0;
Wincy Van3af18d92015-02-03 23:49:31 +080012217}
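
/*
 * A concrete reading of the posted-interrupt checks above (illustrative
 * values): posted_intr_nv = 0x00f2 is acceptable -- bits 15:8 are clear,
 * so the notification vector is 0xf2 -- whereas 0x01f2 fails the 0xff00
 * test.  posted_intr_desc_addr must have bits 5:0 clear (the descriptor
 * is 64-byte aligned) and must additionally pass page_address_valid().
 */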
12218
Eugene Korenevskye9ac0332014-12-11 08:53:27 +030012219static int nested_vmx_check_msr_switch(struct kvm_vcpu *vcpu,
12220 unsigned long count_field,
Eugene Korenevsky92d71bc2015-03-29 23:56:44 +030012221 unsigned long addr_field)
Wincy Vanff651cb2014-12-11 08:52:58 +030012222{
Liran Alone2536742018-06-23 02:35:02 +030012223 struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
Eugene Korenevsky92d71bc2015-03-29 23:56:44 +030012224 int maxphyaddr;
Eugene Korenevskye9ac0332014-12-11 08:53:27 +030012225 u64 count, addr;
12226
Liran Alone2536742018-06-23 02:35:02 +030012227 if (vmcs12_read_any(vmcs12, count_field, &count) ||
12228 vmcs12_read_any(vmcs12, addr_field, &addr)) {
Eugene Korenevskye9ac0332014-12-11 08:53:27 +030012229 WARN_ON(1);
12230 return -EINVAL;
12231 }
12232 if (count == 0)
12233 return 0;
Eugene Korenevsky92d71bc2015-03-29 23:56:44 +030012234 maxphyaddr = cpuid_maxphyaddr(vcpu);
Eugene Korenevskye9ac0332014-12-11 08:53:27 +030012235 if (!IS_ALIGNED(addr, 16) || addr >> maxphyaddr ||
12236 (addr + count * sizeof(struct vmx_msr_entry) - 1) >> maxphyaddr) {
Paolo Bonzinibbe41b92016-08-19 17:51:20 +020012237 pr_debug_ratelimited(
Eugene Korenevskye9ac0332014-12-11 08:53:27 +030012238 "nVMX: invalid MSR switch (0x%lx, %d, %llu, 0x%08llx)",
12239 addr_field, maxphyaddr, count, addr);
12240 return -EINVAL;
12241 }
12242 return 0;
12243}
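
/*
 * Worked example for the switch-area check above (illustrative values):
 * with cpuid_maxphyaddr() == 36 and a count of 4, the area covers
 * 4 * 16 = 64 bytes.  addr = 0xfffffffc0 passes (16-byte aligned, last
 * byte 0xfffffffff still fits in 36 bits), while addr = 0xffffffff0 is
 * aligned but fails because the last entry would cross the 2^36 limit.
 */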
12244
12245static int nested_vmx_check_msr_switch_controls(struct kvm_vcpu *vcpu,
12246 struct vmcs12 *vmcs12)
12247{
Eugene Korenevskye9ac0332014-12-11 08:53:27 +030012248 if (vmcs12->vm_exit_msr_load_count == 0 &&
12249 vmcs12->vm_exit_msr_store_count == 0 &&
12250 vmcs12->vm_entry_msr_load_count == 0)
12251 return 0; /* Fast path */
Eugene Korenevskye9ac0332014-12-11 08:53:27 +030012252 if (nested_vmx_check_msr_switch(vcpu, VM_EXIT_MSR_LOAD_COUNT,
Eugene Korenevsky92d71bc2015-03-29 23:56:44 +030012253 VM_EXIT_MSR_LOAD_ADDR) ||
Eugene Korenevskye9ac0332014-12-11 08:53:27 +030012254 nested_vmx_check_msr_switch(vcpu, VM_EXIT_MSR_STORE_COUNT,
Eugene Korenevsky92d71bc2015-03-29 23:56:44 +030012255 VM_EXIT_MSR_STORE_ADDR) ||
Eugene Korenevskye9ac0332014-12-11 08:53:27 +030012256 nested_vmx_check_msr_switch(vcpu, VM_ENTRY_MSR_LOAD_COUNT,
Eugene Korenevsky92d71bc2015-03-29 23:56:44 +030012257 VM_ENTRY_MSR_LOAD_ADDR))
Wincy Vanff651cb2014-12-11 08:52:58 +030012258 return -EINVAL;
12259 return 0;
12260}
12261
Bandan Dasc5f983f2017-05-05 15:25:14 -040012262static int nested_vmx_check_pml_controls(struct kvm_vcpu *vcpu,
12263 struct vmcs12 *vmcs12)
12264{
Krish Sadhukhan55c1dcd2018-09-27 14:33:27 -040012265 if (!nested_cpu_has_pml(vmcs12))
12266 return 0;
Bandan Dasc5f983f2017-05-05 15:25:14 -040012267
Krish Sadhukhan55c1dcd2018-09-27 14:33:27 -040012268 if (!nested_cpu_has_ept(vmcs12) ||
12269 !page_address_valid(vcpu, vmcs12->pml_address))
12270 return -EINVAL;
Bandan Dasc5f983f2017-05-05 15:25:14 -040012271
12272 return 0;
12273}
12274
Liran Alona8a7c022018-06-23 02:35:06 +030012275static int nested_vmx_check_shadow_vmcs_controls(struct kvm_vcpu *vcpu,
12276 struct vmcs12 *vmcs12)
12277{
12278 if (!nested_cpu_has_shadow_vmcs(vmcs12))
12279 return 0;
12280
12281 if (!page_address_valid(vcpu, vmcs12->vmread_bitmap) ||
12282 !page_address_valid(vcpu, vmcs12->vmwrite_bitmap))
12283 return -EINVAL;
12284
12285 return 0;
12286}
12287
Eugene Korenevskye9ac0332014-12-11 08:53:27 +030012288static int nested_vmx_msr_check_common(struct kvm_vcpu *vcpu,
12289 struct vmx_msr_entry *e)
12290{
12291 /* x2APIC MSR accesses are not allowed */
Jan Kiszka8a9781f2015-05-04 08:32:32 +020012292 if (vcpu->arch.apic_base & X2APIC_ENABLE && e->index >> 8 == 0x8)
Eugene Korenevskye9ac0332014-12-11 08:53:27 +030012293 return -EINVAL;
12294 if (e->index == MSR_IA32_UCODE_WRITE || /* SDM Table 35-2 */
12295 e->index == MSR_IA32_UCODE_REV)
12296 return -EINVAL;
12297 if (e->reserved != 0)
12298 return -EINVAL;
12299 return 0;
12300}
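
/*
 * Example of what the common check above rejects (illustrative): with
 * x2APIC enabled in IA32_APIC_BASE, an entry for MSR 0x808 (x2APIC TPR)
 * is refused because 0x808 >> 8 == 0x8, as are IA32_UCODE_REV and
 * IA32_UCODE_WRITE and any entry whose reserved field is non-zero.  An
 * ordinary MSR such as MSR_STAR passes this helper and can only fail
 * later, when the actual kvm_set_msr()/kvm_get_msr() call is made.
 */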
12301
12302static int nested_vmx_load_msr_check(struct kvm_vcpu *vcpu,
12303 struct vmx_msr_entry *e)
Wincy Vanff651cb2014-12-11 08:52:58 +030012304{
12305 if (e->index == MSR_FS_BASE ||
12306 e->index == MSR_GS_BASE ||
Eugene Korenevskye9ac0332014-12-11 08:53:27 +030012307 e->index == MSR_IA32_SMM_MONITOR_CTL || /* SMM is not supported */
12308 nested_vmx_msr_check_common(vcpu, e))
12309 return -EINVAL;
12310 return 0;
12311}
12312
12313static int nested_vmx_store_msr_check(struct kvm_vcpu *vcpu,
12314 struct vmx_msr_entry *e)
12315{
12316 if (e->index == MSR_IA32_SMBASE || /* SMM is not supported */
12317 nested_vmx_msr_check_common(vcpu, e))
Wincy Vanff651cb2014-12-11 08:52:58 +030012318 return -EINVAL;
12319 return 0;
12320}
12321
12322/*
12323 * Load the guest's/host's MSRs at nested entry/exit.
12324 * Return 0 on success, or the (1-based) entry index on failure.
12325 */
12326static u32 nested_vmx_load_msr(struct kvm_vcpu *vcpu, u64 gpa, u32 count)
12327{
12328 u32 i;
12329 struct vmx_msr_entry e;
12330 struct msr_data msr;
12331
12332 msr.host_initiated = false;
12333 for (i = 0; i < count; i++) {
Paolo Bonzini54bf36a2015-04-08 15:39:23 +020012334 if (kvm_vcpu_read_guest(vcpu, gpa + i * sizeof(e),
12335 &e, sizeof(e))) {
Paolo Bonzinibbe41b92016-08-19 17:51:20 +020012336 pr_debug_ratelimited(
Eugene Korenevskye9ac0332014-12-11 08:53:27 +030012337 "%s cannot read MSR entry (%u, 0x%08llx)\n",
12338 __func__, i, gpa + i * sizeof(e));
Wincy Vanff651cb2014-12-11 08:52:58 +030012339 goto fail;
Eugene Korenevskye9ac0332014-12-11 08:53:27 +030012340 }
12341 if (nested_vmx_load_msr_check(vcpu, &e)) {
Paolo Bonzinibbe41b92016-08-19 17:51:20 +020012342 pr_debug_ratelimited(
Eugene Korenevskye9ac0332014-12-11 08:53:27 +030012343 "%s check failed (%u, 0x%x, 0x%x)\n",
12344 __func__, i, e.index, e.reserved);
12345 goto fail;
12346 }
Wincy Vanff651cb2014-12-11 08:52:58 +030012347 msr.index = e.index;
12348 msr.data = e.value;
Eugene Korenevskye9ac0332014-12-11 08:53:27 +030012349 if (kvm_set_msr(vcpu, &msr)) {
Paolo Bonzinibbe41b92016-08-19 17:51:20 +020012350 pr_debug_ratelimited(
Eugene Korenevskye9ac0332014-12-11 08:53:27 +030012351 "%s cannot write MSR (%u, 0x%x, 0x%llx)\n",
12352 __func__, i, e.index, e.value);
Wincy Vanff651cb2014-12-11 08:52:58 +030012353 goto fail;
Eugene Korenevskye9ac0332014-12-11 08:53:27 +030012354 }
Wincy Vanff651cb2014-12-11 08:52:58 +030012355 }
12356 return 0;
12357fail:
12358 return i + 1;
12359}
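
/*
 * A short note on the format consumed above (descriptive): each element
 * of the guest-physical array is a 16-byte struct vmx_msr_entry holding
 * a u32 index, a u32 reserved field and a u64 value.  The return value
 * is the 1-based number of the offending entry: if, say, the third
 * entry fails either the check or kvm_set_msr(), the function returns 3,
 * which the caller reports as the exit qualification of the "VM-entry
 * failure due to MSR loading" exit.
 */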
12360
12361static int nested_vmx_store_msr(struct kvm_vcpu *vcpu, u64 gpa, u32 count)
12362{
12363 u32 i;
12364 struct vmx_msr_entry e;
12365
12366 for (i = 0; i < count; i++) {
Paolo Bonzini609e36d2015-04-08 15:30:38 +020012367 struct msr_data msr_info;
Paolo Bonzini54bf36a2015-04-08 15:39:23 +020012368 if (kvm_vcpu_read_guest(vcpu,
12369 gpa + i * sizeof(e),
12370 &e, 2 * sizeof(u32))) {
Paolo Bonzinibbe41b92016-08-19 17:51:20 +020012371 pr_debug_ratelimited(
Eugene Korenevskye9ac0332014-12-11 08:53:27 +030012372 "%s cannot read MSR entry (%u, 0x%08llx)\n",
12373 __func__, i, gpa + i * sizeof(e));
Wincy Vanff651cb2014-12-11 08:52:58 +030012374 return -EINVAL;
Eugene Korenevskye9ac0332014-12-11 08:53:27 +030012375 }
12376 if (nested_vmx_store_msr_check(vcpu, &e)) {
Paolo Bonzinibbe41b92016-08-19 17:51:20 +020012377 pr_debug_ratelimited(
Eugene Korenevskye9ac0332014-12-11 08:53:27 +030012378 "%s check failed (%u, 0x%x, 0x%x)\n",
12379 __func__, i, e.index, e.reserved);
Wincy Vanff651cb2014-12-11 08:52:58 +030012380 return -EINVAL;
Eugene Korenevskye9ac0332014-12-11 08:53:27 +030012381 }
Paolo Bonzini609e36d2015-04-08 15:30:38 +020012382 msr_info.host_initiated = false;
12383 msr_info.index = e.index;
12384 if (kvm_get_msr(vcpu, &msr_info)) {
Paolo Bonzinibbe41b92016-08-19 17:51:20 +020012385 pr_debug_ratelimited(
Eugene Korenevskye9ac0332014-12-11 08:53:27 +030012386 "%s cannot read MSR (%u, 0x%x)\n",
12387 __func__, i, e.index);
12388 return -EINVAL;
12389 }
Paolo Bonzini54bf36a2015-04-08 15:39:23 +020012390 if (kvm_vcpu_write_guest(vcpu,
12391 gpa + i * sizeof(e) +
12392 offsetof(struct vmx_msr_entry, value),
12393 &msr_info.data, sizeof(msr_info.data))) {
Paolo Bonzinibbe41b92016-08-19 17:51:20 +020012394 pr_debug_ratelimited(
Eugene Korenevskye9ac0332014-12-11 08:53:27 +030012395 "%s cannot write MSR (%u, 0x%x, 0x%llx)\n",
Paolo Bonzini609e36d2015-04-08 15:30:38 +020012396 __func__, i, e.index, msr_info.data);
Eugene Korenevskye9ac0332014-12-11 08:53:27 +030012397 return -EINVAL;
12398 }
Wincy Vanff651cb2014-12-11 08:52:58 +030012399 }
12400 return 0;
12401}
12402
Ladi Prosek1dc35da2016-11-30 16:03:11 +010012403static bool nested_cr3_valid(struct kvm_vcpu *vcpu, unsigned long val)
12404{
12405 unsigned long invalid_mask;
12406
12407 invalid_mask = (~0ULL) << cpuid_maxphyaddr(vcpu);
12408 return (val & invalid_mask) == 0;
12409}
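
/*
 * For instance (illustrative), on a vCPU reporting MAXPHYADDR = 48 the
 * mask computed above is 0xffff000000000000: a CR3 value of 0x12345000
 * is accepted, while any value with bit 48 or above set -- for example
 * 0x1000000000000 -- is rejected as exceeding the guest's physical
 * address width.
 */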
12410
Nadav Har'Elfe3ef052011-05-25 23:10:02 +030012411/*
Ladi Prosek9ed38ffa2016-11-30 16:03:10 +010012412 * Load guest's/host's cr3 at nested entry/exit. nested_ept is true if we are
12413 * emulating VM entry into a guest with EPT enabled.
12414 * Returns 0 on success, 1 on failure. Invalid state exit qualification code
12415 * is assigned to entry_failure_code on failure.
12416 */
12417static int nested_vmx_load_cr3(struct kvm_vcpu *vcpu, unsigned long cr3, bool nested_ept,
Jim Mattsonca0bde22016-11-30 12:03:46 -080012418 u32 *entry_failure_code)
Ladi Prosek9ed38ffa2016-11-30 16:03:10 +010012419{
Ladi Prosek9ed38ffa2016-11-30 16:03:10 +010012420 if (cr3 != kvm_read_cr3(vcpu) || (!nested_ept && pdptrs_changed(vcpu))) {
Ladi Prosek1dc35da2016-11-30 16:03:11 +010012421 if (!nested_cr3_valid(vcpu, cr3)) {
Ladi Prosek9ed38ffa2016-11-30 16:03:10 +010012422 *entry_failure_code = ENTRY_FAIL_DEFAULT;
12423 return 1;
12424 }
12425
12426 /*
12427 * If PAE paging and EPT are both on, CR3 is not used by the CPU and
12428 * must not be dereferenced.
12429 */
12430 if (!is_long_mode(vcpu) && is_pae(vcpu) && is_paging(vcpu) &&
12431 !nested_ept) {
12432 if (!load_pdptrs(vcpu, vcpu->arch.walk_mmu, cr3)) {
12433 *entry_failure_code = ENTRY_FAIL_PDPTE;
12434 return 1;
12435 }
12436 }
Ladi Prosek9ed38ffa2016-11-30 16:03:10 +010012437 }
12438
Junaid Shahid50c28f22018-06-27 14:59:11 -070012439 if (!nested_ept)
Junaid Shahidade61e22018-06-27 14:59:15 -070012440 kvm_mmu_new_cr3(vcpu, cr3, false);
Junaid Shahid50c28f22018-06-27 14:59:11 -070012441
12442 vcpu->arch.cr3 = cr3;
12443 __set_bit(VCPU_EXREG_CR3, (ulong *)&vcpu->arch.regs_avail);
12444
12445 kvm_init_mmu(vcpu, false);
12446
Ladi Prosek9ed38ffa2016-11-30 16:03:10 +010012447 return 0;
12448}
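
/*
 * Example of the PAE special case above (illustrative): if L1 enters L2
 * with CR0.PG=1, CR4.PAE=1, EFER.LMA=0 and EPT disabled in vmcs12, the
 * four PDPTEs are read from the new CR3 right here, and a bad PDPTE
 * (e.g. reserved bits set) fails the VM-entry with the ENTRY_FAIL_PDPTE
 * exit qualification instead of faulting inside L2.
 */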
12449
Liran Alonefebf0a2018-10-08 23:42:20 +030012450/*
12451 * Returns true if KVM can configure the CPU to tag TLB entries
12452 * populated by L2 differently from TLB entries populated
12453 * by L1.
12454 *
12455 * If L1 uses EPT, then TLB entries are tagged with different EPTP.
12456 *
12457 * If L1 uses VPID and we allocated a vpid02, TLB entries are tagged
12458 * with different VPID (L1 entries are tagged with vmx->vpid
12459 * while L2 entries are tagged with vmx->nested.vpid02).
12460 */
12461static bool nested_has_guest_tlb_tag(struct kvm_vcpu *vcpu)
12462{
12463 struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
12464
12465 return nested_cpu_has_ept(vmcs12) ||
12466 (nested_cpu_has_vpid(vmcs12) && to_vmx(vcpu)->nested.vpid02);
12467}
12468
Sean Christopherson3df5c372018-09-26 09:23:44 -070012469static u64 nested_vmx_calc_efer(struct vcpu_vmx *vmx, struct vmcs12 *vmcs12)
12470{
12471 if (vmx->nested.nested_run_pending &&
12472 (vmcs12->vm_entry_controls & VM_ENTRY_LOAD_IA32_EFER))
12473 return vmcs12->guest_ia32_efer;
12474 else if (vmcs12->vm_entry_controls & VM_ENTRY_IA32E_MODE)
12475 return vmx->vcpu.arch.efer | (EFER_LMA | EFER_LME);
12476 else
12477 return vmx->vcpu.arch.efer & ~(EFER_LMA | EFER_LME);
12478}
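
/*
 * Decision summary for the helper above (descriptive): on a real nested
 * entry with VM_ENTRY_LOAD_IA32_EFER set, L2 gets vmcs12->guest_ia32_efer
 * verbatim; otherwise EFER is inherited from the current value, with
 * LMA/LME forced on when VM_ENTRY_IA32E_MODE is set and forced off when
 * it is clear.  A 32-bit L2 under a 64-bit L1, for example, ends up with
 * EFER.LMA = EFER.LME = 0 even though L1 itself runs in long mode.
 */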
12479
Sean Christopherson09abe322018-09-26 09:23:50 -070012480static void prepare_vmcs02_constant_state(struct vcpu_vmx *vmx)
Paolo Bonzini74a497f2017-12-20 13:55:39 +010012481{
Paolo Bonzini25a2e4f2017-12-20 14:05:21 +010012482 /*
Sean Christopherson9d6105b2018-09-26 09:23:51 -070012483 * If vmcs02 hasn't been initialized, set the constant vmcs02 state
Sean Christopherson09abe322018-09-26 09:23:50 -070012484 * according to L0's settings (vmcs12 is irrelevant here). Host
12485 * fields that come from L0 and are not constant, e.g. HOST_CR3,
12486 * will be set as needed prior to VMLAUNCH/VMRESUME.
Paolo Bonzini25a2e4f2017-12-20 14:05:21 +010012487 */
Sean Christopherson9d6105b2018-09-26 09:23:51 -070012488 if (vmx->nested.vmcs02_initialized)
Sean Christopherson09abe322018-09-26 09:23:50 -070012489 return;
Sean Christopherson9d6105b2018-09-26 09:23:51 -070012490 vmx->nested.vmcs02_initialized = true;
Paolo Bonzini25a2e4f2017-12-20 14:05:21 +010012491
Sean Christopherson52017602018-09-26 09:23:57 -070012492 /*
12493 * We don't care what the EPTP value is; we just need to guarantee
12494 * it's valid so we don't get a false positive when doing early
12495 * consistency checks.
12496 */
12497 if (enable_ept && nested_early_check)
12498 vmcs_write64(EPT_POINTER, construct_eptp(&vmx->vcpu, 0));
12499
Paolo Bonzini25a2e4f2017-12-20 14:05:21 +010012500 /* All VMFUNCs are currently emulated through L0 vmexits. */
12501 if (cpu_has_vmx_vmfunc())
12502 vmcs_write64(VM_FUNCTION_CONTROL, 0);
12503
Sean Christopherson09abe322018-09-26 09:23:50 -070012504 if (cpu_has_vmx_posted_intr())
12505 vmcs_write16(POSTED_INTR_NV, POSTED_INTR_NESTED_VECTOR);
12506
12507 if (cpu_has_vmx_msr_bitmap())
12508 vmcs_write64(MSR_BITMAP, __pa(vmx->nested.vmcs02.msr_bitmap));
12509
12510 if (enable_pml)
12511 vmcs_write64(PML_ADDRESS, page_to_phys(vmx->pml_pg));
Paolo Bonzini25a2e4f2017-12-20 14:05:21 +010012512
12513 /*
Sean Christopherson09abe322018-09-26 09:23:50 -070012514 * Set the MSR load/store lists to match L0's settings. Only the
12515 * addresses are constant (for vmcs02); the counts can change based
12516 * on L2's behavior, e.g. switching to/from long mode.
Paolo Bonzini25a2e4f2017-12-20 14:05:21 +010012517 */
12518 vmcs_write32(VM_EXIT_MSR_STORE_COUNT, 0);
Konrad Rzeszutek Wilk33966dd62018-06-20 13:58:37 -040012519 vmcs_write64(VM_EXIT_MSR_LOAD_ADDR, __pa(vmx->msr_autoload.host.val));
Konrad Rzeszutek Wilk33966dd62018-06-20 13:58:37 -040012520 vmcs_write64(VM_ENTRY_MSR_LOAD_ADDR, __pa(vmx->msr_autoload.guest.val));
Paolo Bonzini25a2e4f2017-12-20 14:05:21 +010012521
Sean Christopherson09abe322018-09-26 09:23:50 -070012522 vmx_set_constant_host_state(vmx);
12523}
Paolo Bonzini25a2e4f2017-12-20 14:05:21 +010012524
Sean Christopherson09abe322018-09-26 09:23:50 -070012525static void prepare_vmcs02_early_full(struct vcpu_vmx *vmx,
12526 struct vmcs12 *vmcs12)
12527{
12528 prepare_vmcs02_constant_state(vmx);
12529
12530 vmcs_write64(VMCS_LINK_POINTER, -1ull);
Paolo Bonzini25a2e4f2017-12-20 14:05:21 +010012531
12532 if (enable_vpid) {
12533 if (nested_cpu_has_vpid(vmcs12) && vmx->nested.vpid02)
12534 vmcs_write16(VIRTUAL_PROCESSOR_ID, vmx->nested.vpid02);
12535 else
12536 vmcs_write16(VIRTUAL_PROCESSOR_ID, vmx->vpid);
12537 }
Paolo Bonzini74a497f2017-12-20 13:55:39 +010012538}
12539
Sean Christopherson09abe322018-09-26 09:23:50 -070012540static void prepare_vmcs02_early(struct vcpu_vmx *vmx, struct vmcs12 *vmcs12)
Nadav Har'Elfe3ef052011-05-25 23:10:02 +030012541{
Bandan Das03efce62017-05-05 15:25:15 -040012542 u32 exec_control, vmcs12_exec_ctrl;
Sean Christopherson09abe322018-09-26 09:23:50 -070012543 u64 guest_efer = nested_vmx_calc_efer(vmx, vmcs12);
Nadav Har'Elfe3ef052011-05-25 23:10:02 +030012544
Vitaly Kuznetsov945679e2018-10-16 18:50:02 +020012545 if (vmx->nested.dirty_vmcs12 || vmx->nested.hv_evmcs)
Sean Christopherson09abe322018-09-26 09:23:50 -070012546 prepare_vmcs02_early_full(vmx, vmcs12);
Sean Christopherson9d1887e2018-03-05 09:33:27 -080012547
Paolo Bonzini8665c3f2017-12-20 13:56:53 +010012548 /*
Sean Christopherson09abe322018-09-26 09:23:50 -070012549 * HOST_RSP is normally set correctly in vmx_vcpu_run() just before
12550 * entry, but only if the current (host) sp changed from the value
12551 * we wrote last (vmx->host_rsp). This cache is no longer relevant
12552 * if we switch vmcs, and rather than hold a separate cache per vmcs,
Sean Christopherson52017602018-09-26 09:23:57 -070012553 * here we just force the write to happen on entry. host_rsp will
12554 * also be written unconditionally by nested_vmx_check_vmentry_hw()
12555 * if we are doing early consistency checks via hardware.
Paolo Bonzini8665c3f2017-12-20 13:56:53 +010012556 */
Sean Christopherson09abe322018-09-26 09:23:50 -070012557 vmx->host_rsp = 0;
Paolo Bonzini8665c3f2017-12-20 13:56:53 +010012558
Sean Christopherson09abe322018-09-26 09:23:50 -070012559 /*
12560 * PIN CONTROLS
12561 */
Jan Kiszkaf41245002014-03-07 20:03:13 +010012562 exec_control = vmcs12->pin_based_vm_exec_control;
Wincy Van705699a2015-02-03 23:58:17 +080012563
Sean Christophersonf459a702018-08-27 15:21:11 -070012564 /* Preemption timer setting is computed directly in vmx_vcpu_run. */
Paolo Bonzini9314006db2016-07-06 13:23:51 +020012565 exec_control |= vmcs_config.pin_based_exec_ctrl;
Sean Christophersonf459a702018-08-27 15:21:11 -070012566 exec_control &= ~PIN_BASED_VMX_PREEMPTION_TIMER;
12567 vmx->loaded_vmcs->hv_timer_armed = false;
Paolo Bonzini9314006db2016-07-06 13:23:51 +020012568
12569 /* Posted interrupts setting is only taken from vmcs12. */
Wincy Van705699a2015-02-03 23:58:17 +080012570 if (nested_cpu_has_posted_intr(vmcs12)) {
Wincy Van705699a2015-02-03 23:58:17 +080012571 vmx->nested.posted_intr_nv = vmcs12->posted_intr_nv;
12572 vmx->nested.pi_pending = false;
Jim Mattson6beb7bd2016-11-30 12:03:45 -080012573 } else {
Wincy Van705699a2015-02-03 23:58:17 +080012574 exec_control &= ~PIN_BASED_POSTED_INTR;
Jim Mattson6beb7bd2016-11-30 12:03:45 -080012575 }
Jan Kiszkaf41245002014-03-07 20:03:13 +010012576 vmcs_write32(PIN_BASED_VM_EXEC_CONTROL, exec_control);
Nadav Har'Elfe3ef052011-05-25 23:10:02 +030012577
Sean Christopherson09abe322018-09-26 09:23:50 -070012578 /*
12579 * EXEC CONTROLS
12580 */
12581 exec_control = vmx_exec_control(vmx); /* L0's desires */
12582 exec_control &= ~CPU_BASED_VIRTUAL_INTR_PENDING;
12583 exec_control &= ~CPU_BASED_VIRTUAL_NMI_PENDING;
12584 exec_control &= ~CPU_BASED_TPR_SHADOW;
12585 exec_control |= vmcs12->cpu_based_vm_exec_control;
Jan Kiszka0238ea92013-03-13 11:31:24 +010012586
Sean Christopherson09abe322018-09-26 09:23:50 -070012587 /*
12588 * Write an illegal value to VIRTUAL_APIC_PAGE_ADDR. Later, if
12589 * nested_get_vmcs12_pages can't fix it up, the illegal value
12590 * will result in a VM entry failure.
12591 */
12592 if (exec_control & CPU_BASED_TPR_SHADOW) {
12593 vmcs_write64(VIRTUAL_APIC_PAGE_ADDR, -1ull);
12594 vmcs_write32(TPR_THRESHOLD, vmcs12->tpr_threshold);
12595 } else {
12596#ifdef CONFIG_X86_64
12597 exec_control |= CPU_BASED_CR8_LOAD_EXITING |
12598 CPU_BASED_CR8_STORE_EXITING;
12599#endif
12600 }
12601
12602 /*
12603 * A vmexit (to either L1 hypervisor or L0 userspace) is always needed
12604 * for I/O port accesses.
12605 */
12606 exec_control &= ~CPU_BASED_USE_IO_BITMAPS;
12607 exec_control |= CPU_BASED_UNCOND_IO_EXITING;
12608 vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, exec_control);
12609
12610 /*
12611 * SECONDARY EXEC CONTROLS
12612 */
Nadav Har'Elfe3ef052011-05-25 23:10:02 +030012613 if (cpu_has_secondary_exec_ctrls()) {
Paolo Bonzini80154d72017-08-24 13:55:35 +020012614 exec_control = vmx->secondary_exec_control;
Xiao Guangronge2821622015-09-09 14:05:52 +080012615
Nadav Har'Elfe3ef052011-05-25 23:10:02 +030012616 /* Take the following fields only from vmcs12 */
Paolo Bonzini696dfd92014-05-07 11:20:54 +020012617 exec_control &= ~(SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES |
Paolo Bonzini90a2db62017-07-27 13:22:13 +020012618 SECONDARY_EXEC_ENABLE_INVPCID |
Jan Kiszkab3a2a902015-03-23 19:27:19 +010012619 SECONDARY_EXEC_RDTSCP |
Paolo Bonzini3db13482017-08-24 14:48:03 +020012620 SECONDARY_EXEC_XSAVES |
Paolo Bonzini696dfd92014-05-07 11:20:54 +020012621 SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY |
Bandan Das27c42a12017-08-03 15:54:42 -040012622 SECONDARY_EXEC_APIC_REGISTER_VIRT |
12623 SECONDARY_EXEC_ENABLE_VMFUNC);
Nadav Har'Elfe3ef052011-05-25 23:10:02 +030012624 if (nested_cpu_has(vmcs12,
Bandan Das03efce62017-05-05 15:25:15 -040012625 CPU_BASED_ACTIVATE_SECONDARY_CONTROLS)) {
12626 vmcs12_exec_ctrl = vmcs12->secondary_vm_exec_control &
12627 ~SECONDARY_EXEC_ENABLE_PML;
12628 exec_control |= vmcs12_exec_ctrl;
12629 }
Nadav Har'Elfe3ef052011-05-25 23:10:02 +030012630
Liran Alon32c7acf2018-06-23 02:35:11 +030012631 /* VMCS shadowing for L2 is emulated for now */
12632 exec_control &= ~SECONDARY_EXEC_SHADOW_VMCS;
12633
Paolo Bonzini25a2e4f2017-12-20 14:05:21 +010012634 if (exec_control & SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY)
Wincy Van608406e2015-02-03 23:57:51 +080012635 vmcs_write16(GUEST_INTR_STATUS,
12636 vmcs12->guest_intr_status);
Wincy Van608406e2015-02-03 23:57:51 +080012637
Jim Mattson6beb7bd2016-11-30 12:03:45 -080012638 /*
12639 * Write an illegal value to APIC_ACCESS_ADDR. Later,
12640 * nested_get_vmcs12_pages will either fix it up or
12641 * remove the VM execution control.
12642 */
12643 if (exec_control & SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES)
12644 vmcs_write64(APIC_ACCESS_ADDR, -1ull);
12645
Sean Christopherson0b665d32018-08-14 09:33:34 -070012646 if (exec_control & SECONDARY_EXEC_ENCLS_EXITING)
12647 vmcs_write64(ENCLS_EXITING_BITMAP, -1ull);
12648
Nadav Har'Elfe3ef052011-05-25 23:10:02 +030012649 vmcs_write32(SECONDARY_VM_EXEC_CONTROL, exec_control);
12650 }
12651
Jim Mattson83bafef2016-10-04 10:48:38 -070012652 /*
Sean Christopherson09abe322018-09-26 09:23:50 -070012653 * ENTRY CONTROLS
12654 *
Sean Christopherson3df5c372018-09-26 09:23:44 -070012655 * vmcs12's VM_{ENTRY,EXIT}_LOAD_IA32_EFER and VM_ENTRY_IA32E_MODE
Sean Christopherson09abe322018-09-26 09:23:50 -070012656 * are emulated by vmx_set_efer() in prepare_vmcs02(), but speculate
12657 * on the related bits (if supported by the CPU) in the hope that
12658 * we can avoid VMWrites during vmx_set_efer().
Sean Christopherson3df5c372018-09-26 09:23:44 -070012659 */
Sean Christopherson3df5c372018-09-26 09:23:44 -070012660 exec_control = (vmcs12->vm_entry_controls | vmcs_config.vmentry_ctrl) &
12661 ~VM_ENTRY_IA32E_MODE & ~VM_ENTRY_LOAD_IA32_EFER;
12662 if (cpu_has_load_ia32_efer) {
12663 if (guest_efer & EFER_LMA)
12664 exec_control |= VM_ENTRY_IA32E_MODE;
12665 if (guest_efer != host_efer)
12666 exec_control |= VM_ENTRY_LOAD_IA32_EFER;
12667 }
12668 vm_entry_controls_init(vmx, exec_control);
Nadav Har'Elfe3ef052011-05-25 23:10:02 +030012669
Sean Christopherson09abe322018-09-26 09:23:50 -070012670 /*
12671 * EXIT CONTROLS
12672 *
12673 * L2->L1 exit controls are emulated - the hardware exit is to L0 so
12674 * we should use its exit controls. Note that VM_EXIT_LOAD_IA32_EFER
12675 * bits may be modified by vmx_set_efer() in prepare_vmcs02().
12676 */
12677 exec_control = vmcs_config.vmexit_ctrl;
12678 if (cpu_has_load_ia32_efer && guest_efer != host_efer)
12679 exec_control |= VM_EXIT_LOAD_IA32_EFER;
12680 vm_exit_controls_init(vmx, exec_control);
12681
12682 /*
12683 * Conceptually we want to copy the PML address and index from
12684 * vmcs01 here, and then back to vmcs01 on nested vmexit. But,
12685 * since we always flush the log on each vmexit and never change
12686 * the PML address (once set), this happens to be equivalent to
12687 * simply resetting the index in vmcs02.
12688 */
12689 if (enable_pml)
12690 vmcs_write16(GUEST_PML_INDEX, PML_ENTITY_NUM - 1);
12691
12692 /*
12693 * Interrupt/Exception Fields
12694 */
12695 if (vmx->nested.nested_run_pending) {
12696 vmcs_write32(VM_ENTRY_INTR_INFO_FIELD,
12697 vmcs12->vm_entry_intr_info_field);
12698 vmcs_write32(VM_ENTRY_EXCEPTION_ERROR_CODE,
12699 vmcs12->vm_entry_exception_error_code);
12700 vmcs_write32(VM_ENTRY_INSTRUCTION_LEN,
12701 vmcs12->vm_entry_instruction_len);
12702 vmcs_write32(GUEST_INTERRUPTIBILITY_INFO,
12703 vmcs12->guest_interruptibility_info);
12704 vmx->loaded_vmcs->nmi_known_unmasked =
12705 !(vmcs12->guest_interruptibility_info & GUEST_INTR_STATE_NMI);
12706 } else {
12707 vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, 0);
12708 }
12709}
12710
12711static void prepare_vmcs02_full(struct vcpu_vmx *vmx, struct vmcs12 *vmcs12)
12712{
Vitaly Kuznetsovc4ebd622018-10-16 18:50:04 +020012713 struct hv_enlightened_vmcs *hv_evmcs = vmx->nested.hv_evmcs;
Sean Christopherson09abe322018-09-26 09:23:50 -070012714
Vitaly Kuznetsovc4ebd622018-10-16 18:50:04 +020012715 if (!hv_evmcs || !(hv_evmcs->hv_clean_fields &
12716 HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2)) {
12717 vmcs_write16(GUEST_ES_SELECTOR, vmcs12->guest_es_selector);
12718 vmcs_write16(GUEST_SS_SELECTOR, vmcs12->guest_ss_selector);
12719 vmcs_write16(GUEST_DS_SELECTOR, vmcs12->guest_ds_selector);
12720 vmcs_write16(GUEST_FS_SELECTOR, vmcs12->guest_fs_selector);
12721 vmcs_write16(GUEST_GS_SELECTOR, vmcs12->guest_gs_selector);
12722 vmcs_write16(GUEST_LDTR_SELECTOR, vmcs12->guest_ldtr_selector);
12723 vmcs_write16(GUEST_TR_SELECTOR, vmcs12->guest_tr_selector);
12724 vmcs_write32(GUEST_ES_LIMIT, vmcs12->guest_es_limit);
12725 vmcs_write32(GUEST_SS_LIMIT, vmcs12->guest_ss_limit);
12726 vmcs_write32(GUEST_DS_LIMIT, vmcs12->guest_ds_limit);
12727 vmcs_write32(GUEST_FS_LIMIT, vmcs12->guest_fs_limit);
12728 vmcs_write32(GUEST_GS_LIMIT, vmcs12->guest_gs_limit);
12729 vmcs_write32(GUEST_LDTR_LIMIT, vmcs12->guest_ldtr_limit);
12730 vmcs_write32(GUEST_TR_LIMIT, vmcs12->guest_tr_limit);
12731 vmcs_write32(GUEST_GDTR_LIMIT, vmcs12->guest_gdtr_limit);
12732 vmcs_write32(GUEST_IDTR_LIMIT, vmcs12->guest_idtr_limit);
12733 vmcs_write32(GUEST_ES_AR_BYTES, vmcs12->guest_es_ar_bytes);
12734 vmcs_write32(GUEST_SS_AR_BYTES, vmcs12->guest_ss_ar_bytes);
12735 vmcs_write32(GUEST_DS_AR_BYTES, vmcs12->guest_ds_ar_bytes);
12736 vmcs_write32(GUEST_FS_AR_BYTES, vmcs12->guest_fs_ar_bytes);
12737 vmcs_write32(GUEST_GS_AR_BYTES, vmcs12->guest_gs_ar_bytes);
12738 vmcs_write32(GUEST_LDTR_AR_BYTES, vmcs12->guest_ldtr_ar_bytes);
12739 vmcs_write32(GUEST_TR_AR_BYTES, vmcs12->guest_tr_ar_bytes);
12740 vmcs_writel(GUEST_SS_BASE, vmcs12->guest_ss_base);
12741 vmcs_writel(GUEST_DS_BASE, vmcs12->guest_ds_base);
12742 vmcs_writel(GUEST_FS_BASE, vmcs12->guest_fs_base);
12743 vmcs_writel(GUEST_GS_BASE, vmcs12->guest_gs_base);
12744 vmcs_writel(GUEST_LDTR_BASE, vmcs12->guest_ldtr_base);
12745 vmcs_writel(GUEST_TR_BASE, vmcs12->guest_tr_base);
12746 vmcs_writel(GUEST_GDTR_BASE, vmcs12->guest_gdtr_base);
12747 vmcs_writel(GUEST_IDTR_BASE, vmcs12->guest_idtr_base);
12748 }
12749
12750 if (!hv_evmcs || !(hv_evmcs->hv_clean_fields &
12751 HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1)) {
12752 vmcs_write32(GUEST_SYSENTER_CS, vmcs12->guest_sysenter_cs);
12753 vmcs_writel(GUEST_PENDING_DBG_EXCEPTIONS,
12754 vmcs12->guest_pending_dbg_exceptions);
12755 vmcs_writel(GUEST_SYSENTER_ESP, vmcs12->guest_sysenter_esp);
12756 vmcs_writel(GUEST_SYSENTER_EIP, vmcs12->guest_sysenter_eip);
12757
12758 /*
12759 * L1 may access L2's PDPTRs, so save them in order to construct
12760 * vmcs12.
12761 */
12762 if (enable_ept) {
12763 vmcs_write64(GUEST_PDPTR0, vmcs12->guest_pdptr0);
12764 vmcs_write64(GUEST_PDPTR1, vmcs12->guest_pdptr1);
12765 vmcs_write64(GUEST_PDPTR2, vmcs12->guest_pdptr2);
12766 vmcs_write64(GUEST_PDPTR3, vmcs12->guest_pdptr3);
12767 }
12768 }
Sean Christopherson09abe322018-09-26 09:23:50 -070012769
12770 if (nested_cpu_has_xsaves(vmcs12))
12771 vmcs_write64(XSS_EXIT_BITMAP, vmcs12->xss_exit_bitmap);
12772
12773 /*
12774 * Whether page-faults are trapped is determined by a combination of
12775 * 3 settings: PFEC_MASK, PFEC_MATCH and EXCEPTION_BITMAP.PF.
12776 * If enable_ept, L0 doesn't care about page faults and we should
12777 * set all of these to L1's desires. However, if !enable_ept, L0 does
12778 * care about (at least some) page faults, and because it is not easy
12779 * (if at all possible?) to merge L0 and L1's desires, we simply ask
12780 * to exit on each and every L2 page fault. This is done by setting
12781 * MASK=MATCH=0 and (see below) EB.PF=1.
12782 * Note that below we don't need special code to set EB.PF beyond the
12783 * "or"ing of the EB of vmcs01 and vmcs12, because when enable_ept,
12784 * vmcs01's EB.PF is 0 so the "or" will take vmcs12's value, and when
12785 * !enable_ept, EB.PF is 1, so the "or" will always be 1.
12786 */
12787 vmcs_write32(PAGE_FAULT_ERROR_CODE_MASK,
12788 enable_ept ? vmcs12->page_fault_error_code_mask : 0);
12789 vmcs_write32(PAGE_FAULT_ERROR_CODE_MATCH,
12790 enable_ept ? vmcs12->page_fault_error_code_match : 0);
12791
12792 if (cpu_has_vmx_apicv()) {
12793 vmcs_write64(EOI_EXIT_BITMAP0, vmcs12->eoi_exit_bitmap0);
12794 vmcs_write64(EOI_EXIT_BITMAP1, vmcs12->eoi_exit_bitmap1);
12795 vmcs_write64(EOI_EXIT_BITMAP2, vmcs12->eoi_exit_bitmap2);
12796 vmcs_write64(EOI_EXIT_BITMAP3, vmcs12->eoi_exit_bitmap3);
12797 }
12798
12799 vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, vmx->msr_autoload.host.nr);
12800 vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, vmx->msr_autoload.guest.nr);
12801
12802 set_cr4_guest_host_mask(vmx);
12803
12804 if (kvm_mpx_supported()) {
12805 if (vmx->nested.nested_run_pending &&
12806 (vmcs12->vm_entry_controls & VM_ENTRY_LOAD_BNDCFGS))
12807 vmcs_write64(GUEST_BNDCFGS, vmcs12->guest_bndcfgs);
12808 else
12809 vmcs_write64(GUEST_BNDCFGS, vmx->nested.vmcs01_guest_bndcfgs);
12810 }
Sean Christopherson09abe322018-09-26 09:23:50 -070012811}
12812
12813/*
12814 * prepare_vmcs02 is called when the L1 guest hypervisor runs its nested
12815 * L2 guest. L1 has a vmcs for L2 (vmcs12), and this function "merges" it
12816 * with L0's requirements for its guest (a.k.a. vmcs01), so we can run the L2
12817 * guest in a way that is appropriate both to L1's requests and to our
12818 * needs. In addition to modifying the active vmcs (which is vmcs02), this
12819 * function also has necessary side effects, such as setting various
12820 * vcpu->arch fields.
12821 * Returns 0 on success, 1 on failure. Invalid state exit qualification code
12822 * is assigned to entry_failure_code on failure.
12823 */
12824static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
12825 u32 *entry_failure_code)
12826{
12827 struct vcpu_vmx *vmx = to_vmx(vcpu);
Vitaly Kuznetsovc4ebd622018-10-16 18:50:04 +020012828 struct hv_enlightened_vmcs *hv_evmcs = vmx->nested.hv_evmcs;
Sean Christopherson09abe322018-09-26 09:23:50 -070012829
Vitaly Kuznetsov945679e2018-10-16 18:50:02 +020012830 if (vmx->nested.dirty_vmcs12 || vmx->nested.hv_evmcs) {
Sean Christopherson09abe322018-09-26 09:23:50 -070012831 prepare_vmcs02_full(vmx, vmcs12);
12832 vmx->nested.dirty_vmcs12 = false;
12833 }
12834
12835 /*
12836 * First, the fields that are shadowed. This must be kept in sync
12837 * with vmx_shadow_fields.h.
12838 */
Vitaly Kuznetsovc4ebd622018-10-16 18:50:04 +020012839 if (!hv_evmcs || !(hv_evmcs->hv_clean_fields &
12840 HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2)) {
12841 vmcs_write16(GUEST_CS_SELECTOR, vmcs12->guest_cs_selector);
12842 vmcs_write32(GUEST_CS_LIMIT, vmcs12->guest_cs_limit);
12843 vmcs_write32(GUEST_CS_AR_BYTES, vmcs12->guest_cs_ar_bytes);
12844 vmcs_writel(GUEST_ES_BASE, vmcs12->guest_es_base);
12845 vmcs_writel(GUEST_CS_BASE, vmcs12->guest_cs_base);
12846 }
Sean Christopherson09abe322018-09-26 09:23:50 -070012847
12848 if (vmx->nested.nested_run_pending &&
12849 (vmcs12->vm_entry_controls & VM_ENTRY_LOAD_DEBUG_CONTROLS)) {
12850 kvm_set_dr(vcpu, 7, vmcs12->guest_dr7);
12851 vmcs_write64(GUEST_IA32_DEBUGCTL, vmcs12->guest_ia32_debugctl);
12852 } else {
12853 kvm_set_dr(vcpu, 7, vcpu->arch.dr7);
12854 vmcs_write64(GUEST_IA32_DEBUGCTL, vmx->nested.vmcs01_debugctl);
12855 }
12856 vmx_set_rflags(vcpu, vmcs12->guest_rflags);
12857
12858 vmx->nested.preemption_timer_expired = false;
12859 if (nested_cpu_has_preemption_timer(vmcs12))
12860 vmx_start_preemption_timer(vcpu);
12861
12862 /* EXCEPTION_BITMAP and CR0_GUEST_HOST_MASK should basically be the
12863 * bitwise-or of what L1 wants to trap for L2, and what we want to
12864 * trap. Note that CR0.TS also needs updating - we do this later.
12865 */
12866 update_exception_bitmap(vcpu);
12867 vcpu->arch.cr0_guest_owned_bits &= ~vmcs12->cr0_guest_host_mask;
12868 vmcs_writel(CR0_GUEST_HOST_MASK, ~vcpu->arch.cr0_guest_owned_bits);
12869
Jim Mattson6514dc32018-04-26 16:09:12 -070012870 if (vmx->nested.nested_run_pending &&
Jim Mattsoncf8b84f2016-11-30 12:03:42 -080012871 (vmcs12->vm_entry_controls & VM_ENTRY_LOAD_IA32_PAT)) {
Nadav Har'Elfe3ef052011-05-25 23:10:02 +030012872 vmcs_write64(GUEST_IA32_PAT, vmcs12->guest_ia32_pat);
Jan Kiszka44811c02013-08-04 17:17:27 +020012873 vcpu->arch.pat = vmcs12->guest_ia32_pat;
Jim Mattsoncf8b84f2016-11-30 12:03:42 -080012874 } else if (vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_IA32_PAT) {
Nadav Har'Elfe3ef052011-05-25 23:10:02 +030012875 vmcs_write64(GUEST_IA32_PAT, vmx->vcpu.arch.pat);
Jim Mattsoncf8b84f2016-11-30 12:03:42 -080012876 }
Nadav Har'Elfe3ef052011-05-25 23:10:02 +030012877
KarimAllah Ahmede79f2452018-04-14 05:10:52 +020012878 vmcs_write64(TSC_OFFSET, vcpu->arch.tsc_offset);
12879
Peter Feinerc95ba922016-08-17 09:36:47 -070012880 if (kvm_has_tsc_control)
12881 decache_tsc_multiplier(vmx);
Nadav Har'Elfe3ef052011-05-25 23:10:02 +030012882
12883 if (enable_vpid) {
12884 /*
Wanpeng Li5c614b32015-10-13 09:18:36 -070012885 * There is no direct mapping between vpid02 and vpid12: vpid02 is
12886 * per-vCPU in L0 and is reused, while vpid12 changes (with one
12887 * INVVPID) on each nested vmentry.
12888 * vpid12 is allocated by L1 for L2, so it does not influence the
12889 * global bitmap (for vpid01 and vpid02 allocation) even if L1
12890 * spawns a lot of nested vCPUs.
Nadav Har'Elfe3ef052011-05-25 23:10:02 +030012891 */
Liran Alonefebf0a2018-10-08 23:42:20 +030012892 if (nested_cpu_has_vpid(vmcs12) && nested_has_guest_tlb_tag(vcpu)) {
Wanpeng Li5c614b32015-10-13 09:18:36 -070012893 if (vmcs12->virtual_processor_id != vmx->nested.last_vpid) {
12894 vmx->nested.last_vpid = vmcs12->virtual_processor_id;
Liran Alonefebf0a2018-10-08 23:42:20 +030012895 __vmx_flush_tlb(vcpu, nested_get_vpid02(vcpu), false);
Wanpeng Li5c614b32015-10-13 09:18:36 -070012896 }
12897 } else {
Liran Alon14389212018-10-08 23:42:17 +030012898 /*
12899 * If L1 uses EPT, then L0 needs to execute INVEPT on
12900 * EPTP02 instead of EPTP01. Therefore, delay TLB
12901 * flush until vmcs02->eptp is fully updated by
12902 * KVM_REQ_LOAD_CR3. Note that this assumes
12903 * KVM_REQ_TLB_FLUSH is evaluated after
12904 * KVM_REQ_LOAD_CR3 in vcpu_enter_guest().
12905 */
12906 kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
Wanpeng Li5c614b32015-10-13 09:18:36 -070012907 }
Nadav Har'Elfe3ef052011-05-25 23:10:02 +030012908 }
12909
Sean Christopherson5b8ba412018-09-26 09:23:40 -070012910 if (nested_cpu_has_ept(vmcs12))
12911 nested_ept_init_mmu_context(vcpu);
12912 else if (nested_cpu_has2(vmcs12,
12913 SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES))
Junaid Shahida468f2d2018-04-26 13:09:50 -070012914 vmx_flush_tlb(vcpu, true);
Nadav Har'El155a97a2013-08-05 11:07:16 +030012915
Nadav Har'Elfe3ef052011-05-25 23:10:02 +030012916 /*
Paolo Bonzinibd7e5b02017-02-03 21:18:52 -080012917 * This sets GUEST_CR0 to vmcs12->guest_cr0, possibly modifying those
12918 * bits that we require to be enabled.
Nadav Har'Elfe3ef052011-05-25 23:10:02 +030012919 * The CR0_READ_SHADOW is what L2 should have expected to read given
12920 * the specifications by L1; it's not enough to take
12921 * vmcs12->cr0_read_shadow because on our cr0_guest_host_mask we
12922 * have more bits than L1 expected.
12923 */
12924 vmx_set_cr0(vcpu, vmcs12->guest_cr0);
12925 vmcs_writel(CR0_READ_SHADOW, nested_read_cr0(vmcs12));
12926
12927 vmx_set_cr4(vcpu, vmcs12->guest_cr4);
12928 vmcs_writel(CR4_READ_SHADOW, nested_read_cr4(vmcs12));
12929
Sean Christopherson09abe322018-09-26 09:23:50 -070012930 vcpu->arch.efer = nested_vmx_calc_efer(vmx, vmcs12);
Sean Christopherson3df5c372018-09-26 09:23:44 -070012931 /* Note: may modify VM_ENTRY/EXIT_CONTROLS and GUEST/HOST_IA32_EFER */
David Matlack5a6a9742016-11-29 18:14:10 -080012932 vmx_set_efer(vcpu, vcpu->arch.efer);
12933
Sean Christopherson2bb8caf2018-03-12 10:56:13 -070012934 /*
12935 * Guest state is invalid and unrestricted guest is disabled,
12936 * which means L1 attempted VMEntry to L2 with invalid state.
12937 * Fail the VMEntry.
12938 */
Paolo Bonzini3184a992018-03-21 14:20:18 +010012939 if (vmx->emulation_required) {
12940 *entry_failure_code = ENTRY_FAIL_DEFAULT;
Sean Christopherson2bb8caf2018-03-12 10:56:13 -070012941 return 1;
Paolo Bonzini3184a992018-03-21 14:20:18 +010012942 }
Sean Christopherson2bb8caf2018-03-12 10:56:13 -070012943
Ladi Prosek9ed38ffa2016-11-30 16:03:10 +010012944 /* Load guest CR3, whether backed by EPT or by shadow page tables. */
Ladi Prosek7ad658b2017-03-23 07:18:08 +010012945 if (nested_vmx_load_cr3(vcpu, vmcs12->guest_cr3, nested_cpu_has_ept(vmcs12),
Ladi Prosek9ed38ffa2016-11-30 16:03:10 +010012946 entry_failure_code))
12947 return 1;
Ladi Prosek7ca29de2016-11-30 16:03:08 +010012948
Gleb Natapovfeaf0c7d2013-09-25 12:51:36 +030012949 if (!enable_ept)
12950 vcpu->arch.walk_mmu->inject_page_fault = vmx_inject_page_fault_nested;
12951
Nadav Har'Elfe3ef052011-05-25 23:10:02 +030012952 kvm_register_write(vcpu, VCPU_REGS_RSP, vmcs12->guest_rsp);
12953 kvm_register_write(vcpu, VCPU_REGS_RIP, vmcs12->guest_rip);
Ladi Prosekee146c12016-11-30 16:03:09 +010012954 return 0;
Nadav Har'Elfe3ef052011-05-25 23:10:02 +030012955}
12956
Krish Sadhukhan0c7f6502018-02-20 21:24:39 -050012957static int nested_vmx_check_nmi_controls(struct vmcs12 *vmcs12)
12958{
12959 if (!nested_cpu_has_nmi_exiting(vmcs12) &&
12960 nested_cpu_has_virtual_nmis(vmcs12))
12961 return -EINVAL;
12962
12963 if (!nested_cpu_has_virtual_nmis(vmcs12) &&
12964 nested_cpu_has(vmcs12, CPU_BASED_VIRTUAL_NMI_PENDING))
12965 return -EINVAL;
12966
12967 return 0;
12968}
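
/*
 * The two rules enforced above mirror the architectural requirements:
 * "virtual NMIs" may only be set together with "NMI exiting", and
 * "NMI-window exiting" (CPU_BASED_VIRTUAL_NMI_PENDING) may only be set
 * together with "virtual NMIs".  A vmcs12 violating either rule fails
 * the control-field checks below during nested VM-entry.
 */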
12969
Jim Mattsonca0bde22016-11-30 12:03:46 -080012970static int check_vmentry_prereqs(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
12971{
12972 struct vcpu_vmx *vmx = to_vmx(vcpu);
Sean Christopherson64a919f2018-09-26 09:23:39 -070012973 bool ia32e;
Jim Mattsonca0bde22016-11-30 12:03:46 -080012974
12975 if (vmcs12->guest_activity_state != GUEST_ACTIVITY_ACTIVE &&
12976 vmcs12->guest_activity_state != GUEST_ACTIVITY_HLT)
12977 return VMXERR_ENTRY_INVALID_CONTROL_FIELD;
12978
Krish Sadhukhanba8e23d2018-09-04 14:42:58 -040012979 if (nested_cpu_has_vpid(vmcs12) && !vmcs12->virtual_processor_id)
12980 return VMXERR_ENTRY_INVALID_CONTROL_FIELD;
12981
Jim Mattson56a20512017-07-06 16:33:06 -070012982 if (nested_vmx_check_io_bitmap_controls(vcpu, vmcs12))
12983 return VMXERR_ENTRY_INVALID_CONTROL_FIELD;
12984
Jim Mattsonca0bde22016-11-30 12:03:46 -080012985 if (nested_vmx_check_msr_bitmap_controls(vcpu, vmcs12))
12986 return VMXERR_ENTRY_INVALID_CONTROL_FIELD;
12987
Krish Sadhukhanf0f4cf52018-04-11 01:10:16 -040012988 if (nested_vmx_check_apic_access_controls(vcpu, vmcs12))
12989 return VMXERR_ENTRY_INVALID_CONTROL_FIELD;
12990
Jim Mattson712b12d2017-08-24 13:24:47 -070012991 if (nested_vmx_check_tpr_shadow_controls(vcpu, vmcs12))
12992 return VMXERR_ENTRY_INVALID_CONTROL_FIELD;
12993
Jim Mattsonca0bde22016-11-30 12:03:46 -080012994 if (nested_vmx_check_apicv_controls(vcpu, vmcs12))
12995 return VMXERR_ENTRY_INVALID_CONTROL_FIELD;
12996
12997 if (nested_vmx_check_msr_switch_controls(vcpu, vmcs12))
12998 return VMXERR_ENTRY_INVALID_CONTROL_FIELD;
12999
Bandan Dasc5f983f2017-05-05 15:25:14 -040013000 if (nested_vmx_check_pml_controls(vcpu, vmcs12))
13001 return VMXERR_ENTRY_INVALID_CONTROL_FIELD;
13002
Liran Alona8a7c022018-06-23 02:35:06 +030013003 if (nested_vmx_check_shadow_vmcs_controls(vcpu, vmcs12))
13004 return VMXERR_ENTRY_INVALID_CONTROL_FIELD;
13005
Jim Mattsonca0bde22016-11-30 12:03:46 -080013006 if (!vmx_control_verify(vmcs12->cpu_based_vm_exec_control,
Paolo Bonzini6677f3d2018-02-26 13:40:08 +010013007 vmx->nested.msrs.procbased_ctls_low,
13008 vmx->nested.msrs.procbased_ctls_high) ||
Jim Mattson2e5b0bd2017-05-04 11:51:58 -070013009 (nested_cpu_has(vmcs12, CPU_BASED_ACTIVATE_SECONDARY_CONTROLS) &&
13010 !vmx_control_verify(vmcs12->secondary_vm_exec_control,
Paolo Bonzini6677f3d2018-02-26 13:40:08 +010013011 vmx->nested.msrs.secondary_ctls_low,
13012 vmx->nested.msrs.secondary_ctls_high)) ||
Jim Mattsonca0bde22016-11-30 12:03:46 -080013013 !vmx_control_verify(vmcs12->pin_based_vm_exec_control,
Paolo Bonzini6677f3d2018-02-26 13:40:08 +010013014 vmx->nested.msrs.pinbased_ctls_low,
13015 vmx->nested.msrs.pinbased_ctls_high) ||
Jim Mattsonca0bde22016-11-30 12:03:46 -080013016 !vmx_control_verify(vmcs12->vm_exit_controls,
Paolo Bonzini6677f3d2018-02-26 13:40:08 +010013017 vmx->nested.msrs.exit_ctls_low,
13018 vmx->nested.msrs.exit_ctls_high) ||
Jim Mattsonca0bde22016-11-30 12:03:46 -080013019 !vmx_control_verify(vmcs12->vm_entry_controls,
Paolo Bonzini6677f3d2018-02-26 13:40:08 +010013020 vmx->nested.msrs.entry_ctls_low,
13021 vmx->nested.msrs.entry_ctls_high))
Jim Mattsonca0bde22016-11-30 12:03:46 -080013022 return VMXERR_ENTRY_INVALID_CONTROL_FIELD;
13023
Krish Sadhukhan0c7f6502018-02-20 21:24:39 -050013024 if (nested_vmx_check_nmi_controls(vmcs12))
Jim Mattsonca0bde22016-11-30 12:03:46 -080013025 return VMXERR_ENTRY_INVALID_CONTROL_FIELD;
13026
Bandan Das41ab9372017-08-03 15:54:43 -040013027 if (nested_cpu_has_vmfunc(vmcs12)) {
13028 if (vmcs12->vm_function_control &
Paolo Bonzini6677f3d2018-02-26 13:40:08 +010013029 ~vmx->nested.msrs.vmfunc_controls)
Bandan Das41ab9372017-08-03 15:54:43 -040013030 return VMXERR_ENTRY_INVALID_CONTROL_FIELD;
13031
13032 if (nested_cpu_has_eptp_switching(vmcs12)) {
13033 if (!nested_cpu_has_ept(vmcs12) ||
13034 !page_address_valid(vcpu, vmcs12->eptp_list_address))
13035 return VMXERR_ENTRY_INVALID_CONTROL_FIELD;
13036 }
13037 }
Bandan Das27c42a12017-08-03 15:54:42 -040013038
Jim Mattsonc7c2c7092017-05-05 11:28:09 -070013039 if (vmcs12->cr3_target_count > nested_cpu_vmx_misc_cr3_count(vcpu))
13040 return VMXERR_ENTRY_INVALID_CONTROL_FIELD;
13041
Jim Mattsonca0bde22016-11-30 12:03:46 -080013042 if (!nested_host_cr0_valid(vcpu, vmcs12->host_cr0) ||
13043 !nested_host_cr4_valid(vcpu, vmcs12->host_cr4) ||
13044 !nested_cr3_valid(vcpu, vmcs12->host_cr3))
13045 return VMXERR_ENTRY_INVALID_HOST_STATE_FIELD;
13046
Marc Orr04473782018-06-20 17:21:29 -070013047 /*
Sean Christopherson64a919f2018-09-26 09:23:39 -070013048 * If the load IA32_EFER VM-exit control is 1, bits reserved in the
13049 * IA32_EFER MSR must be 0 in the field for that register. In addition,
13050 * the values of the LMA and LME bits in the field must each be that of
13051 * the host address-space size VM-exit control.
13052 */
13053 if (vmcs12->vm_exit_controls & VM_EXIT_LOAD_IA32_EFER) {
13054 ia32e = (vmcs12->vm_exit_controls &
13055 VM_EXIT_HOST_ADDR_SPACE_SIZE) != 0;
13056 if (!kvm_valid_efer(vcpu, vmcs12->host_ia32_efer) ||
13057 ia32e != !!(vmcs12->host_ia32_efer & EFER_LMA) ||
13058 ia32e != !!(vmcs12->host_ia32_efer & EFER_LME))
13059 return VMXERR_ENTRY_INVALID_HOST_STATE_FIELD;
13060 }
13061
13062 /*
Marc Orr04473782018-06-20 17:21:29 -070013063 * From the Intel SDM, volume 3:
13064 * Fields relevant to VM-entry event injection must be set properly.
13065 * These fields are the VM-entry interruption-information field, the
13066 * VM-entry exception error code, and the VM-entry instruction length.
13067 */
13068 if (vmcs12->vm_entry_intr_info_field & INTR_INFO_VALID_MASK) {
13069 u32 intr_info = vmcs12->vm_entry_intr_info_field;
13070 u8 vector = intr_info & INTR_INFO_VECTOR_MASK;
13071 u32 intr_type = intr_info & INTR_INFO_INTR_TYPE_MASK;
13072 bool has_error_code = intr_info & INTR_INFO_DELIVER_CODE_MASK;
13073 bool should_have_error_code;
13074 bool urg = nested_cpu_has2(vmcs12,
13075 SECONDARY_EXEC_UNRESTRICTED_GUEST);
13076 bool prot_mode = !urg || vmcs12->guest_cr0 & X86_CR0_PE;
13077
13078 /* VM-entry interruption-info field: interruption type */
13079 if (intr_type == INTR_TYPE_RESERVED ||
13080 (intr_type == INTR_TYPE_OTHER_EVENT &&
13081 !nested_cpu_supports_monitor_trap_flag(vcpu)))
13082 return VMXERR_ENTRY_INVALID_CONTROL_FIELD;
13083
13084 /* VM-entry interruption-info field: vector */
13085 if ((intr_type == INTR_TYPE_NMI_INTR && vector != NMI_VECTOR) ||
13086 (intr_type == INTR_TYPE_HARD_EXCEPTION && vector > 31) ||
13087 (intr_type == INTR_TYPE_OTHER_EVENT && vector != 0))
13088 return VMXERR_ENTRY_INVALID_CONTROL_FIELD;
13089
13090 /* VM-entry interruption-info field: deliver error code */
13091 should_have_error_code =
13092 intr_type == INTR_TYPE_HARD_EXCEPTION && prot_mode &&
13093 x86_exception_has_error_code(vector);
13094 if (has_error_code != should_have_error_code)
13095 return VMXERR_ENTRY_INVALID_CONTROL_FIELD;
13096
13097 /* VM-entry exception error code */
13098 if (has_error_code &&
13099 vmcs12->vm_entry_exception_error_code & GENMASK(31, 15))
13100 return VMXERR_ENTRY_INVALID_CONTROL_FIELD;
13101
13102 /* VM-entry interruption-info field: reserved bits */
13103 if (intr_info & INTR_INFO_RESVD_BITS_MASK)
13104 return VMXERR_ENTRY_INVALID_CONTROL_FIELD;
13105
13106 /* VM-entry instruction length */
13107 switch (intr_type) {
13108 case INTR_TYPE_SOFT_EXCEPTION:
13109 case INTR_TYPE_SOFT_INTR:
13110 case INTR_TYPE_PRIV_SW_EXCEPTION:
13111 if ((vmcs12->vm_entry_instruction_len > 15) ||
13112 (vmcs12->vm_entry_instruction_len == 0 &&
13113 !nested_cpu_has_zero_length_injection(vcpu)))
13114 return VMXERR_ENTRY_INVALID_CONTROL_FIELD;
13115 }
13116 }
13117
Sean Christopherson5b8ba412018-09-26 09:23:40 -070013118 if (nested_cpu_has_ept(vmcs12) &&
13119 !valid_ept_address(vcpu, vmcs12->ept_pointer))
13120 return VMXERR_ENTRY_INVALID_CONTROL_FIELD;
13121
Jim Mattsonca0bde22016-11-30 12:03:46 -080013122 return 0;
13123}
13124
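/*
 * Validate vmcs12's VMCS link pointer: unless it holds the "invalid"
 * value (-1ull), it must be a page-aligned address within the guest's
 * physical address width, and the referenced shadow VMCS must have the
 * expected revision ID and a shadow-VMCS indicator that matches
 * vmcs12's "VMCS shadowing" setting.
 */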
Liran Alonf145d902018-06-23 02:35:07 +030013125static int nested_vmx_check_vmcs_link_ptr(struct kvm_vcpu *vcpu,
13126 struct vmcs12 *vmcs12)
13127{
13128 int r;
13129 struct page *page;
13130 struct vmcs12 *shadow;
13131
13132 if (vmcs12->vmcs_link_pointer == -1ull)
13133 return 0;
13134
13135 if (!page_address_valid(vcpu, vmcs12->vmcs_link_pointer))
13136 return -EINVAL;
13137
13138 page = kvm_vcpu_gpa_to_page(vcpu, vmcs12->vmcs_link_pointer);
13139 if (is_error_page(page))
13140 return -EINVAL;
13141
13142 r = 0;
13143 shadow = kmap(page);
13144 if (shadow->hdr.revision_id != VMCS12_REVISION ||
13145 shadow->hdr.shadow_vmcs != nested_cpu_has_shadow_vmcs(vmcs12))
13146 r = -EINVAL;
13147 kunmap(page);
13148 kvm_release_page_clean(page);
13149 return r;
13150}
13151
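/*
 * Checks applied while loading guest state. Unlike the prerequisite
 * checks above, which make VMLAUNCH/VMRESUME itself fail, a failure
 * here is reported to L1 as a VM-entry failure VM-exit, with the
 * cause returned in *exit_qual.
 */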
Jim Mattsonca0bde22016-11-30 12:03:46 -080013152static int check_vmentry_postreqs(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
13153 u32 *exit_qual)
13154{
13155 bool ia32e;
13156
13157 *exit_qual = ENTRY_FAIL_DEFAULT;
13158
13159 if (!nested_guest_cr0_valid(vcpu, vmcs12->guest_cr0) ||
13160 !nested_guest_cr4_valid(vcpu, vmcs12->guest_cr4))
13161 return 1;
13162
Liran Alonf145d902018-06-23 02:35:07 +030013163 if (nested_vmx_check_vmcs_link_ptr(vcpu, vmcs12)) {
Jim Mattsonca0bde22016-11-30 12:03:46 -080013164 *exit_qual = ENTRY_FAIL_VMCS_LINK_PTR;
13165 return 1;
13166 }
13167
13168 /*
13169 * If the load IA32_EFER VM-entry control is 1, the following checks
13170 * are performed on the field for the IA32_EFER MSR:
13171 * - Bits reserved in the IA32_EFER MSR must be 0.
13172 * - Bit 10 (corresponding to IA32_EFER.LMA) must equal the value of
13173 * the IA-32e mode guest VM-exit control. It must also be identical
13174 * to bit 8 (LME) if bit 31 in the CR0 field (corresponding to
13175 * CR0.PG) is 1.
13176 */
13177 if (to_vmx(vcpu)->nested.nested_run_pending &&
13178 (vmcs12->vm_entry_controls & VM_ENTRY_LOAD_IA32_EFER)) {
13179 ia32e = (vmcs12->vm_entry_controls & VM_ENTRY_IA32E_MODE) != 0;
13180 if (!kvm_valid_efer(vcpu, vmcs12->guest_ia32_efer) ||
13181 ia32e != !!(vmcs12->guest_ia32_efer & EFER_LMA) ||
13182 ((vmcs12->guest_cr0 & X86_CR0_PG) &&
13183 ia32e != !!(vmcs12->guest_ia32_efer & EFER_LME)))
13184 return 1;
13185 }
13186
Wanpeng Lif1b026a2017-11-05 16:54:48 -080013187 if ((vmcs12->vm_entry_controls & VM_ENTRY_LOAD_BNDCFGS) &&
13188 (is_noncanonical_address(vmcs12->guest_bndcfgs & PAGE_MASK, vcpu) ||
13189 (vmcs12->guest_bndcfgs & MSR_IA32_BNDCFGS_RSVD)))
13190 return 1;
13191
Jim Mattsonca0bde22016-11-30 12:03:46 -080013192 return 0;
13193}
13194
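/*
 * Optional early consistency check: when nested_early_check is set,
 * do a throwaway VMEnter on vmcs02 with GUEST_RFLAGS cleared (bit 1 is
 * reserved to '1', so guest state is guaranteed invalid) so that
 * hardware validates the control and host-state fields up front.
 * Returns 1 if hardware signals VMFail, which the caller reports to L1
 * as an invalid control field; otherwise the induced failed VMEntry is
 * discarded and the real VMEnter can proceed.
 */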
Sean Christopherson52017602018-09-26 09:23:57 -070013195static int __noclone nested_vmx_check_vmentry_hw(struct kvm_vcpu *vcpu)
13196{
13197 struct vcpu_vmx *vmx = to_vmx(vcpu);
13198 unsigned long cr3, cr4;
13199
13200 if (!nested_early_check)
13201 return 0;
13202
13203 if (vmx->msr_autoload.host.nr)
13204 vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, 0);
13205 if (vmx->msr_autoload.guest.nr)
13206 vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, 0);
13207
13208 preempt_disable();
13209
13210 vmx_prepare_switch_to_guest(vcpu);
13211
13212 /*
13213 * Induce a consistency check VMExit by clearing bit 1 in GUEST_RFLAGS,
13214 * which is reserved to '1' by hardware. GUEST_RFLAGS is guaranteed to
13215	 * be written (by prepare_vmcs02()) before the "real" VMEnter, i.e.
13216 * there is no need to preserve other bits or save/restore the field.
13217 */
13218 vmcs_writel(GUEST_RFLAGS, 0);
13219
13220 vmcs_writel(HOST_RIP, vmx_early_consistency_check_return);
13221
13222 cr3 = __get_current_cr3_fast();
13223 if (unlikely(cr3 != vmx->loaded_vmcs->host_state.cr3)) {
13224 vmcs_writel(HOST_CR3, cr3);
13225 vmx->loaded_vmcs->host_state.cr3 = cr3;
13226 }
13227
13228 cr4 = cr4_read_shadow();
13229 if (unlikely(cr4 != vmx->loaded_vmcs->host_state.cr4)) {
13230 vmcs_writel(HOST_CR4, cr4);
13231 vmx->loaded_vmcs->host_state.cr4 = cr4;
13232 }
13233
13234 vmx->__launched = vmx->loaded_vmcs->launched;
13235
13236 asm(
13237 /* Set HOST_RSP */
Uros Bizjak4b1e5472018-10-11 19:40:44 +020013238 __ex("vmwrite %%" _ASM_SP ", %%" _ASM_DX) "\n\t"
Sean Christopherson52017602018-09-26 09:23:57 -070013239 "mov %%" _ASM_SP ", %c[host_rsp](%0)\n\t"
13240
13241	/* Check if vmlaunch or vmresume is needed */
13242 "cmpl $0, %c[launched](%0)\n\t"
13243 "je 1f\n\t"
Uros Bizjak4b1e5472018-10-11 19:40:44 +020013244 __ex("vmresume") "\n\t"
Sean Christopherson52017602018-09-26 09:23:57 -070013245 "jmp 2f\n\t"
Uros Bizjak4b1e5472018-10-11 19:40:44 +020013246 "1: " __ex("vmlaunch") "\n\t"
Sean Christopherson52017602018-09-26 09:23:57 -070013247 "jmp 2f\n\t"
13248 "2: "
13249
13250 /* Set vmx->fail accordingly */
13251 "setbe %c[fail](%0)\n\t"
13252
13253 ".pushsection .rodata\n\t"
13254 ".global vmx_early_consistency_check_return\n\t"
13255 "vmx_early_consistency_check_return: " _ASM_PTR " 2b\n\t"
13256 ".popsection"
13257 :
13258 : "c"(vmx), "d"((unsigned long)HOST_RSP),
13259 [launched]"i"(offsetof(struct vcpu_vmx, __launched)),
13260 [fail]"i"(offsetof(struct vcpu_vmx, fail)),
13261 [host_rsp]"i"(offsetof(struct vcpu_vmx, host_rsp))
13262 : "rax", "cc", "memory"
13263 );
13264
13265 vmcs_writel(HOST_RIP, vmx_return);
13266
13267 preempt_enable();
13268
13269 if (vmx->msr_autoload.host.nr)
13270 vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, vmx->msr_autoload.host.nr);
13271 if (vmx->msr_autoload.guest.nr)
13272 vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, vmx->msr_autoload.guest.nr);
13273
13274 if (vmx->fail) {
13275 WARN_ON_ONCE(vmcs_read32(VM_INSTRUCTION_ERROR) !=
13276 VMXERR_ENTRY_INVALID_CONTROL_FIELD);
13277 vmx->fail = 0;
13278 return 1;
13279 }
13280
13281 /*
13282 * VMExit clears RFLAGS.IF and DR7, even on a consistency check.
13283 */
13284 local_irq_enable();
13285 if (hw_breakpoint_active())
13286 set_debugreg(__this_cpu_read(cpu_dr7), 7);
13287
13288 /*
13289 * A non-failing VMEntry means we somehow entered guest mode with
13290 * an illegal RIP, and that's just the tip of the iceberg. There
13291 * is no telling what memory has been modified or what state has
13292 * been exposed to unknown code. Hitting this all but guarantees
13293 * a (very critical) hardware issue.
13294 */
13295 WARN_ON(!(vmcs_read32(VM_EXIT_REASON) &
13296 VMX_EXIT_REASONS_FAILED_VMENTRY));
13297
13298 return 0;
13299}
13300STACK_FRAME_NON_STANDARD(nested_vmx_check_vmentry_hw);
13301
Sean Christophersona633e412018-09-26 09:23:47 -070013302static void load_vmcs12_host_state(struct kvm_vcpu *vcpu,
13303 struct vmcs12 *vmcs12);
13304
Paolo Bonzini7f7f1ba2018-07-18 18:49:01 +020013305/*
Sean Christophersona633e412018-09-26 09:23:47 -070013306 * If from_vmentry is false, this is being called from state restore (either RSM
Jim Mattson8fcc4b52018-07-10 11:27:20 +020013307 * or KVM_SET_NESTED_STATE). Otherwise it's called from vmlaunch/vmresume.
Sean Christopherson52017602018-09-26 09:23:57 -070013308 *
13309 * Returns:
13310 *	0 - success, i.e. proceed with actual VMEnter
13311 *	1 - consistency check VMExit
13312 *	-1 - consistency check VMFail
Paolo Bonzini7f7f1ba2018-07-18 18:49:01 +020013313 */
Sean Christophersona633e412018-09-26 09:23:47 -070013314static int nested_vmx_enter_non_root_mode(struct kvm_vcpu *vcpu,
13315 bool from_vmentry)
Jim Mattson858e25c2016-11-30 12:03:47 -080013316{
13317 struct vcpu_vmx *vmx = to_vmx(vcpu);
13318 struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
Paolo Bonzini7e712682018-10-03 13:44:26 +020013319 bool evaluate_pending_interrupts;
Sean Christophersona633e412018-09-26 09:23:47 -070013320 u32 exit_reason = EXIT_REASON_INVALID_STATE;
13321 u32 exit_qual;
Jim Mattson858e25c2016-11-30 12:03:47 -080013322
Paolo Bonzini7e712682018-10-03 13:44:26 +020013323 evaluate_pending_interrupts = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL) &
13324 (CPU_BASED_VIRTUAL_INTR_PENDING | CPU_BASED_VIRTUAL_NMI_PENDING);
13325 if (likely(!evaluate_pending_interrupts) && kvm_vcpu_apicv_active(vcpu))
13326 evaluate_pending_interrupts |= vmx_has_apicv_interrupt(vcpu);
Liran Alonb5861e52018-09-03 15:20:22 +030013327
Jim Mattson858e25c2016-11-30 12:03:47 -080013328 if (!(vmcs12->vm_entry_controls & VM_ENTRY_LOAD_DEBUG_CONTROLS))
13329 vmx->nested.vmcs01_debugctl = vmcs_read64(GUEST_IA32_DEBUGCTL);
Liran Alon62cf9bd812018-09-14 03:25:54 +030013330 if (kvm_mpx_supported() &&
13331 !(vmcs12->vm_entry_controls & VM_ENTRY_LOAD_BNDCFGS))
13332 vmx->nested.vmcs01_guest_bndcfgs = vmcs_read64(GUEST_BNDCFGS);
Jim Mattson858e25c2016-11-30 12:03:47 -080013333
Jim Mattsonde3a0022017-11-27 17:22:25 -060013334 vmx_switch_vmcs(vcpu, &vmx->nested.vmcs02);
Jim Mattson858e25c2016-11-30 12:03:47 -080013335
Sean Christopherson16fb9a42018-09-26 09:23:52 -070013336 prepare_vmcs02_early(vmx, vmcs12);
13337
13338 if (from_vmentry) {
13339 nested_get_vmcs12_pages(vcpu);
13340
Sean Christopherson52017602018-09-26 09:23:57 -070013341 if (nested_vmx_check_vmentry_hw(vcpu)) {
13342 vmx_switch_vmcs(vcpu, &vmx->vmcs01);
13343 return -1;
13344 }
13345
Sean Christopherson16fb9a42018-09-26 09:23:52 -070013346 if (check_vmentry_postreqs(vcpu, vmcs12, &exit_qual))
13347 goto vmentry_fail_vmexit;
13348 }
13349
13350 enter_guest_mode(vcpu);
KarimAllah Ahmede79f2452018-04-14 05:10:52 +020013351 if (vmcs12->cpu_based_vm_exec_control & CPU_BASED_USE_TSC_OFFSETING)
13352 vcpu->arch.tsc_offset += vmcs12->tsc_offset;
13353
Sean Christophersona633e412018-09-26 09:23:47 -070013354 if (prepare_vmcs02(vcpu, vmcs12, &exit_qual))
Sean Christopherson39f9c382018-09-26 09:23:48 -070013355 goto vmentry_fail_vmexit_guest_mode;
Jim Mattson858e25c2016-11-30 12:03:47 -080013356
Paolo Bonzini7f7f1ba2018-07-18 18:49:01 +020013357 if (from_vmentry) {
Sean Christophersona633e412018-09-26 09:23:47 -070013358 exit_reason = EXIT_REASON_MSR_LOAD_FAIL;
13359 exit_qual = nested_vmx_load_msr(vcpu,
13360 vmcs12->vm_entry_msr_load_addr,
13361 vmcs12->vm_entry_msr_load_count);
13362 if (exit_qual)
Sean Christopherson39f9c382018-09-26 09:23:48 -070013363 goto vmentry_fail_vmexit_guest_mode;
Paolo Bonzini7f7f1ba2018-07-18 18:49:01 +020013364 } else {
13365 /*
13366 * The MMU is not initialized to point at the right entities yet and
13367 * "get pages" would need to read data from the guest (i.e. we will
13368 * need to perform gpa to hpa translation). Request a call
13369 * to nested_get_vmcs12_pages before the next VM-entry. The MSRs
13370 * have already been set at vmentry time and should not be reset.
13371 */
13372 kvm_make_request(KVM_REQ_GET_VMCS12_PAGES, vcpu);
13373 }
Jim Mattson858e25c2016-11-30 12:03:47 -080013374
Jim Mattson858e25c2016-11-30 12:03:47 -080013375 /*
Liran Alonb5861e52018-09-03 15:20:22 +030013376 * If L1 had a pending IRQ/NMI until it executed
13377 * VMLAUNCH/VMRESUME which wasn't delivered because it was
13378 * disallowed (e.g. interrupts disabled), L0 needs to
13379 * evaluate if this pending event should cause an exit from L2
13380	 * to L1 or delivered directly to L2 (e.g. in case L1 doesn't
13381	 * intercept EXTERNAL_INTERRUPT).
13382 *
Paolo Bonzini7e712682018-10-03 13:44:26 +020013383 * Usually this would be handled by the processor noticing an
13384 * IRQ/NMI window request, or checking RVI during evaluation of
13385 * pending virtual interrupts. However, this setting was done
13386 * on VMCS01 and now VMCS02 is active instead. Thus, we force L0
13387 * to perform pending event evaluation by requesting a KVM_REQ_EVENT.
Liran Alonb5861e52018-09-03 15:20:22 +030013388 */
Paolo Bonzini7e712682018-10-03 13:44:26 +020013389 if (unlikely(evaluate_pending_interrupts))
Liran Alonb5861e52018-09-03 15:20:22 +030013390 kvm_make_request(KVM_REQ_EVENT, vcpu);
Liran Alonb5861e52018-09-03 15:20:22 +030013391
13392 /*
Jim Mattson858e25c2016-11-30 12:03:47 -080013393 * Note no nested_vmx_succeed or nested_vmx_fail here. At this point
13394 * we are no longer running L1, and VMLAUNCH/VMRESUME has not yet
13395 * returned as far as L1 is concerned. It will only return (and set
13396 * the success flag) when L2 exits (see nested_vmx_vmexit()).
13397 */
13398 return 0;
KarimAllah Ahmede79f2452018-04-14 05:10:52 +020013399
Sean Christophersona633e412018-09-26 09:23:47 -070013400 /*
13401 * A failed consistency check that leads to a VMExit during L1's
13402 * VMEnter to L2 is a variation of a normal VMexit, as explained in
13403 * 26.7 "VM-entry failures during or after loading guest state".
13404 */
Sean Christopherson39f9c382018-09-26 09:23:48 -070013405vmentry_fail_vmexit_guest_mode:
KarimAllah Ahmede79f2452018-04-14 05:10:52 +020013406 if (vmcs12->cpu_based_vm_exec_control & CPU_BASED_USE_TSC_OFFSETING)
13407 vcpu->arch.tsc_offset -= vmcs12->tsc_offset;
13408 leave_guest_mode(vcpu);
Sean Christopherson16fb9a42018-09-26 09:23:52 -070013409
13410vmentry_fail_vmexit:
KarimAllah Ahmede79f2452018-04-14 05:10:52 +020013411 vmx_switch_vmcs(vcpu, &vmx->vmcs01);
Sean Christophersona633e412018-09-26 09:23:47 -070013412
13413 if (!from_vmentry)
13414 return 1;
13415
Sean Christophersona633e412018-09-26 09:23:47 -070013416 load_vmcs12_host_state(vcpu, vmcs12);
13417 vmcs12->vm_exit_reason = exit_reason | VMX_EXIT_REASONS_FAILED_VMENTRY;
13418 vmcs12->exit_qualification = exit_qual;
Vitaly Kuznetsov945679e2018-10-16 18:50:02 +020013419 if (enable_shadow_vmcs || vmx->nested.hv_evmcs)
13420 vmx->nested.need_vmcs12_sync = true;
Sean Christophersona633e412018-09-26 09:23:47 -070013421 return 1;
Jim Mattson858e25c2016-11-30 12:03:47 -080013422}
13423
Nadav Har'Elcd232ad2011-05-25 23:10:33 +030013424/*
13425 * nested_vmx_run() handles a nested entry, i.e., a VMLAUNCH or VMRESUME on L1
13426 * for running an L2 nested guest.
13427 */
13428static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch)
13429{
13430 struct vmcs12 *vmcs12;
13431 struct vcpu_vmx *vmx = to_vmx(vcpu);
Jim Mattsonb3f1dfb2017-07-17 12:00:34 -070013432 u32 interrupt_shadow = vmx_get_interrupt_shadow(vcpu);
Jim Mattsonca0bde22016-11-30 12:03:46 -080013433 int ret;
Nadav Har'Elcd232ad2011-05-25 23:10:33 +030013434
Kyle Hueyeb277562016-11-29 12:40:39 -080013435 if (!nested_vmx_check_permission(vcpu))
Nadav Har'Elcd232ad2011-05-25 23:10:33 +030013436 return 1;
13437
Vitaly Kuznetsov8cab6502018-10-16 18:50:09 +020013438 if (!nested_vmx_handle_enlightened_vmptrld(vcpu, true))
Vitaly Kuznetsovb8bbab92018-10-16 18:50:03 +020013439 return 1;
13440
13441 if (!vmx->nested.hv_evmcs && vmx->nested.current_vmptr == -1ull)
Sean Christopherson09abb5e2018-09-26 09:23:55 -070013442 return nested_vmx_failInvalid(vcpu);
Kyle Hueyeb277562016-11-29 12:40:39 -080013443
Nadav Har'Elcd232ad2011-05-25 23:10:33 +030013444 vmcs12 = get_vmcs12(vcpu);
13445
Liran Alona6192d42018-06-23 02:35:04 +030013446 /*
13447 * Can't VMLAUNCH or VMRESUME a shadow VMCS. Despite the fact
13448 * that there *is* a valid VMCS pointer, RFLAGS.CF is set
13449 * rather than RFLAGS.ZF, and no error number is stored to the
13450 * VM-instruction error field.
13451 */
Sean Christopherson09abb5e2018-09-26 09:23:55 -070013452 if (vmcs12->hdr.shadow_vmcs)
13453 return nested_vmx_failInvalid(vcpu);
Liran Alona6192d42018-06-23 02:35:04 +030013454
Vitaly Kuznetsov945679e2018-10-16 18:50:02 +020013455 if (vmx->nested.hv_evmcs) {
13456 copy_enlightened_to_vmcs12(vmx);
13457 /* Enlightened VMCS doesn't have launch state */
13458 vmcs12->launch_state = !launch;
13459 } else if (enable_shadow_vmcs) {
Abel Gordon012f83c2013-04-18 14:39:25 +030013460 copy_shadow_to_vmcs12(vmx);
Vitaly Kuznetsov945679e2018-10-16 18:50:02 +020013461 }
Abel Gordon012f83c2013-04-18 14:39:25 +030013462
Nadav Har'El7c177932011-05-25 23:12:04 +030013463 /*
13464 * The nested entry process starts with enforcing various prerequisites
13465	 * on vmcs12 as required by the Intel SDM, and acting appropriately when
13466	 * they fail: as the SDM explains, some conditions should cause the
13467 * instruction to fail, while others will cause the instruction to seem
13468 * to succeed, but return an EXIT_REASON_INVALID_STATE.
13469 * To speed up the normal (success) code path, we should avoid checking
13470 * for misconfigurations which will anyway be caught by the processor
13471 * when using the merged vmcs02.
13472 */
Sean Christopherson09abb5e2018-09-26 09:23:55 -070013473 if (interrupt_shadow & KVM_X86_SHADOW_INT_MOV_SS)
13474 return nested_vmx_failValid(vcpu,
13475 VMXERR_ENTRY_EVENTS_BLOCKED_BY_MOV_SS);
Jim Mattsonb3f1dfb2017-07-17 12:00:34 -070013476
Sean Christopherson09abb5e2018-09-26 09:23:55 -070013477 if (vmcs12->launch_state == launch)
13478 return nested_vmx_failValid(vcpu,
Nadav Har'El7c177932011-05-25 23:12:04 +030013479 launch ? VMXERR_VMLAUNCH_NONCLEAR_VMCS
13480 : VMXERR_VMRESUME_NONLAUNCHED_VMCS);
Nadav Har'El7c177932011-05-25 23:12:04 +030013481
Jim Mattsonca0bde22016-11-30 12:03:46 -080013482 ret = check_vmentry_prereqs(vcpu, vmcs12);
Sean Christopherson09abb5e2018-09-26 09:23:55 -070013483 if (ret)
13484 return nested_vmx_failValid(vcpu, ret);
Paolo Bonzini26539bd2013-04-15 15:00:27 +020013485
Nadav Har'El7c177932011-05-25 23:12:04 +030013486 /*
13487 * We're finally done with prerequisite checking, and can start with
13488 * the nested entry.
13489 */
Jim Mattson6514dc32018-04-26 16:09:12 -070013490 vmx->nested.nested_run_pending = 1;
Sean Christophersona633e412018-09-26 09:23:47 -070013491 ret = nested_vmx_enter_non_root_mode(vcpu, true);
Sean Christopherson52017602018-09-26 09:23:57 -070013492 vmx->nested.nested_run_pending = !ret;
13493 if (ret > 0)
Paolo Bonzini7f7f1ba2018-07-18 18:49:01 +020013494 return 1;
Sean Christopherson52017602018-09-26 09:23:57 -070013495 else if (ret)
13496 return nested_vmx_failValid(vcpu,
13497 VMXERR_ENTRY_INVALID_CONTROL_FIELD);
Wincy Vanff651cb2014-12-11 08:52:58 +030013498
Paolo Bonzinic595cee2018-07-02 13:07:14 +020013499 /* Hide L1D cache contents from the nested guest. */
13500 vmx->vcpu.arch.l1tf_flush_l1d = true;
13501
Chao Gao135a06c2018-02-11 10:06:30 +080013502 /*
Sean Christophersond63907d2018-09-26 09:23:45 -070013503 * Must happen outside of nested_vmx_enter_non_root_mode() as it will
Liran Alon61ada742018-06-23 02:35:08 +030013504 * also be used as part of restoring nVMX state for
13505 * snapshot restore (migration).
13506 *
13507 * In this flow, it is assumed that vmcs12 cache was
13508	 * transferred as part of captured nVMX state and should
13509 * therefore not be read from guest memory (which may not
13510 * exist on destination host yet).
13511 */
13512 nested_cache_shadow_vmcs12(vcpu, vmcs12);
13513
13514 /*
Chao Gao135a06c2018-02-11 10:06:30 +080013515 * If we're entering a halted L2 vcpu and the L2 vcpu won't be woken
13516 * by event injection, halt vcpu.
13517 */
13518 if ((vmcs12->guest_activity_state == GUEST_ACTIVITY_HLT) &&
Jim Mattson6514dc32018-04-26 16:09:12 -070013519 !(vmcs12->vm_entry_intr_info_field & INTR_INFO_VALID_MASK)) {
13520 vmx->nested.nested_run_pending = 0;
Joel Schopp5cb56052015-03-02 13:43:31 -060013521 return kvm_vcpu_halt(vcpu);
Jim Mattson6514dc32018-04-26 16:09:12 -070013522 }
Nadav Har'Elcd232ad2011-05-25 23:10:33 +030013523 return 1;
13524}
13525
Nadav Har'El4704d0b2011-05-25 23:11:34 +030013526/*
13527 * On a nested exit from L2 to L1, vmcs12.guest_cr0 might not be up-to-date
13528 * because L2 may have changed some cr0 bits directly (CR0_GUEST_HOST_MASK).
13529 * This function returns the new value we should put in vmcs12.guest_cr0.
13530 * It's not enough to just return the vmcs02 GUEST_CR0. Rather,
13531 * 1. Bits that neither L0 nor L1 trapped, were set directly by L2 and are now
13532 * available in vmcs02 GUEST_CR0. (Note: It's enough to check that L0
13533 * didn't trap the bit, because if L1 did, so would L0).
13534 * 2. Bits that L1 asked to trap (and therefore L0 also did) could not have
13535 * been modified by L2, and L1 knows it. So just leave the old value of
13536 * the bit from vmcs12.guest_cr0. Note that the bit from vmcs02 GUEST_CR0
13537 * isn't relevant, because if L0 traps this bit it can set it to anything.
13538 * 3. Bits that L1 didn't trap, but L0 did. L1 believes the guest could have
13539 * changed these bits, and therefore they need to be updated, but L0
13540 * didn't necessarily allow them to be changed in GUEST_CR0 - and rather
13541 * put them in vmcs02 CR0_READ_SHADOW. So take these bits from there.
13542 */
13543static inline unsigned long
13544vmcs12_guest_cr0(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
13545{
13546 return
13547 /*1*/ (vmcs_readl(GUEST_CR0) & vcpu->arch.cr0_guest_owned_bits) |
13548 /*2*/ (vmcs12->guest_cr0 & vmcs12->cr0_guest_host_mask) |
13549 /*3*/ (vmcs_readl(CR0_READ_SHADOW) & ~(vmcs12->cr0_guest_host_mask |
13550 vcpu->arch.cr0_guest_owned_bits));
13551}
13552
13553static inline unsigned long
13554vmcs12_guest_cr4(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
13555{
13556 return
13557 /*1*/ (vmcs_readl(GUEST_CR4) & vcpu->arch.cr4_guest_owned_bits) |
13558 /*2*/ (vmcs12->guest_cr4 & vmcs12->cr4_guest_host_mask) |
13559 /*3*/ (vmcs_readl(CR4_READ_SHADOW) & ~(vmcs12->cr4_guest_host_mask |
13560 vcpu->arch.cr4_guest_owned_bits));
13561}
13562
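/*
 * Translate an event that was injected into L2 but not delivered
 * (exception, NMI or interrupt) from KVM's software model into the
 * IDT-vectoring information fields of vmcs12, so that L1 sees it the
 * way hardware reports an event that was interrupted by a VM-exit.
 */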
Jan Kiszka5f3d5792013-04-14 12:12:46 +020013563static void vmcs12_save_pending_event(struct kvm_vcpu *vcpu,
13564 struct vmcs12 *vmcs12)
13565{
13566 u32 idt_vectoring;
13567 unsigned int nr;
13568
Wanpeng Li664f8e22017-08-24 03:35:09 -070013569 if (vcpu->arch.exception.injected) {
Jan Kiszka5f3d5792013-04-14 12:12:46 +020013570 nr = vcpu->arch.exception.nr;
13571 idt_vectoring = nr | VECTORING_INFO_VALID_MASK;
13572
13573 if (kvm_exception_is_soft(nr)) {
13574 vmcs12->vm_exit_instruction_len =
13575 vcpu->arch.event_exit_inst_len;
13576 idt_vectoring |= INTR_TYPE_SOFT_EXCEPTION;
13577 } else
13578 idt_vectoring |= INTR_TYPE_HARD_EXCEPTION;
13579
13580 if (vcpu->arch.exception.has_error_code) {
13581 idt_vectoring |= VECTORING_INFO_DELIVER_CODE_MASK;
13582 vmcs12->idt_vectoring_error_code =
13583 vcpu->arch.exception.error_code;
13584 }
13585
13586 vmcs12->idt_vectoring_info_field = idt_vectoring;
Jan Kiszkacd2633c2013-10-23 17:42:15 +010013587 } else if (vcpu->arch.nmi_injected) {
Jan Kiszka5f3d5792013-04-14 12:12:46 +020013588 vmcs12->idt_vectoring_info_field =
13589 INTR_TYPE_NMI_INTR | INTR_INFO_VALID_MASK | NMI_VECTOR;
Liran Alon04140b42018-03-23 03:01:31 +030013590 } else if (vcpu->arch.interrupt.injected) {
Jan Kiszka5f3d5792013-04-14 12:12:46 +020013591 nr = vcpu->arch.interrupt.nr;
13592 idt_vectoring = nr | VECTORING_INFO_VALID_MASK;
13593
13594 if (vcpu->arch.interrupt.soft) {
13595 idt_vectoring |= INTR_TYPE_SOFT_INTR;
13596 vmcs12->vm_entry_instruction_len =
13597 vcpu->arch.event_exit_inst_len;
13598 } else
13599 idt_vectoring |= INTR_TYPE_EXT_INTR;
13600
13601 vmcs12->idt_vectoring_info_field = idt_vectoring;
13602 }
13603}
13604
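/*
 * Check, in priority order, whether a pending exception, an expired
 * VMX-preemption timer, a pending NMI or a pending external interrupt
 * should cause a VM-exit from L2 to L1, and emulate that exit if so.
 * Returns -EBUSY if the event has to wait, e.g. because a nested
 * VM-entry is still pending or an earlier event must be reinjected.
 */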
Jan Kiszkab6b8a142014-03-07 20:03:12 +010013605static int vmx_check_nested_events(struct kvm_vcpu *vcpu, bool external_intr)
13606{
13607 struct vcpu_vmx *vmx = to_vmx(vcpu);
Wanpeng Libfcf83b2017-08-24 03:35:11 -070013608 unsigned long exit_qual;
Liran Alon917dc602017-11-05 16:07:43 +020013609 bool block_nested_events =
13610 vmx->nested.nested_run_pending || kvm_event_needs_reinjection(vcpu);
Wanpeng Liacc9ab62017-02-27 04:24:39 -080013611
Wanpeng Libfcf83b2017-08-24 03:35:11 -070013612 if (vcpu->arch.exception.pending &&
13613 nested_vmx_check_exception(vcpu, &exit_qual)) {
Liran Alon917dc602017-11-05 16:07:43 +020013614 if (block_nested_events)
Wanpeng Libfcf83b2017-08-24 03:35:11 -070013615 return -EBUSY;
13616 nested_vmx_inject_exception_vmexit(vcpu, exit_qual);
Wanpeng Libfcf83b2017-08-24 03:35:11 -070013617 return 0;
13618 }
13619
Jan Kiszkaf41245002014-03-07 20:03:13 +010013620 if (nested_cpu_has_preemption_timer(get_vmcs12(vcpu)) &&
13621 vmx->nested.preemption_timer_expired) {
Liran Alon917dc602017-11-05 16:07:43 +020013622 if (block_nested_events)
Jan Kiszkaf41245002014-03-07 20:03:13 +010013623 return -EBUSY;
13624 nested_vmx_vmexit(vcpu, EXIT_REASON_PREEMPTION_TIMER, 0, 0);
13625 return 0;
13626 }
13627
Jan Kiszkab6b8a142014-03-07 20:03:12 +010013628 if (vcpu->arch.nmi_pending && nested_exit_on_nmi(vcpu)) {
Liran Alon917dc602017-11-05 16:07:43 +020013629 if (block_nested_events)
Jan Kiszkab6b8a142014-03-07 20:03:12 +010013630 return -EBUSY;
13631 nested_vmx_vmexit(vcpu, EXIT_REASON_EXCEPTION_NMI,
13632 NMI_VECTOR | INTR_TYPE_NMI_INTR |
13633 INTR_INFO_VALID_MASK, 0);
13634 /*
13635 * The NMI-triggered VM exit counts as injection:
13636 * clear this one and block further NMIs.
13637 */
13638 vcpu->arch.nmi_pending = 0;
13639 vmx_set_nmi_mask(vcpu, true);
13640 return 0;
13641 }
13642
13643 if ((kvm_cpu_has_interrupt(vcpu) || external_intr) &&
13644 nested_exit_on_intr(vcpu)) {
Liran Alon917dc602017-11-05 16:07:43 +020013645 if (block_nested_events)
Jan Kiszkab6b8a142014-03-07 20:03:12 +010013646 return -EBUSY;
13647 nested_vmx_vmexit(vcpu, EXIT_REASON_EXTERNAL_INTERRUPT, 0, 0);
Wincy Van705699a2015-02-03 23:58:17 +080013648 return 0;
Jan Kiszkab6b8a142014-03-07 20:03:12 +010013649 }
13650
David Hildenbrand6342c502017-01-25 11:58:58 +010013651 vmx_complete_nested_posted_interrupt(vcpu);
13652 return 0;
Jan Kiszkab6b8a142014-03-07 20:03:12 +010013653}
13654
Sean Christophersond264ee02018-08-27 15:21:12 -070013655static void vmx_request_immediate_exit(struct kvm_vcpu *vcpu)
13656{
13657 to_vmx(vcpu)->req_immediate_exit = true;
13658}
13659
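/*
 * Convert the time remaining on the emulated VMX-preemption timer into
 * the units L1 expects: nanoseconds are converted to TSC ticks using
 * the guest's virtual_tsc_khz and then shifted right by the emulated
 * preemption-timer rate.
 */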
Jan Kiszkaf41245002014-03-07 20:03:13 +010013660static u32 vmx_get_preemption_timer_value(struct kvm_vcpu *vcpu)
13661{
13662 ktime_t remaining =
13663 hrtimer_get_remaining(&to_vmx(vcpu)->nested.preemption_timer);
13664 u64 value;
13665
13666 if (ktime_to_ns(remaining) <= 0)
13667 return 0;
13668
13669 value = ktime_to_ns(remaining) * vcpu->arch.virtual_tsc_khz;
13670 do_div(value, 1000000);
13671 return value >> VMX_MISC_EMULATED_PREEMPTION_TIMER_RATE;
13672}
13673
Nadav Har'El4704d0b2011-05-25 23:11:34 +030013674/*
Jim Mattsoncf8b84f2016-11-30 12:03:42 -080013675 * Update the guest state fields of vmcs12 to reflect changes that
13676 * occurred while L2 was running. (The "IA-32e mode guest" bit of the
13677 * VM-entry controls is also updated, since this is really a guest
13678 * state bit.)
Nadav Har'El4704d0b2011-05-25 23:11:34 +030013679 */
Jim Mattsoncf8b84f2016-11-30 12:03:42 -080013680static void sync_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
Nadav Har'El4704d0b2011-05-25 23:11:34 +030013681{
Nadav Har'El4704d0b2011-05-25 23:11:34 +030013682 vmcs12->guest_cr0 = vmcs12_guest_cr0(vcpu, vmcs12);
13683 vmcs12->guest_cr4 = vmcs12_guest_cr4(vcpu, vmcs12);
13684
Nadav Har'El4704d0b2011-05-25 23:11:34 +030013685 vmcs12->guest_rsp = kvm_register_read(vcpu, VCPU_REGS_RSP);
13686 vmcs12->guest_rip = kvm_register_read(vcpu, VCPU_REGS_RIP);
13687 vmcs12->guest_rflags = vmcs_readl(GUEST_RFLAGS);
13688
13689 vmcs12->guest_es_selector = vmcs_read16(GUEST_ES_SELECTOR);
13690 vmcs12->guest_cs_selector = vmcs_read16(GUEST_CS_SELECTOR);
13691 vmcs12->guest_ss_selector = vmcs_read16(GUEST_SS_SELECTOR);
13692 vmcs12->guest_ds_selector = vmcs_read16(GUEST_DS_SELECTOR);
13693 vmcs12->guest_fs_selector = vmcs_read16(GUEST_FS_SELECTOR);
13694 vmcs12->guest_gs_selector = vmcs_read16(GUEST_GS_SELECTOR);
13695 vmcs12->guest_ldtr_selector = vmcs_read16(GUEST_LDTR_SELECTOR);
13696 vmcs12->guest_tr_selector = vmcs_read16(GUEST_TR_SELECTOR);
13697 vmcs12->guest_es_limit = vmcs_read32(GUEST_ES_LIMIT);
13698 vmcs12->guest_cs_limit = vmcs_read32(GUEST_CS_LIMIT);
13699 vmcs12->guest_ss_limit = vmcs_read32(GUEST_SS_LIMIT);
13700 vmcs12->guest_ds_limit = vmcs_read32(GUEST_DS_LIMIT);
13701 vmcs12->guest_fs_limit = vmcs_read32(GUEST_FS_LIMIT);
13702 vmcs12->guest_gs_limit = vmcs_read32(GUEST_GS_LIMIT);
13703 vmcs12->guest_ldtr_limit = vmcs_read32(GUEST_LDTR_LIMIT);
13704 vmcs12->guest_tr_limit = vmcs_read32(GUEST_TR_LIMIT);
13705 vmcs12->guest_gdtr_limit = vmcs_read32(GUEST_GDTR_LIMIT);
13706 vmcs12->guest_idtr_limit = vmcs_read32(GUEST_IDTR_LIMIT);
13707 vmcs12->guest_es_ar_bytes = vmcs_read32(GUEST_ES_AR_BYTES);
13708 vmcs12->guest_cs_ar_bytes = vmcs_read32(GUEST_CS_AR_BYTES);
13709 vmcs12->guest_ss_ar_bytes = vmcs_read32(GUEST_SS_AR_BYTES);
13710 vmcs12->guest_ds_ar_bytes = vmcs_read32(GUEST_DS_AR_BYTES);
13711 vmcs12->guest_fs_ar_bytes = vmcs_read32(GUEST_FS_AR_BYTES);
13712 vmcs12->guest_gs_ar_bytes = vmcs_read32(GUEST_GS_AR_BYTES);
13713 vmcs12->guest_ldtr_ar_bytes = vmcs_read32(GUEST_LDTR_AR_BYTES);
13714 vmcs12->guest_tr_ar_bytes = vmcs_read32(GUEST_TR_AR_BYTES);
13715 vmcs12->guest_es_base = vmcs_readl(GUEST_ES_BASE);
13716 vmcs12->guest_cs_base = vmcs_readl(GUEST_CS_BASE);
13717 vmcs12->guest_ss_base = vmcs_readl(GUEST_SS_BASE);
13718 vmcs12->guest_ds_base = vmcs_readl(GUEST_DS_BASE);
13719 vmcs12->guest_fs_base = vmcs_readl(GUEST_FS_BASE);
13720 vmcs12->guest_gs_base = vmcs_readl(GUEST_GS_BASE);
13721 vmcs12->guest_ldtr_base = vmcs_readl(GUEST_LDTR_BASE);
13722 vmcs12->guest_tr_base = vmcs_readl(GUEST_TR_BASE);
13723 vmcs12->guest_gdtr_base = vmcs_readl(GUEST_GDTR_BASE);
13724 vmcs12->guest_idtr_base = vmcs_readl(GUEST_IDTR_BASE);
13725
Nadav Har'El4704d0b2011-05-25 23:11:34 +030013726 vmcs12->guest_interruptibility_info =
13727 vmcs_read32(GUEST_INTERRUPTIBILITY_INFO);
13728 vmcs12->guest_pending_dbg_exceptions =
13729 vmcs_readl(GUEST_PENDING_DBG_EXCEPTIONS);
Jan Kiszka3edf1e62014-01-04 18:47:24 +010013730 if (vcpu->arch.mp_state == KVM_MP_STATE_HALTED)
13731 vmcs12->guest_activity_state = GUEST_ACTIVITY_HLT;
13732 else
13733 vmcs12->guest_activity_state = GUEST_ACTIVITY_ACTIVE;
Nadav Har'El4704d0b2011-05-25 23:11:34 +030013734
Jan Kiszkaf41245002014-03-07 20:03:13 +010013735 if (nested_cpu_has_preemption_timer(vmcs12)) {
13736 if (vmcs12->vm_exit_controls &
13737 VM_EXIT_SAVE_VMX_PREEMPTION_TIMER)
13738 vmcs12->vmx_preemption_timer_value =
13739 vmx_get_preemption_timer_value(vcpu);
13740 hrtimer_cancel(&to_vmx(vcpu)->nested.preemption_timer);
13741 }
Arthur Chunqi Li7854cbc2013-09-16 16:11:44 +080013742
Nadav Har'El3633cfc2013-08-05 11:07:07 +030013743 /*
13744 * In some cases (usually, nested EPT), L2 is allowed to change its
13745 * own CR3 without exiting. If it has changed it, we must keep it.
13746 * Of course, if L0 is using shadow page tables, GUEST_CR3 was defined
13747 * by L0, not L1 or L2, so we mustn't unconditionally copy it to vmcs12.
13748 *
13749 * Additionally, restore L2's PDPTR to vmcs12.
13750 */
13751 if (enable_ept) {
Paolo Bonzinif3531052015-12-03 15:49:56 +010013752 vmcs12->guest_cr3 = vmcs_readl(GUEST_CR3);
Nadav Har'El3633cfc2013-08-05 11:07:07 +030013753 vmcs12->guest_pdptr0 = vmcs_read64(GUEST_PDPTR0);
13754 vmcs12->guest_pdptr1 = vmcs_read64(GUEST_PDPTR1);
13755 vmcs12->guest_pdptr2 = vmcs_read64(GUEST_PDPTR2);
13756 vmcs12->guest_pdptr3 = vmcs_read64(GUEST_PDPTR3);
13757 }
13758
Jim Mattsond281e132017-06-01 12:44:46 -070013759 vmcs12->guest_linear_address = vmcs_readl(GUEST_LINEAR_ADDRESS);
Jan Dakinevich119a9c02016-09-04 21:22:47 +030013760
Wincy Van608406e2015-02-03 23:57:51 +080013761 if (nested_cpu_has_vid(vmcs12))
13762 vmcs12->guest_intr_status = vmcs_read16(GUEST_INTR_STATUS);
13763
Jan Kiszkac18911a2013-03-13 16:06:41 +010013764 vmcs12->vm_entry_controls =
13765 (vmcs12->vm_entry_controls & ~VM_ENTRY_IA32E_MODE) |
Gleb Natapov2961e8762013-11-25 15:37:13 +020013766 (vm_entry_controls_get(to_vmx(vcpu)) & VM_ENTRY_IA32E_MODE);
Jan Kiszkac18911a2013-03-13 16:06:41 +010013767
Jan Kiszka2996fca2014-06-16 13:59:43 +020013768 if (vmcs12->vm_exit_controls & VM_EXIT_SAVE_DEBUG_CONTROLS) {
13769 kvm_get_dr(vcpu, 7, (unsigned long *)&vmcs12->guest_dr7);
13770 vmcs12->guest_ia32_debugctl = vmcs_read64(GUEST_IA32_DEBUGCTL);
13771 }
13772
Nadav Har'El4704d0b2011-05-25 23:11:34 +030013773 /* TODO: These cannot have changed unless we have MSR bitmaps and
13774 * the relevant bit asks not to trap the change */
Jan Kiszkab8c07d52013-04-06 13:51:21 +020013775 if (vmcs12->vm_exit_controls & VM_EXIT_SAVE_IA32_PAT)
Nadav Har'El4704d0b2011-05-25 23:11:34 +030013776 vmcs12->guest_ia32_pat = vmcs_read64(GUEST_IA32_PAT);
Jan Kiszka10ba54a2013-08-08 16:26:31 +020013777 if (vmcs12->vm_exit_controls & VM_EXIT_SAVE_IA32_EFER)
13778 vmcs12->guest_ia32_efer = vcpu->arch.efer;
Nadav Har'El4704d0b2011-05-25 23:11:34 +030013779 vmcs12->guest_sysenter_cs = vmcs_read32(GUEST_SYSENTER_CS);
13780 vmcs12->guest_sysenter_esp = vmcs_readl(GUEST_SYSENTER_ESP);
13781 vmcs12->guest_sysenter_eip = vmcs_readl(GUEST_SYSENTER_EIP);
Paolo Bonzinia87036a2016-03-08 09:52:13 +010013782 if (kvm_mpx_supported())
Paolo Bonzini36be0b92014-02-24 12:30:04 +010013783 vmcs12->guest_bndcfgs = vmcs_read64(GUEST_BNDCFGS);
Jim Mattsoncf8b84f2016-11-30 12:03:42 -080013784}
13785
13786/*
13787 * prepare_vmcs12 is part of what we need to do when the nested L2 guest exits
13788 * and we want to prepare to run its L1 parent. L1 keeps a vmcs for L2 (vmcs12),
13789 * and this function updates it to reflect the changes to the guest state while
13790 * L2 was running (and perhaps made some exits which were handled directly by L0
13791 * without going back to L1), and to reflect the exit reason.
13792 * Note that we do not have to copy here all VMCS fields, just those that
13793 * could have changed by the L2 guest or the exit - i.e., the guest-state and
13794 * exit-information fields only. Other fields are modified by L1 with VMWRITE,
13795 * which already writes to vmcs12 directly.
13796 */
13797static void prepare_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
13798 u32 exit_reason, u32 exit_intr_info,
13799 unsigned long exit_qualification)
13800{
13801 /* update guest state fields: */
13802 sync_vmcs12(vcpu, vmcs12);
Nadav Har'El4704d0b2011-05-25 23:11:34 +030013803
13804 /* update exit information fields: */
13805
Jan Kiszka533558b2014-01-04 18:47:20 +010013806 vmcs12->vm_exit_reason = exit_reason;
13807 vmcs12->exit_qualification = exit_qualification;
Jan Kiszka533558b2014-01-04 18:47:20 +010013808 vmcs12->vm_exit_intr_info = exit_intr_info;
Paolo Bonzini7313c692017-07-27 10:31:25 +020013809
Jan Kiszka5f3d5792013-04-14 12:12:46 +020013810 vmcs12->idt_vectoring_info_field = 0;
Nadav Har'El4704d0b2011-05-25 23:11:34 +030013811 vmcs12->vm_exit_instruction_len = vmcs_read32(VM_EXIT_INSTRUCTION_LEN);
13812 vmcs12->vmx_instruction_info = vmcs_read32(VMX_INSTRUCTION_INFO);
13813
Jan Kiszka5f3d5792013-04-14 12:12:46 +020013814 if (!(vmcs12->vm_exit_reason & VMX_EXIT_REASONS_FAILED_VMENTRY)) {
Jim Mattson7cdc2d62017-07-06 16:33:05 -070013815 vmcs12->launch_state = 1;
13816
Jan Kiszka5f3d5792013-04-14 12:12:46 +020013817 /* vm_entry_intr_info_field is cleared on exit. Emulate this
13818 * instead of reading the real value. */
Nadav Har'El4704d0b2011-05-25 23:11:34 +030013819 vmcs12->vm_entry_intr_info_field &= ~INTR_INFO_VALID_MASK;
Jan Kiszka5f3d5792013-04-14 12:12:46 +020013820
13821 /*
13822 * Transfer the event that L0 or L1 may wanted to inject into
13823 * L2 to IDT_VECTORING_INFO_FIELD.
13824 */
13825 vmcs12_save_pending_event(vcpu, vmcs12);
13826 }
13827
13828 /*
13829 * Drop what we picked up for L2 via vmx_complete_interrupts. It is
13830 * preserved above and would only end up incorrectly in L1.
13831 */
13832 vcpu->arch.nmi_injected = false;
13833 kvm_clear_exception_queue(vcpu);
13834 kvm_clear_interrupt_queue(vcpu);
Nadav Har'El4704d0b2011-05-25 23:11:34 +030013835}
13836
13837/*
13838 * A part of what we need to do when the nested L2 guest exits and we want to
13839 * run its L1 parent, is to reset L1's guest state to the host state specified
13840 * in vmcs12.
13841 * This function is to be called not only on normal nested exit, but also on
13842 * a nested entry failure, as explained in Intel's spec, 3B.23.7 ("VM-Entry
13843 * Failures During or After Loading Guest State").
13844 * This function should be called when the active VMCS is L1's (vmcs01).
13845 */
Jan Kiszka733568f2013-02-23 15:07:47 +010013846static void load_vmcs12_host_state(struct kvm_vcpu *vcpu,
13847 struct vmcs12 *vmcs12)
Nadav Har'El4704d0b2011-05-25 23:11:34 +030013848{
Arthur Chunqi Li21feb4e2013-07-15 16:04:08 +080013849 struct kvm_segment seg;
Sean Christophersonbd18bff2018-08-22 14:57:07 -070013850 u32 entry_failure_code;
Arthur Chunqi Li21feb4e2013-07-15 16:04:08 +080013851
Nadav Har'El4704d0b2011-05-25 23:11:34 +030013852 if (vmcs12->vm_exit_controls & VM_EXIT_LOAD_IA32_EFER)
13853 vcpu->arch.efer = vmcs12->host_ia32_efer;
Jan Kiszkad1fa0352013-04-14 12:44:54 +020013854 else if (vmcs12->vm_exit_controls & VM_EXIT_HOST_ADDR_SPACE_SIZE)
Nadav Har'El4704d0b2011-05-25 23:11:34 +030013855 vcpu->arch.efer |= (EFER_LMA | EFER_LME);
13856 else
13857 vcpu->arch.efer &= ~(EFER_LMA | EFER_LME);
13858 vmx_set_efer(vcpu, vcpu->arch.efer);
13859
13860 kvm_register_write(vcpu, VCPU_REGS_RSP, vmcs12->host_rsp);
13861 kvm_register_write(vcpu, VCPU_REGS_RIP, vmcs12->host_rip);
H. Peter Anvin1adfa762013-04-27 16:10:11 -070013862 vmx_set_rflags(vcpu, X86_EFLAGS_FIXED);
Sean Christophersoncb61de22018-09-26 09:23:53 -070013863 vmx_set_interrupt_shadow(vcpu, 0);
13864
Nadav Har'El4704d0b2011-05-25 23:11:34 +030013865 /*
13866 * Note that calling vmx_set_cr0 is important, even if cr0 hasn't
Paolo Bonzinibd7e5b02017-02-03 21:18:52 -080013867 * actually changed, because vmx_set_cr0 refers to efer set above.
13868 *
13869 * CR0_GUEST_HOST_MASK is already set in the original vmcs01
13870 * (KVM doesn't change it);
Nadav Har'El4704d0b2011-05-25 23:11:34 +030013871 */
Paolo Bonzinibd7e5b02017-02-03 21:18:52 -080013872 vcpu->arch.cr0_guest_owned_bits = X86_CR0_TS;
Jan Kiszka9e3e4dbf2013-09-03 21:11:45 +020013873 vmx_set_cr0(vcpu, vmcs12->host_cr0);
Nadav Har'El4704d0b2011-05-25 23:11:34 +030013874
Paolo Bonzinibd7e5b02017-02-03 21:18:52 -080013875 /* Same as above - no reason to call set_cr4_guest_host_mask(). */
Nadav Har'El4704d0b2011-05-25 23:11:34 +030013876 vcpu->arch.cr4_guest_owned_bits = ~vmcs_readl(CR4_GUEST_HOST_MASK);
Haozhong Zhang8eb3f872017-10-10 15:01:22 +080013877 vmx_set_cr4(vcpu, vmcs12->host_cr4);
Nadav Har'El4704d0b2011-05-25 23:11:34 +030013878
Sean Christophersonbd18bff2018-08-22 14:57:07 -070013879 nested_ept_uninit_mmu_context(vcpu);
13880
13881 /*
13882 * Only PDPTE load can fail as the value of cr3 was checked on entry and
13883 * couldn't have changed.
13884 */
13885 if (nested_vmx_load_cr3(vcpu, vmcs12->host_cr3, false, &entry_failure_code))
13886 nested_vmx_abort(vcpu, VMX_ABORT_LOAD_HOST_PDPTE_FAIL);
13887
13888 if (!enable_ept)
13889 vcpu->arch.walk_mmu->inject_page_fault = kvm_inject_page_fault;
Gleb Natapovfeaf0c7d2013-09-25 12:51:36 +030013890
Liran Alon6f1e03b2018-05-22 17:16:14 +030013891 /*
Liran Alonefebf0a2018-10-08 23:42:20 +030013892 * If vmcs01 doesn't use VPID, CPU flushes TLB on every
Liran Alon6f1e03b2018-05-22 17:16:14 +030013893 * VMEntry/VMExit. Thus, no need to flush TLB.
13894 *
Liran Alonefebf0a2018-10-08 23:42:20 +030013895 * If vmcs12 doesn't use VPID, L1 expects TLB to be
13896 * flushed on every VMEntry/VMExit.
Liran Alon6f1e03b2018-05-22 17:16:14 +030013897 *
Liran Alonefebf0a2018-10-08 23:42:20 +030013898 * Otherwise, we can preserve TLB entries as long as we are
13899 * able to tag L1 TLB entries differently than L2 TLB entries.
Liran Alon14389212018-10-08 23:42:17 +030013900 *
13901 * If vmcs12 uses EPT, we need to execute this flush on EPTP01
13902 * and therefore we request the TLB flush to happen only after VMCS EPTP
13903 * has been set by KVM_REQ_LOAD_CR3.
Liran Alon6f1e03b2018-05-22 17:16:14 +030013904 */
13905 if (enable_vpid &&
Liran Alonefebf0a2018-10-08 23:42:20 +030013906 (!nested_cpu_has_vpid(vmcs12) || !nested_has_guest_tlb_tag(vcpu))) {
Liran Alon14389212018-10-08 23:42:17 +030013907 kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
Nadav Har'El4704d0b2011-05-25 23:11:34 +030013908 }
Nadav Har'El4704d0b2011-05-25 23:11:34 +030013909
13910 vmcs_write32(GUEST_SYSENTER_CS, vmcs12->host_ia32_sysenter_cs);
13911 vmcs_writel(GUEST_SYSENTER_ESP, vmcs12->host_ia32_sysenter_esp);
13912 vmcs_writel(GUEST_SYSENTER_EIP, vmcs12->host_ia32_sysenter_eip);
13913 vmcs_writel(GUEST_IDTR_BASE, vmcs12->host_idtr_base);
13914 vmcs_writel(GUEST_GDTR_BASE, vmcs12->host_gdtr_base);
Ladi Prosek21f2d552017-10-11 16:54:42 +020013915 vmcs_write32(GUEST_IDTR_LIMIT, 0xFFFF);
13916 vmcs_write32(GUEST_GDTR_LIMIT, 0xFFFF);
Nadav Har'El4704d0b2011-05-25 23:11:34 +030013917
Paolo Bonzini36be0b92014-02-24 12:30:04 +010013918 /* If not VM_EXIT_CLEAR_BNDCFGS, the L2 value propagates to L1. */
13919 if (vmcs12->vm_exit_controls & VM_EXIT_CLEAR_BNDCFGS)
13920 vmcs_write64(GUEST_BNDCFGS, 0);
13921
Jan Kiszka44811c02013-08-04 17:17:27 +020013922 if (vmcs12->vm_exit_controls & VM_EXIT_LOAD_IA32_PAT) {
Nadav Har'El4704d0b2011-05-25 23:11:34 +030013923 vmcs_write64(GUEST_IA32_PAT, vmcs12->host_ia32_pat);
Jan Kiszka44811c02013-08-04 17:17:27 +020013924 vcpu->arch.pat = vmcs12->host_ia32_pat;
13925 }
Nadav Har'El4704d0b2011-05-25 23:11:34 +030013926 if (vmcs12->vm_exit_controls & VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL)
13927 vmcs_write64(GUEST_IA32_PERF_GLOBAL_CTRL,
13928 vmcs12->host_ia32_perf_global_ctrl);
Jan Kiszka503cd0c2013-03-03 13:05:44 +010013929
Arthur Chunqi Li21feb4e2013-07-15 16:04:08 +080013930 /* Set L1 segment info according to Intel SDM
13931 27.5.2 Loading Host Segment and Descriptor-Table Registers */
13932 seg = (struct kvm_segment) {
13933 .base = 0,
13934 .limit = 0xFFFFFFFF,
13935 .selector = vmcs12->host_cs_selector,
13936 .type = 11,
13937 .present = 1,
13938 .s = 1,
13939 .g = 1
13940 };
13941 if (vmcs12->vm_exit_controls & VM_EXIT_HOST_ADDR_SPACE_SIZE)
13942 seg.l = 1;
13943 else
13944 seg.db = 1;
13945 vmx_set_segment(vcpu, &seg, VCPU_SREG_CS);
13946 seg = (struct kvm_segment) {
13947 .base = 0,
13948 .limit = 0xFFFFFFFF,
13949 .type = 3,
13950 .present = 1,
13951 .s = 1,
13952 .db = 1,
13953 .g = 1
13954 };
13955 seg.selector = vmcs12->host_ds_selector;
13956 vmx_set_segment(vcpu, &seg, VCPU_SREG_DS);
13957 seg.selector = vmcs12->host_es_selector;
13958 vmx_set_segment(vcpu, &seg, VCPU_SREG_ES);
13959 seg.selector = vmcs12->host_ss_selector;
13960 vmx_set_segment(vcpu, &seg, VCPU_SREG_SS);
13961 seg.selector = vmcs12->host_fs_selector;
13962 seg.base = vmcs12->host_fs_base;
13963 vmx_set_segment(vcpu, &seg, VCPU_SREG_FS);
13964 seg.selector = vmcs12->host_gs_selector;
13965 seg.base = vmcs12->host_gs_base;
13966 vmx_set_segment(vcpu, &seg, VCPU_SREG_GS);
13967 seg = (struct kvm_segment) {
Gleb Natapov205befd2013-08-04 15:08:06 +030013968 .base = vmcs12->host_tr_base,
Arthur Chunqi Li21feb4e2013-07-15 16:04:08 +080013969 .limit = 0x67,
13970 .selector = vmcs12->host_tr_selector,
13971 .type = 11,
13972 .present = 1
13973 };
13974 vmx_set_segment(vcpu, &seg, VCPU_SREG_TR);
13975
Jan Kiszka503cd0c2013-03-03 13:05:44 +010013976 kvm_set_dr(vcpu, 7, 0x400);
13977 vmcs_write64(GUEST_IA32_DEBUGCTL, 0);
Wincy Vanff651cb2014-12-11 08:52:58 +030013978
Wincy Van3af18d92015-02-03 23:49:31 +080013979 if (cpu_has_vmx_msr_bitmap())
Paolo Bonzini904e14f2018-01-16 16:51:18 +010013980 vmx_update_msr_bitmap(vcpu);
Wincy Van3af18d92015-02-03 23:49:31 +080013981
Wincy Vanff651cb2014-12-11 08:52:58 +030013982 if (nested_vmx_load_msr(vcpu, vmcs12->vm_exit_msr_load_addr,
13983 vmcs12->vm_exit_msr_load_count))
13984 nested_vmx_abort(vcpu, VMX_ABORT_LOAD_HOST_MSR_FAIL);
Nadav Har'El4704d0b2011-05-25 23:11:34 +030013985}
13986
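/*
 * Determine the EFER value that vmcs01 establishes for L1: the
 * GUEST_IA32_EFER field if the "load IA32_EFER" entry control is used,
 * the host's EFER if the CPU loads EFER atomically, otherwise whatever
 * is queued in the MSR autoload list or the shared MSR array, falling
 * back to the host's EFER.
 */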
Sean Christophersonbd18bff2018-08-22 14:57:07 -070013987static inline u64 nested_vmx_get_vmcs01_guest_efer(struct vcpu_vmx *vmx)
13988{
13989 struct shared_msr_entry *efer_msr;
13990 unsigned int i;
13991
13992 if (vm_entry_controls_get(vmx) & VM_ENTRY_LOAD_IA32_EFER)
13993 return vmcs_read64(GUEST_IA32_EFER);
13994
13995 if (cpu_has_load_ia32_efer)
13996 return host_efer;
13997
13998 for (i = 0; i < vmx->msr_autoload.guest.nr; ++i) {
13999 if (vmx->msr_autoload.guest.val[i].index == MSR_EFER)
14000 return vmx->msr_autoload.guest.val[i].value;
14001 }
14002
14003 efer_msr = find_msr_entry(vmx, MSR_EFER);
14004 if (efer_msr)
14005 return efer_msr->data;
14006
14007 return host_efer;
14008}
14009
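/*
 * Restore L1's own state after a nested VM-entry that failed with
 * VMFail. Since no VM-exit occurs in that case, vmcs12's host-state
 * area must not be used; instead L1's state is rebuilt from the live
 * vmcs01 fields and, for MSRs already loaded from the VM-entry MSR
 * load list, from the VM-exit MSR load list.
 */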
14010static void nested_vmx_restore_host_state(struct kvm_vcpu *vcpu)
14011{
14012 struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
14013 struct vcpu_vmx *vmx = to_vmx(vcpu);
14014 struct vmx_msr_entry g, h;
14015 struct msr_data msr;
14016 gpa_t gpa;
14017 u32 i, j;
14018
14019 vcpu->arch.pat = vmcs_read64(GUEST_IA32_PAT);
14020
14021 if (vmcs12->vm_entry_controls & VM_ENTRY_LOAD_DEBUG_CONTROLS) {
14022 /*
14023 * L1's host DR7 is lost if KVM_GUESTDBG_USE_HW_BP is set
14024 * as vmcs01.GUEST_DR7 contains a userspace defined value
14025 * and vcpu->arch.dr7 is not squirreled away before the
14026 * nested VMENTER (not worth adding a variable in nested_vmx).
14027 */
14028 if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)
14029 kvm_set_dr(vcpu, 7, DR7_FIXED_1);
14030 else
14031 WARN_ON(kvm_set_dr(vcpu, 7, vmcs_readl(GUEST_DR7)));
14032 }
14033
14034 /*
14035 * Note that calling vmx_set_{efer,cr0,cr4} is important as they
14036 * handle a variety of side effects to KVM's software model.
14037 */
14038 vmx_set_efer(vcpu, nested_vmx_get_vmcs01_guest_efer(vmx));
14039
14040 vcpu->arch.cr0_guest_owned_bits = X86_CR0_TS;
14041 vmx_set_cr0(vcpu, vmcs_readl(CR0_READ_SHADOW));
14042
14043 vcpu->arch.cr4_guest_owned_bits = ~vmcs_readl(CR4_GUEST_HOST_MASK);
14044 vmx_set_cr4(vcpu, vmcs_readl(CR4_READ_SHADOW));
14045
14046 nested_ept_uninit_mmu_context(vcpu);
14047 vcpu->arch.cr3 = vmcs_readl(GUEST_CR3);
14048 __set_bit(VCPU_EXREG_CR3, (ulong *)&vcpu->arch.regs_avail);
14049
14050 /*
14051 * Use ept_save_pdptrs(vcpu) to load the MMU's cached PDPTRs
14052 * from vmcs01 (if necessary). The PDPTRs are not loaded on
14053	 * VMFail; like everything else, we just need to ensure our
14054 * software model is up-to-date.
14055 */
14056 ept_save_pdptrs(vcpu);
14057
14058 kvm_mmu_reset_context(vcpu);
14059
14060 if (cpu_has_vmx_msr_bitmap())
14061 vmx_update_msr_bitmap(vcpu);
14062
14063 /*
14064 * This nasty bit of open coding is a compromise between blindly
14065 * loading L1's MSRs using the exit load lists (incorrect emulation
14066 * of VMFail), leaving the nested VM's MSRs in the software model
14067 * (incorrect behavior) and snapshotting the modified MSRs (too
14068 * expensive since the lists are unbound by hardware). For each
14069 * MSR that was (prematurely) loaded from the nested VMEntry load
14070 * list, reload it from the exit load list if it exists and differs
14071 * from the guest value. The intent is to stuff host state as
14072 * silently as possible, not to fully process the exit load list.
14073 */
14074 msr.host_initiated = false;
14075 for (i = 0; i < vmcs12->vm_entry_msr_load_count; i++) {
14076 gpa = vmcs12->vm_entry_msr_load_addr + (i * sizeof(g));
14077 if (kvm_vcpu_read_guest(vcpu, gpa, &g, sizeof(g))) {
14078 pr_debug_ratelimited(
14079 "%s read MSR index failed (%u, 0x%08llx)\n",
14080 __func__, i, gpa);
14081 goto vmabort;
14082 }
14083
14084 for (j = 0; j < vmcs12->vm_exit_msr_load_count; j++) {
14085 gpa = vmcs12->vm_exit_msr_load_addr + (j * sizeof(h));
14086 if (kvm_vcpu_read_guest(vcpu, gpa, &h, sizeof(h))) {
14087 pr_debug_ratelimited(
14088 "%s read MSR failed (%u, 0x%08llx)\n",
14089 __func__, j, gpa);
14090 goto vmabort;
14091 }
14092 if (h.index != g.index)
14093 continue;
14094 if (h.value == g.value)
14095 break;
14096
14097 if (nested_vmx_load_msr_check(vcpu, &h)) {
14098 pr_debug_ratelimited(
14099 "%s check failed (%u, 0x%x, 0x%x)\n",
14100 __func__, j, h.index, h.reserved);
14101 goto vmabort;
14102 }
14103
14104 msr.index = h.index;
14105 msr.data = h.value;
14106 if (kvm_set_msr(vcpu, &msr)) {
14107 pr_debug_ratelimited(
14108 "%s WRMSR failed (%u, 0x%x, 0x%llx)\n",
14109 __func__, j, h.index, h.value);
14110 goto vmabort;
14111 }
14112 }
14113 }
14114
14115 return;
14116
14117vmabort:
14118 nested_vmx_abort(vcpu, VMX_ABORT_LOAD_HOST_MSR_FAIL);
14119}
14120
Nadav Har'El4704d0b2011-05-25 23:11:34 +030014121/*
14122 * Emulate an exit from nested guest (L2) to L1, i.e., prepare to run L1
14123 * and modify vmcs12 to make it see what it would expect to see there if
14124 * L2 was its real guest. Must only be called when in L2 (is_guest_mode())
14125 */
Jan Kiszka533558b2014-01-04 18:47:20 +010014126static void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason,
14127 u32 exit_intr_info,
14128 unsigned long exit_qualification)
Nadav Har'El4704d0b2011-05-25 23:11:34 +030014129{
14130 struct vcpu_vmx *vmx = to_vmx(vcpu);
Nadav Har'El4704d0b2011-05-25 23:11:34 +030014131 struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
14132
Jan Kiszka5f3d5792013-04-14 12:12:46 +020014133 /* trying to cancel vmlaunch/vmresume is a bug */
14134 WARN_ON_ONCE(vmx->nested.nested_run_pending);
14135
Jim Mattson4f350c62017-09-14 16:31:44 -070014136 leave_guest_mode(vcpu);
14137
KarimAllah Ahmede79f2452018-04-14 05:10:52 +020014138 if (vmcs12->cpu_based_vm_exec_control & CPU_BASED_USE_TSC_OFFSETING)
14139 vcpu->arch.tsc_offset -= vmcs12->tsc_offset;
14140
Jim Mattson4f350c62017-09-14 16:31:44 -070014141 if (likely(!vmx->fail)) {
Ladi Prosek72e9cbd2017-10-11 16:54:43 +020014142 if (exit_reason == -1)
14143 sync_vmcs12(vcpu, vmcs12);
14144 else
14145 prepare_vmcs12(vcpu, vmcs12, exit_reason, exit_intr_info,
14146 exit_qualification);
Jim Mattson4f350c62017-09-14 16:31:44 -070014147
Liran Alon61ada742018-06-23 02:35:08 +030014148 /*
14149 * Must happen outside of sync_vmcs12() as it will
14150 * also be used to capture vmcs12 cache as part of
14151 * capturing nVMX state for snapshot (migration).
14152 *
14153 * Otherwise, this flush will dirty guest memory at a
14154 * point it is already assumed by user-space to be
14155 * immutable.
14156 */
14157 nested_flush_cached_shadow_vmcs12(vcpu, vmcs12);
14158
Jim Mattson4f350c62017-09-14 16:31:44 -070014159 if (nested_vmx_store_msr(vcpu, vmcs12->vm_exit_msr_store_addr,
14160 vmcs12->vm_exit_msr_store_count))
14161 nested_vmx_abort(vcpu, VMX_ABORT_SAVE_GUEST_MSR_FAIL);
Sean Christopherson2768c0c2018-09-26 09:23:58 -070014162 } else {
14163 /*
14164 * The only expected VM-instruction error is "VM entry with
14165 * invalid control field(s)." Anything else indicates a
14166 * problem with L0. And we should never get here with a
14167 * VMFail of any type if early consistency checks are enabled.
14168 */
14169 WARN_ON_ONCE(vmcs_read32(VM_INSTRUCTION_ERROR) !=
14170 VMXERR_ENTRY_INVALID_CONTROL_FIELD);
14171 WARN_ON_ONCE(nested_early_check);
Bandan Das77b0f5d2014-04-19 18:17:45 -040014172 }
14173
Jim Mattson4f350c62017-09-14 16:31:44 -070014174 vmx_switch_vmcs(vcpu, &vmx->vmcs01);
Jan Kiszka36c3cc42013-02-23 22:35:37 +010014175
Paolo Bonzini9314006db2016-07-06 13:23:51 +020014176 /* Update any VMCS fields that might have changed while L2 ran */
Konrad Rzeszutek Wilk33966dd62018-06-20 13:58:37 -040014177 vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, vmx->msr_autoload.host.nr);
14178 vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, vmx->msr_autoload.guest.nr);
Paolo Bonziniea26e4e2016-11-01 00:39:48 +010014179 vmcs_write64(TSC_OFFSET, vcpu->arch.tsc_offset);
Sean Christophersonf459a702018-08-27 15:21:11 -070014180
Peter Feinerc95ba922016-08-17 09:36:47 -070014181 if (kvm_has_tsc_control)
14182 decache_tsc_multiplier(vmx);
Nadav Har'El4704d0b2011-05-25 23:11:34 +030014183
Jim Mattson8d860bb2018-05-09 16:56:05 -040014184 if (vmx->nested.change_vmcs01_virtual_apic_mode) {
14185 vmx->nested.change_vmcs01_virtual_apic_mode = false;
14186 vmx_set_virtual_apic_mode(vcpu);
Jim Mattsonfb6c8192017-03-16 13:53:59 -070014187 } else if (!nested_cpu_has_ept(vmcs12) &&
14188 nested_cpu_has2(vmcs12,
14189 SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES)) {
Junaid Shahida468f2d2018-04-26 13:09:50 -070014190 vmx_flush_tlb(vcpu, true);
Radim Krčmářdccbfcf2016-08-08 20:16:23 +020014191 }
Nadav Har'El4704d0b2011-05-25 23:11:34 +030014192
14193 /* This is needed for the same reason it was needed in prepare_vmcs02 */
14194 vmx->host_rsp = 0;
14195
14196 /* Unpin physical memory we referred to in vmcs02 */
14197 if (vmx->nested.apic_access_page) {
David Hildenbrand53a70da2017-08-03 18:11:05 +020014198 kvm_release_page_dirty(vmx->nested.apic_access_page);
Paolo Bonzini48d89b92014-08-26 13:27:46 +020014199 vmx->nested.apic_access_page = NULL;
Nadav Har'El4704d0b2011-05-25 23:11:34 +030014200 }
Wanpeng Lia7c0b072014-08-21 19:46:50 +080014201 if (vmx->nested.virtual_apic_page) {
David Hildenbrand53a70da2017-08-03 18:11:05 +020014202 kvm_release_page_dirty(vmx->nested.virtual_apic_page);
Paolo Bonzini48d89b92014-08-26 13:27:46 +020014203 vmx->nested.virtual_apic_page = NULL;
Wanpeng Lia7c0b072014-08-21 19:46:50 +080014204 }
Wincy Van705699a2015-02-03 23:58:17 +080014205 if (vmx->nested.pi_desc_page) {
14206 kunmap(vmx->nested.pi_desc_page);
David Hildenbrand53a70da2017-08-03 18:11:05 +020014207 kvm_release_page_dirty(vmx->nested.pi_desc_page);
Wincy Van705699a2015-02-03 23:58:17 +080014208 vmx->nested.pi_desc_page = NULL;
14209 vmx->nested.pi_desc = NULL;
14210 }
Nadav Har'El4704d0b2011-05-25 23:11:34 +030014211
14212 /*
Tang Chen38b99172014-09-24 15:57:54 +080014213 * While L2 ran, an mmu_notifier may have forced a reload of the APIC
14214 * access page's hpa for the L2 vmcs; reload it for L1 before entering L1.
14215 */
Wanpeng Lic83b6d12016-09-06 17:20:33 +080014216 kvm_make_request(KVM_REQ_APIC_PAGE_RELOAD, vcpu);
Tang Chen38b99172014-09-24 15:57:54 +080014217
Vitaly Kuznetsov945679e2018-10-16 18:50:02 +020014218 if ((exit_reason != -1) && (enable_shadow_vmcs || vmx->nested.hv_evmcs))
14219 vmx->nested.need_vmcs12_sync = true;
Jan Kiszkab6b8a142014-03-07 20:03:12 +010014220
14221 /* in case we halted in L2 */
14222 vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
Jim Mattson4f350c62017-09-14 16:31:44 -070014223
14224 if (likely(!vmx->fail)) {
14225 /*
14226 * TODO: SDM says that with acknowledge interrupt on
14227 * exit, bit 31 of the VM-exit interrupt information
14228 * (valid interrupt) is always set to 1 on
14229 * EXIT_REASON_EXTERNAL_INTERRUPT, so we shouldn't
14230 * need kvm_cpu_has_interrupt(). See the commit
14231 * message for details.
14232 */
14233 if (nested_exit_intr_ack_set(vcpu) &&
14234 exit_reason == EXIT_REASON_EXTERNAL_INTERRUPT &&
14235 kvm_cpu_has_interrupt(vcpu)) {
14236 int irq = kvm_cpu_get_interrupt(vcpu);
14237 WARN_ON(irq < 0);
14238 vmcs12->vm_exit_intr_info = irq |
14239 INTR_INFO_VALID_MASK | INTR_TYPE_EXT_INTR;
14240 }
14241
Ladi Prosek72e9cbd2017-10-11 16:54:43 +020014242 if (exit_reason != -1)
14243 trace_kvm_nested_vmexit_inject(vmcs12->vm_exit_reason,
14244 vmcs12->exit_qualification,
14245 vmcs12->idt_vectoring_info_field,
14246 vmcs12->vm_exit_intr_info,
14247 vmcs12->vm_exit_intr_error_code,
14248 KVM_ISA_VMX);
Jim Mattson4f350c62017-09-14 16:31:44 -070014249
14250 load_vmcs12_host_state(vcpu, vmcs12);
14251
14252 return;
14253 }
Sean Christopherson09abb5e2018-09-26 09:23:55 -070014254
Jim Mattson4f350c62017-09-14 16:31:44 -070014255 /*
14256 * After an early L2 VM-entry failure, we're now back
14257 * in L1 which thinks it just finished a VMLAUNCH or
14258 * VMRESUME instruction, so we need to set the failure
14259 * flag and the VM-instruction error field of the VMCS
Sean Christophersoncb61de22018-09-26 09:23:53 -070014260 * accordingly, and skip the emulated instruction.
Jim Mattson4f350c62017-09-14 16:31:44 -070014261 */
Sean Christopherson09abb5e2018-09-26 09:23:55 -070014262 (void)nested_vmx_failValid(vcpu, VMXERR_ENTRY_INVALID_CONTROL_FIELD);
Sean Christophersoncb61de22018-09-26 09:23:53 -070014263
Sean Christophersonbd18bff2018-08-22 14:57:07 -070014264 /*
14265 * Restore L1's host state to KVM's software model. We're here
14266 * because a consistency check was caught by hardware, which
14267 * means some amount of guest state has been propagated to KVM's
14268 * model and needs to be unwound to the host's state.
14269 */
14270 nested_vmx_restore_host_state(vcpu);
Wanpeng Li5af41572017-11-05 16:54:49 -080014271
Jim Mattson4f350c62017-09-14 16:31:44 -070014272 vmx->fail = 0;
Nadav Har'El4704d0b2011-05-25 23:11:34 +030014273}
14274
Nadav Har'El7c177932011-05-25 23:12:04 +030014275/*
Jan Kiszka42124922014-01-04 18:47:19 +010014276 * Forcibly leave nested mode in order to be able to reset the VCPU later on.
14277 */
14278static void vmx_leave_nested(struct kvm_vcpu *vcpu)
14279{
Wanpeng Li2f707d92017-03-06 04:03:28 -080014280 if (is_guest_mode(vcpu)) {
14281 to_vmx(vcpu)->nested.nested_run_pending = 0;
Jan Kiszka533558b2014-01-04 18:47:20 +010014282 nested_vmx_vmexit(vcpu, -1, 0, 0);
Wanpeng Li2f707d92017-03-06 04:03:28 -080014283 }
Vitaly Kuznetsov14c07ad2018-10-08 21:28:08 +020014284 free_nested(vcpu);
Jan Kiszka42124922014-01-04 18:47:19 +010014285}
14286
Joerg Roedel8a76d7f2011-04-04 12:39:27 +020014287static int vmx_check_intercept(struct kvm_vcpu *vcpu,
14288 struct x86_instruction_info *info,
14289 enum x86_intercept_stage stage)
14290{
Paolo Bonzinifb6d4d32016-07-12 11:04:26 +020014291 struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
14292 struct x86_emulate_ctxt *ctxt = &vcpu->arch.emulate_ctxt;
14293
14294 /*
14295 * RDPID causes #UD if disabled through secondary execution controls.
14296 * Because it is marked as EmulateOnUD, we need to intercept it here.
14297 */
14298 if (info->intercept == x86_intercept_rdtscp &&
14299 !nested_cpu_has2(vmcs12, SECONDARY_EXEC_RDTSCP)) {
14300 ctxt->exception.vector = UD_VECTOR;
14301 ctxt->exception.error_code_valid = false;
14302 return X86EMUL_PROPAGATE_FAULT;
14303 }
14304
14305 /* TODO: check more intercepts... */
Joerg Roedel8a76d7f2011-04-04 12:39:27 +020014306 return X86EMUL_CONTINUE;
14307}
14308
Yunhong Jiang64672c92016-06-13 14:19:59 -070014309#ifdef CONFIG_X86_64
14310/* (a << shift) / divisor; returns 1 on overflow, otherwise 0 */
14311static inline int u64_shl_div_u64(u64 a, unsigned int shift,
14312 u64 divisor, u64 *result)
14313{
14314 u64 low = a << shift, high = a >> (64 - shift);
14315
14316 /* divq would fault if the quotient overflowed 64 bits, i.e. if high >= divisor */
14317 if (high >= divisor)
14318 return 1;
14319
14320 /* Low holds the result, high holds the remainder, which is discarded */
14321 asm("divq %2\n\t" : "=a" (low), "=d" (high) :
14322 "rm" (divisor), "0" (low), "1" (high));
14323 *result = low;
14324
14325 return 0;
14326}
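
/*
 * Illustration only, not part of the driver: on a 64-bit build the same
 * (a << shift) / divisor computation could be sketched with __uint128_t
 * as below. The helper above uses divq directly because a 128-by-64
 * division through __uint128_t would emit a libgcc call (__udivti3)
 * that is not available in the kernel. The helper name below is
 * hypothetical and exists only for this sketch.
 */
#if 0	/* example sketch, never compiled */
static inline int u64_shl_div_u64_sketch(u64 a, unsigned int shift,
					 u64 divisor, u64 *result)
{
	unsigned __int128 dividend = (unsigned __int128)a << shift;

	/* Same overflow rule as the asm version: high half must be < divisor. */
	if ((u64)(dividend >> 64) >= divisor)
		return 1;

	*result = (u64)(dividend / divisor);
	return 0;
}
#endif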
14327
14328static int vmx_set_hv_timer(struct kvm_vcpu *vcpu, u64 guest_deadline_tsc)
14329{
KarimAllah Ahmed386c6dd2018-04-10 14:15:46 +020014330 struct vcpu_vmx *vmx;
Wanpeng Lic5ce8232018-05-29 14:53:17 +080014331 u64 tscl, guest_tscl, delta_tsc, lapic_timer_advance_cycles;
KarimAllah Ahmed386c6dd2018-04-10 14:15:46 +020014332
14333 if (kvm_mwait_in_guest(vcpu->kvm))
14334 return -EOPNOTSUPP;
14335
14336 vmx = to_vmx(vcpu);
14337 tscl = rdtsc();
14338 guest_tscl = kvm_read_l1_tsc(vcpu, tscl);
14339 delta_tsc = max(guest_deadline_tsc, guest_tscl) - guest_tscl;
Wanpeng Lic5ce8232018-05-29 14:53:17 +080014340 lapic_timer_advance_cycles = nsec_to_cycles(vcpu, lapic_timer_advance_ns);
14341
14342 if (delta_tsc > lapic_timer_advance_cycles)
14343 delta_tsc -= lapic_timer_advance_cycles;
14344 else
14345 delta_tsc = 0;
Yunhong Jiang64672c92016-06-13 14:19:59 -070014346
14347 /* Convert to host delta tsc if tsc scaling is enabled */
14348 if (vcpu->arch.tsc_scaling_ratio != kvm_default_tsc_scaling_ratio &&
14349 u64_shl_div_u64(delta_tsc,
14350 kvm_tsc_scaling_ratio_frac_bits,
14351 vcpu->arch.tsc_scaling_ratio,
14352 &delta_tsc))
14353 return -ERANGE;
14354
14355 /*
14356 * If the delta tsc can't fit in 32 bits after shifting right by
14357 * cpu_preemption_timer_multi, we can't use the preemption timer.
14358 * It's possible that it would fit on later vmentries, but checking
14359 * on every vmentry is costly so we just fall back to an hrtimer.
14360 */
14361 if (delta_tsc >> (cpu_preemption_timer_multi + 32))
14362 return -ERANGE;
14363
14364 vmx->hv_deadline_tsc = tscl + delta_tsc;
Wanpeng Lic8533542017-06-29 06:28:09 -070014365 return delta_tsc == 0;
Yunhong Jiang64672c92016-06-13 14:19:59 -070014366}
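
/*
 * Worked example for the conversion above (illustrative numbers only,
 * assuming kvm_tsc_scaling_ratio_frac_bits == 48 as set up for VMX): if
 * the guest TSC runs at half the host TSC frequency, then
 * vcpu->arch.tsc_scaling_ratio == 1ULL << 47, so a guest-TSC delta D is
 * converted to (D << 48) / (1 << 47) == 2 * D host cycles. The
 * preemption timer then counts that host delta shifted right by
 * cpu_preemption_timer_multi in a 32-bit field, which is why any delta
 * with bits set above (cpu_preemption_timer_multi + 32) falls back to
 * the hrtimer path.
 */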
14367
14368static void vmx_cancel_hv_timer(struct kvm_vcpu *vcpu)
14369{
Sean Christophersonf459a702018-08-27 15:21:11 -070014370 to_vmx(vcpu)->hv_deadline_tsc = -1;
Yunhong Jiang64672c92016-06-13 14:19:59 -070014371}
14372#endif
14373
Paolo Bonzini48d89b92014-08-26 13:27:46 +020014374static void vmx_sched_in(struct kvm_vcpu *vcpu, int cpu)
Radim Krčmářae97a3b2014-08-21 18:08:06 +020014375{
Wanpeng Lib31c1142018-03-12 04:53:04 -070014376 if (!kvm_pause_in_guest(vcpu->kvm))
Radim Krčmářb4a2d312014-08-21 18:08:08 +020014377 shrink_ple_window(vcpu);
Radim Krčmářae97a3b2014-08-21 18:08:06 +020014378}
14379
Kai Huang843e4332015-01-28 10:54:28 +080014380static void vmx_slot_enable_log_dirty(struct kvm *kvm,
14381 struct kvm_memory_slot *slot)
14382{
14383 kvm_mmu_slot_leaf_clear_dirty(kvm, slot);
14384 kvm_mmu_slot_largepage_remove_write_access(kvm, slot);
14385}
14386
14387static void vmx_slot_disable_log_dirty(struct kvm *kvm,
14388 struct kvm_memory_slot *slot)
14389{
14390 kvm_mmu_slot_set_dirty(kvm, slot);
14391}
14392
14393static void vmx_flush_log_dirty(struct kvm *kvm)
14394{
14395 kvm_flush_pml_buffers(kvm);
14396}
14397
Bandan Dasc5f983f2017-05-05 15:25:14 -040014398static int vmx_write_pml_buffer(struct kvm_vcpu *vcpu)
14399{
14400 struct vmcs12 *vmcs12;
14401 struct vcpu_vmx *vmx = to_vmx(vcpu);
14402 gpa_t gpa;
14403 struct page *page = NULL;
14404 u64 *pml_address;
14405
14406 if (is_guest_mode(vcpu)) {
14407 WARN_ON_ONCE(vmx->nested.pml_full);
14408
14409 /*
14410 * Check if PML is enabled for the nested guest.
14411 * Whether eptp bit 6 is set is already checked
14412 * as part of A/D emulation.
14413 */
14414 vmcs12 = get_vmcs12(vcpu);
14415 if (!nested_cpu_has_pml(vmcs12))
14416 return 0;
14417
Dan Carpenter47698862017-05-10 22:43:17 +030014418 if (vmcs12->guest_pml_index >= PML_ENTITY_NUM) {
Bandan Dasc5f983f2017-05-05 15:25:14 -040014419 vmx->nested.pml_full = true;
14420 return 1;
14421 }
14422
14423 gpa = vmcs_read64(GUEST_PHYSICAL_ADDRESS) & ~0xFFFull;
14424
David Hildenbrand5e2f30b2017-08-03 18:11:04 +020014425 page = kvm_vcpu_gpa_to_page(vcpu, vmcs12->pml_address);
14426 if (is_error_page(page))
Bandan Dasc5f983f2017-05-05 15:25:14 -040014427 return 0;
14428
14429 pml_address = kmap(page);
14430 pml_address[vmcs12->guest_pml_index--] = gpa;
14431 kunmap(page);
David Hildenbrand53a70da2017-08-03 18:11:05 +020014432 kvm_release_page_clean(page);
Bandan Dasc5f983f2017-05-05 15:25:14 -040014433 }
14434
14435 return 0;
14436}
14437
Kai Huang843e4332015-01-28 10:54:28 +080014438static void vmx_enable_log_dirty_pt_masked(struct kvm *kvm,
14439 struct kvm_memory_slot *memslot,
14440 gfn_t offset, unsigned long mask)
14441{
14442 kvm_mmu_clear_dirty_pt_masked(kvm, memslot, offset, mask);
14443}
14444
Paolo Bonzinicd39e112017-06-06 12:57:04 +020014445static void __pi_post_block(struct kvm_vcpu *vcpu)
14446{
14447 struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu);
14448 struct pi_desc old, new;
14449 unsigned int dest;
Paolo Bonzinicd39e112017-06-06 12:57:04 +020014450
14451 do {
14452 old.control = new.control = pi_desc->control;
Paolo Bonzini8b306e22017-06-06 12:57:05 +020014453 WARN(old.nv != POSTED_INTR_WAKEUP_VECTOR,
14454 "Wakeup handler not enabled while the VCPU is blocked\n");
Paolo Bonzinicd39e112017-06-06 12:57:04 +020014455
14456 dest = cpu_physical_id(vcpu->cpu);
14457
14458 if (x2apic_enabled())
14459 new.ndst = dest;
14460 else
14461 new.ndst = (dest << 8) & 0xFF00;
14462
Paolo Bonzinicd39e112017-06-06 12:57:04 +020014463 /* set 'NV' to 'notification vector' */
14464 new.nv = POSTED_INTR_VECTOR;
Paolo Bonzinic0a16662017-09-28 17:58:41 +020014465 } while (cmpxchg64(&pi_desc->control, old.control,
14466 new.control) != old.control);
Paolo Bonzinicd39e112017-06-06 12:57:04 +020014467
Paolo Bonzini8b306e22017-06-06 12:57:05 +020014468 if (!WARN_ON_ONCE(vcpu->pre_pcpu == -1)) {
14469 spin_lock(&per_cpu(blocked_vcpu_on_cpu_lock, vcpu->pre_pcpu));
Paolo Bonzinicd39e112017-06-06 12:57:04 +020014470 list_del(&vcpu->blocked_vcpu_list);
Paolo Bonzini8b306e22017-06-06 12:57:05 +020014471 spin_unlock(&per_cpu(blocked_vcpu_on_cpu_lock, vcpu->pre_pcpu));
Paolo Bonzinicd39e112017-06-06 12:57:04 +020014472 vcpu->pre_pcpu = -1;
14473 }
14474}
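
/*
 * Sketch only (hypothetical helper, never used by the driver): both
 * __pi_post_block() above and pi_pre_block() below encode the wakeup
 * destination in the posted-interrupt descriptor's NDST field the same
 * way, which could be written as:
 */
#if 0	/* example sketch, never compiled */
static u32 pi_ndst_encode_sketch(unsigned int dest_apicid)
{
	/* x2APIC: full 32-bit APIC ID; xAPIC: APIC ID in bits 15:8. */
	return x2apic_enabled() ? dest_apicid :
				  (dest_apicid << 8) & 0xFF00;
}
#endif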
14475
Feng Wuefc64402015-09-18 22:29:51 +080014476/*
Feng Wubf9f6ac2015-09-18 22:29:55 +080014477 * This routine does the following for a vCPU that is about to block
14478 * while VT-d PI is enabled:
14479 * - Store the vCPU on the wakeup list, so that when an interrupt
14480 * arrives we can find the right vCPU to wake up.
14481 * - Change the posted-interrupt descriptor as below:
14482 * 'NDST' <-- vcpu->pre_pcpu
14483 * 'NV' <-- POSTED_INTR_WAKEUP_VECTOR
14484 * - If 'ON' becomes set during this process, meaning at least one
14485 * interrupt has been posted for this vCPU, it cannot block; in
14486 * that case return 1, otherwise return 0.
14487 *
14488 */
Yunhong Jiangbc225122016-06-13 14:19:58 -070014489static int pi_pre_block(struct kvm_vcpu *vcpu)
Feng Wubf9f6ac2015-09-18 22:29:55 +080014490{
Feng Wubf9f6ac2015-09-18 22:29:55 +080014491 unsigned int dest;
14492 struct pi_desc old, new;
14493 struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu);
14494
14495 if (!kvm_arch_has_assigned_device(vcpu->kvm) ||
Yang Zhanga0052192016-06-13 09:56:56 +080014496 !irq_remapping_cap(IRQ_POSTING_CAP) ||
14497 !kvm_vcpu_apicv_active(vcpu))
Feng Wubf9f6ac2015-09-18 22:29:55 +080014498 return 0;
14499
Paolo Bonzini8b306e22017-06-06 12:57:05 +020014500 WARN_ON(irqs_disabled());
14501 local_irq_disable();
14502 if (!WARN_ON_ONCE(vcpu->pre_pcpu != -1)) {
14503 vcpu->pre_pcpu = vcpu->cpu;
14504 spin_lock(&per_cpu(blocked_vcpu_on_cpu_lock, vcpu->pre_pcpu));
14505 list_add_tail(&vcpu->blocked_vcpu_list,
14506 &per_cpu(blocked_vcpu_on_cpu,
14507 vcpu->pre_pcpu));
14508 spin_unlock(&per_cpu(blocked_vcpu_on_cpu_lock, vcpu->pre_pcpu));
14509 }
Feng Wubf9f6ac2015-09-18 22:29:55 +080014510
14511 do {
14512 old.control = new.control = pi_desc->control;
14513
Feng Wubf9f6ac2015-09-18 22:29:55 +080014514 WARN((pi_desc->sn == 1),
14515 "Warning: SN field of posted-interrupts "
14516 "is set before blocking\n");
14517
14518 /*
14519 * Since the vCPU can be preempted during this process,
14520 * vcpu->cpu could differ from pre_pcpu. Set pre_pcpu as
14521 * the destination of the wakeup notification event so
14522 * that the wakeup handler can find the right vCPU to
14523 * wake up if an interrupt arrives while the vCPU is
14524 * blocked.
14525 */
14526 dest = cpu_physical_id(vcpu->pre_pcpu);
14527
14528 if (x2apic_enabled())
14529 new.ndst = dest;
14530 else
14531 new.ndst = (dest << 8) & 0xFF00;
14532
14533 /* set 'NV' to 'wakeup vector' */
14534 new.nv = POSTED_INTR_WAKEUP_VECTOR;
Paolo Bonzinic0a16662017-09-28 17:58:41 +020014535 } while (cmpxchg64(&pi_desc->control, old.control,
14536 new.control) != old.control);
Feng Wubf9f6ac2015-09-18 22:29:55 +080014537
Paolo Bonzini8b306e22017-06-06 12:57:05 +020014538 /* We should not block the vCPU if an interrupt is posted for it. */
14539 if (pi_test_on(pi_desc) == 1)
14540 __pi_post_block(vcpu);
14541
14542 local_irq_enable();
14543 return (vcpu->pre_pcpu == -1);
Feng Wubf9f6ac2015-09-18 22:29:55 +080014544}
14545
Yunhong Jiangbc225122016-06-13 14:19:58 -070014546static int vmx_pre_block(struct kvm_vcpu *vcpu)
14547{
14548 if (pi_pre_block(vcpu))
14549 return 1;
14550
Yunhong Jiang64672c92016-06-13 14:19:59 -070014551 if (kvm_lapic_hv_timer_in_use(vcpu))
14552 kvm_lapic_switch_to_sw_timer(vcpu);
14553
Yunhong Jiangbc225122016-06-13 14:19:58 -070014554 return 0;
14555}
14556
14557static void pi_post_block(struct kvm_vcpu *vcpu)
Feng Wubf9f6ac2015-09-18 22:29:55 +080014558{
Paolo Bonzini8b306e22017-06-06 12:57:05 +020014559 if (vcpu->pre_pcpu == -1)
Feng Wubf9f6ac2015-09-18 22:29:55 +080014560 return;
14561
Paolo Bonzini8b306e22017-06-06 12:57:05 +020014562 WARN_ON(irqs_disabled());
14563 local_irq_disable();
Paolo Bonzinicd39e112017-06-06 12:57:04 +020014564 __pi_post_block(vcpu);
Paolo Bonzini8b306e22017-06-06 12:57:05 +020014565 local_irq_enable();
Feng Wubf9f6ac2015-09-18 22:29:55 +080014566}
14567
Yunhong Jiangbc225122016-06-13 14:19:58 -070014568static void vmx_post_block(struct kvm_vcpu *vcpu)
14569{
Yunhong Jiang64672c92016-06-13 14:19:59 -070014570 if (kvm_x86_ops->set_hv_timer)
14571 kvm_lapic_switch_to_hv_timer(vcpu);
14572
Yunhong Jiangbc225122016-06-13 14:19:58 -070014573 pi_post_block(vcpu);
14574}
14575
Feng Wubf9f6ac2015-09-18 22:29:55 +080014576/*
Feng Wuefc64402015-09-18 22:29:51 +080014577 * vmx_update_pi_irte - set IRTE for Posted-Interrupts
14578 *
14579 * @kvm: kvm
14580 * @host_irq: host irq of the interrupt
14581 * @guest_irq: gsi of the interrupt
14582 * @set: set or unset PI
14583 * returns 0 on success, < 0 on failure
14584 */
14585static int vmx_update_pi_irte(struct kvm *kvm, unsigned int host_irq,
14586 uint32_t guest_irq, bool set)
14587{
14588 struct kvm_kernel_irq_routing_entry *e;
14589 struct kvm_irq_routing_table *irq_rt;
14590 struct kvm_lapic_irq irq;
14591 struct kvm_vcpu *vcpu;
14592 struct vcpu_data vcpu_info;
Jan H. Schönherr3a8b0672017-09-07 19:02:30 +010014593 int idx, ret = 0;
Feng Wuefc64402015-09-18 22:29:51 +080014594
14595 if (!kvm_arch_has_assigned_device(kvm) ||
Yang Zhanga0052192016-06-13 09:56:56 +080014596 !irq_remapping_cap(IRQ_POSTING_CAP) ||
14597 !kvm_vcpu_apicv_active(kvm->vcpus[0]))
Feng Wuefc64402015-09-18 22:29:51 +080014598 return 0;
14599
14600 idx = srcu_read_lock(&kvm->irq_srcu);
14601 irq_rt = srcu_dereference(kvm->irq_routing, &kvm->irq_srcu);
Jan H. Schönherr3a8b0672017-09-07 19:02:30 +010014602 if (guest_irq >= irq_rt->nr_rt_entries ||
14603 hlist_empty(&irq_rt->map[guest_irq])) {
14604 pr_warn_once("no route for guest_irq %u/%u (broken user space?)\n",
14605 guest_irq, irq_rt->nr_rt_entries);
14606 goto out;
14607 }
Feng Wuefc64402015-09-18 22:29:51 +080014608
14609 hlist_for_each_entry(e, &irq_rt->map[guest_irq], link) {
14610 if (e->type != KVM_IRQ_ROUTING_MSI)
14611 continue;
14612 /*
14613 * VT-d PI cannot post multicast/broadcast interrupts to a
14614 * vCPU, so we still use interrupt remapping for those
14615 * kinds of interrupts.
14616 *
14617 * For lowest-priority interrupts, we only support those
14618 * with a single CPU as the destination, e.g. the user
14619 * configures the interrupts via /proc/irq or uses
14620 * irqbalance to make the interrupts single-CPU.
14621 *
14622 * We will support full lowest-priority interrupts later.
14623 */
14624
Radim Krčmář371313132016-07-12 22:09:27 +020014625 kvm_set_msi_irq(kvm, e, &irq);
Feng Wu23a1c252016-01-25 16:53:32 +080014626 if (!kvm_intr_is_single_vcpu(kvm, &irq, &vcpu)) {
14627 /*
14628 * Make sure the IRTE is in remapped mode if
14629 * we don't handle it in posted mode.
14630 */
14631 ret = irq_set_vcpu_affinity(host_irq, NULL);
14632 if (ret < 0) {
14633 printk(KERN_INFO
14634 "failed to back to remapped mode, irq: %u\n",
14635 host_irq);
14636 goto out;
14637 }
14638
Feng Wuefc64402015-09-18 22:29:51 +080014639 continue;
Feng Wu23a1c252016-01-25 16:53:32 +080014640 }
Feng Wuefc64402015-09-18 22:29:51 +080014641
14642 vcpu_info.pi_desc_addr = __pa(vcpu_to_pi_desc(vcpu));
14643 vcpu_info.vector = irq.vector;
14644
hu huajun2698d822018-04-11 15:16:40 +080014645 trace_kvm_pi_irte_update(host_irq, vcpu->vcpu_id, e->gsi,
Feng Wuefc64402015-09-18 22:29:51 +080014646 vcpu_info.vector, vcpu_info.pi_desc_addr, set);
14647
14648 if (set)
14649 ret = irq_set_vcpu_affinity(host_irq, &vcpu_info);
Haozhong Zhangdc91f2e2017-09-18 09:56:49 +080014650 else
Feng Wuefc64402015-09-18 22:29:51 +080014651 ret = irq_set_vcpu_affinity(host_irq, NULL);
Feng Wuefc64402015-09-18 22:29:51 +080014652
14653 if (ret < 0) {
14654 printk(KERN_INFO "%s: failed to update PI IRTE\n",
14655 __func__);
14656 goto out;
14657 }
14658 }
14659
14660 ret = 0;
14661out:
14662 srcu_read_unlock(&kvm->irq_srcu, idx);
14663 return ret;
14664}
14665
Ashok Rajc45dcc72016-06-22 14:59:56 +080014666static void vmx_setup_mce(struct kvm_vcpu *vcpu)
14667{
14668 if (vcpu->arch.mcg_cap & MCG_LMCE_P)
14669 to_vmx(vcpu)->msr_ia32_feature_control_valid_bits |=
14670 FEATURE_CONTROL_LMCE;
14671 else
14672 to_vmx(vcpu)->msr_ia32_feature_control_valid_bits &=
14673 ~FEATURE_CONTROL_LMCE;
14674}
14675
Ladi Prosek72d7b372017-10-11 16:54:41 +020014676static int vmx_smi_allowed(struct kvm_vcpu *vcpu)
14677{
Ladi Prosek72e9cbd2017-10-11 16:54:43 +020014678 /* we need a nested vmexit to enter SMM, postpone if run is pending */
14679 if (to_vmx(vcpu)->nested.nested_run_pending)
14680 return 0;
Ladi Prosek72d7b372017-10-11 16:54:41 +020014681 return 1;
14682}
14683
Ladi Prosek0234bf82017-10-11 16:54:40 +020014684static int vmx_pre_enter_smm(struct kvm_vcpu *vcpu, char *smstate)
14685{
Ladi Prosek72e9cbd2017-10-11 16:54:43 +020014686 struct vcpu_vmx *vmx = to_vmx(vcpu);
14687
14688 vmx->nested.smm.guest_mode = is_guest_mode(vcpu);
14689 if (vmx->nested.smm.guest_mode)
14690 nested_vmx_vmexit(vcpu, -1, 0, 0);
14691
14692 vmx->nested.smm.vmxon = vmx->nested.vmxon;
14693 vmx->nested.vmxon = false;
Wanpeng Licaa057a2018-03-12 04:53:03 -070014694 vmx_clear_hlt(vcpu);
Ladi Prosek0234bf82017-10-11 16:54:40 +020014695 return 0;
14696}
14697
14698static int vmx_pre_leave_smm(struct kvm_vcpu *vcpu, u64 smbase)
14699{
Ladi Prosek72e9cbd2017-10-11 16:54:43 +020014700 struct vcpu_vmx *vmx = to_vmx(vcpu);
14701 int ret;
14702
14703 if (vmx->nested.smm.vmxon) {
14704 vmx->nested.vmxon = true;
14705 vmx->nested.smm.vmxon = false;
14706 }
14707
14708 if (vmx->nested.smm.guest_mode) {
14709 vcpu->arch.hflags &= ~HF_SMM_MASK;
Sean Christophersona633e412018-09-26 09:23:47 -070014710 ret = nested_vmx_enter_non_root_mode(vcpu, false);
Ladi Prosek72e9cbd2017-10-11 16:54:43 +020014711 vcpu->arch.hflags |= HF_SMM_MASK;
14712 if (ret)
14713 return ret;
14714
14715 vmx->nested.smm.guest_mode = false;
14716 }
Ladi Prosek0234bf82017-10-11 16:54:40 +020014717 return 0;
14718}
14719
Ladi Prosekcc3d9672017-10-17 16:02:39 +020014720static int enable_smi_window(struct kvm_vcpu *vcpu)
14721{
14722 return 0;
14723}
14724
Vitaly Kuznetsov8cab6502018-10-16 18:50:09 +020014725static inline int vmx_has_valid_vmcs12(struct kvm_vcpu *vcpu)
14726{
14727 struct vcpu_vmx *vmx = to_vmx(vcpu);
14728
14729 /*
14730 * If we do two consecutive get/set_nested_state()s while L2 is
14731 * running, hv_evmcs may end up not being mapped (it is mapped from
14732 * nested_vmx_run()/vmx_vcpu_run()). Check is_guest_mode(), as we
14733 * always have a vmcs12 when it is true.
14734 */
14735 return is_guest_mode(vcpu) || vmx->nested.current_vmptr != -1ull ||
14736 vmx->nested.hv_evmcs;
14737}
14738
Jim Mattson8fcc4b52018-07-10 11:27:20 +020014739static int vmx_get_nested_state(struct kvm_vcpu *vcpu,
14740 struct kvm_nested_state __user *user_kvm_nested_state,
14741 u32 user_data_size)
14742{
14743 struct vcpu_vmx *vmx;
14744 struct vmcs12 *vmcs12;
14745 struct kvm_nested_state kvm_state = {
14746 .flags = 0,
14747 .format = 0,
14748 .size = sizeof(kvm_state),
14749 .vmx.vmxon_pa = -1ull,
14750 .vmx.vmcs_pa = -1ull,
14751 };
14752
14753 if (!vcpu)
14754 return kvm_state.size + 2 * VMCS12_SIZE;
14755
14756 vmx = to_vmx(vcpu);
14757 vmcs12 = get_vmcs12(vcpu);
Vitaly Kuznetsov945679e2018-10-16 18:50:02 +020014758
Vitaly Kuznetsov8cab6502018-10-16 18:50:09 +020014759 if (nested_vmx_allowed(vcpu) && vmx->nested.enlightened_vmcs_enabled)
14760 kvm_state.flags |= KVM_STATE_NESTED_EVMCS;
Vitaly Kuznetsov945679e2018-10-16 18:50:02 +020014761
Jim Mattson8fcc4b52018-07-10 11:27:20 +020014762 if (nested_vmx_allowed(vcpu) &&
14763 (vmx->nested.vmxon || vmx->nested.smm.vmxon)) {
14764 kvm_state.vmx.vmxon_pa = vmx->nested.vmxon_ptr;
14765 kvm_state.vmx.vmcs_pa = vmx->nested.current_vmptr;
14766
Vitaly Kuznetsov8cab6502018-10-16 18:50:09 +020014767 if (vmx_has_valid_vmcs12(vcpu)) {
Jim Mattson8fcc4b52018-07-10 11:27:20 +020014768 kvm_state.size += VMCS12_SIZE;
14769
Paolo Bonzinifa58a9f2018-07-18 19:45:51 +020014770 if (is_guest_mode(vcpu) &&
14771 nested_cpu_has_shadow_vmcs(vmcs12) &&
14772 vmcs12->vmcs_link_pointer != -1ull)
14773 kvm_state.size += VMCS12_SIZE;
14774 }
14775
Jim Mattson8fcc4b52018-07-10 11:27:20 +020014776 if (vmx->nested.smm.vmxon)
14777 kvm_state.vmx.smm.flags |= KVM_STATE_NESTED_SMM_VMXON;
14778
14779 if (vmx->nested.smm.guest_mode)
14780 kvm_state.vmx.smm.flags |= KVM_STATE_NESTED_SMM_GUEST_MODE;
14781
14782 if (is_guest_mode(vcpu)) {
14783 kvm_state.flags |= KVM_STATE_NESTED_GUEST_MODE;
14784
14785 if (vmx->nested.nested_run_pending)
14786 kvm_state.flags |= KVM_STATE_NESTED_RUN_PENDING;
14787 }
14788 }
14789
14790 if (user_data_size < kvm_state.size)
14791 goto out;
14792
14793 if (copy_to_user(user_kvm_nested_state, &kvm_state, sizeof(kvm_state)))
14794 return -EFAULT;
14795
Vitaly Kuznetsov8cab6502018-10-16 18:50:09 +020014796 if (!vmx_has_valid_vmcs12(vcpu))
Jim Mattson8fcc4b52018-07-10 11:27:20 +020014797 goto out;
14798
14799 /*
14800 * When running L2, the authoritative vmcs12 state is in the
14801 * vmcs02. When running L1, the authoritative vmcs12 state is
Vitaly Kuznetsov8cab6502018-10-16 18:50:09 +020014802 * in the shadow or enlightened vmcs linked to vmcs01, unless
Vitaly Kuznetsov945679e2018-10-16 18:50:02 +020014803 * need_vmcs12_sync is set, in which case, the authoritative
Jim Mattson8fcc4b52018-07-10 11:27:20 +020014804 * vmcs12 state is in the vmcs12 already.
14805 */
Vitaly Kuznetsov8cab6502018-10-16 18:50:09 +020014806 if (is_guest_mode(vcpu)) {
Jim Mattson8fcc4b52018-07-10 11:27:20 +020014807 sync_vmcs12(vcpu, vmcs12);
Vitaly Kuznetsov8cab6502018-10-16 18:50:09 +020014808 } else if (!vmx->nested.need_vmcs12_sync) {
14809 if (vmx->nested.hv_evmcs)
14810 copy_enlightened_to_vmcs12(vmx);
14811 else if (enable_shadow_vmcs)
14812 copy_shadow_to_vmcs12(vmx);
14813 }
Jim Mattson8fcc4b52018-07-10 11:27:20 +020014814
14815 if (copy_to_user(user_kvm_nested_state->data, vmcs12, sizeof(*vmcs12)))
14816 return -EFAULT;
14817
Paolo Bonzinifa58a9f2018-07-18 19:45:51 +020014818 if (nested_cpu_has_shadow_vmcs(vmcs12) &&
14819 vmcs12->vmcs_link_pointer != -1ull) {
14820 if (copy_to_user(user_kvm_nested_state->data + VMCS12_SIZE,
14821 get_shadow_vmcs12(vcpu), sizeof(*vmcs12)))
14822 return -EFAULT;
14823 }
14824
Jim Mattson8fcc4b52018-07-10 11:27:20 +020014825out:
14826 return kvm_state.size;
14827}
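
/*
 * Userspace usage sketch (illustration only, not part of the driver): a
 * VMM saving nested state first queries the maximum blob size via
 * KVM_CHECK_EXTENSION(KVM_CAP_NESTED_STATE), then calls
 * KVM_GET_NESTED_STATE with a buffer of at least that size and with the
 * 'size' field set to the buffer size, matching the user_data_size
 * handling above. The helper name is hypothetical and error reporting
 * is simplified.
 */
#if 0	/* userspace example sketch, never compiled in the kernel */
#include <linux/kvm.h>
#include <sys/ioctl.h>
#include <stdlib.h>

static struct kvm_nested_state *save_nested_state_sketch(int kvm_fd,
							 int vcpu_fd)
{
	int max = ioctl(kvm_fd, KVM_CHECK_EXTENSION, KVM_CAP_NESTED_STATE);
	struct kvm_nested_state *state;

	if (max < (int)sizeof(*state))
		return NULL;	/* nested state not supported */

	state = calloc(1, max);
	if (!state)
		return NULL;

	state->size = max;
	if (ioctl(vcpu_fd, KVM_GET_NESTED_STATE, state) < 0) {
		free(state);
		return NULL;
	}
	return state;
}
#endif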
14828
14829static int vmx_set_nested_state(struct kvm_vcpu *vcpu,
14830 struct kvm_nested_state __user *user_kvm_nested_state,
14831 struct kvm_nested_state *kvm_state)
14832{
14833 struct vcpu_vmx *vmx = to_vmx(vcpu);
14834 struct vmcs12 *vmcs12;
14835 u32 exit_qual;
14836 int ret;
14837
14838 if (kvm_state->format != 0)
14839 return -EINVAL;
14840
Vitaly Kuznetsov8cab6502018-10-16 18:50:09 +020014841 if (kvm_state->flags & KVM_STATE_NESTED_EVMCS)
14842 nested_enable_evmcs(vcpu, NULL);
14843
Jim Mattson8fcc4b52018-07-10 11:27:20 +020014844 if (!nested_vmx_allowed(vcpu))
14845 return kvm_state->vmx.vmxon_pa == -1ull ? 0 : -EINVAL;
14846
14847 if (kvm_state->vmx.vmxon_pa == -1ull) {
14848 if (kvm_state->vmx.smm.flags)
14849 return -EINVAL;
14850
14851 if (kvm_state->vmx.vmcs_pa != -1ull)
14852 return -EINVAL;
14853
14854 vmx_leave_nested(vcpu);
14855 return 0;
14856 }
14857
14858 if (!page_address_valid(vcpu, kvm_state->vmx.vmxon_pa))
14859 return -EINVAL;
14860
Jim Mattson8fcc4b52018-07-10 11:27:20 +020014861 if ((kvm_state->vmx.smm.flags & KVM_STATE_NESTED_SMM_GUEST_MODE) &&
14862 (kvm_state->flags & KVM_STATE_NESTED_GUEST_MODE))
14863 return -EINVAL;
14864
14865 if (kvm_state->vmx.smm.flags &
14866 ~(KVM_STATE_NESTED_SMM_GUEST_MODE | KVM_STATE_NESTED_SMM_VMXON))
14867 return -EINVAL;
14868
Paolo Bonzini5bea5122018-09-18 15:19:17 +020014869 /*
14870 * SMM temporarily disables VMX, so we cannot be in guest mode,
14871 * nor can VMLAUNCH/VMRESUME be pending. Outside SMM, SMM flags
14872 * must be zero.
14873 */
14874 if (is_smm(vcpu) ? kvm_state->flags : kvm_state->vmx.smm.flags)
14875 return -EINVAL;
14876
Jim Mattson8fcc4b52018-07-10 11:27:20 +020014877 if ((kvm_state->vmx.smm.flags & KVM_STATE_NESTED_SMM_GUEST_MODE) &&
14878 !(kvm_state->vmx.smm.flags & KVM_STATE_NESTED_SMM_VMXON))
14879 return -EINVAL;
14880
14881 vmx_leave_nested(vcpu);
14882 if (kvm_state->vmx.vmxon_pa == -1ull)
14883 return 0;
14884
14885 vmx->nested.vmxon_ptr = kvm_state->vmx.vmxon_pa;
14886 ret = enter_vmx_operation(vcpu);
14887 if (ret)
14888 return ret;
14889
Vitaly Kuznetsova1b0c1c2018-10-16 18:50:07 +020014890 /* Empty 'VMXON' state is permitted */
14891 if (kvm_state->size < sizeof(kvm_state) + sizeof(*vmcs12))
14892 return 0;
14893
Vitaly Kuznetsov8cab6502018-10-16 18:50:09 +020014894 if (kvm_state->vmx.vmcs_pa != -1ull) {
14895 if (kvm_state->vmx.vmcs_pa == kvm_state->vmx.vmxon_pa ||
14896 !page_address_valid(vcpu, kvm_state->vmx.vmcs_pa))
14897 return -EINVAL;
Vitaly Kuznetsova1b0c1c2018-10-16 18:50:07 +020014898
Vitaly Kuznetsov8cab6502018-10-16 18:50:09 +020014899 set_current_vmptr(vmx, kvm_state->vmx.vmcs_pa);
14900 } else if (kvm_state->flags & KVM_STATE_NESTED_EVMCS) {
14901 /*
14902 * Sync eVMCS upon entry as we may not have
14903 * HV_X64_MSR_VP_ASSIST_PAGE set up yet.
14904 */
14905 vmx->nested.need_vmcs12_sync = true;
14906 } else {
14907 return -EINVAL;
14908 }
Jim Mattson8fcc4b52018-07-10 11:27:20 +020014909
14910 if (kvm_state->vmx.smm.flags & KVM_STATE_NESTED_SMM_VMXON) {
14911 vmx->nested.smm.vmxon = true;
14912 vmx->nested.vmxon = false;
14913
14914 if (kvm_state->vmx.smm.flags & KVM_STATE_NESTED_SMM_GUEST_MODE)
14915 vmx->nested.smm.guest_mode = true;
14916 }
14917
14918 vmcs12 = get_vmcs12(vcpu);
14919 if (copy_from_user(vmcs12, user_kvm_nested_state->data, sizeof(*vmcs12)))
14920 return -EFAULT;
14921
Liran Alon392b2f22018-06-23 02:35:01 +030014922 if (vmcs12->hdr.revision_id != VMCS12_REVISION)
Jim Mattson8fcc4b52018-07-10 11:27:20 +020014923 return -EINVAL;
14924
14925 if (!(kvm_state->flags & KVM_STATE_NESTED_GUEST_MODE))
14926 return 0;
14927
14928 vmx->nested.nested_run_pending =
14929 !!(kvm_state->flags & KVM_STATE_NESTED_RUN_PENDING);
14930
Paolo Bonzinifa58a9f2018-07-18 19:45:51 +020014931 if (nested_cpu_has_shadow_vmcs(vmcs12) &&
14932 vmcs12->vmcs_link_pointer != -1ull) {
14933 struct vmcs12 *shadow_vmcs12 = get_shadow_vmcs12(vcpu);
14934 if (kvm_state->size < sizeof(kvm_state) + 2 * sizeof(*vmcs12))
14935 return -EINVAL;
14936
14937 if (copy_from_user(shadow_vmcs12,
14938 user_kvm_nested_state->data + VMCS12_SIZE,
14939 sizeof(*vmcs12)))
14940 return -EFAULT;
14941
14942 if (shadow_vmcs12->hdr.revision_id != VMCS12_REVISION ||
14943 !shadow_vmcs12->hdr.shadow_vmcs)
14944 return -EINVAL;
14945 }
14946
Jim Mattson8fcc4b52018-07-10 11:27:20 +020014947 if (check_vmentry_prereqs(vcpu, vmcs12) ||
14948 check_vmentry_postreqs(vcpu, vmcs12, &exit_qual))
14949 return -EINVAL;
14950
Jim Mattson8fcc4b52018-07-10 11:27:20 +020014951 vmx->nested.dirty_vmcs12 = true;
Sean Christophersona633e412018-09-26 09:23:47 -070014952 ret = nested_vmx_enter_non_root_mode(vcpu, false);
Jim Mattson8fcc4b52018-07-10 11:27:20 +020014953 if (ret)
14954 return -EINVAL;
14955
14956 return 0;
14957}
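
/*
 * Userspace restore sketch (illustration only, same includes as the
 * sketch after vmx_get_nested_state()): the buffer produced by
 * KVM_GET_NESTED_STATE is handed back unmodified to the destination
 * vCPU. The helper name is hypothetical and error handling is elided.
 */
#if 0	/* userspace example sketch, never compiled in the kernel */
static int restore_nested_state_sketch(int vcpu_fd,
				       struct kvm_nested_state *state)
{
	return ioctl(vcpu_fd, KVM_SET_NESTED_STATE, state);
}
#endif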
14958
Kees Cook404f6aa2016-08-08 16:29:06 -070014959static struct kvm_x86_ops vmx_x86_ops __ro_after_init = {
Avi Kivity6aa8b732006-12-10 02:21:36 -080014960 .cpu_has_kvm_support = cpu_has_kvm_support,
14961 .disabled_by_bios = vmx_disabled_by_bios,
14962 .hardware_setup = hardware_setup,
14963 .hardware_unsetup = hardware_unsetup,
Yang, Sheng002c7f72007-07-31 14:23:01 +030014964 .check_processor_compatibility = vmx_check_processor_compat,
Avi Kivity6aa8b732006-12-10 02:21:36 -080014965 .hardware_enable = hardware_enable,
14966 .hardware_disable = hardware_disable,
Sheng Yang04547152009-04-01 15:52:31 +080014967 .cpu_has_accelerated_tpr = report_flexpriority,
Tom Lendackybc226f02018-05-10 22:06:39 +020014968 .has_emulated_msr = vmx_has_emulated_msr,
Avi Kivity6aa8b732006-12-10 02:21:36 -080014969
Wanpeng Lib31c1142018-03-12 04:53:04 -070014970 .vm_init = vmx_vm_init,
Sean Christopherson434a1e92018-03-20 12:17:18 -070014971 .vm_alloc = vmx_vm_alloc,
14972 .vm_free = vmx_vm_free,
Wanpeng Lib31c1142018-03-12 04:53:04 -070014973
Avi Kivity6aa8b732006-12-10 02:21:36 -080014974 .vcpu_create = vmx_create_vcpu,
14975 .vcpu_free = vmx_free_vcpu,
Avi Kivity04d2cc72007-09-10 18:10:54 +030014976 .vcpu_reset = vmx_vcpu_reset,
Avi Kivity6aa8b732006-12-10 02:21:36 -080014977
Sean Christopherson6d6095b2018-07-23 12:32:44 -070014978 .prepare_guest_switch = vmx_prepare_switch_to_guest,
Avi Kivity6aa8b732006-12-10 02:21:36 -080014979 .vcpu_load = vmx_vcpu_load,
14980 .vcpu_put = vmx_vcpu_put,
14981
Paolo Bonzinia96036b2015-11-10 11:55:36 +010014982 .update_bp_intercept = update_exception_bitmap,
Tom Lendacky801e4592018-02-21 13:39:51 -060014983 .get_msr_feature = vmx_get_msr_feature,
Avi Kivity6aa8b732006-12-10 02:21:36 -080014984 .get_msr = vmx_get_msr,
14985 .set_msr = vmx_set_msr,
14986 .get_segment_base = vmx_get_segment_base,
14987 .get_segment = vmx_get_segment,
14988 .set_segment = vmx_set_segment,
Izik Eidus2e4d2652008-03-24 19:38:34 +020014989 .get_cpl = vmx_get_cpl,
Avi Kivity6aa8b732006-12-10 02:21:36 -080014990 .get_cs_db_l_bits = vmx_get_cs_db_l_bits,
Avi Kivitye8467fd2009-12-29 18:43:06 +020014991 .decache_cr0_guest_bits = vmx_decache_cr0_guest_bits,
Avi Kivityaff48ba2010-12-05 18:56:11 +020014992 .decache_cr3 = vmx_decache_cr3,
Anthony Liguori25c4c272007-04-27 09:29:21 +030014993 .decache_cr4_guest_bits = vmx_decache_cr4_guest_bits,
Avi Kivity6aa8b732006-12-10 02:21:36 -080014994 .set_cr0 = vmx_set_cr0,
Avi Kivity6aa8b732006-12-10 02:21:36 -080014995 .set_cr3 = vmx_set_cr3,
14996 .set_cr4 = vmx_set_cr4,
Avi Kivity6aa8b732006-12-10 02:21:36 -080014997 .set_efer = vmx_set_efer,
Avi Kivity6aa8b732006-12-10 02:21:36 -080014998 .get_idt = vmx_get_idt,
14999 .set_idt = vmx_set_idt,
15000 .get_gdt = vmx_get_gdt,
15001 .set_gdt = vmx_set_gdt,
Jan Kiszka73aaf249e2014-01-04 18:47:16 +010015002 .get_dr6 = vmx_get_dr6,
15003 .set_dr6 = vmx_set_dr6,
Gleb Natapov020df072010-04-13 10:05:23 +030015004 .set_dr7 = vmx_set_dr7,
Paolo Bonzini81908bf2014-02-21 10:32:27 +010015005 .sync_dirty_debug_regs = vmx_sync_dirty_debug_regs,
Marcelo Tosatti5fdbf972008-06-27 14:58:02 -030015006 .cache_reg = vmx_cache_reg,
Avi Kivity6aa8b732006-12-10 02:21:36 -080015007 .get_rflags = vmx_get_rflags,
15008 .set_rflags = vmx_set_rflags,
Huaitong Hanbe94f6b2016-03-22 16:51:20 +080015009
Avi Kivity6aa8b732006-12-10 02:21:36 -080015010 .tlb_flush = vmx_flush_tlb,
Junaid Shahidfaff8752018-06-29 13:10:05 -070015011 .tlb_flush_gva = vmx_flush_tlb_gva,
Avi Kivity6aa8b732006-12-10 02:21:36 -080015012
Avi Kivity6aa8b732006-12-10 02:21:36 -080015013 .run = vmx_vcpu_run,
Avi Kivity6062d012009-03-23 17:35:17 +020015014 .handle_exit = vmx_handle_exit,
Avi Kivity6aa8b732006-12-10 02:21:36 -080015015 .skip_emulated_instruction = skip_emulated_instruction,
Glauber Costa2809f5d2009-05-12 16:21:05 -040015016 .set_interrupt_shadow = vmx_set_interrupt_shadow,
15017 .get_interrupt_shadow = vmx_get_interrupt_shadow,
Ingo Molnar102d8322007-02-19 14:37:47 +020015018 .patch_hypercall = vmx_patch_hypercall,
Eddie Dong2a8067f2007-08-06 16:29:07 +030015019 .set_irq = vmx_inject_irq,
Gleb Natapov95ba8273132009-04-21 17:45:08 +030015020 .set_nmi = vmx_inject_nmi,
Avi Kivity298101d2007-11-25 13:41:11 +020015021 .queue_exception = vmx_queue_exception,
Avi Kivityb463a6f2010-07-20 15:06:17 +030015022 .cancel_injection = vmx_cancel_injection,
Gleb Natapov78646122009-03-23 12:12:11 +020015023 .interrupt_allowed = vmx_interrupt_allowed,
Gleb Natapov95ba8273132009-04-21 17:45:08 +030015024 .nmi_allowed = vmx_nmi_allowed,
Jan Kiszka3cfc3092009-11-12 01:04:25 +010015025 .get_nmi_mask = vmx_get_nmi_mask,
15026 .set_nmi_mask = vmx_set_nmi_mask,
Gleb Natapov95ba8273132009-04-21 17:45:08 +030015027 .enable_nmi_window = enable_nmi_window,
15028 .enable_irq_window = enable_irq_window,
15029 .update_cr8_intercept = update_cr8_intercept,
Jim Mattson8d860bb2018-05-09 16:56:05 -040015030 .set_virtual_apic_mode = vmx_set_virtual_apic_mode,
Tang Chen38b99172014-09-24 15:57:54 +080015031 .set_apic_access_page_addr = vmx_set_apic_access_page_addr,
Andrey Smetanind62caab2015-11-10 15:36:33 +030015032 .get_enable_apicv = vmx_get_enable_apicv,
15033 .refresh_apicv_exec_ctrl = vmx_refresh_apicv_exec_ctrl,
Yang Zhangc7c9c562013-01-25 10:18:51 +080015034 .load_eoi_exitmap = vmx_load_eoi_exitmap,
Paolo Bonzini967235d2016-12-19 14:03:45 +010015035 .apicv_post_state_restore = vmx_apicv_post_state_restore,
Yang Zhangc7c9c562013-01-25 10:18:51 +080015036 .hwapic_irr_update = vmx_hwapic_irr_update,
15037 .hwapic_isr_update = vmx_hwapic_isr_update,
Liran Alone6c67d82018-09-04 10:56:52 +030015038 .guest_apic_has_interrupt = vmx_guest_apic_has_interrupt,
Yang Zhanga20ed542013-04-11 19:25:15 +080015039 .sync_pir_to_irr = vmx_sync_pir_to_irr,
15040 .deliver_posted_interrupt = vmx_deliver_posted_interrupt,
Gleb Natapov95ba8273132009-04-21 17:45:08 +030015041
Izik Eiduscbc94022007-10-25 00:29:55 +020015042 .set_tss_addr = vmx_set_tss_addr,
Sean Christopherson2ac52ab2018-03-20 12:17:19 -070015043 .set_identity_map_addr = vmx_set_identity_map_addr,
Sheng Yang67253af2008-04-25 10:20:22 +080015044 .get_tdp_level = get_ept_level,
Sheng Yang4b12f0d2009-04-27 20:35:42 +080015045 .get_mt_mask = vmx_get_mt_mask,
Marcelo Tosatti229456f2009-06-17 09:22:14 -030015046
Avi Kivity586f9602010-11-18 13:09:54 +020015047 .get_exit_info = vmx_get_exit_info,
Avi Kivity586f9602010-11-18 13:09:54 +020015048
Sheng Yang17cc3932010-01-05 19:02:27 +080015049 .get_lpage_level = vmx_get_lpage_level,
Sheng Yang0e851882009-12-18 16:48:46 +080015050
15051 .cpuid_update = vmx_cpuid_update,
Sheng Yang4e47c7a2009-12-18 16:48:47 +080015052
15053 .rdtscp_supported = vmx_rdtscp_supported,
Mao, Junjiead756a12012-07-02 01:18:48 +000015054 .invpcid_supported = vmx_invpcid_supported,
Joerg Roedeld4330ef2010-04-22 12:33:11 +020015055
15056 .set_supported_cpuid = vmx_set_supported_cpuid,
Sheng Yangf5f48ee2010-06-30 12:25:15 +080015057
15058 .has_wbinvd_exit = cpu_has_vmx_wbinvd_exit,
Zachary Amsden99e3e302010-08-19 22:07:17 -100015059
KarimAllah Ahmede79f2452018-04-14 05:10:52 +020015060 .read_l1_tsc_offset = vmx_read_l1_tsc_offset,
Zachary Amsden99e3e302010-08-19 22:07:17 -100015061 .write_tsc_offset = vmx_write_tsc_offset,
Joerg Roedel1c97f0a2010-09-10 17:30:41 +020015062
15063 .set_tdp_cr3 = vmx_set_cr3,
Joerg Roedel8a76d7f2011-04-04 12:39:27 +020015064
15065 .check_intercept = vmx_check_intercept,
Yang Zhanga547c6d2013-04-11 19:25:10 +080015066 .handle_external_intr = vmx_handle_external_intr,
Liu, Jinsongda8999d2014-02-24 10:55:46 +000015067 .mpx_supported = vmx_mpx_supported,
Wanpeng Li55412b22014-12-02 19:21:30 +080015068 .xsaves_supported = vmx_xsaves_supported,
Paolo Bonzini66336ca2016-07-12 10:36:41 +020015069 .umip_emulated = vmx_umip_emulated,
Jan Kiszkab6b8a142014-03-07 20:03:12 +010015070
15071 .check_nested_events = vmx_check_nested_events,
Sean Christophersond264ee02018-08-27 15:21:12 -070015072 .request_immediate_exit = vmx_request_immediate_exit,
Radim Krčmářae97a3b2014-08-21 18:08:06 +020015073
15074 .sched_in = vmx_sched_in,
Kai Huang843e4332015-01-28 10:54:28 +080015075
15076 .slot_enable_log_dirty = vmx_slot_enable_log_dirty,
15077 .slot_disable_log_dirty = vmx_slot_disable_log_dirty,
15078 .flush_log_dirty = vmx_flush_log_dirty,
15079 .enable_log_dirty_pt_masked = vmx_enable_log_dirty_pt_masked,
Bandan Dasc5f983f2017-05-05 15:25:14 -040015080 .write_log_dirty = vmx_write_pml_buffer,
Wei Huang25462f72015-06-19 15:45:05 +020015081
Feng Wubf9f6ac2015-09-18 22:29:55 +080015082 .pre_block = vmx_pre_block,
15083 .post_block = vmx_post_block,
15084
Wei Huang25462f72015-06-19 15:45:05 +020015085 .pmu_ops = &intel_pmu_ops,
Feng Wuefc64402015-09-18 22:29:51 +080015086
15087 .update_pi_irte = vmx_update_pi_irte,
Yunhong Jiang64672c92016-06-13 14:19:59 -070015088
15089#ifdef CONFIG_X86_64
15090 .set_hv_timer = vmx_set_hv_timer,
15091 .cancel_hv_timer = vmx_cancel_hv_timer,
15092#endif
Ashok Rajc45dcc72016-06-22 14:59:56 +080015093
15094 .setup_mce = vmx_setup_mce,
Ladi Prosek0234bf82017-10-11 16:54:40 +020015095
Jim Mattson8fcc4b52018-07-10 11:27:20 +020015096 .get_nested_state = vmx_get_nested_state,
15097 .set_nested_state = vmx_set_nested_state,
Paolo Bonzini7f7f1ba2018-07-18 18:49:01 +020015098 .get_vmcs12_pages = nested_get_vmcs12_pages,
15099
Ladi Prosek72d7b372017-10-11 16:54:41 +020015100 .smi_allowed = vmx_smi_allowed,
Ladi Prosek0234bf82017-10-11 16:54:40 +020015101 .pre_enter_smm = vmx_pre_enter_smm,
15102 .pre_leave_smm = vmx_pre_leave_smm,
Ladi Prosekcc3d9672017-10-17 16:02:39 +020015103 .enable_smi_window = enable_smi_window,
Vitaly Kuznetsov57b119d2018-10-16 18:50:01 +020015104
15105 .nested_enable_evmcs = nested_enable_evmcs,
Avi Kivity6aa8b732006-12-10 02:21:36 -080015106};
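
/*
 * Illustration only: arch-neutral KVM code does not call the functions
 * in this file directly; it dispatches through the kvm_x86_ops table
 * above, e.g. (sketch, simplified from the callers in x86.c):
 *
 *	kvm_x86_ops->prepare_guest_switch(vcpu);
 *	kvm_x86_ops->run(vcpu);
 *	kvm_x86_ops->handle_exit(vcpu);
 *
 * so the same call sites serve both the VMX and SVM implementations.
 */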
15107
Thomas Gleixner72c6d2d2018-07-13 16:23:16 +020015108static void vmx_cleanup_l1d_flush(void)
Paolo Bonzinia47dd5f2018-07-02 12:47:38 +020015109{
15110 if (vmx_l1d_flush_pages) {
15111 free_pages((unsigned long)vmx_l1d_flush_pages, L1D_CACHE_ORDER);
15112 vmx_l1d_flush_pages = NULL;
15113 }
Thomas Gleixner72c6d2d2018-07-13 16:23:16 +020015114 /* Restore state so sysfs ignores VMX */
15115 l1tf_vmx_mitigation = VMENTER_L1D_FLUSH_AUTO;
Konrad Rzeszutek Wilka3994772018-07-02 12:29:30 +020015116}
15117
Thomas Gleixnera7b90202018-07-13 16:23:18 +020015118static void vmx_exit(void)
15119{
15120#ifdef CONFIG_KEXEC_CORE
15121 RCU_INIT_POINTER(crash_vmclear_loaded_vmcss, NULL);
15122 synchronize_rcu();
15123#endif
15124
15125 kvm_exit();
15126
15127#if IS_ENABLED(CONFIG_HYPERV)
15128 if (static_branch_unlikely(&enable_evmcs)) {
15129 int cpu;
15130 struct hv_vp_assist_page *vp_ap;
15131 /*
15132 * Reset everything to support using non-enlightened VMCS
15133 * access later (e.g. when we reload the module with
15134 * enlightened_vmcs=0)
15135 */
15136 for_each_online_cpu(cpu) {
15137 vp_ap = hv_get_vp_assist_page(cpu);
15138
15139 if (!vp_ap)
15140 continue;
15141
15142 vp_ap->current_nested_vmcs = 0;
15143 vp_ap->enlighten_vmentry = 0;
15144 }
15145
15146 static_branch_disable(&enable_evmcs);
15147 }
15148#endif
15149 vmx_cleanup_l1d_flush();
15150}
15151module_exit(vmx_exit);
15152
Avi Kivity6aa8b732006-12-10 02:21:36 -080015153static int __init vmx_init(void)
15154{
Vitaly Kuznetsov773e8a02018-03-20 15:02:11 +010015155 int r;
15156
15157#if IS_ENABLED(CONFIG_HYPERV)
15158 /*
15159 * Enlightened VMCS usage must be recommended by Hyper-V and the host
15160 * needs to support eVMCS v1 or above. eVMCS support can also be
15161 * disabled with the 'enlightened_vmcs' module parameter.
15162 */
15163 if (enlightened_vmcs &&
15164 ms_hyperv.hints & HV_X64_ENLIGHTENED_VMCS_RECOMMENDED &&
15165 (ms_hyperv.nested_features & HV_X64_ENLIGHTENED_VMCS_VERSION) >=
15166 KVM_EVMCS_VERSION) {
15167 int cpu;
15168
15169 /* Check that we have assist pages on all online CPUs */
15170 for_each_online_cpu(cpu) {
15171 if (!hv_get_vp_assist_page(cpu)) {
15172 enlightened_vmcs = false;
15173 break;
15174 }
15175 }
15176
15177 if (enlightened_vmcs) {
15178 pr_info("KVM: vmx: using Hyper-V Enlightened VMCS\n");
15179 static_branch_enable(&enable_evmcs);
15180 }
15181 } else {
15182 enlightened_vmcs = false;
15183 }
15184#endif
15185
15186 r = kvm_init(&vmx_x86_ops, sizeof(struct vcpu_vmx),
Thomas Gleixnera7b90202018-07-13 16:23:18 +020015187 __alignof__(struct vcpu_vmx), THIS_MODULE);
He, Qingfdef3ad2007-04-30 09:45:24 +030015188 if (r)
Tiejun Chen34a1cd62014-10-28 10:14:48 +080015189 return r;
Sheng Yang25c5f222008-03-28 13:18:56 +080015190
Thomas Gleixnera7b90202018-07-13 16:23:18 +020015191 /*
Thomas Gleixner7db92e12018-07-13 16:23:19 +020015192 * Must be called after kvm_init() so that enable_ept is properly
15193 * set up. Hand in the mitigation parameter value that was stored
15194 * by the pre-module-init parser. If no parameter was given, it
15195 * will contain 'auto', which is turned into the default 'cond'
15196 * mitigation mode.
Thomas Gleixnera7b90202018-07-13 16:23:18 +020015197 */
Thomas Gleixner7db92e12018-07-13 16:23:19 +020015198 if (boot_cpu_has(X86_BUG_L1TF)) {
15199 r = vmx_setup_l1d_flush(vmentry_l1d_flush_param);
15200 if (r) {
15201 vmx_exit();
15202 return r;
15203 }
Paolo Bonzinia47dd5f2018-07-02 12:47:38 +020015204 }
15205
Dave Young2965faa2015-09-09 15:38:55 -070015206#ifdef CONFIG_KEXEC_CORE
Zhang Yanfei8f536b72012-12-06 23:43:34 +080015207 rcu_assign_pointer(crash_vmclear_loaded_vmcss,
15208 crash_vmclear_local_loaded_vmcss);
15209#endif
Jim Mattson21ebf532018-05-01 15:40:28 -070015210 vmx_check_vmcs12_offsets();
Zhang Yanfei8f536b72012-12-06 23:43:34 +080015211
He, Qingfdef3ad2007-04-30 09:45:24 +030015212 return 0;
Avi Kivity6aa8b732006-12-10 02:21:36 -080015213}
Thomas Gleixnera7b90202018-07-13 16:23:18 +020015214module_init(vmx_init);