KVM: nVMX: Introduce vmread and vmwrite bitmaps
Prepare vmread and vmwrite bitmaps according to a pre-specified list of fields.
These lists are intended to specifiy most frequent accessed fields so we can
minimize the number of fields that are copied from/to the software controlled
VMCS12 format to/from to processor-specific shadow vmcs. The lists were built
measuring the VMCS fields access rate after L2 Ubuntu 12.04 booted when it was
running on top of L1 KVM, also Ubuntu 12.04. Note that during boot there were
additional fields which were frequently modified but they were not added to
these lists because after boot these fields were not longer accessed by L1.
Signed-off-by: Abel Gordon <abelg@il.ibm.com>
Reviewed-by: Orit Wasserman <owasserm@redhat.com>
Signed-off-by: Gleb Natapov <gleb@redhat.com>
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 7042b69..7dc5996 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -484,6 +484,64 @@
#define FIELD64(number, name) [number] = VMCS12_OFFSET(name), \
[number##_HIGH] = VMCS12_OFFSET(name)+4
+
+static const unsigned long shadow_read_only_fields[] = {
+ /*
+ * We do NOT shadow fields that are modified when L0
+ * traps and emulates any vmx instruction (e.g. VMPTRLD,
+ * VMXON...) executed by L1.
+ * For example, VM_INSTRUCTION_ERROR is read
+ * by L1 if a vmx instruction fails (part of the error path).
+ * Note the code assumes this logic. If for some reason
+ * we start shadowing these fields then we need to
+ * force a shadow sync when L0 emulates vmx instructions
+ * (e.g. force a sync if VM_INSTRUCTION_ERROR is modified
+ * by nested_vmx_failValid)
+ */
+ VM_EXIT_REASON,
+ VM_EXIT_INTR_INFO,
+ VM_EXIT_INSTRUCTION_LEN,
+ IDT_VECTORING_INFO_FIELD,
+ IDT_VECTORING_ERROR_CODE,
+ VM_EXIT_INTR_ERROR_CODE,
+ EXIT_QUALIFICATION,
+ GUEST_LINEAR_ADDRESS,
+ GUEST_PHYSICAL_ADDRESS
+};
+static const int max_shadow_read_only_fields =
+ ARRAY_SIZE(shadow_read_only_fields);
+
+static const unsigned long shadow_read_write_fields[] = {
+ GUEST_RIP,
+ GUEST_RSP,
+ GUEST_CR0,
+ GUEST_CR3,
+ GUEST_CR4,
+ GUEST_INTERRUPTIBILITY_INFO,
+ GUEST_RFLAGS,
+ GUEST_CS_SELECTOR,
+ GUEST_CS_AR_BYTES,
+ GUEST_CS_LIMIT,
+ GUEST_CS_BASE,
+ GUEST_ES_BASE,
+ CR0_GUEST_HOST_MASK,
+ CR0_READ_SHADOW,
+ CR4_READ_SHADOW,
+ TSC_OFFSET,
+ EXCEPTION_BITMAP,
+ CPU_BASED_VM_EXEC_CONTROL,
+ VM_ENTRY_EXCEPTION_ERROR_CODE,
+ VM_ENTRY_INTR_INFO_FIELD,
+ VM_ENTRY_INSTRUCTION_LEN,
+ VM_ENTRY_EXCEPTION_ERROR_CODE,
+ HOST_FS_BASE,
+ HOST_GS_BASE,
+ HOST_FS_SELECTOR,
+ HOST_GS_SELECTOR
+};
+static const int max_shadow_read_write_fields =
+ ARRAY_SIZE(shadow_read_write_fields);
+
static const unsigned short vmcs_field_to_offset_table[] = {
FIELD(VIRTUAL_PROCESSOR_ID, virtual_processor_id),
FIELD(GUEST_ES_SELECTOR, guest_es_selector),
@@ -675,6 +733,8 @@
static unsigned long *vmx_msr_bitmap_longmode;
static unsigned long *vmx_msr_bitmap_legacy_x2apic;
static unsigned long *vmx_msr_bitmap_longmode_x2apic;
+static unsigned long *vmx_vmread_bitmap;
+static unsigned long *vmx_vmwrite_bitmap;
static bool cpu_has_load_ia32_efer;
static bool cpu_has_load_perf_global_ctrl;
@@ -4128,6 +4188,10 @@
vmcs_write64(IO_BITMAP_A, __pa(vmx_io_bitmap_a));
vmcs_write64(IO_BITMAP_B, __pa(vmx_io_bitmap_b));
+ if (enable_shadow_vmcs) {
+ vmcs_write64(VMREAD_BITMAP, __pa(vmx_vmread_bitmap));
+ vmcs_write64(VMWRITE_BITMAP, __pa(vmx_vmwrite_bitmap));
+ }
if (cpu_has_vmx_msr_bitmap())
vmcs_write64(MSR_BITMAP, __pa(vmx_msr_bitmap_legacy));
@@ -7941,6 +8005,24 @@
(unsigned long *)__get_free_page(GFP_KERNEL);
if (!vmx_msr_bitmap_longmode_x2apic)
goto out4;
+ vmx_vmread_bitmap = (unsigned long *)__get_free_page(GFP_KERNEL);
+ if (!vmx_vmread_bitmap)
+ goto out5;
+
+ vmx_vmwrite_bitmap = (unsigned long *)__get_free_page(GFP_KERNEL);
+ if (!vmx_vmwrite_bitmap)
+ goto out6;
+
+ memset(vmx_vmread_bitmap, 0xff, PAGE_SIZE);
+ memset(vmx_vmwrite_bitmap, 0xff, PAGE_SIZE);
+ /* shadowed read/write fields */
+ for (i = 0; i < max_shadow_read_write_fields; i++) {
+ clear_bit(shadow_read_write_fields[i], vmx_vmwrite_bitmap);
+ clear_bit(shadow_read_write_fields[i], vmx_vmread_bitmap);
+ }
+ /* shadowed read only fields */
+ for (i = 0; i < max_shadow_read_only_fields; i++)
+ clear_bit(shadow_read_only_fields[i], vmx_vmread_bitmap);
/*
* Allow direct access to the PC debug port (it is often used for I/O
@@ -7959,7 +8041,7 @@
r = kvm_init(&vmx_x86_ops, sizeof(struct vcpu_vmx),
__alignof__(struct vcpu_vmx), THIS_MODULE);
if (r)
- goto out5;
+ goto out7;
#ifdef CONFIG_KEXEC
rcu_assign_pointer(crash_vmclear_loaded_vmcss,
@@ -8007,6 +8089,10 @@
return 0;
+out7:
+ free_page((unsigned long)vmx_vmwrite_bitmap);
+out6:
+ free_page((unsigned long)vmx_vmread_bitmap);
out5:
free_page((unsigned long)vmx_msr_bitmap_longmode_x2apic);
out4:
@@ -8030,6 +8116,8 @@
free_page((unsigned long)vmx_msr_bitmap_longmode);
free_page((unsigned long)vmx_io_bitmap_b);
free_page((unsigned long)vmx_io_bitmap_a);
+ free_page((unsigned long)vmx_vmwrite_bitmap);
+ free_page((unsigned long)vmx_vmread_bitmap);
#ifdef CONFIG_KEXEC
rcu_assign_pointer(crash_vmclear_loaded_vmcss, NULL);