Merge tag 'kvm-s390-next-20141107' of git://git.kernel.org/pub/scm/linux/kernel/git/kvms390/linux into HEAD
KVM: s390: Fixes for kvm/next (3.19) and stable
1. We should flush TLBs for load control instruction emulation (stable)
2. A workaround for a compiler bug that renders ACCESS_ONCE broken (stable)
3. Fix program check handling for load control
4. Documentation Fix
diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt
index 7610eaa..7a943c2 100644
--- a/Documentation/virtual/kvm/api.txt
+++ b/Documentation/virtual/kvm/api.txt
@@ -68,9 +68,12 @@
Capability: which KVM extension provides this ioctl. Can be 'basic',
which means that is will be provided by any kernel that supports
- API version 12 (see section 4.1), or a KVM_CAP_xyz constant, which
+ API version 12 (see section 4.1), a KVM_CAP_xyz constant, which
means availability needs to be checked with KVM_CHECK_EXTENSION
- (see section 4.4).
+ (see section 4.4), or 'none' which means that while not all kernels
+ support this ioctl, there's no capability bit to check its
+ availability: for kernels that don't support the ioctl,
+ the ioctl returns -ENOTTY.
Architectures: which instruction set architectures provide this ioctl.
x86 includes both i386 and x86_64.
@@ -1257,7 +1260,7 @@
4.48 KVM_ASSIGN_PCI_DEVICE
-Capability: KVM_CAP_DEVICE_ASSIGNMENT
+Capability: none
Architectures: x86 ia64
Type: vm ioctl
Parameters: struct kvm_assigned_pci_dev (in)
@@ -1298,10 +1301,16 @@
device assignment. The user requesting this ioctl must have read/write
access to the PCI sysfs resource files associated with the device.
+Errors:
+ ENOTTY: kernel does not support this ioctl
+
+ Other error conditions may be defined by individual device types or
+ have their standard meanings.
+
4.49 KVM_DEASSIGN_PCI_DEVICE
-Capability: KVM_CAP_DEVICE_DEASSIGNMENT
+Capability: none
Architectures: x86 ia64
Type: vm ioctl
Parameters: struct kvm_assigned_pci_dev (in)
@@ -1309,9 +1318,14 @@
Ends PCI device assignment, releasing all associated resources.
-See KVM_CAP_DEVICE_ASSIGNMENT for the data structure. Only assigned_dev_id is
+See KVM_ASSIGN_PCI_DEVICE for the data structure. Only assigned_dev_id is
used in kvm_assigned_pci_dev to identify the device.
+Errors:
+ ENOTTY: kernel does not support this ioctl
+
+ Other error conditions may be defined by individual device types or
+ have their standard meanings.
4.50 KVM_ASSIGN_DEV_IRQ
@@ -1346,6 +1360,12 @@
It is not valid to specify multiple types per host or guest IRQ. However, the
IRQ type of host and guest can differ or can even be null.
+Errors:
+ ENOTTY: kernel does not support this ioctl
+
+ Other error conditions may be defined by individual device types or
+ have their standard meanings.
+
4.51 KVM_DEASSIGN_DEV_IRQ
@@ -1423,7 +1443,7 @@
4.53 KVM_ASSIGN_SET_MSIX_NR
-Capability: KVM_CAP_DEVICE_MSIX
+Capability: none
Architectures: x86 ia64
Type: vm ioctl
Parameters: struct kvm_assigned_msix_nr (in)
@@ -1445,7 +1465,7 @@
4.54 KVM_ASSIGN_SET_MSIX_ENTRY
-Capability: KVM_CAP_DEVICE_MSIX
+Capability: none
Architectures: x86 ia64
Type: vm ioctl
Parameters: struct kvm_assigned_msix_entry (in)
@@ -1461,6 +1481,12 @@
__u16 padding[3];
};
+Errors:
+ ENOTTY: kernel does not support this ioctl
+
+ Other error conditions may be defined by individual device types or
+ have their standard meanings.
+
4.55 KVM_SET_TSC_KHZ
diff --git a/Documentation/virtual/kvm/msr.txt b/Documentation/virtual/kvm/msr.txt
index 6d470ae..2a71c8f 100644
--- a/Documentation/virtual/kvm/msr.txt
+++ b/Documentation/virtual/kvm/msr.txt
@@ -168,7 +168,7 @@
64 byte memory area which must be in guest RAM and must be
zeroed. Bits 5-2 are reserved and should be zero. Bit 0 is 1
when asynchronous page faults are enabled on the vcpu 0 when
- disabled. Bit 2 is 1 if asynchronous page faults can be injected
+ disabled. Bit 1 is 1 if asynchronous page faults can be injected
when vcpu is in cpl == 0.
First 4 byte of 64 byte memory location will be written to by
diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h
index 2175f911..7e02d77 100644
--- a/arch/s390/include/asm/kvm_host.h
+++ b/arch/s390/include/asm/kvm_host.h
@@ -226,10 +226,17 @@
u32 instruction_sigp_sense_running;
u32 instruction_sigp_external_call;
u32 instruction_sigp_emergency;
+ u32 instruction_sigp_cond_emergency;
+ u32 instruction_sigp_start;
u32 instruction_sigp_stop;
+ u32 instruction_sigp_stop_store_status;
+ u32 instruction_sigp_store_status;
u32 instruction_sigp_arch;
u32 instruction_sigp_prefix;
u32 instruction_sigp_restart;
+ u32 instruction_sigp_init_cpu_reset;
+ u32 instruction_sigp_cpu_reset;
+ u32 instruction_sigp_unknown;
u32 diagnose_10;
u32 diagnose_44;
u32 diagnose_9c;
@@ -434,6 +441,8 @@
int user_cpu_state_ctrl;
struct s390_io_adapter *adapters[MAX_S390_IO_ADAPTERS];
wait_queue_head_t ipte_wq;
+ int ipte_lock_count;
+ struct mutex ipte_mutex;
spinlock_t start_stop_lock;
struct kvm_s390_crypto crypto;
};
diff --git a/arch/s390/include/asm/sigp.h b/arch/s390/include/asm/sigp.h
index 4957611..fad4ae2 100644
--- a/arch/s390/include/asm/sigp.h
+++ b/arch/s390/include/asm/sigp.h
@@ -10,6 +10,7 @@
#define SIGP_RESTART 6
#define SIGP_STOP_AND_STORE_STATUS 9
#define SIGP_INITIAL_CPU_RESET 11
+#define SIGP_CPU_RESET 12
#define SIGP_SET_PREFIX 13
#define SIGP_STORE_STATUS_AT_ADDRESS 14
#define SIGP_SET_ARCHITECTURE 18
diff --git a/arch/s390/kvm/gaccess.c b/arch/s390/kvm/gaccess.c
index 6dc0ad9..8b9ccf0 100644
--- a/arch/s390/kvm/gaccess.c
+++ b/arch/s390/kvm/gaccess.c
@@ -207,8 +207,6 @@
unsigned long pfra : 52; /* Page-Frame Real Address */
};
-static int ipte_lock_count;
-static DEFINE_MUTEX(ipte_mutex);
int ipte_lock_held(struct kvm_vcpu *vcpu)
{
@@ -216,16 +214,16 @@
if (vcpu->arch.sie_block->eca & 1)
return ic->kh != 0;
- return ipte_lock_count != 0;
+ return vcpu->kvm->arch.ipte_lock_count != 0;
}
static void ipte_lock_simple(struct kvm_vcpu *vcpu)
{
union ipte_control old, new, *ic;
- mutex_lock(&ipte_mutex);
- ipte_lock_count++;
- if (ipte_lock_count > 1)
+ mutex_lock(&vcpu->kvm->arch.ipte_mutex);
+ vcpu->kvm->arch.ipte_lock_count++;
+ if (vcpu->kvm->arch.ipte_lock_count > 1)
goto out;
ic = &vcpu->kvm->arch.sca->ipte_control;
do {
@@ -240,16 +238,16 @@
new.k = 1;
} while (cmpxchg(&ic->val, old.val, new.val) != old.val);
out:
- mutex_unlock(&ipte_mutex);
+ mutex_unlock(&vcpu->kvm->arch.ipte_mutex);
}
static void ipte_unlock_simple(struct kvm_vcpu *vcpu)
{
union ipte_control old, new, *ic;
- mutex_lock(&ipte_mutex);
- ipte_lock_count--;
- if (ipte_lock_count)
+ mutex_lock(&vcpu->kvm->arch.ipte_mutex);
+ vcpu->kvm->arch.ipte_lock_count--;
+ if (vcpu->kvm->arch.ipte_lock_count)
goto out;
ic = &vcpu->kvm->arch.sca->ipte_control;
do {
@@ -260,7 +258,7 @@
} while (cmpxchg(&ic->val, old.val, new.val) != old.val);
wake_up(&vcpu->kvm->arch.ipte_wq);
out:
- mutex_unlock(&ipte_mutex);
+ mutex_unlock(&vcpu->kvm->arch.ipte_mutex);
}
static void ipte_lock_siif(struct kvm_vcpu *vcpu)
diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c
index a398384..4fc3fed 100644
--- a/arch/s390/kvm/interrupt.c
+++ b/arch/s390/kvm/interrupt.c
@@ -270,7 +270,7 @@
break;
case PGM_MONITOR:
rc = put_guest_lc(vcpu, pgm_info->mon_class_nr,
- (u64 *)__LC_MON_CLASS_NR);
+ (u16 *)__LC_MON_CLASS_NR);
rc |= put_guest_lc(vcpu, pgm_info->mon_code,
(u64 *)__LC_MON_CODE);
break;
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index 55aade4..06878bd 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -81,10 +81,17 @@
{ "instruction_sigp_sense_running", VCPU_STAT(instruction_sigp_sense_running) },
{ "instruction_sigp_external_call", VCPU_STAT(instruction_sigp_external_call) },
{ "instruction_sigp_emergency", VCPU_STAT(instruction_sigp_emergency) },
+ { "instruction_sigp_cond_emergency", VCPU_STAT(instruction_sigp_cond_emergency) },
+ { "instruction_sigp_start", VCPU_STAT(instruction_sigp_start) },
{ "instruction_sigp_stop", VCPU_STAT(instruction_sigp_stop) },
+ { "instruction_sigp_stop_store_status", VCPU_STAT(instruction_sigp_stop_store_status) },
+ { "instruction_sigp_store_status", VCPU_STAT(instruction_sigp_store_status) },
{ "instruction_sigp_set_arch", VCPU_STAT(instruction_sigp_arch) },
{ "instruction_sigp_set_prefix", VCPU_STAT(instruction_sigp_prefix) },
{ "instruction_sigp_restart", VCPU_STAT(instruction_sigp_restart) },
+ { "instruction_sigp_cpu_reset", VCPU_STAT(instruction_sigp_cpu_reset) },
+ { "instruction_sigp_init_cpu_reset", VCPU_STAT(instruction_sigp_init_cpu_reset) },
+ { "instruction_sigp_unknown", VCPU_STAT(instruction_sigp_unknown) },
{ "diagnose_10", VCPU_STAT(diagnose_10) },
{ "diagnose_44", VCPU_STAT(diagnose_44) },
{ "diagnose_9c", VCPU_STAT(diagnose_9c) },
@@ -453,6 +460,7 @@
spin_lock_init(&kvm->arch.float_int.lock);
INIT_LIST_HEAD(&kvm->arch.float_int.list);
init_waitqueue_head(&kvm->arch.ipte_wq);
+ mutex_init(&kvm->arch.ipte_mutex);
debug_register_view(kvm->arch.dbf, &debug_sprintf_view);
VM_EVENT(kvm, 3, "%s", "vm created");
diff --git a/arch/s390/kvm/sigp.c b/arch/s390/kvm/sigp.c
index cf243ba..f7cd3f7 100644
--- a/arch/s390/kvm/sigp.c
+++ b/arch/s390/kvm/sigp.c
@@ -20,20 +20,13 @@
#include "kvm-s390.h"
#include "trace.h"
-static int __sigp_sense(struct kvm_vcpu *vcpu, u16 cpu_addr,
+static int __sigp_sense(struct kvm_vcpu *vcpu, struct kvm_vcpu *dst_vcpu,
u64 *reg)
{
struct kvm_s390_local_interrupt *li;
- struct kvm_vcpu *dst_vcpu = NULL;
int cpuflags;
int rc;
- if (cpu_addr >= KVM_MAX_VCPUS)
- return SIGP_CC_NOT_OPERATIONAL;
-
- dst_vcpu = kvm_get_vcpu(vcpu->kvm, cpu_addr);
- if (!dst_vcpu)
- return SIGP_CC_NOT_OPERATIONAL;
li = &dst_vcpu->arch.local_int;
cpuflags = atomic_read(li->cpuflags);
@@ -48,55 +41,53 @@
rc = SIGP_CC_STATUS_STORED;
}
- VCPU_EVENT(vcpu, 4, "sensed status of cpu %x rc %x", cpu_addr, rc);
+ VCPU_EVENT(vcpu, 4, "sensed status of cpu %x rc %x", dst_vcpu->vcpu_id,
+ rc);
return rc;
}
-static int __sigp_emergency(struct kvm_vcpu *vcpu, u16 cpu_addr)
+static int __inject_sigp_emergency(struct kvm_vcpu *vcpu,
+ struct kvm_vcpu *dst_vcpu)
{
struct kvm_s390_interrupt s390int = {
.type = KVM_S390_INT_EMERGENCY,
.parm = vcpu->vcpu_id,
};
- struct kvm_vcpu *dst_vcpu = NULL;
int rc = 0;
- if (cpu_addr < KVM_MAX_VCPUS)
- dst_vcpu = kvm_get_vcpu(vcpu->kvm, cpu_addr);
- if (!dst_vcpu)
- return SIGP_CC_NOT_OPERATIONAL;
-
rc = kvm_s390_inject_vcpu(dst_vcpu, &s390int);
if (!rc)
- VCPU_EVENT(vcpu, 4, "sent sigp emerg to cpu %x", cpu_addr);
+ VCPU_EVENT(vcpu, 4, "sent sigp emerg to cpu %x",
+ dst_vcpu->vcpu_id);
return rc ? rc : SIGP_CC_ORDER_CODE_ACCEPTED;
}
-static int __sigp_conditional_emergency(struct kvm_vcpu *vcpu, u16 cpu_addr,
+static int __sigp_emergency(struct kvm_vcpu *vcpu, struct kvm_vcpu *dst_vcpu)
+{
+ return __inject_sigp_emergency(vcpu, dst_vcpu);
+}
+
+static int __sigp_conditional_emergency(struct kvm_vcpu *vcpu,
+ struct kvm_vcpu *dst_vcpu,
u16 asn, u64 *reg)
{
- struct kvm_vcpu *dst_vcpu = NULL;
const u64 psw_int_mask = PSW_MASK_IO | PSW_MASK_EXT;
u16 p_asn, s_asn;
psw_t *psw;
u32 flags;
- if (cpu_addr < KVM_MAX_VCPUS)
- dst_vcpu = kvm_get_vcpu(vcpu->kvm, cpu_addr);
- if (!dst_vcpu)
- return SIGP_CC_NOT_OPERATIONAL;
flags = atomic_read(&dst_vcpu->arch.sie_block->cpuflags);
psw = &dst_vcpu->arch.sie_block->gpsw;
p_asn = dst_vcpu->arch.sie_block->gcr[4] & 0xffff; /* Primary ASN */
s_asn = dst_vcpu->arch.sie_block->gcr[3] & 0xffff; /* Secondary ASN */
- /* Deliver the emergency signal? */
+ /* Inject the emergency signal? */
if (!(flags & CPUSTAT_STOPPED)
|| (psw->mask & psw_int_mask) != psw_int_mask
|| ((flags & CPUSTAT_WAIT) && psw->addr != 0)
|| (!(flags & CPUSTAT_WAIT) && (asn == p_asn || asn == s_asn))) {
- return __sigp_emergency(vcpu, cpu_addr);
+ return __inject_sigp_emergency(vcpu, dst_vcpu);
} else {
*reg &= 0xffffffff00000000UL;
*reg |= SIGP_STATUS_INCORRECT_STATE;
@@ -104,23 +95,19 @@
}
}
-static int __sigp_external_call(struct kvm_vcpu *vcpu, u16 cpu_addr)
+static int __sigp_external_call(struct kvm_vcpu *vcpu,
+ struct kvm_vcpu *dst_vcpu)
{
struct kvm_s390_interrupt s390int = {
.type = KVM_S390_INT_EXTERNAL_CALL,
.parm = vcpu->vcpu_id,
};
- struct kvm_vcpu *dst_vcpu = NULL;
int rc;
- if (cpu_addr < KVM_MAX_VCPUS)
- dst_vcpu = kvm_get_vcpu(vcpu->kvm, cpu_addr);
- if (!dst_vcpu)
- return SIGP_CC_NOT_OPERATIONAL;
-
rc = kvm_s390_inject_vcpu(dst_vcpu, &s390int);
if (!rc)
- VCPU_EVENT(vcpu, 4, "sent sigp ext call to cpu %x", cpu_addr);
+ VCPU_EVENT(vcpu, 4, "sent sigp ext call to cpu %x",
+ dst_vcpu->vcpu_id);
return rc ? rc : SIGP_CC_ORDER_CODE_ACCEPTED;
}
@@ -160,23 +147,27 @@
return rc;
}
-static int __sigp_stop(struct kvm_vcpu *vcpu, u16 cpu_addr, int action)
+static int __sigp_stop(struct kvm_vcpu *vcpu, struct kvm_vcpu *dst_vcpu)
{
- struct kvm_vcpu *dst_vcpu = NULL;
int rc;
- if (cpu_addr >= KVM_MAX_VCPUS)
- return SIGP_CC_NOT_OPERATIONAL;
+ rc = __inject_sigp_stop(dst_vcpu, ACTION_STOP_ON_STOP);
+ VCPU_EVENT(vcpu, 4, "sent sigp stop to cpu %x", dst_vcpu->vcpu_id);
- dst_vcpu = kvm_get_vcpu(vcpu->kvm, cpu_addr);
- if (!dst_vcpu)
- return SIGP_CC_NOT_OPERATIONAL;
+ return rc;
+}
- rc = __inject_sigp_stop(dst_vcpu, action);
+static int __sigp_stop_and_store_status(struct kvm_vcpu *vcpu,
+ struct kvm_vcpu *dst_vcpu, u64 *reg)
+{
+ int rc;
- VCPU_EVENT(vcpu, 4, "sent sigp stop to cpu %x", cpu_addr);
+ rc = __inject_sigp_stop(dst_vcpu, ACTION_STOP_ON_STOP |
+ ACTION_STORE_ON_STOP);
+ VCPU_EVENT(vcpu, 4, "sent sigp stop and store status to cpu %x",
+ dst_vcpu->vcpu_id);
- if ((action & ACTION_STORE_ON_STOP) != 0 && rc == -ESHUTDOWN) {
+ if (rc == -ESHUTDOWN) {
/* If the CPU has already been stopped, we still have
* to save the status when doing stop-and-store. This
* has to be done after unlocking all spinlocks. */
@@ -212,18 +203,13 @@
return rc;
}
-static int __sigp_set_prefix(struct kvm_vcpu *vcpu, u16 cpu_addr, u32 address,
- u64 *reg)
+static int __sigp_set_prefix(struct kvm_vcpu *vcpu, struct kvm_vcpu *dst_vcpu,
+ u32 address, u64 *reg)
{
struct kvm_s390_local_interrupt *li;
- struct kvm_vcpu *dst_vcpu = NULL;
struct kvm_s390_interrupt_info *inti;
int rc;
- if (cpu_addr < KVM_MAX_VCPUS)
- dst_vcpu = kvm_get_vcpu(vcpu->kvm, cpu_addr);
- if (!dst_vcpu)
- return SIGP_CC_NOT_OPERATIONAL;
li = &dst_vcpu->arch.local_int;
/*
@@ -260,24 +246,20 @@
kvm_s390_vcpu_wakeup(dst_vcpu);
rc = SIGP_CC_ORDER_CODE_ACCEPTED;
- VCPU_EVENT(vcpu, 4, "set prefix of cpu %02x to %x", cpu_addr, address);
+ VCPU_EVENT(vcpu, 4, "set prefix of cpu %02x to %x", dst_vcpu->vcpu_id,
+ address);
out_li:
spin_unlock(&li->lock);
return rc;
}
-static int __sigp_store_status_at_addr(struct kvm_vcpu *vcpu, u16 cpu_id,
- u32 addr, u64 *reg)
+static int __sigp_store_status_at_addr(struct kvm_vcpu *vcpu,
+ struct kvm_vcpu *dst_vcpu,
+ u32 addr, u64 *reg)
{
- struct kvm_vcpu *dst_vcpu = NULL;
int flags;
int rc;
- if (cpu_id < KVM_MAX_VCPUS)
- dst_vcpu = kvm_get_vcpu(vcpu->kvm, cpu_id);
- if (!dst_vcpu)
- return SIGP_CC_NOT_OPERATIONAL;
-
spin_lock(&dst_vcpu->arch.local_int.lock);
flags = atomic_read(dst_vcpu->arch.local_int.cpuflags);
spin_unlock(&dst_vcpu->arch.local_int.lock);
@@ -297,19 +279,12 @@
return rc;
}
-static int __sigp_sense_running(struct kvm_vcpu *vcpu, u16 cpu_addr,
- u64 *reg)
+static int __sigp_sense_running(struct kvm_vcpu *vcpu,
+ struct kvm_vcpu *dst_vcpu, u64 *reg)
{
struct kvm_s390_local_interrupt *li;
- struct kvm_vcpu *dst_vcpu = NULL;
int rc;
- if (cpu_addr >= KVM_MAX_VCPUS)
- return SIGP_CC_NOT_OPERATIONAL;
-
- dst_vcpu = kvm_get_vcpu(vcpu->kvm, cpu_addr);
- if (!dst_vcpu)
- return SIGP_CC_NOT_OPERATIONAL;
li = &dst_vcpu->arch.local_int;
if (atomic_read(li->cpuflags) & CPUSTAT_RUNNING) {
/* running */
@@ -321,18 +296,46 @@
rc = SIGP_CC_STATUS_STORED;
}
- VCPU_EVENT(vcpu, 4, "sensed running status of cpu %x rc %x", cpu_addr,
- rc);
+ VCPU_EVENT(vcpu, 4, "sensed running status of cpu %x rc %x",
+ dst_vcpu->vcpu_id, rc);
return rc;
}
-/* Test whether the destination CPU is available and not busy */
-static int sigp_check_callable(struct kvm_vcpu *vcpu, u16 cpu_addr)
+static int __prepare_sigp_re_start(struct kvm_vcpu *vcpu,
+ struct kvm_vcpu *dst_vcpu, u8 order_code)
{
- struct kvm_s390_local_interrupt *li;
- int rc = SIGP_CC_ORDER_CODE_ACCEPTED;
- struct kvm_vcpu *dst_vcpu = NULL;
+ struct kvm_s390_local_interrupt *li = &dst_vcpu->arch.local_int;
+ /* handle (RE)START in user space */
+ int rc = -EOPNOTSUPP;
+
+ spin_lock(&li->lock);
+ if (li->action_bits & ACTION_STOP_ON_STOP)
+ rc = SIGP_CC_BUSY;
+ spin_unlock(&li->lock);
+
+ return rc;
+}
+
+static int __prepare_sigp_cpu_reset(struct kvm_vcpu *vcpu,
+ struct kvm_vcpu *dst_vcpu, u8 order_code)
+{
+ /* handle (INITIAL) CPU RESET in user space */
+ return -EOPNOTSUPP;
+}
+
+static int __prepare_sigp_unknown(struct kvm_vcpu *vcpu,
+ struct kvm_vcpu *dst_vcpu)
+{
+ /* handle unknown orders in user space */
+ return -EOPNOTSUPP;
+}
+
+static int handle_sigp_dst(struct kvm_vcpu *vcpu, u8 order_code,
+ u16 cpu_addr, u32 parameter, u64 *status_reg)
+{
+ int rc;
+ struct kvm_vcpu *dst_vcpu;
if (cpu_addr >= KVM_MAX_VCPUS)
return SIGP_CC_NOT_OPERATIONAL;
@@ -340,11 +343,71 @@
dst_vcpu = kvm_get_vcpu(vcpu->kvm, cpu_addr);
if (!dst_vcpu)
return SIGP_CC_NOT_OPERATIONAL;
- li = &dst_vcpu->arch.local_int;
- spin_lock(&li->lock);
- if (li->action_bits & ACTION_STOP_ON_STOP)
- rc = SIGP_CC_BUSY;
- spin_unlock(&li->lock);
+
+ switch (order_code) {
+ case SIGP_SENSE:
+ vcpu->stat.instruction_sigp_sense++;
+ rc = __sigp_sense(vcpu, dst_vcpu, status_reg);
+ break;
+ case SIGP_EXTERNAL_CALL:
+ vcpu->stat.instruction_sigp_external_call++;
+ rc = __sigp_external_call(vcpu, dst_vcpu);
+ break;
+ case SIGP_EMERGENCY_SIGNAL:
+ vcpu->stat.instruction_sigp_emergency++;
+ rc = __sigp_emergency(vcpu, dst_vcpu);
+ break;
+ case SIGP_STOP:
+ vcpu->stat.instruction_sigp_stop++;
+ rc = __sigp_stop(vcpu, dst_vcpu);
+ break;
+ case SIGP_STOP_AND_STORE_STATUS:
+ vcpu->stat.instruction_sigp_stop_store_status++;
+ rc = __sigp_stop_and_store_status(vcpu, dst_vcpu, status_reg);
+ break;
+ case SIGP_STORE_STATUS_AT_ADDRESS:
+ vcpu->stat.instruction_sigp_store_status++;
+ rc = __sigp_store_status_at_addr(vcpu, dst_vcpu, parameter,
+ status_reg);
+ break;
+ case SIGP_SET_PREFIX:
+ vcpu->stat.instruction_sigp_prefix++;
+ rc = __sigp_set_prefix(vcpu, dst_vcpu, parameter, status_reg);
+ break;
+ case SIGP_COND_EMERGENCY_SIGNAL:
+ vcpu->stat.instruction_sigp_cond_emergency++;
+ rc = __sigp_conditional_emergency(vcpu, dst_vcpu, parameter,
+ status_reg);
+ break;
+ case SIGP_SENSE_RUNNING:
+ vcpu->stat.instruction_sigp_sense_running++;
+ rc = __sigp_sense_running(vcpu, dst_vcpu, status_reg);
+ break;
+ case SIGP_START:
+ vcpu->stat.instruction_sigp_start++;
+ rc = __prepare_sigp_re_start(vcpu, dst_vcpu, order_code);
+ break;
+ case SIGP_RESTART:
+ vcpu->stat.instruction_sigp_restart++;
+ rc = __prepare_sigp_re_start(vcpu, dst_vcpu, order_code);
+ break;
+ case SIGP_INITIAL_CPU_RESET:
+ vcpu->stat.instruction_sigp_init_cpu_reset++;
+ rc = __prepare_sigp_cpu_reset(vcpu, dst_vcpu, order_code);
+ break;
+ case SIGP_CPU_RESET:
+ vcpu->stat.instruction_sigp_cpu_reset++;
+ rc = __prepare_sigp_cpu_reset(vcpu, dst_vcpu, order_code);
+ break;
+ default:
+ vcpu->stat.instruction_sigp_unknown++;
+ rc = __prepare_sigp_unknown(vcpu, dst_vcpu);
+ }
+
+ if (rc == -EOPNOTSUPP)
+ VCPU_EVENT(vcpu, 4,
+ "sigp order %u -> cpu %x: handled in user space",
+ order_code, dst_vcpu->vcpu_id);
return rc;
}
@@ -371,68 +434,14 @@
trace_kvm_s390_handle_sigp(vcpu, order_code, cpu_addr, parameter);
switch (order_code) {
- case SIGP_SENSE:
- vcpu->stat.instruction_sigp_sense++;
- rc = __sigp_sense(vcpu, cpu_addr,
- &vcpu->run->s.regs.gprs[r1]);
- break;
- case SIGP_EXTERNAL_CALL:
- vcpu->stat.instruction_sigp_external_call++;
- rc = __sigp_external_call(vcpu, cpu_addr);
- break;
- case SIGP_EMERGENCY_SIGNAL:
- vcpu->stat.instruction_sigp_emergency++;
- rc = __sigp_emergency(vcpu, cpu_addr);
- break;
- case SIGP_STOP:
- vcpu->stat.instruction_sigp_stop++;
- rc = __sigp_stop(vcpu, cpu_addr, ACTION_STOP_ON_STOP);
- break;
- case SIGP_STOP_AND_STORE_STATUS:
- vcpu->stat.instruction_sigp_stop++;
- rc = __sigp_stop(vcpu, cpu_addr, ACTION_STORE_ON_STOP |
- ACTION_STOP_ON_STOP);
- break;
- case SIGP_STORE_STATUS_AT_ADDRESS:
- rc = __sigp_store_status_at_addr(vcpu, cpu_addr, parameter,
- &vcpu->run->s.regs.gprs[r1]);
- break;
case SIGP_SET_ARCHITECTURE:
vcpu->stat.instruction_sigp_arch++;
rc = __sigp_set_arch(vcpu, parameter);
break;
- case SIGP_SET_PREFIX:
- vcpu->stat.instruction_sigp_prefix++;
- rc = __sigp_set_prefix(vcpu, cpu_addr, parameter,
- &vcpu->run->s.regs.gprs[r1]);
- break;
- case SIGP_COND_EMERGENCY_SIGNAL:
- rc = __sigp_conditional_emergency(vcpu, cpu_addr, parameter,
- &vcpu->run->s.regs.gprs[r1]);
- break;
- case SIGP_SENSE_RUNNING:
- vcpu->stat.instruction_sigp_sense_running++;
- rc = __sigp_sense_running(vcpu, cpu_addr,
- &vcpu->run->s.regs.gprs[r1]);
- break;
- case SIGP_START:
- rc = sigp_check_callable(vcpu, cpu_addr);
- if (rc == SIGP_CC_ORDER_CODE_ACCEPTED)
- rc = -EOPNOTSUPP; /* Handle START in user space */
- break;
- case SIGP_RESTART:
- vcpu->stat.instruction_sigp_restart++;
- rc = sigp_check_callable(vcpu, cpu_addr);
- if (rc == SIGP_CC_ORDER_CODE_ACCEPTED) {
- VCPU_EVENT(vcpu, 4,
- "sigp restart %x to handle userspace",
- cpu_addr);
- /* user space must know about restart */
- rc = -EOPNOTSUPP;
- }
- break;
default:
- return -EOPNOTSUPP;
+ rc = handle_sigp_dst(vcpu, order_code, cpu_addr,
+ parameter,
+ &vcpu->run->s.regs.gprs[r1]);
}
if (rc < 0)
diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c
index 1b79ca6..cfecc24 100644
--- a/arch/s390/mm/pgtable.c
+++ b/arch/s390/mm/pgtable.c
@@ -936,7 +936,7 @@
down_read(&mm->mmap_sem);
retry:
- ptep = get_locked_pte(current->mm, addr, &ptl);
+ ptep = get_locked_pte(mm, addr, &ptl);
if (unlikely(!ptep)) {
up_read(&mm->mmap_sem);
return -EFAULT;
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 6ed0c30..3ac8076 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -542,7 +542,7 @@
struct rcu_head rcu;
u8 ldr_bits;
/* fields bellow are used to decode ldr values in different modes */
- u32 cid_shift, cid_mask, lid_mask;
+ u32 cid_shift, cid_mask, lid_mask, broadcast;
struct kvm_lapic *phys_map[256];
/* first index is cluster id second is cpu id in a cluster */
struct kvm_lapic *logical_map[16][16];
@@ -895,6 +895,7 @@
gfn_t gfn, void *data, int offset, int len,
u32 access);
bool kvm_require_cpl(struct kvm_vcpu *vcpu, int required_cpl);
+bool kvm_require_dr(struct kvm_vcpu *vcpu, int dr);
static inline int __kvm_irq_line_state(unsigned long *irq_state,
int irq_source_id, int level)
diff --git a/arch/x86/include/asm/xsave.h b/arch/x86/include/asm/xsave.h
index 7e7a79a..5fa9770 100644
--- a/arch/x86/include/asm/xsave.h
+++ b/arch/x86/include/asm/xsave.h
@@ -16,6 +16,7 @@
#define XSTATE_Hi16_ZMM 0x80
#define XSTATE_FPSSE (XSTATE_FP | XSTATE_SSE)
+#define XSTATE_AVX512 (XSTATE_OPMASK | XSTATE_ZMM_Hi256 | XSTATE_Hi16_ZMM)
/* Bit 63 of XCR0 is reserved for future expansion */
#define XSTATE_EXTEND_MASK (~(XSTATE_FPSSE | (1ULL << 63)))
diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c
index d9156ce..0bf3467 100644
--- a/arch/x86/kernel/kvmclock.c
+++ b/arch/x86/kernel/kvmclock.c
@@ -59,13 +59,12 @@
native_write_msr(msr_kvm_wall_clock, low, high);
- preempt_disable();
- cpu = smp_processor_id();
+ cpu = get_cpu();
vcpu_time = &hv_clock[cpu].pvti;
pvclock_read_wallclock(&wall_clock, vcpu_time, now);
- preempt_enable();
+ put_cpu();
}
static int kvm_set_wallclock(const struct timespec *now)
@@ -107,11 +106,10 @@
int cpu;
unsigned long tsc_khz;
- preempt_disable();
- cpu = smp_processor_id();
+ cpu = get_cpu();
src = &hv_clock[cpu].pvti;
tsc_khz = pvclock_tsc_khz(src);
- preempt_enable();
+ put_cpu();
return tsc_khz;
}
@@ -284,23 +282,22 @@
size = PAGE_ALIGN(sizeof(struct pvclock_vsyscall_time_info)*NR_CPUS);
- preempt_disable();
- cpu = smp_processor_id();
+ cpu = get_cpu();
vcpu_time = &hv_clock[cpu].pvti;
flags = pvclock_read_flags(vcpu_time);
if (!(flags & PVCLOCK_TSC_STABLE_BIT)) {
- preempt_enable();
+ put_cpu();
return 1;
}
if ((ret = pvclock_init_vsyscall(hv_clock, size))) {
- preempt_enable();
+ put_cpu();
return ret;
}
- preempt_enable();
+ put_cpu();
kvm_clock.archdata.vclock_mode = VCLOCK_PVCLOCK;
#endif
diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index 976e3a5..20d8321 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -317,7 +317,8 @@
const u32 kvm_supported_word9_x86_features =
F(FSGSBASE) | F(BMI1) | F(HLE) | F(AVX2) | F(SMEP) |
F(BMI2) | F(ERMS) | f_invpcid | F(RTM) | f_mpx | F(RDSEED) |
- F(ADX) | F(SMAP);
+ F(ADX) | F(SMAP) | F(AVX512F) | F(AVX512PF) | F(AVX512ER) |
+ F(AVX512CD);
/* all calls to cpuid_count() should be made on the same cpu */
get_cpu();
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index 5edf088..cd2029b 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -166,6 +166,7 @@
#define CheckPerm ((u64)1 << 49) /* Has valid check_perm field */
#define NoBigReal ((u64)1 << 50) /* No big real mode */
#define PrivUD ((u64)1 << 51) /* #UD instead of #GP on CPL > 0 */
+#define NearBranch ((u64)1 << 52) /* Near branches */
#define DstXacc (DstAccLo | SrcAccHi | SrcWrite)
@@ -641,11 +642,11 @@
return true;
}
-static int __linearize(struct x86_emulate_ctxt *ctxt,
- struct segmented_address addr,
- unsigned *max_size, unsigned size,
- bool write, bool fetch,
- ulong *linear)
+static __always_inline int __linearize(struct x86_emulate_ctxt *ctxt,
+ struct segmented_address addr,
+ unsigned *max_size, unsigned size,
+ bool write, bool fetch,
+ ulong *linear)
{
struct desc_struct desc;
bool usable;
@@ -654,11 +655,12 @@
u16 sel;
unsigned cpl;
- la = seg_base(ctxt, addr.seg) + addr.ea;
+ la = seg_base(ctxt, addr.seg) +
+ (fetch || ctxt->ad_bytes == 8 ? addr.ea : (u32)addr.ea);
*max_size = 0;
switch (ctxt->mode) {
case X86EMUL_MODE_PROT64:
- if (((signed long)la << 16) >> 16 != la)
+ if (is_noncanonical_address(la))
return emulate_gp(ctxt, 0);
*max_size = min_t(u64, ~0u, (1ull << 48) - la);
@@ -701,8 +703,8 @@
if (size > *max_size)
goto bad;
cpl = ctxt->ops->cpl(ctxt);
- if (!(desc.type & 8)) {
- /* data segment */
+ if (!fetch) {
+ /* data segment or readable code segment */
if (cpl > desc.dpl)
goto bad;
} else if ((desc.type & 8) && !(desc.type & 4)) {
@@ -716,7 +718,7 @@
}
break;
}
- if (fetch ? ctxt->mode != X86EMUL_MODE_PROT64 : ctxt->ad_bytes != 8)
+ if (ctxt->mode != X86EMUL_MODE_PROT64)
la &= (u32)-1;
if (insn_aligned(ctxt, size) && ((la & (size - 1)) != 0))
return emulate_gp(ctxt, 0);
@@ -2045,31 +2047,22 @@
return rc;
}
-static int em_grp45(struct x86_emulate_ctxt *ctxt)
+static int em_jmp_abs(struct x86_emulate_ctxt *ctxt)
{
- int rc = X86EMUL_CONTINUE;
+ return assign_eip_near(ctxt, ctxt->src.val);
+}
- switch (ctxt->modrm_reg) {
- case 2: /* call near abs */ {
- long int old_eip;
- old_eip = ctxt->_eip;
- rc = assign_eip_near(ctxt, ctxt->src.val);
- if (rc != X86EMUL_CONTINUE)
- break;
- ctxt->src.val = old_eip;
- rc = em_push(ctxt);
- break;
- }
- case 4: /* jmp abs */
- rc = assign_eip_near(ctxt, ctxt->src.val);
- break;
- case 5: /* jmp far */
- rc = em_jmp_far(ctxt);
- break;
- case 6: /* push */
- rc = em_push(ctxt);
- break;
- }
+static int em_call_near_abs(struct x86_emulate_ctxt *ctxt)
+{
+ int rc;
+ long int old_eip;
+
+ old_eip = ctxt->_eip;
+ rc = assign_eip_near(ctxt, ctxt->src.val);
+ if (rc != X86EMUL_CONTINUE)
+ return rc;
+ ctxt->src.val = old_eip;
+ rc = em_push(ctxt);
return rc;
}
@@ -2425,6 +2418,8 @@
if ((msr_data & 0xfffc) == 0x0)
return emulate_gp(ctxt, 0);
ss_sel = (u16)(msr_data + 24);
+ rcx = (u32)rcx;
+ rdx = (u32)rdx;
break;
case X86EMUL_MODE_PROT64:
cs_sel = (u16)(msr_data + 32);
@@ -3780,11 +3775,11 @@
static const struct opcode group5[] = {
F(DstMem | SrcNone | Lock, em_inc),
F(DstMem | SrcNone | Lock, em_dec),
- I(SrcMem | Stack, em_grp45),
+ I(SrcMem | NearBranch, em_call_near_abs),
I(SrcMemFAddr | ImplicitOps | Stack, em_call_far),
- I(SrcMem | Stack, em_grp45),
- I(SrcMemFAddr | ImplicitOps, em_grp45),
- I(SrcMem | Stack, em_grp45), D(Undefined),
+ I(SrcMem | NearBranch, em_jmp_abs),
+ I(SrcMemFAddr | ImplicitOps, em_jmp_far),
+ I(SrcMem | Stack, em_push), D(Undefined),
};
static const struct opcode group6[] = {
@@ -3964,7 +3959,7 @@
I2bvIP(DstDI | SrcDX | Mov | String | Unaligned, em_in, ins, check_perm_in), /* insb, insw/insd */
I2bvIP(SrcSI | DstDX | String, em_out, outs, check_perm_out), /* outsb, outsw/outsd */
/* 0x70 - 0x7F */
- X16(D(SrcImmByte)),
+ X16(D(SrcImmByte | NearBranch)),
/* 0x80 - 0x87 */
G(ByteOp | DstMem | SrcImm, group1),
G(DstMem | SrcImm, group1),
@@ -4003,8 +3998,8 @@
X8(I(DstReg | SrcImm64 | Mov, em_mov)),
/* 0xC0 - 0xC7 */
G(ByteOp | Src2ImmByte, group2), G(Src2ImmByte, group2),
- I(ImplicitOps | Stack | SrcImmU16, em_ret_near_imm),
- I(ImplicitOps | Stack, em_ret),
+ I(ImplicitOps | NearBranch | SrcImmU16, em_ret_near_imm),
+ I(ImplicitOps | NearBranch, em_ret),
I(DstReg | SrcMemFAddr | ModRM | No64 | Src2ES, em_lseg),
I(DstReg | SrcMemFAddr | ModRM | No64 | Src2DS, em_lseg),
G(ByteOp, group11), G(0, group11),
@@ -4024,13 +4019,14 @@
/* 0xD8 - 0xDF */
N, E(0, &escape_d9), N, E(0, &escape_db), N, E(0, &escape_dd), N, N,
/* 0xE0 - 0xE7 */
- X3(I(SrcImmByte, em_loop)),
- I(SrcImmByte, em_jcxz),
+ X3(I(SrcImmByte | NearBranch, em_loop)),
+ I(SrcImmByte | NearBranch, em_jcxz),
I2bvIP(SrcImmUByte | DstAcc, em_in, in, check_perm_in),
I2bvIP(SrcAcc | DstImmUByte, em_out, out, check_perm_out),
/* 0xE8 - 0xEF */
- I(SrcImm | Stack, em_call), D(SrcImm | ImplicitOps),
- I(SrcImmFAddr | No64, em_jmp_far), D(SrcImmByte | ImplicitOps),
+ I(SrcImm | NearBranch, em_call), D(SrcImm | ImplicitOps | NearBranch),
+ I(SrcImmFAddr | No64, em_jmp_far),
+ D(SrcImmByte | ImplicitOps | NearBranch),
I2bvIP(SrcDX | DstAcc, em_in, in, check_perm_in),
I2bvIP(SrcAcc | DstDX, em_out, out, check_perm_out),
/* 0xF0 - 0xF7 */
@@ -4090,7 +4086,7 @@
N, N, N, N,
N, N, N, GP(SrcReg | DstMem | ModRM | Mov, &pfx_0f_6f_0f_7f),
/* 0x80 - 0x8F */
- X16(D(SrcImm)),
+ X16(D(SrcImm | NearBranch)),
/* 0x90 - 0x9F */
X16(D(ByteOp | DstMem | SrcNone | ModRM| Mov)),
/* 0xA0 - 0xA7 */
@@ -4559,7 +4555,7 @@
return EMULATION_FAILED;
if (unlikely(ctxt->d &
- (NotImpl|Stack|Op3264|Sse|Mmx|Intercept|CheckPerm))) {
+ (NotImpl|Stack|Op3264|Sse|Mmx|Intercept|CheckPerm|NearBranch))) {
/*
* These are copied unconditionally here, and checked unconditionally
* in x86_emulate_insn.
@@ -4570,8 +4566,12 @@
if (ctxt->d & NotImpl)
return EMULATION_FAILED;
- if (mode == X86EMUL_MODE_PROT64 && (ctxt->d & Stack))
- ctxt->op_bytes = 8;
+ if (mode == X86EMUL_MODE_PROT64) {
+ if (ctxt->op_bytes == 4 && (ctxt->d & Stack))
+ ctxt->op_bytes = 8;
+ else if (ctxt->d & NearBranch)
+ ctxt->op_bytes = 8;
+ }
if (ctxt->d & Op3264) {
if (mode == X86EMUL_MODE_PROT64)
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index b8345dd..5f574b4 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -68,6 +68,9 @@
#define MAX_APIC_VECTOR 256
#define APIC_VECTORS_PER_REG 32
+#define APIC_BROADCAST 0xFF
+#define X2APIC_BROADCAST 0xFFFFFFFFul
+
#define VEC_POS(v) ((v) & (32 - 1))
#define REG_POS(v) (((v) >> 5) << 4)
@@ -149,6 +152,7 @@
new->cid_shift = 8;
new->cid_mask = 0;
new->lid_mask = 0xff;
+ new->broadcast = APIC_BROADCAST;
kvm_for_each_vcpu(i, vcpu, kvm) {
struct kvm_lapic *apic = vcpu->arch.apic;
@@ -170,6 +174,7 @@
new->cid_shift = 16;
new->cid_mask = (1 << KVM_X2APIC_CID_BITS) - 1;
new->lid_mask = 0xffff;
+ new->broadcast = X2APIC_BROADCAST;
} else if (kvm_apic_sw_enabled(apic) &&
!new->cid_mask /* flat mode */ &&
kvm_apic_get_reg(apic, APIC_DFR) == APIC_DFR_CLUSTER) {
@@ -201,11 +206,13 @@
static inline void apic_set_spiv(struct kvm_lapic *apic, u32 val)
{
- u32 prev = kvm_apic_get_reg(apic, APIC_SPIV);
+ bool enabled = val & APIC_SPIV_APIC_ENABLED;
apic_set_reg(apic, APIC_SPIV, val);
- if ((prev ^ val) & APIC_SPIV_APIC_ENABLED) {
- if (val & APIC_SPIV_APIC_ENABLED) {
+
+ if (enabled != apic->sw_enabled) {
+ apic->sw_enabled = enabled;
+ if (enabled) {
static_key_slow_dec_deferred(&apic_sw_disabled);
recalculate_apic_map(apic->vcpu->kvm);
} else
@@ -237,21 +244,17 @@
static inline int apic_lvtt_oneshot(struct kvm_lapic *apic)
{
- return ((kvm_apic_get_reg(apic, APIC_LVTT) &
- apic->lapic_timer.timer_mode_mask) == APIC_LVT_TIMER_ONESHOT);
+ return apic->lapic_timer.timer_mode == APIC_LVT_TIMER_ONESHOT;
}
static inline int apic_lvtt_period(struct kvm_lapic *apic)
{
- return ((kvm_apic_get_reg(apic, APIC_LVTT) &
- apic->lapic_timer.timer_mode_mask) == APIC_LVT_TIMER_PERIODIC);
+ return apic->lapic_timer.timer_mode == APIC_LVT_TIMER_PERIODIC;
}
static inline int apic_lvtt_tscdeadline(struct kvm_lapic *apic)
{
- return ((kvm_apic_get_reg(apic, APIC_LVTT) &
- apic->lapic_timer.timer_mode_mask) ==
- APIC_LVT_TIMER_TSCDEADLINE);
+ return apic->lapic_timer.timer_mode == APIC_LVT_TIMER_TSCDEADLINE;
}
static inline int apic_lvt_nmi_mode(u32 lvt_val)
@@ -558,16 +561,25 @@
apic_update_ppr(apic);
}
-int kvm_apic_match_physical_addr(struct kvm_lapic *apic, u16 dest)
+static int kvm_apic_broadcast(struct kvm_lapic *apic, u32 dest)
{
- return dest == 0xff || kvm_apic_id(apic) == dest;
+ return dest == (apic_x2apic_mode(apic) ?
+ X2APIC_BROADCAST : APIC_BROADCAST);
}
-int kvm_apic_match_logical_addr(struct kvm_lapic *apic, u8 mda)
+int kvm_apic_match_physical_addr(struct kvm_lapic *apic, u32 dest)
+{
+ return kvm_apic_id(apic) == dest || kvm_apic_broadcast(apic, dest);
+}
+
+int kvm_apic_match_logical_addr(struct kvm_lapic *apic, u32 mda)
{
int result = 0;
u32 logical_id;
+ if (kvm_apic_broadcast(apic, mda))
+ return 1;
+
if (apic_x2apic_mode(apic)) {
logical_id = kvm_apic_get_reg(apic, APIC_LDR);
return logical_id & mda;
@@ -595,7 +607,7 @@
}
int kvm_apic_match_dest(struct kvm_vcpu *vcpu, struct kvm_lapic *source,
- int short_hand, int dest, int dest_mode)
+ int short_hand, unsigned int dest, int dest_mode)
{
int result = 0;
struct kvm_lapic *target = vcpu->arch.apic;
@@ -657,9 +669,11 @@
if (!map)
goto out;
+ if (irq->dest_id == map->broadcast)
+ goto out;
+
if (irq->dest_mode == 0) { /* physical mode */
- if (irq->delivery_mode == APIC_DM_LOWEST ||
- irq->dest_id == 0xff)
+ if (irq->delivery_mode == APIC_DM_LOWEST)
goto out;
dst = &map->phys_map[irq->dest_id & 0xff];
} else {
@@ -1034,6 +1048,26 @@
apic->divide_count);
}
+static void apic_timer_expired(struct kvm_lapic *apic)
+{
+ struct kvm_vcpu *vcpu = apic->vcpu;
+ wait_queue_head_t *q = &vcpu->wq;
+
+ /*
+ * Note: KVM_REQ_PENDING_TIMER is implicitly checked in
+ * vcpu_enter_guest.
+ */
+ if (atomic_read(&apic->lapic_timer.pending))
+ return;
+
+ atomic_inc(&apic->lapic_timer.pending);
+ /* FIXME: this code should not know anything about vcpus */
+ kvm_make_request(KVM_REQ_PENDING_TIMER, vcpu);
+
+ if (waitqueue_active(q))
+ wake_up_interruptible(q);
+}
+
static void start_apic_timer(struct kvm_lapic *apic)
{
ktime_t now;
@@ -1096,9 +1130,10 @@
if (likely(tscdeadline > guest_tsc)) {
ns = (tscdeadline - guest_tsc) * 1000000ULL;
do_div(ns, this_tsc_khz);
- }
- hrtimer_start(&apic->lapic_timer.timer,
- ktime_add_ns(now, ns), HRTIMER_MODE_ABS);
+ hrtimer_start(&apic->lapic_timer.timer,
+ ktime_add_ns(now, ns), HRTIMER_MODE_ABS);
+ } else
+ apic_timer_expired(apic);
local_irq_restore(flags);
}
@@ -1203,17 +1238,20 @@
break;
- case APIC_LVTT:
- if ((kvm_apic_get_reg(apic, APIC_LVTT) &
- apic->lapic_timer.timer_mode_mask) !=
- (val & apic->lapic_timer.timer_mode_mask))
+ case APIC_LVTT: {
+ u32 timer_mode = val & apic->lapic_timer.timer_mode_mask;
+
+ if (apic->lapic_timer.timer_mode != timer_mode) {
+ apic->lapic_timer.timer_mode = timer_mode;
hrtimer_cancel(&apic->lapic_timer.timer);
+ }
if (!kvm_apic_sw_enabled(apic))
val |= APIC_LVT_MASKED;
val &= (apic_lvt_mask[0] | apic->lapic_timer.timer_mode_mask);
apic_set_reg(apic, APIC_LVTT, val);
break;
+ }
case APIC_TMICT:
if (apic_lvtt_tscdeadline(apic))
@@ -1320,7 +1358,7 @@
if (!(vcpu->arch.apic_base & MSR_IA32_APICBASE_ENABLE))
static_key_slow_dec_deferred(&apic_hw_disabled);
- if (!(kvm_apic_get_reg(apic, APIC_SPIV) & APIC_SPIV_APIC_ENABLED))
+ if (!apic->sw_enabled)
static_key_slow_dec_deferred(&apic_sw_disabled);
if (apic->regs)
@@ -1355,9 +1393,6 @@
return;
hrtimer_cancel(&apic->lapic_timer.timer);
- /* Inject here so clearing tscdeadline won't override new value */
- if (apic_has_pending_timer(vcpu))
- kvm_inject_apic_timer_irqs(vcpu);
apic->lapic_timer.tscdeadline = data;
start_apic_timer(apic);
}
@@ -1447,6 +1482,7 @@
for (i = 0; i < APIC_LVT_NUM; i++)
apic_set_reg(apic, APIC_LVTT + 0x10 * i, APIC_LVT_MASKED);
+ apic->lapic_timer.timer_mode = 0;
apic_set_reg(apic, APIC_LVT0,
SET_APIC_DELIVERY_MODE(0, APIC_MODE_EXTINT));
@@ -1538,23 +1574,8 @@
{
struct kvm_timer *ktimer = container_of(data, struct kvm_timer, timer);
struct kvm_lapic *apic = container_of(ktimer, struct kvm_lapic, lapic_timer);
- struct kvm_vcpu *vcpu = apic->vcpu;
- wait_queue_head_t *q = &vcpu->wq;
- /*
- * There is a race window between reading and incrementing, but we do
- * not care about potentially losing timer events in the !reinject
- * case anyway. Note: KVM_REQ_PENDING_TIMER is implicitly checked
- * in vcpu_enter_guest.
- */
- if (!atomic_read(&ktimer->pending)) {
- atomic_inc(&ktimer->pending);
- /* FIXME: this code should not know anything about vcpus */
- kvm_make_request(KVM_REQ_PENDING_TIMER, vcpu);
- }
-
- if (waitqueue_active(q))
- wake_up_interruptible(q);
+ apic_timer_expired(apic);
if (lapic_is_periodic(apic)) {
hrtimer_add_expires_ns(&ktimer->timer, ktimer->period);
diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h
index 6a11845..d4365f2 100644
--- a/arch/x86/kvm/lapic.h
+++ b/arch/x86/kvm/lapic.h
@@ -11,6 +11,7 @@
struct kvm_timer {
struct hrtimer timer;
s64 period; /* unit: ns */
+ u32 timer_mode;
u32 timer_mode_mask;
u64 tscdeadline;
atomic_t pending; /* accumulated triggered timers */
@@ -22,6 +23,7 @@
struct kvm_timer lapic_timer;
u32 divide_count;
struct kvm_vcpu *vcpu;
+ bool sw_enabled;
bool irr_pending;
/* Number of bits set in ISR. */
s16 isr_count;
@@ -55,8 +57,8 @@
void kvm_apic_update_tmr(struct kvm_vcpu *vcpu, u32 *tmr);
void kvm_apic_update_irr(struct kvm_vcpu *vcpu, u32 *pir);
-int kvm_apic_match_physical_addr(struct kvm_lapic *apic, u16 dest);
-int kvm_apic_match_logical_addr(struct kvm_lapic *apic, u8 mda);
+int kvm_apic_match_physical_addr(struct kvm_lapic *apic, u32 dest);
+int kvm_apic_match_logical_addr(struct kvm_lapic *apic, u32 mda);
int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq,
unsigned long *dest_map);
int kvm_apic_local_deliver(struct kvm_lapic *apic, int lvt_type);
@@ -119,11 +121,11 @@
extern struct static_key_deferred apic_sw_disabled;
-static inline int kvm_apic_sw_enabled(struct kvm_lapic *apic)
+static inline bool kvm_apic_sw_enabled(struct kvm_lapic *apic)
{
if (static_key_false(&apic_sw_disabled.key))
- return kvm_apic_get_reg(apic, APIC_SPIV) & APIC_SPIV_APIC_ENABLED;
- return APIC_SPIV_APIC_ENABLED;
+ return apic->sw_enabled;
+ return true;
}
static inline bool kvm_apic_present(struct kvm_vcpu *vcpu)
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 7527cef..d4f3aaa 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -2999,7 +2999,6 @@
{
int reg, dr;
unsigned long val;
- int err;
if (svm->vcpu.guest_debug == 0) {
/*
@@ -3019,12 +3018,15 @@
dr = svm->vmcb->control.exit_code - SVM_EXIT_READ_DR0;
if (dr >= 16) { /* mov to DRn */
+ if (!kvm_require_dr(&svm->vcpu, dr - 16))
+ return 1;
val = kvm_register_read(&svm->vcpu, reg);
kvm_set_dr(&svm->vcpu, dr - 16, val);
} else {
- err = kvm_get_dr(&svm->vcpu, dr, &val);
- if (!err)
- kvm_register_write(&svm->vcpu, reg, val);
+ if (!kvm_require_dr(&svm->vcpu, dr))
+ return 1;
+ kvm_get_dr(&svm->vcpu, dr, &val);
+ kvm_register_write(&svm->vcpu, reg, val);
}
skip_emulated_instruction(&svm->vcpu);
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 3e556c6..0cd99d8 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -105,7 +105,7 @@
(KVM_VM_CR0_ALWAYS_ON_UNRESTRICTED_GUEST | X86_CR0_PG | X86_CR0_PE)
#define KVM_CR4_GUEST_OWNED_BITS \
(X86_CR4_PVI | X86_CR4_DE | X86_CR4_PCE | X86_CR4_OSFXSR \
- | X86_CR4_OSXMMEXCPT)
+ | X86_CR4_OSXMMEXCPT | X86_CR4_TSD)
#define KVM_PMODE_VM_CR4_ALWAYS_ON (X86_CR4_PAE | X86_CR4_VMXE)
#define KVM_RMODE_VM_CR4_ALWAYS_ON (X86_CR4_VME | X86_CR4_PAE | X86_CR4_VMXE)
@@ -5163,13 +5163,20 @@
static int handle_dr(struct kvm_vcpu *vcpu)
{
unsigned long exit_qualification;
- int dr, reg;
+ int dr, dr7, reg;
+
+ exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
+ dr = exit_qualification & DEBUG_REG_ACCESS_NUM;
+
+ /* First, if DR does not exist, trigger UD */
+ if (!kvm_require_dr(vcpu, dr))
+ return 1;
/* Do not handle if the CPL > 0, will trigger GP on re-entry */
if (!kvm_require_cpl(vcpu, 0))
return 1;
- dr = vmcs_readl(GUEST_DR7);
- if (dr & DR7_GD) {
+ dr7 = vmcs_readl(GUEST_DR7);
+ if (dr7 & DR7_GD) {
/*
* As the vm-exit takes precedence over the debug trap, we
* need to emulate the latter, either for the host or the
@@ -5177,7 +5184,7 @@
*/
if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP) {
vcpu->run->debug.arch.dr6 = vcpu->arch.dr6;
- vcpu->run->debug.arch.dr7 = dr;
+ vcpu->run->debug.arch.dr7 = dr7;
vcpu->run->debug.arch.pc =
vmcs_readl(GUEST_CS_BASE) +
vmcs_readl(GUEST_RIP);
@@ -5185,9 +5192,7 @@
vcpu->run->exit_reason = KVM_EXIT_DEBUG;
return 0;
} else {
- vcpu->arch.dr7 &= ~DR7_GD;
vcpu->arch.dr6 |= DR6_BD | DR6_RTM;
- vmcs_writel(GUEST_DR7, vcpu->arch.dr7);
kvm_queue_exception(vcpu, DB_VECTOR);
return 1;
}
@@ -5209,8 +5214,6 @@
return 1;
}
- exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
- dr = exit_qualification & DEBUG_REG_ACCESS_NUM;
reg = DEBUG_REG_ACCESS_REG(exit_qualification);
if (exit_qualification & TYPE_MOV_FROM_DR) {
unsigned long val;
@@ -5492,7 +5495,7 @@
}
/* clear all local breakpoint enable flags */
- vmcs_writel(GUEST_DR7, vmcs_readl(GUEST_DR7) & ~0x55);
+ vmcs_writel(GUEST_DR7, vmcs_readl(GUEST_DR7) & ~0x155);
/*
* TODO: What about debug traps on tss switch?
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 0033df3..14c49cf 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -455,6 +455,16 @@
}
EXPORT_SYMBOL_GPL(kvm_require_cpl);
+bool kvm_require_dr(struct kvm_vcpu *vcpu, int dr)
+{
+ if ((dr != 4 && dr != 5) || !kvm_read_cr4_bits(vcpu, X86_CR4_DE))
+ return true;
+
+ kvm_queue_exception(vcpu, UD_VECTOR);
+ return false;
+}
+EXPORT_SYMBOL_GPL(kvm_require_dr);
+
/*
* This function will be used to read from the physical memory of the currently
* running guest. The difference to kvm_read_guest_page is that this function
@@ -656,6 +666,12 @@
if ((!(xcr0 & XSTATE_BNDREGS)) != (!(xcr0 & XSTATE_BNDCSR)))
return 1;
+ if (xcr0 & XSTATE_AVX512) {
+ if (!(xcr0 & XSTATE_YMM))
+ return 1;
+ if ((xcr0 & XSTATE_AVX512) != XSTATE_AVX512)
+ return 1;
+ }
kvm_put_guest_xcr0(vcpu);
vcpu->arch.xcr0 = xcr0;
@@ -811,8 +827,6 @@
vcpu->arch.eff_db[dr] = val;
break;
case 4:
- if (kvm_read_cr4_bits(vcpu, X86_CR4_DE))
- return 1; /* #UD */
/* fall through */
case 6:
if (val & 0xffffffff00000000ULL)
@@ -821,8 +835,6 @@
kvm_update_dr6(vcpu);
break;
case 5:
- if (kvm_read_cr4_bits(vcpu, X86_CR4_DE))
- return 1; /* #UD */
/* fall through */
default: /* 7 */
if (val & 0xffffffff00000000ULL)
@@ -837,27 +849,21 @@
int kvm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long val)
{
- int res;
-
- res = __kvm_set_dr(vcpu, dr, val);
- if (res > 0)
- kvm_queue_exception(vcpu, UD_VECTOR);
- else if (res < 0)
+ if (__kvm_set_dr(vcpu, dr, val)) {
kvm_inject_gp(vcpu, 0);
-
- return res;
+ return 1;
+ }
+ return 0;
}
EXPORT_SYMBOL_GPL(kvm_set_dr);
-static int _kvm_get_dr(struct kvm_vcpu *vcpu, int dr, unsigned long *val)
+int kvm_get_dr(struct kvm_vcpu *vcpu, int dr, unsigned long *val)
{
switch (dr) {
case 0 ... 3:
*val = vcpu->arch.db[dr];
break;
case 4:
- if (kvm_read_cr4_bits(vcpu, X86_CR4_DE))
- return 1;
/* fall through */
case 6:
if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)
@@ -866,23 +872,11 @@
*val = kvm_x86_ops->get_dr6(vcpu);
break;
case 5:
- if (kvm_read_cr4_bits(vcpu, X86_CR4_DE))
- return 1;
/* fall through */
default: /* 7 */
*val = vcpu->arch.dr7;
break;
}
-
- return 0;
-}
-
-int kvm_get_dr(struct kvm_vcpu *vcpu, int dr, unsigned long *val)
-{
- if (_kvm_get_dr(vcpu, dr, val)) {
- kvm_queue_exception(vcpu, UD_VECTOR);
- return 1;
- }
return 0;
}
EXPORT_SYMBOL_GPL(kvm_get_dr);
@@ -3106,7 +3100,7 @@
unsigned long val;
memcpy(dbgregs->db, vcpu->arch.db, sizeof(vcpu->arch.db));
- _kvm_get_dr(vcpu, 6, &val);
+ kvm_get_dr(vcpu, 6, &val);
dbgregs->dr6 = val;
dbgregs->dr7 = vcpu->arch.dr7;
dbgregs->flags = 0;
@@ -4667,7 +4661,7 @@
int emulator_get_dr(struct x86_emulate_ctxt *ctxt, int dr, unsigned long *dest)
{
- return _kvm_get_dr(emul_to_vcpu(ctxt), dr, dest);
+ return kvm_get_dr(emul_to_vcpu(ctxt), dr, dest);
}
int emulator_set_dr(struct x86_emulate_ctxt *ctxt, int dr, unsigned long value)
@@ -5965,6 +5959,12 @@
__kvm_set_rflags(vcpu, kvm_get_rflags(vcpu) |
X86_EFLAGS_RF);
+ if (vcpu->arch.exception.nr == DB_VECTOR &&
+ (vcpu->arch.dr7 & DR7_GD)) {
+ vcpu->arch.dr7 &= ~DR7_GD;
+ kvm_update_dr7(vcpu);
+ }
+
kvm_x86_ops->queue_exception(vcpu, vcpu->arch.exception.nr,
vcpu->arch.exception.has_error_code,
vcpu->arch.exception.error_code,
diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h
index 7cb9c45..cc1d61a 100644
--- a/arch/x86/kvm/x86.h
+++ b/arch/x86/kvm/x86.h
@@ -162,7 +162,8 @@
bool kvm_mtrr_valid(struct kvm_vcpu *vcpu, u32 msr, u64 data);
#define KVM_SUPPORTED_XCR0 (XSTATE_FP | XSTATE_SSE | XSTATE_YMM \
- | XSTATE_BNDREGS | XSTATE_BNDCSR)
+ | XSTATE_BNDREGS | XSTATE_BNDCSR \
+ | XSTATE_AVX512)
extern u64 host_xcr0;
extern u64 kvm_supported_xcr0(void);
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index 6076882..6d59e5b 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -647,11 +647,7 @@
#define KVM_CAP_MP_STATE 14
#define KVM_CAP_COALESCED_MMIO 15
#define KVM_CAP_SYNC_MMU 16 /* Changes to host mmap are reflected in guest */
-#define KVM_CAP_DEVICE_ASSIGNMENT 17
#define KVM_CAP_IOMMU 18
-#ifdef __KVM_HAVE_MSI
-#define KVM_CAP_DEVICE_MSI 20
-#endif
/* Bug in KVM_SET_USER_MEMORY_REGION fixed: */
#define KVM_CAP_DESTROY_MEMORY_REGION_WORKS 21
#define KVM_CAP_USER_NMI 22
@@ -663,10 +659,6 @@
#endif
#define KVM_CAP_IRQ_ROUTING 25
#define KVM_CAP_IRQ_INJECT_STATUS 26
-#define KVM_CAP_DEVICE_DEASSIGNMENT 27
-#ifdef __KVM_HAVE_MSIX
-#define KVM_CAP_DEVICE_MSIX 28
-#endif
#define KVM_CAP_ASSIGN_DEV_IRQ 29
/* Another bug in KVM_SET_USER_MEMORY_REGION fixed: */
#define KVM_CAP_JOIN_MEMORY_REGIONS_WORKS 30
diff --git a/virt/kvm/ioapic.h b/virt/kvm/ioapic.h
index e23b706..31725a3 100644
--- a/virt/kvm/ioapic.h
+++ b/virt/kvm/ioapic.h
@@ -83,7 +83,7 @@
void kvm_rtc_eoi_tracking_restore_one(struct kvm_vcpu *vcpu);
int kvm_apic_match_dest(struct kvm_vcpu *vcpu, struct kvm_lapic *source,
- int short_hand, int dest, int dest_mode);
+ int short_hand, unsigned int dest, int dest_mode);
int kvm_apic_compare_prio(struct kvm_vcpu *vcpu1, struct kvm_vcpu *vcpu2);
void kvm_ioapic_update_eoi(struct kvm_vcpu *vcpu, int vector,
int trigger_mode);
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 25ffac9..3a31ec6 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -760,7 +760,7 @@
*
* Discontiguous memory is allowed, mostly for framebuffers.
*
- * Must be called holding mmap_sem for write.
+ * Must be called holding kvm->slots_lock for write.
*/
int __kvm_set_memory_region(struct kvm *kvm,
struct kvm_userspace_memory_region *mem)