KVM: PPC: Book3S HV: Make secondary threads more robust against stray IPIs
Currently on POWER7, if we are running the guest on a core and we don't
need all the hardware threads, we do nothing to ensure that the unused
threads aren't executing in the kernel (other than checking that they
are offline). We just assume they're napping and we don't do anything
to stop them trying to enter the kernel while the guest is running.
This means that a stray IPI can wake up the hardware thread and it will
then try to enter the kernel, but since the core is in guest context,
it will execute code from the guest in hypervisor mode once it turns the
MMU on, which tends to lead to crashes or hangs in the host.
This fixes the problem by adding two new one-byte flags in the
kvmppc_host_state structure in the PACA which are used to interlock
between the primary thread and the unused secondary threads when entering
the guest. With these flags, the primary thread can ensure that the
unused secondaries are not already in kernel mode (i.e. handling a stray
IPI) and then indicate that they should not try to enter the kernel
if they do get woken for any reason. Instead they will go into KVM code,
find that there is no vcpu to run, acknowledge and clear the IPI and go
back to nap mode.
Signed-off-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Alexander Graf <agraf@suse.de>
Signed-off-by: Avi Kivity <avi@redhat.com>
diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
index b70bf22..d595033 100644
--- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
+++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
@@ -26,6 +26,7 @@
#include <asm/hvcall.h>
#include <asm/asm-offsets.h>
#include <asm/exception-64s.h>
+#include <asm/kvm_book3s_asm.h>
/*****************************************************************************
* *
@@ -82,6 +83,7 @@
#define XICS_XIRR 4
#define XICS_QIRR 0xc
+#define XICS_IPI 2 /* interrupt source # for IPIs */
/*
* We come in here when wakened from nap mode on a secondary hw thread.
@@ -94,27 +96,55 @@
subi r1,r1,STACK_FRAME_OVERHEAD
ld r2,PACATOC(r13)
+ li r0,KVM_HWTHREAD_IN_KVM
+ stb r0,HSTATE_HWTHREAD_STATE(r13)
+
+ /* NV GPR values from power7_idle() will no longer be valid */
+ li r0,1
+ stb r0,PACA_NAPSTATELOST(r13)
+
+ /* get vcpu pointer, NULL if we have no vcpu to run */
+ ld r4,HSTATE_KVM_VCPU(r13)
+ cmpdi cr1,r4,0
+
+ /* Check the wake reason in SRR1 to see why we got here */
+ mfspr r3,SPRN_SRR1
+ rlwinm r3,r3,44-31,0x7 /* extract wake reason field */
+ cmpwi r3,4 /* was it an external interrupt? */
+ bne 27f
+
+ /*
+ * External interrupt - for now assume it is an IPI, since we
+ * should never get any other interrupts sent to offline threads.
+ * Only do this for secondary threads.
+ */
+ beq cr1,25f
+ lwz r3,VCPU_PTID(r4)
+ cmpwi r3,0
+ beq 27f
+25: ld r5,HSTATE_XICS_PHYS(r13)
+ li r0,0xff
+ li r6,XICS_QIRR
+ li r7,XICS_XIRR
+ lwzcix r8,r5,r7 /* get and ack the interrupt */
+ sync
+ clrldi. r9,r8,40 /* get interrupt source ID. */
+ beq 27f /* none there? */
+ cmpwi r9,XICS_IPI
+ bne 26f
+ stbcix r0,r5,r6 /* clear IPI */
+26: stwcix r8,r5,r7 /* EOI the interrupt */
+
+27: /* XXX should handle hypervisor maintenance interrupts etc. here */
+
+ /* if we have no vcpu to run, go back to sleep */
+ beq cr1,kvm_no_guest
+
/* were we napping due to cede? */
lbz r0,HSTATE_NAPPING(r13)
cmpwi r0,0
bne kvm_end_cede
- /* get vcpu pointer */
- ld r4, HSTATE_KVM_VCPU(r13)
-
- /* We got here with an IPI; clear it */
- ld r5, HSTATE_XICS_PHYS(r13)
- li r0, 0xff
- li r6, XICS_QIRR
- li r7, XICS_XIRR
- lwzcix r8, r5, r7 /* ack the interrupt */
- sync
- stbcix r0, r5, r6 /* clear it */
- stwcix r8, r5, r7 /* EOI it */
-
- /* NV GPR values from power7_idle() will no longer be valid */
- stb r0, PACA_NAPSTATELOST(r13)
-
.global kvmppc_hv_entry
kvmppc_hv_entry:
@@ -1445,8 +1475,8 @@
* Take a nap until a decrementer or external interrupt occurs,
* with PECE1 (wake on decr) and PECE0 (wake on external) set in LPCR
*/
- li r0,0x80
- stb r0,PACAPROCSTART(r13)
+ li r0,1
+ stb r0,HSTATE_HWTHREAD_REQ(r13)
mfspr r5,SPRN_LPCR
ori r5,r5,LPCR_PECE0 | LPCR_PECE1
mtspr SPRN_LPCR,r5
@@ -1463,26 +1493,7 @@
kvm_end_cede:
/* Woken by external or decrementer interrupt */
ld r1, HSTATE_HOST_R1(r13)
- ld r2, PACATOC(r13)
- /* If we're a secondary thread and we got here by an IPI, ack it */
- ld r4,HSTATE_KVM_VCPU(r13)
- lwz r3,VCPU_PTID(r4)
- cmpwi r3,0
- beq 27f
- mfspr r3,SPRN_SRR1
- rlwinm r3,r3,44-31,0x7 /* extract wake reason field */
- cmpwi r3,4 /* was it an external interrupt? */
- bne 27f
- ld r5, HSTATE_XICS_PHYS(r13)
- li r0,0xff
- li r6,XICS_QIRR
- li r7,XICS_XIRR
- lwzcix r8,r5,r7 /* ack the interrupt */
- sync
- stbcix r0,r5,r6 /* clear it */
- stwcix r8,r5,r7 /* EOI it */
-27:
/* load up FP state */
bl kvmppc_load_fp
@@ -1580,12 +1591,17 @@
stwcx. r3, 0, r4
bne 51b
+kvm_no_guest:
+ li r0, KVM_HWTHREAD_IN_NAP
+ stb r0, HSTATE_HWTHREAD_STATE(r13)
+ li r0, 0
+ std r0, HSTATE_KVM_VCPU(r13)
+
li r3, LPCR_PECE0
mfspr r4, SPRN_LPCR
rlwimi r4, r3, 0, LPCR_PECE0 | LPCR_PECE1
mtspr SPRN_LPCR, r4
isync
- li r0, 0
std r0, HSTATE_SCRATCH0(r13)
ptesync
ld r0, HSTATE_SCRATCH0(r13)