[PATCH] i386/x86-64: Fix NMI watchdog suspend/resume
Making NMI suspend/resume work with SMP. We use CPU hotplug to offline
APs in SMP suspend/resume. Only BSP executes sysdev's .suspend/.resume
method. APs should follow CPU hotplug code path.
And:
+From: Don Zickus <dzickus@redhat.com>
Makes the start/stop paths of nmi watchdog more robust to handle the
suspend/resume cases more gracefully.
AK: I merged the two patches together
Signed-off-by: Shaohua Li <shaohua.li@intel.com>
Signed-off-by: Andi Kleen <ak@suse.de>
Cc: Don Zickus <dzickus@redhat.com>
Cc: Andi Kleen <ak@muc.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
diff --git a/arch/i386/kernel/nmi.c b/arch/i386/kernel/nmi.c
index 6241e44..8e4ed93 100644
--- a/arch/i386/kernel/nmi.c
+++ b/arch/i386/kernel/nmi.c
@@ -63,7 +63,6 @@
static DEFINE_PER_CPU(struct nmi_watchdog_ctlblk, nmi_watchdog_ctlblk);
/* local prototypes */
-static void stop_apic_nmi_watchdog(void *unused);
static int unknown_nmi_panic_callback(struct pt_regs *regs, int cpu);
extern void show_registers(struct pt_regs *regs);
@@ -341,15 +340,20 @@
static int lapic_nmi_suspend(struct sys_device *dev, pm_message_t state)
{
+ /* only CPU0 goes here, other CPUs should be offline */
nmi_pm_active = atomic_read(&nmi_active);
- disable_lapic_nmi_watchdog();
+ stop_apic_nmi_watchdog(NULL);
+ BUG_ON(atomic_read(&nmi_active) != 0);
return 0;
}
static int lapic_nmi_resume(struct sys_device *dev)
{
- if (nmi_pm_active > 0)
- enable_lapic_nmi_watchdog();
+ /* only CPU0 goes here, other CPUs should be offline */
+ if (nmi_pm_active > 0) {
+ setup_apic_nmi_watchdog(NULL);
+ touch_nmi_watchdog();
+ }
return 0;
}
@@ -626,11 +630,21 @@
void setup_apic_nmi_watchdog (void *unused)
{
+ struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
+
/* only support LOCAL and IO APICs for now */
if ((nmi_watchdog != NMI_LOCAL_APIC) &&
(nmi_watchdog != NMI_IO_APIC))
return;
+ if (wd->enabled == 1)
+ return;
+
+ /* cheap hack to support suspend/resume */
+ /* if cpu0 is not active neither should the other cpus */
+ if ((smp_processor_id() != 0) && (atomic_read(&nmi_active) <= 0))
+ return;
+
if (nmi_watchdog == NMI_LOCAL_APIC) {
switch (boot_cpu_data.x86_vendor) {
case X86_VENDOR_AMD:
@@ -663,17 +677,22 @@
return;
}
}
- __get_cpu_var(nmi_watchdog_ctlblk.enabled) = 1;
+ wd->enabled = 1;
atomic_inc(&nmi_active);
}
-static void stop_apic_nmi_watchdog(void *unused)
+void stop_apic_nmi_watchdog(void *unused)
{
+ struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
+
/* only support LOCAL and IO APICs for now */
if ((nmi_watchdog != NMI_LOCAL_APIC) &&
(nmi_watchdog != NMI_IO_APIC))
return;
+ if (wd->enabled == 0)
+ return;
+
if (nmi_watchdog == NMI_LOCAL_APIC) {
switch (boot_cpu_data.x86_vendor) {
case X86_VENDOR_AMD:
@@ -697,7 +716,7 @@
return;
}
}
- __get_cpu_var(nmi_watchdog_ctlblk.enabled) = 0;
+ wd->enabled = 0;
atomic_dec(&nmi_active);
}
diff --git a/arch/i386/kernel/smpboot.c b/arch/i386/kernel/smpboot.c
index efe0799..9367af7 100644
--- a/arch/i386/kernel/smpboot.c
+++ b/arch/i386/kernel/smpboot.c
@@ -1376,7 +1376,8 @@
*/
if (cpu == 0)
return -EBUSY;
-
+ if (nmi_watchdog == NMI_LOCAL_APIC)
+ stop_apic_nmi_watchdog(NULL);
clear_local_APIC();
/* Allow any queued timer interrupts to get serviced */
local_irq_enable();