Merge "defconfig: sdm845: Panic on hard lockups"
diff --git a/arch/arm64/configs/sdm845_defconfig b/arch/arm64/configs/sdm845_defconfig
index 4b347e6..f202f7e 100644
--- a/arch/arm64/configs/sdm845_defconfig
+++ b/arch/arm64/configs/sdm845_defconfig
@@ -610,6 +610,7 @@
CONFIG_DEBUG_STACK_USAGE=y
CONFIG_DEBUG_MEMORY_INIT=y
CONFIG_LOCKUP_DETECTOR=y
+CONFIG_BOOTPARAM_HARDLOCKUP_PANIC=y
CONFIG_WQ_WATCHDOG=y
CONFIG_PANIC_TIMEOUT=5
CONFIG_PANIC_ON_SCHED_BUG=y
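
For context on the defconfig line: CONFIG_BOOTPARAM_HARDLOCKUP_PANIC only chooses the boot-time default of the hardlockup_panic flag (initialised from CONFIG_BOOTPARAM_HARDLOCKUP_PANIC_VALUE in the kernel/watchdog_hld.c hunk below); the policy can still be changed at runtime. A minimal userspace sketch, assuming this tree carries the existing kernel.hardlockup_panic sysctl exposed through procfs:

#include <stdio.h>

int main(void)
{
	/* 1 => panic on a detected hard lockup, 0 => warn only */
	FILE *f = fopen("/proc/sys/kernel/hardlockup_panic", "r");
	int panic_on_hardlockup;

	if (!f) {
		perror("fopen");
		return 1;
	}
	if (fscanf(f, "%d", &panic_on_hardlockup) == 1)
		printf("kernel.hardlockup_panic = %d\n", panic_on_hardlockup);
	fclose(f);
	return 0;
}
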
diff --git a/include/linux/nmi.h b/include/linux/nmi.h
index 3ca2526..4443a29 100644
--- a/include/linux/nmi.h
+++ b/include/linux/nmi.h
@@ -24,6 +24,9 @@
#define NMI_WATCHDOG_ENABLED (1 << NMI_WATCHDOG_ENABLED_BIT)
#define SOFT_WATCHDOG_ENABLED (1 << SOFT_WATCHDOG_ENABLED_BIT)
+DECLARE_PER_CPU(unsigned long, hrtimer_interrupts);
+DECLARE_PER_CPU(unsigned long, hrtimer_interrupts_saved);
+
/**
* touch_nmi_watchdog - restart NMI watchdog timeout.
*
@@ -31,8 +34,11 @@
* may be used to reset the timeout - for code which intentionally
* disables interrupts for a long time. This call is stateless.
*/
-#if defined(CONFIG_HAVE_NMI_WATCHDOG) || defined(CONFIG_HARDLOCKUP_DETECTOR)
+#if defined(CONFIG_HAVE_NMI_WATCHDOG) || defined(CONFIG_HARDLOCKUP_DETECTOR_NMI)
#include <asm/nmi.h>
+#endif
+
+#if defined(CONFIG_HAVE_NMI_WATCHDOG) || defined(CONFIG_HARDLOCKUP_DETECTOR)
extern void touch_nmi_watchdog(void);
#else
static inline void touch_nmi_watchdog(void)
@@ -130,6 +136,7 @@ extern int sysctl_hardlockup_all_cpu_backtrace;
#define sysctl_softlockup_all_cpu_backtrace 0
#define sysctl_hardlockup_all_cpu_backtrace 0
#endif
+
extern bool is_hardlockup(void);
struct ctl_table;
extern int proc_watchdog(struct ctl_table *, int ,
diff --git a/kernel/watchdog.c b/kernel/watchdog.c
index cffb5f2..cfbf027 100644
--- a/kernel/watchdog.c
+++ b/kernel/watchdog.c
@@ -81,10 +81,10 @@ static DEFINE_PER_CPU(struct hrtimer, watchdog_hrtimer);
static DEFINE_PER_CPU(unsigned int, watchdog_en);
static DEFINE_PER_CPU(bool, softlockup_touch_sync);
static DEFINE_PER_CPU(bool, soft_watchdog_warn);
-static DEFINE_PER_CPU(unsigned long, hrtimer_interrupts);
static DEFINE_PER_CPU(unsigned long, soft_lockup_hrtimer_cnt);
static DEFINE_PER_CPU(struct task_struct *, softlockup_task_ptr_saved);
-static DEFINE_PER_CPU(unsigned long, hrtimer_interrupts_saved);
+DEFINE_PER_CPU(unsigned long, hrtimer_interrupts);
+DEFINE_PER_CPU(unsigned long, hrtimer_interrupts_saved);
static unsigned long soft_lockup_nmi_warn;
unsigned int __read_mostly softlockup_panic =
@@ -254,6 +254,10 @@ void __weak watchdog_nmi_disable(unsigned int cpu)
{
}
+void __weak watchdog_check_hardlockup_other_cpu(void)
+{
+}
+
static int watchdog_enable_all_cpus(void);
static void watchdog_disable_all_cpus(void);
@@ -271,6 +275,9 @@ static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer)
/* kick the hardlockup detector */
watchdog_interrupt_count();
+ /* test for hardlockups on the next cpu */
+ watchdog_check_hardlockup_other_cpu();
+
/* kick the softlockup detector */
wake_up_process(__this_cpu_read(softlockup_watchdog));
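
For reference, the per-cpu counter the new buddy check samples is the one this same timer function already bumps through watchdog_interrupt_count() just above the added call. Paraphrased from the existing helper in kernel/watchdog.c (shown for context, not part of this diff):

static void watchdog_interrupt_count(void)
{
	/* one tick of this CPU's watchdog hrtimer; a buddy CPU compares
	 * this against hrtimer_interrupts_saved to detect a hard lockup */
	__this_cpu_inc(hrtimer_interrupts);
}
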
diff --git a/kernel/watchdog_hld.c b/kernel/watchdog_hld.c
index 12b8dd6..5b2c127 100644
--- a/kernel/watchdog_hld.c
+++ b/kernel/watchdog_hld.c
@@ -18,7 +18,11 @@
static DEFINE_PER_CPU(bool, hard_watchdog_warn);
static DEFINE_PER_CPU(bool, watchdog_nmi_touch);
+#ifdef CONFIG_HARDLOCKUP_DETECTOR_OTHER_CPU
+static cpumask_t __read_mostly watchdog_cpus;
+#else
static DEFINE_PER_CPU(struct perf_event *, watchdog_ev);
+#endif
/* boot commands */
/*
@@ -26,7 +30,10 @@ static DEFINE_PER_CPU(struct perf_event *, watchdog_ev);
*/
unsigned int __read_mostly hardlockup_panic =
CONFIG_BOOTPARAM_HARDLOCKUP_PANIC_VALUE;
+
+#ifdef CONFIG_HARDLOCKUP_DETECTOR_NMI
static unsigned long hardlockup_allcpu_dumped;
+#endif
/*
* We may not want to enable hard lockup detection by default in all cases,
* for example when running the kernel as a guest on a hypervisor. In these
@@ -68,6 +75,108 @@ void touch_nmi_watchdog(void)
}
EXPORT_SYMBOL(touch_nmi_watchdog);
+#ifdef CONFIG_HARDLOCKUP_DETECTOR_OTHER_CPU
+static unsigned int watchdog_next_cpu(unsigned int cpu)
+{
+ cpumask_t cpus = watchdog_cpus;
+ unsigned int next_cpu;
+
+ next_cpu = cpumask_next(cpu, &cpus);
+ if (next_cpu >= nr_cpu_ids)
+ next_cpu = cpumask_first(&cpus);
+
+ if (next_cpu == cpu)
+ return nr_cpu_ids;
+
+ return next_cpu;
+}
+
+static int is_hardlockup_other_cpu(unsigned int cpu)
+{
+ unsigned long hrint = per_cpu(hrtimer_interrupts, cpu);
+
+ if (per_cpu(hrtimer_interrupts_saved, cpu) == hrint)
+ return 1;
+
+ per_cpu(hrtimer_interrupts_saved, cpu) = hrint;
+ return 0;
+}
+
+void watchdog_check_hardlockup_other_cpu(void)
+{
+ unsigned int next_cpu;
+
+ /*
+ * Test for hardlockups every 3 samples. The sample period is
+ * watchdog_thresh * 2 / 5, so 3 samples gets us back to slightly over
+ * watchdog_thresh (over by 20%).
+ */
+ if (__this_cpu_read(hrtimer_interrupts) % 3 != 0)
+ return;
+
+ /* check for a hardlockup on the next cpu */
+ next_cpu = watchdog_next_cpu(smp_processor_id());
+ if (next_cpu >= nr_cpu_ids)
+ return;
+
+ smp_rmb();
+
+ if (per_cpu(watchdog_nmi_touch, next_cpu) == true) {
+ per_cpu(watchdog_nmi_touch, next_cpu) = false;
+ return;
+ }
+
+ if (is_hardlockup_other_cpu(next_cpu)) {
+ /* only warn once */
+ if (per_cpu(hard_watchdog_warn, next_cpu) == true)
+ return;
+
+ if (hardlockup_panic)
+ panic("Watchdog detected hard LOCKUP on cpu %u",
+ next_cpu);
+ else
+ WARN(1, "Watchdog detected hard LOCKUP on cpu %u",
+ next_cpu);
+
+ per_cpu(hard_watchdog_warn, next_cpu) = true;
+ } else {
+ per_cpu(hard_watchdog_warn, next_cpu) = false;
+ }
+}
+
+int watchdog_nmi_enable(unsigned int cpu)
+{
+ /*
+ * The new cpu will be marked online before the first hrtimer interrupt
+ * runs on it. If another cpu tests for a hardlockup on the new cpu
+ * before it has run its first hrtimer, it will get a false positive.
+ * Touch the watchdog on the new cpu to delay the first check for at
+ * least 3 sampling periods to guarantee one hrtimer has run on the new
+ * cpu.
+ */
+ per_cpu(watchdog_nmi_touch, cpu) = true;
+ smp_wmb();
+ cpumask_set_cpu(cpu, &watchdog_cpus);
+ return 0;
+}
+
+void watchdog_nmi_disable(unsigned int cpu)
+{
+ unsigned int next_cpu = watchdog_next_cpu(cpu);
+
+ /*
+ * Offlining this cpu will cause the cpu before this one to start
+ * checking the one after this one. If this cpu just finished checking
+ * the next cpu and updating hrtimer_interrupts_saved, and then the
+ * previous cpu checks it within one sample period, it will trigger a
+ * false positive. Touch the watchdog on the next cpu to prevent it.
+ */
+ if (next_cpu < nr_cpu_ids)
+ per_cpu(watchdog_nmi_touch, next_cpu) = true;
+ smp_wmb();
+ cpumask_clear_cpu(cpu, &watchdog_cpus);
+}
+#else
static struct perf_event_attr wd_hw_attr = {
.type = PERF_TYPE_HARDWARE,
.config = PERF_COUNT_HW_CPU_CYCLES,
@@ -228,3 +337,4 @@ void watchdog_nmi_disable(unsigned int cpu)
cpu0_err = 0;
}
}
+#endif
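
The buddy assignment in watchdog_next_cpu() above is easier to follow as a standalone model. This is an illustrative userspace sketch, not kernel code: it uses a plain unsigned bitmask instead of cpumask_t, NR_CPUS, next_set_bit() and first_set_bit() are made-up stand-ins, and the online mask is passed explicitly instead of reading the file-static watchdog_cpus.

#include <stdio.h>

#define NR_CPUS 8

/* toy stand-ins for cpumask_next()/cpumask_first() over a bitmask */
static int next_set_bit(unsigned int mask, int after)
{
	int cpu;

	for (cpu = after + 1; cpu < NR_CPUS; cpu++)
		if (mask & (1u << cpu))
			return cpu;
	return NR_CPUS;
}

static int first_set_bit(unsigned int mask)
{
	return next_set_bit(mask, -1);
}

/* same wrap-around logic as the kernel helper above */
static int watchdog_next_cpu(unsigned int online, int cpu)
{
	int next = next_set_bit(online, cpu);

	if (next >= NR_CPUS)
		next = first_set_bit(online);
	if (next == cpu)
		return NR_CPUS;		/* only CPU online: nobody to watch */
	return next;
}

int main(void)
{
	unsigned int online = 0x0b;	/* CPUs 0, 1 and 3 online; CPU 2 offline */
	int cpu;

	for (cpu = 0; cpu < NR_CPUS; cpu++)
		if (online & (1u << cpu))
			printf("cpu%d watches cpu%d\n", cpu,
			       watchdog_next_cpu(online, cpu));
	return 0;
}

With CPUs 0, 1 and 3 online this prints cpu0 watches cpu1, cpu1 watches cpu3 and cpu3 watches cpu0, which is the wrap-around chain that the hotplug comments in watchdog_nmi_enable()/watchdog_nmi_disable() above are protecting.
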
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index 2f9f7aa..411b383 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -779,11 +779,20 @@
The frequency of hrtimer and NMI events and the soft and hard lockup
thresholds can be controlled through the sysctl watchdog_thresh.
-config HARDLOCKUP_DETECTOR
+config HARDLOCKUP_DETECTOR_NMI
def_bool y
depends on LOCKUP_DETECTOR && !HAVE_NMI_WATCHDOG
depends on PERF_EVENTS && HAVE_PERF_EVENTS_NMI
+config HARDLOCKUP_DETECTOR_OTHER_CPU
+ def_bool y
+ depends on LOCKUP_DETECTOR && SMP
+ depends on !HARDLOCKUP_DETECTOR_NMI && !HAVE_NMI_WATCHDOG
+
+config HARDLOCKUP_DETECTOR
+ def_bool y
+ depends on HARDLOCKUP_DETECTOR_NMI || HARDLOCKUP_DETECTOR_OTHER_CPU
+
config BOOTPARAM_HARDLOCKUP_PANIC
bool "Panic (Reboot) On Hard Lockups"
depends on HARDLOCKUP_DETECTOR
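
Net effect of the Kconfig split, assuming arm64 in this tree selects neither HAVE_NMI_WATCHDOG nor HAVE_PERF_EVENTS_NMI: HARDLOCKUP_DETECTOR_NMI stays disabled, HARDLOCKUP_DETECTOR_OTHER_CPU comes on for SMP builds with LOCKUP_DETECTOR enabled, and the new umbrella HARDLOCKUP_DETECTOR is still set, so the BOOTPARAM_HARDLOCKUP_PANIC default enabled in the sdm845 defconfig hunk applies to the buddy detector. A minimal sketch of how code can distinguish the backends under the new symbols, mirroring the #ifdef structure used in kernel/watchdog_hld.c above:

#if defined(CONFIG_HARDLOCKUP_DETECTOR_NMI)
/* perf/NMI backend: a per-cpu perf event drives the hardlockup check */
#elif defined(CONFIG_HARDLOCKUP_DETECTOR_OTHER_CPU)
/* buddy backend: each CPU's hrtimer samples the next online CPU's counter */
#else
/* CONFIG_HARDLOCKUP_DETECTOR not set: only the softlockup detector is built */
#endif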