Blame - kernel/watchdog_hld.c - SHIFTPHONES/android_kernel_shift_sdm845

blob: 5b2c1273cbb639f10425671f3394225071d0ff69 [file] [log] [blame]

/*
 * Detect hard lockups on a system
 *
 * started by Don Zickus, Copyright (C) 2010 Red Hat, Inc.
 *
 * Note: Most of this code is borrowed heavily from the original softlockup
 * detector, so thanks to Ingo for the initial implementation.
 * Some chunks also taken from the old x86-specific nmi watchdog code, thanks
 * to those contributors as well.
 */
				11
				12	#define pr_fmt(fmt) "NMI watchdog: " fmt
				13
				14	#include <linux/nmi.h>
				15	#include <linux/module.h>
				16	#include <asm/irq_regs.h>
				17	#include <linux/perf_event.h>
				18
				19	static DEFINE_PER_CPU(bool, hard_watchdog_warn);
				20	static DEFINE_PER_CPU(bool, watchdog_nmi_touch);
Kyle Yan	bd44874	2017-08-21 15:10:31 -0700	[diff] [blame]	21	#ifdef CONFIG_HARDLOCKUP_DETECTOR_OTHER_CPU
				22	static cpumask_t __read_mostly watchdog_cpus;
				23	#else
Babu Moger	b969a24	2016-12-14 15:06:24 -0800	[diff] [blame]	24	static DEFINE_PER_CPU(struct perf_event *, watchdog_ev);
Kyle Yan	bd44874	2017-08-21 15:10:31 -0700	[diff] [blame]	25	#endif
Babu Moger	b969a24	2016-12-14 15:06:24 -0800	[diff] [blame]	26
				27	/* boot commands */
				28	/*
				29	* Should we panic when a soft-lockup or hard-lockup occurs:
				30	*/
				31	unsigned int __read_mostly hardlockup_panic =
				32	CONFIG_BOOTPARAM_HARDLOCKUP_PANIC_VALUE;
Kyle Yan	bd44874	2017-08-21 15:10:31 -0700	[diff] [blame]	33
				34	#ifdef CONFIG_HARDLOCKUP_DETECTOR_NMI
Babu Moger	b969a24	2016-12-14 15:06:24 -0800	[diff] [blame]	35	static unsigned long hardlockup_allcpu_dumped;
Kyle Yan	bd44874	2017-08-21 15:10:31 -0700	[diff] [blame]	36	#endif
Babu Moger	b969a24	2016-12-14 15:06:24 -0800	[diff] [blame]	37	/*
				38	* We may not want to enable hard lockup detection by default in all cases,
				39	* for example when running the kernel as a guest on a hypervisor. In these
				40	* cases this function can be called to disable hard lockup detection. This
				41	* function should only be executed once by the boot processor before the
				42	* kernel command line parameters are parsed, because otherwise it is not
				43	* possible to override this in hardlockup_panic_setup().
				44	*/
				45	void hardlockup_detector_disable(void)
				46	{
				47	watchdog_enabled &= ~NMI_WATCHDOG_ENABLED;
				48	}
				49
				50	static int __init hardlockup_panic_setup(char *str)
				51	{
				52	if (!strncmp(str, "panic", 5))
				53	hardlockup_panic = 1;
				54	else if (!strncmp(str, "nopanic", 7))
				55	hardlockup_panic = 0;
				56	else if (!strncmp(str, "0", 1))
				57	watchdog_enabled &= ~NMI_WATCHDOG_ENABLED;
				58	else if (!strncmp(str, "1", 1))
				59	watchdog_enabled \|= NMI_WATCHDOG_ENABLED;
				60	return 1;
				61	}
				62	__setup("nmi_watchdog=", hardlockup_panic_setup);
				63
				64	void touch_nmi_watchdog(void)
				65	{
				66	/*
				67	* Using __raw here because some code paths have
				68	* preemption enabled. If preemption is enabled
				69	* then interrupts should be enabled too, in which
				70	* case we shouldn't have to worry about the watchdog
				71	* going off.
				72	*/
				73	raw_cpu_write(watchdog_nmi_touch, true);
				74	touch_softlockup_watchdog();
				75	}
				76	EXPORT_SYMBOL(touch_nmi_watchdog);
				77
Kyle Yan	bd44874	2017-08-21 15:10:31 -0700	[diff] [blame]	78	#ifdef CONFIG_HARDLOCKUP_DETECTOR_OTHER_CPU
				79	static unsigned int watchdog_next_cpu(unsigned int cpu)
				80	{
				81	cpumask_t cpus = watchdog_cpus;
				82	unsigned int next_cpu;
				83
				84	next_cpu = cpumask_next(cpu, &cpus);
				85	if (next_cpu >= nr_cpu_ids)
				86	next_cpu = cpumask_first(&cpus);
				87
				88	if (next_cpu == cpu)
				89	return nr_cpu_ids;
				90
				91	return next_cpu;
				92	}
				93
				94	static int is_hardlockup_other_cpu(unsigned int cpu)
				95	{
				96	unsigned long hrint = per_cpu(hrtimer_interrupts, cpu);
				97
				98	if (per_cpu(hrtimer_interrupts_saved, cpu) == hrint)
				99	return 1;
				100
				101	per_cpu(hrtimer_interrupts_saved, cpu) = hrint;
				102	return 0;
				103	}
				104
				105	void watchdog_check_hardlockup_other_cpu(void)
				106	{
				107	unsigned int next_cpu;
				108
				109	/*
				110	* Test for hardlockups every 3 samples. The sample period is
				111	* watchdog_thresh * 2 / 5, so 3 samples gets us back to slightly over
				112	* watchdog_thresh (over by 20%).
				113	*/
				114	if (__this_cpu_read(hrtimer_interrupts) % 3 != 0)
				115	return;
				116
				117	/* check for a hardlockup on the next cpu */
				118	next_cpu = watchdog_next_cpu(smp_processor_id());
				119	if (next_cpu >= nr_cpu_ids)
				120	return;
				121
				122	smp_rmb();
				123
				124	if (per_cpu(watchdog_nmi_touch, next_cpu) == true) {
				125	per_cpu(watchdog_nmi_touch, next_cpu) = false;
				126	return;
				127	}
				128
				129	if (is_hardlockup_other_cpu(next_cpu)) {
				130	/* only warn once */
				131	if (per_cpu(hard_watchdog_warn, next_cpu) == true)
				132	return;
				133
				134	if (hardlockup_panic)
				135	panic("Watchdog detected hard LOCKUP on cpu %u",
				136	next_cpu);
				137	else
				138	WARN(1, "Watchdog detected hard LOCKUP on cpu %u",
				139	next_cpu);
				140
				141	per_cpu(hard_watchdog_warn, next_cpu) = true;
				142	} else {
				143	per_cpu(hard_watchdog_warn, next_cpu) = false;
				144	}
				145	}
				146
				147	int watchdog_nmi_enable(unsigned int cpu)
				148	{
				149	/*
				150	* The new cpu will be marked online before the first hrtimer interrupt
				151	* runs on it. If another cpu tests for a hardlockup on the new cpu
				152	* before it has run its first hrtimer, it will get a false positive.
				153	* Touch the watchdog on the new cpu to delay the first check for at
				154	* least 3 sampling periods to guarantee one hrtimer has run on the new
				155	* cpu.
				156	*/
				157	per_cpu(watchdog_nmi_touch, cpu) = true;
				158	smp_wmb();
				159	cpumask_set_cpu(cpu, &watchdog_cpus);
				160	return 0;
				161	}
				162
				163	void watchdog_nmi_disable(unsigned int cpu)
				164	{
				165	unsigned int next_cpu = watchdog_next_cpu(cpu);
				166
				167	/*
				168	* Offlining this cpu will cause the cpu before this one to start
				169	* checking the one after this one. If this cpu just finished checking
				170	* the next cpu and updating hrtimer_interrupts_saved, and then the
				171	* previous cpu checks it within one sample period, it will trigger a
				172	* false positive. Touch the watchdog on the next cpu to prevent it.
				173	*/
				174	if (next_cpu < nr_cpu_ids)
				175	per_cpu(watchdog_nmi_touch, next_cpu) = true;
				176	smp_wmb();
				177	cpumask_clear_cpu(cpu, &watchdog_cpus);
				178	}
				179	#else
Babu Moger	b969a24	2016-12-14 15:06:24 -0800	[diff] [blame]	180	static struct perf_event_attr wd_hw_attr = {
				181	.type = PERF_TYPE_HARDWARE,
				182	.config = PERF_COUNT_HW_CPU_CYCLES,
				183	.size = sizeof(struct perf_event_attr),
				184	.pinned = 1,
				185	.disabled = 1,
				186	};
				187
				188	/* Callback function for perf event subsystem */
				189	static void watchdog_overflow_callback(struct perf_event *event,
				190	struct perf_sample_data *data,
				191	struct pt_regs *regs)
				192	{
				193	/* Ensure the watchdog never gets throttled */
				194	event->hw.interrupts = 0;
				195
Don Zickus	b13b3b7	2017-01-24 15:17:53 -0800	[diff] [blame]	196	if (atomic_read(&watchdog_park_in_progress) != 0)
				197	return;
				198
Babu Moger	b969a24	2016-12-14 15:06:24 -0800	[diff] [blame]	199	if (__this_cpu_read(watchdog_nmi_touch) == true) {
				200	__this_cpu_write(watchdog_nmi_touch, false);
				201	return;
				202	}
				203
				204	/* check for a hardlockup
				205	* This is done by making sure our timer interrupt
				206	* is incrementing. The timer interrupt should have
				207	* fired multiple times before we overflow'd. If it hasn't
				208	* then this is a good indication the cpu is stuck
				209	*/
				210	if (is_hardlockup()) {
				211	int this_cpu = smp_processor_id();
				212
				213	/* only print hardlockups once */
				214	if (__this_cpu_read(hard_watchdog_warn) == true)
				215	return;
				216
				217	pr_emerg("Watchdog detected hard LOCKUP on cpu %d", this_cpu);
				218	print_modules();
				219	print_irqtrace_events(current);
				220	if (regs)
				221	show_regs(regs);
				222	else
				223	dump_stack();
				224
				225	/*
				226	* Perform all-CPU dump only once to avoid multiple hardlockups
				227	* generating interleaving traces
				228	*/
				229	if (sysctl_hardlockup_all_cpu_backtrace &&
				230	!test_and_set_bit(0, &hardlockup_allcpu_dumped))
				231	trigger_allbutself_cpu_backtrace();
				232
				233	if (hardlockup_panic)
				234	nmi_panic(regs, "Hard LOCKUP");
				235
				236	__this_cpu_write(hard_watchdog_warn, true);
				237	return;
				238	}
				239
				240	__this_cpu_write(hard_watchdog_warn, false);
				241	return;
				242	}
				243
				244	/*
				245	* People like the simple clean cpu node info on boot.
				246	* Reduce the watchdog noise by only printing messages
				247	* that are different from what cpu0 displayed.
				248	*/
				249	static unsigned long cpu0_err;
				250
				251	int watchdog_nmi_enable(unsigned int cpu)
				252	{
				253	struct perf_event_attr *wd_attr;
				254	struct perf_event *event = per_cpu(watchdog_ev, cpu);
				255
				256	/* nothing to do if the hard lockup detector is disabled */
				257	if (!(watchdog_enabled & NMI_WATCHDOG_ENABLED))
				258	goto out;
				259
				260	/* is it already setup and enabled? */
				261	if (event && event->state > PERF_EVENT_STATE_OFF)
				262	goto out;
				263
				264	/* it is setup but not enabled */
				265	if (event != NULL)
				266	goto out_enable;
				267
				268	wd_attr = &wd_hw_attr;
				269	wd_attr->sample_period = hw_nmi_get_sample_period(watchdog_thresh);
				270
				271	/* Try to register using hardware perf events */
				272	event = perf_event_create_kernel_counter(wd_attr, cpu, NULL, watchdog_overflow_callback, NULL);
				273
				274	/* save cpu0 error for future comparision */
				275	if (cpu == 0 && IS_ERR(event))
				276	cpu0_err = PTR_ERR(event);
				277
				278	if (!IS_ERR(event)) {
				279	/* only print for cpu0 or different than cpu0 */
				280	if (cpu == 0 \|\| cpu0_err)
				281	pr_info("enabled on all CPUs, permanently consumes one hw-PMU counter.\n");
				282	goto out_save;
				283	}
				284
				285	/*
				286	* Disable the hard lockup detector if _any_ CPU fails to set up
				287	* set up the hardware perf event. The watchdog() function checks
				288	* the NMI_WATCHDOG_ENABLED bit periodically.
				289	*
				290	* The barriers are for syncing up watchdog_enabled across all the
				291	* cpus, as clear_bit() does not use barriers.
				292	*/
				293	smp_mb__before_atomic();
				294	clear_bit(NMI_WATCHDOG_ENABLED_BIT, &watchdog_enabled);
				295	smp_mb__after_atomic();
				296
				297	/* skip displaying the same error again */
				298	if (cpu > 0 && (PTR_ERR(event) == cpu0_err))
				299	return PTR_ERR(event);
				300
				301	/* vary the KERN level based on the returned errno */
				302	if (PTR_ERR(event) == -EOPNOTSUPP)
				303	pr_info("disabled (cpu%i): not supported (no LAPIC?)\n", cpu);
				304	else if (PTR_ERR(event) == -ENOENT)
				305	pr_warn("disabled (cpu%i): hardware events not enabled\n",
				306	cpu);
				307	else
				308	pr_err("disabled (cpu%i): unable to create perf event: %ld\n",
				309	cpu, PTR_ERR(event));
				310
				311	pr_info("Shutting down hard lockup detector on all cpus\n");
				312
				313	return PTR_ERR(event);
				314
				315	/* success path */
				316	out_save:
				317	per_cpu(watchdog_ev, cpu) = event;
				318	out_enable:
				319	perf_event_enable(per_cpu(watchdog_ev, cpu));
				320	out:
				321	return 0;
				322	}
				323
				324	void watchdog_nmi_disable(unsigned int cpu)
				325	{
				326	struct perf_event *event = per_cpu(watchdog_ev, cpu);
				327
				328	if (event) {
				329	perf_event_disable(event);
				330	per_cpu(watchdog_ev, cpu) = NULL;
				331
				332	/* should be in cleanup, but blocks oprofile */
				333	perf_event_release_kernel(event);
				334	}
				335	if (cpu == 0) {
				336	/* watchdog_nmi_enable() expects this to be zero initially. */
				337	cpu0_err = 0;
				338	}
				339	}
Kyle Yan	bd44874	2017-08-21 15:10:31 -0700	[diff] [blame]	340	#endif