blob: 7ef3b596e45ff2a9ec3c53c661167fe6de7b3544 [file] [log] [blame]
// SPDX-License-Identifier: GPL-2.0+
/*
 * RCU CPU stall warnings for normal RCU grace periods
 *
 * Copyright IBM Corporation, 2019
 *
 * Author: Paul E. McKenney <paulmck@linux.ibm.com>
 */
9
10
Paul E. McKenney32255d52019-01-11 16:57:41 -080011/* panic() on RCU Stall sysctl. */
12int sysctl_panic_on_rcu_stall __read_mostly;
13
Paul E. McKenney10462d62019-01-11 16:10:57 -080014#ifdef CONFIG_PROVE_RCU
15#define RCU_STALL_DELAY_DELTA (5 * HZ)
16#else
17#define RCU_STALL_DELAY_DELTA 0
18#endif
19
20int rcu_jiffies_till_stall_check(void)
21{
22 int till_stall_check = READ_ONCE(rcu_cpu_stall_timeout);
23
24 /*
25 * Limit check must be consistent with the Kconfig limits
26 * for CONFIG_RCU_CPU_STALL_TIMEOUT.
27 */
28 if (till_stall_check < 3) {
29 WRITE_ONCE(rcu_cpu_stall_timeout, 3);
30 till_stall_check = 3;
31 } else if (till_stall_check > 300) {
32 WRITE_ONCE(rcu_cpu_stall_timeout, 300);
33 till_stall_check = 300;
34 }
35 return till_stall_check * HZ + RCU_STALL_DELAY_DELTA;
36}
37EXPORT_SYMBOL_GPL(rcu_jiffies_till_stall_check);
38
39void rcu_sysrq_start(void)
40{
41 if (!rcu_cpu_stall_suppress)
42 rcu_cpu_stall_suppress = 2;
43}
44
45void rcu_sysrq_end(void)
46{
47 if (rcu_cpu_stall_suppress == 2)
48 rcu_cpu_stall_suppress = 0;
49}
50
51static int rcu_panic(struct notifier_block *this, unsigned long ev, void *ptr)
52{
53 rcu_cpu_stall_suppress = 1;
54 return NOTIFY_DONE;
55}
56
57static struct notifier_block rcu_panic_block = {
58 .notifier_call = rcu_panic,
59};
60
61static int __init check_cpu_stall_init(void)
62{
63 atomic_notifier_chain_register(&panic_notifier_list, &rcu_panic_block);
64 return 0;
65}
66early_initcall(check_cpu_stall_init);
Paul E. McKenney3fc3d172019-01-11 16:34:47 -080067
68#ifdef CONFIG_PREEMPT
69
70/*
71 * Dump detailed information for all tasks blocking the current RCU
72 * grace period on the specified rcu_node structure.
73 */
74static void rcu_print_detail_task_stall_rnp(struct rcu_node *rnp)
75{
76 unsigned long flags;
77 struct task_struct *t;
78
79 raw_spin_lock_irqsave_rcu_node(rnp, flags);
80 if (!rcu_preempt_blocked_readers_cgp(rnp)) {
81 raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
82 return;
83 }
84 t = list_entry(rnp->gp_tasks->prev,
85 struct task_struct, rcu_node_entry);
86 list_for_each_entry_continue(t, &rnp->blkd_tasks, rcu_node_entry) {
87 /*
88 * We could be printing a lot while holding a spinlock.
89 * Avoid triggering hard lockup.
90 */
91 touch_nmi_watchdog();
92 sched_show_task(t);
93 }
94 raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
95}
96
97/*
Paul E. McKenney3fc3d172019-01-11 16:34:47 -080098 * Scan the current list of tasks blocked within RCU read-side critical
99 * sections, printing out the tid of each.
100 */
101static int rcu_print_task_stall(struct rcu_node *rnp)
102{
103 struct task_struct *t;
104 int ndetected = 0;
105
106 if (!rcu_preempt_blocked_readers_cgp(rnp))
107 return 0;
Paul E. McKenney21d0d792019-01-11 20:36:45 -0800108 pr_err("\tTasks blocked on level-%d rcu_node (CPUs %d-%d):",
109 rnp->level, rnp->grplo, rnp->grphi);
Paul E. McKenney3fc3d172019-01-11 16:34:47 -0800110 t = list_entry(rnp->gp_tasks->prev,
111 struct task_struct, rcu_node_entry);
112 list_for_each_entry_continue(t, &rnp->blkd_tasks, rcu_node_entry) {
113 pr_cont(" P%d", t->pid);
114 ndetected++;
115 }
Paul E. McKenney21d0d792019-01-11 20:36:45 -0800116 pr_cont("\n");
Paul E. McKenney3fc3d172019-01-11 16:34:47 -0800117 return ndetected;
118}
119
120#else /* #ifdef CONFIG_PREEMPT */
121
/*
 * Because preemptible RCU does not exist, there are no tasks blocked
 * within RCU read-side critical sections whose details could be dumped,
 * so this is a no-op.
 */
static void rcu_print_detail_task_stall_rnp(struct rcu_node *rnp)
{
}
129
/*
 * Because preemptible RCU does not exist, no task can be blocked within
 * an RCU read-side critical section, so report that zero tasks were
 * found.
 */
static int rcu_print_task_stall(struct rcu_node *rnp)
{
	return 0;
}
138#endif /* #else #ifdef CONFIG_PREEMPT */
Paul E. McKenney32255d52019-01-11 16:57:41 -0800139
140static void record_gp_stall_check_time(void)
141{
142 unsigned long j = jiffies;
143 unsigned long j1;
144
145 rcu_state.gp_start = j;
146 j1 = rcu_jiffies_till_stall_check();
147 /* Record ->gp_start before ->jiffies_stall. */
148 smp_store_release(&rcu_state.jiffies_stall, j + j1); /* ^^^ */
149 rcu_state.jiffies_resched = j + j1 / 2;
150 rcu_state.n_force_qs_gpstart = READ_ONCE(rcu_state.n_force_qs);
151}
152
153/*
154 * Complain about starvation of grace-period kthread.
155 */
156static void rcu_check_gp_kthread_starvation(void)
157{
158 struct task_struct *gpk = rcu_state.gp_kthread;
159 unsigned long j;
160
161 j = jiffies - READ_ONCE(rcu_state.gp_activity);
162 if (j > 2 * HZ) {
163 pr_err("%s kthread starved for %ld jiffies! g%ld f%#x %s(%d) ->state=%#lx ->cpu=%d\n",
164 rcu_state.name, j,
165 (long)rcu_seq_current(&rcu_state.gp_seq),
166 READ_ONCE(rcu_state.gp_flags),
167 gp_state_getname(rcu_state.gp_state), rcu_state.gp_state,
168 gpk ? gpk->state : ~0, gpk ? task_cpu(gpk) : -1);
169 if (gpk) {
170 pr_err("RCU grace-period kthread stack dump:\n");
171 sched_show_task(gpk);
172 wake_up_process(gpk);
173 }
174 }
175}
176
177/*
178 * Dump stacks of all tasks running on stalled CPUs. First try using
179 * NMIs, but fall back to manual remote stack tracing on architectures
180 * that don't support NMI-based stack dumps. The NMI-triggered stack
181 * traces are more accurate because they are printed by the target CPU.
182 */
183static void rcu_dump_cpu_stacks(void)
184{
185 int cpu;
186 unsigned long flags;
187 struct rcu_node *rnp;
188
189 rcu_for_each_leaf_node(rnp) {
190 raw_spin_lock_irqsave_rcu_node(rnp, flags);
191 for_each_leaf_node_possible_cpu(rnp, cpu)
192 if (rnp->qsmask & leaf_node_cpu_bit(rnp, cpu))
193 if (!trigger_single_cpu_backtrace(cpu))
194 dump_cpu_task(cpu);
195 raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
196 }
197}
198
199/*
200 * If too much time has passed in the current grace period, and if
201 * so configured, go kick the relevant kthreads.
202 */
203static void rcu_stall_kick_kthreads(void)
204{
205 unsigned long j;
206
207 if (!rcu_kick_kthreads)
208 return;
209 j = READ_ONCE(rcu_state.jiffies_kick_kthreads);
210 if (time_after(jiffies, j) && rcu_state.gp_kthread &&
211 (rcu_gp_in_progress() || READ_ONCE(rcu_state.gp_flags))) {
212 WARN_ONCE(1, "Kicking %s grace-period kthread\n",
213 rcu_state.name);
214 rcu_ftrace_dump(DUMP_ALL);
215 wake_up_process(rcu_state.gp_kthread);
216 WRITE_ONCE(rcu_state.jiffies_kick_kthreads, j + HZ);
217 }
218}
219
220static void panic_on_rcu_stall(void)
221{
222 if (sysctl_panic_on_rcu_stall)
223 panic("RCU Stall\n");
224}
225
226static void print_other_cpu_stall(unsigned long gp_seq)
227{
228 int cpu;
229 unsigned long flags;
230 unsigned long gpa;
231 unsigned long j;
232 int ndetected = 0;
Paul E. McKenney21d0d792019-01-11 20:36:45 -0800233 struct rcu_node *rnp;
Paul E. McKenney32255d52019-01-11 16:57:41 -0800234 long totqlen = 0;
235
236 /* Kick and suppress, if so configured. */
237 rcu_stall_kick_kthreads();
238 if (rcu_cpu_stall_suppress)
239 return;
240
241 /*
242 * OK, time to rat on our buddy...
243 * See Documentation/RCU/stallwarn.txt for info on how to debug
244 * RCU CPU stall warnings.
245 */
Paul E. McKenney40e69ac2019-01-11 20:58:58 -0800246 pr_err("INFO: %s detected stalls on CPUs/tasks:\n", rcu_state.name);
Paul E. McKenney32255d52019-01-11 16:57:41 -0800247 rcu_for_each_leaf_node(rnp) {
248 raw_spin_lock_irqsave_rcu_node(rnp, flags);
249 ndetected += rcu_print_task_stall(rnp);
250 if (rnp->qsmask != 0) {
251 for_each_leaf_node_possible_cpu(rnp, cpu)
252 if (rnp->qsmask & leaf_node_cpu_bit(rnp, cpu)) {
253 print_cpu_stall_info(cpu);
254 ndetected++;
255 }
256 }
257 raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
258 }
259
Paul E. McKenney32255d52019-01-11 16:57:41 -0800260 for_each_possible_cpu(cpu)
261 totqlen += rcu_get_n_cbs_cpu(cpu);
Paul E. McKenney40e69ac2019-01-11 20:58:58 -0800262 pr_cont("\t(detected by %d, t=%ld jiffies, g=%ld, q=%lu)\n",
Paul E. McKenney32255d52019-01-11 16:57:41 -0800263 smp_processor_id(), (long)(jiffies - rcu_state.gp_start),
264 (long)rcu_seq_current(&rcu_state.gp_seq), totqlen);
265 if (ndetected) {
266 rcu_dump_cpu_stacks();
267
268 /* Complain about tasks blocking the grace period. */
Paul E. McKenney21d0d792019-01-11 20:36:45 -0800269 rcu_for_each_leaf_node(rnp)
270 rcu_print_detail_task_stall_rnp(rnp);
Paul E. McKenney32255d52019-01-11 16:57:41 -0800271 } else {
272 if (rcu_seq_current(&rcu_state.gp_seq) != gp_seq) {
273 pr_err("INFO: Stall ended before state dump start\n");
274 } else {
275 j = jiffies;
276 gpa = READ_ONCE(rcu_state.gp_activity);
277 pr_err("All QSes seen, last %s kthread activity %ld (%ld-%ld), jiffies_till_next_fqs=%ld, root ->qsmask %#lx\n",
278 rcu_state.name, j - gpa, j, gpa,
279 READ_ONCE(jiffies_till_next_fqs),
280 rcu_get_root()->qsmask);
281 /* In this case, the current CPU might be at fault. */
282 sched_show_task(current);
283 }
284 }
285 /* Rewrite if needed in case of slow consoles. */
286 if (ULONG_CMP_GE(jiffies, READ_ONCE(rcu_state.jiffies_stall)))
287 WRITE_ONCE(rcu_state.jiffies_stall,
288 jiffies + 3 * rcu_jiffies_till_stall_check() + 3);
289
290 rcu_check_gp_kthread_starvation();
291
292 panic_on_rcu_stall();
293
294 rcu_force_quiescent_state(); /* Kick them all. */
295}
296
297static void print_cpu_stall(void)
298{
299 int cpu;
300 unsigned long flags;
301 struct rcu_data *rdp = this_cpu_ptr(&rcu_data);
302 struct rcu_node *rnp = rcu_get_root();
303 long totqlen = 0;
304
305 /* Kick and suppress, if so configured. */
306 rcu_stall_kick_kthreads();
307 if (rcu_cpu_stall_suppress)
308 return;
309
310 /*
311 * OK, time to rat on ourselves...
312 * See Documentation/RCU/stallwarn.txt for info on how to debug
313 * RCU CPU stall warnings.
314 */
Paul E. McKenney40e69ac2019-01-11 20:58:58 -0800315 pr_err("INFO: %s self-detected stall on CPU\n", rcu_state.name);
Paul E. McKenney32255d52019-01-11 16:57:41 -0800316 raw_spin_lock_irqsave_rcu_node(rdp->mynode, flags);
317 print_cpu_stall_info(smp_processor_id());
318 raw_spin_unlock_irqrestore_rcu_node(rdp->mynode, flags);
Paul E. McKenney32255d52019-01-11 16:57:41 -0800319 for_each_possible_cpu(cpu)
320 totqlen += rcu_get_n_cbs_cpu(cpu);
Paul E. McKenney40e69ac2019-01-11 20:58:58 -0800321 pr_cont("\t(t=%lu jiffies g=%ld q=%lu)\n",
Paul E. McKenney32255d52019-01-11 16:57:41 -0800322 jiffies - rcu_state.gp_start,
323 (long)rcu_seq_current(&rcu_state.gp_seq), totqlen);
324
325 rcu_check_gp_kthread_starvation();
326
327 rcu_dump_cpu_stacks();
328
329 raw_spin_lock_irqsave_rcu_node(rnp, flags);
330 /* Rewrite if needed in case of slow consoles. */
331 if (ULONG_CMP_GE(jiffies, READ_ONCE(rcu_state.jiffies_stall)))
332 WRITE_ONCE(rcu_state.jiffies_stall,
333 jiffies + 3 * rcu_jiffies_till_stall_check() + 3);
334 raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
335
336 panic_on_rcu_stall();
337
338 /*
339 * Attempt to revive the RCU machinery by forcing a context switch.
340 *
341 * A context switch would normally allow the RCU state machine to make
342 * progress and it could be we're stuck in kernel space without context
343 * switches for an entirely unreasonable amount of time.
344 */
345 set_tsk_need_resched(current);
346 set_preempt_need_resched();
347}
348
349static void check_cpu_stall(struct rcu_data *rdp)
350{
351 unsigned long gs1;
352 unsigned long gs2;
353 unsigned long gps;
354 unsigned long j;
355 unsigned long jn;
356 unsigned long js;
357 struct rcu_node *rnp;
358
359 if ((rcu_cpu_stall_suppress && !rcu_kick_kthreads) ||
360 !rcu_gp_in_progress())
361 return;
362 rcu_stall_kick_kthreads();
363 j = jiffies;
364
365 /*
366 * Lots of memory barriers to reject false positives.
367 *
368 * The idea is to pick up rcu_state.gp_seq, then
369 * rcu_state.jiffies_stall, then rcu_state.gp_start, and finally
370 * another copy of rcu_state.gp_seq. These values are updated in
371 * the opposite order with memory barriers (or equivalent) during
372 * grace-period initialization and cleanup. Now, a false positive
373 * can occur if we get an new value of rcu_state.gp_start and a old
374 * value of rcu_state.jiffies_stall. But given the memory barriers,
375 * the only way that this can happen is if one grace period ends
376 * and another starts between these two fetches. This is detected
377 * by comparing the second fetch of rcu_state.gp_seq with the
378 * previous fetch from rcu_state.gp_seq.
379 *
380 * Given this check, comparisons of jiffies, rcu_state.jiffies_stall,
381 * and rcu_state.gp_start suffice to forestall false positives.
382 */
383 gs1 = READ_ONCE(rcu_state.gp_seq);
384 smp_rmb(); /* Pick up ->gp_seq first... */
385 js = READ_ONCE(rcu_state.jiffies_stall);
386 smp_rmb(); /* ...then ->jiffies_stall before the rest... */
387 gps = READ_ONCE(rcu_state.gp_start);
388 smp_rmb(); /* ...and finally ->gp_start before ->gp_seq again. */
389 gs2 = READ_ONCE(rcu_state.gp_seq);
390 if (gs1 != gs2 ||
391 ULONG_CMP_LT(j, js) ||
392 ULONG_CMP_GE(gps, js))
393 return; /* No stall or GP completed since entering function. */
394 rnp = rdp->mynode;
395 jn = jiffies + 3 * rcu_jiffies_till_stall_check() + 3;
396 if (rcu_gp_in_progress() &&
397 (READ_ONCE(rnp->qsmask) & rdp->grpmask) &&
398 cmpxchg(&rcu_state.jiffies_stall, js, jn) == js) {
399
400 /* We haven't checked in, so go dump stack. */
401 print_cpu_stall();
402
403 } else if (rcu_gp_in_progress() &&
404 ULONG_CMP_GE(j, js + RCU_STALL_RAT_DELAY) &&
405 cmpxchg(&rcu_state.jiffies_stall, js, jn) == js) {
406
407 /* They had a few time units to dump stack, so complain. */
408 print_other_cpu_stall(gs2);
409 }
410}
411
412/**
413 * rcu_cpu_stall_reset - prevent further stall warnings in current grace period
414 *
415 * Set the stall-warning timeout way off into the future, thus preventing
416 * any RCU CPU stall-warning messages from appearing in the current set of
417 * RCU grace periods.
418 *
419 * The caller must disable hard irqs.
420 */
421void rcu_cpu_stall_reset(void)
422{
423 WRITE_ONCE(rcu_state.jiffies_stall, jiffies + ULONG_MAX / 2);
424}