/* SPDX-License-Identifier: GPL-2.0+ */
/*
 * Read-Copy Update mechanism for mutual exclusion (tree-based version)
 * Internal non-public definitions that provide either classic
 * or preemptible semantics.
 *
 * Copyright Red Hat, 2009
 * Copyright IBM Corporation, 2009
 *
 * Author: Ingo Molnar <mingo@elte.hu>
 *	   Paul E. McKenney <paulmck@linux.ibm.com>
 */

#include "../locking/rtmutex_common.h"

#ifdef CONFIG_RCU_NOCB_CPU
static cpumask_var_t rcu_nocb_mask; /* CPUs to have callbacks offloaded. */
static bool __read_mostly rcu_nocb_poll; /* Offload kthreads are to poll. */
static inline int rcu_lockdep_is_held_nocb(struct rcu_data *rdp)
{
	return lockdep_is_held(&rdp->nocb_lock);
}

static inline bool rcu_current_is_nocb_kthread(struct rcu_data *rdp)
{
	/* Race on early boot between thread creation and assignment */
	if (!rdp->nocb_cb_kthread || !rdp->nocb_gp_kthread)
		return true;

	if (current == rdp->nocb_cb_kthread || current == rdp->nocb_gp_kthread)
		if (in_task())
			return true;
	return false;
}

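/*
 * Return true if this CPU is currently executing the ->nocb_timer callback
 * outside of hardirq context, in which case the rdp's offloaded state
 * cannot change out from under the caller for the duration of that
 * callback.
 */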
static inline bool rcu_running_nocb_timer(struct rcu_data *rdp)
{
	return (timer_curr_running(&rdp->nocb_timer) && !in_irq());
}
#else
static inline int rcu_lockdep_is_held_nocb(struct rcu_data *rdp)
{
	return 0;
}

static inline bool rcu_current_is_nocb_kthread(struct rcu_data *rdp)
{
	return false;
}

static inline bool rcu_running_nocb_timer(struct rcu_data *rdp)
{
	return false;
}

#endif /* #ifdef CONFIG_RCU_NOCB_CPU */

static bool rcu_rdp_is_offloaded(struct rcu_data *rdp)
{
	/*
	 * In order to read the offloaded state of an rdp in a safe
	 * and stable way and prevent its value from being changed
	 * under us, we must either hold the barrier mutex, the CPU
	 * hotplug lock (read or write) or the nocb lock. Local
	 * non-preemptible reads are also safe. NOCB kthreads and
	 * timers have their own means of synchronization against the
	 * offloaded state updaters.
	 */
	RCU_LOCKDEP_WARN(
		!(lockdep_is_held(&rcu_state.barrier_mutex) ||
		  (IS_ENABLED(CONFIG_HOTPLUG_CPU) && lockdep_is_cpus_held()) ||
		  rcu_lockdep_is_held_nocb(rdp) ||
		  (rdp == this_cpu_ptr(&rcu_data) &&
		   !(IS_ENABLED(CONFIG_PREEMPT_COUNT) && preemptible())) ||
		  rcu_current_is_nocb_kthread(rdp) ||
		  rcu_running_nocb_timer(rdp)),
		"Unsafe read of RCU_NOCB offloaded state"
	);

	return rcu_segcblist_is_offloaded(&rdp->cblist);
}
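/*
 * Illustrative safe-call pattern, a sketch only (with "offloaded" being a
 * hypothetical caller-local variable): a reader on the local CPU can
 * satisfy the lockdep check above by simply disabling preemption:
 *
 *	preempt_disable();
 *	offloaded = rcu_rdp_is_offloaded(this_cpu_ptr(&rcu_data));
 *	preempt_enable();
 */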

/*
 * Check the RCU kernel configuration parameters and print informative
 * messages about anything out of the ordinary.
 */
static void __init rcu_bootup_announce_oddness(void)
{
	if (IS_ENABLED(CONFIG_RCU_TRACE))
		pr_info("\tRCU event tracing is enabled.\n");
	if ((IS_ENABLED(CONFIG_64BIT) && RCU_FANOUT != 64) ||
	    (!IS_ENABLED(CONFIG_64BIT) && RCU_FANOUT != 32))
		pr_info("\tCONFIG_RCU_FANOUT set to non-default value of %d.\n",
			RCU_FANOUT);
	if (rcu_fanout_exact)
		pr_info("\tHierarchical RCU autobalancing is disabled.\n");
	if (IS_ENABLED(CONFIG_RCU_FAST_NO_HZ))
		pr_info("\tRCU dyntick-idle grace-period acceleration is enabled.\n");
	if (IS_ENABLED(CONFIG_PROVE_RCU))
		pr_info("\tRCU lockdep checking is enabled.\n");
	if (IS_ENABLED(CONFIG_RCU_STRICT_GRACE_PERIOD))
		pr_info("\tRCU strict (and thus non-scalable) grace periods enabled.\n");
	if (RCU_NUM_LVLS >= 4)
		pr_info("\tFour(or more)-level hierarchy is enabled.\n");
	if (RCU_FANOUT_LEAF != 16)
		pr_info("\tBuild-time adjustment of leaf fanout to %d.\n",
			RCU_FANOUT_LEAF);
	if (rcu_fanout_leaf != RCU_FANOUT_LEAF)
		pr_info("\tBoot-time adjustment of leaf fanout to %d.\n",
			rcu_fanout_leaf);
	if (nr_cpu_ids != NR_CPUS)
		pr_info("\tRCU restricting CPUs from NR_CPUS=%d to nr_cpu_ids=%u.\n", NR_CPUS, nr_cpu_ids);
#ifdef CONFIG_RCU_BOOST
	pr_info("\tRCU priority boosting: priority %d delay %d ms.\n",
		kthread_prio, CONFIG_RCU_BOOST_DELAY);
#endif
	if (blimit != DEFAULT_RCU_BLIMIT)
		pr_info("\tBoot-time adjustment of callback invocation limit to %ld.\n", blimit);
	if (qhimark != DEFAULT_RCU_QHIMARK)
		pr_info("\tBoot-time adjustment of callback high-water mark to %ld.\n", qhimark);
	if (qlowmark != DEFAULT_RCU_QLOMARK)
		pr_info("\tBoot-time adjustment of callback low-water mark to %ld.\n", qlowmark);
	if (qovld != DEFAULT_RCU_QOVLD)
		pr_info("\tBoot-time adjustment of callback overload level to %ld.\n", qovld);
	if (jiffies_till_first_fqs != ULONG_MAX)
		pr_info("\tBoot-time adjustment of first FQS scan delay to %ld jiffies.\n", jiffies_till_first_fqs);
	if (jiffies_till_next_fqs != ULONG_MAX)
		pr_info("\tBoot-time adjustment of subsequent FQS scan delay to %ld jiffies.\n", jiffies_till_next_fqs);
	if (jiffies_till_sched_qs != ULONG_MAX)
		pr_info("\tBoot-time adjustment of scheduler-enlistment delay to %ld jiffies.\n", jiffies_till_sched_qs);
	if (rcu_kick_kthreads)
		pr_info("\tKick kthreads if too-long grace period.\n");
	if (IS_ENABLED(CONFIG_DEBUG_OBJECTS_RCU_HEAD))
		pr_info("\tRCU callback double-/use-after-free debug enabled.\n");
	if (gp_preinit_delay)
		pr_info("\tRCU debug GP pre-init slowdown %d jiffies.\n", gp_preinit_delay);
	if (gp_init_delay)
		pr_info("\tRCU debug GP init slowdown %d jiffies.\n", gp_init_delay);
	if (gp_cleanup_delay)
		pr_info("\tRCU debug GP cleanup slowdown %d jiffies.\n", gp_cleanup_delay);
	if (!use_softirq)
		pr_info("\tRCU_SOFTIRQ processing moved to rcuc kthreads.\n");
	if (IS_ENABLED(CONFIG_RCU_EQS_DEBUG))
		pr_info("\tRCU debug extended QS entry/exit.\n");
	rcupdate_announce_bootup_oddness();
}

#ifdef CONFIG_PREEMPT_RCU

static void rcu_report_exp_rnp(struct rcu_node *rnp, bool wake);
static void rcu_read_unlock_special(struct task_struct *t);

/*
 * Tell them what RCU they are running.
 */
static void __init rcu_bootup_announce(void)
{
	pr_info("Preemptible hierarchical RCU implementation.\n");
	rcu_bootup_announce_oddness();
}

/* Flags for rcu_preempt_ctxt_queue() decision table. */
#define RCU_GP_TASKS	0x8
#define RCU_EXP_TASKS	0x4
#define RCU_GP_BLKD	0x2
#define RCU_EXP_BLKD	0x1
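/*
 * These bits are combined into blkd_state in rcu_preempt_ctxt_queue():
 * RCU_GP_TASKS and RCU_EXP_TASKS indicate that ->gp_tasks (respectively
 * ->exp_tasks) is already non-NULL, that is, some earlier blocked task is
 * holding up the normal (respectively expedited) grace period, while
 * RCU_GP_BLKD and RCU_EXP_BLKD indicate that this CPU's bit is still set
 * in ->qsmask (respectively ->expmask), so that the newly blocked task
 * itself blocks that grace period.
 */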

/*
 * Queues a task preempted within an RCU-preempt read-side critical
 * section into the appropriate location within the ->blkd_tasks list,
 * depending on the states of any ongoing normal and expedited grace
 * periods. The ->gp_tasks pointer indicates which element the normal
 * grace period is waiting on (NULL if none), and the ->exp_tasks pointer
 * indicates which element the expedited grace period is waiting on (again,
 * NULL if none). If a grace period is waiting on a given element in the
 * ->blkd_tasks list, it also waits on all subsequent elements. Thus,
 * adding a task to the tail of the list blocks any grace period that is
 * already waiting on one of the elements. In contrast, adding a task
 * to the head of the list won't block any grace period that is already
 * waiting on one of the elements.
 *
 * This queuing is imprecise, and can sometimes make an ongoing grace
 * period wait for a task that is not strictly speaking blocking it.
 * Given the choice, we needlessly block a normal grace period rather than
 * blocking an expedited grace period.
 *
 * Note that an endless sequence of expedited grace periods still cannot
 * indefinitely postpone a normal grace period. Eventually, all of the
 * fixed number of preempted tasks blocking the normal grace period that are
 * not also blocking the expedited grace period will resume and complete
 * their RCU read-side critical sections. At that point, the ->gp_tasks
 * pointer will equal the ->exp_tasks pointer, at which point the end of
 * the corresponding expedited grace period will also be the end of the
 * normal grace period.
 */
static void rcu_preempt_ctxt_queue(struct rcu_node *rnp, struct rcu_data *rdp)
	__releases(rnp->lock) /* But leaves rrupts disabled. */
{
	int blkd_state = (rnp->gp_tasks ? RCU_GP_TASKS : 0) +
			 (rnp->exp_tasks ? RCU_EXP_TASKS : 0) +
			 (rnp->qsmask & rdp->grpmask ? RCU_GP_BLKD : 0) +
			 (rnp->expmask & rdp->grpmask ? RCU_EXP_BLKD : 0);
	struct task_struct *t = current;

	raw_lockdep_assert_held_rcu_node(rnp);
	WARN_ON_ONCE(rdp->mynode != rnp);
	WARN_ON_ONCE(!rcu_is_leaf_node(rnp));
	/* RCU better not be waiting on newly onlined CPUs! */
	WARN_ON_ONCE(rnp->qsmaskinitnext & ~rnp->qsmaskinit & rnp->qsmask &
		     rdp->grpmask);

	/*
	 * Decide where to queue the newly blocked task. In theory,
	 * this could be an if-statement. In practice, when I tried
	 * that, it was quite messy.
	 */
	switch (blkd_state) {
	case 0:
	case RCU_EXP_TASKS:
	case RCU_EXP_TASKS + RCU_GP_BLKD:
	case RCU_GP_TASKS:
	case RCU_GP_TASKS + RCU_EXP_TASKS:

		/*
		 * Blocking neither GP, or first task blocking the normal
		 * GP but not blocking the already-waiting expedited GP.
		 * Queue at the head of the list to avoid unnecessarily
		 * blocking the already-waiting GPs.
		 */
		list_add(&t->rcu_node_entry, &rnp->blkd_tasks);
		break;

	case RCU_EXP_BLKD:
	case RCU_GP_BLKD:
	case RCU_GP_BLKD + RCU_EXP_BLKD:
	case RCU_GP_TASKS + RCU_EXP_BLKD:
	case RCU_GP_TASKS + RCU_GP_BLKD + RCU_EXP_BLKD:
	case RCU_GP_TASKS + RCU_EXP_TASKS + RCU_GP_BLKD + RCU_EXP_BLKD:

		/*
		 * First task arriving that blocks either GP, or first task
		 * arriving that blocks the expedited GP (with the normal
		 * GP already waiting), or a task arriving that blocks
		 * both GPs with both GPs already waiting. Queue at the
		 * tail of the list to avoid any GP waiting on any of the
		 * already queued tasks that are not blocking it.
		 */
		list_add_tail(&t->rcu_node_entry, &rnp->blkd_tasks);
		break;

	case RCU_EXP_TASKS + RCU_EXP_BLKD:
	case RCU_EXP_TASKS + RCU_GP_BLKD + RCU_EXP_BLKD:
	case RCU_GP_TASKS + RCU_EXP_TASKS + RCU_EXP_BLKD:

		/*
		 * Second or subsequent task blocking the expedited GP.
		 * The task either does not block the normal GP, or is the
		 * first task blocking the normal GP. Queue just after
		 * the first task blocking the expedited GP.
		 */
		list_add(&t->rcu_node_entry, rnp->exp_tasks);
		break;

	case RCU_GP_TASKS + RCU_GP_BLKD:
	case RCU_GP_TASKS + RCU_EXP_TASKS + RCU_GP_BLKD:

		/*
		 * Second or subsequent task blocking the normal GP.
		 * The task does not block the expedited GP. Queue just
		 * after the first task blocking the normal GP.
		 */
		list_add(&t->rcu_node_entry, rnp->gp_tasks);
		break;

	default:

		/* Yet another exercise in excessive paranoia. */
		WARN_ON_ONCE(1);
		break;
	}

	/*
	 * We have now queued the task. If it was the first one to
	 * block either grace period, update the ->gp_tasks and/or
	 * ->exp_tasks pointers, respectively, to reference the newly
	 * blocked tasks.
	 */
	if (!rnp->gp_tasks && (blkd_state & RCU_GP_BLKD)) {
		WRITE_ONCE(rnp->gp_tasks, &t->rcu_node_entry);
		WARN_ON_ONCE(rnp->completedqs == rnp->gp_seq);
	}
	if (!rnp->exp_tasks && (blkd_state & RCU_EXP_BLKD))
		WRITE_ONCE(rnp->exp_tasks, &t->rcu_node_entry);
	WARN_ON_ONCE(!(blkd_state & RCU_GP_BLKD) !=
		     !(rnp->qsmask & rdp->grpmask));
	WARN_ON_ONCE(!(blkd_state & RCU_EXP_BLKD) !=
		     !(rnp->expmask & rdp->grpmask));
	raw_spin_unlock_rcu_node(rnp); /* interrupts remain disabled. */

	/*
	 * Report the quiescent state for the expedited GP. This expedited
	 * GP should not be able to end until we report, so there should be
	 * no need to check for a subsequent expedited GP. (Though we are
	 * still in a quiescent state in any case.)
	 */
	if (blkd_state & RCU_EXP_BLKD && rdp->exp_deferred_qs)
		rcu_report_exp_rdp(rdp);
	else
		WARN_ON_ONCE(rdp->exp_deferred_qs);
}

/*
 * Record a preemptible-RCU quiescent state for the specified CPU.
 * Note that this does not necessarily mean that the task currently running
 * on the CPU is in a quiescent state: Instead, it means that the current
 * grace period need not wait on any RCU read-side critical section that
 * starts later on this CPU. It also means that if the current task is
 * in an RCU read-side critical section, it has already added itself to
 * some leaf rcu_node structure's ->blkd_tasks list. In addition to the
 * current task, there might be any number of other tasks blocked while
 * in an RCU read-side critical section.
 *
 * Callers to this function must disable preemption.
 */
static void rcu_qs(void)
{
	RCU_LOCKDEP_WARN(preemptible(), "rcu_qs() invoked with preemption enabled!!!\n");
	if (__this_cpu_read(rcu_data.cpu_no_qs.s)) {
		trace_rcu_grace_period(TPS("rcu_preempt"),
				       __this_cpu_read(rcu_data.gp_seq),
				       TPS("cpuqs"));
		__this_cpu_write(rcu_data.cpu_no_qs.b.norm, false);
		barrier(); /* Coordinate with rcu_flavor_sched_clock_irq(). */
		WRITE_ONCE(current->rcu_read_unlock_special.b.need_qs, false);
	}
}

/*
 * We have entered the scheduler, and the current task might soon be
 * context-switched away from. If this task is in an RCU read-side
 * critical section, we will no longer be able to rely on the CPU to
 * record that fact, so we enqueue the task on the blkd_tasks list.
 * The task will dequeue itself when it exits the outermost enclosing
 * RCU read-side critical section. Therefore, the current grace period
 * cannot be permitted to complete until the blkd_tasks list entries
 * predating the current grace period drain, in other words, until
 * rnp->gp_tasks becomes NULL.
 *
 * Caller must disable interrupts.
 */
void rcu_note_context_switch(bool preempt)
{
	struct task_struct *t = current;
	struct rcu_data *rdp = this_cpu_ptr(&rcu_data);
	struct rcu_node *rnp;

	trace_rcu_utilization(TPS("Start context switch"));
	lockdep_assert_irqs_disabled();
	WARN_ON_ONCE(!preempt && rcu_preempt_depth() > 0);
	if (rcu_preempt_depth() > 0 &&
	    !t->rcu_read_unlock_special.b.blocked) {

		/* Possibly blocking in an RCU read-side critical section. */
		rnp = rdp->mynode;
		raw_spin_lock_rcu_node(rnp);
		t->rcu_read_unlock_special.b.blocked = true;
		t->rcu_blocked_node = rnp;

		/*
		 * Verify the CPU's sanity, trace the preemption, and
		 * then queue the task as required based on the states
		 * of any ongoing and expedited grace periods.
		 */
		WARN_ON_ONCE((rdp->grpmask & rcu_rnp_online_cpus(rnp)) == 0);
		WARN_ON_ONCE(!list_empty(&t->rcu_node_entry));
		trace_rcu_preempt_task(rcu_state.name,
				       t->pid,
				       (rnp->qsmask & rdp->grpmask)
				       ? rnp->gp_seq
				       : rcu_seq_snap(&rnp->gp_seq));
		rcu_preempt_ctxt_queue(rnp, rdp);
	} else {
		rcu_preempt_deferred_qs(t);
	}

	/*
	 * Either we were not in an RCU read-side critical section to
	 * begin with, or we have now recorded that critical section
	 * globally. Either way, we can now note a quiescent state
	 * for this CPU. Again, if we were in an RCU read-side critical
	 * section, and if that critical section was blocking the current
	 * grace period, then the fact that the task has been enqueued
	 * means that we continue to block the current grace period.
	 */
	rcu_qs();
	if (rdp->exp_deferred_qs)
		rcu_report_exp_rdp(rdp);
	rcu_tasks_qs(current, preempt);
	trace_rcu_utilization(TPS("End context switch"));
}
EXPORT_SYMBOL_GPL(rcu_note_context_switch);

/*
 * Check for preempted RCU readers blocking the current grace period
 * for the specified rcu_node structure. If the caller needs a reliable
 * answer, it must hold the rcu_node's ->lock.
 */
static int rcu_preempt_blocked_readers_cgp(struct rcu_node *rnp)
{
	return READ_ONCE(rnp->gp_tasks) != NULL;
}

/* limit value for ->rcu_read_lock_nesting. */
#define RCU_NEST_PMAX (INT_MAX / 2)

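/*
 * Helpers for the current task's ->rcu_read_lock_nesting counter, which
 * tracks how deeply nested the task's preemptible-RCU read-side critical
 * sections currently are (zero means the task is not in one at all).
 */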
static void rcu_preempt_read_enter(void)
{
	current->rcu_read_lock_nesting++;
}

static int rcu_preempt_read_exit(void)
{
	return --current->rcu_read_lock_nesting;
}

static void rcu_preempt_depth_set(int val)
{
	current->rcu_read_lock_nesting = val;
}

/*
 * Preemptible RCU implementation for rcu_read_lock().
 * Just increment ->rcu_read_lock_nesting, shared state will be updated
 * if we block.
 */
void __rcu_read_lock(void)
{
	rcu_preempt_read_enter();
	if (IS_ENABLED(CONFIG_PROVE_LOCKING))
		WARN_ON_ONCE(rcu_preempt_depth() > RCU_NEST_PMAX);
	if (IS_ENABLED(CONFIG_RCU_STRICT_GRACE_PERIOD) && rcu_state.gp_kthread)
		WRITE_ONCE(current->rcu_read_unlock_special.b.need_qs, true);
	barrier(); /* critical section after entry code. */
}
EXPORT_SYMBOL_GPL(__rcu_read_lock);

/*
 * Preemptible RCU implementation for rcu_read_unlock().
 * Decrement ->rcu_read_lock_nesting. If the result is zero (outermost
 * rcu_read_unlock()) and ->rcu_read_unlock_special is non-zero, then
 * invoke rcu_read_unlock_special() to clean up after a context switch
 * in an RCU read-side critical section and other special cases.
 */
void __rcu_read_unlock(void)
{
	struct task_struct *t = current;

	barrier(); // critical section before exit code.
	if (rcu_preempt_read_exit() == 0) {
		barrier(); // critical-section exit before .s check.
		if (unlikely(READ_ONCE(t->rcu_read_unlock_special.s)))
			rcu_read_unlock_special(t);
	}
	if (IS_ENABLED(CONFIG_PROVE_LOCKING)) {
		int rrln = rcu_preempt_depth();

		WARN_ON_ONCE(rrln < 0 || rrln > RCU_NEST_PMAX);
	}
}
EXPORT_SYMBOL_GPL(__rcu_read_unlock);

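/*
 * Illustrative read-side usage (a sketch only): readers normally go
 * through the rcu_read_lock() and rcu_read_unlock() wrappers, with the
 * protected pointer accessed via rcu_dereference(). Here "gp" is some
 * RCU-protected global pointer and do_something_with() is a hypothetical
 * helper:
 *
 *	rcu_read_lock();
 *	p = rcu_dereference(gp);
 *	if (p)
 *		do_something_with(p);
 *	rcu_read_unlock();
 */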
/*
 * Advance a ->blkd_tasks-list pointer to the next entry, instead
 * returning NULL if at the end of the list.
 */
static struct list_head *rcu_next_node_entry(struct task_struct *t,
					     struct rcu_node *rnp)
{
	struct list_head *np;

	np = t->rcu_node_entry.next;
	if (np == &rnp->blkd_tasks)
		np = NULL;
	return np;
}

/*
 * Return true if the specified rcu_node structure has tasks that were
 * preempted within an RCU read-side critical section.
 */
static bool rcu_preempt_has_tasks(struct rcu_node *rnp)
{
	return !list_empty(&rnp->blkd_tasks);
}

/*
 * Report deferred quiescent states. The deferral time can
 * be quite short, for example, in the case of the call from
 * rcu_read_unlock_special().
 */
static void
rcu_preempt_deferred_qs_irqrestore(struct task_struct *t, unsigned long flags)
{
	bool empty_exp;
	bool empty_norm;
	bool empty_exp_now;
	struct list_head *np;
	bool drop_boost_mutex = false;
	struct rcu_data *rdp;
	struct rcu_node *rnp;
	union rcu_special special;

	/*
	 * If RCU core is waiting for this CPU to exit its critical section,
	 * report the fact that it has exited. Because irqs are disabled,
	 * t->rcu_read_unlock_special cannot change.
	 */
	special = t->rcu_read_unlock_special;
	rdp = this_cpu_ptr(&rcu_data);
	if (!special.s && !rdp->exp_deferred_qs) {
		local_irq_restore(flags);
		return;
	}
	t->rcu_read_unlock_special.s = 0;
	if (special.b.need_qs) {
		if (IS_ENABLED(CONFIG_RCU_STRICT_GRACE_PERIOD)) {
			rcu_report_qs_rdp(rdp);
			udelay(rcu_unlock_delay);
		} else {
			rcu_qs();
		}
	}

	/*
	 * Respond to a request by an expedited grace period for a
	 * quiescent state from this CPU. Note that requests from
	 * tasks are handled when removing the task from the
	 * blocked-tasks list below.
	 */
	if (rdp->exp_deferred_qs)
		rcu_report_exp_rdp(rdp);

	/* Clean up if blocked during RCU read-side critical section. */
	if (special.b.blocked) {

		/*
		 * Remove this task from the list it blocked on. The task
		 * now remains queued on the rcu_node corresponding to the
		 * CPU it first blocked on, so there is no longer any need
		 * to loop. Retain a WARN_ON_ONCE() out of sheer paranoia.
		 */
		rnp = t->rcu_blocked_node;
		raw_spin_lock_rcu_node(rnp); /* irqs already disabled. */
		WARN_ON_ONCE(rnp != t->rcu_blocked_node);
		WARN_ON_ONCE(!rcu_is_leaf_node(rnp));
		empty_norm = !rcu_preempt_blocked_readers_cgp(rnp);
		WARN_ON_ONCE(rnp->completedqs == rnp->gp_seq &&
			     (!empty_norm || rnp->qsmask));
		empty_exp = sync_rcu_exp_done(rnp);
		smp_mb(); /* ensure expedited fastpath sees end of RCU c-s. */
		np = rcu_next_node_entry(t, rnp);
		list_del_init(&t->rcu_node_entry);
		t->rcu_blocked_node = NULL;
		trace_rcu_unlock_preempted_task(TPS("rcu_preempt"),
						rnp->gp_seq, t->pid);
		if (&t->rcu_node_entry == rnp->gp_tasks)
			WRITE_ONCE(rnp->gp_tasks, np);
		if (&t->rcu_node_entry == rnp->exp_tasks)
			WRITE_ONCE(rnp->exp_tasks, np);
		if (IS_ENABLED(CONFIG_RCU_BOOST)) {
			/* Snapshot ->boost_mtx ownership w/rnp->lock held. */
			drop_boost_mutex = rt_mutex_owner(&rnp->boost_mtx) == t;
			if (&t->rcu_node_entry == rnp->boost_tasks)
				WRITE_ONCE(rnp->boost_tasks, np);
		}

		/*
		 * If this was the last task on the current list, and if
		 * we aren't waiting on any CPUs, report the quiescent state.
		 * Note that rcu_report_unblock_qs_rnp() releases rnp->lock,
		 * so we must take a snapshot of the expedited state.
		 */
		empty_exp_now = sync_rcu_exp_done(rnp);
		if (!empty_norm && !rcu_preempt_blocked_readers_cgp(rnp)) {
			trace_rcu_quiescent_state_report(TPS("preempt_rcu"),
							 rnp->gp_seq,
							 0, rnp->qsmask,
							 rnp->level,
							 rnp->grplo,
							 rnp->grphi,
							 !!rnp->gp_tasks);
			rcu_report_unblock_qs_rnp(rnp, flags);
		} else {
			raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
		}

		/* Unboost if we were boosted. */
		if (IS_ENABLED(CONFIG_RCU_BOOST) && drop_boost_mutex)
			rt_mutex_futex_unlock(&rnp->boost_mtx);

		/*
		 * If this was the last task on the expedited lists,
		 * then we need to report up the rcu_node hierarchy.
		 */
		if (!empty_exp && empty_exp_now)
			rcu_report_exp_rnp(rnp, true);
	} else {
		local_irq_restore(flags);
	}
}

/*
 * Is a deferred quiescent-state pending, and are we also not in
 * an RCU read-side critical section? It is the caller's responsibility
 * to ensure it is otherwise safe to report any deferred quiescent
 * states. The reason for this is that it is safe to report a
 * quiescent state during context switch even though preemption
 * is disabled. This function cannot be expected to understand these
 * nuances, so the caller must handle them.
 */
static bool rcu_preempt_need_deferred_qs(struct task_struct *t)
{
	return (__this_cpu_read(rcu_data.exp_deferred_qs) ||
		READ_ONCE(t->rcu_read_unlock_special.s)) &&
	       rcu_preempt_depth() == 0;
}

/*
 * Report a deferred quiescent state if needed and safe to do so.
 * As with rcu_preempt_need_deferred_qs(), "safe" involves only
 * not being in an RCU read-side critical section. The caller must
 * evaluate safety in terms of interrupt, softirq, and preemption
 * disabling.
 */
static void rcu_preempt_deferred_qs(struct task_struct *t)
{
	unsigned long flags;

	if (!rcu_preempt_need_deferred_qs(t))
		return;
	local_irq_save(flags);
	rcu_preempt_deferred_qs_irqrestore(t, flags);
}

/*
 * Minimal handler to give the scheduler a chance to re-evaluate.
 */
static void rcu_preempt_deferred_qs_handler(struct irq_work *iwp)
{
	struct rcu_data *rdp;

	rdp = container_of(iwp, struct rcu_data, defer_qs_iw);
	rdp->defer_qs_iw_pending = false;
}

/*
 * Handle special cases during rcu_read_unlock(), such as needing to
 * notify RCU core processing or task having blocked during the RCU
 * read-side critical section.
 */
static void rcu_read_unlock_special(struct task_struct *t)
{
	unsigned long flags;
	bool irqs_were_disabled;
	bool preempt_bh_were_disabled =
			!!(preempt_count() & (PREEMPT_MASK | SOFTIRQ_MASK));

	/* NMI handlers cannot block and cannot safely manipulate state. */
	if (in_nmi())
		return;

	local_irq_save(flags);
	irqs_were_disabled = irqs_disabled_flags(flags);
	if (preempt_bh_were_disabled || irqs_were_disabled) {
		bool expboost; // Expedited GP in flight or possible boosting.
		struct rcu_data *rdp = this_cpu_ptr(&rcu_data);
		struct rcu_node *rnp = rdp->mynode;

		expboost = (t->rcu_blocked_node && READ_ONCE(t->rcu_blocked_node->exp_tasks)) ||
			   (rdp->grpmask & READ_ONCE(rnp->expmask)) ||
			   IS_ENABLED(CONFIG_RCU_STRICT_GRACE_PERIOD) ||
			   (IS_ENABLED(CONFIG_RCU_BOOST) && irqs_were_disabled &&
			    t->rcu_blocked_node);
		// Need to defer quiescent state until everything is enabled.
		if (use_softirq && (in_irq() || (expboost && !irqs_were_disabled))) {
			// Using softirq, safe to awaken, and either the
			// wakeup is free or there is either an expedited
			// GP in flight or a potential need to deboost.
			raise_softirq_irqoff(RCU_SOFTIRQ);
		} else {
			// Enabling BH or preempt does reschedule, so...
			// Also if no expediting and no possible deboosting,
			// slow is OK. Plus nohz_full CPUs eventually get
			// tick enabled.
			set_tsk_need_resched(current);
			set_preempt_need_resched();
			if (IS_ENABLED(CONFIG_IRQ_WORK) && irqs_were_disabled &&
			    expboost && !rdp->defer_qs_iw_pending && cpu_online(rdp->cpu)) {
				// Get scheduler to re-evaluate and call hooks.
				// If !IRQ_WORK, FQS scan will eventually IPI.
				init_irq_work(&rdp->defer_qs_iw, rcu_preempt_deferred_qs_handler);
				rdp->defer_qs_iw_pending = true;
				irq_work_queue_on(&rdp->defer_qs_iw, rdp->cpu);
			}
		}
		local_irq_restore(flags);
		return;
	}
	rcu_preempt_deferred_qs_irqrestore(t, flags);
}

/*
 * Check that the list of blocked tasks for the newly completed grace
 * period is in fact empty. It is a serious bug to complete a grace
 * period that still has RCU readers blocked! This function must be
 * invoked -before- updating this rnp's ->gp_seq.
 *
 * Also, if there are blocked tasks on the list, they automatically
 * block the newly created grace period, so set up ->gp_tasks accordingly.
 */
static void rcu_preempt_check_blocked_tasks(struct rcu_node *rnp)
{
	struct task_struct *t;

	RCU_LOCKDEP_WARN(preemptible(), "rcu_preempt_check_blocked_tasks() invoked with preemption enabled!!!\n");
	raw_lockdep_assert_held_rcu_node(rnp);
	if (WARN_ON_ONCE(rcu_preempt_blocked_readers_cgp(rnp)))
		dump_blkd_tasks(rnp, 10);
	if (rcu_preempt_has_tasks(rnp) &&
	    (rnp->qsmaskinit || rnp->wait_blkd_tasks)) {
		WRITE_ONCE(rnp->gp_tasks, rnp->blkd_tasks.next);
		t = container_of(rnp->gp_tasks, struct task_struct,
				 rcu_node_entry);
		trace_rcu_unlock_preempted_task(TPS("rcu_preempt-GPS"),
						rnp->gp_seq, t->pid);
	}
	WARN_ON_ONCE(rnp->qsmask);
}

/*
 * Check for a quiescent state from the current CPU, including voluntary
 * context switches for Tasks RCU. When a task blocks, the task is
 * recorded in the corresponding CPU's rcu_node structure, which is checked
 * elsewhere, hence this function need only check for quiescent states
 * related to the current CPU, not to those related to tasks.
 */
static void rcu_flavor_sched_clock_irq(int user)
{
	struct task_struct *t = current;

	lockdep_assert_irqs_disabled();
	if (user || rcu_is_cpu_rrupt_from_idle()) {
		rcu_note_voluntary_context_switch(current);
	}
	if (rcu_preempt_depth() > 0 ||
	    (preempt_count() & (PREEMPT_MASK | SOFTIRQ_MASK))) {
		/* No QS, force context switch if deferred. */
		if (rcu_preempt_need_deferred_qs(t)) {
			set_tsk_need_resched(t);
			set_preempt_need_resched();
		}
	} else if (rcu_preempt_need_deferred_qs(t)) {
		rcu_preempt_deferred_qs(t); /* Report deferred QS. */
		return;
	} else if (!WARN_ON_ONCE(rcu_preempt_depth())) {
		rcu_qs(); /* Report immediate QS. */
		return;
	}

	/* If GP is oldish, ask for help from rcu_read_unlock_special(). */
	if (rcu_preempt_depth() > 0 &&
	    __this_cpu_read(rcu_data.core_needs_qs) &&
	    __this_cpu_read(rcu_data.cpu_no_qs.b.norm) &&
	    !t->rcu_read_unlock_special.b.need_qs &&
	    time_after(jiffies, rcu_state.gp_start + HZ))
		t->rcu_read_unlock_special.b.need_qs = true;
}

/*
 * Check for a task exiting while in a preemptible-RCU read-side
 * critical section, clean up if so. No need to issue warnings, as
 * debug_check_no_locks_held() already does this if lockdep is enabled.
 * Besides, if this function does anything other than just immediately
 * return, there was a bug of some sort. Spewing warnings from this
 * function is as likely as not to simply obscure important prior warnings.
Paul E. McKenney2439b692013-04-11 10:15:52 -0700786 */
787void exit_rcu(void)
788{
789 struct task_struct *t = current;
790
Paul E. McKenney884157c2019-02-11 07:21:29 -0800791 if (unlikely(!list_empty(&current->rcu_node_entry))) {
Lai Jiangshan77339e62019-11-15 14:08:53 -0800792 rcu_preempt_depth_set(1);
Paul E. McKenney884157c2019-02-11 07:21:29 -0800793 barrier();
Paul E. McKenneyadd0d372019-03-26 10:22:22 -0700794 WRITE_ONCE(t->rcu_read_unlock_special.b.blocked, true);
Lai Jiangshan77339e62019-11-15 14:08:53 -0800795 } else if (unlikely(rcu_preempt_depth())) {
796 rcu_preempt_depth_set(1);
Paul E. McKenney884157c2019-02-11 07:21:29 -0800797 } else {
Paul E. McKenney2439b692013-04-11 10:15:52 -0700798 return;
Paul E. McKenney884157c2019-02-11 07:21:29 -0800799 }
Paul E. McKenney2439b692013-04-11 10:15:52 -0700800 __rcu_read_unlock();
Paul E. McKenney3e310092018-06-21 12:50:01 -0700801 rcu_preempt_deferred_qs(current);
Paul E. McKenney2439b692013-04-11 10:15:52 -0700802}
803
Paul E. McKenney4bc8d552017-11-27 15:13:56 -0800804/*
805 * Dump the blocked-tasks state, but limit the list dump to the
806 * specified number of elements.
807 */
Paul E. McKenney57738942018-05-08 14:18:57 -0700808static void
Paul E. McKenney81ab59a2018-07-03 17:22:34 -0700809dump_blkd_tasks(struct rcu_node *rnp, int ncheck)
Paul E. McKenney4bc8d552017-11-27 15:13:56 -0800810{
Paul E. McKenney57738942018-05-08 14:18:57 -0700811 int cpu;
Paul E. McKenney4bc8d552017-11-27 15:13:56 -0800812 int i;
813 struct list_head *lhp;
Paul E. McKenney57738942018-05-08 14:18:57 -0700814 bool onl;
815 struct rcu_data *rdp;
Paul E. McKenneyff3cee32018-05-08 12:50:14 -0700816 struct rcu_node *rnp1;
Paul E. McKenney4bc8d552017-11-27 15:13:56 -0800817
Boqun Fengce11fae2018-03-09 09:32:18 +0800818 raw_lockdep_assert_held_rcu_node(rnp);
Paul E. McKenneyff3cee32018-05-08 12:50:14 -0700819 pr_info("%s: grp: %d-%d level: %d ->gp_seq %ld ->completedqs %ld\n",
Paul E. McKenney77cfc7b2018-05-01 15:00:10 -0700820 __func__, rnp->grplo, rnp->grphi, rnp->level,
Paul E. McKenney8ff372902020-01-04 11:33:17 -0800821 (long)READ_ONCE(rnp->gp_seq), (long)rnp->completedqs);
Paul E. McKenneyff3cee32018-05-08 12:50:14 -0700822 for (rnp1 = rnp; rnp1; rnp1 = rnp1->parent)
823 pr_info("%s: %d:%d ->qsmask %#lx ->qsmaskinit %#lx ->qsmaskinitnext %#lx\n",
824 __func__, rnp1->grplo, rnp1->grphi, rnp1->qsmask, rnp1->qsmaskinit, rnp1->qsmaskinitnext);
Paul E. McKenney77cfc7b2018-05-01 15:00:10 -0700825 pr_info("%s: ->gp_tasks %p ->boost_tasks %p ->exp_tasks %p\n",
Paul E. McKenney065a6db2020-01-03 15:22:01 -0800826 __func__, READ_ONCE(rnp->gp_tasks), data_race(rnp->boost_tasks),
Paul E. McKenney314eeb42020-01-03 14:18:12 -0800827 READ_ONCE(rnp->exp_tasks));
Paul E. McKenney77cfc7b2018-05-01 15:00:10 -0700828 pr_info("%s: ->blkd_tasks", __func__);
Paul E. McKenney4bc8d552017-11-27 15:13:56 -0800829 i = 0;
830 list_for_each(lhp, &rnp->blkd_tasks) {
831 pr_cont(" %p", lhp);
Neeraj Upadhyaycd6d17b2019-03-29 15:25:52 +0530832 if (++i >= ncheck)
Paul E. McKenney4bc8d552017-11-27 15:13:56 -0800833 break;
834 }
835 pr_cont("\n");
Paul E. McKenney57738942018-05-08 14:18:57 -0700836 for (cpu = rnp->grplo; cpu <= rnp->grphi; cpu++) {
Paul E. McKenneyda1df502018-07-03 15:37:16 -0700837 rdp = per_cpu_ptr(&rcu_data, cpu);
Paul E. McKenney57738942018-05-08 14:18:57 -0700838 onl = !!(rdp->grpmask & rcu_rnp_online_cpus(rnp));
839 pr_info("\t%d: %c online: %ld(%d) offline: %ld(%d)\n",
840 cpu, ".o"[onl],
841 (long)rdp->rcu_onl_gp_seq, rdp->rcu_onl_gp_flags,
842 (long)rdp->rcu_ofl_gp_seq, rdp->rcu_ofl_gp_flags);
843 }
Paul E. McKenney4bc8d552017-11-27 15:13:56 -0800844}
845
Pranith Kumar28f65692014-09-22 14:00:48 -0400846#else /* #ifdef CONFIG_PREEMPT_RCU */
Paul E. McKenneyf41d9112009-08-22 13:56:52 -0700847
848/*
Paul E. McKenneyaa40c132020-08-10 09:58:03 -0700849 * If strict grace periods are enabled, and if the calling
850 * __rcu_read_unlock() marks the beginning of a quiescent state, immediately
851 * report that quiescent state and, if requested, spin for a bit.
852 */
853void rcu_read_unlock_strict(void)
854{
855 struct rcu_data *rdp;
856
857 if (!IS_ENABLED(CONFIG_RCU_STRICT_GRACE_PERIOD) ||
858 irqs_disabled() || preempt_count() || !rcu_state.gp_kthread)
859 return;
860 rdp = this_cpu_ptr(&rcu_data);
Paul E. McKenneycfeac392020-08-20 11:26:14 -0700861 rcu_report_qs_rdp(rdp);
Paul E. McKenneyaa40c132020-08-10 09:58:03 -0700862 udelay(rcu_unlock_delay);
863}
864EXPORT_SYMBOL_GPL(rcu_read_unlock_strict);
865
866/*
Paul E. McKenneyf41d9112009-08-22 13:56:52 -0700867 * Tell them what RCU they are running.
868 */
Paul E. McKenney0e0fc1c2009-11-11 11:28:06 -0800869static void __init rcu_bootup_announce(void)
Paul E. McKenneyf41d9112009-08-22 13:56:52 -0700870{
Paul E. McKenneyefc151c2013-03-18 16:24:11 -0700871 pr_info("Hierarchical RCU implementation.\n");
Paul E. McKenney26845c22010-04-13 14:19:23 -0700872 rcu_bootup_announce_oddness();
Paul E. McKenneyf41d9112009-08-22 13:56:52 -0700873}
874
Paul E. McKenney45975c72018-07-02 14:30:37 -0700875/*
Sebastian Andrzej Siewior90326f02019-10-15 21:18:14 +0200876 * Note a quiescent state for PREEMPTION=n. Because we do not need to know
Paul E. McKenney45975c72018-07-02 14:30:37 -0700877 * how many quiescent states passed, just if there was at least one since
878 * the start of the grace period, this just sets a flag. The caller must
879 * have disabled preemption.
880 */
881static void rcu_qs(void)
Paul E. McKenneyd28139c2018-06-28 14:45:25 -0700882{
Paul E. McKenney45975c72018-07-02 14:30:37 -0700883 RCU_LOCKDEP_WARN(preemptible(), "rcu_qs() invoked with preemption enabled!!!");
884 if (!__this_cpu_read(rcu_data.cpu_no_qs.s))
885 return;
886 trace_rcu_grace_period(TPS("rcu_sched"),
887 __this_cpu_read(rcu_data.gp_seq), TPS("cpuqs"));
888 __this_cpu_write(rcu_data.cpu_no_qs.b.norm, false);
889 if (!__this_cpu_read(rcu_data.cpu_no_qs.b.exp))
890 return;
891 __this_cpu_write(rcu_data.cpu_no_qs.b.exp, false);
Paul E. McKenney63d4c8c2018-07-03 17:22:34 -0700892 rcu_report_exp_rdp(this_cpu_ptr(&rcu_data));
Paul E. McKenneyd28139c2018-06-28 14:45:25 -0700893}
894
Paul E. McKenneyf41d9112009-08-22 13:56:52 -0700895/*
Paul E. McKenney395a2f02018-07-10 14:00:14 -0700896 * Register an urgently needed quiescent state. If there is an
897 * emergency, invoke rcu_momentary_dyntick_idle() to do a heavy-weight
898 * dyntick-idle quiescent state visible to other CPUs, which will in
899 * some cases serve for expedited as well as normal grace periods.
900 * Either way, register a lightweight quiescent state.
Paul E. McKenney395a2f02018-07-10 14:00:14 -0700901 */
902void rcu_all_qs(void)
903{
904 unsigned long flags;
905
Paul E. McKenney2dba13f2018-08-03 21:00:38 -0700906 if (!raw_cpu_read(rcu_data.rcu_urgent_qs))
Paul E. McKenney395a2f02018-07-10 14:00:14 -0700907 return;
908 preempt_disable();
909 /* Load rcu_urgent_qs before other flags. */
Paul E. McKenney2dba13f2018-08-03 21:00:38 -0700910 if (!smp_load_acquire(this_cpu_ptr(&rcu_data.rcu_urgent_qs))) {
Paul E. McKenney395a2f02018-07-10 14:00:14 -0700911 preempt_enable();
912 return;
913 }
Paul E. McKenney2dba13f2018-08-03 21:00:38 -0700914 this_cpu_write(rcu_data.rcu_urgent_qs, false);
Paul E. McKenney2dba13f2018-08-03 21:00:38 -0700915 if (unlikely(raw_cpu_read(rcu_data.rcu_need_heavy_qs))) {
Paul E. McKenney395a2f02018-07-10 14:00:14 -0700916 local_irq_save(flags);
917 rcu_momentary_dyntick_idle();
918 local_irq_restore(flags);
919 }
Paul E. McKenney7e28c5a2018-07-11 08:09:28 -0700920 rcu_qs();
Paul E. McKenney395a2f02018-07-10 14:00:14 -0700921 preempt_enable();
922}
923EXPORT_SYMBOL_GPL(rcu_all_qs);
924
925/*
Sebastian Andrzej Siewior90326f02019-10-15 21:18:14 +0200926 * Note a PREEMPTION=n context switch. The caller must have disabled interrupts.
Paul E. McKenneycba6d0d2012-07-02 07:08:42 -0700927 */
Paul E. McKenney45975c72018-07-02 14:30:37 -0700928void rcu_note_context_switch(bool preempt)
Paul E. McKenneycba6d0d2012-07-02 07:08:42 -0700929{
Paul E. McKenney45975c72018-07-02 14:30:37 -0700930 trace_rcu_utilization(TPS("Start context switch"));
931 rcu_qs();
932 /* Load rcu_urgent_qs before other flags. */
Paul E. McKenney2dba13f2018-08-03 21:00:38 -0700933 if (!smp_load_acquire(this_cpu_ptr(&rcu_data.rcu_urgent_qs)))
Paul E. McKenney45975c72018-07-02 14:30:37 -0700934 goto out;
Paul E. McKenney2dba13f2018-08-03 21:00:38 -0700935 this_cpu_write(rcu_data.rcu_urgent_qs, false);
936 if (unlikely(raw_cpu_read(rcu_data.rcu_need_heavy_qs)))
Paul E. McKenney45975c72018-07-02 14:30:37 -0700937 rcu_momentary_dyntick_idle();
Paul E. McKenney43766c32020-03-16 20:38:29 -0700938 rcu_tasks_qs(current, preempt);
Paul E. McKenney45975c72018-07-02 14:30:37 -0700939out:
940 trace_rcu_utilization(TPS("End context switch"));
Paul E. McKenneycba6d0d2012-07-02 07:08:42 -0700941}
Paul E. McKenney45975c72018-07-02 14:30:37 -0700942EXPORT_SYMBOL_GPL(rcu_note_context_switch);
Paul E. McKenneycba6d0d2012-07-02 07:08:42 -0700943
944/*
Paul E. McKenney6cc68792011-03-02 13:15:15 -0800945 * Because preemptible RCU does not exist, there are never any preempted
Paul E. McKenneyfc2219d42009-09-23 09:50:41 -0700946 * RCU readers.
947 */
Paul E. McKenney27f4d282011-02-07 12:47:15 -0800948static int rcu_preempt_blocked_readers_cgp(struct rcu_node *rnp)
Paul E. McKenneyfc2219d42009-09-23 09:50:41 -0700949{
950 return 0;
951}
952
Paul E. McKenney8af3a5e2014-10-31 11:22:37 -0700953/*
954 * Because there is no preemptible RCU, there can be no readers blocked.
955 */
956static bool rcu_preempt_has_tasks(struct rcu_node *rnp)
Paul E. McKenneyb668c9c2009-11-22 08:53:48 -0800957{
Paul E. McKenney8af3a5e2014-10-31 11:22:37 -0700958 return false;
Paul E. McKenneyb668c9c2009-11-22 08:53:48 -0800959}
960
Paul E. McKenneyf41d9112009-08-22 13:56:52 -0700961/*
Paul E. McKenney3e310092018-06-21 12:50:01 -0700962 * Because there is no preemptible RCU, there can be no deferred quiescent
963 * states.
964 */
965static bool rcu_preempt_need_deferred_qs(struct task_struct *t)
966{
967 return false;
968}
969static void rcu_preempt_deferred_qs(struct task_struct *t) { }
970
971/*
Paul E. McKenney6cc68792011-03-02 13:15:15 -0800972 * Because there is no preemptible RCU, there can be no readers blocked,
Paul E. McKenney49e29122009-09-18 09:50:19 -0700973 * so there is no need to check for blocked tasks. So check only for
974 * bogus qsmask values.
Paul E. McKenneyb0e165c2009-09-13 09:15:09 -0700975 */
Paul E. McKenney81ab59a2018-07-03 17:22:34 -0700976static void rcu_preempt_check_blocked_tasks(struct rcu_node *rnp)
Paul E. McKenneyb0e165c2009-09-13 09:15:09 -0700977{
Paul E. McKenney49e29122009-09-18 09:50:19 -0700978 WARN_ON_ONCE(rnp->qsmask);
Paul E. McKenneyb0e165c2009-09-13 09:15:09 -0700979}
980
Paul E. McKenneydd5d19b2009-08-27 14:58:16 -0700981/*
Paul E. McKenneyc98cac62018-11-21 11:35:03 -0800982 * Check to see if this CPU is in a non-context-switch quiescent state,
983 * namely user mode and idle loop.
Paul E. McKenneyf41d9112009-08-22 13:56:52 -0700984 */
Paul E. McKenneyc98cac62018-11-21 11:35:03 -0800985static void rcu_flavor_sched_clock_irq(int user)
Paul E. McKenneyf41d9112009-08-22 13:56:52 -0700986{
Paul E. McKenney45975c72018-07-02 14:30:37 -0700987 if (user || rcu_is_cpu_rrupt_from_idle()) {
988
989 /*
990 * Get here if this CPU took its interrupt from user
991 * mode or from the idle loop, and if this is not a
992 * nested interrupt. In this case, the CPU is in
993 * a quiescent state, so note it.
994 *
995 * No memory barrier is required here because rcu_qs()
996 * references only CPU-local variables that other CPUs
997 * neither access nor modify, at least not while the
998 * corresponding CPU is online.
999 */
1000
1001 rcu_qs();
1002 }
Paul E. McKenneyf41d9112009-08-22 13:56:52 -07001003}
1004
Paul E. McKenney2439b692013-04-11 10:15:52 -07001005/*
1006 * Because preemptible RCU does not exist, tasks cannot possibly exit
1007 * while in preemptible RCU read-side critical sections.
1008 */
1009void exit_rcu(void)
1010{
1011}
1012
Paul E. McKenney4bc8d552017-11-27 15:13:56 -08001013/*
1014 * Dump the guaranteed-empty blocked-tasks state. Trust but verify.
1015 */
Paul E. McKenney57738942018-05-08 14:18:57 -07001016static void
Paul E. McKenney81ab59a2018-07-03 17:22:34 -07001017dump_blkd_tasks(struct rcu_node *rnp, int ncheck)
Paul E. McKenney4bc8d552017-11-27 15:13:56 -08001018{
1019 WARN_ON_ONCE(!list_empty(&rnp->blkd_tasks));
1020}
1021
Pranith Kumar28f65692014-09-22 14:00:48 -04001022#endif /* #else #ifdef CONFIG_PREEMPT_RCU */
Paul E. McKenney8bd93a22010-02-22 17:04:59 -08001023
Sebastian Andrzej Siewior48d07c02019-03-20 22:13:33 +01001024/*
1025 * If boosting, set rcuc kthreads to realtime priority.
1026 */
1027static void rcu_cpu_kthread_setup(unsigned int cpu)
Thomas Gleixner5d01bbd2012-07-16 10:42:35 +00001028{
Sebastian Andrzej Siewior48d07c02019-03-20 22:13:33 +01001029#ifdef CONFIG_RCU_BOOST
1030 struct sched_param sp;
1031
1032 sp.sched_priority = kthread_prio;
1033 sched_setscheduler_nocheck(current, SCHED_FIFO, &sp);
1034#endif /* #ifdef CONFIG_RCU_BOOST */
Thomas Gleixner5d01bbd2012-07-16 10:42:35 +00001035}
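/*
 * Illustrative note: kthread_prio is defined in tree.c, defaults to 1
 * when CONFIG_RCU_BOOST=y, and can be overridden with the
 * rcutree.kthread_prio= boot parameter, so with the defaults the rcuc
 * kthreads above run SCHED_FIFO at priority 1.
 */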
1036
Sebastian Andrzej Siewior48d07c02019-03-20 22:13:33 +01001037#ifdef CONFIG_RCU_BOOST
1038
Paul E. McKenney27f4d282011-02-07 12:47:15 -08001039/*
1040 * Carry out RCU priority boosting on the task indicated by ->exp_tasks
1041 * or ->boost_tasks, advancing the pointer to the next task in the
1042 * ->blkd_tasks list.
1043 *
1044 * Note that irqs must be enabled: boosting the task can block.
1045 * Returns 1 if there are more tasks needing to be boosted.
1046 */
1047static int rcu_boost(struct rcu_node *rnp)
1048{
1049 unsigned long flags;
Paul E. McKenney27f4d282011-02-07 12:47:15 -08001050 struct task_struct *t;
1051 struct list_head *tb;
1052
Paul E. McKenney7d0ae802015-03-03 14:57:58 -08001053 if (READ_ONCE(rnp->exp_tasks) == NULL &&
1054 READ_ONCE(rnp->boost_tasks) == NULL)
Paul E. McKenney27f4d282011-02-07 12:47:15 -08001055 return 0; /* Nothing left to boost. */
1056
Peter Zijlstra2a67e742015-10-08 12:24:23 +02001057 raw_spin_lock_irqsave_rcu_node(rnp, flags);
Paul E. McKenney27f4d282011-02-07 12:47:15 -08001058
1059 /*
1060 * Recheck under the lock: all tasks in need of boosting
1061 * might exit their RCU read-side critical sections on their own.
1062 */
1063 if (rnp->exp_tasks == NULL && rnp->boost_tasks == NULL) {
Boqun Feng67c583a72015-12-29 12:18:47 +08001064 raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
Paul E. McKenney27f4d282011-02-07 12:47:15 -08001065 return 0;
1066 }
1067
1068 /*
1069 * Preferentially boost tasks blocking expedited grace periods.
1070 * This cannot starve the normal grace periods because a second
1071 * expedited grace period must boost all blocked tasks, including
1072 * those blocking the pre-existing normal grace period.
1073 */
Paul E. McKenneybec06782018-01-10 12:16:42 -08001074 if (rnp->exp_tasks != NULL)
Paul E. McKenney27f4d282011-02-07 12:47:15 -08001075 tb = rnp->exp_tasks;
Paul E. McKenneybec06782018-01-10 12:16:42 -08001076 else
Paul E. McKenney27f4d282011-02-07 12:47:15 -08001077 tb = rnp->boost_tasks;
1078
1079 /*
1080 * We boost task t by manufacturing an rt_mutex that appears to
1081 * be held by task t. We leave a pointer to that rt_mutex where
1082 * task t can find it, and task t will release the mutex when it
1083 * exits its outermost RCU read-side critical section. Then
1084 * simply acquiring this artificial rt_mutex will boost task
1085 * t's priority. (Thanks to tglx for suggesting this approach!)
1086 *
1087 * Note that task t must acquire rnp->lock to remove itself from
1088 * the ->blkd_tasks list, which it will do from exit() if from
1089 * nowhere else. We therefore are guaranteed that task t will
1090 * stay around at least until we drop rnp->lock. Note that
1091 * rnp->lock also resolves races between our priority boosting
1092 * and task t's exiting its outermost RCU read-side critical
1093 * section.
1094 */
1095 t = container_of(tb, struct task_struct, rcu_node_entry);
Paul E. McKenneyabaa93d2014-06-12 13:30:25 -07001096 rt_mutex_init_proxy_locked(&rnp->boost_mtx, t);
Boqun Feng67c583a72015-12-29 12:18:47 +08001097 raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
Paul E. McKenneyabaa93d2014-06-12 13:30:25 -07001098 /* Lock only for side effect: boosts task t's priority. */
1099 rt_mutex_lock(&rnp->boost_mtx);
1100 rt_mutex_unlock(&rnp->boost_mtx); /* Then keep lockdep happy. */
Paul E. McKenney27f4d282011-02-07 12:47:15 -08001101
Paul E. McKenney7d0ae802015-03-03 14:57:58 -08001102 return READ_ONCE(rnp->exp_tasks) != NULL ||
1103 READ_ONCE(rnp->boost_tasks) != NULL;
Paul E. McKenney27f4d282011-02-07 12:47:15 -08001104}
1105
1106/*
Paul E. McKenneybc17ea12015-06-06 08:11:43 -07001107 * Priority-boosting kthread, one per leaf rcu_node.
Paul E. McKenney27f4d282011-02-07 12:47:15 -08001108 */
1109static int rcu_boost_kthread(void *arg)
1110{
1111 struct rcu_node *rnp = (struct rcu_node *)arg;
1112 int spincnt = 0;
1113 int more2boost;
1114
Steven Rostedt (Red Hat)f7f7bac2013-07-12 17:18:47 -04001115 trace_rcu_utilization(TPS("Start boost kthread@init"));
Paul E. McKenney27f4d282011-02-07 12:47:15 -08001116 for (;;) {
Paul E. McKenney3ca3b0e2020-01-08 20:12:59 -08001117 WRITE_ONCE(rnp->boost_kthread_status, RCU_KTHREAD_WAITING);
Steven Rostedt (Red Hat)f7f7bac2013-07-12 17:18:47 -04001118 trace_rcu_utilization(TPS("End boost kthread@rcu_wait"));
Paul E. McKenney065a6db2020-01-03 15:22:01 -08001119 rcu_wait(READ_ONCE(rnp->boost_tasks) ||
1120 READ_ONCE(rnp->exp_tasks));
Steven Rostedt (Red Hat)f7f7bac2013-07-12 17:18:47 -04001121 trace_rcu_utilization(TPS("Start boost kthread@rcu_wait"));
Paul E. McKenney3ca3b0e2020-01-08 20:12:59 -08001122 WRITE_ONCE(rnp->boost_kthread_status, RCU_KTHREAD_RUNNING);
Paul E. McKenney27f4d282011-02-07 12:47:15 -08001123 more2boost = rcu_boost(rnp);
1124 if (more2boost)
1125 spincnt++;
1126 else
1127 spincnt = 0;
1128 if (spincnt > 10) {
Paul E. McKenney3ca3b0e2020-01-08 20:12:59 -08001129 WRITE_ONCE(rnp->boost_kthread_status, RCU_KTHREAD_YIELDING);
Steven Rostedt (Red Hat)f7f7bac2013-07-12 17:18:47 -04001130 trace_rcu_utilization(TPS("End boost kthread@rcu_yield"));
Paul E. McKenneya9352f72020-05-07 16:34:38 -07001131 schedule_timeout_idle(2);
Steven Rostedt (Red Hat)f7f7bac2013-07-12 17:18:47 -04001132 trace_rcu_utilization(TPS("Start boost kthread@rcu_yield"));
Paul E. McKenney27f4d282011-02-07 12:47:15 -08001133 spincnt = 0;
1134 }
1135 }
Paul E. McKenney1217ed12011-05-04 21:43:49 -07001136 /* NOTREACHED */
Steven Rostedt (Red Hat)f7f7bac2013-07-12 17:18:47 -04001137 trace_rcu_utilization(TPS("End boost kthread@notreached"));
Paul E. McKenney27f4d282011-02-07 12:47:15 -08001138 return 0;
1139}
1140
1141/*
1142 * Check to see if it is time to start boosting RCU readers that are
1143 * blocking the current grace period, and, if so, tell the per-rcu_node
1144 * kthread to start boosting them. If there is an expedited grace
1145 * period in progress, it is always time to boost.
1146 *
Paul E. McKenneyb065a852012-08-01 15:57:54 -07001147 * The caller must hold rnp->lock, which this function releases.
1148 * The ->boost_kthread_task is immortal, so we don't need to worry
1149 * about it going away.
Paul E. McKenney27f4d282011-02-07 12:47:15 -08001150 */
Paul E. McKenney1217ed12011-05-04 21:43:49 -07001151static void rcu_initiate_boost(struct rcu_node *rnp, unsigned long flags)
Pranith Kumar615e41c2014-06-11 16:39:40 -04001152 __releases(rnp->lock)
Paul E. McKenney27f4d282011-02-07 12:47:15 -08001153{
Matthew Wilcoxa32e01e2018-01-17 06:24:30 -08001154 raw_lockdep_assert_held_rcu_node(rnp);
Paul E. McKenney0ea1f2e2011-02-22 13:42:43 -08001155 if (!rcu_preempt_blocked_readers_cgp(rnp) && rnp->exp_tasks == NULL) {
Boqun Feng67c583a72015-12-29 12:18:47 +08001156 raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
Paul E. McKenney27f4d282011-02-07 12:47:15 -08001157 return;
Paul E. McKenney0ea1f2e2011-02-22 13:42:43 -08001158 }
Paul E. McKenney27f4d282011-02-07 12:47:15 -08001159 if (rnp->exp_tasks != NULL ||
1160 (rnp->gp_tasks != NULL &&
1161 rnp->boost_tasks == NULL &&
1162 rnp->qsmask == 0 &&
Paul E. McKenney7b241312020-04-10 15:52:53 -07001163 (!time_after(rnp->boost_time, jiffies) || rcu_state.cbovld))) {
Paul E. McKenney27f4d282011-02-07 12:47:15 -08001164 if (rnp->exp_tasks == NULL)
Paul E. McKenney5822b812020-01-04 10:44:41 -08001165 WRITE_ONCE(rnp->boost_tasks, rnp->gp_tasks);
Boqun Feng67c583a72015-12-29 12:18:47 +08001166 raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
Paul E. McKenneya2badef2019-03-21 16:29:50 -07001167 rcu_wake_cond(rnp->boost_kthread_task,
Paul E. McKenney3ca3b0e2020-01-08 20:12:59 -08001168 READ_ONCE(rnp->boost_kthread_status));
Paul E. McKenney1217ed12011-05-04 21:43:49 -07001169 } else {
Boqun Feng67c583a72015-12-29 12:18:47 +08001170 raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
Paul E. McKenney1217ed12011-05-04 21:43:49 -07001171 }
Paul E. McKenney27f4d282011-02-07 12:47:15 -08001172}
1173
Paul E. McKenney0f962a52011-04-14 12:13:53 -07001174/*
Paul E. McKenneydff16722011-11-29 15:57:13 -08001175 * Is the current CPU running the RCU-callbacks kthread?
1176 * Caller must have preemption disabled.
1177 */
1178static bool rcu_is_callbacks_kthread(void)
1179{
Paul E. McKenney37f62d72018-11-30 16:11:14 -08001180 return __this_cpu_read(rcu_data.rcu_cpu_kthread_task) == current;
Paul E. McKenneydff16722011-11-29 15:57:13 -08001181}
1182
Paul E. McKenney27f4d282011-02-07 12:47:15 -08001183#define RCU_BOOST_DELAY_JIFFIES DIV_ROUND_UP(CONFIG_RCU_BOOST_DELAY * HZ, 1000)
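/*
 * Worked example (illustrative only): with the common Kconfig default
 * of CONFIG_RCU_BOOST_DELAY=500 and HZ=250, the expression above is
 * DIV_ROUND_UP(500 * 250, 1000) == 125 jiffies, that is, boosting is
 * deferred for roughly half a second after grace-period start.
 */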
1184
1185/*
1186 * Do priority-boost accounting for the start of a new grace period.
1187 */
1188static void rcu_preempt_boost_start_gp(struct rcu_node *rnp)
1189{
1190 rnp->boost_time = jiffies + RCU_BOOST_DELAY_JIFFIES;
1191}
1192
1193/*
Paul E. McKenney27f4d282011-02-07 12:47:15 -08001194 * Create an RCU-boost kthread for the specified node if one does not
1195 * already exist. We only create this kthread for preemptible RCU.
1197 */
Byungchul Park35458322019-07-01 09:40:39 +09001198static void rcu_spawn_one_boost_kthread(struct rcu_node *rnp)
Paul E. McKenney27f4d282011-02-07 12:47:15 -08001199{
Paul E. McKenney6dbfdc142018-07-03 17:22:34 -07001200 int rnp_index = rnp - rcu_get_root();
Paul E. McKenney27f4d282011-02-07 12:47:15 -08001201 unsigned long flags;
1202 struct sched_param sp;
1203 struct task_struct *t;
1204
Paul E. McKenney6dbfdc142018-07-03 17:22:34 -07001205 if (!IS_ENABLED(CONFIG_PREEMPT_RCU))
Byungchul Park35458322019-07-01 09:40:39 +09001206 return;
Thomas Gleixner5d01bbd2012-07-16 10:42:35 +00001207
Paul E. McKenney0aa04b02015-01-23 21:52:37 -08001208 if (!rcu_scheduler_fully_active || rcu_rnp_online_cpus(rnp) == 0)
Byungchul Park35458322019-07-01 09:40:39 +09001209 return;
Thomas Gleixner5d01bbd2012-07-16 10:42:35 +00001210
Paul E. McKenney6dbfdc142018-07-03 17:22:34 -07001211 rcu_state.boost = 1;
Byungchul Park35458322019-07-01 09:40:39 +09001212
Paul E. McKenney27f4d282011-02-07 12:47:15 -08001213 if (rnp->boost_kthread_task != NULL)
Byungchul Park35458322019-07-01 09:40:39 +09001214 return;
1215
Paul E. McKenney27f4d282011-02-07 12:47:15 -08001216 t = kthread_create(rcu_boost_kthread, (void *)rnp,
Mike Galbraith5b61b0b2011-08-19 11:39:11 -07001217 "rcub/%d", rnp_index);
Byungchul Park35458322019-07-01 09:40:39 +09001218 if (WARN_ON_ONCE(IS_ERR(t)))
1219 return;
1220
Peter Zijlstra2a67e742015-10-08 12:24:23 +02001221 raw_spin_lock_irqsave_rcu_node(rnp, flags);
Paul E. McKenney27f4d282011-02-07 12:47:15 -08001222 rnp->boost_kthread_task = t;
Boqun Feng67c583a72015-12-29 12:18:47 +08001223 raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
Clark Williams21871d72014-09-12 21:21:09 -05001224 sp.sched_priority = kthread_prio;
Paul E. McKenney27f4d282011-02-07 12:47:15 -08001225 sched_setscheduler_nocheck(t, SCHED_FIFO, &sp);
Paul E. McKenney9a432732011-05-30 20:38:55 -07001226 wake_up_process(t); /* get to TASK_INTERRUPTIBLE quickly. */
Paul E. McKenney27f4d282011-02-07 12:47:15 -08001227}
1228
Paul E. McKenneyf8b7fc62011-06-16 08:26:32 -07001229/*
1230 * Set the per-rcu_node kthread's affinity to cover all CPUs that are
1231 * served by the rcu_node in question. The CPU hotplug lock is still
1232 * held, so the value of rnp->qsmaskinit will be stable.
1233 *
1234 * We don't include outgoingcpu in the affinity set; use -1 if there is
1235 * no outgoing CPU. If there are no CPUs left in the affinity set,
1236 * this function allows the kthread to execute on any CPU.
1237 */
Thomas Gleixner5d01bbd2012-07-16 10:42:35 +00001238static void rcu_boost_kthread_setaffinity(struct rcu_node *rnp, int outgoingcpu)
Paul E. McKenneyf8b7fc62011-06-16 08:26:32 -07001239{
Thomas Gleixner5d01bbd2012-07-16 10:42:35 +00001240 struct task_struct *t = rnp->boost_kthread_task;
Paul E. McKenney0aa04b02015-01-23 21:52:37 -08001241 unsigned long mask = rcu_rnp_online_cpus(rnp);
Paul E. McKenneyf8b7fc62011-06-16 08:26:32 -07001242 cpumask_var_t cm;
1243 int cpu;
Paul E. McKenneyf8b7fc62011-06-16 08:26:32 -07001244
Thomas Gleixner5d01bbd2012-07-16 10:42:35 +00001245 if (!t)
Paul E. McKenneyf8b7fc62011-06-16 08:26:32 -07001246 return;
Thomas Gleixner5d01bbd2012-07-16 10:42:35 +00001247 if (!zalloc_cpumask_var(&cm, GFP_KERNEL))
Paul E. McKenneyf8b7fc62011-06-16 08:26:32 -07001248 return;
Mark Rutlandbc75e992016-06-03 15:20:04 +01001249 for_each_leaf_node_possible_cpu(rnp, cpu)
1250 if ((mask & leaf_node_cpu_bit(rnp, cpu)) &&
1251 cpu != outgoingcpu)
Paul E. McKenneyf8b7fc62011-06-16 08:26:32 -07001252 cpumask_set_cpu(cpu, cm);
Paul E. McKenney5d0b0242014-11-10 08:07:08 -08001253 if (cpumask_weight(cm) == 0)
Paul E. McKenneyf8b7fc62011-06-16 08:26:32 -07001254 cpumask_setall(cm);
Thomas Gleixner5d01bbd2012-07-16 10:42:35 +00001255 set_cpus_allowed_ptr(t, cm);
Paul E. McKenneyf8b7fc62011-06-16 08:26:32 -07001256 free_cpumask_var(cm);
1257}
1258
Paul E. McKenneyf8b7fc62011-06-16 08:26:32 -07001259/*
Paul E. McKenney9386c0b2014-07-13 12:00:53 -07001260 * Spawn boost kthreads -- called as soon as the scheduler is running.
Paul E. McKenneyf8b7fc62011-06-16 08:26:32 -07001261 */
Paul E. McKenney9386c0b2014-07-13 12:00:53 -07001262static void __init rcu_spawn_boost_kthreads(void)
Paul E. McKenneyf8b7fc62011-06-16 08:26:32 -07001263{
Paul E. McKenneyf8b7fc62011-06-16 08:26:32 -07001264 struct rcu_node *rnp;
1265
Paul E. McKenneyaedf4ba2018-07-04 14:33:59 -07001266 rcu_for_each_leaf_node(rnp)
Byungchul Park35458322019-07-01 09:40:39 +09001267 rcu_spawn_one_boost_kthread(rnp);
Paul E. McKenneyf8b7fc62011-06-16 08:26:32 -07001268}
Paul E. McKenneyf8b7fc62011-06-16 08:26:32 -07001269
Paul Gortmaker49fb4c62013-06-19 14:52:21 -04001270static void rcu_prepare_kthreads(int cpu)
Paul E. McKenneyf8b7fc62011-06-16 08:26:32 -07001271{
Paul E. McKenneyda1df502018-07-03 15:37:16 -07001272 struct rcu_data *rdp = per_cpu_ptr(&rcu_data, cpu);
Paul E. McKenneyf8b7fc62011-06-16 08:26:32 -07001273 struct rcu_node *rnp = rdp->mynode;
1274
1275 /* Fire up the incoming CPU's kthread and leaf rcu_node kthread. */
Paul E. McKenney62ab7072012-07-16 10:42:38 +00001276 if (rcu_scheduler_fully_active)
Byungchul Park35458322019-07-01 09:40:39 +09001277 rcu_spawn_one_boost_kthread(rnp);
Paul E. McKenneyf8b7fc62011-06-16 08:26:32 -07001278}
1279
Paul E. McKenney27f4d282011-02-07 12:47:15 -08001280#else /* #ifdef CONFIG_RCU_BOOST */
1281
Paul E. McKenney1217ed12011-05-04 21:43:49 -07001282static void rcu_initiate_boost(struct rcu_node *rnp, unsigned long flags)
Pranith Kumar615e41c2014-06-11 16:39:40 -04001283 __releases(rnp->lock)
Paul E. McKenney27f4d282011-02-07 12:47:15 -08001284{
Boqun Feng67c583a72015-12-29 12:18:47 +08001285 raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
Paul E. McKenney27f4d282011-02-07 12:47:15 -08001286}
1287
Paul E. McKenneydff16722011-11-29 15:57:13 -08001288static bool rcu_is_callbacks_kthread(void)
1289{
1290 return false;
1291}
1292
Paul E. McKenney27f4d282011-02-07 12:47:15 -08001293static void rcu_preempt_boost_start_gp(struct rcu_node *rnp)
1294{
1295}
1296
Thomas Gleixner5d01bbd2012-07-16 10:42:35 +00001297static void rcu_boost_kthread_setaffinity(struct rcu_node *rnp, int outgoingcpu)
Paul E. McKenneyf8b7fc62011-06-16 08:26:32 -07001298{
1299}
1300
Paul E. McKenney9386c0b2014-07-13 12:00:53 -07001301static void __init rcu_spawn_boost_kthreads(void)
Paul E. McKenneyb0d30412011-07-10 15:57:35 -07001302{
Paul E. McKenneyb0d30412011-07-10 15:57:35 -07001303}
Paul E. McKenneyb0d30412011-07-10 15:57:35 -07001304
Paul Gortmaker49fb4c62013-06-19 14:52:21 -04001305static void rcu_prepare_kthreads(int cpu)
Paul E. McKenneyf8b7fc62011-06-16 08:26:32 -07001306{
1307}
1308
Paul E. McKenney27f4d282011-02-07 12:47:15 -08001309#endif /* #else #ifdef CONFIG_RCU_BOOST */
1310
Paul E. McKenney8bd93a22010-02-22 17:04:59 -08001311#if !defined(CONFIG_RCU_FAST_NO_HZ)
1312
1313/*
Paul E. McKenney0bd55c62019-08-12 10:28:08 -07001314 * Check to see if any future non-offloaded RCU-related work will need
1315 * to be done by the current CPU, even if none need be done immediately,
1316 * returning 1 if so. This function is part of the RCU implementation;
1317 * it is -not- an exported member of the RCU API.
Paul E. McKenney8bd93a22010-02-22 17:04:59 -08001318 *
Paul E. McKenney0ae86a22018-07-07 18:12:26 -07001319 * Because we do not have RCU_FAST_NO_HZ, just check whether or not this
1320 * CPU has RCU callbacks queued.
Paul E. McKenney8bd93a22010-02-22 17:04:59 -08001321 */
Thomas Gleixnerc1ad3482015-04-14 21:08:58 +00001322int rcu_needs_cpu(u64 basemono, u64 *nextevt)
Paul E. McKenney8bd93a22010-02-22 17:04:59 -08001323{
Thomas Gleixnerc1ad3482015-04-14 21:08:58 +00001324 *nextevt = KTIME_MAX;
Paul E. McKenney0bd55c62019-08-12 10:28:08 -07001325 return !rcu_segcblist_empty(&this_cpu_ptr(&rcu_data)->cblist) &&
Frederic Weisbecker3820b512020-11-12 01:51:21 +01001326 !rcu_rdp_is_offloaded(this_cpu_ptr(&rcu_data));
Paul E. McKenney7cb92492011-11-28 12:28:34 -08001327}
1328
1329/*
1330 * Because we do not have RCU_FAST_NO_HZ, don't bother cleaning up
1331 * after it.
1332 */
Paul E. McKenney8fa78452014-10-22 15:07:37 -07001333static void rcu_cleanup_after_idle(void)
Paul E. McKenney7cb92492011-11-28 12:28:34 -08001334{
1335}
1336
1337/*
Paul E. McKenneya858af22012-01-16 13:29:10 -08001338 * Do the idle-entry grace-period work, which, because CONFIG_RCU_FAST_NO_HZ=n,
Paul E. McKenneyaea1b352011-11-02 06:54:54 -07001339 * is nothing.
1340 */
Paul E. McKenney198bbf82014-10-22 15:03:43 -07001341static void rcu_prepare_for_idle(void)
Paul E. McKenneyaea1b352011-11-02 06:54:54 -07001342{
1343}
1344
Paul E. McKenney8bd93a22010-02-22 17:04:59 -08001345#else /* #if !defined(CONFIG_RCU_FAST_NO_HZ) */
1346
Paul E. McKenneyf23f7fa2011-11-30 15:41:14 -08001347/*
1348 * This code is invoked when a CPU goes idle, at which point we want
1349 * to have the CPU do everything required for RCU so that it can enter
Joel Fernandes (Google)77a40f92019-08-30 12:36:32 -04001350 * the energy-efficient dyntick-idle mode.
Paul E. McKenneyf23f7fa2011-11-30 15:41:14 -08001351 *
Joel Fernandes (Google)77a40f92019-08-30 12:36:32 -04001352 * The following preprocessor symbol controls this:
Paul E. McKenneyf23f7fa2011-11-30 15:41:14 -08001353 *
Paul E. McKenneyf23f7fa2011-11-30 15:41:14 -08001354 * RCU_IDLE_GP_DELAY gives the number of jiffies that a CPU is permitted
1355 * to sleep in dyntick-idle mode with RCU callbacks pending. This
1356 * is sized to be roughly one RCU grace period. Those energy-efficiency
1357 * benchmarkers who might otherwise be tempted to set this to a large
1358 * number, be warned: Setting RCU_IDLE_GP_DELAY too high can hang your
1359 * system. And if you are -that- concerned about energy efficiency,
1360 * just power the system down and be done with it!
1361 *
Joel Fernandes (Google)77a40f92019-08-30 12:36:32 -04001362 * The value below works well in practice. If future workloads require
Paul E. McKenneyf23f7fa2011-11-30 15:41:14 -08001363 * adjustment, it can be converted into a kernel config parameter, though
1364 * making the state machine smarter might be a better option.
1365 */
Paul E. McKenneye84c48a2012-06-04 20:45:10 -07001366#define RCU_IDLE_GP_DELAY 4 /* Roughly one grace period. */
Paul E. McKenneyf23f7fa2011-11-30 15:41:14 -08001367
Paul E. McKenney5e44ce32012-12-12 12:35:29 -08001368static int rcu_idle_gp_delay = RCU_IDLE_GP_DELAY;
1369module_param(rcu_idle_gp_delay, int, 0644);
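/*
 * Tuning note (example values only): this parameter is exposed with
 * the "rcutree." prefix, so it can be set at boot time with, say,
 * "rcutree.rcu_idle_gp_delay=8" on the kernel command line and, given
 * the 0644 permissions above, normally also at run time via
 * /sys/module/rcutree/parameters/rcu_idle_gp_delay.
 */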
Paul E. McKenney5e44ce32012-12-12 12:35:29 -08001370
Paul E. McKenney8bd93a22010-02-22 17:04:59 -08001371/*
Paul E. McKenney0ae86a22018-07-07 18:12:26 -07001372 * Try to advance callbacks on the current CPU, but only if it has been
1373 * a while since the last time we did so. Afterwards, if there are any
1374 * callbacks ready for immediate invocation, return true.
Paul E. McKenney486e2592012-01-06 14:11:30 -08001375 */
Paul E. McKenneyf1f399d2013-11-17 21:08:07 -08001376static bool __maybe_unused rcu_try_advance_all_cbs(void)
Paul E. McKenney486e2592012-01-06 14:11:30 -08001377{
Paul E. McKenneyc0f4dfd2012-12-28 11:30:36 -08001378 bool cbs_ready = false;
Paul E. McKenney5998a752018-08-03 21:00:38 -07001379 struct rcu_data *rdp = this_cpu_ptr(&rcu_data);
Paul E. McKenneyc0f4dfd2012-12-28 11:30:36 -08001380 struct rcu_node *rnp;
Paul E. McKenney486e2592012-01-06 14:11:30 -08001381
Paul E. McKenneyc2298282013-08-25 21:20:47 -07001382 /* Exit early if we advanced recently. */
Paul E. McKenney5998a752018-08-03 21:00:38 -07001383 if (jiffies == rdp->last_advance_all)
Pranith Kumard0bc90f2014-07-08 18:26:13 -04001384 return false;
Paul E. McKenney5998a752018-08-03 21:00:38 -07001385 rdp->last_advance_all = jiffies;
Paul E. McKenneyc2298282013-08-25 21:20:47 -07001386
Paul E. McKenneyb97d23c2018-07-04 15:35:00 -07001387 rnp = rdp->mynode;
Paul E. McKenney486e2592012-01-06 14:11:30 -08001388
Paul E. McKenneyb97d23c2018-07-04 15:35:00 -07001389 /*
1390 * Don't bother checking unless a grace period has
1391 * completed since we last checked and there are
1392 * callbacks not yet ready to invoke.
1393 */
1394 if ((rcu_seq_completed_gp(rdp->gp_seq,
1395 rcu_seq_current(&rnp->gp_seq)) ||
1396 unlikely(READ_ONCE(rdp->gpwrap))) &&
1397 rcu_segcblist_pend_cbs(&rdp->cblist))
1398 note_gp_changes(rdp);
Paul E. McKenney486e2592012-01-06 14:11:30 -08001399
Paul E. McKenneyb97d23c2018-07-04 15:35:00 -07001400 if (rcu_segcblist_ready_cbs(&rdp->cblist))
1401 cbs_ready = true;
Paul E. McKenneyc0f4dfd2012-12-28 11:30:36 -08001402 return cbs_ready;
Paul E. McKenney486e2592012-01-06 14:11:30 -08001403}
1404
1405/*
Paul E. McKenneyc0f4dfd2012-12-28 11:30:36 -08001406 * Allow the CPU to enter dyntick-idle mode unless it has callbacks ready
1407 * to invoke. If the CPU has callbacks, try to advance them. Tell the
Joel Fernandes (Google)77a40f92019-08-30 12:36:32 -04001408 * caller what timeout to set.
Paul E. McKenneyaa9b16302012-05-10 16:41:44 -07001409 *
Paul E. McKenneyc0f4dfd2012-12-28 11:30:36 -08001410 * The caller must have disabled interrupts.
Paul E. McKenneyaa9b16302012-05-10 16:41:44 -07001411 */
Thomas Gleixnerc1ad3482015-04-14 21:08:58 +00001412int rcu_needs_cpu(u64 basemono, u64 *nextevt)
Paul E. McKenneyaa9b16302012-05-10 16:41:44 -07001413{
Paul E. McKenney5998a752018-08-03 21:00:38 -07001414 struct rcu_data *rdp = this_cpu_ptr(&rcu_data);
Thomas Gleixnerc1ad3482015-04-14 21:08:58 +00001415 unsigned long dj;
Paul E. McKenneyaa9b16302012-05-10 16:41:44 -07001416
Frederic Weisbeckerb04db8e2017-11-06 16:01:30 +01001417 lockdep_assert_irqs_disabled();
Paul E. McKenney3382adb2015-03-04 15:41:24 -08001418
Paul E. McKenney0bd55c62019-08-12 10:28:08 -07001419 /* If no non-offloaded callbacks, RCU doesn't need the CPU. */
1420 if (rcu_segcblist_empty(&rdp->cblist) ||
Frederic Weisbecker3820b512020-11-12 01:51:21 +01001421 rcu_rdp_is_offloaded(rdp)) {
Thomas Gleixnerc1ad3482015-04-14 21:08:58 +00001422 *nextevt = KTIME_MAX;
Paul E. McKenneyaa9b16302012-05-10 16:41:44 -07001423 return 0;
1424 }
Paul E. McKenneyc0f4dfd2012-12-28 11:30:36 -08001425
1426 /* Attempt to advance callbacks. */
1427 if (rcu_try_advance_all_cbs()) {
1428 /* Some ready to invoke, so initiate later invocation. */
1429 invoke_rcu_core();
Paul E. McKenneyaa9b16302012-05-10 16:41:44 -07001430 return 1;
1431 }
Paul E. McKenney5998a752018-08-03 21:00:38 -07001432 rdp->last_accelerate = jiffies;
Paul E. McKenneyc0f4dfd2012-12-28 11:30:36 -08001433
Joel Fernandes (Google)77a40f92019-08-30 12:36:32 -04001434 /* Request timer and round. */
1435 dj = round_up(rcu_idle_gp_delay + jiffies, rcu_idle_gp_delay) - jiffies;
1436
Thomas Gleixnerc1ad3482015-04-14 21:08:58 +00001437 *nextevt = basemono + dj * TICK_NSEC;
Paul E. McKenneyaa9b16302012-05-10 16:41:44 -07001438 return 0;
1439}
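/*
 * Worked example (illustrative only): with the default
 * rcu_idle_gp_delay of 4, the round_up() above pushes the wakeup to
 * the next multiple of 4 jiffies, so dj always lands between 4 and 7
 * jiffies. For instance, jiffies == 1001 gives
 * round_up(1005, 4) == 1008 and thus dj == 7. Rounding this way tends
 * to line up the wakeups of multiple idle CPUs.
 */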
1440
1441/*
Joel Fernandes (Google)77a40f92019-08-30 12:36:32 -04001442 * Prepare a CPU for idle from an RCU perspective. The first major task is to
1443 * sense whether nohz mode has been enabled or disabled via sysfs. The second
1444 * major task is to accelerate (that is, assign grace-period numbers to) any
1445 * recently arrived callbacks.
Paul E. McKenneyaea1b352011-11-02 06:54:54 -07001446 *
1447 * The caller must have disabled interrupts.
Paul E. McKenney8bd93a22010-02-22 17:04:59 -08001448 */
Paul E. McKenney198bbf82014-10-22 15:03:43 -07001449static void rcu_prepare_for_idle(void)
Paul E. McKenney8bd93a22010-02-22 17:04:59 -08001450{
Paul E. McKenney48a76392014-03-11 13:02:16 -07001451 bool needwake;
Paul E. McKenney0fd79e72018-08-03 21:00:38 -07001452 struct rcu_data *rdp = this_cpu_ptr(&rcu_data);
Paul E. McKenneyc0f4dfd2012-12-28 11:30:36 -08001453 struct rcu_node *rnp;
Paul E. McKenney9d2ad242012-06-24 10:15:02 -07001454 int tne;
1455
Frederic Weisbeckerb04db8e2017-11-06 16:01:30 +01001456 lockdep_assert_irqs_disabled();
Frederic Weisbecker3820b512020-11-12 01:51:21 +01001457 if (rcu_rdp_is_offloaded(rdp))
Paul E. McKenney3382adb2015-03-04 15:41:24 -08001458 return;
1459
Paul E. McKenney9d2ad242012-06-24 10:15:02 -07001460 /* Handle nohz enablement switches conservatively. */
Paul E. McKenney7d0ae802015-03-03 14:57:58 -08001461 tne = READ_ONCE(tick_nohz_active);
Paul E. McKenney0fd79e72018-08-03 21:00:38 -07001462 if (tne != rdp->tick_nohz_enabled_snap) {
Paul E. McKenney260e1e42018-11-29 13:28:49 -08001463 if (!rcu_segcblist_empty(&rdp->cblist))
Paul E. McKenney9d2ad242012-06-24 10:15:02 -07001464 invoke_rcu_core(); /* force nohz to see update. */
Paul E. McKenney0fd79e72018-08-03 21:00:38 -07001465 rdp->tick_nohz_enabled_snap = tne;
Paul E. McKenney9d2ad242012-06-24 10:15:02 -07001466 return;
1467 }
1468 if (!tne)
1469 return;
Paul E. McKenneyf511fc62012-03-15 12:16:26 -07001470
Paul E. McKenney3084f2f2011-11-22 17:07:11 -08001471 /*
Paul E. McKenneyc0f4dfd2012-12-28 11:30:36 -08001472 * If we have not yet accelerated this jiffy, accelerate all
1473 * callbacks on this CPU.
1474 */
Paul E. McKenney5998a752018-08-03 21:00:38 -07001475 if (rdp->last_accelerate == jiffies)
Paul E. McKenneyc0f4dfd2012-12-28 11:30:36 -08001476 return;
Paul E. McKenney5998a752018-08-03 21:00:38 -07001477 rdp->last_accelerate = jiffies;
Paul E. McKenneyb97d23c2018-07-04 15:35:00 -07001478 if (rcu_segcblist_pend_cbs(&rdp->cblist)) {
Paul E. McKenneyc0f4dfd2012-12-28 11:30:36 -08001479 rnp = rdp->mynode;
Peter Zijlstra2a67e742015-10-08 12:24:23 +02001480 raw_spin_lock_rcu_node(rnp); /* irqs already disabled. */
Paul E. McKenney02f50142018-07-03 17:22:34 -07001481 needwake = rcu_accelerate_cbs(rnp, rdp);
Boqun Feng67c583a72015-12-29 12:18:47 +08001482 raw_spin_unlock_rcu_node(rnp); /* irqs remain disabled. */
Paul E. McKenney48a76392014-03-11 13:02:16 -07001483 if (needwake)
Paul E. McKenney532c00c2018-07-03 17:22:34 -07001484 rcu_gp_kthread_wake();
Paul E. McKenneyc0f4dfd2012-12-28 11:30:36 -08001485 }
1486}
1487
1488/*
1489 * Clean up for exit from idle. Attempt to advance callbacks based on
1490 * any grace periods that elapsed while the CPU was idle, and if any
1491 * callbacks are now ready to invoke, initiate invocation.
1492 */
Paul E. McKenney8fa78452014-10-22 15:07:37 -07001493static void rcu_cleanup_after_idle(void)
Paul E. McKenneyc0f4dfd2012-12-28 11:30:36 -08001494{
Paul E. McKenneyce5215c2019-04-12 15:58:34 -07001495 struct rcu_data *rdp = this_cpu_ptr(&rcu_data);
1496
Frederic Weisbeckerb04db8e2017-11-06 16:01:30 +01001497 lockdep_assert_irqs_disabled();
Frederic Weisbecker3820b512020-11-12 01:51:21 +01001498 if (rcu_rdp_is_offloaded(rdp))
Paul E. McKenneyc0f4dfd2012-12-28 11:30:36 -08001499 return;
Paul E. McKenney7a497c92013-08-22 18:16:16 -07001500 if (rcu_try_advance_all_cbs())
1501 invoke_rcu_core();
Paul E. McKenney8bd93a22010-02-22 17:04:59 -08001502}
1503
1504#endif /* #else #if !defined(CONFIG_RCU_FAST_NO_HZ) */
Paul E. McKenneya858af22012-01-16 13:29:10 -08001505
Paul E. McKenney3fbfbf72012-08-19 21:35:53 -07001506#ifdef CONFIG_RCU_NOCB_CPU
1507
1508/*
1509 * Offload callback processing from the set of CPUs specified at boot time
Paul E. McKenneya9fefdb2018-12-03 14:07:17 -08001510 * by rcu_nocb_mask. For the CPUs in the set, there are kthreads
1511 * created that pull the callbacks from the corresponding CPU, wait for
1512 * a grace period to elapse, and invoke the callbacks. These kthreads
Paul E. McKenney6484fe52019-03-28 15:44:18 -07001513 * are organized into GP kthreads, which manage incoming callbacks, wait for
1514 * grace periods, and awaken CB kthreads, and the CB kthreads, which only
1515 * invoke callbacks. Each GP kthread invokes its own CBs. The no-CBs CPUs
1516 * do a wake_up() on their GP kthread when they insert a callback into any
Paul E. McKenneya9fefdb2018-12-03 14:07:17 -08001517 * empty list, unless the rcu_nocb_poll boot parameter has been specified,
1518 * in which case each kthread actively polls its CPU. (Which isn't so great
1519 * for energy efficiency, but which does reduce RCU's overhead on that CPU.)
Paul E. McKenney3fbfbf72012-08-19 21:35:53 -07001520 *
1521 * This is intended to be used in conjunction with Frederic Weisbecker's
1522 * adaptive-idle work, which would seriously reduce OS jitter on CPUs
1523 * running CPU-bound user-mode computations.
1524 *
Paul E. McKenneya9fefdb2018-12-03 14:07:17 -08001525 * Offloading of callbacks can also be used as an energy-efficiency
1526 * measure because CPUs with no RCU callbacks queued are more aggressive
1527 * about entering dyntick-idle mode.
Paul E. McKenney3fbfbf72012-08-19 21:35:53 -07001528 */
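/*
 * Illustrative usage (example values only, and assuming
 * CONFIG_RCU_NOCB_CPU=y): booting with "rcu_nocbs=1-7" offloads
 * callback processing for CPUs 1-7, and adding "rcu_nocb_poll" makes
 * the GP kthreads poll rather than wait to be awakened. Callers see
 * no difference; a minimal sketch using a made-up structure:
 *
 *	struct foo {
 *		struct rcu_head rh;
 *		int data;
 *	};
 *
 *	static void free_foo_cb(struct rcu_head *rhp)
 *	{
 *		kfree(container_of(rhp, struct foo, rh));
 *	}
 *
 *	// On an offloaded CPU, this callback is queued for the rcuo
 *	// kthreads rather than for softirq-time invocation.
 *	call_rcu(&some_foo->rh, free_foo_cb);
 */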
1529
1530
Paul E. McKenney497e4262019-03-06 14:47:56 -08001531/*
1532 * Parse the boot-time rcu_nocb_mask CPU list from the kernel parameters.
Paul Gortmaker3e70df92021-02-21 03:08:27 -05001533 * If the list is invalid, a warning is emitted and all CPUs are offloaded.
Paul E. McKenney497e4262019-03-06 14:47:56 -08001534 */
Paul E. McKenney3fbfbf72012-08-19 21:35:53 -07001535static int __init rcu_nocb_setup(char *str)
1536{
1537 alloc_bootmem_cpumask_var(&rcu_nocb_mask);
Paul Gortmaker3e70df92021-02-21 03:08:27 -05001538 if (!strcasecmp(str, "all")) /* legacy: use "0-N" instead */
Paul E. McKenneyda8739f2019-03-05 15:28:19 -08001539 cpumask_setall(rcu_nocb_mask);
1540 else
Paul E. McKenney497e4262019-03-06 14:47:56 -08001541 if (cpulist_parse(str, rcu_nocb_mask)) {
1542 pr_warn("rcu_nocbs= bad CPU range, all CPUs set\n");
1543 cpumask_setall(rcu_nocb_mask);
1544 }
Paul E. McKenney3fbfbf72012-08-19 21:35:53 -07001545 return 1;
1546}
1547__setup("rcu_nocbs=", rcu_nocb_setup);
1548
Paul Gortmaker1b0048a2012-12-20 13:19:22 -08001549static int __init parse_rcu_nocb_poll(char *arg)
1550{
Nicholas Mc Guire5455a7f2017-03-25 20:46:02 +01001551 rcu_nocb_poll = true;
Paul Gortmaker1b0048a2012-12-20 13:19:22 -08001552 return 0;
1553}
1554early_param("rcu_nocb_poll", parse_rcu_nocb_poll);
1555
Paul E. McKenney34ed62462013-01-07 13:37:42 -08001556/*
Paul E. McKenneyd1b222c2019-07-02 16:03:33 -07001557 * Don't bother bypassing ->cblist if the call_rcu() rate is low.
1558 * After all, the main point of bypassing is to avoid lock contention
1559 * on ->nocb_lock, which can only happen at high call_rcu() rates.
Paul E. McKenney5d6742b2019-05-15 09:56:40 -07001560 */
Jiapeng Chong9640dca2021-02-24 16:30:29 +08001561static int nocb_nobypass_lim_per_jiffy = 16 * 1000 / HZ;
Paul E. McKenneyd1b222c2019-07-02 16:03:33 -07001562module_param(nocb_nobypass_lim_per_jiffy, int, 0);
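/*
 * Worked example (illustrative only): the initializer above is
 * rate-equivalent across HZ values. With HZ=1000 it permits 16 direct
 * enqueues per jiffy and with HZ=250 it permits 64, either way about
 * 16,000 call_rcu() invocations per second per CPU before further
 * callbacks start being diverted to ->nocb_bypass.
 */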
1563
1564/*
1565 * Acquire the specified rcu_data structure's ->nocb_bypass_lock. If the
1566 * lock isn't immediately available, increment ->nocb_lock_contended to
1567 * flag the contention.
1568 */
1569static void rcu_nocb_bypass_lock(struct rcu_data *rdp)
Jules Irenge9ced4542020-01-20 22:42:15 +00001570 __acquires(&rdp->nocb_bypass_lock)
Paul E. McKenney5d6742b2019-05-15 09:56:40 -07001571{
Paul E. McKenney81c0b3d2019-05-28 07:18:08 -07001572 lockdep_assert_irqs_disabled();
Paul E. McKenneyd1b222c2019-07-02 16:03:33 -07001573 if (raw_spin_trylock(&rdp->nocb_bypass_lock))
Paul E. McKenney81c0b3d2019-05-28 07:18:08 -07001574 return;
1575 atomic_inc(&rdp->nocb_lock_contended);
Paul E. McKenney6aacd882019-07-13 12:27:03 -07001576 WARN_ON_ONCE(smp_processor_id() != rdp->cpu);
Paul E. McKenney81c0b3d2019-05-28 07:18:08 -07001577 smp_mb__after_atomic(); /* atomic_inc() before lock. */
Paul E. McKenneyd1b222c2019-07-02 16:03:33 -07001578 raw_spin_lock(&rdp->nocb_bypass_lock);
Paul E. McKenney81c0b3d2019-05-28 07:18:08 -07001579 smp_mb__before_atomic(); /* atomic_dec() after lock. */
1580 atomic_dec(&rdp->nocb_lock_contended);
1581}
1582
1583/*
1584 * Spinwait until the specified rcu_data structure's ->nocb_lock is
1585 * not contended. Please note that this is extremely special-purpose,
1586 * relying on the fact that at most two kthreads and one CPU contend for
1587 * this lock, and also that the two kthreads are guaranteed to have frequent
1588 * grace-period-duration time intervals between successive acquisitions
1589 * of the lock. This allows us to use an extremely simple throttling
1590 * mechanism, and further to apply it only to the CPU doing floods of
1591 * call_rcu() invocations. Don't try this at home!
1592 */
1593static void rcu_nocb_wait_contended(struct rcu_data *rdp)
1594{
Paul E. McKenney6aacd882019-07-13 12:27:03 -07001595 WARN_ON_ONCE(smp_processor_id() != rdp->cpu);
1596 while (WARN_ON_ONCE(atomic_read(&rdp->nocb_lock_contended)))
Paul E. McKenney81c0b3d2019-05-28 07:18:08 -07001597 cpu_relax();
Paul E. McKenney5d6742b2019-05-15 09:56:40 -07001598}
1599
1600/*
Paul E. McKenneyd1b222c2019-07-02 16:03:33 -07001601 * Conditionally acquire the specified rcu_data structure's
1602 * ->nocb_bypass_lock.
1603 */
1604static bool rcu_nocb_bypass_trylock(struct rcu_data *rdp)
1605{
1606 lockdep_assert_irqs_disabled();
1607 return raw_spin_trylock(&rdp->nocb_bypass_lock);
1608}
1609
1610/*
1611 * Release the specified rcu_data structure's ->nocb_bypass_lock.
1612 */
1613static void rcu_nocb_bypass_unlock(struct rcu_data *rdp)
Jules Irenge92c0b882020-01-30 00:30:09 +00001614 __releases(&rdp->nocb_bypass_lock)
Paul E. McKenneyd1b222c2019-07-02 16:03:33 -07001615{
1616 lockdep_assert_irqs_disabled();
1617 raw_spin_unlock(&rdp->nocb_bypass_lock);
1618}
1619
1620/*
1621 * Acquire the specified rcu_data structure's ->nocb_lock, but only
1622 * if it corresponds to a no-CBs CPU.
1623 */
1624static void rcu_nocb_lock(struct rcu_data *rdp)
1625{
1626 lockdep_assert_irqs_disabled();
Frederic Weisbecker3820b512020-11-12 01:51:21 +01001627 if (!rcu_rdp_is_offloaded(rdp))
Paul E. McKenneyd1b222c2019-07-02 16:03:33 -07001628 return;
1629 raw_spin_lock(&rdp->nocb_lock);
1630}
1631
1632/*
Paul E. McKenney5d6742b2019-05-15 09:56:40 -07001633 * Release the specified rcu_data structure's ->nocb_lock, but only
1634 * if it corresponds to a no-CBs CPU.
1635 */
1636static void rcu_nocb_unlock(struct rcu_data *rdp)
1637{
Frederic Weisbecker3820b512020-11-12 01:51:21 +01001638 if (rcu_rdp_is_offloaded(rdp)) {
Paul E. McKenney5d6742b2019-05-15 09:56:40 -07001639 lockdep_assert_irqs_disabled();
1640 raw_spin_unlock(&rdp->nocb_lock);
1641 }
1642}
1643
1644/*
1645 * Release the specified rcu_data structure's ->nocb_lock and restore
1646 * interrupts, but only if it corresponds to a no-CBs CPU.
1647 */
1648static void rcu_nocb_unlock_irqrestore(struct rcu_data *rdp,
1649 unsigned long flags)
1650{
Frederic Weisbecker3820b512020-11-12 01:51:21 +01001651 if (rcu_rdp_is_offloaded(rdp)) {
Paul E. McKenney5d6742b2019-05-15 09:56:40 -07001652 lockdep_assert_irqs_disabled();
1653 raw_spin_unlock_irqrestore(&rdp->nocb_lock, flags);
1654 } else {
1655 local_irq_restore(flags);
1656 }
1657}
1658
Paul E. McKenneyd1b222c2019-07-02 16:03:33 -07001659/* Lockdep check that ->cblist may be safely accessed. */
1660static void rcu_lockdep_assert_cblist_protected(struct rcu_data *rdp)
1661{
1662 lockdep_assert_irqs_disabled();
Frederic Weisbecker3820b512020-11-12 01:51:21 +01001663 if (rcu_rdp_is_offloaded(rdp))
Paul E. McKenneyd1b222c2019-07-02 16:03:33 -07001664 lockdep_assert_held(&rdp->nocb_lock);
1665}
1666
Paul E. McKenney5d6742b2019-05-15 09:56:40 -07001667/*
Paul E. McKenney0446be42012-12-30 15:21:01 -08001668 * Wake up any no-CBs CPUs' kthreads that were waiting on the just-ended
1669 * grace period.
Paul E. McKenneydae6e642013-02-10 20:48:58 -08001670 */
Paul Gortmakerabedf8e2016-02-19 09:46:41 +01001671static void rcu_nocb_gp_cleanup(struct swait_queue_head *sq)
Paul E. McKenneydae6e642013-02-10 20:48:58 -08001672{
Paul Gortmakerabedf8e2016-02-19 09:46:41 +01001673 swake_up_all(sq);
Paul E. McKenneydae6e642013-02-10 20:48:58 -08001674}
1675
Paul Gortmakerabedf8e2016-02-19 09:46:41 +01001676static struct swait_queue_head *rcu_nocb_gp_get(struct rcu_node *rnp)
Daniel Wagner065bb782016-02-19 09:46:40 +01001677{
Paul E. McKenneye0da2372018-04-27 20:51:36 -07001678 return &rnp->nocb_gp_wq[rcu_seq_ctr(rnp->gp_seq) & 0x1];
Daniel Wagner065bb782016-02-19 09:46:40 +01001679}
1680
Paul E. McKenneydae6e642013-02-10 20:48:58 -08001681static void rcu_init_one_nocb(struct rcu_node *rnp)
1682{
Paul Gortmakerabedf8e2016-02-19 09:46:41 +01001683 init_swait_queue_head(&rnp->nocb_gp_wq[0]);
1684 init_swait_queue_head(&rnp->nocb_gp_wq[1]);
Paul E. McKenney34ed62462013-01-07 13:37:42 -08001685}
1686
Liu Ping Fan24342c92014-02-24 06:18:09 -08001687/* Is the specified CPU a no-CBs CPU? */
Frederic Weisbeckerd1e43fa2013-03-26 23:47:24 +01001688bool rcu_is_nocb_cpu(int cpu)
Paul E. McKenney3fbfbf72012-08-19 21:35:53 -07001689{
Rakib Mullick84b12b72017-11-17 21:40:15 +06001690 if (cpumask_available(rcu_nocb_mask))
Paul E. McKenney3fbfbf72012-08-19 21:35:53 -07001691 return cpumask_test_cpu(cpu, rcu_nocb_mask);
1692 return false;
1693}
1694
1695/*
Paul E. McKenney6484fe52019-03-28 15:44:18 -07001696 * Kick the GP kthread for this NOCB group. Caller holds ->nocb_lock
Paul E. McKenney8be6e1b2017-04-29 20:03:20 -07001697 * and this function releases it.
Paul E. McKenneyfbce7492014-06-24 09:26:11 -07001698 */
Frederic Weisbeckerf8bb5ca2021-02-01 00:05:46 +01001699static bool wake_nocb_gp(struct rcu_data *rdp, bool force,
1700 unsigned long flags)
Paul E. McKenney8be6e1b2017-04-29 20:03:20 -07001701 __releases(rdp->nocb_lock)
Paul E. McKenneyfbce7492014-06-24 09:26:11 -07001702{
Paul E. McKenneyd1b222c2019-07-02 16:03:33 -07001703 bool needwake = false;
Paul E. McKenney5f675ba2019-03-31 16:11:57 -07001704 struct rcu_data *rdp_gp = rdp->nocb_gp_rdp;
Paul E. McKenneyfbce7492014-06-24 09:26:11 -07001705
Paul E. McKenney8be6e1b2017-04-29 20:03:20 -07001706 lockdep_assert_held(&rdp->nocb_lock);
Paul E. McKenney5f675ba2019-03-31 16:11:57 -07001707 if (!READ_ONCE(rdp_gp->nocb_gp_kthread)) {
Frederic Weisbeckere02691b2021-02-23 01:10:02 +01001708 rcu_nocb_unlock_irqrestore(rdp, flags);
Paul E. McKenneyd1b222c2019-07-02 16:03:33 -07001709 trace_rcu_nocb_wake(rcu_state.name, rdp->cpu,
1710 TPS("AlreadyAwake"));
Frederic Weisbeckerf8bb5ca2021-02-01 00:05:46 +01001711 return false;
Paul E. McKenney8be6e1b2017-04-29 20:03:20 -07001712 }
Frederic Weisbeckerb2fcf212021-02-23 01:09:59 +01001713
1714 if (READ_ONCE(rdp->nocb_defer_wakeup) > RCU_NOCB_WAKE_NOT) {
1715 WRITE_ONCE(rdp->nocb_defer_wakeup, RCU_NOCB_WAKE_NOT);
1716 del_timer(&rdp->nocb_timer);
1717 }
Paul E. McKenneyd1b222c2019-07-02 16:03:33 -07001718 rcu_nocb_unlock_irqrestore(rdp, flags);
1719 raw_spin_lock_irqsave(&rdp_gp->nocb_gp_lock, flags);
1720 if (force || READ_ONCE(rdp_gp->nocb_gp_sleep)) {
Paul E. McKenney5d6742b2019-05-15 09:56:40 -07001721 WRITE_ONCE(rdp_gp->nocb_gp_sleep, false);
Paul E. McKenneyd1b222c2019-07-02 16:03:33 -07001722 needwake = true;
1723 trace_rcu_nocb_wake(rcu_state.name, rdp->cpu, TPS("DoWake"));
Paul E. McKenneyfbce7492014-06-24 09:26:11 -07001724 }
Paul E. McKenneyd1b222c2019-07-02 16:03:33 -07001725 raw_spin_unlock_irqrestore(&rdp_gp->nocb_gp_lock, flags);
1726 if (needwake)
1727 wake_up_process(rdp_gp->nocb_gp_kthread);
Frederic Weisbeckerf8bb5ca2021-02-01 00:05:46 +01001728
1729 return needwake;
Paul E. McKenneyfbce7492014-06-24 09:26:11 -07001730}
1731
1732/*
Paul E. McKenney6484fe52019-03-28 15:44:18 -07001733 * Arrange to wake the GP kthread for this NOCB group at some future
1734 * time when it is safe to do so.
Paul E. McKenney8be6e1b2017-04-29 20:03:20 -07001735 */
Paul E. McKenney0d52a662019-03-31 16:19:02 -07001736static void wake_nocb_gp_defer(struct rcu_data *rdp, int waketype,
1737 const char *reason)
Paul E. McKenney8be6e1b2017-04-29 20:03:20 -07001738{
Frederic Weisbecker69cdea82020-11-13 13:13:23 +01001739 if (rdp->nocb_defer_wakeup == RCU_NOCB_WAKE_OFF)
1740 return;
Paul E. McKenney8be6e1b2017-04-29 20:03:20 -07001741 if (rdp->nocb_defer_wakeup == RCU_NOCB_WAKE_NOT)
1742 mod_timer(&rdp->nocb_timer, jiffies + 1);
Paul E. McKenney383e1332019-05-23 13:49:26 -07001743 if (rdp->nocb_defer_wakeup < waketype)
1744 WRITE_ONCE(rdp->nocb_defer_wakeup, waketype);
Paul E. McKenney88d1bea2018-07-04 14:45:00 -07001745 trace_rcu_nocb_wake(rcu_state.name, rdp->cpu, reason);
Paul E. McKenneyd7e29932014-10-27 09:15:54 -07001746}
1747
1748/*
Paul E. McKenneyd1b222c2019-07-02 16:03:33 -07001749 * Flush the ->nocb_bypass queue into ->cblist, enqueuing rhp if non-NULL.
1750 * However, if there is a callback to be enqueued and if ->nocb_bypass
1751 * proves to be initially empty, just return false because the no-CB GP
1752 * kthread may need to be awakened in this case.
1753 *
1754 * Note that this function always returns true if rhp is NULL.
1755 */
1756static bool rcu_nocb_do_flush_bypass(struct rcu_data *rdp, struct rcu_head *rhp,
1757 unsigned long j)
1758{
1759 struct rcu_cblist rcl;
1760
Frederic Weisbecker3820b512020-11-12 01:51:21 +01001761 WARN_ON_ONCE(!rcu_rdp_is_offloaded(rdp));
Paul E. McKenneyd1b222c2019-07-02 16:03:33 -07001762 rcu_lockdep_assert_cblist_protected(rdp);
1763 lockdep_assert_held(&rdp->nocb_bypass_lock);
1764 if (rhp && !rcu_cblist_n_cbs(&rdp->nocb_bypass)) {
1765 raw_spin_unlock(&rdp->nocb_bypass_lock);
1766 return false;
1767 }
1768 /* Note: ->cblist.len already accounts for ->nocb_bypass contents. */
1769 if (rhp)
1770 rcu_segcblist_inc_len(&rdp->cblist); /* Must precede enqueue. */
1771 rcu_cblist_flush_enqueue(&rcl, &rdp->nocb_bypass, rhp);
1772 rcu_segcblist_insert_pend_cbs(&rdp->cblist, &rcl);
1773 WRITE_ONCE(rdp->nocb_bypass_first, j);
1774 rcu_nocb_bypass_unlock(rdp);
1775 return true;
1776}
1777
1778/*
1779 * Flush the ->nocb_bypass queue into ->cblist, enqueuing rhp if non-NULL.
1780 * However, if there is a callback to be enqueued and if ->nocb_bypass
1781 * proves to be initially empty, just return false because the no-CB GP
1782 * kthread may need to be awakened in this case.
1783 *
1784 * Note that this function always returns true if rhp is NULL.
1785 */
1786static bool rcu_nocb_flush_bypass(struct rcu_data *rdp, struct rcu_head *rhp,
1787 unsigned long j)
1788{
Frederic Weisbecker3820b512020-11-12 01:51:21 +01001789 if (!rcu_rdp_is_offloaded(rdp))
Paul E. McKenneyd1b222c2019-07-02 16:03:33 -07001790 return true;
1791 rcu_lockdep_assert_cblist_protected(rdp);
1792 rcu_nocb_bypass_lock(rdp);
1793 return rcu_nocb_do_flush_bypass(rdp, rhp, j);
1794}
1795
1796/*
1797 * If the ->nocb_bypass_lock is immediately available, flush the
1798 * ->nocb_bypass queue into ->cblist.
1799 */
1800static void rcu_nocb_try_flush_bypass(struct rcu_data *rdp, unsigned long j)
1801{
1802 rcu_lockdep_assert_cblist_protected(rdp);
Frederic Weisbecker3820b512020-11-12 01:51:21 +01001803 if (!rcu_rdp_is_offloaded(rdp) ||
Paul E. McKenneyd1b222c2019-07-02 16:03:33 -07001804 !rcu_nocb_bypass_trylock(rdp))
1805 return;
1806 WARN_ON_ONCE(!rcu_nocb_do_flush_bypass(rdp, NULL, j));
1807}
1808
1809/*
1810 * See whether it is appropriate to use the ->nocb_bypass list in order
1811 * to control contention on ->nocb_lock. A limited number of direct
1812 * enqueues are permitted into ->cblist per jiffy. If ->nocb_bypass
1813 * is non-empty, further callbacks must be placed into ->nocb_bypass,
1814 * otherwise rcu_barrier() breaks. Use rcu_nocb_flush_bypass() to switch
1815 * back to direct use of ->cblist. However, ->nocb_bypass should not be
1816 * used if ->cblist is empty, because otherwise callbacks can be stranded
1817 * on ->nocb_bypass because we cannot count on the current CPU ever again
1818 * invoking call_rcu(). The general rule is that if ->nocb_bypass is
1819 * non-empty, the corresponding no-CBs grace-period kthread must not be
1820 * in an indefinite sleep state.
1821 *
1822 * Finally, it is not permitted to use the bypass during early boot,
1823 * as doing so would confuse the auto-initialization code. Besides
1824 * which, there is no point in worrying about lock contention while
1825 * there is only one CPU in operation.
1826 */
1827static bool rcu_nocb_try_bypass(struct rcu_data *rdp, struct rcu_head *rhp,
1828 bool *was_alldone, unsigned long flags)
1829{
1830 unsigned long c;
1831 unsigned long cur_gp_seq;
1832 unsigned long j = jiffies;
1833 long ncbs = rcu_cblist_n_cbs(&rdp->nocb_bypass);
1834
Frederic Weisbecker76d00b42021-02-23 01:10:00 +01001835 lockdep_assert_irqs_disabled();
1836
1837 // Pure softirq/rcuc based processing: no bypassing, no
1838 // locking.
Frederic Weisbecker3820b512020-11-12 01:51:21 +01001839 if (!rcu_rdp_is_offloaded(rdp)) {
Paul E. McKenneyd1b222c2019-07-02 16:03:33 -07001840 *was_alldone = !rcu_segcblist_pend_cbs(&rdp->cblist);
Frederic Weisbecker76d00b42021-02-23 01:10:00 +01001841 return false;
1842 }
1843
1844 // In the process of (de-)offloading: no bypassing, but
1845 // locking.
1846 if (!rcu_segcblist_completely_offloaded(&rdp->cblist)) {
1847 rcu_nocb_lock(rdp);
Paul E. McKenneyd1b222c2019-07-02 16:03:33 -07001848 *was_alldone = !rcu_segcblist_pend_cbs(&rdp->cblist);
1849 return false; /* Not offloaded, no bypassing. */
1850 }
Paul E. McKenneyd1b222c2019-07-02 16:03:33 -07001851
1852 // Don't use ->nocb_bypass during early boot.
1853 if (rcu_scheduler_active != RCU_SCHEDULER_RUNNING) {
1854 rcu_nocb_lock(rdp);
1855 WARN_ON_ONCE(rcu_cblist_n_cbs(&rdp->nocb_bypass));
1856 *was_alldone = !rcu_segcblist_pend_cbs(&rdp->cblist);
1857 return false;
1858 }
1859
1860 // If we have advanced to a new jiffy, reset counts to allow
1861 // moving back from ->nocb_bypass to ->cblist.
1862 if (j == rdp->nocb_nobypass_last) {
1863 c = rdp->nocb_nobypass_count + 1;
1864 } else {
1865 WRITE_ONCE(rdp->nocb_nobypass_last, j);
1866 c = rdp->nocb_nobypass_count - nocb_nobypass_lim_per_jiffy;
1867 if (ULONG_CMP_LT(rdp->nocb_nobypass_count,
1868 nocb_nobypass_lim_per_jiffy))
1869 c = 0;
1870 else if (c > nocb_nobypass_lim_per_jiffy)
1871 c = nocb_nobypass_lim_per_jiffy;
1872 }
1873 WRITE_ONCE(rdp->nocb_nobypass_count, c);
1874
1875 // If there hasn't yet been all that many ->cblist enqueues
1876 // this jiffy, tell the caller to enqueue onto ->cblist. But flush
1877 // ->nocb_bypass first.
1878 if (rdp->nocb_nobypass_count < nocb_nobypass_lim_per_jiffy) {
1879 rcu_nocb_lock(rdp);
1880 *was_alldone = !rcu_segcblist_pend_cbs(&rdp->cblist);
1881 if (*was_alldone)
1882 trace_rcu_nocb_wake(rcu_state.name, rdp->cpu,
1883 TPS("FirstQ"));
1884 WARN_ON_ONCE(!rcu_nocb_flush_bypass(rdp, NULL, j));
1885 WARN_ON_ONCE(rcu_cblist_n_cbs(&rdp->nocb_bypass));
1886 return false; // Caller must enqueue the callback.
1887 }
1888
1889 // If ->nocb_bypass has been used too long or is too full,
1890 // flush ->nocb_bypass to ->cblist.
1891 if ((ncbs && j != READ_ONCE(rdp->nocb_bypass_first)) ||
1892 ncbs >= qhimark) {
1893 rcu_nocb_lock(rdp);
1894 if (!rcu_nocb_flush_bypass(rdp, rhp, j)) {
1895 *was_alldone = !rcu_segcblist_pend_cbs(&rdp->cblist);
1896 if (*was_alldone)
1897 trace_rcu_nocb_wake(rcu_state.name, rdp->cpu,
1898 TPS("FirstQ"));
1899 WARN_ON_ONCE(rcu_cblist_n_cbs(&rdp->nocb_bypass));
1900 return false; // Caller must enqueue the callback.
1901 }
1902 if (j != rdp->nocb_gp_adv_time &&
1903 rcu_segcblist_nextgp(&rdp->cblist, &cur_gp_seq) &&
1904 rcu_seq_done(&rdp->mynode->gp_seq, cur_gp_seq)) {
1905 rcu_advance_cbs_nowake(rdp->mynode, rdp);
1906 rdp->nocb_gp_adv_time = j;
1907 }
1908 rcu_nocb_unlock_irqrestore(rdp, flags);
1909 return true; // Callback already enqueued.
1910 }
1911
1912 // We need to use the bypass.
1913 rcu_nocb_wait_contended(rdp);
1914 rcu_nocb_bypass_lock(rdp);
1915 ncbs = rcu_cblist_n_cbs(&rdp->nocb_bypass);
1916 rcu_segcblist_inc_len(&rdp->cblist); /* Must precede enqueue. */
1917 rcu_cblist_enqueue(&rdp->nocb_bypass, rhp);
1918 if (!ncbs) {
1919 WRITE_ONCE(rdp->nocb_bypass_first, j);
1920 trace_rcu_nocb_wake(rcu_state.name, rdp->cpu, TPS("FirstBQ"));
1921 }
1922 rcu_nocb_bypass_unlock(rdp);
1923 smp_mb(); /* Order enqueue before wake. */
1924 if (ncbs) {
1925 local_irq_restore(flags);
1926 } else {
1927 // No-CBs GP kthread might be indefinitely asleep; if so, wake it.
1928 rcu_nocb_lock(rdp); // Rare during call_rcu() flood.
1929 if (!rcu_segcblist_pend_cbs(&rdp->cblist)) {
1930 trace_rcu_nocb_wake(rcu_state.name, rdp->cpu,
1931 TPS("FirstBQwake"));
1932 __call_rcu_nocb_wake(rdp, true, flags);
1933 } else {
1934 trace_rcu_nocb_wake(rcu_state.name, rdp->cpu,
1935 TPS("FirstBQnoWake"));
1936 rcu_nocb_unlock_irqrestore(rdp, flags);
1937 }
1938 }
1939 return true; // Callback already enqueued.
1940}
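/*
 * Rough summary of the policy above (not authoritative, and assuming
 * the usual qhimark default of 10000): once a CPU exceeds
 * nocb_nobypass_lim_per_jiffy enqueues within a jiffy, new callbacks
 * go to ->nocb_bypass, and the bypass is flushed back into ->cblist
 * when it reaches qhimark callbacks or when a callback arrives in a
 * later jiffy than the bypass's first callback.
 */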
1941
1942/*
Paul E. McKenney5d6742b2019-05-15 09:56:40 -07001943 * Awaken the no-CBs grace-period kthread if needed, either due to it
1944 * legitimately being asleep or due to overload conditions.
Paul E. McKenney3fbfbf72012-08-19 21:35:53 -07001945 *
1946 * If warranted, also wake up the kthread servicing this CPU's queues.
1947 */
Paul E. McKenney5d6742b2019-05-15 09:56:40 -07001948static void __call_rcu_nocb_wake(struct rcu_data *rdp, bool was_alldone,
1949 unsigned long flags)
1950 __releases(rdp->nocb_lock)
Paul E. McKenney3fbfbf72012-08-19 21:35:53 -07001951{
Paul E. McKenney296181d2019-07-15 06:06:40 -07001952 unsigned long cur_gp_seq;
1953 unsigned long j;
Paul E. McKenneyce0a8252019-05-23 13:56:12 -07001954 long len;
Paul E. McKenney3fbfbf72012-08-19 21:35:53 -07001955 struct task_struct *t;
1956
Paul E. McKenney5d6742b2019-05-15 09:56:40 -07001957 // If we are being polled or there is no kthread, just leave.
Paul E. McKenney12f54c3a2019-03-29 16:43:51 -07001958 t = READ_ONCE(rdp->nocb_gp_kthread);
Paul E. McKenney25e03a72013-10-15 12:47:04 -07001959 if (rcu_nocb_poll || !t) {
Frederic Weisbeckere02691b2021-02-23 01:10:02 +01001960 rcu_nocb_unlock_irqrestore(rdp, flags);
Paul E. McKenney88d1bea2018-07-04 14:45:00 -07001961 trace_rcu_nocb_wake(rcu_state.name, rdp->cpu,
Paul E. McKenney9261dd02013-08-14 16:24:26 -07001962 TPS("WakeNotPoll"));
Paul E. McKenney3fbfbf72012-08-19 21:35:53 -07001963 return;
Paul E. McKenney9261dd02013-08-14 16:24:26 -07001964 }
Paul E. McKenney5d6742b2019-05-15 09:56:40 -07001965 // Need to actually do a wakeup.
1966 len = rcu_segcblist_n_cbs(&rdp->cblist);
1967 if (was_alldone) {
Paul E. McKenneyaeeacd92019-05-23 10:43:58 -07001968 rdp->qlen_last_fqs_check = len;
Paul E. McKenney96d3fd02013-10-04 14:33:34 -07001969 if (!irqs_disabled_flags(flags)) {
Paul E. McKenneyfbce7492014-06-24 09:26:11 -07001970 /* ... if queue was empty ... */
Paul E. McKenney5d6742b2019-05-15 09:56:40 -07001971 wake_nocb_gp(rdp, false, flags);
Paul E. McKenney88d1bea2018-07-04 14:45:00 -07001972 trace_rcu_nocb_wake(rcu_state.name, rdp->cpu,
Paul E. McKenney96d3fd02013-10-04 14:33:34 -07001973 TPS("WakeEmpty"));
1974 } else {
Paul E. McKenney0d52a662019-03-31 16:19:02 -07001975 wake_nocb_gp_defer(rdp, RCU_NOCB_WAKE,
1976 TPS("WakeEmptyIsDeferred"));
Paul E. McKenney5d6742b2019-05-15 09:56:40 -07001977 rcu_nocb_unlock_irqrestore(rdp, flags);
Paul E. McKenney96d3fd02013-10-04 14:33:34 -07001978 }
Paul E. McKenney3fbfbf72012-08-19 21:35:53 -07001979 } else if (len > rdp->qlen_last_fqs_check + qhimark) {
Paul E. McKenneyfbce7492014-06-24 09:26:11 -07001980 /* ... or if many callbacks queued. */
Paul E. McKenneyaeeacd92019-05-23 10:43:58 -07001981 rdp->qlen_last_fqs_check = len;
Paul E. McKenney296181d2019-07-15 06:06:40 -07001982 j = jiffies;
1983 if (j != rdp->nocb_gp_adv_time &&
1984 rcu_segcblist_nextgp(&rdp->cblist, &cur_gp_seq) &&
1985 rcu_seq_done(&rdp->mynode->gp_seq, cur_gp_seq)) {
Paul E. McKenneyfaca5c22019-06-26 09:50:38 -07001986 rcu_advance_cbs_nowake(rdp->mynode, rdp);
Paul E. McKenney296181d2019-07-15 06:06:40 -07001987 rdp->nocb_gp_adv_time = j;
1988 }
Paul E. McKenneyf48fe4c2019-07-16 02:17:00 -07001989 smp_mb(); /* Enqueue before timer_pending(). */
1990 if ((rdp->nocb_cb_sleep ||
1991 !rcu_segcblist_ready_cbs(&rdp->cblist)) &&
1992 !timer_pending(&rdp->nocb_bypass_timer))
Paul E. McKenney273f0342019-07-09 06:54:42 -07001993 wake_nocb_gp_defer(rdp, RCU_NOCB_WAKE_FORCE,
1994 TPS("WakeOvfIsDeferred"));
Paul E. McKenney273f0342019-07-09 06:54:42 -07001995 rcu_nocb_unlock_irqrestore(rdp, flags);
Paul E. McKenney9261dd02013-08-14 16:24:26 -07001996 } else {
Paul E. McKenney5d6742b2019-05-15 09:56:40 -07001997 rcu_nocb_unlock_irqrestore(rdp, flags);
Frederic Weisbeckere02691b2021-02-23 01:10:02 +01001998 trace_rcu_nocb_wake(rcu_state.name, rdp->cpu, TPS("WakeNot"));
Paul E. McKenney3fbfbf72012-08-19 21:35:53 -07001999 }
2000 return;
2001}
2002
Paul E. McKenneyd1b222c2019-07-02 16:03:33 -07002003/* Wake up the no-CBs GP kthread to flush ->nocb_bypass. */
2004static void do_nocb_bypass_wakeup_timer(struct timer_list *t)
2005{
2006 unsigned long flags;
2007 struct rcu_data *rdp = from_timer(rdp, t, nocb_bypass_timer);
2008
2009 trace_rcu_nocb_wake(rcu_state.name, rdp->cpu, TPS("Timer"));
2010 rcu_nocb_lock_irqsave(rdp, flags);
Paul E. McKenneyf48fe4c2019-07-16 02:17:00 -07002011 smp_mb__after_spinlock(); /* Timer expire before wakeup. */
Paul E. McKenneyd1b222c2019-07-02 16:03:33 -07002012 __call_rcu_nocb_wake(rdp, true, flags);
2013}
2014
Paul E. McKenney3fbfbf72012-08-19 21:35:53 -07002015/*
Frederic Weisbecker254e11ef2020-11-13 13:13:22 +01002016 * Check whether we should ignore this rdp.
2017 *
2018 * This check is done without holding the nocb lock, but we make
2019 * sure not to miss a freshly offloaded rdp thanks to the
2020 * following ordering:
2021 *
2022 * rdp_offload_toggle() nocb_gp_enabled_cb()
2023 * ------------------------- ----------------------------
2024 * WRITE flags LOCK nocb_gp_lock
2025 * LOCK nocb_gp_lock READ/WRITE nocb_gp_sleep
2026 * READ/WRITE nocb_gp_sleep UNLOCK nocb_gp_lock
2027 * UNLOCK nocb_gp_lock READ flags
2028 */
Frederic Weisbecker5bb39dc2020-11-13 13:13:21 +01002029static inline bool nocb_gp_enabled_cb(struct rcu_data *rdp)
2030{
2031 u8 flags = SEGCBLIST_OFFLOADED | SEGCBLIST_KTHREAD_GP;
2032
2033 return rcu_segcblist_test_flags(&rdp->cblist, flags);
2034}
2035
Frederic Weisbecker55adc3e2021-01-28 18:12:13 +01002036static inline bool nocb_gp_update_state_deoffloading(struct rcu_data *rdp,
2037 bool *needwake_state)
Frederic Weisbecker5bb39dc2020-11-13 13:13:21 +01002038{
2039 struct rcu_segcblist *cblist = &rdp->cblist;
2040
2041 if (rcu_segcblist_test_flags(cblist, SEGCBLIST_OFFLOADED)) {
Frederic Weisbecker254e11ef2020-11-13 13:13:22 +01002042 if (!rcu_segcblist_test_flags(cblist, SEGCBLIST_KTHREAD_GP)) {
2043 rcu_segcblist_set_flags(cblist, SEGCBLIST_KTHREAD_GP);
2044 if (rcu_segcblist_test_flags(cblist, SEGCBLIST_KTHREAD_CB))
2045 *needwake_state = true;
2046 }
Frederic Weisbecker55adc3e2021-01-28 18:12:13 +01002047 return false;
Frederic Weisbecker5bb39dc2020-11-13 13:13:21 +01002048 }
Paul E. McKenneyf7590812020-12-21 11:17:16 -08002049
2050 /*
2051 * De-offloading. Clear our flag and notify the de-offload worker.
2052 * We will ignore this rdp until it ever gets re-offloaded.
2053 */
2054 WARN_ON_ONCE(!rcu_segcblist_test_flags(cblist, SEGCBLIST_KTHREAD_GP));
2055 rcu_segcblist_clear_flags(cblist, SEGCBLIST_KTHREAD_GP);
2056 if (!rcu_segcblist_test_flags(cblist, SEGCBLIST_KTHREAD_CB))
2057 *needwake_state = true;
Frederic Weisbecker55adc3e2021-01-28 18:12:13 +01002058 return true;
Frederic Weisbecker5bb39dc2020-11-13 13:13:21 +01002059}
2060
2061
Paul E. McKenney3fbfbf72012-08-19 21:35:53 -07002062/*
Paul E. McKenney5d6742b2019-05-15 09:56:40 -07002063 * No-CBs GP kthreads come here to wait for additional callbacks to show up
2064 * or for grace periods to end.
Paul E. McKenneyfbce7492014-06-24 09:26:11 -07002065 */
Paul E. McKenney12f54c3a2019-03-29 16:43:51 -07002066static void nocb_gp_wait(struct rcu_data *my_rdp)
Paul E. McKenneyfbce7492014-06-24 09:26:11 -07002067{
Paul E. McKenneyd1b222c2019-07-02 16:03:33 -07002068 bool bypass = false;
2069 long bypass_ncbs;
Paul E. McKenney5d6742b2019-05-15 09:56:40 -07002070 int __maybe_unused cpu = my_rdp->cpu;
2071 unsigned long cur_gp_seq;
Paul E. McKenney8be6e1b2017-04-29 20:03:20 -07002072 unsigned long flags;
Dan Carpenterb8889c92019-09-23 17:26:34 +03002073 bool gotcbs = false;
Paul E. McKenneyd1b222c2019-07-02 16:03:33 -07002074 unsigned long j = jiffies;
Paul E. McKenney969974e2019-05-22 09:35:11 -07002075 bool needwait_gp = false; // This prevents actual uninitialized use.
Paul E. McKenney5d6742b2019-05-15 09:56:40 -07002076 bool needwake;
2077 bool needwake_gp;
Paul E. McKenneyfbce7492014-06-24 09:26:11 -07002078 struct rcu_data *rdp;
Paul E. McKenney5d6742b2019-05-15 09:56:40 -07002079 struct rcu_node *rnp;
Paul E. McKenney969974e2019-05-22 09:35:11 -07002080 unsigned long wait_gp_seq = 0; // Suppress "use uninitialized" warning.
Paul E. McKenney3d050312020-02-04 14:55:29 -08002081 bool wasempty = false;
Paul E. McKenneyfbce7492014-06-24 09:26:11 -07002082
2083 /*
Paul E. McKenney5d6742b2019-05-15 09:56:40 -07002084 * Each pass through the following loop checks for CBs and for the
2085 * nearest grace period (if any) to wait for next. The CB kthreads
2086 * and the global grace-period kthread are awakened if needed.
Paul E. McKenneyfbce7492014-06-24 09:26:11 -07002087 */
Paul E. McKenney4569c5e2020-08-05 10:35:16 -07002088 WARN_ON_ONCE(my_rdp->nocb_gp_rdp != my_rdp);
Paul E. McKenney58bf6f72019-03-28 15:33:59 -07002089 for (rdp = my_rdp; rdp; rdp = rdp->nocb_next_cb_rdp) {
Frederic Weisbecker5bb39dc2020-11-13 13:13:21 +01002090 bool needwake_state = false;
Paul E. McKenneyf7590812020-12-21 11:17:16 -08002091
Frederic Weisbecker5bb39dc2020-11-13 13:13:21 +01002092 if (!nocb_gp_enabled_cb(rdp))
2093 continue;
Paul E. McKenneyd1b222c2019-07-02 16:03:33 -07002094 trace_rcu_nocb_wake(rcu_state.name, rdp->cpu, TPS("Check"));
Paul E. McKenney81c0b3d2019-05-28 07:18:08 -07002095 rcu_nocb_lock_irqsave(rdp, flags);
Frederic Weisbecker55adc3e2021-01-28 18:12:13 +01002096 if (nocb_gp_update_state_deoffloading(rdp, &needwake_state)) {
Frederic Weisbecker5bb39dc2020-11-13 13:13:21 +01002097 rcu_nocb_unlock_irqrestore(rdp, flags);
2098 if (needwake_state)
2099 swake_up_one(&rdp->nocb_state_wq);
2100 continue;
2101 }
Paul E. McKenneyd1b222c2019-07-02 16:03:33 -07002102 bypass_ncbs = rcu_cblist_n_cbs(&rdp->nocb_bypass);
2103 if (bypass_ncbs &&
2104 (time_after(j, READ_ONCE(rdp->nocb_bypass_first) + 1) ||
2105 bypass_ncbs > 2 * qhimark)) {
2106 // Bypass full or old, so flush it.
2107 (void)rcu_nocb_try_flush_bypass(rdp, j);
2108 bypass_ncbs = rcu_cblist_n_cbs(&rdp->nocb_bypass);
2109 } else if (!bypass_ncbs && rcu_segcblist_empty(&rdp->cblist)) {
2110 rcu_nocb_unlock_irqrestore(rdp, flags);
Frederic Weisbecker254e11ef2020-11-13 13:13:22 +01002111 if (needwake_state)
2112 swake_up_one(&rdp->nocb_state_wq);
Paul E. McKenneyd1b222c2019-07-02 16:03:33 -07002113 continue; /* No callbacks here, try next. */
2114 }
2115 if (bypass_ncbs) {
2116 trace_rcu_nocb_wake(rcu_state.name, rdp->cpu,
2117 TPS("Bypass"));
2118 bypass = true;
2119 }
2120 rnp = rdp->mynode;
2121 if (bypass) { // Avoid race with first bypass CB.
2122 WRITE_ONCE(my_rdp->nocb_defer_wakeup,
2123 RCU_NOCB_WAKE_NOT);
2124 del_timer(&my_rdp->nocb_timer);
2125 }
2126 // Advance callbacks if helpful and low contention.
2127 needwake_gp = false;
2128 if (!rcu_segcblist_restempty(&rdp->cblist,
2129 RCU_NEXT_READY_TAIL) ||
2130 (rcu_segcblist_nextgp(&rdp->cblist, &cur_gp_seq) &&
2131 rcu_seq_done(&rnp->gp_seq, cur_gp_seq))) {
2132 raw_spin_lock_rcu_node(rnp); /* irqs disabled. */
2133 needwake_gp = rcu_advance_cbs(rnp, rdp);
Paul E. McKenney3d050312020-02-04 14:55:29 -08002134 wasempty = rcu_segcblist_restempty(&rdp->cblist,
2135 RCU_NEXT_READY_TAIL);
Paul E. McKenneyd1b222c2019-07-02 16:03:33 -07002136 raw_spin_unlock_rcu_node(rnp); /* irqs disabled. */
2137 }
Paul E. McKenney5d6742b2019-05-15 09:56:40 -07002138 // Need to wait on some grace period?
Paul E. McKenney3d050312020-02-04 14:55:29 -08002139 WARN_ON_ONCE(wasempty &&
2140 !rcu_segcblist_restempty(&rdp->cblist,
Paul E. McKenneyd1b222c2019-07-02 16:03:33 -07002141 RCU_NEXT_READY_TAIL));
Paul E. McKenney5d6742b2019-05-15 09:56:40 -07002142 if (rcu_segcblist_nextgp(&rdp->cblist, &cur_gp_seq)) {
2143 if (!needwait_gp ||
2144 ULONG_CMP_LT(cur_gp_seq, wait_gp_seq))
2145 wait_gp_seq = cur_gp_seq;
2146 needwait_gp = true;
Paul E. McKenneyd1b222c2019-07-02 16:03:33 -07002147 trace_rcu_nocb_wake(rcu_state.name, rdp->cpu,
2148 TPS("NeedWaitGP"));
Paul E. McKenneyfbce7492014-06-24 09:26:11 -07002149 }
Paul E. McKenney5d6742b2019-05-15 09:56:40 -07002150 if (rcu_segcblist_ready_cbs(&rdp->cblist)) {
2151 needwake = rdp->nocb_cb_sleep;
2152 WRITE_ONCE(rdp->nocb_cb_sleep, false);
2153 smp_mb(); /* CB invocation -after- GP end. */
2154 } else {
2155 needwake = false;
2156 }
Paul E. McKenney81c0b3d2019-05-28 07:18:08 -07002157 rcu_nocb_unlock_irqrestore(rdp, flags);
Paul E. McKenney5d6742b2019-05-15 09:56:40 -07002158 if (needwake) {
2159 swake_up_one(&rdp->nocb_cb_wq);
2160 gotcbs = true;
2161 }
2162 if (needwake_gp)
2163 rcu_gp_kthread_wake();
Frederic Weisbecker254e11ef2020-11-13 13:13:22 +01002164 if (needwake_state)
2165 swake_up_one(&rdp->nocb_state_wq);
Paul E. McKenneyfbce7492014-06-24 09:26:11 -07002166 }
Paul E. McKenney5d6742b2019-05-15 09:56:40 -07002167
Paul E. McKenneyf7a81b12019-06-25 13:32:51 -07002168 my_rdp->nocb_gp_bypass = bypass;
2169 my_rdp->nocb_gp_gp = needwait_gp;
2170 my_rdp->nocb_gp_seq = needwait_gp ? wait_gp_seq : 0;
Paul E. McKenneyd1b222c2019-07-02 16:03:33 -07002171 if (bypass && !rcu_nocb_poll) {
2172 // At least one child with non-empty ->nocb_bypass, so set
2173 // timer in order to avoid stranding its callbacks.
2174 raw_spin_lock_irqsave(&my_rdp->nocb_gp_lock, flags);
2175 mod_timer(&my_rdp->nocb_bypass_timer, j + 2);
2176 raw_spin_unlock_irqrestore(&my_rdp->nocb_gp_lock, flags);
2177 }
Paul E. McKenney5d6742b2019-05-15 09:56:40 -07002178 if (rcu_nocb_poll) {
2179 /* Polling, so trace if first poll in the series. */
2180 if (gotcbs)
2181 trace_rcu_nocb_wake(rcu_state.name, cpu, TPS("Poll"));
Paul E. McKenneyf5ca34642020-05-07 16:36:10 -07002182 schedule_timeout_idle(1);
Paul E. McKenney5d6742b2019-05-15 09:56:40 -07002183 } else if (!needwait_gp) {
2184 /* Wait for callbacks to appear. */
2185 trace_rcu_nocb_wake(rcu_state.name, cpu, TPS("Sleep"));
2186 swait_event_interruptible_exclusive(my_rdp->nocb_gp_wq,
2187 !READ_ONCE(my_rdp->nocb_gp_sleep));
Paul E. McKenneyd1b222c2019-07-02 16:03:33 -07002188 trace_rcu_nocb_wake(rcu_state.name, cpu, TPS("EndSleep"));
Paul E. McKenney5d6742b2019-05-15 09:56:40 -07002189 } else {
2190 rnp = my_rdp->mynode;
2191 trace_rcu_this_gp(rnp, my_rdp, wait_gp_seq, TPS("StartWait"));
2192 swait_event_interruptible_exclusive(
2193 rnp->nocb_gp_wq[rcu_seq_ctr(wait_gp_seq) & 0x1],
2194 rcu_seq_done(&rnp->gp_seq, wait_gp_seq) ||
2195 !READ_ONCE(my_rdp->nocb_gp_sleep));
2196 trace_rcu_this_gp(rnp, my_rdp, wait_gp_seq, TPS("EndWait"));
2197 }
2198 if (!rcu_nocb_poll) {
Paul E. McKenney4fd8c5f2019-06-02 13:41:08 -07002199 raw_spin_lock_irqsave(&my_rdp->nocb_gp_lock, flags);
Paul E. McKenneyd1b222c2019-07-02 16:03:33 -07002200 if (bypass)
2201 del_timer(&my_rdp->nocb_bypass_timer);
Paul E. McKenney5d6742b2019-05-15 09:56:40 -07002202 WRITE_ONCE(my_rdp->nocb_gp_sleep, true);
Paul E. McKenney4fd8c5f2019-06-02 13:41:08 -07002203 raw_spin_unlock_irqrestore(&my_rdp->nocb_gp_lock, flags);
Paul E. McKenney5d6742b2019-05-15 09:56:40 -07002204 }
Paul E. McKenneyf7a81b12019-06-25 13:32:51 -07002205 my_rdp->nocb_gp_seq = -1;
Paul E. McKenney5d6742b2019-05-15 09:56:40 -07002206 WARN_ON(signal_pending(current));
Paul E. McKenney12f54c3a2019-03-29 16:43:51 -07002207}
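
/*
 * Condensed view of nocb_gp_wait() (a summary sketch, not authoritative):
 *
 *	for each rdp in this GP kthread's group:
 *		skip it unless nocb_gp_enabled_cb(rdp)
 *		flush ->nocb_bypass if it is full or stale
 *		advance callbacks and record any grace period to wait for
 *		wake the rdp's CB kthread if it now has ready callbacks
 *	then either poll (rcu_nocb_poll), sleep until more callbacks
 *	arrive, or sleep until the awaited grace period completes.
 */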
Paul E. McKenneyfbce7492014-06-24 09:26:11 -07002208
Paul E. McKenney12f54c3a2019-03-29 16:43:51 -07002209/*
2210 * No-CBs grace-period-wait kthread. There is one of these per group
 2211 * of CPUs, but it is spawned only after at least one CPU in that group
 2212 * has come online since boot. This kthread checks for newly posted
2213 * callbacks from any of the CPUs it is responsible for, waits for a
2214 * grace period, then awakens all of the rcu_nocb_cb_kthread() instances
2215 * that then have callback-invocation work to do.
2216 */
2217static int rcu_nocb_gp_kthread(void *arg)
2218{
2219 struct rcu_data *rdp = arg;
2220
Paul E. McKenney5d6742b2019-05-15 09:56:40 -07002221 for (;;) {
Paul E. McKenneyf7a81b12019-06-25 13:32:51 -07002222 WRITE_ONCE(rdp->nocb_gp_loops, rdp->nocb_gp_loops + 1);
Paul E. McKenney12f54c3a2019-03-29 16:43:51 -07002223 nocb_gp_wait(rdp);
Paul E. McKenney5d6742b2019-05-15 09:56:40 -07002224 cond_resched_tasks_rcu_qs();
2225 }
Paul E. McKenney12f54c3a2019-03-29 16:43:51 -07002226 return 0;
Paul E. McKenneyfbce7492014-06-24 09:26:11 -07002227}
2228
Frederic Weisbeckerd97b0782020-11-13 13:13:19 +01002229static inline bool nocb_cb_can_run(struct rcu_data *rdp)
2230{
2231 u8 flags = SEGCBLIST_OFFLOADED | SEGCBLIST_KTHREAD_CB;
2232 return rcu_segcblist_test_flags(&rdp->cblist, flags);
2233}
2234
2235static inline bool nocb_cb_wait_cond(struct rcu_data *rdp)
2236{
2237 return nocb_cb_can_run(rdp) && !READ_ONCE(rdp->nocb_cb_sleep);
2238}
2239
Paul E. McKenneyfbce7492014-06-24 09:26:11 -07002240/*
Paul E. McKenney5d6742b2019-05-15 09:56:40 -07002241 * Invoke any ready callbacks from the corresponding no-CBs CPU,
2242 * then, if there are no more, wait for more to appear.
Paul E. McKenneyfbce7492014-06-24 09:26:11 -07002243 */
Paul E. McKenney5d6742b2019-05-15 09:56:40 -07002244static void nocb_cb_wait(struct rcu_data *rdp)
Paul E. McKenneyfbce7492014-06-24 09:26:11 -07002245{
Frederic Weisbeckerd97b0782020-11-13 13:13:19 +01002246 struct rcu_segcblist *cblist = &rdp->cblist;
Paul E. McKenney1d5a81c12019-07-15 01:09:04 -07002247 unsigned long cur_gp_seq;
Paul E. McKenney5d6742b2019-05-15 09:56:40 -07002248 unsigned long flags;
Paul E. McKenneyf7590812020-12-21 11:17:16 -08002249 bool needwake_state = false;
Paul E. McKenney5d6742b2019-05-15 09:56:40 -07002250 bool needwake_gp = false;
Frederic Weisbecker8a682b32021-01-28 18:12:12 +01002251 bool can_sleep = true;
Paul E. McKenney5d6742b2019-05-15 09:56:40 -07002252 struct rcu_node *rnp = rdp->mynode;
2253
2254 local_irq_save(flags);
2255 rcu_momentary_dyntick_idle();
2256 local_irq_restore(flags);
Frederic Weisbecker5de2e5b2021-01-28 18:12:08 +01002257 /*
2258 * Disable BH to provide the expected environment. Also, when
2259 * transitioning to/from NOCB mode, a self-requeuing callback might
 2260 * be invoked from softirq. A short grace period could cause both
 2261 * instances of this callback to execute concurrently.
2262 */
Paul E. McKenney5d6742b2019-05-15 09:56:40 -07002263 local_bh_disable();
2264 rcu_do_batch(rdp);
2265 local_bh_enable();
2266 lockdep_assert_irqs_enabled();
Paul E. McKenney81c0b3d2019-05-28 07:18:08 -07002267 rcu_nocb_lock_irqsave(rdp, flags);
Frederic Weisbeckerd97b0782020-11-13 13:13:19 +01002268 if (rcu_segcblist_nextgp(cblist, &cur_gp_seq) &&
Paul E. McKenney1d5a81c12019-07-15 01:09:04 -07002269 rcu_seq_done(&rnp->gp_seq, cur_gp_seq) &&
2270 raw_spin_trylock_rcu_node(rnp)) { /* irqs already disabled. */
Paul E. McKenney523bddd2019-06-01 13:33:55 -07002271 needwake_gp = rcu_advance_cbs(rdp->mynode, rdp);
2272 raw_spin_unlock_rcu_node(rnp); /* irqs remain disabled. */
2273 }
Frederic Weisbeckerd97b0782020-11-13 13:13:19 +01002274
Frederic Weisbeckerd97b0782020-11-13 13:13:19 +01002275 if (rcu_segcblist_test_flags(cblist, SEGCBLIST_OFFLOADED)) {
Frederic Weisbecker254e11ef2020-11-13 13:13:22 +01002276 if (!rcu_segcblist_test_flags(cblist, SEGCBLIST_KTHREAD_CB)) {
2277 rcu_segcblist_set_flags(cblist, SEGCBLIST_KTHREAD_CB);
2278 if (rcu_segcblist_test_flags(cblist, SEGCBLIST_KTHREAD_GP))
2279 needwake_state = true;
2280 }
Frederic Weisbeckerd97b0782020-11-13 13:13:19 +01002281 if (rcu_segcblist_ready_cbs(cblist))
Frederic Weisbecker8a682b32021-01-28 18:12:12 +01002282 can_sleep = false;
Frederic Weisbeckerd97b0782020-11-13 13:13:19 +01002283 } else {
2284 /*
2285 * De-offloading. Clear our flag and notify the de-offload worker.
 2286 * We won't touch the callbacks and will keep sleeping until we
 2287 * get re-offloaded.
2288 */
2289 WARN_ON_ONCE(!rcu_segcblist_test_flags(cblist, SEGCBLIST_KTHREAD_CB));
2290 rcu_segcblist_clear_flags(cblist, SEGCBLIST_KTHREAD_CB);
2291 if (!rcu_segcblist_test_flags(cblist, SEGCBLIST_KTHREAD_GP))
2292 needwake_state = true;
Paul E. McKenney5d6742b2019-05-15 09:56:40 -07002293 }
2294
Frederic Weisbecker8a682b32021-01-28 18:12:12 +01002295 WRITE_ONCE(rdp->nocb_cb_sleep, can_sleep);
2296
Frederic Weisbeckerd97b0782020-11-13 13:13:19 +01002297 if (rdp->nocb_cb_sleep)
2298 trace_rcu_nocb_wake(rcu_state.name, rdp->cpu, TPS("CBSleep"));
2299
Paul E. McKenney81c0b3d2019-05-28 07:18:08 -07002300 rcu_nocb_unlock_irqrestore(rdp, flags);
Paul E. McKenney5d6742b2019-05-15 09:56:40 -07002301 if (needwake_gp)
2302 rcu_gp_kthread_wake();
Frederic Weisbeckerd97b0782020-11-13 13:13:19 +01002303
2304 if (needwake_state)
2305 swake_up_one(&rdp->nocb_state_wq);
2306
2307 do {
2308 swait_event_interruptible_exclusive(rdp->nocb_cb_wq,
2309 nocb_cb_wait_cond(rdp));
2310
Paul E. McKenneyf7590812020-12-21 11:17:16 -08002311 // VVV Ensure CB invocation follows _sleep test.
2312 if (smp_load_acquire(&rdp->nocb_cb_sleep)) { // ^^^
Frederic Weisbeckerd97b0782020-11-13 13:13:19 +01002313 WARN_ON(signal_pending(current));
2314 trace_rcu_nocb_wake(rcu_state.name, rdp->cpu, TPS("WokeEmpty"));
2315 }
2316 } while (!nocb_cb_can_run(rdp));
Paul E. McKenneyfbce7492014-06-24 09:26:11 -07002317}
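
/*
 * Informal sketch of the CB/GP wakeup handshake implied by the code above
 * and by nocb_gp_wait():
 *
 *	nocb_gp_wait()				nocb_cb_wait()
 *	--------------				--------------
 *						rcu_do_batch(rdp)
 *						WRITE ->nocb_cb_sleep
 *	WRITE ->nocb_cb_sleep = false		swait on ->nocb_cb_wq until
 *	swake_up_one(&rdp->nocb_cb_wq)		nocb_cb_wait_cond(rdp) holds,
 *						then re-check ->nocb_cb_sleep
 *						with an acquire load
 */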
2318
2319/*
Paul E. McKenney5d6742b2019-05-15 09:56:40 -07002320 * Per-rcu_data kthread, but only for no-CBs CPUs. Repeatedly invoke
2321 * nocb_cb_wait() to do the dirty work.
Paul E. McKenney3fbfbf72012-08-19 21:35:53 -07002322 */
Paul E. McKenney12f54c3a2019-03-29 16:43:51 -07002323static int rcu_nocb_cb_kthread(void *arg)
Paul E. McKenney3fbfbf72012-08-19 21:35:53 -07002324{
Paul E. McKenney3fbfbf72012-08-19 21:35:53 -07002325 struct rcu_data *rdp = arg;
2326
Paul E. McKenney5d6742b2019-05-15 09:56:40 -07002327 // Each pass through this loop does one callback batch, and,
2328 // if there are no more ready callbacks, waits for them.
Paul E. McKenney3fbfbf72012-08-19 21:35:53 -07002329 for (;;) {
Paul E. McKenney5d6742b2019-05-15 09:56:40 -07002330 nocb_cb_wait(rdp);
2331 cond_resched_tasks_rcu_qs();
Paul E. McKenney3fbfbf72012-08-19 21:35:53 -07002332 }
2333 return 0;
2334}
2335
Paul E. McKenney96d3fd02013-10-04 14:33:34 -07002336/* Is a deferred wakeup of rcu_nocb_kthread() required? */
Paul E. McKenney9fdd3bc2014-07-29 14:50:47 -07002337static int rcu_nocb_need_deferred_wakeup(struct rcu_data *rdp)
Paul E. McKenney96d3fd02013-10-04 14:33:34 -07002338{
Frederic Weisbecker69cdea82020-11-13 13:13:23 +01002339 return READ_ONCE(rdp->nocb_defer_wakeup) > RCU_NOCB_WAKE_NOT;
Paul E. McKenney96d3fd02013-10-04 14:33:34 -07002340}
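
/*
 * Informal note: the "> RCU_NOCB_WAKE_NOT" comparison above assumes that
 * the deferred-wakeup levels are ordered, with RCU_NOCB_WAKE_OFF and
 * RCU_NOCB_WAKE_NOT below the real wake levels such as
 * RCU_NOCB_WAKE_FORCE, so both "off" and "nothing pending" read as
 * "no deferred wakeup needed".  The levels themselves are defined
 * elsewhere in the RCU headers.
 */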
2341
2342/* Do a deferred wakeup of rcu_nocb_kthread(). */
Frederic Weisbeckerf8bb5ca2021-02-01 00:05:46 +01002343static bool do_nocb_deferred_wakeup_common(struct rcu_data *rdp)
Paul E. McKenney96d3fd02013-10-04 14:33:34 -07002344{
Paul E. McKenney8be6e1b2017-04-29 20:03:20 -07002345 unsigned long flags;
Paul E. McKenney9fdd3bc2014-07-29 14:50:47 -07002346 int ndw;
Frederic Weisbeckerf8bb5ca2021-02-01 00:05:46 +01002347 int ret;
Paul E. McKenney9fdd3bc2014-07-29 14:50:47 -07002348
Paul E. McKenney81c0b3d2019-05-28 07:18:08 -07002349 rcu_nocb_lock_irqsave(rdp, flags);
Paul E. McKenney8be6e1b2017-04-29 20:03:20 -07002350 if (!rcu_nocb_need_deferred_wakeup(rdp)) {
Paul E. McKenney81c0b3d2019-05-28 07:18:08 -07002351 rcu_nocb_unlock_irqrestore(rdp, flags);
Frederic Weisbeckerf8bb5ca2021-02-01 00:05:46 +01002352 return false;
Paul E. McKenney8be6e1b2017-04-29 20:03:20 -07002353 }
Paul E. McKenney7d0ae802015-03-03 14:57:58 -08002354 ndw = READ_ONCE(rdp->nocb_defer_wakeup);
Frederic Weisbeckerf8bb5ca2021-02-01 00:05:46 +01002355 ret = wake_nocb_gp(rdp, ndw == RCU_NOCB_WAKE_FORCE, flags);
Paul E. McKenney88d1bea2018-07-04 14:45:00 -07002356 trace_rcu_nocb_wake(rcu_state.name, rdp->cpu, TPS("DeferredWake"));
Frederic Weisbeckerf8bb5ca2021-02-01 00:05:46 +01002357
2358 return ret;
Paul E. McKenney96d3fd02013-10-04 14:33:34 -07002359}
2360
Paul E. McKenney8be6e1b2017-04-29 20:03:20 -07002361/* Do a deferred wakeup of rcu_nocb_kthread() from a timer handler. */
Kees Cookfd30b712017-10-22 17:58:54 -07002362static void do_nocb_deferred_wakeup_timer(struct timer_list *t)
Paul E. McKenney8be6e1b2017-04-29 20:03:20 -07002363{
Kees Cookfd30b712017-10-22 17:58:54 -07002364 struct rcu_data *rdp = from_timer(rdp, t, nocb_timer);
2365
2366 do_nocb_deferred_wakeup_common(rdp);
Paul E. McKenney8be6e1b2017-04-29 20:03:20 -07002367}
2368
2369/*
2370 * Do a deferred wakeup of rcu_nocb_kthread() from fastpath.
2371 * This means we do an inexact common-case check. Note that if
2372 * we miss, ->nocb_timer will eventually clean things up.
2373 */
Frederic Weisbeckerf8bb5ca2021-02-01 00:05:46 +01002374static bool do_nocb_deferred_wakeup(struct rcu_data *rdp)
Paul E. McKenney8be6e1b2017-04-29 20:03:20 -07002375{
2376 if (rcu_nocb_need_deferred_wakeup(rdp))
Frederic Weisbeckerf8bb5ca2021-02-01 00:05:46 +01002377 return do_nocb_deferred_wakeup_common(rdp);
2378 return false;
Paul E. McKenney8be6e1b2017-04-29 20:03:20 -07002379}
2380
Frederic Weisbecker43789ef2021-02-01 00:05:45 +01002381void rcu_nocb_flush_deferred_wakeup(void)
2382{
2383 do_nocb_deferred_wakeup(this_cpu_ptr(&rcu_data));
2384}
Frederic Weisbecker4ae7dc92021-02-01 00:05:48 +01002385EXPORT_SYMBOL_GPL(rcu_nocb_flush_deferred_wakeup);
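
/*
 * Usage sketch (illustrative, not part of this file): a caller about to
 * stop the tick or enter a long idle period can flush any deferred
 * no-CBs wakeup first, so that a needed rcuog kthread wakeup is not left
 * pending behind it:
 *
 *	static void example_pre_idle(void)	// hypothetical helper
 *	{
 *		rcu_nocb_flush_deferred_wakeup();
 *		// ... proceed with idle entry ...
 *	}
 */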
Frederic Weisbecker43789ef2021-02-01 00:05:45 +01002386
Frederic Weisbecker254e11ef2020-11-13 13:13:22 +01002387static int rdp_offload_toggle(struct rcu_data *rdp,
2388 bool offload, unsigned long flags)
2389 __releases(rdp->nocb_lock)
Frederic Weisbeckerd97b0782020-11-13 13:13:19 +01002390{
2391 struct rcu_segcblist *cblist = &rdp->cblist;
Frederic Weisbecker5bb39dc2020-11-13 13:13:21 +01002392 struct rcu_data *rdp_gp = rdp->nocb_gp_rdp;
Frederic Weisbecker254e11ef2020-11-13 13:13:22 +01002393 bool wake_gp = false;
2394
2395 rcu_segcblist_offload(cblist, offload);
2396
2397 if (rdp->nocb_cb_sleep)
2398 rdp->nocb_cb_sleep = false;
2399 rcu_nocb_unlock_irqrestore(rdp, flags);
2400
2401 /*
 2402 * Ignore the former value of nocb_cb_sleep and force the wakeup,
 2403 * as it could already have been spuriously set to false.
2404 */
2405 swake_up_one(&rdp->nocb_cb_wq);
2406
2407 raw_spin_lock_irqsave(&rdp_gp->nocb_gp_lock, flags);
2408 if (rdp_gp->nocb_gp_sleep) {
2409 rdp_gp->nocb_gp_sleep = false;
2410 wake_gp = true;
2411 }
2412 raw_spin_unlock_irqrestore(&rdp_gp->nocb_gp_lock, flags);
2413
2414 if (wake_gp)
2415 wake_up_process(rdp_gp->nocb_gp_kthread);
2416
2417 return 0;
2418}
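
/*
 * Informal summary of the (de-)offload handshake used by the callers
 * below: rdp_offload_toggle() flips SEGCBLIST_OFFLOADED, forces a wakeup
 * of both the CB and GP kthreads, and the caller then waits on
 * rdp->nocb_state_wq until those kthreads have set (offload) or cleared
 * (de-offload) SEGCBLIST_KTHREAD_CB and SEGCBLIST_KTHREAD_GP.
 */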
2419
Frederic Weisbecker64305db2021-01-28 18:12:09 +01002420static long rcu_nocb_rdp_deoffload(void *arg)
Frederic Weisbecker254e11ef2020-11-13 13:13:22 +01002421{
Frederic Weisbecker64305db2021-01-28 18:12:09 +01002422 struct rcu_data *rdp = arg;
Frederic Weisbecker254e11ef2020-11-13 13:13:22 +01002423 struct rcu_segcblist *cblist = &rdp->cblist;
Frederic Weisbeckerd97b0782020-11-13 13:13:19 +01002424 unsigned long flags;
Frederic Weisbecker254e11ef2020-11-13 13:13:22 +01002425 int ret;
Frederic Weisbeckerd97b0782020-11-13 13:13:19 +01002426
Frederic Weisbecker64305db2021-01-28 18:12:09 +01002427 WARN_ON_ONCE(rdp->cpu != raw_smp_processor_id());
2428
Paul E. McKenneyf7590812020-12-21 11:17:16 -08002429 pr_info("De-offloading %d\n", rdp->cpu);
Frederic Weisbeckerd97b0782020-11-13 13:13:19 +01002430
2431 rcu_nocb_lock_irqsave(rdp, flags);
Frederic Weisbeckeref005342020-11-13 13:13:20 +01002432 /*
Frederic Weisbecker76d00b42021-02-23 01:10:00 +01002433 * Flush once and for all now. This suffices because we are
2434 * running on the target CPU holding ->nocb_lock (thus having
2435 * interrupts disabled), and because rdp_offload_toggle()
2436 * invokes rcu_segcblist_offload(), which clears SEGCBLIST_OFFLOADED.
2437 * Thus future calls to rcu_segcblist_completely_offloaded() will
2438 * return false, which means that future calls to rcu_nocb_try_bypass()
2439 * will refuse to put anything into the bypass.
Frederic Weisbeckeref005342020-11-13 13:13:20 +01002440 */
Frederic Weisbecker76d00b42021-02-23 01:10:00 +01002441 WARN_ON_ONCE(!rcu_nocb_flush_bypass(rdp, NULL, jiffies));
Frederic Weisbecker254e11ef2020-11-13 13:13:22 +01002442 ret = rdp_offload_toggle(rdp, false, flags);
Frederic Weisbecker5bb39dc2020-11-13 13:13:21 +01002443 swait_event_exclusive(rdp->nocb_state_wq,
2444 !rcu_segcblist_test_flags(cblist, SEGCBLIST_KTHREAD_CB |
2445 SEGCBLIST_KTHREAD_GP));
Frederic Weisbecker69cdea82020-11-13 13:13:23 +01002446 rcu_nocb_lock_irqsave(rdp, flags);
Frederic Weisbecker314202f2020-11-13 13:13:24 +01002447 /* Make sure nocb timer won't stay around */
Frederic Weisbecker69cdea82020-11-13 13:13:23 +01002448 WRITE_ONCE(rdp->nocb_defer_wakeup, RCU_NOCB_WAKE_OFF);
2449 rcu_nocb_unlock_irqrestore(rdp, flags);
2450 del_timer_sync(&rdp->nocb_timer);
2451
Frederic Weisbecker314202f2020-11-13 13:13:24 +01002452 /*
Frederic Weisbecker76d00b42021-02-23 01:10:00 +01002453 * Theoretically we could set SEGCBLIST_SOFTIRQ_ONLY with CB unlocked
2454 * and IRQs disabled but let's be paranoid.
Frederic Weisbecker314202f2020-11-13 13:13:24 +01002455 */
2456 rcu_nocb_lock_irqsave(rdp, flags);
Frederic Weisbeckerb9ced9e2020-11-13 13:13:25 +01002457 rcu_segcblist_set_flags(cblist, SEGCBLIST_SOFTIRQ_ONLY);
2458 /*
2459 * With SEGCBLIST_SOFTIRQ_ONLY, we can't use
Frederic Weisbecker76d00b42021-02-23 01:10:00 +01002460 * rcu_nocb_unlock_irqrestore() anymore.
Frederic Weisbeckerb9ced9e2020-11-13 13:13:25 +01002461 */
2462 raw_spin_unlock_irqrestore(&rdp->nocb_lock, flags);
Frederic Weisbecker314202f2020-11-13 13:13:24 +01002463
Frederic Weisbecker76d00b42021-02-23 01:10:00 +01002464 /* Sanity check */
2465 WARN_ON_ONCE(rcu_cblist_n_cbs(&rdp->nocb_bypass));
2466
2467
Frederic Weisbecker254e11ef2020-11-13 13:13:22 +01002468 return ret;
Frederic Weisbeckerd97b0782020-11-13 13:13:19 +01002469}
2470
Frederic Weisbeckerd97b0782020-11-13 13:13:19 +01002471int rcu_nocb_cpu_deoffload(int cpu)
2472{
2473 struct rcu_data *rdp = per_cpu_ptr(&rcu_data, cpu);
2474 int ret = 0;
2475
2476 if (rdp == rdp->nocb_gp_rdp) {
2477 pr_info("Can't deoffload an rdp GP leader (yet)\n");
2478 return -EINVAL;
2479 }
2480 mutex_lock(&rcu_state.barrier_mutex);
2481 cpus_read_lock();
Frederic Weisbecker3820b512020-11-12 01:51:21 +01002482 if (rcu_rdp_is_offloaded(rdp)) {
Frederic Weisbecker64305db2021-01-28 18:12:09 +01002483 if (cpu_online(cpu)) {
Frederic Weisbeckerd97b0782020-11-13 13:13:19 +01002484 ret = work_on_cpu(cpu, rcu_nocb_rdp_deoffload, rdp);
Frederic Weisbecker64305db2021-01-28 18:12:09 +01002485 if (!ret)
2486 cpumask_clear_cpu(cpu, rcu_nocb_mask);
2487 } else {
2488 pr_info("NOCB: Can't CB-deoffload an offline CPU\n");
2489 ret = -EINVAL;
2490 }
Frederic Weisbeckerd97b0782020-11-13 13:13:19 +01002491 }
2492 cpus_read_unlock();
2493 mutex_unlock(&rcu_state.barrier_mutex);
2494
2495 return ret;
2496}
2497EXPORT_SYMBOL_GPL(rcu_nocb_cpu_deoffload);
2498
Frederic Weisbecker64305db2021-01-28 18:12:09 +01002499static long rcu_nocb_rdp_offload(void *arg)
Frederic Weisbecker254e11ef2020-11-13 13:13:22 +01002500{
Frederic Weisbecker64305db2021-01-28 18:12:09 +01002501 struct rcu_data *rdp = arg;
Frederic Weisbecker254e11ef2020-11-13 13:13:22 +01002502 struct rcu_segcblist *cblist = &rdp->cblist;
2503 unsigned long flags;
2504 int ret;
2505
Frederic Weisbecker64305db2021-01-28 18:12:09 +01002506 WARN_ON_ONCE(rdp->cpu != raw_smp_processor_id());
Frederic Weisbecker254e11ef2020-11-13 13:13:22 +01002507 /*
 2508 * For now we only support re-offload, i.e., the rdp must have
 2509 * been offloaded at boot.
2510 */
2511 if (!rdp->nocb_gp_rdp)
2512 return -EINVAL;
2513
Paul E. McKenneyf7590812020-12-21 11:17:16 -08002514 pr_info("Offloading %d\n", rdp->cpu);
Frederic Weisbecker254e11ef2020-11-13 13:13:22 +01002515 /*
2516 * Can't use rcu_nocb_lock_irqsave() while we are in
2517 * SEGCBLIST_SOFTIRQ_ONLY mode.
2518 */
2519 raw_spin_lock_irqsave(&rdp->nocb_lock, flags);
Frederic Weisbecker69cdea82020-11-13 13:13:23 +01002520 /* Re-enable nocb timer */
2521 WRITE_ONCE(rdp->nocb_defer_wakeup, RCU_NOCB_WAKE_NOT);
Frederic Weisbecker254e11ef2020-11-13 13:13:22 +01002522 /*
2523 * We didn't take the nocb lock while working on the
2524 * rdp->cblist in SEGCBLIST_SOFTIRQ_ONLY mode.
 2525 * Every modification that was previously done on rdp->cblist
 2526 * must be visible remotely to the nocb kthreads
2527 * upon wake up after reading the cblist flags.
2528 *
2529 * The layout against nocb_lock enforces that ordering:
2530 *
 2531 * rcu_nocb_rdp_offload()   nocb_cb_wait()/nocb_gp_wait()
2532 * ------------------------- ----------------------------
2533 * WRITE callbacks rcu_nocb_lock()
2534 * rcu_nocb_lock() READ flags
2535 * WRITE flags READ callbacks
2536 * rcu_nocb_unlock() rcu_nocb_unlock()
2537 */
2538 ret = rdp_offload_toggle(rdp, true, flags);
2539 swait_event_exclusive(rdp->nocb_state_wq,
2540 rcu_segcblist_test_flags(cblist, SEGCBLIST_KTHREAD_CB) &&
2541 rcu_segcblist_test_flags(cblist, SEGCBLIST_KTHREAD_GP));
2542
2543 return ret;
2544}
2545
Frederic Weisbecker254e11ef2020-11-13 13:13:22 +01002546int rcu_nocb_cpu_offload(int cpu)
2547{
2548 struct rcu_data *rdp = per_cpu_ptr(&rcu_data, cpu);
2549 int ret = 0;
2550
2551 mutex_lock(&rcu_state.barrier_mutex);
2552 cpus_read_lock();
Frederic Weisbecker3820b512020-11-12 01:51:21 +01002553 if (!rcu_rdp_is_offloaded(rdp)) {
Frederic Weisbecker64305db2021-01-28 18:12:09 +01002554 if (cpu_online(cpu)) {
Frederic Weisbecker254e11ef2020-11-13 13:13:22 +01002555 ret = work_on_cpu(cpu, rcu_nocb_rdp_offload, rdp);
Frederic Weisbecker64305db2021-01-28 18:12:09 +01002556 if (!ret)
2557 cpumask_set_cpu(cpu, rcu_nocb_mask);
2558 } else {
2559 pr_info("NOCB: Can't CB-offload an offline CPU\n");
2560 ret = -EINVAL;
2561 }
Frederic Weisbecker254e11ef2020-11-13 13:13:22 +01002562 }
2563 cpus_read_unlock();
2564 mutex_unlock(&rcu_state.barrier_mutex);
2565
2566 return ret;
2567}
2568EXPORT_SYMBOL_GPL(rcu_nocb_cpu_offload);
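
/*
 * Usage sketch (illustrative only): toggling callback offloading for a
 * CPU at runtime, for example from a hypothetical debugfs or module
 * hook.  Both functions return 0 on success or a negative errno:
 *
 *	int example_toggle_nocb(int cpu, bool offload)	// hypothetical
 *	{
 *		return offload ? rcu_nocb_cpu_offload(cpu)
 *			       : rcu_nocb_cpu_deoffload(cpu);
 *	}
 */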
2569
Paul E. McKenneyf4579fc2014-07-25 11:21:47 -07002570void __init rcu_init_nohz(void)
2571{
2572 int cpu;
Paul E. McKenneyef126202018-02-28 10:34:54 -08002573 bool need_rcu_nocb_mask = false;
Paul E. McKenneye83e73f2019-05-14 09:50:49 -07002574 struct rcu_data *rdp;
Paul E. McKenneyf4579fc2014-07-25 11:21:47 -07002575
Paul E. McKenneyf4579fc2014-07-25 11:21:47 -07002576#if defined(CONFIG_NO_HZ_FULL)
2577 if (tick_nohz_full_running && cpumask_weight(tick_nohz_full_mask))
2578 need_rcu_nocb_mask = true;
2579#endif /* #if defined(CONFIG_NO_HZ_FULL) */
2580
Rakib Mullick84b12b72017-11-17 21:40:15 +06002581 if (!cpumask_available(rcu_nocb_mask) && need_rcu_nocb_mask) {
Pranith Kumar949cccd2014-07-25 16:02:07 -07002582 if (!zalloc_cpumask_var(&rcu_nocb_mask, GFP_KERNEL)) {
2583 pr_info("rcu_nocb_mask allocation failed, callback offloading disabled.\n");
2584 return;
2585 }
Paul E. McKenneyf4579fc2014-07-25 11:21:47 -07002586 }
Rakib Mullick84b12b72017-11-17 21:40:15 +06002587 if (!cpumask_available(rcu_nocb_mask))
Paul E. McKenneyf4579fc2014-07-25 11:21:47 -07002588 return;
2589
Paul E. McKenneyf4579fc2014-07-25 11:21:47 -07002590#if defined(CONFIG_NO_HZ_FULL)
2591 if (tick_nohz_full_running)
2592 cpumask_or(rcu_nocb_mask, rcu_nocb_mask, tick_nohz_full_mask);
2593#endif /* #if defined(CONFIG_NO_HZ_FULL) */
2594
2595 if (!cpumask_subset(rcu_nocb_mask, cpu_possible_mask)) {
Paul E. McKenneyef126202018-02-28 10:34:54 -08002596 pr_info("\tNote: kernel parameter 'rcu_nocbs=', 'nohz_full', or 'isolcpus=' contains nonexistent CPUs.\n");
Paul E. McKenneyf4579fc2014-07-25 11:21:47 -07002597 cpumask_and(rcu_nocb_mask, cpu_possible_mask,
2598 rcu_nocb_mask);
2599 }
Paul E. McKenney30166112017-12-04 09:48:59 -08002600 if (cpumask_empty(rcu_nocb_mask))
2601 pr_info("\tOffload RCU callbacks from CPUs: (none).\n");
2602 else
2603 pr_info("\tOffload RCU callbacks from CPUs: %*pbl.\n",
2604 cpumask_pr_args(rcu_nocb_mask));
Paul E. McKenneyf4579fc2014-07-25 11:21:47 -07002605 if (rcu_nocb_poll)
2606 pr_info("\tPoll for callbacks from no-CBs CPUs.\n");
2607
Paul E. McKenneye83e73f2019-05-14 09:50:49 -07002608 for_each_cpu(cpu, rcu_nocb_mask) {
2609 rdp = per_cpu_ptr(&rcu_data, cpu);
2610 if (rcu_segcblist_empty(&rdp->cblist))
2611 rcu_segcblist_init(&rdp->cblist);
Frederic Weisbeckerd97b0782020-11-13 13:13:19 +01002612 rcu_segcblist_offload(&rdp->cblist, true);
2613 rcu_segcblist_set_flags(&rdp->cblist, SEGCBLIST_KTHREAD_CB);
Frederic Weisbecker5bb39dc2020-11-13 13:13:21 +01002614 rcu_segcblist_set_flags(&rdp->cblist, SEGCBLIST_KTHREAD_GP);
Paul E. McKenneye83e73f2019-05-14 09:50:49 -07002615 }
Paul E. McKenneyb97d23c2018-07-04 15:35:00 -07002616 rcu_organize_nocb_kthreads();
Paul E. McKenney3fbfbf72012-08-19 21:35:53 -07002617}
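
/*
 * Example (informal): on an 8-CPU system booted with
 * "nohz_full=1-7 rcu_nocbs=1-7", rcu_init_nohz() ends up with CPUs 1-7
 * in rcu_nocb_mask, marks their cblists offloaded with both KTHREAD
 * flags set, and rcu_organize_nocb_kthreads() below then groups them
 * under their rcuog GP kthreads.  (The rcu_nocbs= parsing itself lives
 * outside this excerpt.)
 */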
2618
2619/* Initialize per-rcu_data variables for no-CBs CPUs. */
2620static void __init rcu_boot_init_nocb_percpu_data(struct rcu_data *rdp)
2621{
Paul E. McKenney12f54c3a2019-03-29 16:43:51 -07002622 init_swait_queue_head(&rdp->nocb_cb_wq);
2623 init_swait_queue_head(&rdp->nocb_gp_wq);
Frederic Weisbeckerd97b0782020-11-13 13:13:19 +01002624 init_swait_queue_head(&rdp->nocb_state_wq);
Paul E. McKenney8be6e1b2017-04-29 20:03:20 -07002625 raw_spin_lock_init(&rdp->nocb_lock);
Paul E. McKenneyd1b222c2019-07-02 16:03:33 -07002626 raw_spin_lock_init(&rdp->nocb_bypass_lock);
Paul E. McKenney4fd8c5f2019-06-02 13:41:08 -07002627 raw_spin_lock_init(&rdp->nocb_gp_lock);
Kees Cookfd30b712017-10-22 17:58:54 -07002628 timer_setup(&rdp->nocb_timer, do_nocb_deferred_wakeup_timer, 0);
Paul E. McKenneyd1b222c2019-07-02 16:03:33 -07002629 timer_setup(&rdp->nocb_bypass_timer, do_nocb_bypass_wakeup_timer, 0);
2630 rcu_cblist_init(&rdp->nocb_bypass);
Paul E. McKenney3fbfbf72012-08-19 21:35:53 -07002631}
2632
Paul E. McKenney35ce7f22014-07-11 11:30:24 -07002633/*
2634 * If the specified CPU is a no-CBs CPU that does not already have its
Paul E. McKenney12f54c3a2019-03-29 16:43:51 -07002635 * rcuo CB kthread, spawn it. Additionally, if the rcuo GP kthread
2636 * for this CPU's group has not yet been created, spawn it as well.
Paul E. McKenney35ce7f22014-07-11 11:30:24 -07002637 */
Paul E. McKenney4580b052018-07-03 17:22:34 -07002638static void rcu_spawn_one_nocb_kthread(int cpu)
Paul E. McKenney35ce7f22014-07-11 11:30:24 -07002639{
Paul E. McKenney12f54c3a2019-03-29 16:43:51 -07002640 struct rcu_data *rdp = per_cpu_ptr(&rcu_data, cpu);
2641 struct rcu_data *rdp_gp;
Paul E. McKenney35ce7f22014-07-11 11:30:24 -07002642 struct task_struct *t;
2643
2644 /*
2645 * If this isn't a no-CBs CPU or if it already has an rcuo kthread,
2646 * then nothing to do.
2647 */
Paul E. McKenney12f54c3a2019-03-29 16:43:51 -07002648 if (!rcu_is_nocb_cpu(cpu) || rdp->nocb_cb_kthread)
Paul E. McKenney35ce7f22014-07-11 11:30:24 -07002649 return;
2650
Paul E. McKenney6484fe52019-03-28 15:44:18 -07002651 /* If we didn't spawn the GP kthread first, reorganize! */
Paul E. McKenney12f54c3a2019-03-29 16:43:51 -07002652 rdp_gp = rdp->nocb_gp_rdp;
2653 if (!rdp_gp->nocb_gp_kthread) {
2654 t = kthread_run(rcu_nocb_gp_kthread, rdp_gp,
2655 "rcuog/%d", rdp_gp->cpu);
2656 if (WARN_ONCE(IS_ERR(t), "%s: Could not start rcuo GP kthread, OOM is now expected behavior\n", __func__))
2657 return;
2658 WRITE_ONCE(rdp_gp->nocb_gp_kthread, t);
Paul E. McKenney35ce7f22014-07-11 11:30:24 -07002659 }
2660
Paul E. McKenney0ae86a22018-07-07 18:12:26 -07002661 /* Spawn the kthread for this CPU. */
Paul E. McKenney12f54c3a2019-03-29 16:43:51 -07002662 t = kthread_run(rcu_nocb_cb_kthread, rdp,
Paul E. McKenney4580b052018-07-03 17:22:34 -07002663 "rcuo%c/%d", rcu_state.abbr, cpu);
Paul E. McKenney12f54c3a2019-03-29 16:43:51 -07002664 if (WARN_ONCE(IS_ERR(t), "%s: Could not start rcuo CB kthread, OOM is now expected behavior\n", __func__))
Paul E. McKenney92137842018-10-22 08:26:00 -07002665 return;
Paul E. McKenney12f54c3a2019-03-29 16:43:51 -07002666 WRITE_ONCE(rdp->nocb_cb_kthread, t);
2667 WRITE_ONCE(rdp->nocb_gp_kthread, rdp_gp->nocb_gp_kthread);
Paul E. McKenney35ce7f22014-07-11 11:30:24 -07002668}
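
/*
 * Naming note (informal): the GP kthreads spawned above show up as
 * "rcuog/<cpu>" and the per-CPU CB kthreads as "rcuo<abbr>/<cpu>", where
 * <abbr> is rcu_state.abbr (for example "rcuop/3" on a preemptible
 * kernel), which makes them easy to spot in ps/top output on no-CBs CPUs.
 */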
2669
2670/*
2671 * If the specified CPU is a no-CBs CPU that does not already have its
Paul E. McKenneyad368d12018-11-27 13:55:53 -08002672 * rcuo kthread, spawn it.
Paul E. McKenney35ce7f22014-07-11 11:30:24 -07002673 */
Paul E. McKenneyad368d12018-11-27 13:55:53 -08002674static void rcu_spawn_cpu_nocb_kthread(int cpu)
Paul E. McKenney35ce7f22014-07-11 11:30:24 -07002675{
Paul E. McKenney35ce7f22014-07-11 11:30:24 -07002676 if (rcu_scheduler_fully_active)
Paul E. McKenneyb97d23c2018-07-04 15:35:00 -07002677 rcu_spawn_one_nocb_kthread(cpu);
Paul E. McKenney35ce7f22014-07-11 11:30:24 -07002678}
2679
2680/*
2681 * Once the scheduler is running, spawn rcuo kthreads for all online
2682 * no-CBs CPUs. This assumes that the early_initcall()s happen before
2683 * non-boot CPUs come online -- if this changes, we will need to add
2684 * some mutual exclusion.
2685 */
2686static void __init rcu_spawn_nocb_kthreads(void)
2687{
2688 int cpu;
2689
2690 for_each_online_cpu(cpu)
Paul E. McKenneyad368d12018-11-27 13:55:53 -08002691 rcu_spawn_cpu_nocb_kthread(cpu);
Paul E. McKenney35ce7f22014-07-11 11:30:24 -07002692}
2693
Paul E. McKenney6484fe52019-03-28 15:44:18 -07002694/* How many CB CPU IDs per GP kthread? Default of -1 for sqrt(nr_cpu_ids). */
Paul E. McKenneyf7c612b2019-04-02 08:05:55 -07002695static int rcu_nocb_gp_stride = -1;
2696module_param(rcu_nocb_gp_stride, int, 0444);
Paul E. McKenneyfbce7492014-06-24 09:26:11 -07002697
2698/*
Paul E. McKenney6484fe52019-03-28 15:44:18 -07002699 * Initialize GP-CB relationships for all no-CBs CPUs.
Paul E. McKenneyfbce7492014-06-24 09:26:11 -07002700 */
Paul E. McKenney4580b052018-07-03 17:22:34 -07002701static void __init rcu_organize_nocb_kthreads(void)
Paul E. McKenney3fbfbf72012-08-19 21:35:53 -07002702{
2703 int cpu;
Paul E. McKenney18cd8c92019-06-01 05:12:36 -07002704 bool firsttime = true;
Stefan Reiter610dea32019-10-04 19:49:10 +00002705 bool gotnocbs = false;
2706 bool gotnocbscbs = true;
Paul E. McKenneyf7c612b2019-04-02 08:05:55 -07002707 int ls = rcu_nocb_gp_stride;
Paul E. McKenney6484fe52019-03-28 15:44:18 -07002708 int nl = 0; /* Next GP kthread. */
Paul E. McKenney3fbfbf72012-08-19 21:35:53 -07002709 struct rcu_data *rdp;
Paul E. McKenney0bdc33d2019-03-31 16:20:52 -07002710 struct rcu_data *rdp_gp = NULL; /* Suppress misguided gcc warn. */
Paul E. McKenneyfbce7492014-06-24 09:26:11 -07002711 struct rcu_data *rdp_prev = NULL;
Paul E. McKenney3fbfbf72012-08-19 21:35:53 -07002712
Rakib Mullick84b12b72017-11-17 21:40:15 +06002713 if (!cpumask_available(rcu_nocb_mask))
Paul E. McKenney3fbfbf72012-08-19 21:35:53 -07002714 return;
Paul E. McKenneyfbce7492014-06-24 09:26:11 -07002715 if (ls == -1) {
Paul E. McKenney9fcb09b2019-06-01 05:14:47 -07002716 ls = nr_cpu_ids / int_sqrt(nr_cpu_ids);
Paul E. McKenneyf7c612b2019-04-02 08:05:55 -07002717 rcu_nocb_gp_stride = ls;
Paul E. McKenneyfbce7492014-06-24 09:26:11 -07002718 }
2719
2720 /*
Paul E. McKenney9831ce32017-01-02 14:24:24 -08002721 * Each pass through this loop sets up one rcu_data structure.
2722 * Should the corresponding CPU come online in the future, then
2723 * we will spawn the needed set of rcu_nocb_kthread() kthreads.
Paul E. McKenneyfbce7492014-06-24 09:26:11 -07002724 */
Paul E. McKenney3fbfbf72012-08-19 21:35:53 -07002725 for_each_cpu(cpu, rcu_nocb_mask) {
Paul E. McKenneyda1df502018-07-03 15:37:16 -07002726 rdp = per_cpu_ptr(&rcu_data, cpu);
Paul E. McKenneyfbce7492014-06-24 09:26:11 -07002727 if (rdp->cpu >= nl) {
Paul E. McKenney6484fe52019-03-28 15:44:18 -07002728 /* New GP kthread, set up for CBs & next GP. */
Stefan Reiter610dea32019-10-04 19:49:10 +00002729 gotnocbs = true;
Paul E. McKenneyfbce7492014-06-24 09:26:11 -07002730 nl = DIV_ROUND_UP(rdp->cpu + 1, ls) * ls;
Paul E. McKenney58bf6f72019-03-28 15:33:59 -07002731 rdp->nocb_gp_rdp = rdp;
Paul E. McKenney0bdc33d2019-03-31 16:20:52 -07002732 rdp_gp = rdp;
Stefan Reiter610dea32019-10-04 19:49:10 +00002733 if (dump_tree) {
2734 if (!firsttime)
2735 pr_cont("%s\n", gotnocbscbs
2736 ? "" : " (self only)");
2737 gotnocbscbs = false;
2738 firsttime = false;
2739 pr_alert("%s: No-CB GP kthread CPU %d:",
2740 __func__, cpu);
2741 }
Paul E. McKenneyfbce7492014-06-24 09:26:11 -07002742 } else {
Paul E. McKenney6484fe52019-03-28 15:44:18 -07002743 /* Another CB kthread, link to previous GP kthread. */
Stefan Reiter610dea32019-10-04 19:49:10 +00002744 gotnocbscbs = true;
Paul E. McKenney0bdc33d2019-03-31 16:20:52 -07002745 rdp->nocb_gp_rdp = rdp_gp;
Paul E. McKenney58bf6f72019-03-28 15:33:59 -07002746 rdp_prev->nocb_next_cb_rdp = rdp;
Stefan Reiter610dea32019-10-04 19:49:10 +00002747 if (dump_tree)
2748 pr_cont(" %d", cpu);
Paul E. McKenneyfbce7492014-06-24 09:26:11 -07002749 }
2750 rdp_prev = rdp;
Paul E. McKenney3fbfbf72012-08-19 21:35:53 -07002751 }
Stefan Reiter610dea32019-10-04 19:49:10 +00002752 if (gotnocbs && dump_tree)
2753 pr_cont("%s\n", gotnocbscbs ? "" : " (self only)");
Paul E. McKenney3fbfbf72012-08-19 21:35:53 -07002754}
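
/*
 * Worked example (informal): with rcu_nocb_gp_stride left at -1 and
 * nr_cpu_ids == 16, ls = 16 / int_sqrt(16) = 4.  If CPUs 0-15 are all in
 * rcu_nocb_mask, CPU 0 becomes the GP leader for CPUs 0-3, CPU 4 for
 * CPUs 4-7, and so on, giving four rcuog kthreads of four CPUs each.
 */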
2755
Paul E. McKenney5ab7ab82018-09-21 18:08:09 -07002756/*
2757 * Bind the current task to the offloaded CPUs. If there are no offloaded
2758 * CPUs, leave the task unbound. Splat if the bind attempt fails.
2759 */
2760void rcu_bind_current_to_nocb(void)
2761{
2762 if (cpumask_available(rcu_nocb_mask) && cpumask_weight(rcu_nocb_mask))
2763 WARN_ON(sched_setaffinity(current->pid, rcu_nocb_mask));
2764}
2765EXPORT_SYMBOL_GPL(rcu_bind_current_to_nocb);
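
/*
 * Usage sketch (illustrative): a kthread that should run near the
 * offloaded CPUs, for example an RCU torture or forward-progress helper,
 * can bind itself early on:
 *
 *	static int example_kthread(void *unused)	// hypothetical
 *	{
 *		rcu_bind_current_to_nocb();
 *		// ... main loop ...
 *		return 0;
 *	}
 */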
2766
Paul E. McKenney34169062020-12-18 10:20:34 -08002767// The ->on_cpu field is available only in CONFIG_SMP=y, so...
2768#ifdef CONFIG_SMP
2769static char *show_rcu_should_be_on_cpu(struct task_struct *tsp)
2770{
2771 return tsp && tsp->state == TASK_RUNNING && !tsp->on_cpu ? "!" : "";
2772}
2773#else // #ifdef CONFIG_SMP
2774static char *show_rcu_should_be_on_cpu(struct task_struct *tsp)
2775{
2776 return "";
2777}
2778#endif // #else #ifdef CONFIG_SMP
2779
Paul E. McKenneyf7a81b12019-06-25 13:32:51 -07002780/*
2781 * Dump out nocb grace-period kthread state for the specified rcu_data
2782 * structure.
2783 */
2784static void show_rcu_nocb_gp_state(struct rcu_data *rdp)
2785{
2786 struct rcu_node *rnp = rdp->mynode;
2787
Paul E. McKenney34169062020-12-18 10:20:34 -08002788 pr_info("nocb GP %d %c%c%c%c%c%c %c[%c%c] %c%c:%ld rnp %d:%d %lu %c CPU %d%s\n",
Paul E. McKenneyf7a81b12019-06-25 13:32:51 -07002789 rdp->cpu,
2790 "kK"[!!rdp->nocb_gp_kthread],
2791 "lL"[raw_spin_is_locked(&rdp->nocb_gp_lock)],
2792 "dD"[!!rdp->nocb_defer_wakeup],
2793 "tT"[timer_pending(&rdp->nocb_timer)],
2794 "bB"[timer_pending(&rdp->nocb_bypass_timer)],
2795 "sS"[!!rdp->nocb_gp_sleep],
2796 ".W"[swait_active(&rdp->nocb_gp_wq)],
2797 ".W"[swait_active(&rnp->nocb_gp_wq[0])],
2798 ".W"[swait_active(&rnp->nocb_gp_wq[1])],
2799 ".B"[!!rdp->nocb_gp_bypass],
2800 ".G"[!!rdp->nocb_gp_gp],
2801 (long)rdp->nocb_gp_seq,
Paul E. McKenney34169062020-12-18 10:20:34 -08002802 rnp->grplo, rnp->grphi, READ_ONCE(rdp->nocb_gp_loops),
2803 rdp->nocb_gp_kthread ? task_state_to_char(rdp->nocb_gp_kthread) : '.',
2804 rdp->nocb_cb_kthread ? (int)task_cpu(rdp->nocb_gp_kthread) : -1,
2805 show_rcu_should_be_on_cpu(rdp->nocb_cb_kthread));
Paul E. McKenneyf7a81b12019-06-25 13:32:51 -07002806}
2807
2808/* Dump out nocb kthread state for the specified rcu_data structure. */
2809static void show_rcu_nocb_state(struct rcu_data *rdp)
2810{
Paul E. McKenney34169062020-12-18 10:20:34 -08002811 char bufw[20];
2812 char bufr[20];
Paul E. McKenneyf7a81b12019-06-25 13:32:51 -07002813 struct rcu_segcblist *rsclp = &rdp->cblist;
2814 bool waslocked;
2815 bool wastimer;
2816 bool wassleep;
2817
2818 if (rdp->nocb_gp_rdp == rdp)
2819 show_rcu_nocb_gp_state(rdp);
2820
Paul E. McKenney34169062020-12-18 10:20:34 -08002821 sprintf(bufw, "%ld", rsclp->gp_seq[RCU_WAIT_TAIL]);
2822 sprintf(bufr, "%ld", rsclp->gp_seq[RCU_NEXT_READY_TAIL]);
2823 pr_info(" CB %d^%d->%d %c%c%c%c%c%c F%ld L%ld C%d %c%c%s%c%s%c%c q%ld %c CPU %d%s\n",
Paul E. McKenneyf7a81b12019-06-25 13:32:51 -07002824 rdp->cpu, rdp->nocb_gp_rdp->cpu,
Paul E. McKenney3d0cef52020-12-18 13:17:37 -08002825 rdp->nocb_next_cb_rdp ? rdp->nocb_next_cb_rdp->cpu : -1,
Paul E. McKenneyf7a81b12019-06-25 13:32:51 -07002826 "kK"[!!rdp->nocb_cb_kthread],
2827 "bB"[raw_spin_is_locked(&rdp->nocb_bypass_lock)],
2828 "cC"[!!atomic_read(&rdp->nocb_lock_contended)],
2829 "lL"[raw_spin_is_locked(&rdp->nocb_lock)],
2830 "sS"[!!rdp->nocb_cb_sleep],
2831 ".W"[swait_active(&rdp->nocb_cb_wq)],
2832 jiffies - rdp->nocb_bypass_first,
2833 jiffies - rdp->nocb_nobypass_last,
2834 rdp->nocb_nobypass_count,
2835 ".D"[rcu_segcblist_ready_cbs(rsclp)],
Paul E. McKenney34169062020-12-18 10:20:34 -08002836 ".W"[!rcu_segcblist_segempty(rsclp, RCU_WAIT_TAIL)],
2837 rcu_segcblist_segempty(rsclp, RCU_WAIT_TAIL) ? "" : bufw,
2838 ".R"[!rcu_segcblist_segempty(rsclp, RCU_NEXT_READY_TAIL)],
2839 rcu_segcblist_segempty(rsclp, RCU_NEXT_READY_TAIL) ? "" : bufr,
2840 ".N"[!rcu_segcblist_segempty(rsclp, RCU_NEXT_TAIL)],
Paul E. McKenneyf7a81b12019-06-25 13:32:51 -07002841 ".B"[!!rcu_cblist_n_cbs(&rdp->nocb_bypass)],
Paul E. McKenney34169062020-12-18 10:20:34 -08002842 rcu_segcblist_n_cbs(&rdp->cblist),
2843 rdp->nocb_cb_kthread ? task_state_to_char(rdp->nocb_cb_kthread) : '.',
2844 rdp->nocb_cb_kthread ? (int)task_cpu(rdp->nocb_gp_kthread) : -1,
2845 show_rcu_should_be_on_cpu(rdp->nocb_cb_kthread));
Paul E. McKenneyf7a81b12019-06-25 13:32:51 -07002846
2847 /* It is OK for GP kthreads to have GP state. */
2848 if (rdp->nocb_gp_rdp == rdp)
2849 return;
2850
2851 waslocked = raw_spin_is_locked(&rdp->nocb_gp_lock);
Paul E. McKenney2130c6b2020-06-22 16:46:43 -07002852 wastimer = timer_pending(&rdp->nocb_bypass_timer);
Paul E. McKenneyf7a81b12019-06-25 13:32:51 -07002853 wassleep = swait_active(&rdp->nocb_gp_wq);
Paul E. McKenney2130c6b2020-06-22 16:46:43 -07002854 if (!rdp->nocb_gp_sleep && !waslocked && !wastimer && !wassleep)
Paul E. McKenneyf7a81b12019-06-25 13:32:51 -07002855 return; /* Nothing untoward. */
2856
Paul E. McKenneye082c7b2020-06-22 09:25:34 -07002857 pr_info(" nocb GP activity on CB-only CPU!!! %c%c%c%c %c\n",
Paul E. McKenneyf7a81b12019-06-25 13:32:51 -07002858 "lL"[waslocked],
2859 "dD"[!!rdp->nocb_defer_wakeup],
2860 "tT"[wastimer],
2861 "sS"[!!rdp->nocb_gp_sleep],
2862 ".W"[wassleep]);
2863}
2864
Paul E. McKenney3fbfbf72012-08-19 21:35:53 -07002865#else /* #ifdef CONFIG_RCU_NOCB_CPU */
2866
Paul E. McKenney5d6742b2019-05-15 09:56:40 -07002867/* No ->nocb_lock to acquire. */
2868static void rcu_nocb_lock(struct rcu_data *rdp)
Paul E. McKenneyd7e29932014-10-27 09:15:54 -07002869{
Paul E. McKenney5d6742b2019-05-15 09:56:40 -07002870}
2871
2872/* No ->nocb_lock to release. */
2873static void rcu_nocb_unlock(struct rcu_data *rdp)
2874{
2875}
2876
2877/* No ->nocb_lock to release. */
2878static void rcu_nocb_unlock_irqrestore(struct rcu_data *rdp,
2879 unsigned long flags)
2880{
2881 local_irq_restore(flags);
Paul E. McKenneyd7e29932014-10-27 09:15:54 -07002882}
2883
Paul E. McKenneyd1b222c2019-07-02 16:03:33 -07002884/* Lockdep check that ->cblist may be safely accessed. */
2885static void rcu_lockdep_assert_cblist_protected(struct rcu_data *rdp)
2886{
2887 lockdep_assert_irqs_disabled();
2888}
2889
Paul Gortmakerabedf8e2016-02-19 09:46:41 +01002890static void rcu_nocb_gp_cleanup(struct swait_queue_head *sq)
Paul E. McKenneydae6e642013-02-10 20:48:58 -08002891{
Paul E. McKenneydae6e642013-02-10 20:48:58 -08002892}
2893
Paul Gortmakerabedf8e2016-02-19 09:46:41 +01002894static struct swait_queue_head *rcu_nocb_gp_get(struct rcu_node *rnp)
Daniel Wagner065bb782016-02-19 09:46:40 +01002895{
2896 return NULL;
2897}
2898
Paul E. McKenneydae6e642013-02-10 20:48:58 -08002899static void rcu_init_one_nocb(struct rcu_node *rnp)
2900{
2901}
2902
Paul E. McKenneyd1b222c2019-07-02 16:03:33 -07002903static bool rcu_nocb_flush_bypass(struct rcu_data *rdp, struct rcu_head *rhp,
2904 unsigned long j)
2905{
2906 return true;
2907}
2908
2909static bool rcu_nocb_try_bypass(struct rcu_data *rdp, struct rcu_head *rhp,
2910 bool *was_alldone, unsigned long flags)
2911{
2912 return false;
2913}
2914
Paul E. McKenney5d6742b2019-05-15 09:56:40 -07002915static void __call_rcu_nocb_wake(struct rcu_data *rdp, bool was_empty,
2916 unsigned long flags)
Paul E. McKenney3fbfbf72012-08-19 21:35:53 -07002917{
Paul E. McKenney5d6742b2019-05-15 09:56:40 -07002918 WARN_ON_ONCE(1); /* Should be dead code! */
Paul E. McKenney3fbfbf72012-08-19 21:35:53 -07002919}
2920
Paul E. McKenney3fbfbf72012-08-19 21:35:53 -07002921static void __init rcu_boot_init_nocb_percpu_data(struct rcu_data *rdp)
2922{
2923}
2924
Paul E. McKenney9fdd3bc2014-07-29 14:50:47 -07002925static int rcu_nocb_need_deferred_wakeup(struct rcu_data *rdp)
Paul E. McKenney96d3fd02013-10-04 14:33:34 -07002926{
2927 return false;
2928}
2929
Frederic Weisbeckerf8bb5ca2021-02-01 00:05:46 +01002930static bool do_nocb_deferred_wakeup(struct rcu_data *rdp)
Paul E. McKenney96d3fd02013-10-04 14:33:34 -07002931{
Frederic Weisbeckerf8bb5ca2021-02-01 00:05:46 +01002932 return false;
Paul E. McKenney96d3fd02013-10-04 14:33:34 -07002933}
2934
Paul E. McKenneyad368d12018-11-27 13:55:53 -08002935static void rcu_spawn_cpu_nocb_kthread(int cpu)
Paul E. McKenney35ce7f22014-07-11 11:30:24 -07002936{
2937}
2938
2939static void __init rcu_spawn_nocb_kthreads(void)
Paul E. McKenney3fbfbf72012-08-19 21:35:53 -07002940{
2941}
2942
Paul E. McKenneyf7a81b12019-06-25 13:32:51 -07002943static void show_rcu_nocb_state(struct rcu_data *rdp)
2944{
2945}
2946
Paul E. McKenney3fbfbf72012-08-19 21:35:53 -07002947#endif /* #else #ifdef CONFIG_RCU_NOCB_CPU */
Paul E. McKenney65d798f2013-04-12 16:19:10 -07002948
2949/*
Paul E. McKenneya0969322013-11-08 09:03:10 -08002950 * Is this CPU a NO_HZ_FULL CPU that should ignore RCU so that the
2951 * grace-period kthread will do force_quiescent_state() processing?
2952 * The idea is to avoid waking up RCU core processing on such a
2953 * CPU unless the grace period has extended for too long.
2954 *
2955 * This code relies on the fact that all NO_HZ_FULL CPUs are also
Paul Bolle52e2bb92014-02-09 14:35:11 +01002956 * CONFIG_RCU_NOCB_CPU CPUs.
Paul E. McKenneya0969322013-11-08 09:03:10 -08002957 */
Paul E. McKenney4580b052018-07-03 17:22:34 -07002958static bool rcu_nohz_full_cpu(void)
Paul E. McKenneya0969322013-11-08 09:03:10 -08002959{
2960#ifdef CONFIG_NO_HZ_FULL
2961 if (tick_nohz_full_cpu(smp_processor_id()) &&
Paul E. McKenneyde8e8732018-07-03 17:22:34 -07002962 (!rcu_gp_in_progress() ||
Paul E. McKenneye2f3ccf2020-04-10 17:05:22 -07002963 time_before(jiffies, READ_ONCE(rcu_state.gp_start) + HZ)))
Joe Perches5ce035f2015-03-30 16:46:16 -07002964 return true;
Paul E. McKenneya0969322013-11-08 09:03:10 -08002965#endif /* #ifdef CONFIG_NO_HZ_FULL */
Joe Perches5ce035f2015-03-30 16:46:16 -07002966 return false;
Paul E. McKenneya0969322013-11-08 09:03:10 -08002967}
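
/*
 * In other words (informal): a nohz_full CPU answers "true" here, and is
 * therefore left alone by RCU-core processing, only while no grace period
 * is in progress or while the current one is younger than HZ jiffies
 * (roughly one second).  Once a grace period has dragged on longer than
 * that, the CPU is no longer excused.
 */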
Paul E. McKenney5057f55e52014-04-01 11:20:36 -07002968
2969/*
Paul E. McKenney265f5f22018-03-19 11:53:22 -07002970 * Bind the RCU grace-period kthreads to the housekeeping CPU.
Paul E. McKenney5057f55e52014-04-01 11:20:36 -07002971 */
2972static void rcu_bind_gp_kthread(void)
2973{
Paul E. McKenneyc0f489d2014-06-04 13:46:03 -07002974 if (!tick_nohz_full_enabled())
Paul E. McKenney5057f55e52014-04-01 11:20:36 -07002975 return;
Frederic Weisbeckerde201552017-10-27 04:42:35 +02002976 housekeeping_affine(current, HK_FLAG_RCU);
Paul E. McKenney5057f55e52014-04-01 11:20:36 -07002977}
Paul E. McKenney176f8f72014-08-04 17:43:50 -07002978
2979/* Record the current task on dyntick-idle entry. */
Thomas Gleixnerff5c4f52020-03-13 17:32:17 +01002980static void noinstr rcu_dynticks_task_enter(void)
Paul E. McKenney176f8f72014-08-04 17:43:50 -07002981{
2982#if defined(CONFIG_TASKS_RCU) && defined(CONFIG_NO_HZ_FULL)
Paul E. McKenney7d0ae802015-03-03 14:57:58 -08002983 WRITE_ONCE(current->rcu_tasks_idle_cpu, smp_processor_id());
Paul E. McKenney176f8f72014-08-04 17:43:50 -07002984#endif /* #if defined(CONFIG_TASKS_RCU) && defined(CONFIG_NO_HZ_FULL) */
2985}
2986
2987/* Record no current task on dyntick-idle exit. */
Thomas Gleixnerff5c4f52020-03-13 17:32:17 +01002988static void noinstr rcu_dynticks_task_exit(void)
Paul E. McKenney176f8f72014-08-04 17:43:50 -07002989{
2990#if defined(CONFIG_TASKS_RCU) && defined(CONFIG_NO_HZ_FULL)
Paul E. McKenney7d0ae802015-03-03 14:57:58 -08002991 WRITE_ONCE(current->rcu_tasks_idle_cpu, -1);
Paul E. McKenney176f8f72014-08-04 17:43:50 -07002992#endif /* #if defined(CONFIG_TASKS_RCU) && defined(CONFIG_NO_HZ_FULL) */
2993}
Paul E. McKenney7d0c9c52020-03-19 15:33:12 -07002994
2995/* Turn on heavyweight RCU tasks trace readers on idle/user entry. */
2996static void rcu_dynticks_task_trace_enter(void)
2997{
2998#ifdef CONFIG_TASKS_RCU_TRACE
2999 if (IS_ENABLED(CONFIG_TASKS_TRACE_RCU_READ_MB))
3000 current->trc_reader_special.b.need_mb = true;
3001#endif /* #ifdef CONFIG_TASKS_RCU_TRACE */
3002}
3003
3004/* Turn off heavyweight RCU tasks trace readers on idle/user exit. */
3005static void rcu_dynticks_task_trace_exit(void)
3006{
3007#ifdef CONFIG_TASKS_RCU_TRACE
3008 if (IS_ENABLED(CONFIG_TASKS_TRACE_RCU_READ_MB))
3009 current->trc_reader_special.b.need_mb = false;
3010#endif /* #ifdef CONFIG_TASKS_RCU_TRACE */
3011}