Blame - kernel/rcu/tree_exp.h - SHIFTPHONES/mainline/linux

blob: 011f626b2fd83d63f1c3065ce36d2189d2e86dc1 [file] [log] [blame]

Paul E. McKenney	3549c2b	2016-04-15 16:35:29 -0700	[diff] [blame]	1	/*
				2	* RCU expedited grace periods
				3	*
				4	* This program is free software; you can redistribute it and/or modify
				5	* it under the terms of the GNU General Public License as published by
				6	* the Free Software Foundation; either version 2 of the License, or
				7	* (at your option) any later version.
				8	*
				9	* This program is distributed in the hope that it will be useful,
				10	* but WITHOUT ANY WARRANTY; without even the implied warranty of
				11	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
				12	* GNU General Public License for more details.
				13	*
				14	* You should have received a copy of the GNU General Public License
				15	* along with this program; if not, you can access it online at
				16	* http://www.gnu.org/licenses/gpl-2.0.html.
				17	*
				18	* Copyright IBM Corporation, 2016
				19	*
				20	* Authors: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
				21	*/
				22
				23	/* Wrapper functions for expedited grace periods. */
				24	static void rcu_exp_gp_seq_start(struct rcu_state *rsp)
				25	{
				26	rcu_seq_start(&rsp->expedited_sequence);
				27	}
				28	static void rcu_exp_gp_seq_end(struct rcu_state *rsp)
				29	{
				30	rcu_seq_end(&rsp->expedited_sequence);
				31	smp_mb(); /* Ensure that consecutive grace periods serialize. */
				32	}
				33	static unsigned long rcu_exp_gp_seq_snap(struct rcu_state *rsp)
				34	{
				35	unsigned long s;
				36
				37	smp_mb(); /* Caller's modifications seen first by other CPUs. */
				38	s = rcu_seq_snap(&rsp->expedited_sequence);
				39	trace_rcu_exp_grace_period(rsp->name, s, TPS("snap"));
				40	return s;
				41	}
				42	static bool rcu_exp_gp_seq_done(struct rcu_state *rsp, unsigned long s)
				43	{
				44	return rcu_seq_done(&rsp->expedited_sequence, s);
				45	}
				46
				47	/*
				48	* Reset the ->expmaskinit values in the rcu_node tree to reflect any
				49	* recent CPU-online activity. Note that these masks are not cleared
				50	* when CPUs go offline, so they reflect the union of all CPUs that have
				51	* ever been online. This means that this function normally takes its
				52	* no-work-to-do fastpath.
				53	*/
				54	static void sync_exp_reset_tree_hotplug(struct rcu_state *rsp)
				55	{
				56	bool done;
				57	unsigned long flags;
				58	unsigned long mask;
				59	unsigned long oldmask;
				60	int ncpus = READ_ONCE(rsp->ncpus);
				61	struct rcu_node *rnp;
				62	struct rcu_node *rnp_up;
				63
				64	/* If no new CPUs onlined since last time, nothing to do. */
				65	if (likely(ncpus == rsp->ncpus_snap))
				66	return;
				67	rsp->ncpus_snap = ncpus;
				68
				69	/*
				70	* Each pass through the following loop propagates newly onlined
				71	* CPUs for the current rcu_node structure up the rcu_node tree.
				72	*/
				73	rcu_for_each_leaf_node(rsp, rnp) {
				74	raw_spin_lock_irqsave_rcu_node(rnp, flags);
				75	if (rnp->expmaskinit == rnp->expmaskinitnext) {
				76	raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
				77	continue; /* No new CPUs, nothing to do. */
				78	}
				79
				80	/* Update this node's mask, track old value for propagation. */
				81	oldmask = rnp->expmaskinit;
				82	rnp->expmaskinit = rnp->expmaskinitnext;
				83	raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
				84
				85	/* If was already nonzero, nothing to propagate. */
				86	if (oldmask)
				87	continue;
				88
				89	/* Propagate the new CPU up the tree. */
				90	mask = rnp->grpmask;
				91	rnp_up = rnp->parent;
				92	done = false;
				93	while (rnp_up) {
				94	raw_spin_lock_irqsave_rcu_node(rnp_up, flags);
				95	if (rnp_up->expmaskinit)
				96	done = true;
				97	rnp_up->expmaskinit \|= mask;
				98	raw_spin_unlock_irqrestore_rcu_node(rnp_up, flags);
				99	if (done)
				100	break;
				101	mask = rnp_up->grpmask;
				102	rnp_up = rnp_up->parent;
				103	}
				104	}
				105	}
				106
				107	/*
				108	* Reset the ->expmask values in the rcu_node tree in preparation for
				109	* a new expedited grace period.
				110	*/
				111	static void __maybe_unused sync_exp_reset_tree(struct rcu_state *rsp)
				112	{
				113	unsigned long flags;
				114	struct rcu_node *rnp;
				115
				116	sync_exp_reset_tree_hotplug(rsp);
				117	rcu_for_each_node_breadth_first(rsp, rnp) {
				118	raw_spin_lock_irqsave_rcu_node(rnp, flags);
				119	WARN_ON_ONCE(rnp->expmask);
				120	rnp->expmask = rnp->expmaskinit;
				121	raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
				122	}
				123	}
				124
				125	/*
				126	* Return non-zero if there is no RCU expedited grace period in progress
				127	* for the specified rcu_node structure, in other words, if all CPUs and
				128	* tasks covered by the specified rcu_node structure have done their bit
				129	* for the current expedited grace period. Works only for preemptible
				130	* RCU -- other RCU implementation use other means.
				131	*
				132	* Caller must hold the rcu_state's exp_mutex.
				133	*/
				134	static int sync_rcu_preempt_exp_done(struct rcu_node *rnp)
				135	{
				136	return rnp->exp_tasks == NULL &&
				137	READ_ONCE(rnp->expmask) == 0;
				138	}
				139
				140	/*
				141	* Report the exit from RCU read-side critical section for the last task
				142	* that queued itself during or before the current expedited preemptible-RCU
				143	* grace period. This event is reported either to the rcu_node structure on
				144	* which the task was queued or to one of that rcu_node structure's ancestors,
				145	* recursively up the tree. (Calm down, calm down, we do the recursion
				146	* iteratively!)
				147	*
				148	* Caller must hold the rcu_state's exp_mutex and the specified rcu_node
				149	* structure's ->lock.
				150	*/
				151	static void __rcu_report_exp_rnp(struct rcu_state rsp, struct rcu_node rnp,
				152	bool wake, unsigned long flags)
				153	__releases(rnp->lock)
				154	{
				155	unsigned long mask;
				156
				157	for (;;) {
				158	if (!sync_rcu_preempt_exp_done(rnp)) {
				159	if (!rnp->expmask)
				160	rcu_initiate_boost(rnp, flags);
				161	else
				162	raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
				163	break;
				164	}
				165	if (rnp->parent == NULL) {
				166	raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
				167	if (wake) {
				168	smp_mb(); /* EGP done before wake_up(). */
				169	swake_up(&rsp->expedited_wq);
				170	}
				171	break;
				172	}
				173	mask = rnp->grpmask;
				174	raw_spin_unlock_rcu_node(rnp); /* irqs remain disabled */
				175	rnp = rnp->parent;
				176	raw_spin_lock_rcu_node(rnp); /* irqs already disabled */
				177	WARN_ON_ONCE(!(rnp->expmask & mask));
				178	rnp->expmask &= ~mask;
				179	}
				180	}
				181
				182	/*
				183	* Report expedited quiescent state for specified node. This is a
				184	* lock-acquisition wrapper function for __rcu_report_exp_rnp().
				185	*
				186	* Caller must hold the rcu_state's exp_mutex.
				187	*/
				188	static void __maybe_unused rcu_report_exp_rnp(struct rcu_state *rsp,
				189	struct rcu_node *rnp, bool wake)
				190	{
				191	unsigned long flags;
				192
				193	raw_spin_lock_irqsave_rcu_node(rnp, flags);
				194	__rcu_report_exp_rnp(rsp, rnp, wake, flags);
				195	}
				196
				197	/*
				198	* Report expedited quiescent state for multiple CPUs, all covered by the
				199	* specified leaf rcu_node structure. Caller must hold the rcu_state's
				200	* exp_mutex.
				201	*/
				202	static void rcu_report_exp_cpu_mult(struct rcu_state rsp, struct rcu_node rnp,
				203	unsigned long mask, bool wake)
				204	{
				205	unsigned long flags;
				206
				207	raw_spin_lock_irqsave_rcu_node(rnp, flags);
				208	if (!(rnp->expmask & mask)) {
				209	raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
				210	return;
				211	}
				212	rnp->expmask &= ~mask;
				213	__rcu_report_exp_rnp(rsp, rnp, wake, flags); /* Releases rnp->lock. */
				214	}
				215
				216	/*
				217	* Report expedited quiescent state for specified rcu_data (CPU).
				218	*/
				219	static void rcu_report_exp_rdp(struct rcu_state rsp, struct rcu_data rdp,
				220	bool wake)
				221	{
				222	rcu_report_exp_cpu_mult(rsp, rdp->mynode, rdp->grpmask, wake);
				223	}
				224
				225	/* Common code for synchronize_{rcu,sched}_expedited() work-done checking. */
				226	static bool sync_exp_work_done(struct rcu_state rsp, atomic_long_t stat,
				227	unsigned long s)
				228	{
				229	if (rcu_exp_gp_seq_done(rsp, s)) {
				230	trace_rcu_exp_grace_period(rsp->name, s, TPS("done"));
				231	/* Ensure test happens before caller kfree(). */
				232	smp_mb__before_atomic(); /* ^^^ */
				233	atomic_long_inc(stat);
				234	return true;
				235	}
				236	return false;
				237	}
				238
				239	/*
				240	* Funnel-lock acquisition for expedited grace periods. Returns true
				241	* if some other task completed an expedited grace period that this task
				242	* can piggy-back on, and with no mutex held. Otherwise, returns false
				243	* with the mutex held, indicating that the caller must actually do the
				244	* expedited grace period.
				245	*/
				246	static bool exp_funnel_lock(struct rcu_state *rsp, unsigned long s)
				247	{
				248	struct rcu_data *rdp = per_cpu_ptr(rsp->rda, raw_smp_processor_id());
				249	struct rcu_node *rnp = rdp->mynode;
				250	struct rcu_node *rnp_root = rcu_get_root(rsp);
				251
				252	/* Low-contention fastpath. */
				253	if (ULONG_CMP_LT(READ_ONCE(rnp->exp_seq_rq), s) &&
				254	(rnp == rnp_root \|\|
				255	ULONG_CMP_LT(READ_ONCE(rnp_root->exp_seq_rq), s)) &&
Paul E. McKenney	3549c2b	2016-04-15 16:35:29 -0700	[diff] [blame]	256	mutex_trylock(&rsp->exp_mutex))
				257	goto fastpath;
				258
				259	/*
				260	* Each pass through the following loop works its way up
				261	* the rcu_node tree, returning if others have done the work or
				262	* otherwise falls through to acquire rsp->exp_mutex. The mapping
				263	* from CPU to rcu_node structure can be inexact, as it is just
				264	* promoting locality and is not strictly needed for correctness.
				265	*/
				266	for (; rnp != NULL; rnp = rnp->parent) {
				267	if (sync_exp_work_done(rsp, &rdp->exp_workdone1, s))
				268	return true;
				269
				270	/* Work not done, either wait here or go up. */
				271	spin_lock(&rnp->exp_lock);
				272	if (ULONG_CMP_GE(rnp->exp_seq_rq, s)) {
				273
				274	/* Someone else doing GP, so wait for them. */
				275	spin_unlock(&rnp->exp_lock);
				276	trace_rcu_exp_funnel_lock(rsp->name, rnp->level,
				277	rnp->grplo, rnp->grphi,
				278	TPS("wait"));
				279	wait_event(rnp->exp_wq[(s >> 1) & 0x3],
				280	sync_exp_work_done(rsp,
				281	&rdp->exp_workdone2, s));
				282	return true;
				283	}
				284	rnp->exp_seq_rq = s; /* Followers can wait on us. */
				285	spin_unlock(&rnp->exp_lock);
				286	trace_rcu_exp_funnel_lock(rsp->name, rnp->level, rnp->grplo,
				287	rnp->grphi, TPS("nxtlvl"));
				288	}
				289	mutex_lock(&rsp->exp_mutex);
				290	fastpath:
				291	if (sync_exp_work_done(rsp, &rdp->exp_workdone3, s)) {
				292	mutex_unlock(&rsp->exp_mutex);
				293	return true;
				294	}
				295	rcu_exp_gp_seq_start(rsp);
				296	trace_rcu_exp_grace_period(rsp->name, s, TPS("start"));
				297	return false;
				298	}
				299
				300	/* Invoked on each online non-idle CPU for expedited quiescent state. */
				301	static void sync_sched_exp_handler(void *data)
				302	{
				303	struct rcu_data *rdp;
				304	struct rcu_node *rnp;
				305	struct rcu_state *rsp = data;
				306
				307	rdp = this_cpu_ptr(rsp->rda);
				308	rnp = rdp->mynode;
				309	if (!(READ_ONCE(rnp->expmask) & rdp->grpmask) \|\|
				310	__this_cpu_read(rcu_sched_data.cpu_no_qs.b.exp))
				311	return;
				312	if (rcu_is_cpu_rrupt_from_idle()) {
				313	rcu_report_exp_rdp(&rcu_sched_state,
				314	this_cpu_ptr(&rcu_sched_data), true);
				315	return;
				316	}
				317	__this_cpu_write(rcu_sched_data.cpu_no_qs.b.exp, true);
				318	resched_cpu(smp_processor_id());
				319	}
				320
				321	/* Send IPI for expedited cleanup if needed at end of CPU-hotplug operation. */
				322	static void sync_sched_exp_online_cleanup(int cpu)
				323	{
				324	struct rcu_data *rdp;
				325	int ret;
				326	struct rcu_node *rnp;
				327	struct rcu_state *rsp = &rcu_sched_state;
				328
				329	rdp = per_cpu_ptr(rsp->rda, cpu);
				330	rnp = rdp->mynode;
				331	if (!(READ_ONCE(rnp->expmask) & rdp->grpmask))
				332	return;
				333	ret = smp_call_function_single(cpu, sync_sched_exp_handler, rsp, 0);
				334	WARN_ON_ONCE(ret);
				335	}
				336
				337	/*
				338	* Select the nodes that the upcoming expedited grace period needs
				339	* to wait for.
				340	*/
				341	static void sync_rcu_exp_select_cpus(struct rcu_state *rsp,
				342	smp_call_func_t func)
				343	{
				344	int cpu;
				345	unsigned long flags;
Paul E. McKenney	3549c2b	2016-04-15 16:35:29 -0700	[diff] [blame]	346	unsigned long mask_ofl_test;
				347	unsigned long mask_ofl_ipi;
				348	int ret;
				349	struct rcu_node *rnp;
				350
				351	sync_exp_reset_tree(rsp);
				352	rcu_for_each_leaf_node(rsp, rnp) {
				353	raw_spin_lock_irqsave_rcu_node(rnp, flags);
				354
				355	/* Each pass checks a CPU for identity, offline, and idle. */
				356	mask_ofl_test = 0;
Mark Rutland	bc75e99	2016-06-03 15:20:04 +0100	[diff] [blame]	357	for_each_leaf_node_possible_cpu(rnp, cpu) {
Paul E. McKenney	3549c2b	2016-04-15 16:35:29 -0700	[diff] [blame]	358	struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu);
Paul E. McKenney	3549c2b	2016-04-15 16:35:29 -0700	[diff] [blame]	359
Paul E. McKenney	0742ac3	2016-10-11 06:09:59 -0700	[diff] [blame]	360	rdp->exp_dynticks_snap =
Paul E. McKenney	8b2f63a	2016-11-02 14:12:05 -0700	[diff] [blame^]	361	rcu_dynticks_snap(rdp->dynticks);
Paul E. McKenney	3549c2b	2016-04-15 16:35:29 -0700	[diff] [blame]	362	if (raw_smp_processor_id() == cpu \|\|
Paul E. McKenney	0742ac3	2016-10-11 06:09:59 -0700	[diff] [blame]	363	!(rdp->exp_dynticks_snap & 0x1) \|\|
Paul E. McKenney	98834b8	2016-06-29 17:04:19 -0700	[diff] [blame]	364	!(rnp->qsmaskinitnext & rdp->grpmask))
Paul E. McKenney	3549c2b	2016-04-15 16:35:29 -0700	[diff] [blame]	365	mask_ofl_test \|= rdp->grpmask;
				366	}
				367	mask_ofl_ipi = rnp->expmask & ~mask_ofl_test;
				368
				369	/*
				370	* Need to wait for any blocked tasks as well. Note that
				371	* additional blocking tasks will also block the expedited
				372	* GP until such time as the ->expmask bits are cleared.
				373	*/
				374	if (rcu_preempt_has_tasks(rnp))
				375	rnp->exp_tasks = rnp->blkd_tasks.next;
				376	raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
				377
				378	/* IPI the remaining CPUs for expedited quiescent state. */
Mark Rutland	bc75e99	2016-06-03 15:20:04 +0100	[diff] [blame]	379	for_each_leaf_node_possible_cpu(rnp, cpu) {
				380	unsigned long mask = leaf_node_cpu_bit(rnp, cpu);
Paul E. McKenney	0742ac3	2016-10-11 06:09:59 -0700	[diff] [blame]	381	struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu);
Paul E. McKenney	0742ac3	2016-10-11 06:09:59 -0700	[diff] [blame]	382
Paul E. McKenney	3549c2b	2016-04-15 16:35:29 -0700	[diff] [blame]	383	if (!(mask_ofl_ipi & mask))
				384	continue;
				385	retry_ipi:
Paul E. McKenney	8b2f63a	2016-11-02 14:12:05 -0700	[diff] [blame^]	386	if (rcu_dynticks_snap(rdp->dynticks) !=
Paul E. McKenney	0742ac3	2016-10-11 06:09:59 -0700	[diff] [blame]	387	rdp->exp_dynticks_snap) {
				388	mask_ofl_test \|= mask;
				389	continue;
				390	}
Paul E. McKenney	3549c2b	2016-04-15 16:35:29 -0700	[diff] [blame]	391	ret = smp_call_function_single(cpu, func, rsp, 0);
				392	if (!ret) {
				393	mask_ofl_ipi &= ~mask;
				394	continue;
				395	}
Paul E. McKenney	385c859	2016-06-30 12:16:11 -0700	[diff] [blame]	396	/* Failed, raced with CPU hotplug operation. */
Paul E. McKenney	3549c2b	2016-04-15 16:35:29 -0700	[diff] [blame]	397	raw_spin_lock_irqsave_rcu_node(rnp, flags);
Paul E. McKenney	385c859	2016-06-30 12:16:11 -0700	[diff] [blame]	398	if ((rnp->qsmaskinitnext & mask) &&
Paul E. McKenney	3549c2b	2016-04-15 16:35:29 -0700	[diff] [blame]	399	(rnp->expmask & mask)) {
Paul E. McKenney	385c859	2016-06-30 12:16:11 -0700	[diff] [blame]	400	/* Online, so delay for a bit and try again. */
Paul E. McKenney	3549c2b	2016-04-15 16:35:29 -0700	[diff] [blame]	401	raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
				402	schedule_timeout_uninterruptible(1);
Paul E. McKenney	385c859	2016-06-30 12:16:11 -0700	[diff] [blame]	403	goto retry_ipi;
Paul E. McKenney	3549c2b	2016-04-15 16:35:29 -0700	[diff] [blame]	404	}
Paul E. McKenney	385c859	2016-06-30 12:16:11 -0700	[diff] [blame]	405	/* CPU really is offline, so we can ignore it. */
Paul E. McKenney	3549c2b	2016-04-15 16:35:29 -0700	[diff] [blame]	406	if (!(rnp->expmask & mask))
				407	mask_ofl_ipi &= ~mask;
				408	raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
				409	}
				410	/* Report quiescent states for those that went offline. */
				411	mask_ofl_test \|= mask_ofl_ipi;
				412	if (mask_ofl_test)
				413	rcu_report_exp_cpu_mult(rsp, rnp, mask_ofl_test, false);
				414	}
				415	}
				416
				417	static void synchronize_sched_expedited_wait(struct rcu_state *rsp)
				418	{
				419	int cpu;
				420	unsigned long jiffies_stall;
				421	unsigned long jiffies_start;
				422	unsigned long mask;
				423	int ndetected;
				424	struct rcu_node *rnp;
				425	struct rcu_node *rnp_root = rcu_get_root(rsp);
				426	int ret;
				427
				428	jiffies_stall = rcu_jiffies_till_stall_check();
				429	jiffies_start = jiffies;
				430
				431	for (;;) {
				432	ret = swait_event_timeout(
				433	rsp->expedited_wq,
				434	sync_rcu_preempt_exp_done(rnp_root),
				435	jiffies_stall);
				436	if (ret > 0 \|\| sync_rcu_preempt_exp_done(rnp_root))
				437	return;
Paul E. McKenney	908d2c1	2016-06-29 14:34:59 -0700	[diff] [blame]	438	WARN_ON(ret < 0); /* workqueues should not be signaled. */
Paul E. McKenney	24a6cff	2016-06-29 14:49:29 -0700	[diff] [blame]	439	if (rcu_cpu_stall_suppress)
				440	continue;
				441	panic_on_rcu_stall();
Paul E. McKenney	3549c2b	2016-04-15 16:35:29 -0700	[diff] [blame]	442	pr_err("INFO: %s detected expedited stalls on CPUs/tasks: {",
				443	rsp->name);
				444	ndetected = 0;
				445	rcu_for_each_leaf_node(rsp, rnp) {
				446	ndetected += rcu_print_task_exp_stall(rnp);
Mark Rutland	bc75e99	2016-06-03 15:20:04 +0100	[diff] [blame]	447	for_each_leaf_node_possible_cpu(rnp, cpu) {
Paul E. McKenney	3549c2b	2016-04-15 16:35:29 -0700	[diff] [blame]	448	struct rcu_data *rdp;
				449
Mark Rutland	bc75e99	2016-06-03 15:20:04 +0100	[diff] [blame]	450	mask = leaf_node_cpu_bit(rnp, cpu);
Paul E. McKenney	3549c2b	2016-04-15 16:35:29 -0700	[diff] [blame]	451	if (!(rnp->expmask & mask))
				452	continue;
				453	ndetected++;
				454	rdp = per_cpu_ptr(rsp->rda, cpu);
				455	pr_cont(" %d-%c%c%c", cpu,
				456	"O."[!!cpu_online(cpu)],
				457	"o."[!!(rdp->grpmask & rnp->expmaskinit)],
				458	"N."[!!(rdp->grpmask & rnp->expmaskinitnext)]);
				459	}
Paul E. McKenney	3549c2b	2016-04-15 16:35:29 -0700	[diff] [blame]	460	}
				461	pr_cont(" } %lu jiffies s: %lu root: %#lx/%c\n",
				462	jiffies - jiffies_start, rsp->expedited_sequence,
				463	rnp_root->expmask, ".T"[!!rnp_root->exp_tasks]);
				464	if (ndetected) {
				465	pr_err("blocking rcu_node structures:");
				466	rcu_for_each_node_breadth_first(rsp, rnp) {
				467	if (rnp == rnp_root)
				468	continue; /* printed unconditionally */
				469	if (sync_rcu_preempt_exp_done(rnp))
				470	continue;
				471	pr_cont(" l=%u:%d-%d:%#lx/%c",
				472	rnp->level, rnp->grplo, rnp->grphi,
				473	rnp->expmask,
				474	".T"[!!rnp->exp_tasks]);
				475	}
				476	pr_cont("\n");
				477	}
				478	rcu_for_each_leaf_node(rsp, rnp) {
Mark Rutland	bc75e99	2016-06-03 15:20:04 +0100	[diff] [blame]	479	for_each_leaf_node_possible_cpu(rnp, cpu) {
				480	mask = leaf_node_cpu_bit(rnp, cpu);
Paul E. McKenney	3549c2b	2016-04-15 16:35:29 -0700	[diff] [blame]	481	if (!(rnp->expmask & mask))
				482	continue;
				483	dump_cpu_task(cpu);
				484	}
				485	}
				486	jiffies_stall = 3 * rcu_jiffies_till_stall_check() + 3;
				487	}
				488	}
				489
				490	/*
				491	* Wait for the current expedited grace period to complete, and then
				492	* wake up everyone who piggybacked on the just-completed expedited
				493	* grace period. Also update all the ->exp_seq_rq counters as needed
				494	* in order to avoid counter-wrap problems.
				495	*/
				496	static void rcu_exp_wait_wake(struct rcu_state *rsp, unsigned long s)
				497	{
				498	struct rcu_node *rnp;
				499
				500	synchronize_sched_expedited_wait(rsp);
				501	rcu_exp_gp_seq_end(rsp);
				502	trace_rcu_exp_grace_period(rsp->name, s, TPS("end"));
				503
				504	/*
				505	* Switch over to wakeup mode, allowing the next GP, but -only- the
				506	* next GP, to proceed.
				507	*/
				508	mutex_lock(&rsp->exp_wake_mutex);
Paul E. McKenney	3549c2b	2016-04-15 16:35:29 -0700	[diff] [blame]	509
				510	rcu_for_each_node_breadth_first(rsp, rnp) {
				511	if (ULONG_CMP_LT(READ_ONCE(rnp->exp_seq_rq), s)) {
				512	spin_lock(&rnp->exp_lock);
				513	/* Recheck, avoid hang in case someone just arrived. */
				514	if (ULONG_CMP_LT(rnp->exp_seq_rq, s))
				515	rnp->exp_seq_rq = s;
				516	spin_unlock(&rnp->exp_lock);
				517	}
				518	wake_up_all(&rnp->exp_wq[(rsp->expedited_sequence >> 1) & 0x3]);
				519	}
				520	trace_rcu_exp_grace_period(rsp->name, s, TPS("endwake"));
				521	mutex_unlock(&rsp->exp_wake_mutex);
				522	}
				523
Paul E. McKenney	8b355e3	2016-06-29 13:46:25 -0700	[diff] [blame]	524	/* Let the workqueue handler know what it is supposed to do. */
				525	struct rcu_exp_work {
				526	smp_call_func_t rew_func;
				527	struct rcu_state *rew_rsp;
				528	unsigned long rew_s;
				529	struct work_struct rew_work;
				530	};
				531
				532	/*
Paul E. McKenney	52d7e48	2017-01-10 02:28:26 -0800	[diff] [blame]	533	* Common code to drive an expedited grace period forward, used by
				534	* workqueues and mid-boot-time tasks.
				535	*/
				536	static void rcu_exp_sel_wait_wake(struct rcu_state *rsp,
				537	smp_call_func_t func, unsigned long s)
				538	{
				539	/* Initialize the rcu_node tree in preparation for the wait. */
				540	sync_rcu_exp_select_cpus(rsp, func);
				541
				542	/* Wait and clean up, including waking everyone. */
				543	rcu_exp_wait_wake(rsp, s);
				544	}
				545
				546	/*
Paul E. McKenney	8b355e3	2016-06-29 13:46:25 -0700	[diff] [blame]	547	* Work-queue handler to drive an expedited grace period forward.
				548	*/
				549	static void wait_rcu_exp_gp(struct work_struct *wp)
				550	{
				551	struct rcu_exp_work *rewp;
				552
Paul E. McKenney	8b355e3	2016-06-29 13:46:25 -0700	[diff] [blame]	553	rewp = container_of(wp, struct rcu_exp_work, rew_work);
Paul E. McKenney	52d7e48	2017-01-10 02:28:26 -0800	[diff] [blame]	554	rcu_exp_sel_wait_wake(rewp->rew_rsp, rewp->rew_func, rewp->rew_s);
Paul E. McKenney	8b355e3	2016-06-29 13:46:25 -0700	[diff] [blame]	555	}
				556
Paul E. McKenney	f7b8eb8	2016-06-24 11:30:32 -0700	[diff] [blame]	557	/*
				558	* Given an rcu_state pointer and a smp_call_function() handler, kick
				559	* off the specified flavor of expedited grace period.
				560	*/
				561	static void _synchronize_rcu_expedited(struct rcu_state *rsp,
				562	smp_call_func_t func)
				563	{
Paul E. McKenney	8b355e3	2016-06-29 13:46:25 -0700	[diff] [blame]	564	struct rcu_data *rdp;
				565	struct rcu_exp_work rew;
				566	struct rcu_node *rnp;
Paul E. McKenney	f7b8eb8	2016-06-24 11:30:32 -0700	[diff] [blame]	567	unsigned long s;
				568
				569	/* If expedited grace periods are prohibited, fall back to normal. */
				570	if (rcu_gp_is_normal()) {
				571	wait_rcu_gp(rsp->call);
				572	return;
				573	}
				574
				575	/* Take a snapshot of the sequence number. */
				576	s = rcu_exp_gp_seq_snap(rsp);
				577	if (exp_funnel_lock(rsp, s))
				578	return; /* Someone else did our work for us. */
				579
Paul E. McKenney	52d7e48	2017-01-10 02:28:26 -0800	[diff] [blame]	580	/* Ensure that load happens before action based on it. */
				581	if (unlikely(rcu_scheduler_active == RCU_SCHEDULER_INIT)) {
				582	/* Direct call during scheduler init and early_initcalls(). */
				583	rcu_exp_sel_wait_wake(rsp, func, s);
				584	} else {
				585	/* Marshall arguments & schedule the expedited grace period. */
				586	rew.rew_func = func;
				587	rew.rew_rsp = rsp;
				588	rew.rew_s = s;
				589	INIT_WORK_ONSTACK(&rew.rew_work, wait_rcu_exp_gp);
				590	schedule_work(&rew.rew_work);
				591	}
Paul E. McKenney	f7b8eb8	2016-06-24 11:30:32 -0700	[diff] [blame]	592
Paul E. McKenney	8b355e3	2016-06-29 13:46:25 -0700	[diff] [blame]	593	/* Wait for expedited grace period to complete. */
				594	rdp = per_cpu_ptr(rsp->rda, raw_smp_processor_id());
				595	rnp = rcu_get_root(rsp);
				596	wait_event(rnp->exp_wq[(s >> 1) & 0x3],
				597	sync_exp_work_done(rsp,
				598	&rdp->exp_workdone0, s));
				599
				600	/* Let the next expedited grace period start. */
				601	mutex_unlock(&rsp->exp_mutex);
Paul E. McKenney	f7b8eb8	2016-06-24 11:30:32 -0700	[diff] [blame]	602	}
				603
Paul E. McKenney	3549c2b	2016-04-15 16:35:29 -0700	[diff] [blame]	604	/**
				605	* synchronize_sched_expedited - Brute-force RCU-sched grace period
				606	*
				607	* Wait for an RCU-sched grace period to elapse, but use a "big hammer"
				608	* approach to force the grace period to end quickly. This consumes
				609	* significant time on all CPUs and is unfriendly to real-time workloads,
				610	* so is thus not recommended for any sort of common-case code. In fact,
				611	* if you are using synchronize_sched_expedited() in a loop, please
				612	* restructure your code to batch your updates, and then use a single
				613	* synchronize_sched() instead.
				614	*
				615	* This implementation can be thought of as an application of sequence
				616	* locking to expedited grace periods, but using the sequence counter to
				617	* determine when someone else has already done the work instead of for
				618	* retrying readers.
				619	*/
				620	void synchronize_sched_expedited(void)
				621	{
Paul E. McKenney	3549c2b	2016-04-15 16:35:29 -0700	[diff] [blame]	622	struct rcu_state *rsp = &rcu_sched_state;
				623
				624	/* If only one CPU, this is automatically a grace period. */
				625	if (rcu_blocking_is_gp())
				626	return;
				627
Paul E. McKenney	f7b8eb8	2016-06-24 11:30:32 -0700	[diff] [blame]	628	_synchronize_rcu_expedited(rsp, sync_sched_exp_handler);
Paul E. McKenney	3549c2b	2016-04-15 16:35:29 -0700	[diff] [blame]	629	}
				630	EXPORT_SYMBOL_GPL(synchronize_sched_expedited);
Paul E. McKenney	40e0a6c	2016-04-15 16:44:07 -0700	[diff] [blame]	631
				632	#ifdef CONFIG_PREEMPT_RCU
				633
				634	/*
				635	* Remote handler for smp_call_function_single(). If there is an
				636	* RCU read-side critical section in effect, request that the
				637	* next rcu_read_unlock() record the quiescent state up the
				638	* ->expmask fields in the rcu_node tree. Otherwise, immediately
				639	* report the quiescent state.
				640	*/
				641	static void sync_rcu_exp_handler(void *info)
				642	{
				643	struct rcu_data *rdp;
				644	struct rcu_state *rsp = info;
				645	struct task_struct *t = current;
				646
				647	/*
				648	* Within an RCU read-side critical section, request that the next
				649	* rcu_read_unlock() report. Unless this RCU read-side critical
				650	* section has already blocked, in which case it is already set
				651	* up for the expedited grace period to wait on it.
				652	*/
				653	if (t->rcu_read_lock_nesting > 0 &&
				654	!t->rcu_read_unlock_special.b.blocked) {
				655	t->rcu_read_unlock_special.b.exp_need_qs = true;
				656	return;
				657	}
				658
				659	/*
				660	* We are either exiting an RCU read-side critical section (negative
				661	* values of t->rcu_read_lock_nesting) or are not in one at all
				662	* (zero value of t->rcu_read_lock_nesting). Or we are in an RCU
				663	* read-side critical section that blocked before this expedited
				664	* grace period started. Either way, we can immediately report
				665	* the quiescent state.
				666	*/
				667	rdp = this_cpu_ptr(rsp->rda);
				668	rcu_report_exp_rdp(rsp, rdp, true);
				669	}
				670
				671	/**
				672	* synchronize_rcu_expedited - Brute-force RCU grace period
				673	*
				674	* Wait for an RCU-preempt grace period, but expedite it. The basic
				675	* idea is to IPI all non-idle non-nohz online CPUs. The IPI handler
				676	* checks whether the CPU is in an RCU-preempt critical section, and
				677	* if so, it sets a flag that causes the outermost rcu_read_unlock()
				678	* to report the quiescent state. On the other hand, if the CPU is
				679	* not in an RCU read-side critical section, the IPI handler reports
				680	* the quiescent state immediately.
				681	*
				682	* Although this is a greate improvement over previous expedited
				683	* implementations, it is still unfriendly to real-time workloads, so is
				684	* thus not recommended for any sort of common-case code. In fact, if
				685	* you are using synchronize_rcu_expedited() in a loop, please restructure
				686	* your code to batch your updates, and then Use a single synchronize_rcu()
				687	* instead.
				688	*/
				689	void synchronize_rcu_expedited(void)
				690	{
				691	struct rcu_state *rsp = rcu_state_p;
Paul E. McKenney	40e0a6c	2016-04-15 16:44:07 -0700	[diff] [blame]	692
Paul E. McKenney	52d7e48	2017-01-10 02:28:26 -0800	[diff] [blame]	693	if (rcu_scheduler_active == RCU_SCHEDULER_INACTIVE)
				694	return;
Paul E. McKenney	f7b8eb8	2016-06-24 11:30:32 -0700	[diff] [blame]	695	_synchronize_rcu_expedited(rsp, sync_rcu_exp_handler);
Paul E. McKenney	40e0a6c	2016-04-15 16:44:07 -0700	[diff] [blame]	696	}
				697	EXPORT_SYMBOL_GPL(synchronize_rcu_expedited);
				698
				699	#else /* #ifdef CONFIG_PREEMPT_RCU */
				700
				701	/*
				702	* Wait for an rcu-preempt grace period, but make it happen quickly.
				703	* But because preemptible RCU does not exist, map to rcu-sched.
				704	*/
				705	void synchronize_rcu_expedited(void)
				706	{
				707	synchronize_sched_expedited();
				708	}
				709	EXPORT_SYMBOL_GPL(synchronize_rcu_expedited);
				710
				711	#endif /* #else #ifdef CONFIG_PREEMPT_RCU */
Paul E. McKenney	52d7e48	2017-01-10 02:28:26 -0800	[diff] [blame]	712
				713	/*
				714	* Switch to run-time mode once Tree RCU has fully initialized.
				715	*/
				716	static int __init rcu_exp_runtime_mode(void)
				717	{
				718	rcu_test_sync_prims();
				719	rcu_scheduler_active = RCU_SCHEDULER_RUNNING;
				720	rcu_test_sync_prims();
				721	return 0;
				722	}
				723	core_initcall(rcu_exp_runtime_mode);