Blame - kernel/rcupreempt.c - SHIFTPHONES/mainline/linux

blob: a5aabb1677f84874137d51c38de9326afc9d8de0 [file] [log] [blame]

Paul E. McKenney	e260be6	2008-01-25 21:08:24 +0100	[diff] [blame^]	1	/*
				2	* Read-Copy Update mechanism for mutual exclusion, realtime implementation
				3	*
				4	* This program is free software; you can redistribute it and/or modify
				5	* it under the terms of the GNU General Public License as published by
				6	* the Free Software Foundation; either version 2 of the License, or
				7	* (at your option) any later version.
				8	*
				9	* This program is distributed in the hope that it will be useful,
				10	* but WITHOUT ANY WARRANTY; without even the implied warranty of
				11	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
				12	* GNU General Public License for more details.
				13	*
				14	* You should have received a copy of the GNU General Public License
				15	* along with this program; if not, write to the Free Software
				16	* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
				17	*
				18	* Copyright IBM Corporation, 2006
				19	*
				20	* Authors: Paul E. McKenney <paulmck@us.ibm.com>
				21	* With thanks to Esben Nielsen, Bill Huey, and Ingo Molnar
				22	* for pushing me away from locks and towards counters, and
				23	* to Suparna Bhattacharya for pushing me completely away
				24	* from atomic instructions on the read side.
				25	*
				26	* Papers: http://www.rdrop.com/users/paulmck/RCU
				27	*
				28	* Design Document: http://lwn.net/Articles/253651/
				29	*
				30	* For detailed explanation of Read-Copy Update mechanism see -
				31	* Documentation/RCU/ *.txt
				32	*
				33	*/
				34	#include <linux/types.h>
				35	#include <linux/kernel.h>
				36	#include <linux/init.h>
				37	#include <linux/spinlock.h>
				38	#include <linux/smp.h>
				39	#include <linux/rcupdate.h>
				40	#include <linux/interrupt.h>
				41	#include <linux/sched.h>
				42	#include <asm/atomic.h>
				43	#include <linux/bitops.h>
				44	#include <linux/module.h>
				45	#include <linux/completion.h>
				46	#include <linux/moduleparam.h>
				47	#include <linux/percpu.h>
				48	#include <linux/notifier.h>
				49	#include <linux/rcupdate.h>
				50	#include <linux/cpu.h>
				51	#include <linux/random.h>
				52	#include <linux/delay.h>
				53	#include <linux/byteorder/swabb.h>
				54	#include <linux/cpumask.h>
				55	#include <linux/rcupreempt_trace.h>
				56
				57	/*
				58	* Macro that prevents the compiler from reordering accesses, but does
				59	* absolutely -nothing- to prevent CPUs from reordering. This is used
				60	* only to mediate communication between mainline code and hardware
				61	* interrupt and NMI handlers.
				62	*/
				63	#define ACCESS_ONCE(x) ((volatile typeof(x) )&(x))
				64
				65	/*
				66	* PREEMPT_RCU data structures.
				67	*/
				68
				69	/*
				70	* GP_STAGES specifies the number of times the state machine has
				71	* to go through the all the rcu_try_flip_states (see below)
				72	* in a single Grace Period.
				73	*
				74	* GP in GP_STAGES stands for Grace Period ;)
				75	*/
				76	#define GP_STAGES 2
				77	struct rcu_data {
				78	spinlock_t lock; /* Protect rcu_data fields. */
				79	long completed; /* Number of last completed batch. */
				80	int waitlistcount;
				81	struct tasklet_struct rcu_tasklet;
				82	struct rcu_head *nextlist;
				83	struct rcu_head **nexttail;
				84	struct rcu_head *waitlist[GP_STAGES];
				85	struct rcu_head **waittail[GP_STAGES];
				86	struct rcu_head *donelist;
				87	struct rcu_head **donetail;
				88	long rcu_flipctr[2];
				89	#ifdef CONFIG_RCU_TRACE
				90	struct rcupreempt_trace trace;
				91	#endif /* #ifdef CONFIG_RCU_TRACE */
				92	};
				93
				94	/*
				95	* States for rcu_try_flip() and friends.
				96	*/
				97
				98	enum rcu_try_flip_states {
				99
				100	/*
				101	* Stay here if nothing is happening. Flip the counter if somthing
				102	* starts happening. Denoted by "I"
				103	*/
				104	rcu_try_flip_idle_state,
				105
				106	/*
				107	* Wait here for all CPUs to notice that the counter has flipped. This
				108	* prevents the old set of counters from ever being incremented once
				109	* we leave this state, which in turn is necessary because we cannot
				110	* test any individual counter for zero -- we can only check the sum.
				111	* Denoted by "A".
				112	*/
				113	rcu_try_flip_waitack_state,
				114
				115	/*
				116	* Wait here for the sum of the old per-CPU counters to reach zero.
				117	* Denoted by "Z".
				118	*/
				119	rcu_try_flip_waitzero_state,
				120
				121	/*
				122	* Wait here for each of the other CPUs to execute a memory barrier.
				123	* This is necessary to ensure that these other CPUs really have
				124	* completed executing their RCU read-side critical sections, despite
				125	* their CPUs wildly reordering memory. Denoted by "M".
				126	*/
				127	rcu_try_flip_waitmb_state,
				128	};
				129
				130	struct rcu_ctrlblk {
				131	spinlock_t fliplock; /* Protect state-machine transitions. */
				132	long completed; /* Number of last completed batch. */
				133	enum rcu_try_flip_states rcu_try_flip_state; /* The current state of
				134	the rcu state machine */
				135	};
				136
				137	static DEFINE_PER_CPU(struct rcu_data, rcu_data);
				138	static struct rcu_ctrlblk rcu_ctrlblk = {
				139	.fliplock = __SPIN_LOCK_UNLOCKED(rcu_ctrlblk.fliplock),
				140	.completed = 0,
				141	.rcu_try_flip_state = rcu_try_flip_idle_state,
				142	};
				143
				144
				145	#ifdef CONFIG_RCU_TRACE
				146	static char *rcu_try_flip_state_names[] =
				147	{ "idle", "waitack", "waitzero", "waitmb" };
				148	#endif /* #ifdef CONFIG_RCU_TRACE */
				149
				150	/*
				151	* Enum and per-CPU flag to determine when each CPU has seen
				152	* the most recent counter flip.
				153	*/
				154
				155	enum rcu_flip_flag_values {
				156	rcu_flip_seen, /* Steady/initial state, last flip seen. */
				157	/* Only GP detector can update. */
				158	rcu_flipped /* Flip just completed, need confirmation. */
				159	/* Only corresponding CPU can update. */
				160	};
				161	static DEFINE_PER_CPU_SHARED_ALIGNED(enum rcu_flip_flag_values, rcu_flip_flag)
				162	= rcu_flip_seen;
				163
				164	/*
				165	* Enum and per-CPU flag to determine when each CPU has executed the
				166	* needed memory barrier to fence in memory references from its last RCU
				167	* read-side critical section in the just-completed grace period.
				168	*/
				169
				170	enum rcu_mb_flag_values {
				171	rcu_mb_done, /* Steady/initial state, no mb()s required. */
				172	/* Only GP detector can update. */
				173	rcu_mb_needed /* Flip just completed, need an mb(). */
				174	/* Only corresponding CPU can update. */
				175	};
				176	static DEFINE_PER_CPU_SHARED_ALIGNED(enum rcu_mb_flag_values, rcu_mb_flag)
				177	= rcu_mb_done;
				178
				179	/*
				180	* RCU_DATA_ME: find the current CPU's rcu_data structure.
				181	* RCU_DATA_CPU: find the specified CPU's rcu_data structure.
				182	*/
				183	#define RCU_DATA_ME() (&__get_cpu_var(rcu_data))
				184	#define RCU_DATA_CPU(cpu) (&per_cpu(rcu_data, cpu))
				185
				186	/*
				187	* Helper macro for tracing when the appropriate rcu_data is not
				188	* cached in a local variable, but where the CPU number is so cached.
				189	*/
				190	#define RCU_TRACE_CPU(f, cpu) RCU_TRACE(f, &(RCU_DATA_CPU(cpu)->trace));
				191
				192	/*
				193	* Helper macro for tracing when the appropriate rcu_data is not
				194	* cached in a local variable.
				195	*/
				196	#define RCU_TRACE_ME(f) RCU_TRACE(f, &(RCU_DATA_ME()->trace));
				197
				198	/*
				199	* Helper macro for tracing when the appropriate rcu_data is pointed
				200	* to by a local variable.
				201	*/
				202	#define RCU_TRACE_RDP(f, rdp) RCU_TRACE(f, &((rdp)->trace));
				203
				204	/*
				205	* Return the number of RCU batches processed thus far. Useful
				206	* for debug and statistics.
				207	*/
				208	long rcu_batches_completed(void)
				209	{
				210	return rcu_ctrlblk.completed;
				211	}
				212	EXPORT_SYMBOL_GPL(rcu_batches_completed);
				213
				214	EXPORT_SYMBOL_GPL(rcu_batches_completed_bh);
				215
				216	void __rcu_read_lock(void)
				217	{
				218	int idx;
				219	struct task_struct *t = current;
				220	int nesting;
				221
				222	nesting = ACCESS_ONCE(t->rcu_read_lock_nesting);
				223	if (nesting != 0) {
				224
				225	/* An earlier rcu_read_lock() covers us, just count it. */
				226
				227	t->rcu_read_lock_nesting = nesting + 1;
				228
				229	} else {
				230	unsigned long flags;
				231
				232	/*
				233	* We disable interrupts for the following reasons:
				234	* - If we get scheduling clock interrupt here, and we
				235	* end up acking the counter flip, it's like a promise
				236	* that we will never increment the old counter again.
				237	* Thus we will break that promise if that
				238	* scheduling clock interrupt happens between the time
				239	* we pick the .completed field and the time that we
				240	* increment our counter.
				241	*
				242	* - We don't want to be preempted out here.
				243	*
				244	* NMIs can still occur, of course, and might themselves
				245	* contain rcu_read_lock().
				246	*/
				247
				248	local_irq_save(flags);
				249
				250	/*
				251	* Outermost nesting of rcu_read_lock(), so increment
				252	* the current counter for the current CPU. Use volatile
				253	* casts to prevent the compiler from reordering.
				254	*/
				255
				256	idx = ACCESS_ONCE(rcu_ctrlblk.completed) & 0x1;
				257	ACCESS_ONCE(RCU_DATA_ME()->rcu_flipctr[idx])++;
				258
				259	/*
				260	* Now that the per-CPU counter has been incremented, we
				261	* are protected from races with rcu_read_lock() invoked
				262	* from NMI handlers on this CPU. We can therefore safely
				263	* increment the nesting counter, relieving further NMIs
				264	* of the need to increment the per-CPU counter.
				265	*/
				266
				267	ACCESS_ONCE(t->rcu_read_lock_nesting) = nesting + 1;
				268
				269	/*
				270	* Now that we have preventing any NMIs from storing
				271	* to the ->rcu_flipctr_idx, we can safely use it to
				272	* remember which counter to decrement in the matching
				273	* rcu_read_unlock().
				274	*/
				275
				276	ACCESS_ONCE(t->rcu_flipctr_idx) = idx;
				277	local_irq_restore(flags);
				278	}
				279	}
				280	EXPORT_SYMBOL_GPL(__rcu_read_lock);
				281
				282	void __rcu_read_unlock(void)
				283	{
				284	int idx;
				285	struct task_struct *t = current;
				286	int nesting;
				287
				288	nesting = ACCESS_ONCE(t->rcu_read_lock_nesting);
				289	if (nesting > 1) {
				290
				291	/*
				292	* We are still protected by the enclosing rcu_read_lock(),
				293	* so simply decrement the counter.
				294	*/
				295
				296	t->rcu_read_lock_nesting = nesting - 1;
				297
				298	} else {
				299	unsigned long flags;
				300
				301	/*
				302	* Disable local interrupts to prevent the grace-period
				303	* detection state machine from seeing us half-done.
				304	* NMIs can still occur, of course, and might themselves
				305	* contain rcu_read_lock() and rcu_read_unlock().
				306	*/
				307
				308	local_irq_save(flags);
				309
				310	/*
				311	* Outermost nesting of rcu_read_unlock(), so we must
				312	* decrement the current counter for the current CPU.
				313	* This must be done carefully, because NMIs can
				314	* occur at any point in this code, and any rcu_read_lock()
				315	* and rcu_read_unlock() pairs in the NMI handlers
				316	* must interact non-destructively with this code.
				317	* Lots of volatile casts, and -very- careful ordering.
				318	*
				319	* Changes to this code, including this one, must be
				320	* inspected, validated, and tested extremely carefully!!!
				321	*/
				322
				323	/*
				324	* First, pick up the index.
				325	*/
				326
				327	idx = ACCESS_ONCE(t->rcu_flipctr_idx);
				328
				329	/*
				330	* Now that we have fetched the counter index, it is
				331	* safe to decrement the per-task RCU nesting counter.
				332	* After this, any interrupts or NMIs will increment and
				333	* decrement the per-CPU counters.
				334	*/
				335	ACCESS_ONCE(t->rcu_read_lock_nesting) = nesting - 1;
				336
				337	/*
				338	* It is now safe to decrement this task's nesting count.
				339	* NMIs that occur after this statement will route their
				340	* rcu_read_lock() calls through this "else" clause, and
				341	* will thus start incrementing the per-CPU counter on
				342	* their own. They will also clobber ->rcu_flipctr_idx,
				343	* but that is OK, since we have already fetched it.
				344	*/
				345
				346	ACCESS_ONCE(RCU_DATA_ME()->rcu_flipctr[idx])--;
				347	local_irq_restore(flags);
				348	}
				349	}
				350	EXPORT_SYMBOL_GPL(__rcu_read_unlock);
				351
				352	/*
				353	* If a global counter flip has occurred since the last time that we
				354	* advanced callbacks, advance them. Hardware interrupts must be
				355	* disabled when calling this function.
				356	*/
				357	static void __rcu_advance_callbacks(struct rcu_data *rdp)
				358	{
				359	int cpu;
				360	int i;
				361	int wlc = 0;
				362
				363	if (rdp->completed != rcu_ctrlblk.completed) {
				364	if (rdp->waitlist[GP_STAGES - 1] != NULL) {
				365	*rdp->donetail = rdp->waitlist[GP_STAGES - 1];
				366	rdp->donetail = rdp->waittail[GP_STAGES - 1];
				367	RCU_TRACE_RDP(rcupreempt_trace_move2done, rdp);
				368	}
				369	for (i = GP_STAGES - 2; i >= 0; i--) {
				370	if (rdp->waitlist[i] != NULL) {
				371	rdp->waitlist[i + 1] = rdp->waitlist[i];
				372	rdp->waittail[i + 1] = rdp->waittail[i];
				373	wlc++;
				374	} else {
				375	rdp->waitlist[i + 1] = NULL;
				376	rdp->waittail[i + 1] =
				377	&rdp->waitlist[i + 1];
				378	}
				379	}
				380	if (rdp->nextlist != NULL) {
				381	rdp->waitlist[0] = rdp->nextlist;
				382	rdp->waittail[0] = rdp->nexttail;
				383	wlc++;
				384	rdp->nextlist = NULL;
				385	rdp->nexttail = &rdp->nextlist;
				386	RCU_TRACE_RDP(rcupreempt_trace_move2wait, rdp);
				387	} else {
				388	rdp->waitlist[0] = NULL;
				389	rdp->waittail[0] = &rdp->waitlist[0];
				390	}
				391	rdp->waitlistcount = wlc;
				392	rdp->completed = rcu_ctrlblk.completed;
				393	}
				394
				395	/*
				396	* Check to see if this CPU needs to report that it has seen
				397	* the most recent counter flip, thereby declaring that all
				398	* subsequent rcu_read_lock() invocations will respect this flip.
				399	*/
				400
				401	cpu = raw_smp_processor_id();
				402	if (per_cpu(rcu_flip_flag, cpu) == rcu_flipped) {
				403	smp_mb(); /* Subsequent counter accesses must see new value */
				404	per_cpu(rcu_flip_flag, cpu) = rcu_flip_seen;
				405	smp_mb(); /* Subsequent RCU read-side critical sections */
				406	/* seen -after- acknowledgement. */
				407	}
				408	}
				409
				410	/*
				411	* Get here when RCU is idle. Decide whether we need to
				412	* move out of idle state, and return non-zero if so.
				413	* "Straightforward" approach for the moment, might later
				414	* use callback-list lengths, grace-period duration, or
				415	* some such to determine when to exit idle state.
				416	* Might also need a pre-idle test that does not acquire
				417	* the lock, but let's get the simple case working first...
				418	*/
				419
				420	static int
				421	rcu_try_flip_idle(void)
				422	{
				423	int cpu;
				424
				425	RCU_TRACE_ME(rcupreempt_trace_try_flip_i1);
				426	if (!rcu_pending(smp_processor_id())) {
				427	RCU_TRACE_ME(rcupreempt_trace_try_flip_ie1);
				428	return 0;
				429	}
				430
				431	/*
				432	* Do the flip.
				433	*/
				434
				435	RCU_TRACE_ME(rcupreempt_trace_try_flip_g1);
				436	rcu_ctrlblk.completed++; /* stands in for rcu_try_flip_g2 */
				437
				438	/*
				439	* Need a memory barrier so that other CPUs see the new
				440	* counter value before they see the subsequent change of all
				441	* the rcu_flip_flag instances to rcu_flipped.
				442	*/
				443
				444	smp_mb(); /* see above block comment. */
				445
				446	/* Now ask each CPU for acknowledgement of the flip. */
				447
				448	for_each_possible_cpu(cpu)
				449	per_cpu(rcu_flip_flag, cpu) = rcu_flipped;
				450
				451	return 1;
				452	}
				453
				454	/*
				455	* Wait for CPUs to acknowledge the flip.
				456	*/
				457
				458	static int
				459	rcu_try_flip_waitack(void)
				460	{
				461	int cpu;
				462
				463	RCU_TRACE_ME(rcupreempt_trace_try_flip_a1);
				464	for_each_possible_cpu(cpu)
				465	if (per_cpu(rcu_flip_flag, cpu) != rcu_flip_seen) {
				466	RCU_TRACE_ME(rcupreempt_trace_try_flip_ae1);
				467	return 0;
				468	}
				469
				470	/*
				471	* Make sure our checks above don't bleed into subsequent
				472	* waiting for the sum of the counters to reach zero.
				473	*/
				474
				475	smp_mb(); /* see above block comment. */
				476	RCU_TRACE_ME(rcupreempt_trace_try_flip_a2);
				477	return 1;
				478	}
				479
				480	/*
				481	* Wait for collective ``last'' counter to reach zero,
				482	* then tell all CPUs to do an end-of-grace-period memory barrier.
				483	*/
				484
				485	static int
				486	rcu_try_flip_waitzero(void)
				487	{
				488	int cpu;
				489	int lastidx = !(rcu_ctrlblk.completed & 0x1);
				490	int sum = 0;
				491
				492	/* Check to see if the sum of the "last" counters is zero. */
				493
				494	RCU_TRACE_ME(rcupreempt_trace_try_flip_z1);
				495	for_each_possible_cpu(cpu)
				496	sum += RCU_DATA_CPU(cpu)->rcu_flipctr[lastidx];
				497	if (sum != 0) {
				498	RCU_TRACE_ME(rcupreempt_trace_try_flip_ze1);
				499	return 0;
				500	}
				501
				502	/*
				503	* This ensures that the other CPUs see the call for
				504	* memory barriers -after- the sum to zero has been
				505	* detected here
				506	*/
				507	smp_mb(); /* ^^^^^^^^^^^^ */
				508
				509	/* Call for a memory barrier from each CPU. */
				510	for_each_possible_cpu(cpu)
				511	per_cpu(rcu_mb_flag, cpu) = rcu_mb_needed;
				512
				513	RCU_TRACE_ME(rcupreempt_trace_try_flip_z2);
				514	return 1;
				515	}
				516
				517	/*
				518	* Wait for all CPUs to do their end-of-grace-period memory barrier.
				519	* Return 0 once all CPUs have done so.
				520	*/
				521
				522	static int
				523	rcu_try_flip_waitmb(void)
				524	{
				525	int cpu;
				526
				527	RCU_TRACE_ME(rcupreempt_trace_try_flip_m1);
				528	for_each_possible_cpu(cpu)
				529	if (per_cpu(rcu_mb_flag, cpu) != rcu_mb_done) {
				530	RCU_TRACE_ME(rcupreempt_trace_try_flip_me1);
				531	return 0;
				532	}
				533
				534	smp_mb(); /* Ensure that the above checks precede any following flip. */
				535	RCU_TRACE_ME(rcupreempt_trace_try_flip_m2);
				536	return 1;
				537	}
				538
				539	/*
				540	* Attempt a single flip of the counters. Remember, a single flip does
				541	* -not- constitute a grace period. Instead, the interval between
				542	* at least GP_STAGES consecutive flips is a grace period.
				543	*
				544	* If anyone is nuts enough to run this CONFIG_PREEMPT_RCU implementation
				545	* on a large SMP, they might want to use a hierarchical organization of
				546	* the per-CPU-counter pairs.
				547	*/
				548	static void rcu_try_flip(void)
				549	{
				550	unsigned long flags;
				551
				552	RCU_TRACE_ME(rcupreempt_trace_try_flip_1);
				553	if (unlikely(!spin_trylock_irqsave(&rcu_ctrlblk.fliplock, flags))) {
				554	RCU_TRACE_ME(rcupreempt_trace_try_flip_e1);
				555	return;
				556	}
				557
				558	/*
				559	* Take the next transition(s) through the RCU grace-period
				560	* flip-counter state machine.
				561	*/
				562
				563	switch (rcu_ctrlblk.rcu_try_flip_state) {
				564	case rcu_try_flip_idle_state:
				565	if (rcu_try_flip_idle())
				566	rcu_ctrlblk.rcu_try_flip_state =
				567	rcu_try_flip_waitack_state;
				568	break;
				569	case rcu_try_flip_waitack_state:
				570	if (rcu_try_flip_waitack())
				571	rcu_ctrlblk.rcu_try_flip_state =
				572	rcu_try_flip_waitzero_state;
				573	break;
				574	case rcu_try_flip_waitzero_state:
				575	if (rcu_try_flip_waitzero())
				576	rcu_ctrlblk.rcu_try_flip_state =
				577	rcu_try_flip_waitmb_state;
				578	break;
				579	case rcu_try_flip_waitmb_state:
				580	if (rcu_try_flip_waitmb())
				581	rcu_ctrlblk.rcu_try_flip_state =
				582	rcu_try_flip_idle_state;
				583	}
				584	spin_unlock_irqrestore(&rcu_ctrlblk.fliplock, flags);
				585	}
				586
				587	/*
				588	* Check to see if this CPU needs to do a memory barrier in order to
				589	* ensure that any prior RCU read-side critical sections have committed
				590	* their counter manipulations and critical-section memory references
				591	* before declaring the grace period to be completed.
				592	*/
				593	static void rcu_check_mb(int cpu)
				594	{
				595	if (per_cpu(rcu_mb_flag, cpu) == rcu_mb_needed) {
				596	smp_mb(); /* Ensure RCU read-side accesses are visible. */
				597	per_cpu(rcu_mb_flag, cpu) = rcu_mb_done;
				598	}
				599	}
				600
				601	void rcu_check_callbacks(int cpu, int user)
				602	{
				603	unsigned long flags;
				604	struct rcu_data *rdp = RCU_DATA_CPU(cpu);
				605
				606	rcu_check_mb(cpu);
				607	if (rcu_ctrlblk.completed == rdp->completed)
				608	rcu_try_flip();
				609	spin_lock_irqsave(&rdp->lock, flags);
				610	RCU_TRACE_RDP(rcupreempt_trace_check_callbacks, rdp);
				611	__rcu_advance_callbacks(rdp);
				612	if (rdp->donelist == NULL) {
				613	spin_unlock_irqrestore(&rdp->lock, flags);
				614	} else {
				615	spin_unlock_irqrestore(&rdp->lock, flags);
				616	raise_softirq(RCU_SOFTIRQ);
				617	}
				618	}
				619
				620	/*
				621	* Needed by dynticks, to make sure all RCU processing has finished
				622	* when we go idle:
				623	*/
				624	void rcu_advance_callbacks(int cpu, int user)
				625	{
				626	unsigned long flags;
				627	struct rcu_data *rdp = RCU_DATA_CPU(cpu);
				628
				629	if (rcu_ctrlblk.completed == rdp->completed) {
				630	rcu_try_flip();
				631	if (rcu_ctrlblk.completed == rdp->completed)
				632	return;
				633	}
				634	spin_lock_irqsave(&rdp->lock, flags);
				635	RCU_TRACE_RDP(rcupreempt_trace_check_callbacks, rdp);
				636	__rcu_advance_callbacks(rdp);
				637	spin_unlock_irqrestore(&rdp->lock, flags);
				638	}
				639
				640	static void rcu_process_callbacks(struct softirq_action *unused)
				641	{
				642	unsigned long flags;
				643	struct rcu_head next, list;
				644	struct rcu_data *rdp = RCU_DATA_ME();
				645
				646	spin_lock_irqsave(&rdp->lock, flags);
				647	list = rdp->donelist;
				648	if (list == NULL) {
				649	spin_unlock_irqrestore(&rdp->lock, flags);
				650	return;
				651	}
				652	rdp->donelist = NULL;
				653	rdp->donetail = &rdp->donelist;
				654	RCU_TRACE_RDP(rcupreempt_trace_done_remove, rdp);
				655	spin_unlock_irqrestore(&rdp->lock, flags);
				656	while (list) {
				657	next = list->next;
				658	list->func(list);
				659	list = next;
				660	RCU_TRACE_ME(rcupreempt_trace_invoke);
				661	}
				662	}
				663
				664	void call_rcu(struct rcu_head head, void (func)(struct rcu_head *rcu))
				665	{
				666	unsigned long flags;
				667	struct rcu_data *rdp;
				668
				669	head->func = func;
				670	head->next = NULL;
				671	local_irq_save(flags);
				672	rdp = RCU_DATA_ME();
				673	spin_lock(&rdp->lock);
				674	__rcu_advance_callbacks(rdp);
				675	*rdp->nexttail = head;
				676	rdp->nexttail = &head->next;
				677	RCU_TRACE_RDP(rcupreempt_trace_next_add, rdp);
				678	spin_unlock(&rdp->lock);
				679	local_irq_restore(flags);
				680	}
				681	EXPORT_SYMBOL_GPL(call_rcu);
				682
				683	/*
				684	* Wait until all currently running preempt_disable() code segments
				685	* (including hardware-irq-disable segments) complete. Note that
				686	* in -rt this does -not- necessarily result in all currently executing
				687	* interrupt -handlers- having completed.
				688	*/
				689	void __synchronize_sched(void)
				690	{
				691	cpumask_t oldmask;
				692	int cpu;
				693
				694	if (sched_getaffinity(0, &oldmask) < 0)
				695	oldmask = cpu_possible_map;
				696	for_each_online_cpu(cpu) {
				697	sched_setaffinity(0, cpumask_of_cpu(cpu));
				698	schedule();
				699	}
				700	sched_setaffinity(0, oldmask);
				701	}
				702	EXPORT_SYMBOL_GPL(__synchronize_sched);
				703
				704	/*
				705	* Check to see if any future RCU-related work will need to be done
				706	* by the current CPU, even if none need be done immediately, returning
				707	* 1 if so. Assumes that notifiers would take care of handling any
				708	* outstanding requests from the RCU core.
				709	*
				710	* This function is part of the RCU implementation; it is -not-
				711	* an exported member of the RCU API.
				712	*/
				713	int rcu_needs_cpu(int cpu)
				714	{
				715	struct rcu_data *rdp = RCU_DATA_CPU(cpu);
				716
				717	return (rdp->donelist != NULL \|\|
				718	!!rdp->waitlistcount \|\|
				719	rdp->nextlist != NULL);
				720	}
				721
				722	int rcu_pending(int cpu)
				723	{
				724	struct rcu_data *rdp = RCU_DATA_CPU(cpu);
				725
				726	/* The CPU has at least one callback queued somewhere. */
				727
				728	if (rdp->donelist != NULL \|\|
				729	!!rdp->waitlistcount \|\|
				730	rdp->nextlist != NULL)
				731	return 1;
				732
				733	/* The RCU core needs an acknowledgement from this CPU. */
				734
				735	if ((per_cpu(rcu_flip_flag, cpu) == rcu_flipped) \|\|
				736	(per_cpu(rcu_mb_flag, cpu) == rcu_mb_needed))
				737	return 1;
				738
				739	/* This CPU has fallen behind the global grace-period number. */
				740
				741	if (rdp->completed != rcu_ctrlblk.completed)
				742	return 1;
				743
				744	/* Nothing needed from this CPU. */
				745
				746	return 0;
				747	}
				748
				749	void __init __rcu_init(void)
				750	{
				751	int cpu;
				752	int i;
				753	struct rcu_data *rdp;
				754
				755	printk(KERN_NOTICE "Preemptible RCU implementation.\n");
				756	for_each_possible_cpu(cpu) {
				757	rdp = RCU_DATA_CPU(cpu);
				758	spin_lock_init(&rdp->lock);
				759	rdp->completed = 0;
				760	rdp->waitlistcount = 0;
				761	rdp->nextlist = NULL;
				762	rdp->nexttail = &rdp->nextlist;
				763	for (i = 0; i < GP_STAGES; i++) {
				764	rdp->waitlist[i] = NULL;
				765	rdp->waittail[i] = &rdp->waitlist[i];
				766	}
				767	rdp->donelist = NULL;
				768	rdp->donetail = &rdp->donelist;
				769	rdp->rcu_flipctr[0] = 0;
				770	rdp->rcu_flipctr[1] = 0;
				771	}
				772	open_softirq(RCU_SOFTIRQ, rcu_process_callbacks, NULL);
				773	}
				774
				775	/*
				776	* Deprecated, use synchronize_rcu() or synchronize_sched() instead.
				777	*/
				778	void synchronize_kernel(void)
				779	{
				780	synchronize_rcu();
				781	}
				782
				783	#ifdef CONFIG_RCU_TRACE
				784	long *rcupreempt_flipctr(int cpu)
				785	{
				786	return &RCU_DATA_CPU(cpu)->rcu_flipctr[0];
				787	}
				788	EXPORT_SYMBOL_GPL(rcupreempt_flipctr);
				789
				790	int rcupreempt_flip_flag(int cpu)
				791	{
				792	return per_cpu(rcu_flip_flag, cpu);
				793	}
				794	EXPORT_SYMBOL_GPL(rcupreempt_flip_flag);
				795
				796	int rcupreempt_mb_flag(int cpu)
				797	{
				798	return per_cpu(rcu_mb_flag, cpu);
				799	}
				800	EXPORT_SYMBOL_GPL(rcupreempt_mb_flag);
				801
				802	char *rcupreempt_try_flip_state_name(void)
				803	{
				804	return rcu_try_flip_state_names[rcu_ctrlblk.rcu_try_flip_state];
				805	}
				806	EXPORT_SYMBOL_GPL(rcupreempt_try_flip_state_name);
				807
				808	struct rcupreempt_trace *rcupreempt_trace_cpu(int cpu)
				809	{
				810	struct rcu_data *rdp = RCU_DATA_CPU(cpu);
				811
				812	return &rdp->trace;
				813	}
				814	EXPORT_SYMBOL_GPL(rcupreempt_trace_cpu);
				815
				816	#endif /* #ifdef RCU_TRACE */