/*
 * Sleepable Read-Copy Update mechanism for mutual exclusion.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
 *
 * Copyright (C) IBM Corporation, 2006
 *
 * Author: Paul McKenney <paulmck@us.ibm.com>
 *
 * For detailed explanation of Read-Copy Update mechanism see -
 *		Documentation/RCU/ *.txt
 *
 */

#include <linux/export.h>
#include <linux/mutex.h>
#include <linux/percpu.h>
#include <linux/preempt.h>
#include <linux/rcupdate.h>
#include <linux/sched.h>
#include <linux/smp.h>
#include <linux/delay.h>
#include <linux/srcu.h>

static int init_srcu_struct_fields(struct srcu_struct *sp)
{
	sp->completed = 0;
	mutex_init(&sp->mutex);
	sp->per_cpu_ref = alloc_percpu(struct srcu_struct_array);
	return sp->per_cpu_ref ? 0 : -ENOMEM;
}

#ifdef CONFIG_DEBUG_LOCK_ALLOC

int __init_srcu_struct(struct srcu_struct *sp, const char *name,
		       struct lock_class_key *key)
{
	/* Don't re-initialize a lock while it is held. */
	debug_check_no_locks_freed((void *)sp, sizeof(*sp));
	lockdep_init_map(&sp->dep_map, name, key, 0);
	return init_srcu_struct_fields(sp);
}
EXPORT_SYMBOL_GPL(__init_srcu_struct);

#else /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */

/**
 * init_srcu_struct - initialize a sleep-RCU structure
 * @sp: structure to initialize.
 *
 * Must invoke this on a given srcu_struct before passing that srcu_struct
 * to any other function.  Each srcu_struct represents a separate domain
 * of SRCU protection.
 */
int init_srcu_struct(struct srcu_struct *sp)
{
	return init_srcu_struct_fields(sp);
}
EXPORT_SYMBOL_GPL(init_srcu_struct);

#endif /* #else #ifdef CONFIG_DEBUG_LOCK_ALLOC */

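/*
 * A minimal initialization sketch (hypothetical names): a subsystem
 * typically declares one srcu_struct per protection domain and
 * initializes it before first use, checking for allocation failure:
 *
 *	static struct srcu_struct my_srcu;
 *
 *	static int __init my_subsys_init(void)
 *	{
 *		return init_srcu_struct(&my_srcu);
 *	}
 *
 * Under CONFIG_DEBUG_LOCK_ALLOC, the init_srcu_struct() wrapper in
 * include/linux/srcu.h supplies the lock_class_key and lands in
 * __init_srcu_struct() above instead.
 */
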
/*
 * Returns approximate total of the readers' ->seq[] values for the
 * rank of per-CPU counters specified by idx.
 */
static unsigned long srcu_readers_seq_idx(struct srcu_struct *sp, int idx)
{
	int cpu;
	unsigned long sum = 0;
	unsigned long t;

	for_each_possible_cpu(cpu) {
		t = ACCESS_ONCE(per_cpu_ptr(sp->per_cpu_ref, cpu)->seq[idx]);
		sum += t;
	}
	return sum;
}

/*
 * Returns approximate number of readers active on the specified rank
 * of the per-CPU ->c[] counters.
 */
static unsigned long srcu_readers_active_idx(struct srcu_struct *sp, int idx)
{
	int cpu;
	unsigned long sum = 0;
	unsigned long t;

	for_each_possible_cpu(cpu) {
		t = ACCESS_ONCE(per_cpu_ptr(sp->per_cpu_ref, cpu)->c[idx]);
		sum += t;
	}
	return sum;
}

/*
 * Return true if the number of pre-existing readers is determined to
 * be stably zero.  An example unstable zero can occur if the call
 * to srcu_readers_active_idx() misses an __srcu_read_lock() increment,
 * but due to task migration, sees the corresponding __srcu_read_unlock()
 * decrement.  This can happen because srcu_readers_active_idx() takes
 * time to sum the array, and might in fact be interrupted or preempted
 * partway through the summation.
 */
static bool srcu_readers_active_idx_check(struct srcu_struct *sp, int idx)
{
	unsigned long seq;

	seq = srcu_readers_seq_idx(sp, idx);

	/*
	 * The following smp_mb() A pairs with the smp_mb() B located in
	 * __srcu_read_lock().  This pairing ensures that if an
	 * __srcu_read_lock() increments its counter after the summation
	 * in srcu_readers_active_idx(), then the corresponding SRCU read-side
	 * critical section will see any changes made prior to the start
	 * of the current SRCU grace period.
	 *
	 * Also, if the above call to srcu_readers_seq_idx() saw the
	 * increment of ->seq[], then the call to srcu_readers_active_idx()
	 * must see the increment of ->c[].
	 */
	smp_mb(); /* A */

	/*
	 * Note that srcu_readers_active_idx() can incorrectly return
	 * zero even though there is a pre-existing reader throughout.
	 * To see this, suppose that task A is in a very long SRCU
	 * read-side critical section that started on CPU 0, and that
	 * no other reader exists, so that the sum of the counters
	 * is equal to one.  Then suppose that task B starts executing
	 * srcu_readers_active_idx(), summing up to CPU 1, and then that
	 * task C starts reading on CPU 0, so that its increment is not
	 * summed, but finishes reading on CPU 2, so that its decrement
	 * -is- summed.  Then when task B completes its sum, it will
	 * incorrectly get zero, despite the fact that task A has been
	 * in its SRCU read-side critical section the whole time.
	 *
	 * We therefore do a validation step should srcu_readers_active_idx()
	 * return zero.
	 */
	if (srcu_readers_active_idx(sp, idx) != 0)
		return false;

	/*
	 * The remainder of this function is the validation step.
	 * The following smp_mb() D pairs with the smp_mb() C in
	 * __srcu_read_unlock().  If the __srcu_read_unlock() was seen
	 * by srcu_readers_active_idx() above, then any destructive
	 * operation performed after the grace period will happen after
	 * the corresponding SRCU read-side critical section.
	 *
	 * Note that there can be at most NR_CPUS worth of readers using
	 * the old index, which is not enough to overflow even a 32-bit
	 * integer.  (Yes, this does mean that systems having more than
	 * a billion or so CPUs need to be 64-bit systems.)  Therefore,
	 * the sum of the ->seq[] counters cannot possibly overflow.
	 * Therefore, the only way that the return values of the two
	 * calls to srcu_readers_seq_idx() can be equal is if there were
	 * no increments of the corresponding rank of ->seq[] counts
	 * in the interim.  But the missed-increment scenario laid out
	 * above includes an increment of the ->seq[] counter by
	 * the corresponding __srcu_read_lock().  Therefore, if this
	 * scenario occurs, the return values from the two calls to
	 * srcu_readers_seq_idx() will differ, and thus the validation
	 * step below suffices.
	 */
	smp_mb(); /* D */

	return srcu_readers_seq_idx(sp, idx) == seq;
}

/**
 * srcu_readers_active - returns approximate number of readers.
 * @sp: which srcu_struct to count active readers (holding srcu_read_lock).
 *
 * Note that this is not an atomic primitive, and can therefore suffer
 * severe errors when invoked on an active srcu_struct.  That said, it
 * can be useful as an error check at cleanup time.
 */
static int srcu_readers_active(struct srcu_struct *sp)
{
	return srcu_readers_active_idx(sp, 0) + srcu_readers_active_idx(sp, 1);
}

/**
 * cleanup_srcu_struct - deconstruct a sleep-RCU structure
 * @sp: structure to clean up.
 *
 * Must invoke this after you are finished using a given srcu_struct that
 * was initialized via init_srcu_struct(), else you leak memory.
 */
void cleanup_srcu_struct(struct srcu_struct *sp)
{
	int sum;

	sum = srcu_readers_active(sp);
	WARN_ON(sum);  /* Leakage unless caller handles error. */
	if (sum != 0)
		return;
	free_percpu(sp->per_cpu_ref);
	sp->per_cpu_ref = NULL;
}
EXPORT_SYMBOL_GPL(cleanup_srcu_struct);

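/*
 * A teardown sketch (hypothetical names): once no new readers can start,
 * wait for any pre-existing readers and then release the per-CPU state,
 * for example at module exit:
 *
 *	static void __exit my_subsys_exit(void)
 *	{
 *		synchronize_srcu(&my_srcu);
 *		cleanup_srcu_struct(&my_srcu);
 *	}
 *
 * Skipping that ordering trips the WARN_ON() above and leaks the
 * per-CPU counter array.
 */
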
/*
 * Counts the new reader in the appropriate per-CPU element of the
 * srcu_struct.  Must be called from process context.
 * Returns an index that must be passed to the matching srcu_read_unlock().
 */
int __srcu_read_lock(struct srcu_struct *sp)
{
	int idx;

	preempt_disable();
	idx = rcu_dereference_index_check(sp->completed,
					  rcu_read_lock_sched_held()) & 0x1;
	ACCESS_ONCE(this_cpu_ptr(sp->per_cpu_ref)->c[idx]) += 1;
	smp_mb(); /* B */  /* Avoid leaking the critical section. */
	ACCESS_ONCE(this_cpu_ptr(sp->per_cpu_ref)->seq[idx]) += 1;
	preempt_enable();
	return idx;
}
EXPORT_SYMBOL_GPL(__srcu_read_lock);

/*
 * Removes the count for the old reader from the appropriate per-CPU
 * element of the srcu_struct.  Note that this may well be a different
 * CPU than that which was incremented by the corresponding srcu_read_lock().
 * Must be called from process context.
 */
void __srcu_read_unlock(struct srcu_struct *sp, int idx)
{
	preempt_disable();
	smp_mb(); /* C */  /* Avoid leaking the critical section. */
	ACCESS_ONCE(this_cpu_ptr(sp->per_cpu_ref)->c[idx]) -= 1;
	preempt_enable();
}
EXPORT_SYMBOL_GPL(__srcu_read_unlock);

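/*
 * A read-side sketch (hypothetical names): callers normally use the
 * srcu_read_lock()/srcu_read_unlock() wrappers from include/linux/srcu.h,
 * which invoke the two functions above, and must pass the returned index
 * to the matching unlock:
 *
 *	int idx;
 *
 *	idx = srcu_read_lock(&my_srcu);
 *	p = srcu_dereference(my_ptr, &my_srcu);
 *	if (p)
 *		do_something_with(p);
 *	srcu_read_unlock(&my_srcu, idx);
 *
 * Unlike rcu_read_lock(), the SRCU read-side critical section may block,
 * but it must not call synchronize_srcu() on the same srcu_struct.
 */
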
/*
 * We use an adaptive strategy for synchronize_srcu() and especially for
 * synchronize_srcu_expedited().  We spin for a fixed time period
 * (defined below) to allow SRCU readers to exit their read-side critical
 * sections.  If there are still some readers after 10 microseconds,
 * we repeatedly block for 1-millisecond time periods.  This approach
 * has done well in testing, so there is no need for a config parameter.
 */
#define SYNCHRONIZE_SRCU_READER_DELAY 5

/*
 * Wait until all pre-existing readers complete.  Such readers
 * will have used the index specified by "idx".
 */
static void wait_idx(struct srcu_struct *sp, int idx, bool expedited)
{
	int trycount = 0;

	/*
	 * SRCU read-side critical sections are normally short, so wait
	 * a small amount of time before possibly blocking.
	 */
	if (!srcu_readers_active_idx_check(sp, idx)) {
		udelay(SYNCHRONIZE_SRCU_READER_DELAY);
		while (!srcu_readers_active_idx_check(sp, idx)) {
			if (expedited && ++trycount < 10)
				udelay(SYNCHRONIZE_SRCU_READER_DELAY);
			else
				schedule_timeout_interruptible(1);
		}
	}
}

static void srcu_flip(struct srcu_struct *sp)
{
	sp->completed++;
}

/*
 * Helper function for synchronize_srcu() and synchronize_srcu_expedited().
 */
static void __synchronize_srcu(struct srcu_struct *sp, bool expedited)
{
	int busy_idx;

	rcu_lockdep_assert(!lock_is_held(&sp->dep_map) &&
			   !lock_is_held(&rcu_bh_lock_map) &&
			   !lock_is_held(&rcu_lock_map) &&
			   !lock_is_held(&rcu_sched_lock_map),
			   "Illegal synchronize_srcu() in same-type SRCU (or RCU) read-side critical section");

	mutex_lock(&sp->mutex);
	busy_idx = sp->completed & 0x1UL;

	/*
	 * If we recently flipped the index, there will be some readers
	 * using idx=0 and others using idx=1.  Therefore, two calls to
	 * wait_idx() suffice to ensure that all pre-existing readers
	 * have completed:
	 *
	 * __synchronize_srcu() {
	 *	wait_idx(sp, 0, expedited);
	 *	wait_idx(sp, 1, expedited);
	 * }
	 *
	 * Starvation is prevented by the fact that we flip the index.
	 * While we wait on one index to clear out, almost all new readers
	 * will be using the other index.  The number of new readers using
	 * the index we are waiting on is sharply bounded by roughly the
	 * number of CPUs.
	 *
	 * How can new readers possibly be using the old pre-flip value of
	 * the index?  Consider the following sequence of events:
	 *
	 * Suppose that during the previous grace period, a reader
	 * picked up the old value of the index, but did not increment
	 * its counter until after the previous instance of
	 * __synchronize_srcu() did the counter summation and recheck.
	 * That previous grace period was OK because the reader did
	 * not start until after the grace period started, so the grace
	 * period was not obligated to wait for that reader.
	 *
	 * However, this sequence of events is quite improbable, so
	 * this call to wait_idx(), which waits on the really old readers
	 * described above, will almost never need to wait.
	 */
	wait_idx(sp, 1 - busy_idx, expedited);

	/* Flip the index to avoid reader-induced starvation. */
	srcu_flip(sp);

	/* Wait for recent pre-existing readers. */
	wait_idx(sp, busy_idx, expedited);

	mutex_unlock(&sp->mutex);
}

/**
 * synchronize_srcu - wait for prior SRCU read-side critical-section completion
 * @sp: srcu_struct with which to synchronize.
 *
 * Flip the completed counter, and wait for the old count to drain to zero.
 * As with classic RCU, the updater must use some separate means of
 * synchronizing concurrent updates.  Can block; must be called from
 * process context.
 *
 * Note that it is illegal to call synchronize_srcu() from the corresponding
 * SRCU read-side critical section; doing so will result in deadlock.
 * However, it is perfectly legal to call synchronize_srcu() on one
 * srcu_struct from some other srcu_struct's read-side critical section.
 */
void synchronize_srcu(struct srcu_struct *sp)
{
	__synchronize_srcu(sp, 0);
}
EXPORT_SYMBOL_GPL(synchronize_srcu);

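/*
 * An update-side sketch (hypothetical names): replace a published pointer,
 * wait for pre-existing readers, then free the old version:
 *
 *	old = rcu_dereference_protected(my_ptr,
 *					lockdep_is_held(&my_update_lock));
 *	rcu_assign_pointer(my_ptr, new);
 *	synchronize_srcu(&my_srcu);
 *	kfree(old);
 *
 * Readers that entered their critical sections before the
 * rcu_assign_pointer() may still reference "old"; synchronize_srcu()
 * waits for all of them before the kfree().
 */
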
/**
 * synchronize_srcu_expedited - Brute-force SRCU grace period
 * @sp: srcu_struct with which to synchronize.
 *
 * Wait for an SRCU grace period to elapse, but be more aggressive about
 * spinning rather than blocking when waiting.
 *
 * Note that it is illegal to call this function while holding any lock
 * that is acquired by a CPU-hotplug notifier.  It is also illegal to call
 * synchronize_srcu_expedited() from the corresponding SRCU read-side
 * critical section; doing so will result in deadlock.  However, it is
 * perfectly legal to call synchronize_srcu_expedited() on one srcu_struct
 * from some other srcu_struct's read-side critical section, as long as
 * the resulting graph of srcu_structs is acyclic.
 */
void synchronize_srcu_expedited(struct srcu_struct *sp)
{
	__synchronize_srcu(sp, 1);
}
EXPORT_SYMBOL_GPL(synchronize_srcu_expedited);

/**
 * srcu_batches_completed - return batches completed.
 * @sp: srcu_struct on which to report batch completion.
 *
 * Report the number of batches, correlated with, but not necessarily
 * precisely the same as, the number of grace periods that have elapsed.
 */

long srcu_batches_completed(struct srcu_struct *sp)
{
	return sp->completed;
}
EXPORT_SYMBOL_GPL(srcu_batches_completed);