// SPDX-License-Identifier: GPL-2.0
/* kernel/rwsem.c: R/W semaphores, public implementation
 *
 * Written by David Howells (dhowells@redhat.com).
 * Derived from asm-i386/semaphore.h
 *
 * Writer lock-stealing by Alex Shi <alex.shi@intel.com>
 * and Michel Lespinasse <walken@google.com>
 *
 * Optimistic spinning by Tim Chen <tim.c.chen@intel.com>
 * and Davidlohr Bueso <davidlohr@hp.com>. Based on mutexes.
 *
 * Rwsem count bit fields re-definition and rwsem rearchitecture by
 * Waiman Long <longman@redhat.com> and
 * Peter Zijlstra <peterz@infradead.org>.
 */

#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/sched/rt.h>
#include <linux/sched/task.h>
#include <linux/sched/debug.h>
#include <linux/sched/wake_q.h>
#include <linux/sched/signal.h>
#include <linux/export.h>
#include <linux/rwsem.h>
#include <linux/atomic.h>

#include "rwsem.h"
#include "lock_events.h"

/*
 * The least significant 2 bits of the owner value have the following
 * meanings when set.
 *  - Bit 0: RWSEM_READER_OWNED - The rwsem is owned by readers
 *  - Bit 1: RWSEM_NONSPINNABLE - Waiters cannot spin on the rwsem
 *    The rwsem is anonymously owned, i.e. the owner(s) cannot be
 *    readily determined. It can be reader-owned, or the owning writer
 *    is indeterminate.
 *
 * When a writer acquires a rwsem, it puts its task_struct pointer
 * into the owner field. It is cleared after an unlock.
 *
 * When a reader acquires a rwsem, it also puts its task_struct
 * pointer into the owner field with both the RWSEM_READER_OWNED and
 * RWSEM_NONSPINNABLE bits set. On unlock, the owner field will
 * largely be left untouched. So for a free or reader-owned rwsem,
 * the owner value may contain information about the last reader that
 * acquired the rwsem. The anonymous bit is set because that particular
 * reader may or may not still own the lock.
 *
 * That information may be helpful in debugging cases where the system
 * seems to hang on a reader-owned rwsem, especially if only one reader
 * is involved. Ideally we would like to track all the readers that own
 * a rwsem, but the overhead is simply too big.
 */
#define RWSEM_READER_OWNED	(1UL << 0)
#define RWSEM_NONSPINNABLE	(1UL << 1)
#define RWSEM_OWNER_FLAGS_MASK	(RWSEM_READER_OWNED | RWSEM_NONSPINNABLE)
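
/*
 * Illustration: a writer leaves its task_struct pointer in ->owner with both
 * flag bits clear, so (owner & RWSEM_OWNER_FLAGS_MASK) == 0, whereas a reader
 * leaves its pointer with both bits set, so (owner & RWSEM_OWNER_FLAGS_MASK)
 * == 0x3 even after that reader has already unlocked.
 */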

#ifdef CONFIG_DEBUG_RWSEMS
# define DEBUG_RWSEMS_WARN_ON(c, sem)	do {			\
	if (!debug_locks_silent &&				\
	    WARN_ONCE(c, "DEBUG_RWSEMS_WARN_ON(%s): count = 0x%lx, owner = 0x%lx, curr 0x%lx, list %sempty\n",\
		#c, atomic_long_read(&(sem)->count),		\
		(long)((sem)->owner), (long)current,		\
		list_empty(&(sem)->wait_list) ? "" : "not "))	\
			debug_locks_off();			\
	} while (0)
#else
# define DEBUG_RWSEMS_WARN_ON(c, sem)
#endif

/*
 * The definition of the atomic counter in the semaphore:
 *
 * Bit 0    - writer locked bit
 * Bit 1    - waiters present bit
 * Bit 2    - lock handoff bit
 * Bits 3-7 - reserved
 * Bits 8-X - 24-bit (32-bit) or 56-bit reader count
 *
 * atomic_long_fetch_add() is used to obtain reader lock, whereas
 * atomic_long_cmpxchg() will be used to obtain writer lock.
 *
 * There are three places where the lock handoff bit may be set or cleared.
 * 1) rwsem_mark_wake() for readers.
 * 2) rwsem_try_write_lock() for writers.
 * 3) Error path of rwsem_down_write_slowpath().
 *
 * For all the above cases, wait_lock will be held. A writer must also
 * be the first one in the wait_list to be eligible for setting the handoff
 * bit. So concurrent setting/clearing of handoff bit is not possible.
 */
#define RWSEM_WRITER_LOCKED	(1UL << 0)
#define RWSEM_FLAG_WAITERS	(1UL << 1)
#define RWSEM_FLAG_HANDOFF	(1UL << 2)

#define RWSEM_READER_SHIFT	8
#define RWSEM_READER_BIAS	(1UL << RWSEM_READER_SHIFT)
#define RWSEM_READER_MASK	(~(RWSEM_READER_BIAS - 1))
#define RWSEM_WRITER_MASK	RWSEM_WRITER_LOCKED
#define RWSEM_LOCK_MASK		(RWSEM_WRITER_MASK|RWSEM_READER_MASK)
#define RWSEM_READ_FAILED_MASK	(RWSEM_WRITER_MASK|RWSEM_FLAG_WAITERS|\
				 RWSEM_FLAG_HANDOFF)
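
/*
 * Example count values: an uncontended write lock is 0x1 (RWSEM_WRITER_LOCKED),
 * a write lock with queued waiters is 0x3, two active readers with no waiters
 * give 0x200 (2 * RWSEM_READER_BIAS), and 0x106 means one active reader with
 * waiters queued and the handoff bit set on behalf of the first waiter.
 */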

/*
 * All writes to owner are protected by WRITE_ONCE() to make sure that
 * store tearing can't happen as optimistic spinners may read and use
 * the owner value concurrently without lock. A read from owner, however,
 * may not need READ_ONCE() as long as the pointer value is only used
 * for comparison and isn't being dereferenced.
 */
static inline void rwsem_set_owner(struct rw_semaphore *sem)
{
	WRITE_ONCE(sem->owner, current);
}

static inline void rwsem_clear_owner(struct rw_semaphore *sem)
{
	WRITE_ONCE(sem->owner, NULL);
}

/*
 * The task_struct pointer of the last owning reader will be left in
 * the owner field.
 *
 * Note that the owner value just indicates the task has owned the rwsem
 * previously; it may not be the real owner or one of the real owners
 * anymore when that field is examined, so take it with a grain of salt.
 */
static inline void __rwsem_set_reader_owned(struct rw_semaphore *sem,
					    struct task_struct *owner)
{
	unsigned long val = (unsigned long)owner | RWSEM_READER_OWNED
						 | RWSEM_NONSPINNABLE;

	WRITE_ONCE(sem->owner, (struct task_struct *)val);
}

static inline void rwsem_set_reader_owned(struct rw_semaphore *sem)
{
	__rwsem_set_reader_owned(sem, current);
}

/*
 * Return true if a rwsem waiter can spin on the rwsem's owner
 * and steal the lock.
 * N.B. !owner is considered spinnable.
 */
static inline bool is_rwsem_owner_spinnable(struct task_struct *owner)
{
	return !((unsigned long)owner & RWSEM_NONSPINNABLE);
}

#ifdef CONFIG_DEBUG_RWSEMS
/*
 * With CONFIG_DEBUG_RWSEMS configured, this makes sure that if there
 * is a task pointer in the owner field of a reader-owned rwsem, it is
 * the real owner or one of the real owners. The only exception is when
 * the unlock is done by up_read_non_owner().
 */
static inline void rwsem_clear_reader_owned(struct rw_semaphore *sem)
{
	unsigned long val = (unsigned long)current | RWSEM_READER_OWNED
						   | RWSEM_NONSPINNABLE;
	if (READ_ONCE(sem->owner) == (struct task_struct *)val)
		cmpxchg_relaxed((unsigned long *)&sem->owner, val,
				RWSEM_READER_OWNED | RWSEM_NONSPINNABLE);
}
#else
static inline void rwsem_clear_reader_owned(struct rw_semaphore *sem)
{
}
#endif

/*
 * Guide to the rw_semaphore's count field.
 *
 * When the RWSEM_WRITER_LOCKED bit in count is set, the lock is owned
 * by a writer.
 *
 * The lock is owned by readers when
 * (1) the RWSEM_WRITER_LOCKED isn't set in count,
 * (2) some of the reader bits are set in count, and
 * (3) the owner field has the RWSEM_READER_OWNED bit set.
 *
 * Having some reader bits set is not enough to guarantee a reader-owned
 * lock as the readers may be in the process of backing out from the count
 * and a writer has just released the lock. So another writer may steal
 * the lock immediately after that.
 */
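
/*
 * For example, count == 0x100 together with an owner value that has the
 * RWSEM_READER_OWNED bit set indicates a reader-owned lock, while
 * count == 0x100 on its own is inconclusive: that reader may still be in the
 * slowpath backing its RWSEM_READER_BIAS out again before going to sleep.
 */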

/*
 * Initialize an rwsem:
 */
void __init_rwsem(struct rw_semaphore *sem, const char *name,
		  struct lock_class_key *key)
{
#ifdef CONFIG_DEBUG_LOCK_ALLOC
	/*
	 * Make sure we are not reinitializing a held semaphore:
	 */
	debug_check_no_locks_freed((void *)sem, sizeof(*sem));
	lockdep_init_map(&sem->dep_map, name, key, 0);
#endif
	atomic_long_set(&sem->count, RWSEM_UNLOCKED_VALUE);
	raw_spin_lock_init(&sem->wait_lock);
	INIT_LIST_HEAD(&sem->wait_list);
	sem->owner = NULL;
#ifdef CONFIG_RWSEM_SPIN_ON_OWNER
	osq_lock_init(&sem->osq);
#endif
}
EXPORT_SYMBOL(__init_rwsem);

enum rwsem_waiter_type {
	RWSEM_WAITING_FOR_WRITE,
	RWSEM_WAITING_FOR_READ
};

struct rwsem_waiter {
	struct list_head list;
	struct task_struct *task;
	enum rwsem_waiter_type type;
	unsigned long timeout;
};
#define rwsem_first_waiter(sem) \
	list_first_entry(&sem->wait_list, struct rwsem_waiter, list)

enum rwsem_wake_type {
	RWSEM_WAKE_ANY,		/* Wake whatever's at head of wait list */
	RWSEM_WAKE_READERS,	/* Wake readers only */
	RWSEM_WAKE_READ_OWNED	/* Waker thread holds the read lock */
};

enum writer_wait_state {
	WRITER_NOT_FIRST,	/* Writer is not first in wait list */
	WRITER_FIRST,		/* Writer is first in wait list */
	WRITER_HANDOFF		/* Writer is first & handoff needed */
};

/*
 * The typical HZ value is either 250 or 1000. So set the minimum waiting
 * time to at least 4ms or 1 jiffy (if it is higher than 4ms) in the wait
 * queue before initiating the handoff protocol.
 */
#define RWSEM_WAIT_TIMEOUT	DIV_ROUND_UP(HZ, 250)
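
/*
 * For example, DIV_ROUND_UP(HZ, 250) evaluates to 4 jiffies (4ms) with
 * HZ=1000, 1 jiffy (4ms) with HZ=250 and 1 jiffy (10ms) with HZ=100.
 */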

/*
 * Magic number to batch-wakeup waiting readers, even when writers are
 * also present in the queue. This both limits the amount of work the
 * waking thread must do and also prevents any potential counter overflow,
 * however unlikely.
 */
#define MAX_READERS_WAKEUP	0x100
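
/*
 * Waking a full batch adds at most MAX_READERS_WAKEUP * RWSEM_READER_BIAS
 * (0x100 * 0x100 = 0x10000) to the count in one step, far below the capacity
 * of even the 24-bit reader count field used on 32-bit kernels.
 */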

/*
 * handle the lock release when there are processes blocked on it that can now run
 * - if we come here from up_xxxx(), then the RWSEM_FLAG_WAITERS bit must
 *   have been set.
 * - there must be someone on the queue
 * - the wait_lock must be held by the caller
 * - tasks are marked for wakeup, the caller must later invoke wake_up_q()
 *   to actually wakeup the blocked task(s) and drop the reference count,
 *   preferably when the wait_lock is released
 * - woken process blocks are discarded from the list after having task zeroed
 * - writers are only marked woken if downgrading is false
 */
static void rwsem_mark_wake(struct rw_semaphore *sem,
			    enum rwsem_wake_type wake_type,
			    struct wake_q_head *wake_q)
{
	struct rwsem_waiter *waiter, *tmp;
	long oldcount, woken = 0, adjustment = 0;
	struct list_head wlist;

	lockdep_assert_held(&sem->wait_lock);

	/*
	 * Take a peek at the queue head waiter such that we can determine
	 * the wakeup(s) to perform.
	 */
	waiter = rwsem_first_waiter(sem);

	if (waiter->type == RWSEM_WAITING_FOR_WRITE) {
		if (wake_type == RWSEM_WAKE_ANY) {
			/*
			 * Mark writer at the front of the queue for wakeup.
			 * Until the task is actually awoken later by
			 * the caller, other writers are able to steal it.
			 * Readers, on the other hand, will block as they
			 * will notice the queued writer.
			 */
			wake_q_add(wake_q, waiter->task);
			lockevent_inc(rwsem_wake_writer);
		}

		return;
	}

	/*
	 * Writers might steal the lock before we grant it to the next reader.
	 * We prefer to do the first reader grant before counting readers
	 * so we can bail out early if a writer stole the lock.
	 */
	if (wake_type != RWSEM_WAKE_READ_OWNED) {
		adjustment = RWSEM_READER_BIAS;
		oldcount = atomic_long_fetch_add(adjustment, &sem->count);
		if (unlikely(oldcount & RWSEM_WRITER_MASK)) {
			/*
			 * When we've been waiting "too" long (for writers
			 * to give up the lock), request a HANDOFF to
			 * force the issue.
			 */
			if (!(oldcount & RWSEM_FLAG_HANDOFF) &&
			    time_after(jiffies, waiter->timeout)) {
				adjustment -= RWSEM_FLAG_HANDOFF;
				lockevent_inc(rwsem_rlock_handoff);
			}

			atomic_long_add(-adjustment, &sem->count);
			return;
		}
		/*
		 * Set it to reader-owned to give spinners an early
		 * indication that readers now have the lock.
		 */
		__rwsem_set_reader_owned(sem, waiter->task);
	}

	/*
	 * Grant up to MAX_READERS_WAKEUP read locks to all the readers in the
	 * queue. We know that woken will be at least 1 as we accounted
	 * for above. Note we increment the 'active part' of the count by the
	 * number of readers before waking any processes up.
	 *
	 * This is an adaptation of the phase-fair R/W locks where at the
	 * reader phase (first waiter is a reader), all readers are eligible
	 * to acquire the lock at the same time irrespective of their order
	 * in the queue. The writers acquire the lock according to their
	 * order in the queue.
	 *
	 * We have to do wakeup in 2 passes to prevent the possibility that
	 * the reader count may be decremented before it is incremented. It
	 * is because the to-be-woken waiter may not have slept yet. So it
	 * may see waiter->task cleared, finish its critical section and
	 * do an unlock before the reader count increment.
	 *
	 * 1) Collect the read-waiters in a separate list, count them and
	 *    fully increment the reader count in rwsem.
	 * 2) For each waiter in the new list, clear waiter->task and
	 *    put them into wake_q to be woken up later.
	 */
	INIT_LIST_HEAD(&wlist);
	list_for_each_entry_safe(waiter, tmp, &sem->wait_list, list) {
		if (waiter->type == RWSEM_WAITING_FOR_WRITE)
			continue;

		woken++;
		list_move_tail(&waiter->list, &wlist);

		/*
		 * Limit # of readers that can be woken up per wakeup call.
		 */
		if (woken >= MAX_READERS_WAKEUP)
			break;
	}

	adjustment = woken * RWSEM_READER_BIAS - adjustment;
	lockevent_cond_inc(rwsem_wake_reader, woken);
	if (list_empty(&sem->wait_list)) {
		/* hit end of list above */
		adjustment -= RWSEM_FLAG_WAITERS;
	}

	/*
	 * When we've woken a reader, we no longer need to force writers
	 * to give up the lock and we can clear HANDOFF.
	 */
	if (woken && (atomic_long_read(&sem->count) & RWSEM_FLAG_HANDOFF))
		adjustment -= RWSEM_FLAG_HANDOFF;

	if (adjustment)
		atomic_long_add(adjustment, &sem->count);

	/* 2nd pass */
	list_for_each_entry_safe(waiter, tmp, &wlist, list) {
		struct task_struct *tsk;

		tsk = waiter->task;
		get_task_struct(tsk);

		/*
		 * Ensure calling get_task_struct() before setting the reader
		 * waiter to nil such that rwsem_down_read_slowpath() cannot
		 * race with do_exit() by always holding a reference count
		 * to the task to wakeup.
		 */
		smp_store_release(&waiter->task, NULL);
		/*
		 * Ensure issuing the wakeup (either by us or someone else)
		 * after setting the reader waiter to nil.
		 */
		wake_q_add_safe(wake_q, tsk);
	}
}

/*
 * This function must be called with the sem->wait_lock held to prevent
 * race conditions between checking the rwsem wait list and setting the
 * sem->count accordingly.
 *
 * If wstate is WRITER_HANDOFF, it will make sure that either the handoff
 * bit is set or the lock is acquired with handoff bit cleared.
 */
static inline bool rwsem_try_write_lock(struct rw_semaphore *sem,
					enum writer_wait_state wstate)
{
	long count, new;

	lockdep_assert_held(&sem->wait_lock);

	count = atomic_long_read(&sem->count);
	do {
		bool has_handoff = !!(count & RWSEM_FLAG_HANDOFF);

		if (has_handoff && wstate == WRITER_NOT_FIRST)
			return false;

		new = count;

		if (count & RWSEM_LOCK_MASK) {
			if (has_handoff || (wstate != WRITER_HANDOFF))
				return false;

			new |= RWSEM_FLAG_HANDOFF;
		} else {
			new |= RWSEM_WRITER_LOCKED;
			new &= ~RWSEM_FLAG_HANDOFF;

			if (list_is_singular(&sem->wait_list))
				new &= ~RWSEM_FLAG_WAITERS;
		}
	} while (!atomic_long_try_cmpxchg_acquire(&sem->count, &count, new));

	/*
	 * We have either acquired the lock with handoff bit cleared or
	 * set the handoff bit.
	 */
	if (new & RWSEM_FLAG_HANDOFF)
		return false;

	rwsem_set_owner(sem);
	return true;
}

#ifdef CONFIG_RWSEM_SPIN_ON_OWNER
/*
 * Try to acquire write lock before the writer has been put on wait queue.
 */
static inline bool rwsem_try_write_lock_unqueued(struct rw_semaphore *sem)
{
	long count = atomic_long_read(&sem->count);

	while (!(count & (RWSEM_LOCK_MASK|RWSEM_FLAG_HANDOFF))) {
		if (atomic_long_try_cmpxchg_acquire(&sem->count, &count,
					count | RWSEM_WRITER_LOCKED)) {
			rwsem_set_owner(sem);
			lockevent_inc(rwsem_opt_wlock);
			return true;
		}
	}
	return false;
}

static inline bool owner_on_cpu(struct task_struct *owner)
{
	/*
	 * Due to lock holder preemption, we skip spinning if the task
	 * is not on a CPU or its CPU is preempted.
	 */
	return owner->on_cpu && !vcpu_is_preempted(task_cpu(owner));
}

static inline bool rwsem_can_spin_on_owner(struct rw_semaphore *sem)
{
	struct task_struct *owner;
	bool ret = true;

	BUILD_BUG_ON(is_rwsem_owner_spinnable(RWSEM_OWNER_UNKNOWN));

	if (need_resched())
		return false;

	rcu_read_lock();
	owner = READ_ONCE(sem->owner);
	if (owner) {
		ret = is_rwsem_owner_spinnable(owner) &&
		      owner_on_cpu(owner);
	}
	rcu_read_unlock();
	return ret;
}

/*
 * The rwsem_spin_on_owner() function returns the following four values
 * depending on the lock owner state.
 *   OWNER_NULL  : owner is currently NULL
 *   OWNER_WRITER: when owner changes and is a writer
 *   OWNER_READER: when owner changes and the new owner may be a reader.
 *   OWNER_NONSPINNABLE:
 *		   when optimistic spinning has to stop because either the
 *		   owner stops running, is unknown, or its timeslice has
 *		   been used up.
 */
enum owner_state {
	OWNER_NULL		= 1 << 0,
	OWNER_WRITER		= 1 << 1,
	OWNER_READER		= 1 << 2,
	OWNER_NONSPINNABLE	= 1 << 3,
};
#define OWNER_SPINNABLE		(OWNER_NULL | OWNER_WRITER)

static inline enum owner_state rwsem_owner_state(unsigned long owner)
{
	if (!owner)
		return OWNER_NULL;

	if (owner & RWSEM_NONSPINNABLE)
		return OWNER_NONSPINNABLE;

	if (owner & RWSEM_READER_OWNED)
		return OWNER_READER;

	return OWNER_WRITER;
}

static noinline enum owner_state rwsem_spin_on_owner(struct rw_semaphore *sem)
{
	struct task_struct *tmp, *owner = READ_ONCE(sem->owner);
	enum owner_state state = rwsem_owner_state((unsigned long)owner);

	if (state != OWNER_WRITER)
		return state;

	rcu_read_lock();
	for (;;) {
		if (atomic_long_read(&sem->count) & RWSEM_FLAG_HANDOFF) {
			state = OWNER_NONSPINNABLE;
			break;
		}

		tmp = READ_ONCE(sem->owner);
		if (tmp != owner) {
			state = rwsem_owner_state((unsigned long)tmp);
			break;
		}

		/*
		 * Ensure we emit the owner->on_cpu, dereference _after_
		 * checking sem->owner still matches owner, if that fails,
		 * owner might point to free()d memory, if it still matches,
		 * the rcu_read_lock() ensures the memory stays valid.
		 */
		barrier();

		if (need_resched() || !owner_on_cpu(owner)) {
			state = OWNER_NONSPINNABLE;
			break;
		}

		cpu_relax();
	}
	rcu_read_unlock();

	return state;
}

static bool rwsem_optimistic_spin(struct rw_semaphore *sem)
{
	bool taken = false;
	int prev_owner_state = OWNER_NULL;

	preempt_disable();

	/* sem->wait_lock should not be held when doing optimistic spinning */
	if (!rwsem_can_spin_on_owner(sem))
		goto done;

	if (!osq_lock(&sem->osq))
		goto done;

	/*
	 * Optimistically spin on the owner field and attempt to acquire the
	 * lock whenever the owner changes. Spinning will be stopped when:
	 *  1) the owning writer isn't running; or
	 *  2) readers own the lock as we can't determine if they are
	 *     actively running or not.
	 */
	for (;;) {
		enum owner_state owner_state = rwsem_spin_on_owner(sem);

		if (!(owner_state & OWNER_SPINNABLE))
			break;

		/*
		 * Try to acquire the lock
		 */
		if (rwsem_try_write_lock_unqueued(sem)) {
			taken = true;
			break;
		}

		/*
		 * An RT task cannot do optimistic spinning if it cannot
		 * be sure the lock holder is running; otherwise a live-lock
		 * may happen if the current task and the lock holder happen
		 * to run on the same CPU. However, aborting optimistic
		 * spinning while a NULL owner is detected may miss some
		 * opportunity where spinning can continue without causing
		 * a problem.
		 *
		 * There are 2 possible cases where an RT task may be able
		 * to continue spinning.
		 *
		 * 1) The lock owner is in the process of releasing the
		 *    lock, sem->owner is cleared but the lock has not
		 *    been released yet.
		 * 2) The lock was free and owner cleared, but another
		 *    task just comes in and acquires the lock before
		 *    we try to get it. The new owner may be a spinnable
		 *    writer.
		 *
		 * To take advantage of the two scenarios listed above, the RT
		 * task is made to retry one more time to see if it can
		 * acquire the lock or continue spinning on the new owning
		 * writer. Of course, if the time lag is long enough or the
		 * new owner is not a writer or not spinnable, the RT task
		 * will quit spinning.
		 *
		 * If the owner is a writer, the need_resched() check is
		 * done inside rwsem_spin_on_owner(). If the owner is not
		 * a writer, the need_resched() check needs to be done here.
		 */
		if (owner_state != OWNER_WRITER) {
			if (need_resched())
				break;
			if (rt_task(current) &&
			    (prev_owner_state != OWNER_WRITER))
				break;
		}
		prev_owner_state = owner_state;

		/*
		 * The cpu_relax() call is a compiler barrier which forces
		 * everything in this loop to be re-loaded. We don't need
		 * memory barriers as we'll eventually observe the right
		 * values at the cost of a few extra spins.
		 */
		cpu_relax();
	}
	osq_unlock(&sem->osq);
done:
	preempt_enable();
	lockevent_cond_inc(rwsem_opt_fail, !taken);
	return taken;
}
#else
static bool rwsem_optimistic_spin(struct rw_semaphore *sem)
{
	return false;
}
#endif

/*
 * Wait for the read lock to be granted
 */
static struct rw_semaphore __sched *
rwsem_down_read_slowpath(struct rw_semaphore *sem, int state)
{
	long count, adjustment = -RWSEM_READER_BIAS;
	struct rwsem_waiter waiter;
	DEFINE_WAKE_Q(wake_q);

	waiter.task = current;
	waiter.type = RWSEM_WAITING_FOR_READ;
	waiter.timeout = jiffies + RWSEM_WAIT_TIMEOUT;

	raw_spin_lock_irq(&sem->wait_lock);
	if (list_empty(&sem->wait_list)) {
		/*
		 * In case the wait queue is empty and the lock isn't owned
		 * by a writer and doesn't have the handoff bit set, this
		 * reader can exit the slowpath and return immediately as
		 * its RWSEM_READER_BIAS has already been set in the count.
		 */
		if (!(atomic_long_read(&sem->count) &
		     (RWSEM_WRITER_MASK | RWSEM_FLAG_HANDOFF))) {
			raw_spin_unlock_irq(&sem->wait_lock);
			rwsem_set_reader_owned(sem);
			lockevent_inc(rwsem_rlock_fast);
			return sem;
		}
		adjustment += RWSEM_FLAG_WAITERS;
	}
	list_add_tail(&waiter.list, &sem->wait_list);

	/* we're now waiting on the lock, but no longer actively locking */
	count = atomic_long_add_return(adjustment, &sem->count);

	/*
	 * If there are no active locks, wake the front queued process(es).
	 *
	 * If there are no writers and we are first in the queue,
	 * wake our own waiter to join the existing active readers !
	 */
	if (!(count & RWSEM_LOCK_MASK) ||
	    (!(count & RWSEM_WRITER_MASK) && (adjustment & RWSEM_FLAG_WAITERS)))
		rwsem_mark_wake(sem, RWSEM_WAKE_ANY, &wake_q);

	raw_spin_unlock_irq(&sem->wait_lock);
	wake_up_q(&wake_q);

	/* wait to be given the lock */
	while (true) {
		set_current_state(state);
		if (!waiter.task)
			break;
		if (signal_pending_state(state, current)) {
			raw_spin_lock_irq(&sem->wait_lock);
			if (waiter.task)
				goto out_nolock;
			raw_spin_unlock_irq(&sem->wait_lock);
			break;
		}
		schedule();
		lockevent_inc(rwsem_sleep_reader);
	}

	__set_current_state(TASK_RUNNING);
	lockevent_inc(rwsem_rlock);
	return sem;
out_nolock:
	list_del(&waiter.list);
	if (list_empty(&sem->wait_list)) {
		atomic_long_andnot(RWSEM_FLAG_WAITERS|RWSEM_FLAG_HANDOFF,
				   &sem->count);
	}
	raw_spin_unlock_irq(&sem->wait_lock);
	__set_current_state(TASK_RUNNING);
	lockevent_inc(rwsem_rlock_fail);
	return ERR_PTR(-EINTR);
}

/*
 * Wait until we successfully acquire the write lock
 */
static struct rw_semaphore *
rwsem_down_write_slowpath(struct rw_semaphore *sem, int state)
{
	long count;
	enum writer_wait_state wstate;
	struct rwsem_waiter waiter;
	struct rw_semaphore *ret = sem;
	DEFINE_WAKE_Q(wake_q);

	/* do optimistic spinning and steal lock if possible */
	if (rwsem_optimistic_spin(sem))
		return sem;

	/*
	 * Optimistic spinning failed, proceed to the slowpath
	 * and block until we can acquire the sem.
	 */
	waiter.task = current;
	waiter.type = RWSEM_WAITING_FOR_WRITE;
	waiter.timeout = jiffies + RWSEM_WAIT_TIMEOUT;

	raw_spin_lock_irq(&sem->wait_lock);

	/* account for this before adding a new element to the list */
	wstate = list_empty(&sem->wait_list) ? WRITER_FIRST : WRITER_NOT_FIRST;

	list_add_tail(&waiter.list, &sem->wait_list);

	/* we're now waiting on the lock */
	if (wstate == WRITER_NOT_FIRST) {
		count = atomic_long_read(&sem->count);

		/*
		 * If there were already threads queued before us and:
		 *  1) there are no active locks, wake the front
		 *     queued process(es) as the handoff bit might be set.
		 *  2) there are no active writers and some readers, the lock
		 *     must be read owned; so we try to wake any read lock
		 *     waiters that were queued ahead of us.
		 */
		if (count & RWSEM_WRITER_MASK)
			goto wait;

		rwsem_mark_wake(sem, (count & RWSEM_READER_MASK)
					? RWSEM_WAKE_READERS
					: RWSEM_WAKE_ANY, &wake_q);

		if (!wake_q_empty(&wake_q)) {
			/*
			 * We want to minimize wait_lock hold time especially
			 * when a large number of readers are to be woken up.
			 */
			raw_spin_unlock_irq(&sem->wait_lock);
			wake_up_q(&wake_q);
			wake_q_init(&wake_q);	/* Used again, reinit */
			raw_spin_lock_irq(&sem->wait_lock);
		}
	} else {
		atomic_long_or(RWSEM_FLAG_WAITERS, &sem->count);
	}

wait:
	/* wait until we successfully acquire the lock */
	set_current_state(state);
	while (true) {
		if (rwsem_try_write_lock(sem, wstate))
			break;

		raw_spin_unlock_irq(&sem->wait_lock);

		/* Block until there are no active lockers. */
		for (;;) {
			if (signal_pending_state(state, current))
				goto out_nolock;

			schedule();
			lockevent_inc(rwsem_sleep_writer);
			set_current_state(state);
			/*
			 * If HANDOFF bit is set, unconditionally do
			 * a trylock.
			 */
			if (wstate == WRITER_HANDOFF)
				break;

			if ((wstate == WRITER_NOT_FIRST) &&
			    (rwsem_first_waiter(sem) == &waiter))
				wstate = WRITER_FIRST;

			count = atomic_long_read(&sem->count);
			if (!(count & RWSEM_LOCK_MASK))
				break;

			/*
			 * The setting of the handoff bit is deferred
			 * until rwsem_try_write_lock() is called.
			 */
			if ((wstate == WRITER_FIRST) && (rt_task(current) ||
			    time_after(jiffies, waiter.timeout))) {
				wstate = WRITER_HANDOFF;
				lockevent_inc(rwsem_wlock_handoff);
				break;
			}
		}

		raw_spin_lock_irq(&sem->wait_lock);
	}
	__set_current_state(TASK_RUNNING);
	list_del(&waiter.list);
	raw_spin_unlock_irq(&sem->wait_lock);
	lockevent_inc(rwsem_wlock);

	return ret;

out_nolock:
	__set_current_state(TASK_RUNNING);
	raw_spin_lock_irq(&sem->wait_lock);
	list_del(&waiter.list);

	if (unlikely(wstate == WRITER_HANDOFF))
		atomic_long_add(-RWSEM_FLAG_HANDOFF, &sem->count);

	if (list_empty(&sem->wait_list))
		atomic_long_andnot(RWSEM_FLAG_WAITERS, &sem->count);
	else
		rwsem_mark_wake(sem, RWSEM_WAKE_ANY, &wake_q);
	raw_spin_unlock_irq(&sem->wait_lock);
	wake_up_q(&wake_q);
	lockevent_inc(rwsem_wlock_fail);

	return ERR_PTR(-EINTR);
}

/*
 * handle waking up a waiter on the semaphore
 * - up_read/up_write has decremented the active part of count if we come here
 */
static struct rw_semaphore *rwsem_wake(struct rw_semaphore *sem, long count)
{
	unsigned long flags;
	DEFINE_WAKE_Q(wake_q);

	raw_spin_lock_irqsave(&sem->wait_lock, flags);

	if (!list_empty(&sem->wait_list))
		rwsem_mark_wake(sem, RWSEM_WAKE_ANY, &wake_q);

	raw_spin_unlock_irqrestore(&sem->wait_lock, flags);
	wake_up_q(&wake_q);

	return sem;
}

/*
 * downgrade a write lock into a read lock
 * - caller incremented waiting part of count and discovered it still negative
 * - just wake up any readers at the front of the queue
 */
static struct rw_semaphore *rwsem_downgrade_wake(struct rw_semaphore *sem)
{
	unsigned long flags;
	DEFINE_WAKE_Q(wake_q);

	raw_spin_lock_irqsave(&sem->wait_lock, flags);

	if (!list_empty(&sem->wait_list))
		rwsem_mark_wake(sem, RWSEM_WAKE_READ_OWNED, &wake_q);

	raw_spin_unlock_irqrestore(&sem->wait_lock, flags);
	wake_up_q(&wake_q);

	return sem;
}

/*
 * lock for reading
 */
inline void __down_read(struct rw_semaphore *sem)
{
	if (unlikely(atomic_long_fetch_add_acquire(RWSEM_READER_BIAS,
			&sem->count) & RWSEM_READ_FAILED_MASK)) {
		rwsem_down_read_slowpath(sem, TASK_UNINTERRUPTIBLE);
		DEBUG_RWSEMS_WARN_ON(!((unsigned long)sem->owner &
					RWSEM_READER_OWNED), sem);
	} else {
		rwsem_set_reader_owned(sem);
	}
}

static inline int __down_read_killable(struct rw_semaphore *sem)
{
	if (unlikely(atomic_long_fetch_add_acquire(RWSEM_READER_BIAS,
			&sem->count) & RWSEM_READ_FAILED_MASK)) {
		if (IS_ERR(rwsem_down_read_slowpath(sem, TASK_KILLABLE)))
			return -EINTR;
		DEBUG_RWSEMS_WARN_ON(!((unsigned long)sem->owner &
					RWSEM_READER_OWNED), sem);
	} else {
		rwsem_set_reader_owned(sem);
	}
	return 0;
}

static inline int __down_read_trylock(struct rw_semaphore *sem)
{
	/*
	 * Optimize for the case when the rwsem is not locked at all.
	 */
	long tmp = RWSEM_UNLOCKED_VALUE;

	do {
		if (atomic_long_try_cmpxchg_acquire(&sem->count, &tmp,
					tmp + RWSEM_READER_BIAS)) {
			rwsem_set_reader_owned(sem);
			return 1;
		}
	} while (!(tmp & RWSEM_READ_FAILED_MASK));
	return 0;
}

/*
 * lock for writing
 */
static inline void __down_write(struct rw_semaphore *sem)
{
	long tmp = RWSEM_UNLOCKED_VALUE;

	if (unlikely(!atomic_long_try_cmpxchg_acquire(&sem->count, &tmp,
						      RWSEM_WRITER_LOCKED)))
		rwsem_down_write_slowpath(sem, TASK_UNINTERRUPTIBLE);
	rwsem_set_owner(sem);
}

static inline int __down_write_killable(struct rw_semaphore *sem)
{
	long tmp = RWSEM_UNLOCKED_VALUE;

	if (unlikely(!atomic_long_try_cmpxchg_acquire(&sem->count, &tmp,
						      RWSEM_WRITER_LOCKED))) {
		if (IS_ERR(rwsem_down_write_slowpath(sem, TASK_KILLABLE)))
			return -EINTR;
	}
	rwsem_set_owner(sem);
	return 0;
}

static inline int __down_write_trylock(struct rw_semaphore *sem)
{
	long tmp = RWSEM_UNLOCKED_VALUE;

	if (atomic_long_try_cmpxchg_acquire(&sem->count, &tmp,
					    RWSEM_WRITER_LOCKED)) {
		rwsem_set_owner(sem);
		return true;
	}
	return false;
}

/*
 * unlock after reading
 */
inline void __up_read(struct rw_semaphore *sem)
{
	long tmp;

	DEBUG_RWSEMS_WARN_ON(!((unsigned long)sem->owner & RWSEM_READER_OWNED), sem);
	rwsem_clear_reader_owned(sem);
	tmp = atomic_long_add_return_release(-RWSEM_READER_BIAS, &sem->count);
	if (unlikely((tmp & (RWSEM_LOCK_MASK|RWSEM_FLAG_WAITERS)) ==
			RWSEM_FLAG_WAITERS))
		rwsem_wake(sem, tmp);
}

/*
 * unlock after writing
 */
static inline void __up_write(struct rw_semaphore *sem)
{
	long tmp;

	/*
	 * sem->owner may differ from current if the ownership is transferred
	 * to an anonymous writer by setting the RWSEM_NONSPINNABLE bits.
	 */
	DEBUG_RWSEMS_WARN_ON((sem->owner != current) &&
			    !((long)sem->owner & RWSEM_NONSPINNABLE), sem);
	rwsem_clear_owner(sem);
	tmp = atomic_long_fetch_add_release(-RWSEM_WRITER_LOCKED, &sem->count);
	if (unlikely(tmp & RWSEM_FLAG_WAITERS))
		rwsem_wake(sem, tmp);
}

/*
 * downgrade write lock to read lock
 */
static inline void __downgrade_write(struct rw_semaphore *sem)
{
	long tmp;

	/*
	 * When downgrading from exclusive to shared ownership,
	 * anything inside the write-locked region cannot leak
	 * into the read side. In contrast, anything in the
	 * read-locked region is ok to be re-ordered into the
	 * write side. As such, rely on RELEASE semantics.
	 */
	DEBUG_RWSEMS_WARN_ON(sem->owner != current, sem);
	tmp = atomic_long_fetch_add_release(
		-RWSEM_WRITER_LOCKED+RWSEM_READER_BIAS, &sem->count);
	rwsem_set_reader_owned(sem);
	if (tmp & RWSEM_FLAG_WAITERS)
		rwsem_downgrade_wake(sem);
}

/*
 * lock for reading
 */
void __sched down_read(struct rw_semaphore *sem)
{
	might_sleep();
	rwsem_acquire_read(&sem->dep_map, 0, 0, _RET_IP_);

	LOCK_CONTENDED(sem, __down_read_trylock, __down_read);
}
EXPORT_SYMBOL(down_read);

int __sched down_read_killable(struct rw_semaphore *sem)
{
	might_sleep();
	rwsem_acquire_read(&sem->dep_map, 0, 0, _RET_IP_);

	if (LOCK_CONTENDED_RETURN(sem, __down_read_trylock, __down_read_killable)) {
		rwsem_release(&sem->dep_map, 1, _RET_IP_);
		return -EINTR;
	}

	return 0;
}
EXPORT_SYMBOL(down_read_killable);

/*
 * trylock for reading -- returns 1 if successful, 0 if contention
 */
int down_read_trylock(struct rw_semaphore *sem)
{
	int ret = __down_read_trylock(sem);

	if (ret == 1)
		rwsem_acquire_read(&sem->dep_map, 0, 1, _RET_IP_);
	return ret;
}
EXPORT_SYMBOL(down_read_trylock);

/*
 * lock for writing
 */
void __sched down_write(struct rw_semaphore *sem)
{
	might_sleep();
	rwsem_acquire(&sem->dep_map, 0, 0, _RET_IP_);
	LOCK_CONTENDED(sem, __down_write_trylock, __down_write);
}
EXPORT_SYMBOL(down_write);

/*
 * lock for writing
 */
int __sched down_write_killable(struct rw_semaphore *sem)
{
	might_sleep();
	rwsem_acquire(&sem->dep_map, 0, 0, _RET_IP_);

	if (LOCK_CONTENDED_RETURN(sem, __down_write_trylock,
				  __down_write_killable)) {
		rwsem_release(&sem->dep_map, 1, _RET_IP_);
		return -EINTR;
	}

	return 0;
}
EXPORT_SYMBOL(down_write_killable);

/*
 * trylock for writing -- returns 1 if successful, 0 if contention
 */
int down_write_trylock(struct rw_semaphore *sem)
{
	int ret = __down_write_trylock(sem);

	if (ret == 1)
		rwsem_acquire(&sem->dep_map, 0, 1, _RET_IP_);

	return ret;
}
EXPORT_SYMBOL(down_write_trylock);

/*
 * release a read lock
 */
void up_read(struct rw_semaphore *sem)
{
	rwsem_release(&sem->dep_map, 1, _RET_IP_);
	__up_read(sem);
}
EXPORT_SYMBOL(up_read);

/*
 * release a write lock
 */
void up_write(struct rw_semaphore *sem)
{
	rwsem_release(&sem->dep_map, 1, _RET_IP_);
	__up_write(sem);
}
EXPORT_SYMBOL(up_write);

/*
 * downgrade write lock to read lock
 */
void downgrade_write(struct rw_semaphore *sem)
{
	lock_downgrade(&sem->dep_map, _RET_IP_);
	__downgrade_write(sem);
}
EXPORT_SYMBOL(downgrade_write);

#ifdef CONFIG_DEBUG_LOCK_ALLOC

void down_read_nested(struct rw_semaphore *sem, int subclass)
{
	might_sleep();
	rwsem_acquire_read(&sem->dep_map, subclass, 0, _RET_IP_);
	LOCK_CONTENDED(sem, __down_read_trylock, __down_read);
}
EXPORT_SYMBOL(down_read_nested);

void _down_write_nest_lock(struct rw_semaphore *sem, struct lockdep_map *nest)
{
	might_sleep();
	rwsem_acquire_nest(&sem->dep_map, 0, 0, nest, _RET_IP_);
	LOCK_CONTENDED(sem, __down_write_trylock, __down_write);
}
EXPORT_SYMBOL(_down_write_nest_lock);

void down_read_non_owner(struct rw_semaphore *sem)
{
	might_sleep();
	__down_read(sem);
	__rwsem_set_reader_owned(sem, NULL);
}
EXPORT_SYMBOL(down_read_non_owner);

void down_write_nested(struct rw_semaphore *sem, int subclass)
{
	might_sleep();
	rwsem_acquire(&sem->dep_map, subclass, 0, _RET_IP_);
	LOCK_CONTENDED(sem, __down_write_trylock, __down_write);
}
EXPORT_SYMBOL(down_write_nested);

int __sched down_write_killable_nested(struct rw_semaphore *sem, int subclass)
{
	might_sleep();
	rwsem_acquire(&sem->dep_map, subclass, 0, _RET_IP_);

	if (LOCK_CONTENDED_RETURN(sem, __down_write_trylock,
				  __down_write_killable)) {
		rwsem_release(&sem->dep_map, 1, _RET_IP_);
		return -EINTR;
	}

	return 0;
}
EXPORT_SYMBOL(down_write_killable_nested);

void up_read_non_owner(struct rw_semaphore *sem)
{
	DEBUG_RWSEMS_WARN_ON(!((unsigned long)sem->owner & RWSEM_READER_OWNED),
				sem);
	__up_read(sem);
}
EXPORT_SYMBOL(up_read_non_owner);

#endif