Greg Kroah-Hartman | b244131 | 2017-11-01 15:07:57 +0100 | [diff] [blame] | 1 | /* SPDX-License-Identifier: GPL-2.0 */ |
Peter Zijlstra (Intel) | 13b3568 | 2016-02-19 09:46:37 +0100 | [diff] [blame] | 2 | #ifndef _LINUX_SWAIT_H |
| 3 | #define _LINUX_SWAIT_H |
| 4 | |
| 5 | #include <linux/list.h> |
| 6 | #include <linux/stddef.h> |
| 7 | #include <linux/spinlock.h> |
Sebastian Andrzej Siewior | a59a68f | 2018-05-04 12:42:24 +0200 | [diff] [blame] | 8 | #include <linux/wait.h> |
Peter Zijlstra (Intel) | 13b3568 | 2016-02-19 09:46:37 +0100 | [diff] [blame] | 9 | #include <asm/current.h> |
| 10 | |
| 11 | /* |
Linus Torvalds | c5e7a7e | 2018-06-04 12:01:15 -0700 | [diff] [blame] | 12 | * BROKEN wait-queues. |
| 13 | * |
| 14 | * These "simple" wait-queues are broken garbage, and should never be |
| 15 | * used. The comments below claim that they are "similar" to regular |
| 16 | * wait-queues, but the semantics are actually completely different, and |
| 17 | * every single user we have ever had has been buggy (or pointless). |
| 18 | * |
Peter Zijlstra | b3dae10 | 2018-06-12 10:34:52 +0200 | [diff] [blame] | 19 | * A "swake_up_one()" only wakes up _one_ waiter, which is not at all what |
Linus Torvalds | c5e7a7e | 2018-06-04 12:01:15 -0700 | [diff] [blame] | 20 | * "wake_up()" does, and has led to problems. In other cases, it has |
| 21 | * been fine, because there's only ever one waiter (kvm), but in that |
| 22 | * case the whole "simple" wait-queue is just pointless to begin with, |
| 23 | * since there is no "queue". Use "wake_up_process()" with a direct |
| 24 | * pointer instead. |
Peter Zijlstra (Intel) | 13b3568 | 2016-02-19 09:46:37 +0100 | [diff] [blame] | 25 | * |
Davidlohr Bueso | 88796e7 | 2017-10-20 10:13:46 -0700 | [diff] [blame] | 26 | * While these are very similar to regular wait queues (wait.h) the most |
| 27 | * important difference is that the simple waitqueue allows for deterministic |
| 28 | * behaviour -- IOW it has strictly bounded IRQ and lock hold times. |
Peter Zijlstra (Intel) | 13b3568 | 2016-02-19 09:46:37 +0100 | [diff] [blame] | 29 | * |
Davidlohr Bueso | 88796e7 | 2017-10-20 10:13:46 -0700 | [diff] [blame] | 30 | * Mainly, this is accomplished by two things. Firstly not allowing swake_up_all |
| 31 | * from IRQ-disabled context, and dropping the lock upon every wakeup, giving a higher |
| 32 | * priority task a chance to run. |
| 33 | * |
| 34 | * Secondly, we had to drop a fair number of features of the other waitqueue |
| 35 | * code; notably: |
Peter Zijlstra (Intel) | 13b3568 | 2016-02-19 09:46:37 +0100 | [diff] [blame] | 36 | * |
| 37 | * - mixing INTERRUPTIBLE and UNINTERRUPTIBLE sleeps on the same waitqueue; |
| 38 | * all wakeups are TASK_NORMAL in order to avoid O(n) lookups for the right |
| 39 | * sleeper state. |
| 40 | * |
Peter Zijlstra | 0abf17b | 2018-06-12 10:34:51 +0200 | [diff] [blame] | 41 | * - the !exclusive mode; because that leads to O(n) wakeups, everything is |
| 42 | * exclusive. |
Peter Zijlstra (Intel) | 13b3568 | 2016-02-19 09:46:37 +0100 | [diff] [blame] | 43 | * |
Davidlohr Bueso | 88796e7 | 2017-10-20 10:13:46 -0700 | [diff] [blame] | 44 | * - custom wake callback functions; because you cannot give any guarantees |
| 45 | * about random code. This also allows swait to be used in RT, such that |
| 46 | * raw spinlock can be used for the swait queue head. |
Peter Zijlstra (Intel) | 13b3568 | 2016-02-19 09:46:37 +0100 | [diff] [blame] | 47 | * |
Davidlohr Bueso | 88796e7 | 2017-10-20 10:13:46 -0700 | [diff] [blame] | 48 | * As a side effect of these; the data structures are slimmer albeit more ad-hoc. |
| 49 | * For all the above, note that simple wait queues should _only_ be used under |
| 50 | * very specific realtime constraints -- it is best to stick with the regular |
| 51 | * wait queues in most cases. |
Peter Zijlstra (Intel) | 13b3568 | 2016-02-19 09:46:37 +0100 | [diff] [blame] | 52 | */ |
| 53 | |
| 54 | struct task_struct; |
| 55 | |
/* Head of a simple wait queue: one raw spinlock plus one list head. */
| 56 | struct swait_queue_head { |
| 57 | raw_spinlock_t lock; /* NOTE(review): presumably serialises access to task_list -- confirm in swait.c */ |
| 58 | struct list_head task_list; /* swait_active() reports "has waiters" when this list is non-empty */ |
| 59 | }; |
| 60 | |
/* A single wait entry: a task pointer and the list node used to link it. */
| 61 | struct swait_queue { |
| 62 | struct task_struct *task; /* set to `current` by __SWAITQUEUE_INITIALIZER() */ |
| 63 | struct list_head task_list; /* NOTE(review): presumably linked onto swait_queue_head::task_list -- confirm */ |
| 64 | }; |
| 65 | |
/*
 * Static initialiser for a struct swait_queue named @name: .task is set to
 * `current` and .task_list is initialised with LIST_HEAD_INIT() on the
 * entry's own task_list member.
 */
| 66 | #define __SWAITQUEUE_INITIALIZER(name) { \ |
| 67 | .task = current, \ |
| 68 | .task_list = LIST_HEAD_INIT((name).task_list), \ |
| 69 | } |
| 70 | |
/* Define a struct swait_queue variable @name using the initialiser above. */
| 71 | #define DECLARE_SWAITQUEUE(name) \ |
| 72 | struct swait_queue name = __SWAITQUEUE_INITIALIZER(name) |
| 73 | |
/*
 * Static initialiser for a struct swait_queue_head named @name: .lock comes
 * from __RAW_SPIN_LOCK_UNLOCKED() and .task_list from LIST_HEAD_INIT() on the
 * head's own task_list member.
 */
| 74 | #define __SWAIT_QUEUE_HEAD_INITIALIZER(name) { \ |
| 75 | .lock = __RAW_SPIN_LOCK_UNLOCKED(name.lock), \ |
| 76 | .task_list = LIST_HEAD_INIT((name).task_list), \ |
| 77 | } |
| 78 | |
/* Define a struct swait_queue_head variable @name using the initialiser above. */
| 79 | #define DECLARE_SWAIT_QUEUE_HEAD(name) \ |
| 80 | struct swait_queue_head name = __SWAIT_QUEUE_HEAD_INITIALIZER(name) |
| 81 | |
/*
 * Run-time initialiser for a queue head; the function itself is defined
 * elsewhere.  @name and @key are supplied by the init_swait_queue_head()
 * wrapper below (NOTE(review): presumably consumed by lockdep -- confirm).
 */
| 82 | extern void __init_swait_queue_head(struct swait_queue_head *q, const char *name, |
| 83 | struct lock_class_key *key); |
| 84 | |
/*
 * init_swait_queue_head(q) - initialise the queue head pointed to by @q.
 *
 * Passes the stringified argument expression (#q) as the name and the address
 * of a function-static lock_class_key; because the key is declared inside the
 * macro body, every expansion site has its own key object.
 */
| 85 | #define init_swait_queue_head(q) \ |
| 86 | do { \ |
| 87 | static struct lock_class_key __key; \ |
| 88 | __init_swait_queue_head((q), #q, &__key); \ |
| 89 | } while (0) |
| 90 | |
/*
 * DECLARE_SWAIT_QUEUE_HEAD_ONSTACK(name) - define a queue head variable @name.
 *
 * With CONFIG_LOCKDEP the variable is initialised at run time: a statement
 * expression calls init_swait_queue_head(&name) and then yields @name by
 * value as the initialiser.  Without CONFIG_LOCKDEP it is simply
 * DECLARE_SWAIT_QUEUE_HEAD(name), i.e. the static initialiser.
 */
| 91 | #ifdef CONFIG_LOCKDEP |
| 92 | # define __SWAIT_QUEUE_HEAD_INIT_ONSTACK(name) \ |
| 93 | ({ init_swait_queue_head(&name); name; }) |
| 94 | # define DECLARE_SWAIT_QUEUE_HEAD_ONSTACK(name) \ |
| 95 | struct swait_queue_head name = __SWAIT_QUEUE_HEAD_INIT_ONSTACK(name) |
| 96 | #else |
| 97 | # define DECLARE_SWAIT_QUEUE_HEAD_ONSTACK(name) \ |
| 98 | DECLARE_SWAIT_QUEUE_HEAD(name) |
| 99 | #endif |
| 100 | |
Davidlohr Bueso | 8cd641e | 2017-09-13 13:08:18 -0700 | [diff] [blame] | 101 | /** |
| 102 | * swait_active -- locklessly test for waiters on the queue |
| 103 | * @wq: the waitqueue to test for waiters |
| 104 | * |
| 105 | * returns true if the wait list is not empty |
| 106 | * |
| 107 | * NOTE: this function is lockless and requires care, incorrect usage _will_ |
| 108 | * lead to sporadic and non-obvious failure. |
| 109 | * |
| 110 | * NOTE2: this function has the same above implications as regular waitqueues. |
| 111 | * |
| 112 | * Use either while holding swait_queue_head::lock or when used for wakeups |
| 113 | * with an extra smp_mb() like: |
| 114 | * |
| 115 | * CPU0 - waker CPU1 - waiter |
| 116 | * |
| 117 | * for (;;) { |
Peter Zijlstra | b3dae10 | 2018-06-12 10:34:52 +0200 | [diff] [blame] | 118 | * @cond = true; prepare_to_swait_exclusive(&wq_head, &wait, state); |
Davidlohr Bueso | 8cd641e | 2017-09-13 13:08:18 -0700 | [diff] [blame] | 119 | * smp_mb(); // smp_mb() from set_current_state() |
| 120 | * if (swait_active(&wq_head)) if (@cond) |
| 121 | * swake_up_one(&wq_head); break; |
| 122 | * schedule(); |
| 123 | * } |
| 124 | * finish_swait(&wq_head, &wait); |
| 125 | * |
| 126 | * Because without the explicit smp_mb() it's possible for the |
| 127 | * swait_active() load to get hoisted over the @cond store such that we'll |
| 128 | * observe an empty wait list while the waiter might not observe @cond. |
| 129 | * This, in turn, can trigger missing wakeups. |
| 130 | * |
| 131 | * Also note that this 'optimization' trades a spin_lock() for an smp_mb(), |
| 132 | * which (when the lock is uncontended) are of roughly equal cost. |
| 133 | */ |
| 134 | static inline int swait_active(struct swait_queue_head *wq) |
Peter Zijlstra (Intel) | 13b3568 | 2016-02-19 09:46:37 +0100 | [diff] [blame] | 135 | { |
/* No lock is taken and no barrier is issued here; ordering is the caller's job. */
Davidlohr Bueso | 8cd641e | 2017-09-13 13:08:18 -0700 | [diff] [blame] | 136 | return !list_empty(&wq->task_list); |
| 137 | } |
| 138 | |
| 139 | /** |
| 140 | * swq_has_sleeper - check if there are any waiting processes |
| 141 | * @wq: the waitqueue to test for waiters |
| 142 | * |
| 143 | * Returns true if @wq has waiting processes |
| 144 | * |
| 145 | * Please refer to the comment for swait_active. |
| 146 | */ |
| 147 | static inline bool swq_has_sleeper(struct swait_queue_head *wq) |
| 148 | { |
| 149 | /* |
| 150 | * We need to be sure we are in sync with the list_add() |
| 151 | * modifications to the wait queue (task_list). |
| 152 | * |
| 153 | * This memory barrier should be paired with one on the |
| 154 | * waiting side. |
| 155 | */ |
| 156 | smp_mb(); /* full barrier issued before the lockless list check below */ |
| 157 | return swait_active(wq); /* non-zero when task_list is not empty */ |
Peter Zijlstra (Intel) | 13b3568 | 2016-02-19 09:46:37 +0100 | [diff] [blame] | 158 | } |
| 159 | |
/*
 * Wake-up side; all three are defined elsewhere.  Per the header comment at
 * the top of this file, swake_up_one() wakes only one waiter.
 * NOTE(review): swake_up_locked() presumably expects the caller to already
 * hold q->lock -- confirm against the implementation.
 */
Peter Zijlstra | b3dae10 | 2018-06-12 10:34:52 +0200 | [diff] [blame] | 160 | extern void swake_up_one(struct swait_queue_head *q); |
Peter Zijlstra (Intel) | 13b3568 | 2016-02-19 09:46:37 +0100 | [diff] [blame] | 161 | extern void swake_up_all(struct swait_queue_head *q); |
| 162 | extern void swake_up_locked(struct swait_queue_head *q); |
| 163 | |
/*
 * Wait side.  prepare_to_swait_event() returns a long that ___swait_event()
 * uses as its result when the wait is interruptible and the value is non-zero.
 */
Peter Zijlstra | b3dae10 | 2018-06-12 10:34:52 +0200 | [diff] [blame] | 164 | extern void prepare_to_swait_exclusive(struct swait_queue_head *q, struct swait_queue *wait, int state); |
Peter Zijlstra (Intel) | 13b3568 | 2016-02-19 09:46:37 +0100 | [diff] [blame] | 165 | extern long prepare_to_swait_event(struct swait_queue_head *q, struct swait_queue *wait, int state); |
| 166 | |
/*
 * Called once waiting is over; ___swait_event() uses finish_swait().
 * NOTE(review): __finish_swait() looks like a variant for callers that
 * already hold the queue lock -- confirm.
 */
| 167 | extern void __finish_swait(struct swait_queue_head *q, struct swait_queue *wait); |
| 168 | extern void finish_swait(struct swait_queue_head *q, struct swait_queue *wait); |
| 169 | |
Peter Zijlstra | 0abf17b | 2018-06-12 10:34:51 +0200 | [diff] [blame] | 170 | /* as per ___wait_event() but for swait, therefore "exclusive == 1" */ |
/*
 * Core wait loop shared by every swait_event_*() macro in this file.
 *
 * @wq:        queue head object (the macro takes its address itself)
 * @condition: expression re-evaluated after every prepare_to_swait_event()
 * @state:     task state handed to prepare_to_swait_event()
 * @ret:       initial value of the local result __ret
 * @cmd:       statement executed each time @condition is false; it may
 *             assign __ret (the *_timeout callers do exactly that)
 *
 * Evaluates to __ret.  When @state is interruptible and
 * prepare_to_swait_event() returned non-zero, that value becomes the result
 * and control jumps to __out, skipping finish_swait().
 * NOTE(review): presumably prepare_to_swait_event() has already dequeued the
 * entry in that case -- confirm in the implementation.
 */
Peter Zijlstra (Intel) | 13b3568 | 2016-02-19 09:46:37 +0100 | [diff] [blame] | 171 | #define ___swait_event(wq, condition, state, ret, cmd) \ |
| 172 | ({ \ |
Peter Zijlstra | 0abf17b | 2018-06-12 10:34:51 +0200 | [diff] [blame] | 173 | __label__ __out; /* label local to this statement expression */ \ |
Peter Zijlstra (Intel) | 13b3568 | 2016-02-19 09:46:37 +0100 | [diff] [blame] | 174 | struct swait_queue __wait; \ |
| 175 | long __ret = ret; \ |
| 176 | \ |
| 177 | INIT_LIST_HEAD(&__wait.task_list); \ |
| 178 | for (;;) { \ |
| 179 | long __int = prepare_to_swait_event(&wq, &__wait, state);\ |
| 180 | \ |
| 181 | if (condition) /* checked only after prepare_to_swait_event() */ \ |
| 182 | break; \ |
| 183 | \ |
| 184 | if (___wait_is_interruptible(state) && __int) { \ |
| 185 | __ret = __int; \ |
Peter Zijlstra | 0abf17b | 2018-06-12 10:34:51 +0200 | [diff] [blame] | 186 | goto __out; /* bypasses finish_swait() below */ \ |
Peter Zijlstra (Intel) | 13b3568 | 2016-02-19 09:46:37 +0100 | [diff] [blame] | 187 | } \ |
| 188 | \ |
| 189 | cmd; \ |
| 190 | } \ |
| 191 | finish_swait(&wq, &__wait); \ |
Peter Zijlstra | 0abf17b | 2018-06-12 10:34:51 +0200 | [diff] [blame] | 192 | __out: __ret; /* value of the whole statement expression */ \ |
Peter Zijlstra (Intel) | 13b3568 | 2016-02-19 09:46:37 +0100 | [diff] [blame] | 193 | }) |
| 194 | |
/*
 * Slow path of swait_event_exclusive(): loop in TASK_UNINTERRUPTIBLE calling
 * schedule() until @condition is true; the result is discarded.
 */
| 195 | #define __swait_event(wq, condition) \ |
| 196 | (void)___swait_event(wq, condition, TASK_UNINTERRUPTIBLE, 0, \ |
| 197 | schedule()) |
| 198 | |
/*
 * swait_event_exclusive(wq, condition) - wait until @condition is true.
 *
 * @condition is tested once up front, so nothing is queued when it already
 * holds; it is evaluated more than once otherwise, so it must be safe to
 * re-evaluate.
 */
Peter Zijlstra | b3dae10 | 2018-06-12 10:34:52 +0200 | [diff] [blame] | 199 | #define swait_event_exclusive(wq, condition) \ |
Peter Zijlstra (Intel) | 13b3568 | 2016-02-19 09:46:37 +0100 | [diff] [blame] | 200 | do { \ |
| 201 | if (condition) \ |
| 202 | break; \ |
| 203 | __swait_event(wq, condition); \ |
| 204 | } while (0) |
| 205 | |
/*
 * Slow path of swait_event_timeout_exclusive(): TASK_UNINTERRUPTIBLE wait in
 * which each pass runs schedule_timeout() and stores its return value in the
 * __ret local of ___swait_event(), which starts out as @timeout.
 * NOTE(review): ___wait_cond_timeout() is not defined in this file
 * (<linux/wait.h> is included); it presumably reads and updates __ret --
 * confirm there.
 */
| 206 | #define __swait_event_timeout(wq, condition, timeout) \ |
| 207 | ___swait_event(wq, ___wait_cond_timeout(condition), \ |
| 208 | TASK_UNINTERRUPTIBLE, timeout, \ |
| 209 | __ret = schedule_timeout(__ret)) |
| 210 | |
/*
 * swait_event_timeout_exclusive(wq, condition, timeout) - uninterruptible
 * wait bounded by @timeout.
 *
 * Evaluates to a long: @timeout unchanged when the up-front
 * ___wait_cond_timeout() check succeeds, otherwise the result of the slow
 * path above.
 */
Peter Zijlstra | b3dae10 | 2018-06-12 10:34:52 +0200 | [diff] [blame] | 211 | #define swait_event_timeout_exclusive(wq, condition, timeout) \ |
Peter Zijlstra (Intel) | 13b3568 | 2016-02-19 09:46:37 +0100 | [diff] [blame] | 212 | ({ \ |
| 213 | long __ret = timeout; \ |
| 214 | if (!___wait_cond_timeout(condition)) \ |
| 215 | __ret = __swait_event_timeout(wq, condition, timeout); \ |
| 216 | __ret; \ |
| 217 | }) |
| 218 | |
/*
 * Slow path of swait_event_interruptible_exclusive(): TASK_INTERRUPTIBLE wait
 * calling schedule().  Evaluates to 0 once @condition is true, or to the
 * non-zero value returned by prepare_to_swait_event() if that came first.
 */
| 219 | #define __swait_event_interruptible(wq, condition) \ |
| 220 | ___swait_event(wq, condition, TASK_INTERRUPTIBLE, 0, \ |
| 221 | schedule()) |
| 222 | |
/*
 * swait_event_interruptible_exclusive(wq, condition) - interruptible wait.
 *
 * Evaluates to 0 when @condition is (or becomes) true, otherwise to the
 * non-zero result of the slow path.  Note the result is held in an int here
 * although ___swait_event() yields a long.
 */
Peter Zijlstra | b3dae10 | 2018-06-12 10:34:52 +0200 | [diff] [blame] | 223 | #define swait_event_interruptible_exclusive(wq, condition) \ |
Peter Zijlstra (Intel) | 13b3568 | 2016-02-19 09:46:37 +0100 | [diff] [blame] | 224 | ({ \ |
| 225 | int __ret = 0; \ |
| 226 | if (!(condition)) \ |
| 227 | __ret = __swait_event_interruptible(wq, condition); \ |
| 228 | __ret; \ |
| 229 | }) |
| 230 | |
/*
 * Slow path of swait_event_interruptible_timeout_exclusive(): as
 * __swait_event_timeout() but in TASK_INTERRUPTIBLE, so a non-zero return
 * from prepare_to_swait_event() ends the wait and becomes the result.
 */
| 231 | #define __swait_event_interruptible_timeout(wq, condition, timeout) \ |
| 232 | ___swait_event(wq, ___wait_cond_timeout(condition), \ |
| 233 | TASK_INTERRUPTIBLE, timeout, \ |
| 234 | __ret = schedule_timeout(__ret)) |
| 235 | |
/*
 * swait_event_interruptible_timeout_exclusive(wq, condition, timeout) -
 * interruptible wait bounded by @timeout.
 *
 * Evaluates to a long: @timeout unchanged when the up-front
 * ___wait_cond_timeout() check succeeds, otherwise the slow-path result.
 */
Peter Zijlstra | b3dae10 | 2018-06-12 10:34:52 +0200 | [diff] [blame] | 236 | #define swait_event_interruptible_timeout_exclusive(wq, condition, timeout)\ |
Peter Zijlstra (Intel) | 13b3568 | 2016-02-19 09:46:37 +0100 | [diff] [blame] | 237 | ({ \ |
| 238 | long __ret = timeout; \ |
| 239 | if (!___wait_cond_timeout(condition)) \ |
| 240 | __ret = __swait_event_interruptible_timeout(wq, \ |
| 241 | condition, timeout); \ |
| 242 | __ret; \ |
| 243 | }) |
| 244 | |
/*
 * Slow path of swait_event_idle_exclusive(): loop in TASK_IDLE calling
 * schedule() until @condition is true; the result is discarded.
 */
Luis R. Rodriguez | 352eee1 | 2017-06-20 14:45:46 -0700 | [diff] [blame] | 245 | #define __swait_event_idle(wq, condition) \ |
| 246 | (void)___swait_event(wq, condition, TASK_IDLE, 0, schedule()) |
| 247 | |
| 248 | /** |
Peter Zijlstra | b3dae10 | 2018-06-12 10:34:52 +0200 | [diff] [blame] | 249 | * swait_event_idle_exclusive - wait without system load contribution |
Luis R. Rodriguez | 352eee1 | 2017-06-20 14:45:46 -0700 | [diff] [blame] | 250 | * @wq: the waitqueue to wait on |
| 251 | * @condition: a C expression for the event to wait for |
| 252 | * |
| 253 | * The process is put to sleep (TASK_IDLE) until the @condition evaluates to |
| 254 | * true. The @condition is checked each time the waitqueue @wq is woken up. |
| 255 | * |
| 256 | * This function is mostly used when a kthread or workqueue waits for some |
| 257 | * condition and doesn't want to contribute to system load. Signals are |
| 258 | * ignored. |
| 259 | */ |
Peter Zijlstra | b3dae10 | 2018-06-12 10:34:52 +0200 | [diff] [blame] | 260 | #define swait_event_idle_exclusive(wq, condition) \ |
Luis R. Rodriguez | 352eee1 | 2017-06-20 14:45:46 -0700 | [diff] [blame] | 261 | do { \ |
| 262 | if (condition) /* already true: skip the wait loop entirely */ \ |
| 263 | break; \ |
| 264 | __swait_event_idle(wq, condition); /* re-evaluates @condition */ \ |
| 265 | } while (0) |
| 266 | |
/*
 * Slow path of swait_event_idle_timeout_exclusive(): TASK_IDLE wait in which
 * each pass runs schedule_timeout() and stores its return value in the __ret
 * local of ___swait_event(), which starts out as @timeout.
 */
| 267 | #define __swait_event_idle_timeout(wq, condition, timeout) \ |
| 268 | ___swait_event(wq, ___wait_cond_timeout(condition), \ |
| 269 | TASK_IDLE, timeout, \ |
| 270 | __ret = schedule_timeout(__ret)) |
| 271 | |
| 272 | /** |
Peter Zijlstra | b3dae10 | 2018-06-12 10:34:52 +0200 | [diff] [blame] | 273 | * swait_event_idle_timeout_exclusive - wait up to timeout without load contribution |
Luis R. Rodriguez | 352eee1 | 2017-06-20 14:45:46 -0700 | [diff] [blame] | 274 | * @wq: the waitqueue to wait on |
| 275 | * @condition: a C expression for the event to wait for |
| 276 | * @timeout: timeout at which we'll give up in jiffies |
| 277 | * |
| 278 | * The process is put to sleep (TASK_IDLE) until the @condition evaluates to |
| 279 | * true. The @condition is checked each time the waitqueue @wq is woken up. |
| 280 | * |
| 281 | * This function is mostly used when a kthread or workqueue waits for some |
| 282 | * condition and doesn't want to contribute to system load. Signals are |
| 283 | * ignored. |
| 284 | * |
| 285 | * Returns: |
| 286 | * 0 if the @condition evaluated to %false after the @timeout elapsed, |
| 287 | * 1 if the @condition evaluated to %true after the @timeout elapsed, |
| 288 | * or the remaining jiffies (at least 1) if the @condition evaluated |
| 289 | * to %true before the @timeout elapsed. |
| 290 | */ |
Peter Zijlstra | b3dae10 | 2018-06-12 10:34:52 +0200 | [diff] [blame] | 291 | #define swait_event_idle_timeout_exclusive(wq, condition, timeout) \ |
Luis R. Rodriguez | 352eee1 | 2017-06-20 14:45:46 -0700 | [diff] [blame] | 292 | ({ \ |
| 293 | long __ret = timeout; /* result if the up-front check succeeds */ \ |
| 294 | if (!___wait_cond_timeout(condition)) \ |
| 295 | __ret = __swait_event_idle_timeout(wq, \ |
| 296 | condition, timeout); \ |
| 297 | __ret; /* value of the statement expression */ \ |
| 298 | }) |
| 299 | |
Peter Zijlstra (Intel) | 13b3568 | 2016-02-19 09:46:37 +0100 | [diff] [blame] | 300 | #endif /* _LINUX_SWAIT_H */ |