blob: 155fe38756ecfda251f26fa8616a325dddd8d455 [file] [log] [blame]
/*
 * Copyright (C) 2016 Facebook
 * Copyright (C) 2013-2014 Jens Axboe
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public
 * License v2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program.  If not, see <https://www.gnu.org/licenses/>.
 */
17
Ingo Molnaraf8601a2017-02-03 09:57:00 +010018#include <linux/sched.h>
Omar Sandoval98d95412016-09-17 01:28:25 -070019#include <linux/random.h>
Omar Sandoval88459642016-09-17 08:38:44 -060020#include <linux/sbitmap.h>
Omar Sandoval24af1ccf2017-01-25 14:32:13 -080021#include <linux/seq_file.h>
Omar Sandoval88459642016-09-17 08:38:44 -060022
Jens Axboeb2dbff12018-12-11 18:39:41 -070023/*
24 * See if we have deferred clears that we can batch move
25 */
26static inline bool sbitmap_deferred_clear(struct sbitmap *sb, int index)
27{
28 unsigned long mask, val;
Jens Axboeb2dbff12018-12-11 18:39:41 -070029 bool ret = false;
Ming Leife76fc62019-01-15 11:59:52 +080030 unsigned long flags;
Jens Axboeb2dbff12018-12-11 18:39:41 -070031
Ming Leife76fc62019-01-15 11:59:52 +080032 spin_lock_irqsave(&sb->map[index].swap_lock, flags);
Jens Axboeb2dbff12018-12-11 18:39:41 -070033
34 if (!sb->map[index].cleared)
35 goto out_unlock;
36
37 /*
38 * First get a stable cleared mask, setting the old mask to 0.
39 */
40 do {
41 mask = sb->map[index].cleared;
42 } while (cmpxchg(&sb->map[index].cleared, mask, 0) != mask);
43
44 /*
45 * Now clear the masked bits in our free word
46 */
47 do {
48 val = sb->map[index].word;
49 } while (cmpxchg(&sb->map[index].word, val, val & ~mask) != val);
50
51 ret = true;
52out_unlock:
Ming Leife76fc62019-01-15 11:59:52 +080053 spin_unlock_irqrestore(&sb->map[index].swap_lock, flags);
Jens Axboeb2dbff12018-12-11 18:39:41 -070054 return ret;
55}
56
Omar Sandoval88459642016-09-17 08:38:44 -060057int sbitmap_init_node(struct sbitmap *sb, unsigned int depth, int shift,
58 gfp_t flags, int node)
59{
60 unsigned int bits_per_word;
61 unsigned int i;
62
63 if (shift < 0) {
64 shift = ilog2(BITS_PER_LONG);
65 /*
66 * If the bitmap is small, shrink the number of bits per word so
67 * we spread over a few cachelines, at least. If less than 4
68 * bits, just forget about it, it's not going to work optimally
69 * anyway.
70 */
71 if (depth >= 4) {
72 while ((4U << shift) > depth)
73 shift--;
74 }
75 }
76 bits_per_word = 1U << shift;
77 if (bits_per_word > BITS_PER_LONG)
78 return -EINVAL;
79
80 sb->shift = shift;
81 sb->depth = depth;
82 sb->map_nr = DIV_ROUND_UP(sb->depth, bits_per_word);
83
84 if (depth == 0) {
85 sb->map = NULL;
86 return 0;
87 }
88
Kees Cook590b5b72018-06-12 14:04:20 -070089 sb->map = kcalloc_node(sb->map_nr, sizeof(*sb->map), flags, node);
Omar Sandoval88459642016-09-17 08:38:44 -060090 if (!sb->map)
91 return -ENOMEM;
92
93 for (i = 0; i < sb->map_nr; i++) {
94 sb->map[i].depth = min(depth, bits_per_word);
95 depth -= sb->map[i].depth;
Jens Axboeea86ea22018-11-30 13:18:06 -070096 spin_lock_init(&sb->map[i].swap_lock);
Omar Sandoval88459642016-09-17 08:38:44 -060097 }
98 return 0;
99}
100EXPORT_SYMBOL_GPL(sbitmap_init_node);
101
102void sbitmap_resize(struct sbitmap *sb, unsigned int depth)
103{
104 unsigned int bits_per_word = 1U << sb->shift;
105 unsigned int i;
106
Jens Axboeb2dbff12018-12-11 18:39:41 -0700107 for (i = 0; i < sb->map_nr; i++)
108 sbitmap_deferred_clear(sb, i);
109
Omar Sandoval88459642016-09-17 08:38:44 -0600110 sb->depth = depth;
111 sb->map_nr = DIV_ROUND_UP(sb->depth, bits_per_word);
112
113 for (i = 0; i < sb->map_nr; i++) {
114 sb->map[i].depth = min(depth, bits_per_word);
115 depth -= sb->map[i].depth;
116 }
117}
118EXPORT_SYMBOL_GPL(sbitmap_resize);
119
Omar Sandovalc05e6672017-04-14 00:59:58 -0700120static int __sbitmap_get_word(unsigned long *word, unsigned long depth,
121 unsigned int hint, bool wrap)
Omar Sandoval88459642016-09-17 08:38:44 -0600122{
123 unsigned int orig_hint = hint;
124 int nr;
125
126 while (1) {
Omar Sandovalc05e6672017-04-14 00:59:58 -0700127 nr = find_next_zero_bit(word, depth, hint);
128 if (unlikely(nr >= depth)) {
Omar Sandoval88459642016-09-17 08:38:44 -0600129 /*
130 * We started with an offset, and we didn't reset the
131 * offset to 0 in a failure case, so start from 0 to
132 * exhaust the map.
133 */
134 if (orig_hint && hint && wrap) {
135 hint = orig_hint = 0;
136 continue;
137 }
138 return -1;
139 }
140
Omar Sandoval4ace53f2018-02-27 16:56:43 -0800141 if (!test_and_set_bit_lock(nr, word))
Omar Sandoval88459642016-09-17 08:38:44 -0600142 break;
143
144 hint = nr + 1;
Omar Sandovalc05e6672017-04-14 00:59:58 -0700145 if (hint >= depth - 1)
Omar Sandoval88459642016-09-17 08:38:44 -0600146 hint = 0;
147 }
148
149 return nr;
150}
151
Jens Axboeea86ea22018-11-30 13:18:06 -0700152static int sbitmap_find_bit_in_index(struct sbitmap *sb, int index,
153 unsigned int alloc_hint, bool round_robin)
154{
155 int nr;
156
157 do {
158 nr = __sbitmap_get_word(&sb->map[index].word,
159 sb->map[index].depth, alloc_hint,
160 !round_robin);
161 if (nr != -1)
162 break;
163 if (!sbitmap_deferred_clear(sb, index))
164 break;
165 } while (1);
166
167 return nr;
168}
169
Omar Sandoval88459642016-09-17 08:38:44 -0600170int sbitmap_get(struct sbitmap *sb, unsigned int alloc_hint, bool round_robin)
171{
172 unsigned int i, index;
173 int nr = -1;
174
175 index = SB_NR_TO_INDEX(sb, alloc_hint);
176
Jens Axboe27fae422018-11-29 12:35:16 -0700177 /*
178 * Unless we're doing round robin tag allocation, just use the
179 * alloc_hint to find the right word index. No point in looping
180 * twice in find_next_zero_bit() for that case.
181 */
182 if (round_robin)
183 alloc_hint = SB_NR_TO_BIT(sb, alloc_hint);
184 else
185 alloc_hint = 0;
186
Omar Sandoval88459642016-09-17 08:38:44 -0600187 for (i = 0; i < sb->map_nr; i++) {
Jens Axboeea86ea22018-11-30 13:18:06 -0700188 nr = sbitmap_find_bit_in_index(sb, index, alloc_hint,
189 round_robin);
Omar Sandoval88459642016-09-17 08:38:44 -0600190 if (nr != -1) {
191 nr += index << sb->shift;
192 break;
193 }
194
195 /* Jump to next index. */
Jens Axboe27fae422018-11-29 12:35:16 -0700196 alloc_hint = 0;
197 if (++index >= sb->map_nr)
Omar Sandoval88459642016-09-17 08:38:44 -0600198 index = 0;
Omar Sandoval88459642016-09-17 08:38:44 -0600199 }
200
201 return nr;
202}
203EXPORT_SYMBOL_GPL(sbitmap_get);
204
Omar Sandovalc05e6672017-04-14 00:59:58 -0700205int sbitmap_get_shallow(struct sbitmap *sb, unsigned int alloc_hint,
206 unsigned long shallow_depth)
207{
208 unsigned int i, index;
209 int nr = -1;
210
211 index = SB_NR_TO_INDEX(sb, alloc_hint);
212
213 for (i = 0; i < sb->map_nr; i++) {
Jens Axboeb2dbff12018-12-11 18:39:41 -0700214again:
Omar Sandovalc05e6672017-04-14 00:59:58 -0700215 nr = __sbitmap_get_word(&sb->map[index].word,
216 min(sb->map[index].depth, shallow_depth),
217 SB_NR_TO_BIT(sb, alloc_hint), true);
218 if (nr != -1) {
219 nr += index << sb->shift;
220 break;
221 }
222
Jens Axboeb2dbff12018-12-11 18:39:41 -0700223 if (sbitmap_deferred_clear(sb, index))
224 goto again;
225
Omar Sandovalc05e6672017-04-14 00:59:58 -0700226 /* Jump to next index. */
227 index++;
228 alloc_hint = index << sb->shift;
229
230 if (index >= sb->map_nr) {
231 index = 0;
232 alloc_hint = 0;
233 }
234 }
235
236 return nr;
237}
238EXPORT_SYMBOL_GPL(sbitmap_get_shallow);
239
Omar Sandoval88459642016-09-17 08:38:44 -0600240bool sbitmap_any_bit_set(const struct sbitmap *sb)
241{
242 unsigned int i;
243
244 for (i = 0; i < sb->map_nr; i++) {
Jens Axboeb2dbff12018-12-11 18:39:41 -0700245 if (sb->map[i].word & ~sb->map[i].cleared)
Omar Sandoval88459642016-09-17 08:38:44 -0600246 return true;
247 }
248 return false;
249}
250EXPORT_SYMBOL_GPL(sbitmap_any_bit_set);
251
252bool sbitmap_any_bit_clear(const struct sbitmap *sb)
253{
254 unsigned int i;
255
256 for (i = 0; i < sb->map_nr; i++) {
257 const struct sbitmap_word *word = &sb->map[i];
Jens Axboeb2dbff12018-12-11 18:39:41 -0700258 unsigned long mask = word->word & ~word->cleared;
Omar Sandoval88459642016-09-17 08:38:44 -0600259 unsigned long ret;
260
Jens Axboeb2dbff12018-12-11 18:39:41 -0700261 ret = find_first_zero_bit(&mask, word->depth);
Omar Sandoval88459642016-09-17 08:38:44 -0600262 if (ret < word->depth)
263 return true;
264 }
265 return false;
266}
267EXPORT_SYMBOL_GPL(sbitmap_any_bit_clear);
268
Jens Axboeea86ea22018-11-30 13:18:06 -0700269static unsigned int __sbitmap_weight(const struct sbitmap *sb, bool set)
Omar Sandoval88459642016-09-17 08:38:44 -0600270{
Colin Ian King60658e02016-09-19 14:34:08 +0100271 unsigned int i, weight = 0;
Omar Sandoval88459642016-09-17 08:38:44 -0600272
273 for (i = 0; i < sb->map_nr; i++) {
274 const struct sbitmap_word *word = &sb->map[i];
275
Jens Axboeea86ea22018-11-30 13:18:06 -0700276 if (set)
277 weight += bitmap_weight(&word->word, word->depth);
278 else
279 weight += bitmap_weight(&word->cleared, word->depth);
Omar Sandoval88459642016-09-17 08:38:44 -0600280 }
281 return weight;
282}
Jens Axboeea86ea22018-11-30 13:18:06 -0700283
284static unsigned int sbitmap_weight(const struct sbitmap *sb)
285{
286 return __sbitmap_weight(sb, true);
287}
288
289static unsigned int sbitmap_cleared(const struct sbitmap *sb)
290{
291 return __sbitmap_weight(sb, false);
292}
Omar Sandoval88459642016-09-17 08:38:44 -0600293
Omar Sandoval24af1ccf2017-01-25 14:32:13 -0800294void sbitmap_show(struct sbitmap *sb, struct seq_file *m)
295{
296 seq_printf(m, "depth=%u\n", sb->depth);
Jens Axboeea86ea22018-11-30 13:18:06 -0700297 seq_printf(m, "busy=%u\n", sbitmap_weight(sb) - sbitmap_cleared(sb));
298 seq_printf(m, "cleared=%u\n", sbitmap_cleared(sb));
Omar Sandoval24af1ccf2017-01-25 14:32:13 -0800299 seq_printf(m, "bits_per_word=%u\n", 1U << sb->shift);
300 seq_printf(m, "map_nr=%u\n", sb->map_nr);
301}
302EXPORT_SYMBOL_GPL(sbitmap_show);
303
304static inline void emit_byte(struct seq_file *m, unsigned int offset, u8 byte)
305{
306 if ((offset & 0xf) == 0) {
307 if (offset != 0)
308 seq_putc(m, '\n');
309 seq_printf(m, "%08x:", offset);
310 }
311 if ((offset & 0x1) == 0)
312 seq_putc(m, ' ');
313 seq_printf(m, "%02x", byte);
314}
315
316void sbitmap_bitmap_show(struct sbitmap *sb, struct seq_file *m)
317{
318 u8 byte = 0;
319 unsigned int byte_bits = 0;
320 unsigned int offset = 0;
321 int i;
322
323 for (i = 0; i < sb->map_nr; i++) {
324 unsigned long word = READ_ONCE(sb->map[i].word);
325 unsigned int word_bits = READ_ONCE(sb->map[i].depth);
326
327 while (word_bits > 0) {
328 unsigned int bits = min(8 - byte_bits, word_bits);
329
330 byte |= (word & (BIT(bits) - 1)) << byte_bits;
331 byte_bits += bits;
332 if (byte_bits == 8) {
333 emit_byte(m, offset, byte);
334 byte = 0;
335 byte_bits = 0;
336 offset++;
337 }
338 word >>= bits;
339 word_bits -= bits;
340 }
341 }
342 if (byte_bits) {
343 emit_byte(m, offset, byte);
344 offset++;
345 }
346 if (offset)
347 seq_putc(m, '\n');
348}
349EXPORT_SYMBOL_GPL(sbitmap_bitmap_show);
350
Omar Sandovala3275532018-05-09 17:16:31 -0700351static unsigned int sbq_calc_wake_batch(struct sbitmap_queue *sbq,
352 unsigned int depth)
Omar Sandoval88459642016-09-17 08:38:44 -0600353{
354 unsigned int wake_batch;
Omar Sandovala3275532018-05-09 17:16:31 -0700355 unsigned int shallow_depth;
Omar Sandoval88459642016-09-17 08:38:44 -0600356
357 /*
358 * For each batch, we wake up one queue. We need to make sure that our
Omar Sandovala3275532018-05-09 17:16:31 -0700359 * batch size is small enough that the full depth of the bitmap,
360 * potentially limited by a shallow depth, is enough to wake up all of
361 * the queues.
362 *
363 * Each full word of the bitmap has bits_per_word bits, and there might
364 * be a partial word. There are depth / bits_per_word full words and
365 * depth % bits_per_word bits left over. In bitwise arithmetic:
366 *
367 * bits_per_word = 1 << shift
368 * depth / bits_per_word = depth >> shift
369 * depth % bits_per_word = depth & ((1 << shift) - 1)
370 *
371 * Each word can be limited to sbq->min_shallow_depth bits.
Omar Sandoval88459642016-09-17 08:38:44 -0600372 */
Omar Sandovala3275532018-05-09 17:16:31 -0700373 shallow_depth = min(1U << sbq->sb.shift, sbq->min_shallow_depth);
374 depth = ((depth >> sbq->sb.shift) * shallow_depth +
375 min(depth & ((1U << sbq->sb.shift) - 1), shallow_depth));
376 wake_batch = clamp_t(unsigned int, depth / SBQ_WAIT_QUEUES, 1,
377 SBQ_WAKE_BATCH);
Omar Sandoval88459642016-09-17 08:38:44 -0600378
379 return wake_batch;
380}
381
382int sbitmap_queue_init_node(struct sbitmap_queue *sbq, unsigned int depth,
Omar Sandovalf4a644d2016-09-17 01:28:24 -0700383 int shift, bool round_robin, gfp_t flags, int node)
Omar Sandoval88459642016-09-17 08:38:44 -0600384{
385 int ret;
386 int i;
387
388 ret = sbitmap_init_node(&sbq->sb, depth, shift, flags, node);
389 if (ret)
390 return ret;
391
Omar Sandoval40aabb62016-09-17 01:28:23 -0700392 sbq->alloc_hint = alloc_percpu_gfp(unsigned int, flags);
393 if (!sbq->alloc_hint) {
394 sbitmap_free(&sbq->sb);
395 return -ENOMEM;
396 }
397
Omar Sandoval98d95412016-09-17 01:28:25 -0700398 if (depth && !round_robin) {
399 for_each_possible_cpu(i)
400 *per_cpu_ptr(sbq->alloc_hint, i) = prandom_u32() % depth;
401 }
402
Omar Sandovala3275532018-05-09 17:16:31 -0700403 sbq->min_shallow_depth = UINT_MAX;
404 sbq->wake_batch = sbq_calc_wake_batch(sbq, depth);
Omar Sandoval88459642016-09-17 08:38:44 -0600405 atomic_set(&sbq->wake_index, 0);
Jens Axboe5d2ee712018-11-29 17:36:41 -0700406 atomic_set(&sbq->ws_active, 0);
Omar Sandoval88459642016-09-17 08:38:44 -0600407
Omar Sandoval48e28162016-09-17 01:28:22 -0700408 sbq->ws = kzalloc_node(SBQ_WAIT_QUEUES * sizeof(*sbq->ws), flags, node);
Omar Sandoval88459642016-09-17 08:38:44 -0600409 if (!sbq->ws) {
Omar Sandoval40aabb62016-09-17 01:28:23 -0700410 free_percpu(sbq->alloc_hint);
Omar Sandoval88459642016-09-17 08:38:44 -0600411 sbitmap_free(&sbq->sb);
412 return -ENOMEM;
413 }
414
415 for (i = 0; i < SBQ_WAIT_QUEUES; i++) {
416 init_waitqueue_head(&sbq->ws[i].wait);
417 atomic_set(&sbq->ws[i].wait_cnt, sbq->wake_batch);
418 }
Omar Sandovalf4a644d2016-09-17 01:28:24 -0700419
420 sbq->round_robin = round_robin;
Omar Sandoval88459642016-09-17 08:38:44 -0600421 return 0;
422}
423EXPORT_SYMBOL_GPL(sbitmap_queue_init_node);
424
Omar Sandovala3275532018-05-09 17:16:31 -0700425static void sbitmap_queue_update_wake_batch(struct sbitmap_queue *sbq,
426 unsigned int depth)
Omar Sandoval88459642016-09-17 08:38:44 -0600427{
Omar Sandovala3275532018-05-09 17:16:31 -0700428 unsigned int wake_batch = sbq_calc_wake_batch(sbq, depth);
Omar Sandoval6c0ca7a2017-01-18 11:55:22 -0800429 int i;
430
431 if (sbq->wake_batch != wake_batch) {
432 WRITE_ONCE(sbq->wake_batch, wake_batch);
433 /*
Ming Leie6fc4642018-05-24 11:00:39 -0600434 * Pairs with the memory barrier in sbitmap_queue_wake_up()
435 * to ensure that the batch size is updated before the wait
436 * counts.
Omar Sandoval6c0ca7a2017-01-18 11:55:22 -0800437 */
438 smp_mb__before_atomic();
439 for (i = 0; i < SBQ_WAIT_QUEUES; i++)
440 atomic_set(&sbq->ws[i].wait_cnt, 1);
441 }
Omar Sandovala3275532018-05-09 17:16:31 -0700442}
443
444void sbitmap_queue_resize(struct sbitmap_queue *sbq, unsigned int depth)
445{
446 sbitmap_queue_update_wake_batch(sbq, depth);
Omar Sandoval88459642016-09-17 08:38:44 -0600447 sbitmap_resize(&sbq->sb, depth);
448}
449EXPORT_SYMBOL_GPL(sbitmap_queue_resize);
450
Omar Sandovalf4a644d2016-09-17 01:28:24 -0700451int __sbitmap_queue_get(struct sbitmap_queue *sbq)
Omar Sandoval40aabb62016-09-17 01:28:23 -0700452{
Omar Sandoval05fd0952016-09-17 01:28:26 -0700453 unsigned int hint, depth;
Omar Sandoval40aabb62016-09-17 01:28:23 -0700454 int nr;
455
456 hint = this_cpu_read(*sbq->alloc_hint);
Omar Sandoval05fd0952016-09-17 01:28:26 -0700457 depth = READ_ONCE(sbq->sb.depth);
458 if (unlikely(hint >= depth)) {
459 hint = depth ? prandom_u32() % depth : 0;
460 this_cpu_write(*sbq->alloc_hint, hint);
461 }
Omar Sandovalf4a644d2016-09-17 01:28:24 -0700462 nr = sbitmap_get(&sbq->sb, hint, sbq->round_robin);
Omar Sandoval40aabb62016-09-17 01:28:23 -0700463
464 if (nr == -1) {
465 /* If the map is full, a hint won't do us much good. */
466 this_cpu_write(*sbq->alloc_hint, 0);
Omar Sandovalf4a644d2016-09-17 01:28:24 -0700467 } else if (nr == hint || unlikely(sbq->round_robin)) {
Omar Sandoval40aabb62016-09-17 01:28:23 -0700468 /* Only update the hint if we used it. */
469 hint = nr + 1;
Omar Sandoval05fd0952016-09-17 01:28:26 -0700470 if (hint >= depth - 1)
Omar Sandoval40aabb62016-09-17 01:28:23 -0700471 hint = 0;
472 this_cpu_write(*sbq->alloc_hint, hint);
473 }
474
475 return nr;
476}
477EXPORT_SYMBOL_GPL(__sbitmap_queue_get);
478
Omar Sandovalc05e6672017-04-14 00:59:58 -0700479int __sbitmap_queue_get_shallow(struct sbitmap_queue *sbq,
480 unsigned int shallow_depth)
481{
482 unsigned int hint, depth;
483 int nr;
484
Omar Sandoval61445b562018-05-09 17:29:24 -0700485 WARN_ON_ONCE(shallow_depth < sbq->min_shallow_depth);
486
Omar Sandovalc05e6672017-04-14 00:59:58 -0700487 hint = this_cpu_read(*sbq->alloc_hint);
488 depth = READ_ONCE(sbq->sb.depth);
489 if (unlikely(hint >= depth)) {
490 hint = depth ? prandom_u32() % depth : 0;
491 this_cpu_write(*sbq->alloc_hint, hint);
492 }
493 nr = sbitmap_get_shallow(&sbq->sb, hint, shallow_depth);
494
495 if (nr == -1) {
496 /* If the map is full, a hint won't do us much good. */
497 this_cpu_write(*sbq->alloc_hint, 0);
498 } else if (nr == hint || unlikely(sbq->round_robin)) {
499 /* Only update the hint if we used it. */
500 hint = nr + 1;
501 if (hint >= depth - 1)
502 hint = 0;
503 this_cpu_write(*sbq->alloc_hint, hint);
504 }
505
506 return nr;
507}
508EXPORT_SYMBOL_GPL(__sbitmap_queue_get_shallow);
509
Omar Sandovala3275532018-05-09 17:16:31 -0700510void sbitmap_queue_min_shallow_depth(struct sbitmap_queue *sbq,
511 unsigned int min_shallow_depth)
512{
513 sbq->min_shallow_depth = min_shallow_depth;
514 sbitmap_queue_update_wake_batch(sbq, sbq->sb.depth);
515}
516EXPORT_SYMBOL_GPL(sbitmap_queue_min_shallow_depth);
517
Omar Sandoval88459642016-09-17 08:38:44 -0600518static struct sbq_wait_state *sbq_wake_ptr(struct sbitmap_queue *sbq)
519{
520 int i, wake_index;
521
Jens Axboe5d2ee712018-11-29 17:36:41 -0700522 if (!atomic_read(&sbq->ws_active))
523 return NULL;
524
Omar Sandoval88459642016-09-17 08:38:44 -0600525 wake_index = atomic_read(&sbq->wake_index);
526 for (i = 0; i < SBQ_WAIT_QUEUES; i++) {
527 struct sbq_wait_state *ws = &sbq->ws[wake_index];
528
529 if (waitqueue_active(&ws->wait)) {
530 int o = atomic_read(&sbq->wake_index);
531
532 if (wake_index != o)
533 atomic_cmpxchg(&sbq->wake_index, o, wake_index);
534 return ws;
535 }
536
537 wake_index = sbq_index_inc(wake_index);
538 }
539
540 return NULL;
541}
542
Jens Axboec854ab52018-05-14 12:17:31 -0600543static bool __sbq_wake_up(struct sbitmap_queue *sbq)
Omar Sandoval88459642016-09-17 08:38:44 -0600544{
545 struct sbq_wait_state *ws;
Omar Sandoval6c0ca7a2017-01-18 11:55:22 -0800546 unsigned int wake_batch;
Omar Sandoval88459642016-09-17 08:38:44 -0600547 int wait_cnt;
548
Omar Sandoval88459642016-09-17 08:38:44 -0600549 ws = sbq_wake_ptr(sbq);
550 if (!ws)
Jens Axboec854ab52018-05-14 12:17:31 -0600551 return false;
Omar Sandoval88459642016-09-17 08:38:44 -0600552
553 wait_cnt = atomic_dec_return(&ws->wait_cnt);
Omar Sandoval6c0ca7a2017-01-18 11:55:22 -0800554 if (wait_cnt <= 0) {
Jens Axboec854ab52018-05-14 12:17:31 -0600555 int ret;
556
Omar Sandoval6c0ca7a2017-01-18 11:55:22 -0800557 wake_batch = READ_ONCE(sbq->wake_batch);
Jens Axboec854ab52018-05-14 12:17:31 -0600558
Omar Sandoval6c0ca7a2017-01-18 11:55:22 -0800559 /*
560 * Pairs with the memory barrier in sbitmap_queue_resize() to
561 * ensure that we see the batch size update before the wait
562 * count is reset.
563 */
564 smp_mb__before_atomic();
Jens Axboec854ab52018-05-14 12:17:31 -0600565
Omar Sandoval6c0ca7a2017-01-18 11:55:22 -0800566 /*
Jens Axboec854ab52018-05-14 12:17:31 -0600567 * For concurrent callers of this, the one that failed the
568 * atomic_cmpxhcg() race should call this function again
569 * to wakeup a new batch on a different 'ws'.
Omar Sandoval6c0ca7a2017-01-18 11:55:22 -0800570 */
Jens Axboec854ab52018-05-14 12:17:31 -0600571 ret = atomic_cmpxchg(&ws->wait_cnt, wait_cnt, wake_batch);
572 if (ret == wait_cnt) {
573 sbq_index_atomic_inc(&sbq->wake_index);
574 wake_up_nr(&ws->wait, wake_batch);
575 return false;
576 }
577
578 return true;
Omar Sandoval88459642016-09-17 08:38:44 -0600579 }
Jens Axboec854ab52018-05-14 12:17:31 -0600580
581 return false;
582}
583
Ming Leie6fc4642018-05-24 11:00:39 -0600584void sbitmap_queue_wake_up(struct sbitmap_queue *sbq)
Jens Axboec854ab52018-05-14 12:17:31 -0600585{
586 while (__sbq_wake_up(sbq))
587 ;
Omar Sandoval88459642016-09-17 08:38:44 -0600588}
Ming Leie6fc4642018-05-24 11:00:39 -0600589EXPORT_SYMBOL_GPL(sbitmap_queue_wake_up);
Omar Sandoval88459642016-09-17 08:38:44 -0600590
Omar Sandoval40aabb62016-09-17 01:28:23 -0700591void sbitmap_queue_clear(struct sbitmap_queue *sbq, unsigned int nr,
Omar Sandovalf4a644d2016-09-17 01:28:24 -0700592 unsigned int cpu)
Omar Sandoval88459642016-09-17 08:38:44 -0600593{
Ming Leie6d1fa52019-03-22 09:13:51 +0800594 /*
595 * Once the clear bit is set, the bit may be allocated out.
596 *
597 * Orders READ/WRITE on the asssociated instance(such as request
598 * of blk_mq) by this bit for avoiding race with re-allocation,
599 * and its pair is the memory barrier implied in __sbitmap_get_word.
600 *
601 * One invariant is that the clear bit has to be zero when the bit
602 * is in use.
603 */
604 smp_mb__before_atomic();
Jens Axboeea86ea22018-11-30 13:18:06 -0700605 sbitmap_deferred_clear_bit(&sbq->sb, nr);
606
Ming Leie6fc4642018-05-24 11:00:39 -0600607 /*
608 * Pairs with the memory barrier in set_current_state() to ensure the
609 * proper ordering of clear_bit_unlock()/waitqueue_active() in the waker
610 * and test_and_set_bit_lock()/prepare_to_wait()/finish_wait() in the
611 * waiter. See the comment on waitqueue_active().
612 */
613 smp_mb__after_atomic();
614 sbitmap_queue_wake_up(sbq);
615
Omar Sandoval5c64a8d2016-09-17 12:20:54 -0700616 if (likely(!sbq->round_robin && nr < sbq->sb.depth))
Omar Sandoval40aabb62016-09-17 01:28:23 -0700617 *per_cpu_ptr(sbq->alloc_hint, cpu) = nr;
Omar Sandoval88459642016-09-17 08:38:44 -0600618}
619EXPORT_SYMBOL_GPL(sbitmap_queue_clear);
620
621void sbitmap_queue_wake_all(struct sbitmap_queue *sbq)
622{
623 int i, wake_index;
624
625 /*
Omar Sandovalf66227d2017-01-18 11:55:21 -0800626 * Pairs with the memory barrier in set_current_state() like in
Ming Leie6fc4642018-05-24 11:00:39 -0600627 * sbitmap_queue_wake_up().
Omar Sandoval88459642016-09-17 08:38:44 -0600628 */
629 smp_mb();
630 wake_index = atomic_read(&sbq->wake_index);
631 for (i = 0; i < SBQ_WAIT_QUEUES; i++) {
632 struct sbq_wait_state *ws = &sbq->ws[wake_index];
633
634 if (waitqueue_active(&ws->wait))
635 wake_up(&ws->wait);
636
637 wake_index = sbq_index_inc(wake_index);
638 }
639}
640EXPORT_SYMBOL_GPL(sbitmap_queue_wake_all);
Omar Sandoval24af1ccf2017-01-25 14:32:13 -0800641
642void sbitmap_queue_show(struct sbitmap_queue *sbq, struct seq_file *m)
643{
644 bool first;
645 int i;
646
647 sbitmap_show(&sbq->sb, m);
648
649 seq_puts(m, "alloc_hint={");
650 first = true;
651 for_each_possible_cpu(i) {
652 if (!first)
653 seq_puts(m, ", ");
654 first = false;
655 seq_printf(m, "%u", *per_cpu_ptr(sbq->alloc_hint, i));
656 }
657 seq_puts(m, "}\n");
658
659 seq_printf(m, "wake_batch=%u\n", sbq->wake_batch);
660 seq_printf(m, "wake_index=%d\n", atomic_read(&sbq->wake_index));
Jens Axboe5d2ee712018-11-29 17:36:41 -0700661 seq_printf(m, "ws_active=%d\n", atomic_read(&sbq->ws_active));
Omar Sandoval24af1ccf2017-01-25 14:32:13 -0800662
663 seq_puts(m, "ws={\n");
664 for (i = 0; i < SBQ_WAIT_QUEUES; i++) {
665 struct sbq_wait_state *ws = &sbq->ws[i];
666
667 seq_printf(m, "\t{.wait_cnt=%d, .wait=%s},\n",
668 atomic_read(&ws->wait_cnt),
669 waitqueue_active(&ws->wait) ? "active" : "inactive");
670 }
671 seq_puts(m, "}\n");
672
673 seq_printf(m, "round_robin=%d\n", sbq->round_robin);
Omar Sandovala3275532018-05-09 17:16:31 -0700674 seq_printf(m, "min_shallow_depth=%u\n", sbq->min_shallow_depth);
Omar Sandoval24af1ccf2017-01-25 14:32:13 -0800675}
676EXPORT_SYMBOL_GPL(sbitmap_queue_show);
Jens Axboe5d2ee712018-11-29 17:36:41 -0700677
Jens Axboe9f6b7ef2018-12-20 08:49:00 -0700678void sbitmap_add_wait_queue(struct sbitmap_queue *sbq,
679 struct sbq_wait_state *ws,
680 struct sbq_wait *sbq_wait)
681{
682 if (!sbq_wait->sbq) {
683 sbq_wait->sbq = sbq;
684 atomic_inc(&sbq->ws_active);
685 }
686 add_wait_queue(&ws->wait, &sbq_wait->wait);
687}
688EXPORT_SYMBOL_GPL(sbitmap_add_wait_queue);
689
690void sbitmap_del_wait_queue(struct sbq_wait *sbq_wait)
691{
692 list_del_init(&sbq_wait->wait.entry);
693 if (sbq_wait->sbq) {
694 atomic_dec(&sbq_wait->sbq->ws_active);
695 sbq_wait->sbq = NULL;
696 }
697}
698EXPORT_SYMBOL_GPL(sbitmap_del_wait_queue);
699
Jens Axboe5d2ee712018-11-29 17:36:41 -0700700void sbitmap_prepare_to_wait(struct sbitmap_queue *sbq,
701 struct sbq_wait_state *ws,
702 struct sbq_wait *sbq_wait, int state)
703{
Jens Axboe9f6b7ef2018-12-20 08:49:00 -0700704 if (!sbq_wait->sbq) {
Jens Axboe5d2ee712018-11-29 17:36:41 -0700705 atomic_inc(&sbq->ws_active);
Jens Axboe9f6b7ef2018-12-20 08:49:00 -0700706 sbq_wait->sbq = sbq;
Jens Axboe5d2ee712018-11-29 17:36:41 -0700707 }
708 prepare_to_wait_exclusive(&ws->wait, &sbq_wait->wait, state);
709}
710EXPORT_SYMBOL_GPL(sbitmap_prepare_to_wait);
711
712void sbitmap_finish_wait(struct sbitmap_queue *sbq, struct sbq_wait_state *ws,
713 struct sbq_wait *sbq_wait)
714{
715 finish_wait(&ws->wait, &sbq_wait->wait);
Jens Axboe9f6b7ef2018-12-20 08:49:00 -0700716 if (sbq_wait->sbq) {
Jens Axboe5d2ee712018-11-29 17:36:41 -0700717 atomic_dec(&sbq->ws_active);
Jens Axboe9f6b7ef2018-12-20 08:49:00 -0700718 sbq_wait->sbq = NULL;
Jens Axboe5d2ee712018-11-29 17:36:41 -0700719 }
720}
721EXPORT_SYMBOL_GPL(sbitmap_finish_wait);