/*
 * blk-mq scheduling framework
 *
 * Copyright (C) 2016 Jens Axboe
 */
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/blk-mq.h>

#include <trace/events/block.h>

#include "blk.h"
#include "blk-mq.h"
#include "blk-mq-debugfs.h"
#include "blk-mq-sched.h"
#include "blk-mq-tag.h"
#include "blk-wbt.h"

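/*
 * Free the per-hctx scheduler data for all hardware queues of @q, invoking
 * the optional @exit callback first for each hctx that has data attached.
 */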
void blk_mq_sched_free_hctx_data(struct request_queue *q,
				 void (*exit)(struct blk_mq_hw_ctx *))
{
	struct blk_mq_hw_ctx *hctx;
	int i;

	queue_for_each_hw_ctx(q, hctx, i) {
		if (exit && hctx->sched_data)
			exit(hctx);
		kfree(hctx->sched_data);
		hctx->sched_data = NULL;
	}
}
EXPORT_SYMBOL_GPL(blk_mq_sched_free_hctx_data);

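/*
 * Look up, or create if necessary, the io_cq for the io_context associated
 * with @bio on this queue, take a reference on the io_context, and attach
 * the icq to the request's elevator data.
 */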
void blk_mq_sched_assign_ioc(struct request *rq, struct bio *bio)
{
	struct request_queue *q = rq->q;
	struct io_context *ioc = rq_ioc(bio);
	struct io_cq *icq;

	spin_lock_irq(q->queue_lock);
	icq = ioc_lookup_icq(ioc, q);
	spin_unlock_irq(q->queue_lock);

	if (!icq) {
		icq = ioc_create_icq(ioc, q, GFP_ATOMIC);
		if (!icq)
			return;
	}
	get_io_context(icq->ioc);
	rq->elv.icq = icq;
}

/*
 * Mark a hardware queue as needing a restart. For shared queues, maintain
 * a count of how many hardware queues are marked for restart.
 */
static void blk_mq_sched_mark_restart_hctx(struct blk_mq_hw_ctx *hctx)
{
	if (test_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state))
		return;

	if (hctx->flags & BLK_MQ_F_TAG_SHARED) {
		struct request_queue *q = hctx->queue;

		if (!test_and_set_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state))
			atomic_inc(&q->shared_hctx_restart);
	} else
		set_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state);
}

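/*
 * Clear the restart mark on a hardware queue and, if it still has pending
 * work, run it again. Returns true if the queue was re-run.
 */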
static bool blk_mq_sched_restart_hctx(struct blk_mq_hw_ctx *hctx)
{
	if (!test_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state))
		return false;

	if (hctx->flags & BLK_MQ_F_TAG_SHARED) {
		struct request_queue *q = hctx->queue;

		if (test_and_clear_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state))
			atomic_dec(&q->shared_hctx_restart);
	} else
		clear_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state);

	if (blk_mq_hctx_has_pending(hctx)) {
		blk_mq_run_hw_queue(hctx, true);
		return true;
	}

	return false;
}

/*
 * Only SCSI implements .get_budget and .put_budget, and SCSI restarts
 * its queue by itself in its completion handler, so we don't need to
 * restart the queue if .get_budget() returns BLK_STS_NO_RESOURCE.
 */
static void blk_mq_do_dispatch_sched(struct blk_mq_hw_ctx *hctx)
{
	struct request_queue *q = hctx->queue;
	struct elevator_queue *e = q->elevator;
	LIST_HEAD(rq_list);

	do {
		struct request *rq;

		if (e->type->ops.mq.has_work &&
		    !e->type->ops.mq.has_work(hctx))
			break;

		if (!blk_mq_get_dispatch_budget(hctx))
			break;

		rq = e->type->ops.mq.dispatch_request(hctx);
		if (!rq) {
			blk_mq_put_dispatch_budget(hctx);
			break;
		}

		/*
		 * Now this rq owns the budget, which has to be released
		 * if this rq isn't queued to the driver via .queue_rq()
		 * in blk_mq_dispatch_rq_list().
		 */
		list_add(&rq->queuelist, &rq_list);
	} while (blk_mq_dispatch_rq_list(q, &rq_list, true));

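/* Return the software queue that follows @ctx on @hctx, wrapping around. */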
static struct blk_mq_ctx *blk_mq_next_ctx(struct blk_mq_hw_ctx *hctx,
					  struct blk_mq_ctx *ctx)
{
	unsigned idx = ctx->index_hw;

	if (++idx == hctx->nr_ctx)
		idx = 0;

	return hctx->ctxs[idx];
}

/*
 * Only SCSI implements .get_budget and .put_budget, and SCSI restarts
 * its queue by itself in its completion handler, so we don't need to
 * restart the queue if .get_budget() returns BLK_STS_NO_RESOURCE.
 */
static void blk_mq_do_dispatch_ctx(struct blk_mq_hw_ctx *hctx)
{
	struct request_queue *q = hctx->queue;
	LIST_HEAD(rq_list);
	struct blk_mq_ctx *ctx = READ_ONCE(hctx->dispatch_from);

	do {
		struct request *rq;

		if (!sbitmap_any_bit_set(&hctx->ctx_map))
			break;

		if (!blk_mq_get_dispatch_budget(hctx))
			break;

		rq = blk_mq_dequeue_from_ctx(hctx, ctx);
		if (!rq) {
			blk_mq_put_dispatch_budget(hctx);
			break;
		}

		/*
		 * Now this rq owns the budget, which has to be released
		 * if this rq isn't queued to the driver via .queue_rq()
		 * in blk_mq_dispatch_rq_list().
		 */
		list_add(&rq->queuelist, &rq_list);

		/* round robin for fair dispatch */
		ctx = blk_mq_next_ctx(hctx, rq->mq_ctx);

	} while (blk_mq_dispatch_rq_list(q, &rq_list, true));

	WRITE_ONCE(hctx->dispatch_from, ctx);
}

/*
 * Dispatch requests for a hardware queue: drain the hctx dispatch list
 * first, then pull further requests from the I/O scheduler or the software
 * queues as capacity allows.
 */
void blk_mq_sched_dispatch_requests(struct blk_mq_hw_ctx *hctx)
{
	struct request_queue *q = hctx->queue;
	struct elevator_queue *e = q->elevator;
	const bool has_sched_dispatch = e && e->type->ops.mq.dispatch_request;
	LIST_HEAD(rq_list);

	/* RCU or SRCU read lock is needed before checking quiesced flag */
	if (unlikely(blk_mq_hctx_stopped(hctx) || blk_queue_quiesced(q)))
		return;

	hctx->run++;

	/*
	 * If we have previous entries on our dispatch list, grab them first for
	 * more fair dispatch.
	 */
	if (!list_empty_careful(&hctx->dispatch)) {
		spin_lock(&hctx->lock);
		if (!list_empty(&hctx->dispatch))
			list_splice_init(&hctx->dispatch, &rq_list);
		spin_unlock(&hctx->lock);
	}

	/*
	 * Only ask the scheduler for requests if we didn't have residual
	 * requests from the dispatch list. This is to avoid the case where
	 * we only ever dispatch a fraction of the requests available because
	 * of low device queue depth. Once we pull requests out of the IO
	 * scheduler, we can no longer merge or sort them. So it's best to
	 * leave them there for as long as we can. Mark the hw queue as
	 * needing a restart in that case.
	 *
	 * We want to dispatch from the scheduler if there was nothing
	 * on the dispatch list or we were able to dispatch from the
	 * dispatch list.
	 */
	if (!list_empty(&rq_list)) {
		blk_mq_sched_mark_restart_hctx(hctx);
		if (blk_mq_dispatch_rq_list(q, &rq_list, false)) {
			if (has_sched_dispatch)
				blk_mq_do_dispatch_sched(hctx);
			else
				blk_mq_do_dispatch_ctx(hctx);
		}
	} else if (has_sched_dispatch) {
		blk_mq_do_dispatch_sched(hctx);
	} else if (q->mq_ops->get_budget) {
		/*
		 * If we need to get a budget before queueing a request, we
		 * dequeue requests one by one from the sw queue to avoid
		 * messing up I/O merging when dispatching runs out of
		 * resources.
		 *
		 * TODO: get more budgets, and dequeue more requests in
		 * one time.
		 */
		blk_mq_do_dispatch_ctx(hctx);
	} else {
		blk_mq_flush_busy_ctxs(hctx, &rq_list);
		blk_mq_dispatch_rq_list(q, &rq_list, false);
	}
}

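/*
 * Ask the elevator whether @bio can be merged into an existing request and,
 * if so, perform the merge. A request freed up by a follow-on back/front
 * request merge is returned through @merged_request.
 */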
bool blk_mq_sched_try_merge(struct request_queue *q, struct bio *bio,
			    struct request **merged_request)
{
	struct request *rq;

	switch (elv_merge(q, &rq, bio)) {
	case ELEVATOR_BACK_MERGE:
		if (!blk_mq_sched_allow_merge(q, rq, bio))
			return false;
		if (!bio_attempt_back_merge(q, rq, bio))
			return false;
		*merged_request = attempt_back_merge(q, rq);
		if (!*merged_request)
			elv_merged_request(q, rq, ELEVATOR_BACK_MERGE);
		return true;
	case ELEVATOR_FRONT_MERGE:
		if (!blk_mq_sched_allow_merge(q, rq, bio))
			return false;
		if (!bio_attempt_front_merge(q, rq, bio))
			return false;
		*merged_request = attempt_front_merge(q, rq);
		if (!*merged_request)
			elv_merged_request(q, rq, ELEVATOR_FRONT_MERGE);
		return true;
	default:
		return false;
	}
}
EXPORT_SYMBOL_GPL(blk_mq_sched_try_merge);

/*
 * Reverse check our software queue for entries that we could potentially
 * merge with. Currently includes a hand-wavy stop count of 8, to not spend
 * too much time checking for merges.
 */
static bool blk_mq_attempt_merge(struct request_queue *q,
				 struct blk_mq_ctx *ctx, struct bio *bio)
{
	struct request *rq;
	int checked = 8;

	lockdep_assert_held(&ctx->lock);

	list_for_each_entry_reverse(rq, &ctx->rq_list, queuelist) {
		bool merged = false;

		if (!checked--)
			break;

		if (!blk_rq_merge_ok(rq, bio))
			continue;

		switch (blk_try_merge(rq, bio)) {
		case ELEVATOR_BACK_MERGE:
			if (blk_mq_sched_allow_merge(q, rq, bio))
				merged = bio_attempt_back_merge(q, rq, bio);
			break;
		case ELEVATOR_FRONT_MERGE:
			if (blk_mq_sched_allow_merge(q, rq, bio))
				merged = bio_attempt_front_merge(q, rq, bio);
			break;
		case ELEVATOR_DISCARD_MERGE:
			merged = bio_attempt_discard_merge(q, rq, bio);
			break;
		default:
			continue;
		}

		if (merged)
			ctx->rq_merged++;
		return merged;
	}

	return false;
}

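/*
 * Let the elevator attempt the bio merge if it implements ->bio_merge(),
 * otherwise fall back to the default per-sw-queue merge.
 */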
bool __blk_mq_sched_bio_merge(struct request_queue *q, struct bio *bio)
{
	struct elevator_queue *e = q->elevator;
	struct blk_mq_ctx *ctx = blk_mq_get_ctx(q);
	struct blk_mq_hw_ctx *hctx = blk_mq_map_queue(q, ctx->cpu);
	bool ret = false;

	if (e && e->type->ops.mq.bio_merge) {
		blk_mq_put_ctx(ctx);
		return e->type->ops.mq.bio_merge(hctx, bio);
	}

	if (hctx->flags & BLK_MQ_F_SHOULD_MERGE) {
		/* default per sw-queue merge */
		spin_lock(&ctx->lock);
		ret = blk_mq_attempt_merge(q, ctx, bio);
		spin_unlock(&ctx->lock);
	}

	blk_mq_put_ctx(ctx);
	return ret;
}

bool blk_mq_sched_try_insert_merge(struct request_queue *q, struct request *rq)
{
	return rq_mergeable(rq) && elv_attempt_insert_merge(q, rq);
}
EXPORT_SYMBOL_GPL(blk_mq_sched_try_insert_merge);

void blk_mq_sched_request_inserted(struct request *rq)
{
	trace_block_rq_insert(rq->q, rq);
}
EXPORT_SYMBOL_GPL(blk_mq_sched_request_inserted);

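/*
 * Returns true if @rq must bypass the I/O scheduler: requests that are part
 * of a flush sequence go straight onto the hctx dispatch list.
 */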
static bool blk_mq_sched_bypass_insert(struct blk_mq_hw_ctx *hctx,
				       bool has_sched,
				       struct request *rq)
{
	/* dispatch flush rq directly */
	if (rq->rq_flags & RQF_FLUSH_SEQ) {
		spin_lock(&hctx->lock);
		list_add(&rq->queuelist, &hctx->dispatch);
		spin_unlock(&hctx->lock);
		return true;
	}

	if (has_sched)
		rq->rq_flags |= RQF_SORTED;

	return false;
}

/**
 * list_for_each_entry_rcu_rr - iterate in a round-robin fashion over rcu list
 * @pos:    loop cursor.
 * @skip:   the list element that will not be examined. Iteration starts at
 *          @skip->next.
 * @head:   head of the list to examine. This list must have at least one
 *          element, namely @skip.
 * @member: name of the list_head structure within typeof(*pos).
 */
#define list_for_each_entry_rcu_rr(pos, skip, head, member)		\
	for ((pos) = (skip);						\
	     (pos = (pos)->member.next != (head) ? list_entry_rcu(	\
			(pos)->member.next, typeof(*pos), member) :	\
		list_entry_rcu((pos)->member.next->next, typeof(*pos), member)), \
	     (pos) != (skip); )

/*
 * Called after a driver tag has been freed to check whether a hctx needs to
 * be restarted. Restarts @hctx if its tag set is not shared. Restarts hardware
 * queues in a round-robin fashion if the tag set of @hctx is shared with other
 * hardware queues.
 */
void blk_mq_sched_restart(struct blk_mq_hw_ctx *const hctx)
{
	struct blk_mq_tags *const tags = hctx->tags;
	struct blk_mq_tag_set *const set = hctx->queue->tag_set;
	struct request_queue *const queue = hctx->queue, *q;
	struct blk_mq_hw_ctx *hctx2;
	unsigned int i, j;

	if (set->flags & BLK_MQ_F_TAG_SHARED) {
		/*
		 * If this is 0, then we know that no hardware queues
		 * have RESTART marked. We're done.
		 */
		if (!atomic_read(&queue->shared_hctx_restart))
			return;

		rcu_read_lock();
		list_for_each_entry_rcu_rr(q, queue, &set->tag_list,
					   tag_set_list) {
			queue_for_each_hw_ctx(q, hctx2, i)
				if (hctx2->tags == tags &&
				    blk_mq_sched_restart_hctx(hctx2))
					goto done;
		}
		j = hctx->queue_num + 1;
		for (i = 0; i < queue->nr_hw_queues; i++, j++) {
			if (j == queue->nr_hw_queues)
				j = 0;
			hctx2 = queue->queue_hw_ctx[j];
			if (hctx2->tags == tags &&
			    blk_mq_sched_restart_hctx(hctx2))
				break;
		}
done:
		rcu_read_unlock();
	} else {
		blk_mq_sched_restart_hctx(hctx);
	}
}

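/*
 * Insert @rq: a flush request that has not yet entered the flush machinery
 * is handed to blk_insert_flush(), flush-sequence requests bypass the
 * scheduler, and everything else goes to the elevator (if one is attached)
 * or the software queue. Optionally run the hardware queue afterwards.
 */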
void blk_mq_sched_insert_request(struct request *rq, bool at_head,
				 bool run_queue, bool async, bool can_block)
{
	struct request_queue *q = rq->q;
	struct elevator_queue *e = q->elevator;
	struct blk_mq_ctx *ctx = rq->mq_ctx;
	struct blk_mq_hw_ctx *hctx = blk_mq_map_queue(q, ctx->cpu);

	/* a flush rq in the flush machinery needs to be dispatched directly */
	if (!(rq->rq_flags & RQF_FLUSH_SEQ) && op_is_flush(rq->cmd_flags)) {
		blk_insert_flush(rq);
		goto run;
	}

	WARN_ON(e && (rq->tag != -1));

	if (blk_mq_sched_bypass_insert(hctx, !!e, rq))
		goto run;

	if (e && e->type->ops.mq.insert_requests) {
		LIST_HEAD(list);

		list_add(&rq->queuelist, &list);
		e->type->ops.mq.insert_requests(hctx, &list, at_head);
	} else {
		spin_lock(&ctx->lock);
		__blk_mq_insert_request(hctx, rq, at_head);
		spin_unlock(&ctx->lock);
	}

run:
	if (run_queue)
		blk_mq_run_hw_queue(hctx, async);
}

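/*
 * Insert a list of requests belonging to software queue @ctx, either through
 * the elevator or directly, and kick the hardware queue.
 */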
void blk_mq_sched_insert_requests(struct request_queue *q,
				  struct blk_mq_ctx *ctx,
				  struct list_head *list, bool run_queue_async)
{
	struct blk_mq_hw_ctx *hctx = blk_mq_map_queue(q, ctx->cpu);
	struct elevator_queue *e = hctx->queue->elevator;

	if (e && e->type->ops.mq.insert_requests)
		e->type->ops.mq.insert_requests(hctx, list, false);
	else
		blk_mq_insert_requests(hctx, ctx, list);

	blk_mq_run_hw_queue(hctx, run_queue_async);
}

static void blk_mq_sched_free_tags(struct blk_mq_tag_set *set,
				   struct blk_mq_hw_ctx *hctx,
				   unsigned int hctx_idx)
{
	if (hctx->sched_tags) {
		blk_mq_free_rqs(set, hctx->sched_tags, hctx_idx);
		blk_mq_free_rq_map(hctx->sched_tags);
		hctx->sched_tags = NULL;
	}
}

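/*
 * Allocate the scheduler tags and requests for one hardware queue, sized to
 * q->nr_requests.
 */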
static int blk_mq_sched_alloc_tags(struct request_queue *q,
				   struct blk_mq_hw_ctx *hctx,
				   unsigned int hctx_idx)
{
	struct blk_mq_tag_set *set = q->tag_set;
	int ret;

	hctx->sched_tags = blk_mq_alloc_rq_map(set, hctx_idx, q->nr_requests,
					       set->reserved_tags);
	if (!hctx->sched_tags)
		return -ENOMEM;

	ret = blk_mq_alloc_rqs(set, hctx->sched_tags, hctx_idx, q->nr_requests);
	if (ret)
		blk_mq_sched_free_tags(set, hctx, hctx_idx);

	return ret;
}

static void blk_mq_sched_tags_teardown(struct request_queue *q)
{
	struct blk_mq_tag_set *set = q->tag_set;
	struct blk_mq_hw_ctx *hctx;
	int i;

	queue_for_each_hw_ctx(q, hctx, i)
		blk_mq_sched_free_tags(set, hctx, i);
}

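/*
 * Set up scheduler state for a newly added hardware queue: allocate its
 * scheduler tags, call the elevator's ->init_hctx() if provided, and
 * register the hctx with the scheduler debugfs attributes.
 */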
int blk_mq_sched_init_hctx(struct request_queue *q, struct blk_mq_hw_ctx *hctx,
			   unsigned int hctx_idx)
{
	struct elevator_queue *e = q->elevator;
	int ret;

	if (!e)
		return 0;

	ret = blk_mq_sched_alloc_tags(q, hctx, hctx_idx);
	if (ret)
		return ret;

	if (e->type->ops.mq.init_hctx) {
		ret = e->type->ops.mq.init_hctx(hctx, hctx_idx);
		if (ret) {
			blk_mq_sched_free_tags(q->tag_set, hctx, hctx_idx);
			return ret;
		}
	}

	blk_mq_debugfs_register_sched_hctx(q, hctx);

	return 0;
}

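/*
 * Tear down scheduler state for a hardware queue that is going away:
 * unregister its debugfs attributes, call the elevator's ->exit_hctx() if
 * needed, and free its scheduler tags.
 */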
void blk_mq_sched_exit_hctx(struct request_queue *q, struct blk_mq_hw_ctx *hctx,
			    unsigned int hctx_idx)
{
	struct elevator_queue *e = q->elevator;

	if (!e)
		return;

	blk_mq_debugfs_unregister_sched_hctx(hctx);

	if (e->type->ops.mq.exit_hctx && hctx->sched_data) {
		e->type->ops.mq.exit_hctx(hctx, hctx_idx);
		hctx->sched_data = NULL;
	}

	blk_mq_sched_free_tags(q->tag_set, hctx, hctx_idx);
}

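/*
 * Attach elevator @e to @q: size the scheduler tag space, allocate per-hctx
 * scheduler tags, and initialize the elevator's queue-wide and per-hctx
 * state.
 */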
int blk_mq_init_sched(struct request_queue *q, struct elevator_type *e)
{
	struct blk_mq_hw_ctx *hctx;
	struct elevator_queue *eq;
	unsigned int i;
	int ret;

	if (!e) {
		q->elevator = NULL;
		return 0;
	}

	/*
	 * Default to twice the smaller of the hardware queue depth and 128,
	 * since we don't split into sync/async like the old code did.
	 * Additionally, this is a per-hw queue depth.
	 */
	q->nr_requests = 2 * min_t(unsigned int, q->tag_set->queue_depth,
				   BLKDEV_MAX_RQ);

	queue_for_each_hw_ctx(q, hctx, i) {
		ret = blk_mq_sched_alloc_tags(q, hctx, i);
		if (ret)
			goto err;
	}

	ret = e->ops.mq.init_sched(q, e);
	if (ret)
		goto err;

	blk_mq_debugfs_register_sched(q);

	queue_for_each_hw_ctx(q, hctx, i) {
		if (e->ops.mq.init_hctx) {
			ret = e->ops.mq.init_hctx(hctx, i);
			if (ret) {
				eq = q->elevator;
				blk_mq_exit_sched(q, eq);
				kobject_put(&eq->kobj);
				return ret;
			}
		}
		blk_mq_debugfs_register_sched_hctx(q, hctx);
	}

	return 0;

err:
	blk_mq_sched_tags_teardown(q);
	q->elevator = NULL;
	return ret;
}

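/*
 * Detach elevator @e from @q, releasing its per-hctx state, debugfs entries
 * and scheduler tags.
 */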
void blk_mq_exit_sched(struct request_queue *q, struct elevator_queue *e)
{
	struct blk_mq_hw_ctx *hctx;
	unsigned int i;

	queue_for_each_hw_ctx(q, hctx, i) {
		blk_mq_debugfs_unregister_sched_hctx(hctx);
		if (e->type->ops.mq.exit_hctx && hctx->sched_data) {
			e->type->ops.mq.exit_hctx(hctx, i);
			hctx->sched_data = NULL;
		}
	}
	blk_mq_debugfs_unregister_sched(q);
	if (e->type->ops.mq.exit_sched)
		e->type->ops.mq.exit_sched(e);
	blk_mq_sched_tags_teardown(q);
	q->elevator = NULL;
}

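/* Pick and initialize the default elevator for @q, under sysfs_lock. */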
int blk_mq_sched_init(struct request_queue *q)
{
	int ret;

	mutex_lock(&q->sysfs_lock);
	ret = elevator_init(q, NULL);
	mutex_unlock(&q->sysfs_lock);

	return ret;
}