// SPDX-License-Identifier: GPL-2.0
/*
 * Block multiqueue core code
 *
 * Copyright (C) 2013-2014 Jens Axboe
 * Copyright (C) 2013-2014 Christoph Hellwig
 */
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/backing-dev.h>
#include <linux/bio.h>
#include <linux/blkdev.h>
#include <linux/blk-integrity.h>
#include <linux/kmemleak.h>
#include <linux/mm.h>
#include <linux/init.h>
#include <linux/slab.h>
#include <linux/workqueue.h>
#include <linux/smp.h>
#include <linux/interrupt.h>
#include <linux/llist.h>
#include <linux/list_sort.h>
#include <linux/cpu.h>
#include <linux/cache.h>
#include <linux/sched/sysctl.h>
#include <linux/sched/topology.h>
#include <linux/sched/signal.h>
#include <linux/delay.h>
#include <linux/crash_dump.h>
#include <linux/prefetch.h>
#include <linux/blk-crypto.h>

#include <trace/events/block.h>

#include <linux/blk-mq.h>
#include <linux/t10-pi.h>
#include "blk.h"
#include "blk-mq.h"
#include "blk-mq-debugfs.h"
#include "blk-mq-tag.h"
#include "blk-pm.h"
#include "blk-stat.h"
#include "blk-mq-sched.h"
#include "blk-rq-qos.h"

static DEFINE_PER_CPU(struct llist_head, blk_cpu_done);

static void blk_mq_poll_stats_start(struct request_queue *q);
static void blk_mq_poll_stats_fn(struct blk_stat_callback *cb);

static int blk_mq_poll_stats_bkt(const struct request *rq)
{
	int ddir, sectors, bucket;

	ddir = rq_data_dir(rq);
	sectors = blk_rq_stats_sectors(rq);

	bucket = ddir + 2 * ilog2(sectors);

	if (bucket < 0)
		return -1;
	else if (bucket >= BLK_MQ_POLL_STATS_BKTS)
		return ddir + BLK_MQ_POLL_STATS_BKTS - 2;

	return bucket;
}

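/*
 * Explanatory note (not in the original source): blk_mq_poll_stats_bkt()
 * maps (direction, size) to a poll-statistics bucket as
 * bucket = ddir + 2 * ilog2(sectors), with ddir 0 for reads and 1 for
 * writes.  For example, an 8-sector (4 KiB) read has ilog2(8) = 3 and
 * lands in bucket 6, while a write of the same size lands in bucket 7.
 * Requests too large for the table are clamped into the top read/write
 * bucket pair.
 */
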
#define BLK_QC_T_SHIFT		16
#define BLK_QC_T_INTERNAL	(1U << 31)

static inline struct blk_mq_hw_ctx *blk_qc_to_hctx(struct request_queue *q,
		blk_qc_t qc)
{
	return q->queue_hw_ctx[(qc & ~BLK_QC_T_INTERNAL) >> BLK_QC_T_SHIFT];
}

static inline struct request *blk_qc_to_rq(struct blk_mq_hw_ctx *hctx,
		blk_qc_t qc)
{
	unsigned int tag = qc & ((1U << BLK_QC_T_SHIFT) - 1);

	if (qc & BLK_QC_T_INTERNAL)
		return blk_mq_tag_to_rq(hctx->sched_tags, tag);
	return blk_mq_tag_to_rq(hctx->tags, tag);
}

static inline blk_qc_t blk_rq_to_qc(struct request *rq)
{
	return (rq->mq_hctx->queue_num << BLK_QC_T_SHIFT) |
		(rq->tag != -1 ?
		 rq->tag : (rq->internal_tag | BLK_QC_T_INTERNAL));
}

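/*
 * Explanatory note (not in the original source): a poll cookie packs the
 * hardware queue number into bits 30..16 and the tag into bits 15..0,
 * with bit 31 (BLK_QC_T_INTERNAL) marking a scheduler tag.  For example,
 * a request with driver tag 5 on hardware queue 2 encodes to
 * (2 << 16) | 5 == 0x20005; blk_qc_to_hctx() and blk_qc_to_rq() simply
 * reverse this packing and look the tag up in sched_tags when the
 * internal bit is set.
 */
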
/*
 * Check if any of the ctx, dispatch list or elevator
 * have pending work in this hardware queue.
 */
static bool blk_mq_hctx_has_pending(struct blk_mq_hw_ctx *hctx)
{
	return !list_empty_careful(&hctx->dispatch) ||
		sbitmap_any_bit_set(&hctx->ctx_map) ||
		blk_mq_sched_has_work(hctx);
}

/*
 * Mark this ctx as having pending work in this hardware queue
 */
static void blk_mq_hctx_mark_pending(struct blk_mq_hw_ctx *hctx,
				     struct blk_mq_ctx *ctx)
{
	const int bit = ctx->index_hw[hctx->type];

	if (!sbitmap_test_bit(&hctx->ctx_map, bit))
		sbitmap_set_bit(&hctx->ctx_map, bit);
}

static void blk_mq_hctx_clear_pending(struct blk_mq_hw_ctx *hctx,
				      struct blk_mq_ctx *ctx)
{
	const int bit = ctx->index_hw[hctx->type];

	sbitmap_clear_bit(&hctx->ctx_map, bit);
}

struct mq_inflight {
	struct block_device *part;
	unsigned int inflight[2];
};

static bool blk_mq_check_inflight(struct blk_mq_hw_ctx *hctx,
				  struct request *rq, void *priv,
				  bool reserved)
{
	struct mq_inflight *mi = priv;

	if ((!mi->part->bd_partno || rq->part == mi->part) &&
	    blk_mq_rq_state(rq) == MQ_RQ_IN_FLIGHT)
		mi->inflight[rq_data_dir(rq)]++;

	return true;
}

unsigned int blk_mq_in_flight(struct request_queue *q,
			      struct block_device *part)
{
	struct mq_inflight mi = { .part = part };

	blk_mq_queue_tag_busy_iter(q, blk_mq_check_inflight, &mi);

	return mi.inflight[0] + mi.inflight[1];
}

void blk_mq_in_flight_rw(struct request_queue *q, struct block_device *part,
			 unsigned int inflight[2])
{
	struct mq_inflight mi = { .part = part };

	blk_mq_queue_tag_busy_iter(q, blk_mq_check_inflight, &mi);
	inflight[0] = mi.inflight[0];
	inflight[1] = mi.inflight[1];
}

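/*
 * Explanatory note (not in the original source): the two helpers above
 * derive in-flight counts by walking every started request with
 * blk_mq_queue_tag_busy_iter() and counting those in MQ_RQ_IN_FLIGHT
 * state that belong to @part (a whole-disk block_device, bd_partno == 0,
 * matches everything).  They back, for example, the "inflight" numbers
 * exposed through the disk statistics interfaces.
 */
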
void blk_freeze_queue_start(struct request_queue *q)
{
	mutex_lock(&q->mq_freeze_lock);
	if (++q->mq_freeze_depth == 1) {
		percpu_ref_kill(&q->q_usage_counter);
		mutex_unlock(&q->mq_freeze_lock);
		if (queue_is_mq(q))
			blk_mq_run_hw_queues(q, false);
	} else {
		mutex_unlock(&q->mq_freeze_lock);
	}
}
EXPORT_SYMBOL_GPL(blk_freeze_queue_start);

void blk_mq_freeze_queue_wait(struct request_queue *q)
{
	wait_event(q->mq_freeze_wq, percpu_ref_is_zero(&q->q_usage_counter));
}
EXPORT_SYMBOL_GPL(blk_mq_freeze_queue_wait);

int blk_mq_freeze_queue_wait_timeout(struct request_queue *q,
				     unsigned long timeout)
{
	return wait_event_timeout(q->mq_freeze_wq,
					percpu_ref_is_zero(&q->q_usage_counter),
					timeout);
}
EXPORT_SYMBOL_GPL(blk_mq_freeze_queue_wait_timeout);

/*
 * Guarantee no request is in use, so we can change any data structure of
 * the queue afterward.
 */
void blk_freeze_queue(struct request_queue *q)
{
	/*
	 * In the !blk_mq case we are only calling this to kill the
	 * q_usage_counter, otherwise this increases the freeze depth
	 * and waits for it to return to zero. For this reason there is
	 * no blk_unfreeze_queue(), and blk_freeze_queue() is not
	 * exported to drivers as the only user for unfreeze is blk_mq.
	 */
	blk_freeze_queue_start(q);
	blk_mq_freeze_queue_wait(q);
}

void blk_mq_freeze_queue(struct request_queue *q)
{
	/*
	 * ...just an alias to keep freeze and unfreeze actions balanced
	 * in the blk_mq_* namespace
	 */
	blk_freeze_queue(q);
}
EXPORT_SYMBOL_GPL(blk_mq_freeze_queue);

void __blk_mq_unfreeze_queue(struct request_queue *q, bool force_atomic)
{
	mutex_lock(&q->mq_freeze_lock);
	if (force_atomic)
		q->q_usage_counter.data->force_atomic = true;
	q->mq_freeze_depth--;
	WARN_ON_ONCE(q->mq_freeze_depth < 0);
	if (!q->mq_freeze_depth) {
		percpu_ref_resurrect(&q->q_usage_counter);
		wake_up_all(&q->mq_freeze_wq);
	}
	mutex_unlock(&q->mq_freeze_lock);
}

void blk_mq_unfreeze_queue(struct request_queue *q)
{
	__blk_mq_unfreeze_queue(q, false);
}
EXPORT_SYMBOL_GPL(blk_mq_unfreeze_queue);

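/*
 * Usage sketch (explanatory, not part of the original source): drivers
 * normally bracket queue reconfiguration with a freeze/unfreeze pair so
 * that no request is in flight while limits change, e.g.:
 *
 *	blk_mq_freeze_queue(q);
 *	blk_queue_logical_block_size(q, new_lbs);	(hypothetical update)
 *	blk_mq_unfreeze_queue(q);
 *
 * blk_mq_freeze_queue() blocks until q_usage_counter drops to zero;
 * blk_freeze_queue_start() combined with blk_mq_freeze_queue_wait_timeout()
 * gives callers a bounded wait instead.
 */
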
/*
 * FIXME: replace the scsi_internal_device_*block_nowait() calls in the
 * mpt3sas driver such that this function can be removed.
 */
void blk_mq_quiesce_queue_nowait(struct request_queue *q)
{
	blk_queue_flag_set(QUEUE_FLAG_QUIESCED, q);
}
EXPORT_SYMBOL_GPL(blk_mq_quiesce_queue_nowait);

/**
 * blk_mq_quiesce_queue() - wait until all ongoing dispatches have finished
 * @q: request queue.
 *
 * Note: this function does not prevent the struct request end_io()
 * callback from being invoked. Once this function has returned, no
 * dispatch can happen until the queue is unquiesced via
 * blk_mq_unquiesce_queue().
 */
void blk_mq_quiesce_queue(struct request_queue *q)
{
	struct blk_mq_hw_ctx *hctx;
	unsigned int i;
	bool rcu = false;

	blk_mq_quiesce_queue_nowait(q);

	queue_for_each_hw_ctx(q, hctx, i) {
		if (hctx->flags & BLK_MQ_F_BLOCKING)
			synchronize_srcu(hctx->srcu);
		else
			rcu = true;
	}
	if (rcu)
		synchronize_rcu();
}
EXPORT_SYMBOL_GPL(blk_mq_quiesce_queue);

/*
 * blk_mq_unquiesce_queue() - counterpart of blk_mq_quiesce_queue()
 * @q: request queue.
 *
 * This function restores the queue to the state it was in before
 * blk_mq_quiesce_queue() was called.
 */
void blk_mq_unquiesce_queue(struct request_queue *q)
{
	blk_queue_flag_clear(QUEUE_FLAG_QUIESCED, q);

	/* dispatch requests which are inserted during quiescing */
	blk_mq_run_hw_queues(q, true);
}
EXPORT_SYMBOL_GPL(blk_mq_unquiesce_queue);

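/*
 * Explanatory note (not in the original source): quiescing differs from
 * freezing in that it only stops dispatch to ->queue_rq(); requests may
 * still be allocated and inserted while the queue is quiesced, and they
 * are dispatched by the blk_mq_run_hw_queues() call in
 * blk_mq_unquiesce_queue() above.  Freezing, in contrast, drains all
 * outstanding requests and blocks new allocations.
 */
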
void blk_mq_wake_waiters(struct request_queue *q)
{
	struct blk_mq_hw_ctx *hctx;
	unsigned int i;

	queue_for_each_hw_ctx(q, hctx, i)
		if (blk_mq_hw_queue_mapped(hctx))
			blk_mq_tag_wakeup_all(hctx->tags, true);
}

static struct request *blk_mq_rq_ctx_init(struct blk_mq_alloc_data *data,
		unsigned int tag, u64 alloc_time_ns)
{
	struct blk_mq_ctx *ctx = data->ctx;
	struct blk_mq_hw_ctx *hctx = data->hctx;
	struct request_queue *q = data->q;
	struct elevator_queue *e = q->elevator;
	struct blk_mq_tags *tags = blk_mq_tags_from_data(data);
	struct request *rq = tags->static_rqs[tag];
	unsigned int rq_flags = 0;

	if (e) {
		rq_flags = RQF_ELV;
		rq->tag = BLK_MQ_NO_TAG;
		rq->internal_tag = tag;
	} else {
		rq->tag = tag;
		rq->internal_tag = BLK_MQ_NO_TAG;
	}

	if (data->flags & BLK_MQ_REQ_PM)
		rq_flags |= RQF_PM;
	if (blk_queue_io_stat(q))
		rq_flags |= RQF_IO_STAT;
	rq->rq_flags = rq_flags;

	if (blk_mq_need_time_stamp(rq))
		rq->start_time_ns = ktime_get_ns();
	else
		rq->start_time_ns = 0;
	/* csd/requeue_work/fifo_time is initialized before use */
	rq->q = q;
	rq->mq_ctx = ctx;
	rq->mq_hctx = hctx;
	rq->cmd_flags = data->cmd_flags;
	rq->rq_disk = NULL;
	rq->part = NULL;
#ifdef CONFIG_BLK_RQ_ALLOC_TIME
	rq->alloc_time_ns = alloc_time_ns;
#endif
	rq->io_start_time_ns = 0;
	rq->stats_sectors = 0;
	rq->nr_phys_segments = 0;
#if defined(CONFIG_BLK_DEV_INTEGRITY)
	rq->nr_integrity_segments = 0;
#endif
	rq->timeout = 0;
	rq->end_io = NULL;
	rq->end_io_data = NULL;

	blk_crypto_rq_set_defaults(rq);
	INIT_LIST_HEAD(&rq->queuelist);
	/* tag was already set */
	WRITE_ONCE(rq->deadline, 0);
	refcount_set(&rq->ref, 1);

	if (rq->rq_flags & RQF_ELV) {
		struct elevator_queue *e = data->q->elevator;

		rq->elv.icq = NULL;
		INIT_HLIST_NODE(&rq->hash);
		RB_CLEAR_NODE(&rq->rb_node);

		if (!op_is_flush(data->cmd_flags) &&
		    e->type->ops.prepare_request) {
			if (e->type->icq_cache)
				blk_mq_sched_assign_ioc(rq);

			e->type->ops.prepare_request(rq);
			rq->rq_flags |= RQF_ELVPRIV;
		}
	}

	return rq;
}

static inline struct request *
__blk_mq_alloc_requests_batch(struct blk_mq_alloc_data *data,
		u64 alloc_time_ns)
{
	unsigned int tag, tag_offset;
	struct request *rq;
	unsigned long tags;
	int i, nr = 0;

	tags = blk_mq_get_tags(data, data->nr_tags, &tag_offset);
	if (unlikely(!tags))
		return NULL;

	for (i = 0; tags; i++) {
		if (!(tags & (1UL << i)))
			continue;
		tag = tag_offset + i;
		tags &= ~(1UL << i);
		rq = blk_mq_rq_ctx_init(data, tag, alloc_time_ns);
		rq_list_add(data->cached_rq, rq);
	}
	data->nr_tags -= nr;

	return rq_list_pop(data->cached_rq);
}

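/*
 * Explanatory note (not in the original source): for batched allocation,
 * blk_mq_get_tags() hands back a bitmap of up to data->nr_tags free tags
 * starting at tag_offset.  With tag_offset == 64 and a bitmap of 0b1011,
 * for instance, tags 64, 65 and 67 are taken: each set bit is turned into
 * an initialized request and pushed onto data->cached_rq, and the first
 * cached request is popped and returned while the rest stay behind for
 * later submissions from the same plug.
 */
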
static struct request *__blk_mq_alloc_requests(struct blk_mq_alloc_data *data)
{
	struct request_queue *q = data->q;
	struct elevator_queue *e = q->elevator;
	u64 alloc_time_ns = 0;
	struct request *rq;
	unsigned int tag;

	/* alloc_time includes depth and tag waits */
	if (blk_queue_rq_alloc_time(q))
		alloc_time_ns = ktime_get_ns();

	if (data->cmd_flags & REQ_NOWAIT)
		data->flags |= BLK_MQ_REQ_NOWAIT;

	if (e) {
		/*
		 * Flush/passthrough requests are special and go directly to the
		 * dispatch list. Don't include reserved tags in the
		 * limiting, as it isn't useful.
		 */
		if (!op_is_flush(data->cmd_flags) &&
		    !blk_op_is_passthrough(data->cmd_flags) &&
		    e->type->ops.limit_depth &&
		    !(data->flags & BLK_MQ_REQ_RESERVED))
			e->type->ops.limit_depth(data->cmd_flags, data);
	}

retry:
	data->ctx = blk_mq_get_ctx(q);
	data->hctx = blk_mq_map_queue(q, data->cmd_flags, data->ctx);
	if (!e)
		blk_mq_tag_busy(data->hctx);

	/*
	 * Try batched alloc if we want more than 1 tag.
	 */
	if (data->nr_tags > 1) {
		rq = __blk_mq_alloc_requests_batch(data, alloc_time_ns);
		if (rq)
			return rq;
		data->nr_tags = 1;
	}

	/*
	 * Waiting allocations only fail because of an inactive hctx. In that
	 * case just retry the hctx assignment and tag allocation as CPU hotplug
	 * should have migrated us to an online CPU by now.
	 */
	tag = blk_mq_get_tag(data);
	if (tag == BLK_MQ_NO_TAG) {
		if (data->flags & BLK_MQ_REQ_NOWAIT)
			return NULL;
		/*
		 * Give up the CPU and sleep for a random short time to
		 * ensure that threads using a realtime scheduling class
		 * are migrated off the CPU, and thus off the hctx that
		 * is going away.
		 */
		msleep(3);
		goto retry;
	}

	return blk_mq_rq_ctx_init(data, tag, alloc_time_ns);
}

struct request *blk_mq_alloc_request(struct request_queue *q, unsigned int op,
		blk_mq_req_flags_t flags)
{
	struct blk_mq_alloc_data data = {
		.q		= q,
		.flags		= flags,
		.cmd_flags	= op,
		.nr_tags	= 1,
	};
	struct request *rq;
	int ret;

	ret = blk_queue_enter(q, flags);
	if (ret)
		return ERR_PTR(ret);

	rq = __blk_mq_alloc_requests(&data);
	if (!rq)
		goto out_queue_exit;
	rq->__data_len = 0;
	rq->__sector = (sector_t) -1;
	rq->bio = rq->biotail = NULL;
	return rq;
out_queue_exit:
	blk_queue_exit(q);
	return ERR_PTR(-EWOULDBLOCK);
}
EXPORT_SYMBOL(blk_mq_alloc_request);

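/*
 * Usage sketch (explanatory, not part of the original source): callers
 * that need a request outside the normal bio submission path, e.g. for a
 * driver-private/passthrough command, allocate and release it roughly
 * like this:
 *
 *	struct request *rq;
 *
 *	rq = blk_mq_alloc_request(q, REQ_OP_DRV_OUT, BLK_MQ_REQ_NOWAIT);
 *	if (IS_ERR(rq))
 *		return PTR_ERR(rq);
 *	... fill in the payload and issue it (e.g. via blk_execute_rq()) ...
 *	blk_mq_free_request(rq);
 */
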
struct request *blk_mq_alloc_request_hctx(struct request_queue *q,
	unsigned int op, blk_mq_req_flags_t flags, unsigned int hctx_idx)
{
	struct blk_mq_alloc_data data = {
		.q		= q,
		.flags		= flags,
		.cmd_flags	= op,
		.nr_tags	= 1,
	};
	u64 alloc_time_ns = 0;
	unsigned int cpu;
	unsigned int tag;
	int ret;

	/* alloc_time includes depth and tag waits */
	if (blk_queue_rq_alloc_time(q))
		alloc_time_ns = ktime_get_ns();

	/*
	 * If the tag allocator sleeps we could get an allocation for a
	 * different hardware context. No need to complicate the low level
	 * allocator for this for the rare use case of a command tied to
	 * a specific queue.
	 */
	if (WARN_ON_ONCE(!(flags & (BLK_MQ_REQ_NOWAIT | BLK_MQ_REQ_RESERVED))))
		return ERR_PTR(-EINVAL);

	if (hctx_idx >= q->nr_hw_queues)
		return ERR_PTR(-EIO);

	ret = blk_queue_enter(q, flags);
	if (ret)
		return ERR_PTR(ret);

	/*
	 * Check if the hardware context is actually mapped to anything.
	 * If not tell the caller that it should skip this queue.
	 */
	ret = -EXDEV;
	data.hctx = q->queue_hw_ctx[hctx_idx];
	if (!blk_mq_hw_queue_mapped(data.hctx))
		goto out_queue_exit;
	cpu = cpumask_first_and(data.hctx->cpumask, cpu_online_mask);
	data.ctx = __blk_mq_get_ctx(q, cpu);

	if (!q->elevator)
		blk_mq_tag_busy(data.hctx);

	ret = -EWOULDBLOCK;
	tag = blk_mq_get_tag(&data);
	if (tag == BLK_MQ_NO_TAG)
		goto out_queue_exit;
	return blk_mq_rq_ctx_init(&data, tag, alloc_time_ns);

out_queue_exit:
	blk_queue_exit(q);
	return ERR_PTR(ret);
}
EXPORT_SYMBOL_GPL(blk_mq_alloc_request_hctx);

static void __blk_mq_free_request(struct request *rq)
{
	struct request_queue *q = rq->q;
	struct blk_mq_ctx *ctx = rq->mq_ctx;
	struct blk_mq_hw_ctx *hctx = rq->mq_hctx;
	const int sched_tag = rq->internal_tag;

	blk_crypto_free_request(rq);
	blk_pm_mark_last_busy(rq);
	rq->mq_hctx = NULL;
	if (rq->tag != BLK_MQ_NO_TAG)
		blk_mq_put_tag(hctx->tags, ctx, rq->tag);
	if (sched_tag != BLK_MQ_NO_TAG)
		blk_mq_put_tag(hctx->sched_tags, ctx, sched_tag);
	blk_mq_sched_restart(hctx);
	blk_queue_exit(q);
}

void blk_mq_free_request(struct request *rq)
{
	struct request_queue *q = rq->q;
	struct blk_mq_hw_ctx *hctx = rq->mq_hctx;

	if (rq->rq_flags & RQF_ELVPRIV) {
		struct elevator_queue *e = q->elevator;

		if (e->type->ops.finish_request)
			e->type->ops.finish_request(rq);
		if (rq->elv.icq) {
			put_io_context(rq->elv.icq->ioc);
			rq->elv.icq = NULL;
		}
	}

	if (rq->rq_flags & RQF_MQ_INFLIGHT)
		__blk_mq_dec_active_requests(hctx);

	if (unlikely(laptop_mode && !blk_rq_is_passthrough(rq)))
		laptop_io_completion(q->disk->bdi);

	rq_qos_done(q, rq);

	WRITE_ONCE(rq->state, MQ_RQ_IDLE);
	if (refcount_dec_and_test(&rq->ref))
		__blk_mq_free_request(rq);
}
EXPORT_SYMBOL_GPL(blk_mq_free_request);

void blk_mq_free_plug_rqs(struct blk_plug *plug)
{
	struct request *rq;

	while ((rq = rq_list_pop(&plug->cached_rq)) != NULL) {
		percpu_ref_get(&rq->q->q_usage_counter);
		blk_mq_free_request(rq);
	}
}

static void req_bio_endio(struct request *rq, struct bio *bio,
			  unsigned int nbytes, blk_status_t error)
{
	if (error)
		bio->bi_status = error;

	if (unlikely(rq->rq_flags & RQF_QUIET))
		bio_set_flag(bio, BIO_QUIET);

	bio_advance(bio, nbytes);

	if (req_op(rq) == REQ_OP_ZONE_APPEND && error == BLK_STS_OK) {
		/*
		 * Partial zone append completions cannot be supported as the
		 * BIO fragments may end up not being written sequentially.
		 */
		if (bio->bi_iter.bi_size)
			bio->bi_status = BLK_STS_IOERR;
		else
			bio->bi_iter.bi_sector = rq->__sector;
	}

	/* don't actually finish bio if it's part of flush sequence */
	if (bio->bi_iter.bi_size == 0 && !(rq->rq_flags & RQF_FLUSH_SEQ))
		bio_endio(bio);
}

static void blk_account_io_completion(struct request *req, unsigned int bytes)
{
	if (req->part && blk_do_io_stat(req)) {
		const int sgrp = op_stat_group(req_op(req));

		part_stat_lock();
		part_stat_add(req->part, sectors[sgrp], bytes >> 9);
		part_stat_unlock();
	}
}

/**
 * blk_update_request - Complete multiple bytes without completing the request
 * @req:      the request being processed
 * @error:    block status code
 * @nr_bytes: number of bytes to complete for @req
 *
 * Description:
 *     Ends I/O on a number of bytes attached to @req, but doesn't complete
 *     the request structure even if @req has no bytes left over.
 *     If @req has leftover bytes, it is set up for the next range of segments.
 *
 *     Passing the result of blk_rq_bytes() as @nr_bytes guarantees
 *     %false return from this function.
 *
 * Note:
 *     The RQF_SPECIAL_PAYLOAD flag is ignored on purpose in this function
 *     except in the consistency check at the end of this function.
 *
 * Return:
 *     %false - this request doesn't have any more data
 *     %true  - this request has more data
 **/
bool blk_update_request(struct request *req, blk_status_t error,
		unsigned int nr_bytes)
{
	int total_bytes;

	trace_block_rq_complete(req, blk_status_to_errno(error), nr_bytes);

	if (!req->bio)
		return false;

#ifdef CONFIG_BLK_DEV_INTEGRITY
	if (blk_integrity_rq(req) && req_op(req) == REQ_OP_READ &&
	    error == BLK_STS_OK)
		req->q->integrity.profile->complete_fn(req, nr_bytes);
#endif

	if (unlikely(error && !blk_rq_is_passthrough(req) &&
		     !(req->rq_flags & RQF_QUIET)))
		blk_print_req_error(req, error);

	blk_account_io_completion(req, nr_bytes);

	total_bytes = 0;
	while (req->bio) {
		struct bio *bio = req->bio;
		unsigned bio_bytes = min(bio->bi_iter.bi_size, nr_bytes);

		if (bio_bytes == bio->bi_iter.bi_size)
			req->bio = bio->bi_next;

		/* Completion has already been traced */
		bio_clear_flag(bio, BIO_TRACE_COMPLETION);
		req_bio_endio(req, bio, bio_bytes, error);

		total_bytes += bio_bytes;
		nr_bytes -= bio_bytes;

		if (!nr_bytes)
			break;
	}

	/*
	 * completely done
	 */
	if (!req->bio) {
		/*
		 * Reset counters so that the request stacking driver
		 * can find how many bytes remain in the request
		 * later.
		 */
		req->__data_len = 0;
		return false;
	}

	req->__data_len -= total_bytes;

	/* update sector only for requests with clear definition of sector */
	if (!blk_rq_is_passthrough(req))
		req->__sector += total_bytes >> 9;

	/* mixed attributes always follow the first bio */
	if (req->rq_flags & RQF_MIXED_MERGE) {
		req->cmd_flags &= ~REQ_FAILFAST_MASK;
		req->cmd_flags |= req->bio->bi_opf & REQ_FAILFAST_MASK;
	}

	if (!(req->rq_flags & RQF_SPECIAL_PAYLOAD)) {
		/*
		 * If total number of sectors is less than the first segment
		 * size, something has gone terribly wrong.
		 */
		if (blk_rq_bytes(req) < blk_rq_cur_bytes(req)) {
			blk_dump_rq_flags(req, "request botched");
			req->__data_len = blk_rq_cur_bytes(req);
		}

		/* recalculate the number of segments */
		req->nr_phys_segments = blk_recalc_rq_segments(req);
	}

	return true;
}
EXPORT_SYMBOL_GPL(blk_update_request);

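/*
 * Worked example (explanatory, not part of the original source): for a
 * 12 KiB request built from three 4 KiB bios, blk_update_request(req,
 * BLK_STS_OK, 8192) ends the first two bios, subtracts 8 KiB from
 * __data_len, advances __sector by 16 sectors and returns true because
 * 4 KiB is still outstanding; a later call covering the remaining bytes
 * returns false, leaving the request ready for __blk_mq_end_request().
 */
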
static inline void __blk_mq_end_request_acct(struct request *rq, u64 now)
{
	if (rq->rq_flags & RQF_STATS) {
		blk_mq_poll_stats_start(rq->q);
		blk_stat_add(rq, now);
	}

	blk_mq_sched_completed_request(rq, now);
	blk_account_io_done(rq, now);
}

inline void __blk_mq_end_request(struct request *rq, blk_status_t error)
{
	if (blk_mq_need_time_stamp(rq))
		__blk_mq_end_request_acct(rq, ktime_get_ns());

	if (rq->end_io) {
		rq_qos_done(rq->q, rq);
		rq->end_io(rq, error);
	} else {
		blk_mq_free_request(rq);
	}
}
EXPORT_SYMBOL(__blk_mq_end_request);

void blk_mq_end_request(struct request *rq, blk_status_t error)
{
	if (blk_update_request(rq, error, blk_rq_bytes(rq)))
		BUG();
	__blk_mq_end_request(rq, error);
}
EXPORT_SYMBOL(blk_mq_end_request);

#define TAG_COMP_BATCH		32

static inline void blk_mq_flush_tag_batch(struct blk_mq_hw_ctx *hctx,
					  int *tag_array, int nr_tags)
{
	struct request_queue *q = hctx->queue;

	blk_mq_put_tags(hctx->tags, tag_array, nr_tags);
	percpu_ref_put_many(&q->q_usage_counter, nr_tags);
}

void blk_mq_end_request_batch(struct io_comp_batch *iob)
{
	int tags[TAG_COMP_BATCH], nr_tags = 0;
	struct blk_mq_hw_ctx *last_hctx = NULL;
	struct request *rq;
	u64 now = 0;

	if (iob->need_ts)
		now = ktime_get_ns();

	while ((rq = rq_list_pop(&iob->req_list)) != NULL) {
		prefetch(rq->bio);
		prefetch(rq->rq_next);

		blk_update_request(rq, BLK_STS_OK, blk_rq_bytes(rq));
		if (iob->need_ts)
			__blk_mq_end_request_acct(rq, now);

		WRITE_ONCE(rq->state, MQ_RQ_IDLE);
		if (!refcount_dec_and_test(&rq->ref))
			continue;

		blk_crypto_free_request(rq);
		blk_pm_mark_last_busy(rq);
		rq_qos_done(rq->q, rq);

		if (nr_tags == TAG_COMP_BATCH ||
		    (last_hctx && last_hctx != rq->mq_hctx)) {
			blk_mq_flush_tag_batch(last_hctx, tags, nr_tags);
			nr_tags = 0;
		}
		tags[nr_tags++] = rq->tag;
		last_hctx = rq->mq_hctx;
	}

	if (nr_tags)
		blk_mq_flush_tag_batch(last_hctx, tags, nr_tags);
}
EXPORT_SYMBOL_GPL(blk_mq_end_request_batch);

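/*
 * Usage sketch (explanatory, not part of the original source): polling
 * drivers collect finished requests into a struct io_comp_batch from
 * their ->poll() handler (typically via a helper such as
 * blk_mq_add_to_batch()) and then call blk_mq_end_request_batch() once,
 * so that tag frees and q_usage_counter puts are amortized over up to
 * TAG_COMP_BATCH requests per hardware queue.
 */
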
static void blk_complete_reqs(struct llist_head *list)
{
	struct llist_node *entry = llist_reverse_order(llist_del_all(list));
	struct request *rq, *next;

	llist_for_each_entry_safe(rq, next, entry, ipi_list)
		rq->q->mq_ops->complete(rq);
}

static __latent_entropy void blk_done_softirq(struct softirq_action *h)
{
	blk_complete_reqs(this_cpu_ptr(&blk_cpu_done));
}

static int blk_softirq_cpu_dead(unsigned int cpu)
{
	blk_complete_reqs(&per_cpu(blk_cpu_done, cpu));
	return 0;
}

static void __blk_mq_complete_request_remote(void *data)
{
	__raise_softirq_irqoff(BLOCK_SOFTIRQ);
}

static inline bool blk_mq_complete_need_ipi(struct request *rq)
{
	int cpu = raw_smp_processor_id();

	if (!IS_ENABLED(CONFIG_SMP) ||
	    !test_bit(QUEUE_FLAG_SAME_COMP, &rq->q->queue_flags))
		return false;
	/*
	 * With force threaded interrupts enabled, raising softirq from an SMP
	 * function call will always result in waking the ksoftirqd thread.
	 * This is probably worse than completing the request on a different
	 * cache domain.
	 */
	if (force_irqthreads())
		return false;

	/* same CPU or cache domain? Complete locally */
	if (cpu == rq->mq_ctx->cpu ||
	    (!test_bit(QUEUE_FLAG_SAME_FORCE, &rq->q->queue_flags) &&
	     cpus_share_cache(cpu, rq->mq_ctx->cpu)))
		return false;

	/* don't try to IPI to an offline CPU */
	return cpu_online(rq->mq_ctx->cpu);
}

static void blk_mq_complete_send_ipi(struct request *rq)
{
	struct llist_head *list;
	unsigned int cpu;

	cpu = rq->mq_ctx->cpu;
	list = &per_cpu(blk_cpu_done, cpu);
	if (llist_add(&rq->ipi_list, list)) {
		INIT_CSD(&rq->csd, __blk_mq_complete_request_remote, rq);
		smp_call_function_single_async(cpu, &rq->csd);
	}
}

static void blk_mq_raise_softirq(struct request *rq)
{
	struct llist_head *list;

	preempt_disable();
	list = this_cpu_ptr(&blk_cpu_done);
	if (llist_add(&rq->ipi_list, list))
		raise_softirq(BLOCK_SOFTIRQ);
	preempt_enable();
}

bool blk_mq_complete_request_remote(struct request *rq)
{
	WRITE_ONCE(rq->state, MQ_RQ_COMPLETE);

	/*
	 * For a polled request, always complete locally, it's pointless
	 * to redirect the completion.
	 */
	if (rq->cmd_flags & REQ_POLLED)
		return false;

	if (blk_mq_complete_need_ipi(rq)) {
		blk_mq_complete_send_ipi(rq);
		return true;
	}

	if (rq->q->nr_hw_queues == 1) {
		blk_mq_raise_softirq(rq);
		return true;
	}
	return false;
}
EXPORT_SYMBOL_GPL(blk_mq_complete_request_remote);

/**
 * blk_mq_complete_request - end I/O on a request
 * @rq:		the request being processed
 *
 * Description:
 *	Complete a request by scheduling the ->complete operation.
 **/
void blk_mq_complete_request(struct request *rq)
{
	if (!blk_mq_complete_request_remote(rq))
		rq->q->mq_ops->complete(rq);
}
EXPORT_SYMBOL(blk_mq_complete_request);

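/*
 * Explanatory note (not in the original source): a driver's IRQ handler
 * calls blk_mq_complete_request() on a finished request.  Depending on
 * QUEUE_FLAG_SAME_COMP/SAME_FORCE and cache topology the completion
 * either runs ->complete() right here, is sent by IPI to the submitting
 * CPU's blk_cpu_done list, or is raised as a local BLOCK_SOFTIRQ for
 * single-hw-queue devices.  The ->complete() callback usually finishes
 * by calling blk_mq_end_request().
 */
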
static void hctx_unlock(struct blk_mq_hw_ctx *hctx, int srcu_idx)
	__releases(hctx->srcu)
{
	if (!(hctx->flags & BLK_MQ_F_BLOCKING))
		rcu_read_unlock();
	else
		srcu_read_unlock(hctx->srcu, srcu_idx);
}

static void hctx_lock(struct blk_mq_hw_ctx *hctx, int *srcu_idx)
	__acquires(hctx->srcu)
{
	if (!(hctx->flags & BLK_MQ_F_BLOCKING)) {
		/* shut up gcc false positive */
		*srcu_idx = 0;
		rcu_read_lock();
	} else
		*srcu_idx = srcu_read_lock(hctx->srcu);
}

/**
 * blk_mq_start_request - Start processing a request
 * @rq: Pointer to request to be started
 *
 * Function used by device drivers to notify the block layer that a request
 * is going to be processed now, so the block layer can do proper preparations
 * such as starting the timeout timer.
 */
void blk_mq_start_request(struct request *rq)
{
	struct request_queue *q = rq->q;

	trace_block_rq_issue(rq);

	if (test_bit(QUEUE_FLAG_STATS, &q->queue_flags)) {
		u64 start_time;
#ifdef CONFIG_BLK_CGROUP
		if (rq->bio)
			start_time = bio_issue_time(&rq->bio->bi_issue);
		else
#endif
			start_time = ktime_get_ns();
		rq->io_start_time_ns = start_time;
		rq->stats_sectors = blk_rq_sectors(rq);
		rq->rq_flags |= RQF_STATS;
		rq_qos_issue(q, rq);
	}

	WARN_ON_ONCE(blk_mq_rq_state(rq) != MQ_RQ_IDLE);

	blk_add_timer(rq);
	WRITE_ONCE(rq->state, MQ_RQ_IN_FLIGHT);

#ifdef CONFIG_BLK_DEV_INTEGRITY
	if (blk_integrity_rq(rq) && req_op(rq) == REQ_OP_WRITE)
		q->integrity.profile->prepare_fn(rq);
#endif
	if (rq->bio && rq->bio->bi_opf & REQ_POLLED)
		WRITE_ONCE(rq->bio->bi_cookie, blk_rq_to_qc(rq));
}
EXPORT_SYMBOL(blk_mq_start_request);

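/*
 * Usage sketch (explanatory, not part of the original source): in a
 * driver's ->queue_rq() the request is started before it is handed to
 * the hardware, roughly:
 *
 *	static blk_status_t foo_queue_rq(struct blk_mq_hw_ctx *hctx,
 *					 const struct blk_mq_queue_data *bd)
 *	{
 *		struct request *rq = bd->rq;
 *
 *		blk_mq_start_request(rq);
 *		if (foo_issue(rq))		(hypothetical driver hook)
 *			return BLK_STS_RESOURCE;
 *		return BLK_STS_OK;
 *	}
 */
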
Christoph Hellwiged0791b2014-04-16 09:44:57 +02001021static void __blk_mq_requeue_request(struct request *rq)
Jens Axboe320ae512013-10-24 09:20:05 +01001022{
1023 struct request_queue *q = rq->q;
1024
Ming Lei923218f2017-11-02 23:24:38 +08001025 blk_mq_put_driver_tag(rq);
1026
Christoph Hellwiga54895f2020-12-03 17:21:39 +01001027 trace_block_rq_requeue(rq);
Josef Bacika7905042018-07-03 09:32:35 -06001028 rq_qos_requeue(q, rq);
Christoph Hellwig49f5baa2014-02-11 08:27:14 -08001029
Keith Busch12f5b932018-05-29 15:52:28 +02001030 if (blk_mq_request_started(rq)) {
1031 WRITE_ONCE(rq->state, MQ_RQ_IDLE);
Christoph Hellwigda661262018-06-14 13:58:45 +02001032 rq->rq_flags &= ~RQF_TIMED_OUT;
Christoph Hellwige2490072014-09-13 16:40:09 -07001033 }
Jens Axboe320ae512013-10-24 09:20:05 +01001034}
1035
Bart Van Assche2b053ac2016-10-28 17:21:41 -07001036void blk_mq_requeue_request(struct request *rq, bool kick_requeue_list)
Christoph Hellwiged0791b2014-04-16 09:44:57 +02001037{
Christoph Hellwiged0791b2014-04-16 09:44:57 +02001038 __blk_mq_requeue_request(rq);
Christoph Hellwiged0791b2014-04-16 09:44:57 +02001039
Ming Lei105976f2018-02-23 23:36:56 +08001040 /* this request will be re-inserted to io scheduler queue */
1041 blk_mq_sched_requeue_request(rq);
1042
Jens Axboe7d692332018-10-24 10:48:12 -06001043 BUG_ON(!list_empty(&rq->queuelist));
Bart Van Assche2b053ac2016-10-28 17:21:41 -07001044 blk_mq_add_to_requeue_list(rq, true, kick_requeue_list);
Christoph Hellwiged0791b2014-04-16 09:44:57 +02001045}
1046EXPORT_SYMBOL(blk_mq_requeue_request);
1047
Christoph Hellwig6fca6a62014-05-28 08:08:02 -06001048static void blk_mq_requeue_work(struct work_struct *work)
1049{
1050 struct request_queue *q =
Mike Snitzer28494502016-09-14 13:28:30 -04001051 container_of(work, struct request_queue, requeue_work.work);
Christoph Hellwig6fca6a62014-05-28 08:08:02 -06001052 LIST_HEAD(rq_list);
1053 struct request *rq, *next;
Christoph Hellwig6fca6a62014-05-28 08:08:02 -06001054
Jens Axboe18e97812017-07-27 08:03:57 -06001055 spin_lock_irq(&q->requeue_lock);
Christoph Hellwig6fca6a62014-05-28 08:08:02 -06001056 list_splice_init(&q->requeue_list, &rq_list);
Jens Axboe18e97812017-07-27 08:03:57 -06001057 spin_unlock_irq(&q->requeue_lock);
Christoph Hellwig6fca6a62014-05-28 08:08:02 -06001058
1059 list_for_each_entry_safe(rq, next, &rq_list, queuelist) {
Jianchao Wangaef18972019-02-12 09:56:25 +08001060 if (!(rq->rq_flags & (RQF_SOFTBARRIER | RQF_DONTPREP)))
Christoph Hellwig6fca6a62014-05-28 08:08:02 -06001061 continue;
1062
Christoph Hellwige8064022016-10-20 15:12:13 +02001063 rq->rq_flags &= ~RQF_SOFTBARRIER;
Christoph Hellwig6fca6a62014-05-28 08:08:02 -06001064 list_del_init(&rq->queuelist);
Jianchao Wangaef18972019-02-12 09:56:25 +08001065 /*
1066 * If RQF_DONTPREP, rq has contained some driver specific
1067 * data, so insert it to hctx dispatch list to avoid any
1068 * merge.
1069 */
1070 if (rq->rq_flags & RQF_DONTPREP)
Ming Lei01e99ae2020-02-25 09:04:32 +08001071 blk_mq_request_bypass_insert(rq, false, false);
Jianchao Wangaef18972019-02-12 09:56:25 +08001072 else
1073 blk_mq_sched_insert_request(rq, true, false, false);
Christoph Hellwig6fca6a62014-05-28 08:08:02 -06001074 }
1075
1076 while (!list_empty(&rq_list)) {
1077 rq = list_entry(rq_list.next, struct request, queuelist);
1078 list_del_init(&rq->queuelist);
Mike Snitzer9e97d292018-01-17 11:25:58 -05001079 blk_mq_sched_insert_request(rq, false, false, false);
Christoph Hellwig6fca6a62014-05-28 08:08:02 -06001080 }
1081
Bart Van Assche52d7f1b2016-10-28 17:20:32 -07001082 blk_mq_run_hw_queues(q, false);
Christoph Hellwig6fca6a62014-05-28 08:08:02 -06001083}
1084
Bart Van Assche2b053ac2016-10-28 17:21:41 -07001085void blk_mq_add_to_requeue_list(struct request *rq, bool at_head,
1086 bool kick_requeue_list)
Christoph Hellwig6fca6a62014-05-28 08:08:02 -06001087{
1088 struct request_queue *q = rq->q;
1089 unsigned long flags;
1090
1091 /*
1092 * We abuse this flag that is otherwise used by the I/O scheduler to
Jens Axboeff821d22017-11-10 22:05:12 -07001093 * request head insertion from the workqueue.
Christoph Hellwig6fca6a62014-05-28 08:08:02 -06001094 */
Christoph Hellwige8064022016-10-20 15:12:13 +02001095 BUG_ON(rq->rq_flags & RQF_SOFTBARRIER);
Christoph Hellwig6fca6a62014-05-28 08:08:02 -06001096
1097 spin_lock_irqsave(&q->requeue_lock, flags);
1098 if (at_head) {
Christoph Hellwige8064022016-10-20 15:12:13 +02001099 rq->rq_flags |= RQF_SOFTBARRIER;
Christoph Hellwig6fca6a62014-05-28 08:08:02 -06001100 list_add(&rq->queuelist, &q->requeue_list);
1101 } else {
1102 list_add_tail(&rq->queuelist, &q->requeue_list);
1103 }
1104 spin_unlock_irqrestore(&q->requeue_lock, flags);
Bart Van Assche2b053ac2016-10-28 17:21:41 -07001105
1106 if (kick_requeue_list)
1107 blk_mq_kick_requeue_list(q);
Christoph Hellwig6fca6a62014-05-28 08:08:02 -06001108}
Christoph Hellwig6fca6a62014-05-28 08:08:02 -06001109
1110void blk_mq_kick_requeue_list(struct request_queue *q)
1111{
Bart Van Asscheae943d22018-01-19 08:58:55 -08001112 kblockd_mod_delayed_work_on(WORK_CPU_UNBOUND, &q->requeue_work, 0);
Christoph Hellwig6fca6a62014-05-28 08:08:02 -06001113}
1114EXPORT_SYMBOL(blk_mq_kick_requeue_list);
1115
Mike Snitzer28494502016-09-14 13:28:30 -04001116void blk_mq_delay_kick_requeue_list(struct request_queue *q,
1117 unsigned long msecs)
1118{
Bart Van Assched4acf362017-08-09 11:28:06 -07001119 kblockd_mod_delayed_work_on(WORK_CPU_UNBOUND, &q->requeue_work,
1120 msecs_to_jiffies(msecs));
Mike Snitzer28494502016-09-14 13:28:30 -04001121}
1122EXPORT_SYMBOL(blk_mq_delay_kick_requeue_list);
1123
Jens Axboe0e62f512014-06-04 10:23:49 -06001124struct request *blk_mq_tag_to_rq(struct blk_mq_tags *tags, unsigned int tag)
1125{
Jens Axboe88c7b2b2016-08-25 08:07:30 -06001126 if (tag < tags->nr_tags) {
1127 prefetch(tags->rqs[tag]);
Hannes Reinecke4ee86ba2016-03-15 12:03:28 -07001128 return tags->rqs[tag];
Jens Axboe88c7b2b2016-08-25 08:07:30 -06001129 }
Hannes Reinecke4ee86ba2016-03-15 12:03:28 -07001130
1131 return NULL;
Christoph Hellwig24d2f902014-04-15 14:14:00 -06001132}
1133EXPORT_SYMBOL(blk_mq_tag_to_rq);
1134
Jens Axboe3c94d832018-12-17 21:11:17 -07001135static bool blk_mq_rq_inflight(struct blk_mq_hw_ctx *hctx, struct request *rq,
1136 void *priv, bool reserved)
Jens Axboeae879912018-11-08 09:03:51 -07001137{
1138 /*
Ming Lei05a4fed2020-07-07 11:04:33 -04001139 * If we find a request that isn't idle and the queue matches,
Jens Axboe3c94d832018-12-17 21:11:17 -07001140 * we know the queue is busy. Return false to stop the iteration.
Jens Axboeae879912018-11-08 09:03:51 -07001141 */
Ming Lei05a4fed2020-07-07 11:04:33 -04001142 if (blk_mq_request_started(rq) && rq->q == hctx->queue) {
Jens Axboeae879912018-11-08 09:03:51 -07001143 bool *busy = priv;
1144
1145 *busy = true;
1146 return false;
1147 }
1148
1149 return true;
1150}
1151
Jens Axboe3c94d832018-12-17 21:11:17 -07001152bool blk_mq_queue_inflight(struct request_queue *q)
Jens Axboeae879912018-11-08 09:03:51 -07001153{
1154 bool busy = false;
1155
Jens Axboe3c94d832018-12-17 21:11:17 -07001156 blk_mq_queue_tag_busy_iter(q, blk_mq_rq_inflight, &busy);
Jens Axboeae879912018-11-08 09:03:51 -07001157 return busy;
1158}
Jens Axboe3c94d832018-12-17 21:11:17 -07001159EXPORT_SYMBOL_GPL(blk_mq_queue_inflight);
Jens Axboeae879912018-11-08 09:03:51 -07001160
Tejun Heo358f70d2018-01-09 08:29:50 -08001161static void blk_mq_rq_timed_out(struct request *req, bool reserved)
Jens Axboe320ae512013-10-24 09:20:05 +01001162{
Christoph Hellwigda661262018-06-14 13:58:45 +02001163 req->rq_flags |= RQF_TIMED_OUT;
Christoph Hellwigd1210d52018-05-29 15:52:39 +02001164 if (req->q->mq_ops->timeout) {
1165 enum blk_eh_timer_return ret;
Jens Axboe87ee7b12014-04-24 08:51:47 -06001166
Christoph Hellwigd1210d52018-05-29 15:52:39 +02001167 ret = req->q->mq_ops->timeout(req, reserved);
1168 if (ret == BLK_EH_DONE)
1169 return;
1170 WARN_ON_ONCE(ret != BLK_EH_RESET_TIMER);
Christoph Hellwig46f92d42014-09-13 16:40:12 -07001171 }
Christoph Hellwigd1210d52018-05-29 15:52:39 +02001172
1173 blk_add_timer(req);
Jens Axboe87ee7b12014-04-24 08:51:47 -06001174}
Keith Busch5b3f25f2015-01-07 18:55:46 -07001175
Keith Busch12f5b932018-05-29 15:52:28 +02001176static bool blk_mq_req_expired(struct request *rq, unsigned long *next)
1177{
1178 unsigned long deadline;
1179
1180 if (blk_mq_rq_state(rq) != MQ_RQ_IN_FLIGHT)
1181 return false;
Christoph Hellwigda661262018-06-14 13:58:45 +02001182 if (rq->rq_flags & RQF_TIMED_OUT)
1183 return false;
Keith Busch12f5b932018-05-29 15:52:28 +02001184
Christoph Hellwig079076b2018-11-14 17:02:05 +01001185 deadline = READ_ONCE(rq->deadline);
Keith Busch12f5b932018-05-29 15:52:28 +02001186 if (time_after_eq(jiffies, deadline))
1187 return true;
1188
1189 if (*next == 0)
1190 *next = deadline;
1191 else if (time_after(*next, deadline))
1192 *next = deadline;
1193 return false;
1194}
1195
Ming Lei2e315dc2021-05-11 23:22:34 +08001196void blk_mq_put_rq_ref(struct request *rq)
1197{
Ming Leia9ed27a2021-08-18 09:09:25 +08001198 if (is_flush_rq(rq))
Ming Lei2e315dc2021-05-11 23:22:34 +08001199 rq->end_io(rq, 0);
1200 else if (refcount_dec_and_test(&rq->ref))
1201 __blk_mq_free_request(rq);
1202}
1203
Jens Axboe7baa8572018-11-08 10:24:07 -07001204static bool blk_mq_check_expired(struct blk_mq_hw_ctx *hctx,
Christoph Hellwig81481eb2014-09-13 16:40:11 -07001205 struct request *rq, void *priv, bool reserved)
Jens Axboe320ae512013-10-24 09:20:05 +01001206{
Keith Busch12f5b932018-05-29 15:52:28 +02001207 unsigned long *next = priv;
Christoph Hellwig81481eb2014-09-13 16:40:11 -07001208
Keith Busch12f5b932018-05-29 15:52:28 +02001209 /*
Ming Leic797b402021-08-11 23:52:02 +08001210 * blk_mq_queue_tag_busy_iter() has locked the request, so it cannot
1211 * be reallocated underneath the timeout handler's processing, then
1212 * the expire check is reliable. If the request is not expired, then
1213 * it was completed and reallocated as a new request after returning
1214 * from blk_mq_check_expired().
Keith Busch12f5b932018-05-29 15:52:28 +02001215 */
1216 if (blk_mq_req_expired(rq, next))
Tejun Heo1d9bd512018-01-09 08:29:48 -08001217 blk_mq_rq_timed_out(rq, reserved);
Jens Axboe7baa8572018-11-08 10:24:07 -07001218 return true;
Tejun Heo1d9bd512018-01-09 08:29:48 -08001219}
1220
Christoph Hellwig287922e2015-10-30 20:57:30 +08001221static void blk_mq_timeout_work(struct work_struct *work)
Jens Axboe320ae512013-10-24 09:20:05 +01001222{
Christoph Hellwig287922e2015-10-30 20:57:30 +08001223 struct request_queue *q =
1224 container_of(work, struct request_queue, timeout_work);
Keith Busch12f5b932018-05-29 15:52:28 +02001225 unsigned long next = 0;
Tejun Heo1d9bd512018-01-09 08:29:48 -08001226 struct blk_mq_hw_ctx *hctx;
Christoph Hellwig81481eb2014-09-13 16:40:11 -07001227 int i;
Jens Axboe320ae512013-10-24 09:20:05 +01001228
Gabriel Krisman Bertazi71f79fb2016-08-01 08:23:39 -06001229 /* A deadlock might occur if a request is stuck requiring a
1230	 * timeout at the same time a queue freeze is waiting for
1231	 * completion, since the timeout code would not be able to
1232 * acquire the queue reference here.
1233 *
1234 * That's why we don't use blk_queue_enter here; instead, we use
1235 * percpu_ref_tryget directly, because we need to be able to
1236 * obtain a reference even in the short window between the queue
1237 * starting to freeze, by dropping the first reference in
Ming Lei1671d522017-03-27 20:06:57 +08001238 * blk_freeze_queue_start, and the moment the last request is
Gabriel Krisman Bertazi71f79fb2016-08-01 08:23:39 -06001239 * consumed, marked by the instant q_usage_counter reaches
1240 * zero.
1241 */
1242 if (!percpu_ref_tryget(&q->q_usage_counter))
Christoph Hellwig287922e2015-10-30 20:57:30 +08001243 return;
1244
Keith Busch12f5b932018-05-29 15:52:28 +02001245 blk_mq_queue_tag_busy_iter(q, blk_mq_check_expired, &next);
Jens Axboe320ae512013-10-24 09:20:05 +01001246
Keith Busch12f5b932018-05-29 15:52:28 +02001247 if (next != 0) {
1248 mod_timer(&q->timeout, next);
Jens Axboe0d2602c2014-05-13 15:10:52 -06001249 } else {
Bart Van Asschefcd36c32018-01-10 08:33:33 -08001250 /*
1251 * Request timeouts are handled as a forward rolling timer. If
1252 * we end up here it means that no requests are pending and
1253 * also that no request has been pending for a while. Mark
1254 * each hctx as idle.
1255 */
Ming Leif054b562015-04-21 10:00:19 +08001256 queue_for_each_hw_ctx(q, hctx, i) {
1257 /* the hctx may be unmapped, so check it here */
1258 if (blk_mq_hw_queue_mapped(hctx))
1259 blk_mq_tag_idle(hctx);
1260 }
Jens Axboe0d2602c2014-05-13 15:10:52 -06001261 }
Christoph Hellwig287922e2015-10-30 20:57:30 +08001262 blk_queue_exit(q);
Jens Axboe320ae512013-10-24 09:20:05 +01001263}
1264
Omar Sandoval88459642016-09-17 08:38:44 -06001265struct flush_busy_ctx_data {
1266 struct blk_mq_hw_ctx *hctx;
1267 struct list_head *list;
1268};
1269
1270static bool flush_busy_ctx(struct sbitmap *sb, unsigned int bitnr, void *data)
1271{
1272 struct flush_busy_ctx_data *flush_data = data;
1273 struct blk_mq_hw_ctx *hctx = flush_data->hctx;
1274 struct blk_mq_ctx *ctx = hctx->ctxs[bitnr];
Ming Leic16d6b52018-12-17 08:44:05 -07001275 enum hctx_type type = hctx->type;
Omar Sandoval88459642016-09-17 08:38:44 -06001276
Omar Sandoval88459642016-09-17 08:38:44 -06001277 spin_lock(&ctx->lock);
Ming Leic16d6b52018-12-17 08:44:05 -07001278 list_splice_tail_init(&ctx->rq_lists[type], flush_data->list);
Omar Sandovale9a99a62018-02-27 16:56:42 -08001279 sbitmap_clear_bit(sb, bitnr);
Omar Sandoval88459642016-09-17 08:38:44 -06001280 spin_unlock(&ctx->lock);
1281 return true;
1282}
1283
Jens Axboe320ae512013-10-24 09:20:05 +01001284/*
Jens Axboe1429d7c2014-05-19 09:23:55 -06001285 * Process software queues that have been marked busy, splicing them
1286 * to the for-dispatch list.
1287 */
Jens Axboe2c3ad662016-12-14 14:34:47 -07001288void blk_mq_flush_busy_ctxs(struct blk_mq_hw_ctx *hctx, struct list_head *list)
Jens Axboe1429d7c2014-05-19 09:23:55 -06001289{
Omar Sandoval88459642016-09-17 08:38:44 -06001290 struct flush_busy_ctx_data data = {
1291 .hctx = hctx,
1292 .list = list,
1293 };
Jens Axboe1429d7c2014-05-19 09:23:55 -06001294
Omar Sandoval88459642016-09-17 08:38:44 -06001295 sbitmap_for_each_set(&hctx->ctx_map, flush_busy_ctx, &data);
Jens Axboe1429d7c2014-05-19 09:23:55 -06001296}
Jens Axboe2c3ad662016-12-14 14:34:47 -07001297EXPORT_SYMBOL_GPL(blk_mq_flush_busy_ctxs);
Jens Axboe1429d7c2014-05-19 09:23:55 -06001298
Ming Leib3476892017-10-14 17:22:30 +08001299struct dispatch_rq_data {
1300 struct blk_mq_hw_ctx *hctx;
1301 struct request *rq;
1302};
1303
1304static bool dispatch_rq_from_ctx(struct sbitmap *sb, unsigned int bitnr,
1305 void *data)
1306{
1307 struct dispatch_rq_data *dispatch_data = data;
1308 struct blk_mq_hw_ctx *hctx = dispatch_data->hctx;
1309 struct blk_mq_ctx *ctx = hctx->ctxs[bitnr];
Ming Leic16d6b52018-12-17 08:44:05 -07001310 enum hctx_type type = hctx->type;
Ming Leib3476892017-10-14 17:22:30 +08001311
1312 spin_lock(&ctx->lock);
Ming Leic16d6b52018-12-17 08:44:05 -07001313 if (!list_empty(&ctx->rq_lists[type])) {
1314 dispatch_data->rq = list_entry_rq(ctx->rq_lists[type].next);
Ming Leib3476892017-10-14 17:22:30 +08001315 list_del_init(&dispatch_data->rq->queuelist);
Ming Leic16d6b52018-12-17 08:44:05 -07001316 if (list_empty(&ctx->rq_lists[type]))
Ming Leib3476892017-10-14 17:22:30 +08001317 sbitmap_clear_bit(sb, bitnr);
1318 }
1319 spin_unlock(&ctx->lock);
1320
1321 return !dispatch_data->rq;
1322}
1323
1324struct request *blk_mq_dequeue_from_ctx(struct blk_mq_hw_ctx *hctx,
1325 struct blk_mq_ctx *start)
1326{
Jens Axboef31967f2018-10-29 13:13:29 -06001327 unsigned off = start ? start->index_hw[hctx->type] : 0;
Ming Leib3476892017-10-14 17:22:30 +08001328 struct dispatch_rq_data data = {
1329 .hctx = hctx,
1330 .rq = NULL,
1331 };
1332
1333 __sbitmap_for_each_set(&hctx->ctx_map, off,
1334 dispatch_rq_from_ctx, &data);
1335
1336 return data.rq;
1337}
1338
Ming Lei570e9b72020-06-30 22:03:55 +08001339static bool __blk_mq_get_driver_tag(struct request *rq)
1340{
John Garryae0f1a72021-10-05 18:23:38 +08001341 struct sbitmap_queue *bt = &rq->mq_hctx->tags->bitmap_tags;
Ming Lei570e9b72020-06-30 22:03:55 +08001342 unsigned int tag_offset = rq->mq_hctx->tags->nr_reserved_tags;
Ming Lei570e9b72020-06-30 22:03:55 +08001343 int tag;
1344
Ming Lei568f2702020-07-06 22:41:11 +08001345 blk_mq_tag_busy(rq->mq_hctx);
1346
Ming Lei570e9b72020-06-30 22:03:55 +08001347 if (blk_mq_tag_is_reserved(rq->mq_hctx->sched_tags, rq->internal_tag)) {
John Garryae0f1a72021-10-05 18:23:38 +08001348 bt = &rq->mq_hctx->tags->breserved_tags;
Ming Lei570e9b72020-06-30 22:03:55 +08001349 tag_offset = 0;
Ming Lei28500852020-09-11 18:41:14 +08001350 } else {
1351 if (!hctx_may_queue(rq->mq_hctx, bt))
1352 return false;
Ming Lei570e9b72020-06-30 22:03:55 +08001353 }
1354
Ming Lei570e9b72020-06-30 22:03:55 +08001355 tag = __sbitmap_queue_get(bt);
1356 if (tag == BLK_MQ_NO_TAG)
1357 return false;
1358
1359 rq->tag = tag + tag_offset;
Ming Lei570e9b72020-06-30 22:03:55 +08001360 return true;
1361}
1362
Jan Kara613471542021-06-03 12:47:21 +02001363bool blk_mq_get_driver_tag(struct request *rq)
Ming Lei570e9b72020-06-30 22:03:55 +08001364{
Ming Lei568f2702020-07-06 22:41:11 +08001365 struct blk_mq_hw_ctx *hctx = rq->mq_hctx;
1366
1367 if (rq->tag == BLK_MQ_NO_TAG && !__blk_mq_get_driver_tag(rq))
1368 return false;
1369
Ming Lei51db1c32020-08-19 23:20:19 +08001370 if ((hctx->flags & BLK_MQ_F_TAG_QUEUE_SHARED) &&
Ming Lei568f2702020-07-06 22:41:11 +08001371 !(rq->rq_flags & RQF_MQ_INFLIGHT)) {
1372 rq->rq_flags |= RQF_MQ_INFLIGHT;
John Garrybccf5e22020-08-19 23:20:26 +08001373 __blk_mq_inc_active_requests(hctx);
Ming Lei568f2702020-07-06 22:41:11 +08001374 }
1375 hctx->tags->rqs[rq->tag] = rq;
1376 return true;
Ming Lei570e9b72020-06-30 22:03:55 +08001377}
1378
Jens Axboeeb619fd2017-11-09 08:32:43 -07001379static int blk_mq_dispatch_wake(wait_queue_entry_t *wait, unsigned mode,
1380 int flags, void *key)
Omar Sandovalda55f2c2017-02-22 10:58:29 -08001381{
1382 struct blk_mq_hw_ctx *hctx;
1383
1384 hctx = container_of(wait, struct blk_mq_hw_ctx, dispatch_wait);
1385
Ming Lei5815839b2018-06-25 19:31:47 +08001386 spin_lock(&hctx->dispatch_wait_lock);
Jens Axboee8618572019-03-25 12:34:10 -06001387 if (!list_empty(&wait->entry)) {
1388 struct sbitmap_queue *sbq;
1389
1390 list_del_init(&wait->entry);
John Garryae0f1a72021-10-05 18:23:38 +08001391 sbq = &hctx->tags->bitmap_tags;
Jens Axboee8618572019-03-25 12:34:10 -06001392 atomic_dec(&sbq->ws_active);
1393 }
Ming Lei5815839b2018-06-25 19:31:47 +08001394 spin_unlock(&hctx->dispatch_wait_lock);
1395
Omar Sandovalda55f2c2017-02-22 10:58:29 -08001396 blk_mq_run_hw_queue(hctx, true);
1397 return 1;
1398}
1399
Jens Axboef906a6a2017-11-09 16:10:13 -07001400/*
1401 * Mark us waiting for a tag. For shared tags, this involves hooking us into
Bart Van Asscheee3e4de2018-01-09 10:09:15 -08001402 * the tag wakeups. For non-shared tags, we can simply mark ourselves as
1403 * needing a restart. In both cases, take care to check the condition
Jens Axboef906a6a2017-11-09 16:10:13 -07001404 * again after marking ourselves as waiting.
1405 */
Ming Lei2278d692018-06-25 19:31:46 +08001406static bool blk_mq_mark_tag_wait(struct blk_mq_hw_ctx *hctx,
Jens Axboef906a6a2017-11-09 16:10:13 -07001407 struct request *rq)
Omar Sandovalda55f2c2017-02-22 10:58:29 -08001408{
John Garryae0f1a72021-10-05 18:23:38 +08001409 struct sbitmap_queue *sbq = &hctx->tags->bitmap_tags;
Ming Lei5815839b2018-06-25 19:31:47 +08001410 struct wait_queue_head *wq;
Jens Axboef906a6a2017-11-09 16:10:13 -07001411 wait_queue_entry_t *wait;
1412 bool ret;
Omar Sandovalda55f2c2017-02-22 10:58:29 -08001413
Ming Lei51db1c32020-08-19 23:20:19 +08001414 if (!(hctx->flags & BLK_MQ_F_TAG_QUEUE_SHARED)) {
Yufen Yu684b7322019-03-15 11:05:10 +08001415 blk_mq_sched_mark_restart_hctx(hctx);
Omar Sandovalda55f2c2017-02-22 10:58:29 -08001416
Bart Van Asschec27d53f2018-01-10 13:41:21 -08001417 /*
1418 * It's possible that a tag was freed in the window between the
1419 * allocation failure and adding the hardware queue to the wait
1420 * queue.
1421 *
1422 * Don't clear RESTART here, someone else could have set it.
1423 * At most this will cost an extra queue run.
1424 */
Ming Lei8ab6bb9e2018-06-25 19:31:45 +08001425 return blk_mq_get_driver_tag(rq);
Jens Axboeeb619fd2017-11-09 08:32:43 -07001426 }
1427
Ming Lei2278d692018-06-25 19:31:46 +08001428 wait = &hctx->dispatch_wait;
Bart Van Asschec27d53f2018-01-10 13:41:21 -08001429 if (!list_empty_careful(&wait->entry))
1430 return false;
1431
Jens Axboee8618572019-03-25 12:34:10 -06001432 wq = &bt_wait_ptr(sbq, hctx)->wait;
Ming Lei5815839b2018-06-25 19:31:47 +08001433
1434 spin_lock_irq(&wq->lock);
1435 spin_lock(&hctx->dispatch_wait_lock);
Bart Van Asschec27d53f2018-01-10 13:41:21 -08001436 if (!list_empty(&wait->entry)) {
Ming Lei5815839b2018-06-25 19:31:47 +08001437 spin_unlock(&hctx->dispatch_wait_lock);
1438 spin_unlock_irq(&wq->lock);
Bart Van Asschec27d53f2018-01-10 13:41:21 -08001439 return false;
1440 }
1441
Jens Axboee8618572019-03-25 12:34:10 -06001442 atomic_inc(&sbq->ws_active);
Ming Lei5815839b2018-06-25 19:31:47 +08001443 wait->flags &= ~WQ_FLAG_EXCLUSIVE;
1444 __add_wait_queue(wq, wait);
Bart Van Asschec27d53f2018-01-10 13:41:21 -08001445
Omar Sandovalda55f2c2017-02-22 10:58:29 -08001446 /*
Jens Axboeeb619fd2017-11-09 08:32:43 -07001447 * It's possible that a tag was freed in the window between the
1448 * allocation failure and adding the hardware queue to the wait
1449 * queue.
Omar Sandovalda55f2c2017-02-22 10:58:29 -08001450 */
Ming Lei8ab6bb9e2018-06-25 19:31:45 +08001451 ret = blk_mq_get_driver_tag(rq);
Bart Van Asschec27d53f2018-01-10 13:41:21 -08001452 if (!ret) {
Ming Lei5815839b2018-06-25 19:31:47 +08001453 spin_unlock(&hctx->dispatch_wait_lock);
1454 spin_unlock_irq(&wq->lock);
Bart Van Asschec27d53f2018-01-10 13:41:21 -08001455 return false;
Jens Axboef906a6a2017-11-09 16:10:13 -07001456 }
Bart Van Asschec27d53f2018-01-10 13:41:21 -08001457
1458 /*
1459 * We got a tag, remove ourselves from the wait queue to ensure
1460 * someone else gets the wakeup.
1461 */
Bart Van Asschec27d53f2018-01-10 13:41:21 -08001462 list_del_init(&wait->entry);
Jens Axboee8618572019-03-25 12:34:10 -06001463 atomic_dec(&sbq->ws_active);
Ming Lei5815839b2018-06-25 19:31:47 +08001464 spin_unlock(&hctx->dispatch_wait_lock);
1465 spin_unlock_irq(&wq->lock);
Bart Van Asschec27d53f2018-01-10 13:41:21 -08001466
1467 return true;
Omar Sandovalda55f2c2017-02-22 10:58:29 -08001468}
1469
Ming Lei6e7687172018-07-03 09:03:16 -06001470#define BLK_MQ_DISPATCH_BUSY_EWMA_WEIGHT 8
1471#define BLK_MQ_DISPATCH_BUSY_EWMA_FACTOR 4
1472/*
1473 * Update dispatch busy with an Exponentially Weighted Moving Average (EWMA):
1474 * - EWMA is a simple way to maintain a running average value
1475 * - weights of 7/8 and 1/8 are applied so that old samples decay exponentially
1476 * - a factor of 4 is used to avoid the result getting too small (0); the
1477 *   exact factor doesn't matter because the EWMA decays exponentially anyway
1478 */
1479static void blk_mq_update_dispatch_busy(struct blk_mq_hw_ctx *hctx, bool busy)
1480{
1481 unsigned int ewma;
1482
Ming Lei6e7687172018-07-03 09:03:16 -06001483 ewma = hctx->dispatch_busy;
1484
1485 if (!ewma && !busy)
1486 return;
1487
1488 ewma *= BLK_MQ_DISPATCH_BUSY_EWMA_WEIGHT - 1;
1489 if (busy)
1490 ewma += 1 << BLK_MQ_DISPATCH_BUSY_EWMA_FACTOR;
1491 ewma /= BLK_MQ_DISPATCH_BUSY_EWMA_WEIGHT;
1492
1493 hctx->dispatch_busy = ewma;
1494}
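
/*
 * Worked example of the EWMA update above (weight 8, factor 4), assuming
 * hctx->dispatch_busy is currently 4 and the last dispatch was "busy":
 *
 *	ewma  = 4 * (8 - 1)	=  28
 *	ewma += 1 << 4		=  44
 *	ewma /= 8		=   5
 *
 * Repeated busy dispatches drift dispatch_busy towards 16, while successful
 * ones decay it towards 0 by multiplying with 7/8 each time.
 */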
1495
Ming Lei86ff7c22018-01-30 22:04:57 -05001496#define BLK_MQ_RESOURCE_DELAY 3 /* ms units */
1497
Johannes Thumshirnc92a4102020-03-25 00:24:44 +09001498static void blk_mq_handle_dev_resource(struct request *rq,
1499 struct list_head *list)
1500{
1501 struct request *next =
1502 list_first_entry_or_null(list, struct request, queuelist);
1503
1504 /*
1505 * If an I/O scheduler has been configured and we got a driver tag for
1506 * the next request already, free it.
1507 */
1508 if (next)
1509 blk_mq_put_driver_tag(next);
1510
1511 list_add(&rq->queuelist, list);
1512 __blk_mq_requeue_request(rq);
1513}
1514
Keith Busch0512a752020-05-12 17:55:47 +09001515static void blk_mq_handle_zone_resource(struct request *rq,
1516 struct list_head *zone_list)
1517{
1518 /*
1519 * If we end up here it is because we cannot dispatch a request to a
1520 * specific zone due to LLD level zone-write locking or other zone
1521 * related resource not being available. In this case, set the request
1522 * aside in zone_list for retrying it later.
1523 */
1524 list_add(&rq->queuelist, zone_list);
1525 __blk_mq_requeue_request(rq);
1526}
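
/*
 * Illustrative fragment, assuming a hypothetical zoned driver: its
 * ->queue_rq() would return BLK_STS_ZONE_RESOURCE when only the target zone
 * is temporarily unavailable (e.g. its write lock is held), so that the
 * dispatch loop parks just this request on zone_list via the helper above
 * instead of treating it as a device-wide resource shortage:
 *
 *	if (op_is_write(req_op(rq)) && !example_zone_trylock(rq))
 *		return BLK_STS_ZONE_RESOURCE;
 *
 * example_zone_trylock() stands in for the driver's own zone locking.
 */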
1527
Ming Lei75383522020-06-30 18:24:58 +08001528enum prep_dispatch {
1529 PREP_DISPATCH_OK,
1530 PREP_DISPATCH_NO_TAG,
1531 PREP_DISPATCH_NO_BUDGET,
1532};
1533
1534static enum prep_dispatch blk_mq_prep_dispatch_rq(struct request *rq,
1535 bool need_budget)
1536{
1537 struct blk_mq_hw_ctx *hctx = rq->mq_hctx;
Ming Lei2a5a24a2021-01-22 10:33:12 +08001538 int budget_token = -1;
Ming Lei75383522020-06-30 18:24:58 +08001539
Ming Lei2a5a24a2021-01-22 10:33:12 +08001540 if (need_budget) {
1541 budget_token = blk_mq_get_dispatch_budget(rq->q);
1542 if (budget_token < 0) {
1543 blk_mq_put_driver_tag(rq);
1544 return PREP_DISPATCH_NO_BUDGET;
1545 }
1546 blk_mq_set_rq_budget_token(rq, budget_token);
Ming Lei75383522020-06-30 18:24:58 +08001547 }
1548
1549 if (!blk_mq_get_driver_tag(rq)) {
1550 /*
1551 * The initial allocation attempt failed, so we need to
1552 * rerun the hardware queue when a tag is freed. The
1553 * waitqueue takes care of that. If the queue is run
1554 * before we add this entry back on the dispatch list,
1555 * we'll re-run it below.
1556 */
1557 if (!blk_mq_mark_tag_wait(hctx, rq)) {
Ming Lei1fd40b52020-06-30 18:25:00 +08001558 /*
1559			 * Budgets that were not acquired in this function will be
1560			 * released together when the partial dispatch is handled.
1561 */
1562 if (need_budget)
Ming Lei2a5a24a2021-01-22 10:33:12 +08001563 blk_mq_put_dispatch_budget(rq->q, budget_token);
Ming Lei75383522020-06-30 18:24:58 +08001564 return PREP_DISPATCH_NO_TAG;
1565 }
1566 }
1567
1568 return PREP_DISPATCH_OK;
1569}
1570
Ming Lei1fd40b52020-06-30 18:25:00 +08001571/* release all allocated budgets before calling to blk_mq_dispatch_rq_list */
1572static void blk_mq_release_budgets(struct request_queue *q,
Ming Lei2a5a24a2021-01-22 10:33:12 +08001573 struct list_head *list)
Ming Lei1fd40b52020-06-30 18:25:00 +08001574{
Ming Lei2a5a24a2021-01-22 10:33:12 +08001575 struct request *rq;
Ming Lei1fd40b52020-06-30 18:25:00 +08001576
Ming Lei2a5a24a2021-01-22 10:33:12 +08001577 list_for_each_entry(rq, list, queuelist) {
1578 int budget_token = blk_mq_get_rq_budget_token(rq);
1579
1580 if (budget_token >= 0)
1581 blk_mq_put_dispatch_budget(q, budget_token);
1582 }
Ming Lei1fd40b52020-06-30 18:25:00 +08001583}
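
/*
 * Illustrative sketch of the driver side of the dispatch budget released
 * above: a hypothetical driver that limits itself to a fixed number of
 * outstanding requests. The example_budget_left counter and the constant
 * token of 0 are assumptions for the sketch; real users such as SCSI hand
 * out per-request tokens instead.
 */
static atomic_t example_budget_left = ATOMIC_INIT(64);

static int __maybe_unused example_get_budget(struct request_queue *q)
{
	if (atomic_dec_if_positive(&example_budget_left) < 0)
		return -1;	/* no budget: dispatch stops and retries later */
	return 0;		/* budget token stored in the request by blk-mq */
}

static void __maybe_unused example_put_budget(struct request_queue *q,
					      int budget_token)
{
	atomic_inc(&example_budget_left);
}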
1584
Jens Axboe1f57f8d2018-06-28 11:54:01 -06001585/*
1586 * Returns true if we did some work AND can potentially do more.
1587 */
Ming Lei445874e2020-06-30 18:24:57 +08001588bool blk_mq_dispatch_rq_list(struct blk_mq_hw_ctx *hctx, struct list_head *list,
Ming Lei1fd40b52020-06-30 18:25:00 +08001589 unsigned int nr_budgets)
Jens Axboef04c3df2016-12-07 08:41:17 -07001590{
Ming Lei75383522020-06-30 18:24:58 +08001591 enum prep_dispatch prep;
Ming Lei445874e2020-06-30 18:24:57 +08001592 struct request_queue *q = hctx->queue;
Jianchao Wang6d6f167c2017-11-02 23:24:32 +08001593 struct request *rq, *nxt;
Christoph Hellwigfc17b652017-06-03 09:38:05 +02001594 int errors, queued;
Ming Lei86ff7c22018-01-30 22:04:57 -05001595 blk_status_t ret = BLK_STS_OK;
Keith Busch0512a752020-05-12 17:55:47 +09001596 LIST_HEAD(zone_list);
Jens Axboef04c3df2016-12-07 08:41:17 -07001597
Omar Sandoval81380ca2017-04-07 08:56:26 -06001598 if (list_empty(list))
1599 return false;
1600
Jens Axboef04c3df2016-12-07 08:41:17 -07001601 /*
Jens Axboef04c3df2016-12-07 08:41:17 -07001602 * Now process all the entries, sending them to the driver.
1603 */
Jens Axboe93efe982017-03-24 12:04:19 -06001604 errors = queued = 0;
Omar Sandoval81380ca2017-04-07 08:56:26 -06001605 do {
Jens Axboef04c3df2016-12-07 08:41:17 -07001606 struct blk_mq_queue_data bd;
1607
1608 rq = list_first_entry(list, struct request, queuelist);
Ming Lei0bca7992018-04-05 00:35:21 +08001609
Ming Lei445874e2020-06-30 18:24:57 +08001610 WARN_ON_ONCE(hctx != rq->mq_hctx);
Ming Lei1fd40b52020-06-30 18:25:00 +08001611 prep = blk_mq_prep_dispatch_rq(rq, !nr_budgets);
Ming Lei75383522020-06-30 18:24:58 +08001612 if (prep != PREP_DISPATCH_OK)
Ming Lei0bca7992018-04-05 00:35:21 +08001613 break;
Ming Leide148292017-10-14 17:22:29 +08001614
Jens Axboef04c3df2016-12-07 08:41:17 -07001615 list_del_init(&rq->queuelist);
1616
1617 bd.rq = rq;
Jens Axboe113285b2017-03-02 13:26:04 -07001618
1619 /*
1620 * Flag last if we have no more requests, or if we have more
1621 * but can't assign a driver tag to it.
1622 */
1623 if (list_empty(list))
1624 bd.last = true;
1625 else {
Jens Axboe113285b2017-03-02 13:26:04 -07001626 nxt = list_first_entry(list, struct request, queuelist);
Ming Lei8ab6bb9e2018-06-25 19:31:45 +08001627 bd.last = !blk_mq_get_driver_tag(nxt);
Jens Axboe113285b2017-03-02 13:26:04 -07001628 }
Jens Axboef04c3df2016-12-07 08:41:17 -07001629
Ming Lei1fd40b52020-06-30 18:25:00 +08001630 /*
1631		 * Once the request is queued to the LLD, there is no need to
1632		 * cover its budget any more.
1633 */
1634 if (nr_budgets)
1635 nr_budgets--;
Jens Axboef04c3df2016-12-07 08:41:17 -07001636 ret = q->mq_ops->queue_rq(hctx, &bd);
Ming Lei7bf13722020-07-01 21:58:57 +08001637 switch (ret) {
1638 case BLK_STS_OK:
1639 queued++;
Jens Axboef04c3df2016-12-07 08:41:17 -07001640 break;
Ming Lei7bf13722020-07-01 21:58:57 +08001641 case BLK_STS_RESOURCE:
1642 case BLK_STS_DEV_RESOURCE:
1643 blk_mq_handle_dev_resource(rq, list);
1644 goto out;
1645 case BLK_STS_ZONE_RESOURCE:
Keith Busch0512a752020-05-12 17:55:47 +09001646 /*
1647 * Move the request to zone_list and keep going through
1648 * the dispatch list to find more requests the drive can
1649 * accept.
1650 */
1651 blk_mq_handle_zone_resource(rq, &zone_list);
Ming Lei7bf13722020-07-01 21:58:57 +08001652 break;
1653 default:
Christoph Hellwigfc17b652017-06-03 09:38:05 +02001654 errors++;
Hannes Reineckee21ee5a2020-09-30 10:02:53 +02001655 blk_mq_end_request(rq, ret);
Christoph Hellwigfc17b652017-06-03 09:38:05 +02001656 }
Omar Sandoval81380ca2017-04-07 08:56:26 -06001657 } while (!list_empty(list));
Ming Lei7bf13722020-07-01 21:58:57 +08001658out:
Keith Busch0512a752020-05-12 17:55:47 +09001659 if (!list_empty(&zone_list))
1660 list_splice_tail_init(&zone_list, list);
1661
yangerkun632bfb62020-09-05 19:25:56 +08001662 /* If we didn't flush the entire list, we could have told the driver
1663 * there was more coming, but that turned out to be a lie.
1664 */
1665 if ((!list_empty(list) || errors) && q->mq_ops->commit_rqs && queued)
1666 q->mq_ops->commit_rqs(hctx);
Jens Axboef04c3df2016-12-07 08:41:17 -07001667 /*
1668 * Any items that need requeuing? Stuff them into hctx->dispatch,
1669 * that is where we will continue on next queue run.
1670 */
1671 if (!list_empty(list)) {
Ming Lei86ff7c22018-01-30 22:04:57 -05001672 bool needs_restart;
Ming Lei75383522020-06-30 18:24:58 +08001673 /* For non-shared tags, the RESTART check will suffice */
1674 bool no_tag = prep == PREP_DISPATCH_NO_TAG &&
Ming Lei51db1c32020-08-19 23:20:19 +08001675 (hctx->flags & BLK_MQ_F_TAG_QUEUE_SHARED);
Ming Lei75383522020-06-30 18:24:58 +08001676 bool no_budget_avail = prep == PREP_DISPATCH_NO_BUDGET;
Ming Lei86ff7c22018-01-30 22:04:57 -05001677
Ming Lei2a5a24a2021-01-22 10:33:12 +08001678 if (nr_budgets)
1679 blk_mq_release_budgets(q, list);
Jens Axboef04c3df2016-12-07 08:41:17 -07001680
1681 spin_lock(&hctx->lock);
Ming Lei01e99ae2020-02-25 09:04:32 +08001682 list_splice_tail_init(list, &hctx->dispatch);
Jens Axboef04c3df2016-12-07 08:41:17 -07001683 spin_unlock(&hctx->lock);
1684
1685 /*
Ming Leid7d85352020-08-17 18:01:15 +08001686 * Order adding requests to hctx->dispatch and checking
1687 * SCHED_RESTART flag. The pair of this smp_mb() is the one
1688		 * in blk_mq_sched_restart(). This prevents the restart path
1689		 * from missing requests newly added to hctx->dispatch while
1690		 * SCHED_RESTART is observed here.
1691 */
1692 smp_mb();
1693
1694 /*
Bart Van Assche710c7852017-04-07 11:16:51 -07001695 * If SCHED_RESTART was set by the caller of this function and
1696 * it is no longer set that means that it was cleared by another
1697 * thread and hence that a queue rerun is needed.
Jens Axboef04c3df2016-12-07 08:41:17 -07001698 *
Jens Axboeeb619fd2017-11-09 08:32:43 -07001699 * If 'no_tag' is set, that means that we failed getting
1700 * a driver tag with an I/O scheduler attached. If our dispatch
1701 * waitqueue is no longer active, ensure that we run the queue
1702 * AFTER adding our entries back to the list.
Jens Axboebd166ef2017-01-17 06:03:22 -07001703 *
Bart Van Assche710c7852017-04-07 11:16:51 -07001704 * If no I/O scheduler has been configured it is possible that
1705 * the hardware queue got stopped and restarted before requests
1706 * were pushed back onto the dispatch list. Rerun the queue to
1707 * avoid starvation. Notes:
1708 * - blk_mq_run_hw_queue() checks whether or not a queue has
1709 * been stopped before rerunning a queue.
1710 * - Some but not all block drivers stop a queue before
Christoph Hellwigfc17b652017-06-03 09:38:05 +02001711 * returning BLK_STS_RESOURCE. Two exceptions are scsi-mq
Bart Van Assche710c7852017-04-07 11:16:51 -07001712 * and dm-rq.
Ming Lei86ff7c22018-01-30 22:04:57 -05001713 *
1714 * If driver returns BLK_STS_RESOURCE and SCHED_RESTART
1715 * bit is set, run queue after a delay to avoid IO stalls
Douglas Andersonab3cee32020-04-20 09:24:51 -07001716 * that could otherwise occur if the queue is idle. We'll do
1717 * similar if we couldn't get budget and SCHED_RESTART is set.
Jens Axboebd166ef2017-01-17 06:03:22 -07001718 */
Ming Lei86ff7c22018-01-30 22:04:57 -05001719 needs_restart = blk_mq_sched_needs_restart(hctx);
1720 if (!needs_restart ||
Jens Axboeeb619fd2017-11-09 08:32:43 -07001721 (no_tag && list_empty_careful(&hctx->dispatch_wait.entry)))
Jens Axboebd166ef2017-01-17 06:03:22 -07001722 blk_mq_run_hw_queue(hctx, true);
Douglas Andersonab3cee32020-04-20 09:24:51 -07001723 else if (needs_restart && (ret == BLK_STS_RESOURCE ||
1724 no_budget_avail))
Ming Lei86ff7c22018-01-30 22:04:57 -05001725 blk_mq_delay_run_hw_queue(hctx, BLK_MQ_RESOURCE_DELAY);
Jens Axboe1f57f8d2018-06-28 11:54:01 -06001726
Ming Lei6e7687172018-07-03 09:03:16 -06001727 blk_mq_update_dispatch_busy(hctx, true);
Jens Axboe1f57f8d2018-06-28 11:54:01 -06001728 return false;
Ming Lei6e7687172018-07-03 09:03:16 -06001729 } else
1730 blk_mq_update_dispatch_busy(hctx, false);
Jens Axboef04c3df2016-12-07 08:41:17 -07001731
Jens Axboe93efe982017-03-24 12:04:19 -06001732 return (queued + errors) != 0;
Jens Axboef04c3df2016-12-07 08:41:17 -07001733}
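
/*
 * Illustrative sketch of the driver contract the dispatch loop above relies
 * on: ->queue_rq() may defer its doorbell write while bd->last is false, and
 * ->commit_rqs() must then flush whatever has been queued so far (this is
 * what the commit_rqs call above compensates for when "last" turned out to
 * be a lie). example_ring_doorbell() stands in for the real hardware update.
 */
static void example_ring_doorbell(void *hw)
{
	/* a real driver would write the hardware doorbell/ring tail here */
}

static blk_status_t __maybe_unused
example_queue_rq(struct blk_mq_hw_ctx *hctx, const struct blk_mq_queue_data *bd)
{
	struct request *rq = bd->rq;

	blk_mq_start_request(rq);
	/* ... place rq on the hardware ring (not shown) ... */
	if (bd->last)
		example_ring_doorbell(hctx->driver_data);
	return BLK_STS_OK;
}

static void __maybe_unused example_commit_rqs(struct blk_mq_hw_ctx *hctx)
{
	/* flush requests that were queued with bd->last == false */
	example_ring_doorbell(hctx->driver_data);
}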
1734
André Almeida105663f2020-01-06 15:08:18 -03001735/**
1736 * __blk_mq_run_hw_queue - Run a hardware queue.
1737 * @hctx: Pointer to the hardware queue to run.
1738 *
1739 * Send pending requests to the hardware.
1740 */
Bart Van Assche6a83e742016-11-02 10:09:51 -06001741static void __blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx)
1742{
1743 int srcu_idx;
1744
Jens Axboeb7a71e62017-08-01 09:28:24 -06001745 /*
Jens Axboeb7a71e62017-08-01 09:28:24 -06001746 * We can't run the queue inline with ints disabled. Ensure that
1747 * we catch bad users of this early.
1748 */
1749 WARN_ON_ONCE(in_interrupt());
1750
Jens Axboe04ced152018-01-09 08:29:46 -08001751 might_sleep_if(hctx->flags & BLK_MQ_F_BLOCKING);
Jens Axboebf4907c2017-03-30 12:30:39 -06001752
Jens Axboe04ced152018-01-09 08:29:46 -08001753 hctx_lock(hctx, &srcu_idx);
1754 blk_mq_sched_dispatch_requests(hctx);
1755 hctx_unlock(hctx, srcu_idx);
Bart Van Assche6a83e742016-11-02 10:09:51 -06001756}
1757
Ming Leif82ddf12018-04-08 17:48:10 +08001758static inline int blk_mq_first_mapped_cpu(struct blk_mq_hw_ctx *hctx)
1759{
1760 int cpu = cpumask_first_and(hctx->cpumask, cpu_online_mask);
1761
1762 if (cpu >= nr_cpu_ids)
1763 cpu = cpumask_first(hctx->cpumask);
1764 return cpu;
1765}
1766
Jens Axboe506e9312014-05-07 10:26:44 -06001767/*
1768 * It'd be great if the workqueue API had a way to pass
1769 * in a mask and had some smarts for more clever placement.
1770 * For now we just round-robin here, switching for every
1771 * BLK_MQ_CPU_WORK_BATCH queued items.
1772 */
1773static int blk_mq_hctx_next_cpu(struct blk_mq_hw_ctx *hctx)
1774{
Ming Lei7bed4592018-01-18 00:41:51 +08001775 bool tried = false;
Ming Lei476f8c92018-04-08 17:48:09 +08001776 int next_cpu = hctx->next_cpu;
Ming Lei7bed4592018-01-18 00:41:51 +08001777
Christoph Hellwigb657d7e2014-11-24 09:27:23 +01001778 if (hctx->queue->nr_hw_queues == 1)
1779 return WORK_CPU_UNBOUND;
Jens Axboe506e9312014-05-07 10:26:44 -06001780
1781 if (--hctx->next_cpu_batch <= 0) {
Ming Lei7bed4592018-01-18 00:41:51 +08001782select_cpu:
Ming Lei476f8c92018-04-08 17:48:09 +08001783 next_cpu = cpumask_next_and(next_cpu, hctx->cpumask,
Christoph Hellwig20e4d8132018-01-12 10:53:06 +08001784 cpu_online_mask);
Jens Axboe506e9312014-05-07 10:26:44 -06001785 if (next_cpu >= nr_cpu_ids)
Ming Leif82ddf12018-04-08 17:48:10 +08001786 next_cpu = blk_mq_first_mapped_cpu(hctx);
Jens Axboe506e9312014-05-07 10:26:44 -06001787 hctx->next_cpu_batch = BLK_MQ_CPU_WORK_BATCH;
1788 }
1789
Ming Lei7bed4592018-01-18 00:41:51 +08001790 /*
1791	 * Schedule the work unbound if we can't find an online CPU for this
1792	 * hctx; this should only happen in the CPU DEAD handling path.
1793 */
Ming Lei476f8c92018-04-08 17:48:09 +08001794 if (!cpu_online(next_cpu)) {
Ming Lei7bed4592018-01-18 00:41:51 +08001795 if (!tried) {
1796 tried = true;
1797 goto select_cpu;
1798 }
1799
1800 /*
1801 * Make sure to re-select CPU next time once after CPUs
1802 * in hctx->cpumask become online again.
1803 */
Ming Lei476f8c92018-04-08 17:48:09 +08001804 hctx->next_cpu = next_cpu;
Ming Lei7bed4592018-01-18 00:41:51 +08001805 hctx->next_cpu_batch = 1;
1806 return WORK_CPU_UNBOUND;
1807 }
Ming Lei476f8c92018-04-08 17:48:09 +08001808
1809 hctx->next_cpu = next_cpu;
1810 return next_cpu;
Jens Axboe506e9312014-05-07 10:26:44 -06001811}
1812
André Almeida105663f2020-01-06 15:08:18 -03001813/**
1814 * __blk_mq_delay_run_hw_queue - Run (or schedule to run) a hardware queue.
1815 * @hctx: Pointer to the hardware queue to run.
1816 * @async: If we want to run the queue asynchronously.
Minwoo Imfa94ba82020-12-05 00:20:55 +09001817 * @msecs: Milliseconds of delay to wait before running the queue.
André Almeida105663f2020-01-06 15:08:18 -03001818 *
1819 * If !@async, try to run the queue now. Else, run the queue asynchronously and
1820 * with a delay of @msecs.
1821 */
Bart Van Assche7587a5a2017-04-07 11:16:52 -07001822static void __blk_mq_delay_run_hw_queue(struct blk_mq_hw_ctx *hctx, bool async,
1823 unsigned long msecs)
Jens Axboe320ae512013-10-24 09:20:05 +01001824{
Bart Van Assche5435c022017-06-20 11:15:49 -07001825 if (unlikely(blk_mq_hctx_stopped(hctx)))
Jens Axboe320ae512013-10-24 09:20:05 +01001826 return;
1827
Jens Axboe1b792f22016-09-21 10:12:13 -06001828 if (!async && !(hctx->flags & BLK_MQ_F_BLOCKING)) {
Paolo Bonzini2a90d4a2014-11-07 23:04:00 +01001829 int cpu = get_cpu();
1830 if (cpumask_test_cpu(cpu, hctx->cpumask)) {
Paolo Bonzini398205b2014-11-07 23:03:59 +01001831 __blk_mq_run_hw_queue(hctx);
Paolo Bonzini2a90d4a2014-11-07 23:04:00 +01001832 put_cpu();
Paolo Bonzini398205b2014-11-07 23:03:59 +01001833 return;
1834 }
Jens Axboee4043dc2014-04-09 10:18:23 -06001835
Paolo Bonzini2a90d4a2014-11-07 23:04:00 +01001836 put_cpu();
Jens Axboee4043dc2014-04-09 10:18:23 -06001837 }
Paolo Bonzini398205b2014-11-07 23:03:59 +01001838
Bart Van Asscheae943d22018-01-19 08:58:55 -08001839 kblockd_mod_delayed_work_on(blk_mq_hctx_next_cpu(hctx), &hctx->run_work,
1840 msecs_to_jiffies(msecs));
Bart Van Assche7587a5a2017-04-07 11:16:52 -07001841}
1842
André Almeida105663f2020-01-06 15:08:18 -03001843/**
1844 * blk_mq_delay_run_hw_queue - Run a hardware queue asynchronously.
1845 * @hctx: Pointer to the hardware queue to run.
Minwoo Imfa94ba82020-12-05 00:20:55 +09001846 * @msecs: Milliseconds of delay to wait before running the queue.
André Almeida105663f2020-01-06 15:08:18 -03001847 *
1848 * Run a hardware queue asynchronously with a delay of @msecs.
1849 */
Bart Van Assche7587a5a2017-04-07 11:16:52 -07001850void blk_mq_delay_run_hw_queue(struct blk_mq_hw_ctx *hctx, unsigned long msecs)
1851{
1852 __blk_mq_delay_run_hw_queue(hctx, true, msecs);
1853}
1854EXPORT_SYMBOL(blk_mq_delay_run_hw_queue);
1855
André Almeida105663f2020-01-06 15:08:18 -03001856/**
1857 * blk_mq_run_hw_queue - Start to run a hardware queue.
1858 * @hctx: Pointer to the hardware queue to run.
1859 * @async: If we want to run the queue asynchronously.
1860 *
1861 * Check if the request queue is not in a quiesced state and if there are
1862 * pending requests to be sent. If this is true, run the queue to send requests
1863 * to hardware.
1864 */
John Garry626fb732019-10-30 00:59:30 +08001865void blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx, bool async)
Bart Van Assche7587a5a2017-04-07 11:16:52 -07001866{
Ming Lei24f5a902018-01-06 16:27:38 +08001867 int srcu_idx;
1868 bool need_run;
1869
1870 /*
1871	 * When the queue is quiesced, we may be switching the io scheduler,
1872	 * updating nr_hw_queues, or doing other work, and the queue can't be
1873	 * run any more; even __blk_mq_hctx_has_pending() can't be called safely.
1874 *
1875 * And queue will be rerun in blk_mq_unquiesce_queue() if it is
1876 * quiesced.
1877 */
Jens Axboe04ced152018-01-09 08:29:46 -08001878 hctx_lock(hctx, &srcu_idx);
1879 need_run = !blk_queue_quiesced(hctx->queue) &&
1880 blk_mq_hctx_has_pending(hctx);
1881 hctx_unlock(hctx, srcu_idx);
Ming Lei24f5a902018-01-06 16:27:38 +08001882
John Garry626fb732019-10-30 00:59:30 +08001883 if (need_run)
Jens Axboe79f720a2017-11-10 09:13:21 -07001884 __blk_mq_delay_run_hw_queue(hctx, async, 0);
Jens Axboe320ae512013-10-24 09:20:05 +01001885}
Omar Sandoval5b727272017-04-14 01:00:00 -07001886EXPORT_SYMBOL(blk_mq_run_hw_queue);
Jens Axboe320ae512013-10-24 09:20:05 +01001887
Jan Karab6e68ee2021-01-11 17:47:17 +01001888/*
1889 * Is the request queue handled by an IO scheduler that does not respect
1890 * hardware queues when dispatching?
1891 */
1892static bool blk_mq_has_sqsched(struct request_queue *q)
1893{
1894 struct elevator_queue *e = q->elevator;
1895
1896 if (e && e->type->ops.dispatch_request &&
1897 !(e->type->elevator_features & ELEVATOR_F_MQ_AWARE))
1898 return true;
1899 return false;
1900}
1901
1902/*
1903 * Return the preferred queue to dispatch from (if any) for a non-mq-aware IO
1904 * scheduler.
1905 */
1906static struct blk_mq_hw_ctx *blk_mq_get_sq_hctx(struct request_queue *q)
1907{
1908 struct blk_mq_hw_ctx *hctx;
1909
1910 /*
1911 * If the IO scheduler does not respect hardware queues when
1912 * dispatching, we just don't bother with multiple HW queues and
1913 * dispatch from hctx for the current CPU since running multiple queues
1914 * just causes lock contention inside the scheduler and pointless cache
1915 * bouncing.
1916 */
1917 hctx = blk_mq_map_queue_type(q, HCTX_TYPE_DEFAULT,
1918 raw_smp_processor_id());
1919 if (!blk_mq_hctx_stopped(hctx))
1920 return hctx;
1921 return NULL;
1922}
1923
André Almeida105663f2020-01-06 15:08:18 -03001924/**
Mauro Carvalho Chehab24f7bb82020-10-23 18:32:54 +02001925 * blk_mq_run_hw_queues - Run all hardware queues in a request queue.
André Almeida105663f2020-01-06 15:08:18 -03001926 * @q: Pointer to the request queue to run.
1927 * @async: If we want to run the queue asynchronously.
1928 */
Mike Snitzerb94ec292015-03-11 23:56:38 -04001929void blk_mq_run_hw_queues(struct request_queue *q, bool async)
Jens Axboe320ae512013-10-24 09:20:05 +01001930{
Jan Karab6e68ee2021-01-11 17:47:17 +01001931 struct blk_mq_hw_ctx *hctx, *sq_hctx;
Jens Axboe320ae512013-10-24 09:20:05 +01001932 int i;
1933
Jan Karab6e68ee2021-01-11 17:47:17 +01001934 sq_hctx = NULL;
1935 if (blk_mq_has_sqsched(q))
1936 sq_hctx = blk_mq_get_sq_hctx(q);
Jens Axboe320ae512013-10-24 09:20:05 +01001937 queue_for_each_hw_ctx(q, hctx, i) {
Jens Axboe79f720a2017-11-10 09:13:21 -07001938 if (blk_mq_hctx_stopped(hctx))
Jens Axboe320ae512013-10-24 09:20:05 +01001939 continue;
Jan Karab6e68ee2021-01-11 17:47:17 +01001940 /*
1941 * Dispatch from this hctx either if there's no hctx preferred
1942 * by IO scheduler or if it has requests that bypass the
1943 * scheduler.
1944 */
1945 if (!sq_hctx || sq_hctx == hctx ||
1946 !list_empty_careful(&hctx->dispatch))
1947 blk_mq_run_hw_queue(hctx, async);
Jens Axboe320ae512013-10-24 09:20:05 +01001948 }
1949}
Mike Snitzerb94ec292015-03-11 23:56:38 -04001950EXPORT_SYMBOL(blk_mq_run_hw_queues);
Jens Axboe320ae512013-10-24 09:20:05 +01001951
Bart Van Asschefd001442016-10-28 17:19:37 -07001952/**
Douglas Andersonb9151e72020-04-20 09:24:52 -07001953 * blk_mq_delay_run_hw_queues - Run all hardware queues asynchronously.
1954 * @q: Pointer to the request queue to run.
Minwoo Imfa94ba82020-12-05 00:20:55 +09001955 * @msecs: Milliseconds of delay to wait before running the queues.
Douglas Andersonb9151e72020-04-20 09:24:52 -07001956 */
1957void blk_mq_delay_run_hw_queues(struct request_queue *q, unsigned long msecs)
1958{
Jan Karab6e68ee2021-01-11 17:47:17 +01001959 struct blk_mq_hw_ctx *hctx, *sq_hctx;
Douglas Andersonb9151e72020-04-20 09:24:52 -07001960 int i;
1961
Jan Karab6e68ee2021-01-11 17:47:17 +01001962 sq_hctx = NULL;
1963 if (blk_mq_has_sqsched(q))
1964 sq_hctx = blk_mq_get_sq_hctx(q);
Douglas Andersonb9151e72020-04-20 09:24:52 -07001965 queue_for_each_hw_ctx(q, hctx, i) {
1966 if (blk_mq_hctx_stopped(hctx))
1967 continue;
Jan Karab6e68ee2021-01-11 17:47:17 +01001968 /*
1969 * Dispatch from this hctx either if there's no hctx preferred
1970 * by IO scheduler or if it has requests that bypass the
1971 * scheduler.
1972 */
1973 if (!sq_hctx || sq_hctx == hctx ||
1974 !list_empty_careful(&hctx->dispatch))
1975 blk_mq_delay_run_hw_queue(hctx, msecs);
Douglas Andersonb9151e72020-04-20 09:24:52 -07001976 }
1977}
1978EXPORT_SYMBOL(blk_mq_delay_run_hw_queues);
1979
1980/**
Bart Van Asschefd001442016-10-28 17:19:37 -07001981 * blk_mq_queue_stopped() - check whether one or more hctxs have been stopped
1982 * @q: request queue.
1983 *
1984 * The caller is responsible for serializing this function against
1985 * blk_mq_{start,stop}_hw_queue().
1986 */
1987bool blk_mq_queue_stopped(struct request_queue *q)
1988{
1989 struct blk_mq_hw_ctx *hctx;
1990 int i;
1991
1992 queue_for_each_hw_ctx(q, hctx, i)
1993 if (blk_mq_hctx_stopped(hctx))
1994 return true;
1995
1996 return false;
1997}
1998EXPORT_SYMBOL(blk_mq_queue_stopped);
1999
Ming Lei39a70c72017-06-06 23:22:09 +08002000/*
2001 * This function is often used by a driver to pause .queue_rq() when
2002 * there aren't enough resources or some condition isn't satisfied, and
Bart Van Assche4d606212017-08-17 16:23:00 -07002003 * BLK_STS_RESOURCE is usually returned.
Ming Lei39a70c72017-06-06 23:22:09 +08002004 *
2005 * We do not guarantee that dispatch can be drained or blocked
2006 * after blk_mq_stop_hw_queue() returns. Please use
2007 * blk_mq_quiesce_queue() for that requirement.
2008 */
Jens Axboe320ae512013-10-24 09:20:05 +01002009void blk_mq_stop_hw_queue(struct blk_mq_hw_ctx *hctx)
2010{
Ming Lei641a9ed2017-06-06 23:22:10 +08002011 cancel_delayed_work(&hctx->run_work);
2012
2013 set_bit(BLK_MQ_S_STOPPED, &hctx->state);
Jens Axboe320ae512013-10-24 09:20:05 +01002014}
2015EXPORT_SYMBOL(blk_mq_stop_hw_queue);
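
/*
 * Illustrative sketch of the pairing described above: a hypothetical driver
 * stops the hardware queue when its internal resources run out, returns
 * BLK_STS_RESOURCE so that blk-mq requeues the request, and restarts the
 * stopped queues from its completion path once resources are freed again.
 * example_out_of_ring_space() is an assumption standing in for the driver's
 * own "is the hardware full?" check.
 */
static bool example_out_of_ring_space(void *hw)
{
	return false;
}

static blk_status_t __maybe_unused
example_queue_rq_or_stop(struct blk_mq_hw_ctx *hctx,
			 const struct blk_mq_queue_data *bd)
{
	if (example_out_of_ring_space(hctx->driver_data)) {
		blk_mq_stop_hw_queue(hctx);
		return BLK_STS_RESOURCE;
	}
	/* ... blk_mq_start_request() and normal submission (not shown) ... */
	return BLK_STS_OK;
}

/* Called from the driver's completion handling once space is available. */
static void __maybe_unused example_resources_freed(struct request_queue *q)
{
	blk_mq_start_stopped_hw_queues(q, true);
}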
2016
Ming Lei39a70c72017-06-06 23:22:09 +08002017/*
2018 * This function is often used by a driver to pause .queue_rq() when
2019 * there aren't enough resources or some condition isn't satisfied, and
Bart Van Assche4d606212017-08-17 16:23:00 -07002020 * BLK_STS_RESOURCE is usually returned.
Ming Lei39a70c72017-06-06 23:22:09 +08002021 *
2022 * We do not guarantee that dispatch can be drained or blocked
2023 * after blk_mq_stop_hw_queues() returns. Please use
2024 * blk_mq_quiesce_queue() for that requirement.
2025 */
Jens Axboe2719aa22017-05-03 11:08:14 -06002026void blk_mq_stop_hw_queues(struct request_queue *q)
2027{
Ming Lei641a9ed2017-06-06 23:22:10 +08002028 struct blk_mq_hw_ctx *hctx;
2029 int i;
2030
2031 queue_for_each_hw_ctx(q, hctx, i)
2032 blk_mq_stop_hw_queue(hctx);
Christoph Hellwig280d45f2013-10-25 14:45:58 +01002033}
2034EXPORT_SYMBOL(blk_mq_stop_hw_queues);
2035
Jens Axboe320ae512013-10-24 09:20:05 +01002036void blk_mq_start_hw_queue(struct blk_mq_hw_ctx *hctx)
2037{
2038 clear_bit(BLK_MQ_S_STOPPED, &hctx->state);
Jens Axboee4043dc2014-04-09 10:18:23 -06002039
Jens Axboe0ffbce82014-06-25 08:22:34 -06002040 blk_mq_run_hw_queue(hctx, false);
Jens Axboe320ae512013-10-24 09:20:05 +01002041}
2042EXPORT_SYMBOL(blk_mq_start_hw_queue);
2043
Christoph Hellwig2f268552014-04-16 09:44:56 +02002044void blk_mq_start_hw_queues(struct request_queue *q)
2045{
2046 struct blk_mq_hw_ctx *hctx;
2047 int i;
2048
2049 queue_for_each_hw_ctx(q, hctx, i)
2050 blk_mq_start_hw_queue(hctx);
2051}
2052EXPORT_SYMBOL(blk_mq_start_hw_queues);
2053
Jens Axboeae911c52016-12-08 13:19:30 -07002054void blk_mq_start_stopped_hw_queue(struct blk_mq_hw_ctx *hctx, bool async)
2055{
2056 if (!blk_mq_hctx_stopped(hctx))
2057 return;
2058
2059 clear_bit(BLK_MQ_S_STOPPED, &hctx->state);
2060 blk_mq_run_hw_queue(hctx, async);
2061}
2062EXPORT_SYMBOL_GPL(blk_mq_start_stopped_hw_queue);
2063
Christoph Hellwig1b4a3252014-04-16 09:44:54 +02002064void blk_mq_start_stopped_hw_queues(struct request_queue *q, bool async)
Jens Axboe320ae512013-10-24 09:20:05 +01002065{
2066 struct blk_mq_hw_ctx *hctx;
2067 int i;
2068
Jens Axboeae911c52016-12-08 13:19:30 -07002069 queue_for_each_hw_ctx(q, hctx, i)
2070 blk_mq_start_stopped_hw_queue(hctx, async);
Jens Axboe320ae512013-10-24 09:20:05 +01002071}
2072EXPORT_SYMBOL(blk_mq_start_stopped_hw_queues);
2073
Christoph Hellwig70f4db62014-04-16 10:48:08 -06002074static void blk_mq_run_work_fn(struct work_struct *work)
Jens Axboe320ae512013-10-24 09:20:05 +01002075{
2076 struct blk_mq_hw_ctx *hctx;
2077
Jens Axboe9f993732017-04-10 09:54:54 -06002078 hctx = container_of(work, struct blk_mq_hw_ctx, run_work.work);
Jens Axboe21c6e932017-04-10 09:54:56 -06002079
2080 /*
Ming Lei15fe8a902018-04-08 17:48:11 +08002081 * If we are stopped, don't run the queue.
Jens Axboe21c6e932017-04-10 09:54:56 -06002082 */
Yufen Yu08410312020-10-08 23:26:30 -04002083 if (blk_mq_hctx_stopped(hctx))
Jianchao Wang0196d6b2018-06-04 17:03:55 +08002084 return;
Jens Axboee4043dc2014-04-09 10:18:23 -06002085
Jens Axboe320ae512013-10-24 09:20:05 +01002086 __blk_mq_run_hw_queue(hctx);
2087}
2088
Ming Leicfd0c552015-10-20 23:13:57 +08002089static inline void __blk_mq_insert_req_list(struct blk_mq_hw_ctx *hctx,
Ming Leicfd0c552015-10-20 23:13:57 +08002090 struct request *rq,
2091 bool at_head)
Jens Axboe320ae512013-10-24 09:20:05 +01002092{
Jens Axboee57690f2016-08-24 15:34:35 -06002093 struct blk_mq_ctx *ctx = rq->mq_ctx;
Ming Leic16d6b52018-12-17 08:44:05 -07002094 enum hctx_type type = hctx->type;
Jens Axboee57690f2016-08-24 15:34:35 -06002095
Bart Van Assche7b607812017-06-20 11:15:47 -07002096 lockdep_assert_held(&ctx->lock);
2097
Christoph Hellwiga54895f2020-12-03 17:21:39 +01002098 trace_block_rq_insert(rq);
Jens Axboe01b983c2013-11-19 18:59:10 -07002099
Christoph Hellwig72a0a362014-02-07 10:22:36 -08002100 if (at_head)
Ming Leic16d6b52018-12-17 08:44:05 -07002101 list_add(&rq->queuelist, &ctx->rq_lists[type]);
Christoph Hellwig72a0a362014-02-07 10:22:36 -08002102 else
Ming Leic16d6b52018-12-17 08:44:05 -07002103 list_add_tail(&rq->queuelist, &ctx->rq_lists[type]);
Ming Leicfd0c552015-10-20 23:13:57 +08002104}
Jens Axboe4bb659b2014-05-09 09:36:49 -06002105
Jens Axboe2c3ad662016-12-14 14:34:47 -07002106void __blk_mq_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq,
2107 bool at_head)
Ming Leicfd0c552015-10-20 23:13:57 +08002108{
2109 struct blk_mq_ctx *ctx = rq->mq_ctx;
2110
Bart Van Assche7b607812017-06-20 11:15:47 -07002111 lockdep_assert_held(&ctx->lock);
2112
Jens Axboee57690f2016-08-24 15:34:35 -06002113 __blk_mq_insert_req_list(hctx, rq, at_head);
Jens Axboe320ae512013-10-24 09:20:05 +01002114 blk_mq_hctx_mark_pending(hctx, ctx);
Jens Axboe320ae512013-10-24 09:20:05 +01002115}
2116
André Almeida105663f2020-01-06 15:08:18 -03002117/**
2118 * blk_mq_request_bypass_insert - Insert a request at dispatch list.
2119 * @rq: Pointer to request to be inserted.
Randy Dunlap26bfeb22020-08-16 16:39:34 -07002120 * @at_head: true if the request should be inserted at the head of the list.
André Almeida105663f2020-01-06 15:08:18 -03002121 * @run_queue: If we should run the hardware queue after inserting the request.
2122 *
Jens Axboe157f3772017-09-11 16:43:57 -06002123 * Should only be used carefully, when the caller knows we want to
2124 * bypass a potential IO scheduler on the target device.
2125 */
Ming Lei01e99ae2020-02-25 09:04:32 +08002126void blk_mq_request_bypass_insert(struct request *rq, bool at_head,
2127 bool run_queue)
Jens Axboe157f3772017-09-11 16:43:57 -06002128{
Jens Axboeea4f9952018-10-29 15:06:13 -06002129 struct blk_mq_hw_ctx *hctx = rq->mq_hctx;
Jens Axboe157f3772017-09-11 16:43:57 -06002130
2131 spin_lock(&hctx->lock);
Ming Lei01e99ae2020-02-25 09:04:32 +08002132 if (at_head)
2133 list_add(&rq->queuelist, &hctx->dispatch);
2134 else
2135 list_add_tail(&rq->queuelist, &hctx->dispatch);
Jens Axboe157f3772017-09-11 16:43:57 -06002136 spin_unlock(&hctx->lock);
2137
Ming Leib0850292017-11-02 23:24:34 +08002138 if (run_queue)
2139 blk_mq_run_hw_queue(hctx, false);
Jens Axboe157f3772017-09-11 16:43:57 -06002140}
2141
Jens Axboebd166ef2017-01-17 06:03:22 -07002142void blk_mq_insert_requests(struct blk_mq_hw_ctx *hctx, struct blk_mq_ctx *ctx,
2143 struct list_head *list)
Jens Axboe320ae512013-10-24 09:20:05 +01002144
2145{
Ming Lei3f0cedc2018-07-02 17:35:58 +08002146 struct request *rq;
Ming Leic16d6b52018-12-17 08:44:05 -07002147 enum hctx_type type = hctx->type;
Ming Lei3f0cedc2018-07-02 17:35:58 +08002148
Jens Axboe320ae512013-10-24 09:20:05 +01002149 /*
2150	 * Preemption doesn't flush the plug list, so it's possible that
2151	 * ctx->cpu is offline now.
2152 */
Ming Lei3f0cedc2018-07-02 17:35:58 +08002153 list_for_each_entry(rq, list, queuelist) {
Jens Axboee57690f2016-08-24 15:34:35 -06002154 BUG_ON(rq->mq_ctx != ctx);
Christoph Hellwiga54895f2020-12-03 17:21:39 +01002155 trace_block_rq_insert(rq);
Jens Axboe320ae512013-10-24 09:20:05 +01002156 }
Ming Lei3f0cedc2018-07-02 17:35:58 +08002157
2158 spin_lock(&ctx->lock);
Ming Leic16d6b52018-12-17 08:44:05 -07002159 list_splice_tail_init(list, &ctx->rq_lists[type]);
Ming Leicfd0c552015-10-20 23:13:57 +08002160 blk_mq_hctx_mark_pending(hctx, ctx);
Jens Axboe320ae512013-10-24 09:20:05 +01002161 spin_unlock(&ctx->lock);
Jens Axboe320ae512013-10-24 09:20:05 +01002162}
2163
Sami Tolvanen4f0f5862021-04-08 11:28:34 -07002164static int plug_rq_cmp(void *priv, const struct list_head *a,
2165 const struct list_head *b)
Jens Axboe320ae512013-10-24 09:20:05 +01002166{
2167 struct request *rqa = container_of(a, struct request, queuelist);
2168 struct request *rqb = container_of(b, struct request, queuelist);
2169
Pavel Begunkov7d30a622019-11-29 00:11:53 +03002170 if (rqa->mq_ctx != rqb->mq_ctx)
2171 return rqa->mq_ctx > rqb->mq_ctx;
2172 if (rqa->mq_hctx != rqb->mq_hctx)
2173 return rqa->mq_hctx > rqb->mq_hctx;
Jens Axboe3110fc72018-10-30 12:24:04 -06002174
2175 return blk_rq_pos(rqa) > blk_rq_pos(rqb);
Jens Axboe320ae512013-10-24 09:20:05 +01002176}
2177
2178void blk_mq_flush_plug_list(struct blk_plug *plug, bool from_schedule)
2179{
Jens Axboe320ae512013-10-24 09:20:05 +01002180 LIST_HEAD(list);
Jens Axboe320ae512013-10-24 09:20:05 +01002181
Pavel Begunkov95ed0c52019-11-29 00:11:55 +03002182 if (list_empty(&plug->mq_list))
2183 return;
Jens Axboe320ae512013-10-24 09:20:05 +01002184 list_splice_init(&plug->mq_list, &list);
2185
Jens Axboece5b0092018-11-27 17:13:56 -07002186 if (plug->rq_count > 2 && plug->multiple_queues)
2187 list_sort(NULL, &list, plug_rq_cmp);
Jens Axboe320ae512013-10-24 09:20:05 +01002188
Dongli Zhangbcc816d2019-04-04 10:57:44 +08002189 plug->rq_count = 0;
2190
Pavel Begunkov95ed0c52019-11-29 00:11:55 +03002191 do {
2192 struct list_head rq_list;
2193 struct request *rq, *head_rq = list_entry_rq(list.next);
2194 struct list_head *pos = &head_rq->queuelist; /* skip first */
2195 struct blk_mq_hw_ctx *this_hctx = head_rq->mq_hctx;
2196 struct blk_mq_ctx *this_ctx = head_rq->mq_ctx;
2197 unsigned int depth = 1;
Jens Axboe320ae512013-10-24 09:20:05 +01002198
Pavel Begunkov95ed0c52019-11-29 00:11:55 +03002199 list_for_each_continue(pos, &list) {
2200 rq = list_entry_rq(pos);
2201 BUG_ON(!rq->q);
2202 if (rq->mq_hctx != this_hctx || rq->mq_ctx != this_ctx)
2203 break;
2204 depth++;
Jens Axboe320ae512013-10-24 09:20:05 +01002205 }
2206
Pavel Begunkov95ed0c52019-11-29 00:11:55 +03002207 list_cut_before(&rq_list, &list, pos);
2208 trace_block_unplug(head_rq->q, depth, !from_schedule);
Jens Axboe67cae4c2018-10-30 11:31:51 -06002209 blk_mq_sched_insert_requests(this_hctx, this_ctx, &rq_list,
Jens Axboebd166ef2017-01-17 06:03:22 -07002210 from_schedule);
Pavel Begunkov95ed0c52019-11-29 00:11:55 +03002211 } while(!list_empty(&list));
Jens Axboe320ae512013-10-24 09:20:05 +01002212}
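
/*
 * Illustrative sketch of the submitter side that feeds the unplug path
 * above: batching bio submissions under a plug so blk_mq_flush_plug_list()
 * can sort and insert them per hw queue in one go. Purely an example of the
 * public plugging API, not code used by blk-mq itself.
 */
static void __maybe_unused example_submit_batch(struct bio **bios, int nr)
{
	struct blk_plug plug;
	int i;

	blk_start_plug(&plug);
	for (i = 0; i < nr; i++)
		submit_bio(bios[i]);
	blk_finish_plug(&plug);	/* eventually calls blk_mq_flush_plug_list() */
}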
2213
Christoph Hellwig14ccb662019-06-06 12:29:01 +02002214static void blk_mq_bio_to_request(struct request *rq, struct bio *bio,
2215 unsigned int nr_segs)
Jens Axboe320ae512013-10-24 09:20:05 +01002216{
Eric Biggers93f221a2020-09-15 20:53:14 -07002217 int err;
2218
Christoph Hellwigf924cdd2019-06-06 12:29:00 +02002219 if (bio->bi_opf & REQ_RAHEAD)
2220 rq->cmd_flags |= REQ_FAILFAST_MASK;
2221
2222 rq->__sector = bio->bi_iter.bi_sector;
2223 rq->write_hint = bio->bi_write_hint;
Christoph Hellwig14ccb662019-06-06 12:29:01 +02002224 blk_rq_bio_prep(rq, bio, nr_segs);
Eric Biggers93f221a2020-09-15 20:53:14 -07002225
2226 /* This can't fail, since GFP_NOIO includes __GFP_DIRECT_RECLAIM. */
2227 err = blk_crypto_rq_bio_prep(rq, bio, GFP_NOIO);
2228 WARN_ON_ONCE(err);
Jens Axboe4b570522014-05-29 11:00:11 -06002229
Konstantin Khlebnikovb5af37a2020-05-27 07:24:16 +02002230 blk_account_io_start(rq);
Jens Axboe320ae512013-10-24 09:20:05 +01002231}
2232
Mike Snitzer0f955492018-01-17 11:25:56 -05002233static blk_status_t __blk_mq_issue_directly(struct blk_mq_hw_ctx *hctx,
Christoph Hellwig3e087732021-10-12 13:12:24 +02002234 struct request *rq, bool last)
Shaohua Lif984df12015-05-08 10:51:32 -07002235{
Shaohua Lif984df12015-05-08 10:51:32 -07002236 struct request_queue *q = rq->q;
Shaohua Lif984df12015-05-08 10:51:32 -07002237 struct blk_mq_queue_data bd = {
2238 .rq = rq,
Jens Axboebe94f052018-11-24 10:15:46 -07002239 .last = last,
Shaohua Lif984df12015-05-08 10:51:32 -07002240 };
Jens Axboef06345a2017-06-12 11:22:46 -06002241 blk_status_t ret;
Mike Snitzer0f955492018-01-17 11:25:56 -05002242
Mike Snitzer0f955492018-01-17 11:25:56 -05002243 /*
2244	 * If the queue accepted the request, we are done. On error, the
2245	 * caller may kill it. For any other (busy) status, just add it back
2246	 * to our list as we previously would have done.
2247 */
2248 ret = q->mq_ops->queue_rq(hctx, &bd);
2249 switch (ret) {
2250 case BLK_STS_OK:
Ming Lei6ce3dd62018-07-10 09:03:31 +08002251 blk_mq_update_dispatch_busy(hctx, false);
Mike Snitzer0f955492018-01-17 11:25:56 -05002252 break;
2253 case BLK_STS_RESOURCE:
Ming Lei86ff7c22018-01-30 22:04:57 -05002254 case BLK_STS_DEV_RESOURCE:
Ming Lei6ce3dd62018-07-10 09:03:31 +08002255 blk_mq_update_dispatch_busy(hctx, true);
Mike Snitzer0f955492018-01-17 11:25:56 -05002256 __blk_mq_requeue_request(rq);
2257 break;
2258 default:
Ming Lei6ce3dd62018-07-10 09:03:31 +08002259 blk_mq_update_dispatch_busy(hctx, false);
Mike Snitzer0f955492018-01-17 11:25:56 -05002260 break;
2261 }
2262
2263 return ret;
2264}
2265
Bart Van Asschefd9c40f2019-04-04 10:08:43 -07002266static blk_status_t __blk_mq_try_issue_directly(struct blk_mq_hw_ctx *hctx,
Mike Snitzer0f955492018-01-17 11:25:56 -05002267 struct request *rq,
Bart Van Asschefd9c40f2019-04-04 10:08:43 -07002268 bool bypass_insert, bool last)
Mike Snitzer0f955492018-01-17 11:25:56 -05002269{
2270 struct request_queue *q = rq->q;
Ming Leid964f042017-06-06 23:22:00 +08002271 bool run_queue = true;
Ming Lei2a5a24a2021-01-22 10:33:12 +08002272 int budget_token;
Ming Leid964f042017-06-06 23:22:00 +08002273
Ming Lei23d4ee12018-01-18 12:06:59 +08002274 /*
Bart Van Asschefd9c40f2019-04-04 10:08:43 -07002275 * RCU or SRCU read lock is needed before checking quiesced flag.
Ming Lei23d4ee12018-01-18 12:06:59 +08002276 *
Bart Van Asschefd9c40f2019-04-04 10:08:43 -07002277	 * When the queue is stopped or quiesced, ignore 'bypass_insert' from
2278	 * blk_mq_request_issue_directly(), and return BLK_STS_OK to the caller
2279	 * so that the driver does not try to dispatch again.
Ming Lei23d4ee12018-01-18 12:06:59 +08002280 */
Bart Van Asschefd9c40f2019-04-04 10:08:43 -07002281 if (blk_mq_hctx_stopped(hctx) || blk_queue_quiesced(q)) {
Ming Leid964f042017-06-06 23:22:00 +08002282 run_queue = false;
Bart Van Asschefd9c40f2019-04-04 10:08:43 -07002283 bypass_insert = false;
2284 goto insert;
Ming Leid964f042017-06-06 23:22:00 +08002285 }
Shaohua Lif984df12015-05-08 10:51:32 -07002286
Jens Axboe2ff06822021-10-15 09:44:38 -06002287 if ((rq->rq_flags & RQF_ELV) && !bypass_insert)
Bart Van Asschefd9c40f2019-04-04 10:08:43 -07002288 goto insert;
Bart Van Assche2253efc2016-10-28 17:20:02 -07002289
Ming Lei2a5a24a2021-01-22 10:33:12 +08002290 budget_token = blk_mq_get_dispatch_budget(q);
2291 if (budget_token < 0)
Bart Van Asschefd9c40f2019-04-04 10:08:43 -07002292 goto insert;
Jens Axboebd166ef2017-01-17 06:03:22 -07002293
Ming Lei2a5a24a2021-01-22 10:33:12 +08002294 blk_mq_set_rq_budget_token(rq, budget_token);
2295
Ming Lei8ab6bb9e2018-06-25 19:31:45 +08002296 if (!blk_mq_get_driver_tag(rq)) {
Ming Lei2a5a24a2021-01-22 10:33:12 +08002297 blk_mq_put_dispatch_budget(q, budget_token);
Bart Van Asschefd9c40f2019-04-04 10:08:43 -07002298 goto insert;
Ming Lei88022d72017-11-05 02:21:12 +08002299 }
Ming Leide148292017-10-14 17:22:29 +08002300
Christoph Hellwig3e087732021-10-12 13:12:24 +02002301 return __blk_mq_issue_directly(hctx, rq, last);
Bart Van Asschefd9c40f2019-04-04 10:08:43 -07002302insert:
2303 if (bypass_insert)
2304 return BLK_STS_RESOURCE;
2305
Ming Leidb03f882020-08-18 17:07:28 +08002306 blk_mq_sched_insert_request(rq, false, run_queue, false);
2307
Bart Van Asschefd9c40f2019-04-04 10:08:43 -07002308 return BLK_STS_OK;
2309}
2310
André Almeida105663f2020-01-06 15:08:18 -03002311/**
2312 * blk_mq_try_issue_directly - Try to send a request directly to device driver.
2313 * @hctx: Pointer of the associated hardware queue.
2314 * @rq: Pointer to request to be sent.
André Almeida105663f2020-01-06 15:08:18 -03002315 *
2316 * If the device has enough resources to accept a new request now, send the
2317 * request directly to device driver. Else, insert at hctx->dispatch queue, so
2318 * we can try send it another time in the future. Requests inserted at this
2319 * queue have higher priority.
2320 */
Bart Van Asschefd9c40f2019-04-04 10:08:43 -07002321static void blk_mq_try_issue_directly(struct blk_mq_hw_ctx *hctx,
Christoph Hellwig3e087732021-10-12 13:12:24 +02002322 struct request *rq)
Bart Van Asschefd9c40f2019-04-04 10:08:43 -07002323{
2324 blk_status_t ret;
2325 int srcu_idx;
2326
2327 might_sleep_if(hctx->flags & BLK_MQ_F_BLOCKING);
2328
2329 hctx_lock(hctx, &srcu_idx);
2330
Christoph Hellwig3e087732021-10-12 13:12:24 +02002331 ret = __blk_mq_try_issue_directly(hctx, rq, false, true);
Bart Van Asschefd9c40f2019-04-04 10:08:43 -07002332 if (ret == BLK_STS_RESOURCE || ret == BLK_STS_DEV_RESOURCE)
Ming Lei01e99ae2020-02-25 09:04:32 +08002333 blk_mq_request_bypass_insert(rq, false, true);
Bart Van Asschefd9c40f2019-04-04 10:08:43 -07002334 else if (ret != BLK_STS_OK)
2335 blk_mq_end_request(rq, ret);
2336
Jens Axboe04ced152018-01-09 08:29:46 -08002337 hctx_unlock(hctx, srcu_idx);
Bart Van Asschefd9c40f2019-04-04 10:08:43 -07002338}
2339
2340blk_status_t blk_mq_request_issue_directly(struct request *rq, bool last)
2341{
2342 blk_status_t ret;
2343 int srcu_idx;
Bart Van Asschefd9c40f2019-04-04 10:08:43 -07002344 struct blk_mq_hw_ctx *hctx = rq->mq_hctx;
2345
2346 hctx_lock(hctx, &srcu_idx);
Christoph Hellwig3e087732021-10-12 13:12:24 +02002347 ret = __blk_mq_try_issue_directly(hctx, rq, true, last);
Bart Van Asschefd9c40f2019-04-04 10:08:43 -07002348 hctx_unlock(hctx, srcu_idx);
Jianchao Wang7f556a42018-12-14 09:28:18 +08002349
2350 return ret;
Christoph Hellwig5eb61262017-03-22 15:01:51 -04002351}
2352
Ming Lei6ce3dd62018-07-10 09:03:31 +08002353void blk_mq_try_issue_list_directly(struct blk_mq_hw_ctx *hctx,
2354 struct list_head *list)
2355{
Keith Busch536167d42020-04-07 03:13:48 +09002356 int queued = 0;
yangerkun632bfb62020-09-05 19:25:56 +08002357 int errors = 0;
Keith Busch536167d42020-04-07 03:13:48 +09002358
Ming Lei6ce3dd62018-07-10 09:03:31 +08002359 while (!list_empty(list)) {
Bart Van Asschefd9c40f2019-04-04 10:08:43 -07002360 blk_status_t ret;
Ming Lei6ce3dd62018-07-10 09:03:31 +08002361 struct request *rq = list_first_entry(list, struct request,
2362 queuelist);
2363
2364 list_del_init(&rq->queuelist);
Bart Van Asschefd9c40f2019-04-04 10:08:43 -07002365 ret = blk_mq_request_issue_directly(rq, list_empty(list));
2366 if (ret != BLK_STS_OK) {
2367 if (ret == BLK_STS_RESOURCE ||
2368 ret == BLK_STS_DEV_RESOURCE) {
Ming Lei01e99ae2020-02-25 09:04:32 +08002369 blk_mq_request_bypass_insert(rq, false,
Jens Axboec616cbe2018-12-06 22:17:44 -07002370 list_empty(list));
Bart Van Asschefd9c40f2019-04-04 10:08:43 -07002371 break;
2372 }
2373 blk_mq_end_request(rq, ret);
yangerkun632bfb62020-09-05 19:25:56 +08002374 errors++;
Keith Busch536167d42020-04-07 03:13:48 +09002375 } else
2376 queued++;
Ming Lei6ce3dd62018-07-10 09:03:31 +08002377 }
Jens Axboed666ba92018-11-27 17:02:25 -07002378
2379 /*
2380 * If we didn't flush the entire list, we could have told
2381 * the driver there was more coming, but that turned out to
2382 * be a lie.
2383 */
yangerkun632bfb62020-09-05 19:25:56 +08002384 if ((!list_empty(list) || errors) &&
2385 hctx->queue->mq_ops->commit_rqs && queued)
Jens Axboed666ba92018-11-27 17:02:25 -07002386 hctx->queue->mq_ops->commit_rqs(hctx);
Ming Lei6ce3dd62018-07-10 09:03:31 +08002387}
2388
Jens Axboece5b0092018-11-27 17:13:56 -07002389static void blk_add_rq_to_plug(struct blk_plug *plug, struct request *rq)
2390{
2391 list_add_tail(&rq->queuelist, &plug->mq_list);
2392 plug->rq_count++;
2393 if (!plug->multiple_queues && !list_is_singular(&plug->mq_list)) {
2394 struct request *tmp;
2395
2396 tmp = list_first_entry(&plug->mq_list, struct request,
2397 queuelist);
2398 if (tmp->q != rq->q)
2399 plug->multiple_queues = true;
2400 }
2401}
2402
Song Liu7f2a6a62021-09-07 16:03:38 -07002403/*
Jens Axboeba0ffdd2021-10-06 12:01:07 -06002404 * Allow 2x BLK_MAX_REQUEST_COUNT requests on plug queue for multiple
Song Liu7f2a6a62021-09-07 16:03:38 -07002405 * queues. This is important for md arrays to benefit from merging
2406 * requests.
2407 */
2408static inline unsigned short blk_plug_max_rq_count(struct blk_plug *plug)
2409{
2410 if (plug->multiple_queues)
Jens Axboeba0ffdd2021-10-06 12:01:07 -06002411 return BLK_MAX_REQUEST_COUNT * 2;
Song Liu7f2a6a62021-09-07 16:03:38 -07002412 return BLK_MAX_REQUEST_COUNT;
2413}
2414
André Almeida105663f2020-01-06 15:08:18 -03002415/**
Christoph Hellwigc62b37d2020-07-01 10:59:43 +02002416 * blk_mq_submit_bio - Create and send a request to block device.
André Almeida105663f2020-01-06 15:08:18 -03002417 * @bio: Bio pointer.
2418 *
2419 * Builds up a request structure from @bio and sends it to the device. The
2420 * request may not be queued directly to hardware if:
2421 * * This request can be merged with another one
2422 * * We want to place the request on the plug queue for possible future merging
2423 * * There is an IO scheduler active on this queue
2424 *
2425 * It will not queue the request if there is an error with the bio or if
2426 * request creation fails.
André Almeida105663f2020-01-06 15:08:18 -03002427 */
Christoph Hellwig3e087732021-10-12 13:12:24 +02002428void blk_mq_submit_bio(struct bio *bio)
Jens Axboe07068d52014-05-22 10:40:51 -06002429{
Pavel Begunkoved6cdde2021-10-14 15:03:30 +01002430 struct request_queue *q = bdev_get_queue(bio->bi_bdev);
Christoph Hellwigef295ec2016-10-28 08:48:16 -06002431 const int is_sync = op_is_sync(bio->bi_opf);
Christoph Hellwigf73f44e2017-01-27 08:30:47 -07002432 const int is_flush_fua = op_is_flush(bio->bi_opf);
Jens Axboe07068d52014-05-22 10:40:51 -06002433 struct request *rq;
Shaohua Lif984df12015-05-08 10:51:32 -07002434 struct blk_plug *plug;
Shaohua Li5b3f3412015-05-08 10:51:33 -07002435 struct request *same_queue_rq = NULL;
Jens Axboeabd45c12021-10-13 12:43:41 -06002436 unsigned int nr_segs = 1;
Satya Tangiralaa892c8d2020-05-14 00:37:18 +00002437 blk_status_t ret;
Jens Axboe07068d52014-05-22 10:40:51 -06002438
2439 blk_queue_bounce(q, &bio);
Jens Axboeabd45c12021-10-13 12:43:41 -06002440 if (blk_may_split(q, bio))
2441 __blk_queue_split(q, &bio, &nr_segs);
Wen Xiongf36ea502017-05-10 08:54:11 -05002442
Dmitry Monakhove23947b2017-06-29 11:31:11 -07002443 if (!bio_integrity_prep(bio))
Christoph Hellwigac7c5672020-05-16 20:28:01 +02002444 goto queue_exit;
Jens Axboe07068d52014-05-22 10:40:51 -06002445
Omar Sandoval87c279e2016-06-01 22:18:48 -07002446 if (!is_flush_fua && !blk_queue_nomerges(q) &&
Christoph Hellwig14ccb662019-06-06 12:29:01 +02002447 blk_attempt_plug_merge(q, bio, nr_segs, &same_queue_rq))
Christoph Hellwigac7c5672020-05-16 20:28:01 +02002448 goto queue_exit;
Shaohua Lif984df12015-05-08 10:51:32 -07002449
Christoph Hellwig14ccb662019-06-06 12:29:01 +02002450 if (blk_mq_sched_bio_merge(q, bio, nr_segs))
Christoph Hellwigac7c5672020-05-16 20:28:01 +02002451 goto queue_exit;
Jens Axboebd166ef2017-01-17 06:03:22 -07002452
Christoph Hellwigd5337562018-11-14 17:02:09 +01002453 rq_qos_throttle(q, bio);
Jens Axboe87760e52016-11-09 12:38:14 -07002454
Jens Axboe47c122e2021-10-06 06:34:11 -06002455 plug = blk_mq_plug(q, bio);
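	/*
	 * Prefer a request pre-allocated into the plug cache by an earlier
	 * batched allocation; otherwise allocate one now, batching up to
	 * plug->nr_ios requests into the cache when a plug is active.
	 */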
2456 if (plug && plug->cached_rq) {
Jens Axboe013a7f92021-10-13 07:58:52 -06002457 rq = rq_list_pop(&plug->cached_rq);
Jens Axboe47c122e2021-10-06 06:34:11 -06002458 INIT_LIST_HEAD(&rq->queuelist);
Jens Axboe47c122e2021-10-06 06:34:11 -06002459 } else {
Christoph Hellwig0f38d762021-10-12 12:40:45 +02002460 struct blk_mq_alloc_data data = {
2461 .q = q,
2462 .nr_tags = 1,
2463 .cmd_flags = bio->bi_opf,
2464 };
2465
Jens Axboe47c122e2021-10-06 06:34:11 -06002466 if (plug) {
2467 data.nr_tags = plug->nr_ios;
2468 plug->nr_ios = 1;
2469 data.cached_rq = &plug->cached_rq;
2470 }
Christoph Hellwigb90cfae2021-10-12 12:40:44 +02002471 rq = __blk_mq_alloc_requests(&data);
Jens Axboe47c122e2021-10-06 06:34:11 -06002472 if (unlikely(!rq)) {
2473 rq_qos_cleanup(q, bio);
2474 if (bio->bi_opf & REQ_NOWAIT)
2475 bio_wouldblock_error(bio);
2476 goto queue_exit;
2477 }
Jens Axboe87760e52016-11-09 12:38:14 -07002478 }
2479
Christoph Hellwige8a676d2020-12-03 17:21:36 +01002480 trace_block_getrq(bio);
Xiaoguang Wangd6f1dda2018-10-23 22:30:50 +08002481
Josef Bacikc1c80382018-07-03 11:14:59 -04002482 rq_qos_track(q, rq, bio);
Jens Axboe07068d52014-05-22 10:40:51 -06002483
Bart Van Assche970d1682019-07-01 08:47:30 -07002484 blk_mq_bio_to_request(rq, bio, nr_segs);
2485
Satya Tangiralaa892c8d2020-05-14 00:37:18 +00002486 ret = blk_crypto_init_request(rq);
2487 if (ret != BLK_STS_OK) {
2488 bio->bi_status = ret;
2489 bio_endio(bio);
2490 blk_mq_free_request(rq);
Christoph Hellwig3e087732021-10-12 13:12:24 +02002491 return;
Satya Tangiralaa892c8d2020-05-14 00:37:18 +00002492 }
2493
Christoph Hellwiga4d907b2017-03-22 15:01:53 -04002494 if (unlikely(is_flush_fua)) {
Jens Axboe4a60f362021-10-16 07:34:49 -06002495 struct blk_mq_hw_ctx *hctx = rq->mq_hctx;
André Almeida105663f2020-01-06 15:08:18 -03002496 /* Bypass scheduler for flush requests */
Ming Lei923218f2017-11-02 23:24:38 +08002497 blk_insert_flush(rq);
Jens Axboe4a60f362021-10-16 07:34:49 -06002498 blk_mq_run_hw_queue(hctx, true);
Ming Lei03f26d82021-05-14 10:20:52 +08002499 } else if (plug && (q->nr_hw_queues == 1 ||
John Garry079a2e32021-10-05 18:23:39 +08002500 blk_mq_is_shared_tags(rq->mq_hctx->flags) ||
Ming Lei03f26d82021-05-14 10:20:52 +08002501 q->mq_ops->commit_rqs || !blk_queue_nonrot(q))) {
Jens Axboeb2c5d162018-11-29 10:03:42 -07002502 /*
2503 * Use plugging if we have a ->commit_rqs() hook as well, as
2504 * we know the driver uses bd->last in a smart fashion.
Ming Lei3154df22019-09-27 15:24:31 +08002505 *
2506 * Use normal plugging if this disk is a slow HDD, as sequential
2507 * IO may benefit a lot from plug merging.
Jens Axboeb2c5d162018-11-29 10:03:42 -07002508 */
Jens Axboe5f0ed772018-11-23 22:04:33 -07002509 unsigned int request_count = plug->rq_count;
Shaohua Li600271d2016-11-03 17:03:54 -07002510 struct request *last = NULL;
2511
Ming Lei676d0602015-10-20 23:13:56 +08002512 if (!request_count)
Jeff Moyere6c44382015-05-08 10:51:30 -07002513 trace_block_plug(q);
Shaohua Li600271d2016-11-03 17:03:54 -07002514 else
2515 last = list_entry_rq(plug->mq_list.prev);
Jens Axboeb094f892015-11-20 20:29:45 -07002516
Song Liu7f2a6a62021-09-07 16:03:38 -07002517 if (request_count >= blk_plug_max_rq_count(plug) || (last &&
Shaohua Li600271d2016-11-03 17:03:54 -07002518 blk_rq_bytes(last) >= BLK_PLUG_FLUSH_SIZE)) {
Jeff Moyere6c44382015-05-08 10:51:30 -07002519 blk_flush_plug_list(plug, false);
2520 trace_block_plug(q);
Jens Axboe320ae512013-10-24 09:20:05 +01002521 }
Jens Axboeb094f892015-11-20 20:29:45 -07002522
Jens Axboece5b0092018-11-27 17:13:56 -07002523 blk_add_rq_to_plug(plug, rq);
Jens Axboe2ff06822021-10-15 09:44:38 -06002524 } else if (rq->rq_flags & RQF_ELV) {
André Almeida105663f2020-01-06 15:08:18 -03002525 /* Insert the request at the IO scheduler queue */
Ming Leia12de1d2019-09-27 15:24:30 +08002526 blk_mq_sched_insert_request(rq, false, true, true);
Christoph Hellwig22997222017-03-22 15:01:52 -04002527 } else if (plug && !blk_queue_nomerges(q)) {
Jens Axboe320ae512013-10-24 09:20:05 +01002528 /*
2529 * We do limited plugging. If the bio can be merged, do that.
2530 * Otherwise the existing request in the plug list will be
2531 * issued. So the plug list will have one request at most.
Christoph Hellwig22997222017-03-22 15:01:52 -04002532 * The plug list might get flushed before this. If that happens,
2533 * the plug list is empty, and same_queue_rq is invalid.
Jens Axboe320ae512013-10-24 09:20:05 +01002534 */
Christoph Hellwig22997222017-03-22 15:01:52 -04002535 if (list_empty(&plug->mq_list))
2536 same_queue_rq = NULL;
Jens Axboe4711b572018-11-27 17:07:17 -07002537 if (same_queue_rq) {
Christoph Hellwig22997222017-03-22 15:01:52 -04002538 list_del_init(&same_queue_rq->queuelist);
Jens Axboe4711b572018-11-27 17:07:17 -07002539 plug->rq_count--;
2540 }
Jens Axboece5b0092018-11-27 17:13:56 -07002541 blk_add_rq_to_plug(plug, rq);
Yufen Yuff3b74b2019-03-26 21:19:25 +08002542 trace_block_plug(q);
Christoph Hellwig22997222017-03-22 15:01:52 -04002543
Ming Leidad7a3b2017-06-06 23:21:59 +08002544 if (same_queue_rq) {
Yufen Yuff3b74b2019-03-26 21:19:25 +08002545 trace_block_unplug(q, 1, true);
Christoph Hellwig0f38d762021-10-12 12:40:45 +02002546 blk_mq_try_issue_directly(same_queue_rq->mq_hctx,
Christoph Hellwig3e087732021-10-12 13:12:24 +02002547 same_queue_rq);
Ming Leidad7a3b2017-06-06 23:21:59 +08002548 }
Ming Leia12de1d2019-09-27 15:24:30 +08002549 } else if ((q->nr_hw_queues > 1 && is_sync) ||
Christoph Hellwig0f38d762021-10-12 12:40:45 +02002550 !rq->mq_hctx->dispatch_busy) {
André Almeida105663f2020-01-06 15:08:18 -03002551 /*
2552 * There is no scheduler and we can try to send directly
2553 * to the hardware.
2554 */
Christoph Hellwig3e087732021-10-12 13:12:24 +02002555 blk_mq_try_issue_directly(rq->mq_hctx, rq);
Ming Leiab42f352017-05-26 19:53:19 +08002556 } else {
André Almeida105663f2020-01-06 15:08:18 -03002557 /* Default case. */
huhai8fa9f552018-05-16 08:21:21 -06002558 blk_mq_sched_insert_request(rq, false, true, true);
Ming Leiab42f352017-05-26 19:53:19 +08002559 }
Jens Axboe320ae512013-10-24 09:20:05 +01002560
Christoph Hellwig3e087732021-10-12 13:12:24 +02002561 return;
Christoph Hellwigac7c5672020-05-16 20:28:01 +02002562queue_exit:
2563 blk_queue_exit(q);
Jens Axboe320ae512013-10-24 09:20:05 +01002564}
2565
Ming Leibd631412021-05-11 23:22:35 +08002566static size_t order_to_size(unsigned int order)
2567{
2568 return (size_t)PAGE_SIZE << order;
2569}
2570
2571/* Called before freeing the request pool in @tags */
John Garryf32e4ea2021-10-05 18:23:32 +08002572static void blk_mq_clear_rq_mapping(struct blk_mq_tags *drv_tags,
2573 struct blk_mq_tags *tags)
Ming Leibd631412021-05-11 23:22:35 +08002574{
Ming Leibd631412021-05-11 23:22:35 +08002575 struct page *page;
2576 unsigned long flags;
2577
John Garry4f245d52021-10-05 18:23:33 +08002578	/* There is no need to clear the driver tags' own mapping */
2579 if (drv_tags == tags)
2580 return;
2581
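	/*
	 * Walk every page backing this tag set's request pool and clear any
	 * driver-tag slot that still points at a request inside those pages.
	 */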
Ming Leibd631412021-05-11 23:22:35 +08002582 list_for_each_entry(page, &tags->page_list, lru) {
2583 unsigned long start = (unsigned long)page_address(page);
2584 unsigned long end = start + order_to_size(page->private);
2585 int i;
2586
John Garryf32e4ea2021-10-05 18:23:32 +08002587 for (i = 0; i < drv_tags->nr_tags; i++) {
Ming Leibd631412021-05-11 23:22:35 +08002588 struct request *rq = drv_tags->rqs[i];
2589 unsigned long rq_addr = (unsigned long)rq;
2590
2591 if (rq_addr >= start && rq_addr < end) {
2592 WARN_ON_ONCE(refcount_read(&rq->ref) != 0);
2593 cmpxchg(&drv_tags->rqs[i], rq, NULL);
2594 }
2595 }
2596 }
2597
2598 /*
2599 * Wait until all pending iterations are done.
2600 *
2601 * Request reference is cleared and it is guaranteed to be observed
2602 * after the ->lock is released.
2603 */
2604 spin_lock_irqsave(&drv_tags->lock, flags);
2605 spin_unlock_irqrestore(&drv_tags->lock, flags);
2606}
2607
Jens Axboecc71a6f2017-01-11 14:29:56 -07002608void blk_mq_free_rqs(struct blk_mq_tag_set *set, struct blk_mq_tags *tags,
2609 unsigned int hctx_idx)
Jens Axboe320ae512013-10-24 09:20:05 +01002610{
John Garryf32e4ea2021-10-05 18:23:32 +08002611 struct blk_mq_tags *drv_tags;
Jens Axboe320ae512013-10-24 09:20:05 +01002612 struct page *page;
2613
John Garry079a2e32021-10-05 18:23:39 +08002614 if (blk_mq_is_shared_tags(set->flags))
2615 drv_tags = set->shared_tags;
John Garrye155b0c2021-10-05 18:23:37 +08002616 else
2617 drv_tags = set->tags[hctx_idx];
John Garryf32e4ea2021-10-05 18:23:32 +08002618
John Garry65de57b2021-10-05 18:23:26 +08002619 if (tags->static_rqs && set->ops->exit_request) {
Christoph Hellwige9b267d2014-04-15 13:59:10 -06002620 int i;
2621
Christoph Hellwig24d2f902014-04-15 14:14:00 -06002622 for (i = 0; i < tags->nr_tags; i++) {
Jens Axboe2af8cbe2017-01-13 14:39:30 -07002623 struct request *rq = tags->static_rqs[i];
2624
2625 if (!rq)
Christoph Hellwige9b267d2014-04-15 13:59:10 -06002626 continue;
Christoph Hellwigd6296d392017-05-01 10:19:08 -06002627 set->ops->exit_request(set, rq, hctx_idx);
Jens Axboe2af8cbe2017-01-13 14:39:30 -07002628 tags->static_rqs[i] = NULL;
Christoph Hellwige9b267d2014-04-15 13:59:10 -06002629 }
2630 }
2631
John Garryf32e4ea2021-10-05 18:23:32 +08002632 blk_mq_clear_rq_mapping(drv_tags, tags);
Ming Leibd631412021-05-11 23:22:35 +08002633
Christoph Hellwig24d2f902014-04-15 14:14:00 -06002634 while (!list_empty(&tags->page_list)) {
2635 page = list_first_entry(&tags->page_list, struct page, lru);
Dave Hansen67534712014-01-08 20:17:46 -07002636 list_del_init(&page->lru);
Catalin Marinasf75782e2015-09-14 18:16:02 +01002637 /*
2638 * Remove kmemleak object previously allocated in
Raul E Rangel273938b2019-05-02 13:48:11 -06002639 * blk_mq_alloc_rqs().
Catalin Marinasf75782e2015-09-14 18:16:02 +01002640 */
2641 kmemleak_free(page_address(page));
Jens Axboe320ae512013-10-24 09:20:05 +01002642 __free_pages(page, page->private);
2643 }
Jens Axboecc71a6f2017-01-11 14:29:56 -07002644}
Jens Axboe320ae512013-10-24 09:20:05 +01002645
John Garrye155b0c2021-10-05 18:23:37 +08002646void blk_mq_free_rq_map(struct blk_mq_tags *tags)
Jens Axboecc71a6f2017-01-11 14:29:56 -07002647{
Christoph Hellwig24d2f902014-04-15 14:14:00 -06002648 kfree(tags->rqs);
Jens Axboecc71a6f2017-01-11 14:29:56 -07002649 tags->rqs = NULL;
Jens Axboe2af8cbe2017-01-13 14:39:30 -07002650 kfree(tags->static_rqs);
2651 tags->static_rqs = NULL;
Jens Axboe320ae512013-10-24 09:20:05 +01002652
John Garrye155b0c2021-10-05 18:23:37 +08002653 blk_mq_free_tags(tags);
Jens Axboe320ae512013-10-24 09:20:05 +01002654}
2655
John Garry63064be2021-10-05 18:23:35 +08002656static struct blk_mq_tags *blk_mq_alloc_rq_map(struct blk_mq_tag_set *set,
2657 unsigned int hctx_idx,
2658 unsigned int nr_tags,
John Garrye155b0c2021-10-05 18:23:37 +08002659 unsigned int reserved_tags)
Jens Axboe320ae512013-10-24 09:20:05 +01002660{
Christoph Hellwig24d2f902014-04-15 14:14:00 -06002661 struct blk_mq_tags *tags;
Shaohua Li59f082e2017-02-01 09:53:14 -08002662 int node;
Jens Axboe320ae512013-10-24 09:20:05 +01002663
Dongli Zhang7d76f852019-02-27 21:35:01 +08002664 node = blk_mq_hw_queue_to_node(&set->map[HCTX_TYPE_DEFAULT], hctx_idx);
Shaohua Li59f082e2017-02-01 09:53:14 -08002665 if (node == NUMA_NO_NODE)
2666 node = set->numa_node;
2667
John Garrye155b0c2021-10-05 18:23:37 +08002668 tags = blk_mq_init_tags(nr_tags, reserved_tags, node,
2669 BLK_MQ_FLAG_TO_ALLOC_POLICY(set->flags));
Christoph Hellwig24d2f902014-04-15 14:14:00 -06002670 if (!tags)
2671 return NULL;
Jens Axboe320ae512013-10-24 09:20:05 +01002672
Kees Cook590b5b72018-06-12 14:04:20 -07002673 tags->rqs = kcalloc_node(nr_tags, sizeof(struct request *),
Gabriel Krisman Bertazi36e1f3d12016-12-06 13:31:44 -02002674 GFP_NOIO | __GFP_NOWARN | __GFP_NORETRY,
Shaohua Li59f082e2017-02-01 09:53:14 -08002675 node);
Christoph Hellwig24d2f902014-04-15 14:14:00 -06002676 if (!tags->rqs) {
John Garrye155b0c2021-10-05 18:23:37 +08002677 blk_mq_free_tags(tags);
Christoph Hellwig24d2f902014-04-15 14:14:00 -06002678 return NULL;
2679 }
Jens Axboe320ae512013-10-24 09:20:05 +01002680
Kees Cook590b5b72018-06-12 14:04:20 -07002681 tags->static_rqs = kcalloc_node(nr_tags, sizeof(struct request *),
2682 GFP_NOIO | __GFP_NOWARN | __GFP_NORETRY,
2683 node);
Jens Axboe2af8cbe2017-01-13 14:39:30 -07002684 if (!tags->static_rqs) {
2685 kfree(tags->rqs);
John Garrye155b0c2021-10-05 18:23:37 +08002686 blk_mq_free_tags(tags);
Jens Axboe2af8cbe2017-01-13 14:39:30 -07002687 return NULL;
2688 }
2689
Jens Axboecc71a6f2017-01-11 14:29:56 -07002690 return tags;
2691}
2692
Tejun Heo1d9bd512018-01-09 08:29:48 -08002693static int blk_mq_init_request(struct blk_mq_tag_set *set, struct request *rq,
2694 unsigned int hctx_idx, int node)
2695{
2696 int ret;
2697
2698 if (set->ops->init_request) {
2699 ret = set->ops->init_request(set, rq, hctx_idx, node);
2700 if (ret)
2701 return ret;
2702 }
2703
Keith Busch12f5b932018-05-29 15:52:28 +02002704 WRITE_ONCE(rq->state, MQ_RQ_IDLE);
Tejun Heo1d9bd512018-01-09 08:29:48 -08002705 return 0;
2706}
2707
John Garry63064be2021-10-05 18:23:35 +08002708static int blk_mq_alloc_rqs(struct blk_mq_tag_set *set,
2709 struct blk_mq_tags *tags,
2710 unsigned int hctx_idx, unsigned int depth)
Jens Axboecc71a6f2017-01-11 14:29:56 -07002711{
2712 unsigned int i, j, entries_per_page, max_order = 4;
2713 size_t rq_size, left;
Shaohua Li59f082e2017-02-01 09:53:14 -08002714 int node;
2715
Dongli Zhang7d76f852019-02-27 21:35:01 +08002716 node = blk_mq_hw_queue_to_node(&set->map[HCTX_TYPE_DEFAULT], hctx_idx);
Shaohua Li59f082e2017-02-01 09:53:14 -08002717 if (node == NUMA_NO_NODE)
2718 node = set->numa_node;
Jens Axboecc71a6f2017-01-11 14:29:56 -07002719
2720 INIT_LIST_HEAD(&tags->page_list);
2721
Jens Axboe320ae512013-10-24 09:20:05 +01002722 /*
2723 * rq_size is the size of the request plus driver payload, rounded
2724 * to the cacheline size
2725 */
Christoph Hellwig24d2f902014-04-15 14:14:00 -06002726 rq_size = round_up(sizeof(struct request) + set->cmd_size,
Jens Axboe320ae512013-10-24 09:20:05 +01002727 cache_line_size());
Jens Axboecc71a6f2017-01-11 14:29:56 -07002728 left = rq_size * depth;
Jens Axboe320ae512013-10-24 09:20:05 +01002729
Jens Axboecc71a6f2017-01-11 14:29:56 -07002730 for (i = 0; i < depth; ) {
Jens Axboe320ae512013-10-24 09:20:05 +01002731 int this_order = max_order;
2732 struct page *page;
2733 int to_do;
2734 void *p;
2735
Bartlomiej Zolnierkiewiczb3a834b2016-05-16 09:54:47 -06002736 while (this_order && left < order_to_size(this_order - 1))
Jens Axboe320ae512013-10-24 09:20:05 +01002737 this_order--;
2738
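		/*
		 * Try the current order and fall back to smaller orders on
		 * failure, giving up once a page of that order could no
		 * longer hold even a single request.
		 */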
2739 do {
Shaohua Li59f082e2017-02-01 09:53:14 -08002740 page = alloc_pages_node(node,
Gabriel Krisman Bertazi36e1f3d12016-12-06 13:31:44 -02002741 GFP_NOIO | __GFP_NOWARN | __GFP_NORETRY | __GFP_ZERO,
Jens Axboea5164402014-09-10 09:02:03 -06002742 this_order);
Jens Axboe320ae512013-10-24 09:20:05 +01002743 if (page)
2744 break;
2745 if (!this_order--)
2746 break;
2747 if (order_to_size(this_order) < rq_size)
2748 break;
2749 } while (1);
2750
2751 if (!page)
Christoph Hellwig24d2f902014-04-15 14:14:00 -06002752 goto fail;
Jens Axboe320ae512013-10-24 09:20:05 +01002753
2754 page->private = this_order;
Christoph Hellwig24d2f902014-04-15 14:14:00 -06002755 list_add_tail(&page->lru, &tags->page_list);
Jens Axboe320ae512013-10-24 09:20:05 +01002756
2757 p = page_address(page);
Catalin Marinasf75782e2015-09-14 18:16:02 +01002758 /*
2759 * Allow kmemleak to scan these pages as they contain pointers
2760 * to additional allocations made via ops->init_request().
2761 */
Gabriel Krisman Bertazi36e1f3d12016-12-06 13:31:44 -02002762 kmemleak_alloc(p, order_to_size(this_order), 1, GFP_NOIO);
Jens Axboe320ae512013-10-24 09:20:05 +01002763 entries_per_page = order_to_size(this_order) / rq_size;
Jens Axboecc71a6f2017-01-11 14:29:56 -07002764 to_do = min(entries_per_page, depth - i);
Jens Axboe320ae512013-10-24 09:20:05 +01002765 left -= to_do * rq_size;
2766 for (j = 0; j < to_do; j++) {
Jens Axboe2af8cbe2017-01-13 14:39:30 -07002767 struct request *rq = p;
2768
2769 tags->static_rqs[i] = rq;
Tejun Heo1d9bd512018-01-09 08:29:48 -08002770 if (blk_mq_init_request(set, rq, hctx_idx, node)) {
2771 tags->static_rqs[i] = NULL;
2772 goto fail;
Christoph Hellwige9b267d2014-04-15 13:59:10 -06002773 }
2774
Jens Axboe320ae512013-10-24 09:20:05 +01002775 p += rq_size;
2776 i++;
2777 }
2778 }
Jens Axboecc71a6f2017-01-11 14:29:56 -07002779 return 0;
Jens Axboe320ae512013-10-24 09:20:05 +01002780
Christoph Hellwig24d2f902014-04-15 14:14:00 -06002781fail:
Jens Axboecc71a6f2017-01-11 14:29:56 -07002782 blk_mq_free_rqs(set, tags, hctx_idx);
2783 return -ENOMEM;
Jens Axboe320ae512013-10-24 09:20:05 +01002784}
2785
Ming Leibf0beec2020-05-29 15:53:15 +02002786struct rq_iter_data {
2787 struct blk_mq_hw_ctx *hctx;
2788 bool has_rq;
2789};
2790
2791static bool blk_mq_has_request(struct request *rq, void *data, bool reserved)
2792{
2793 struct rq_iter_data *iter_data = data;
2794
2795 if (rq->mq_hctx != iter_data->hctx)
2796 return true;
2797 iter_data->has_rq = true;
2798 return false;
2799}
2800
2801static bool blk_mq_hctx_has_requests(struct blk_mq_hw_ctx *hctx)
2802{
2803 struct blk_mq_tags *tags = hctx->sched_tags ?
2804 hctx->sched_tags : hctx->tags;
2805 struct rq_iter_data data = {
2806 .hctx = hctx,
2807 };
2808
2809 blk_mq_all_tag_iter(tags, blk_mq_has_request, &data);
2810 return data.has_rq;
2811}
2812
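/* Return true if @cpu is the only CPU in @hctx's cpumask that is still online. */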
2813static inline bool blk_mq_last_cpu_in_hctx(unsigned int cpu,
2814 struct blk_mq_hw_ctx *hctx)
2815{
2816 if (cpumask_next_and(-1, hctx->cpumask, cpu_online_mask) != cpu)
2817 return false;
2818 if (cpumask_next_and(cpu, hctx->cpumask, cpu_online_mask) < nr_cpu_ids)
2819 return false;
2820 return true;
2821}
2822
2823static int blk_mq_hctx_notify_offline(unsigned int cpu, struct hlist_node *node)
2824{
2825 struct blk_mq_hw_ctx *hctx = hlist_entry_safe(node,
2826 struct blk_mq_hw_ctx, cpuhp_online);
2827
2828 if (!cpumask_test_cpu(cpu, hctx->cpumask) ||
2829 !blk_mq_last_cpu_in_hctx(cpu, hctx))
2830 return 0;
2831
2832 /*
2833 * Prevent new requests from being allocated on the current hctx.
2834 *
2835 * The smp_mb__after_atomic() pairs with the implied barrier in
2836 * test_and_set_bit_lock() in sbitmap_get(), ensuring the inactive flag is
2837 * seen once we return from the tag allocator.
2838 */
2839 set_bit(BLK_MQ_S_INACTIVE, &hctx->state);
2840 smp_mb__after_atomic();
2841
2842 /*
2843 * Try to grab a reference to the queue and wait for any outstanding
2844 * requests. If we could not grab a reference the queue has been
2845 * frozen and there are no requests.
2846 */
2847 if (percpu_ref_tryget(&hctx->queue->q_usage_counter)) {
2848 while (blk_mq_hctx_has_requests(hctx))
2849 msleep(5);
2850 percpu_ref_put(&hctx->queue->q_usage_counter);
2851 }
2852
2853 return 0;
2854}
2855
2856static int blk_mq_hctx_notify_online(unsigned int cpu, struct hlist_node *node)
2857{
2858 struct blk_mq_hw_ctx *hctx = hlist_entry_safe(node,
2859 struct blk_mq_hw_ctx, cpuhp_online);
2860
2861 if (cpumask_test_cpu(cpu, hctx->cpumask))
2862 clear_bit(BLK_MQ_S_INACTIVE, &hctx->state);
2863 return 0;
2864}
2865
Jens Axboee57690f2016-08-24 15:34:35 -06002866/*
2867 * 'cpu' is going away. Splice any existing rq_list entries from this
2868 * software queue to the hw queue dispatch list, and ensure that it
2869 * gets run.
2870 */
Thomas Gleixner9467f852016-09-22 08:05:17 -06002871static int blk_mq_hctx_notify_dead(unsigned int cpu, struct hlist_node *node)
Jens Axboe484b4062014-05-21 14:01:15 -06002872{
Thomas Gleixner9467f852016-09-22 08:05:17 -06002873 struct blk_mq_hw_ctx *hctx;
Jens Axboe484b4062014-05-21 14:01:15 -06002874 struct blk_mq_ctx *ctx;
2875 LIST_HEAD(tmp);
Ming Leic16d6b52018-12-17 08:44:05 -07002876 enum hctx_type type;
Jens Axboe484b4062014-05-21 14:01:15 -06002877
Thomas Gleixner9467f852016-09-22 08:05:17 -06002878 hctx = hlist_entry_safe(node, struct blk_mq_hw_ctx, cpuhp_dead);
Ming Leibf0beec2020-05-29 15:53:15 +02002879 if (!cpumask_test_cpu(cpu, hctx->cpumask))
2880 return 0;
2881
Jens Axboee57690f2016-08-24 15:34:35 -06002882 ctx = __blk_mq_get_ctx(hctx->queue, cpu);
Ming Leic16d6b52018-12-17 08:44:05 -07002883 type = hctx->type;
Jens Axboe484b4062014-05-21 14:01:15 -06002884
2885 spin_lock(&ctx->lock);
Ming Leic16d6b52018-12-17 08:44:05 -07002886 if (!list_empty(&ctx->rq_lists[type])) {
2887 list_splice_init(&ctx->rq_lists[type], &tmp);
Jens Axboe484b4062014-05-21 14:01:15 -06002888 blk_mq_hctx_clear_pending(hctx, ctx);
2889 }
2890 spin_unlock(&ctx->lock);
2891
2892 if (list_empty(&tmp))
Thomas Gleixner9467f852016-09-22 08:05:17 -06002893 return 0;
Jens Axboe484b4062014-05-21 14:01:15 -06002894
Jens Axboee57690f2016-08-24 15:34:35 -06002895 spin_lock(&hctx->lock);
2896 list_splice_tail_init(&tmp, &hctx->dispatch);
2897 spin_unlock(&hctx->lock);
Jens Axboe484b4062014-05-21 14:01:15 -06002898
2899 blk_mq_run_hw_queue(hctx, true);
Thomas Gleixner9467f852016-09-22 08:05:17 -06002900 return 0;
Jens Axboe484b4062014-05-21 14:01:15 -06002901}
2902
Thomas Gleixner9467f852016-09-22 08:05:17 -06002903static void blk_mq_remove_cpuhp(struct blk_mq_hw_ctx *hctx)
Jens Axboe484b4062014-05-21 14:01:15 -06002904{
Ming Leibf0beec2020-05-29 15:53:15 +02002905 if (!(hctx->flags & BLK_MQ_F_STACKING))
2906 cpuhp_state_remove_instance_nocalls(CPUHP_AP_BLK_MQ_ONLINE,
2907 &hctx->cpuhp_online);
Thomas Gleixner9467f852016-09-22 08:05:17 -06002908 cpuhp_state_remove_instance_nocalls(CPUHP_BLK_MQ_DEAD,
2909 &hctx->cpuhp_dead);
Jens Axboe484b4062014-05-21 14:01:15 -06002910}
2911
Ming Lei364b6182021-05-11 23:22:36 +08002912/*
2913 * Before freeing the hw queue, clear the flush request reference in
2914 * tags->rqs[] to avoid a potential use-after-free.
2915 */
2916static void blk_mq_clear_flush_rq_mapping(struct blk_mq_tags *tags,
2917 unsigned int queue_depth, struct request *flush_rq)
2918{
2919 int i;
2920 unsigned long flags;
2921
2922 /* The hw queue may not be mapped yet */
2923 if (!tags)
2924 return;
2925
2926 WARN_ON_ONCE(refcount_read(&flush_rq->ref) != 0);
2927
2928 for (i = 0; i < queue_depth; i++)
2929 cmpxchg(&tags->rqs[i], flush_rq, NULL);
2930
2931 /*
2932 * Wait until all pending iterations are done.
2933 *
2934 * Request reference is cleared and it is guaranteed to be observed
2935 * after the ->lock is released.
2936 */
2937 spin_lock_irqsave(&tags->lock, flags);
2938 spin_unlock_irqrestore(&tags->lock, flags);
2939}
2940
Ming Leic3b4afc2015-06-04 22:25:04 +08002941/* hctx->ctxs will be freed in queue's release handler */
Ming Lei08e98fc2014-09-25 23:23:38 +08002942static void blk_mq_exit_hctx(struct request_queue *q,
2943 struct blk_mq_tag_set *set,
2944 struct blk_mq_hw_ctx *hctx, unsigned int hctx_idx)
2945{
Ming Lei364b6182021-05-11 23:22:36 +08002946 struct request *flush_rq = hctx->fq->flush_rq;
2947
Ming Lei8ab0b7d2018-01-09 21:28:29 +08002948 if (blk_mq_hw_queue_mapped(hctx))
2949 blk_mq_tag_idle(hctx);
Ming Lei08e98fc2014-09-25 23:23:38 +08002950
Ming Lei364b6182021-05-11 23:22:36 +08002951 blk_mq_clear_flush_rq_mapping(set->tags[hctx_idx],
2952 set->queue_depth, flush_rq);
Ming Leif70ced02014-09-25 23:23:47 +08002953 if (set->ops->exit_request)
Ming Lei364b6182021-05-11 23:22:36 +08002954 set->ops->exit_request(set, flush_rq, hctx_idx);
Ming Leif70ced02014-09-25 23:23:47 +08002955
Ming Lei08e98fc2014-09-25 23:23:38 +08002956 if (set->ops->exit_hctx)
2957 set->ops->exit_hctx(hctx, hctx_idx);
2958
Thomas Gleixner9467f852016-09-22 08:05:17 -06002959 blk_mq_remove_cpuhp(hctx);
Ming Lei2f8f1332019-04-30 09:52:27 +08002960
2961 spin_lock(&q->unused_hctx_lock);
2962 list_add(&hctx->hctx_list, &q->unused_hctx_list);
2963 spin_unlock(&q->unused_hctx_lock);
Ming Lei08e98fc2014-09-25 23:23:38 +08002964}
2965
Ming Lei624dbe42014-05-27 23:35:13 +08002966static void blk_mq_exit_hw_queues(struct request_queue *q,
2967 struct blk_mq_tag_set *set, int nr_queue)
2968{
2969 struct blk_mq_hw_ctx *hctx;
2970 unsigned int i;
2971
2972 queue_for_each_hw_ctx(q, hctx, i) {
2973 if (i == nr_queue)
2974 break;
Jianchao Wang477e19d2018-10-12 18:07:25 +08002975 blk_mq_debugfs_unregister_hctx(hctx);
Ming Lei08e98fc2014-09-25 23:23:38 +08002976 blk_mq_exit_hctx(q, set, hctx, i);
Ming Lei624dbe42014-05-27 23:35:13 +08002977 }
Ming Lei624dbe42014-05-27 23:35:13 +08002978}
2979
Ming Lei7c6c5b72019-04-30 09:52:26 +08002980static int blk_mq_hw_ctx_size(struct blk_mq_tag_set *tag_set)
2981{
2982 int hw_ctx_size = sizeof(struct blk_mq_hw_ctx);
2983
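	/*
	 * Space for the SRCU structure is only appended for BLK_MQ_F_BLOCKING
	 * drivers, so srcu must be the last, properly aligned member.
	 */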
2984 BUILD_BUG_ON(ALIGN(offsetof(struct blk_mq_hw_ctx, srcu),
2985 __alignof__(struct blk_mq_hw_ctx)) !=
2986 sizeof(struct blk_mq_hw_ctx));
2987
2988 if (tag_set->flags & BLK_MQ_F_BLOCKING)
2989 hw_ctx_size += sizeof(struct srcu_struct);
2990
2991 return hw_ctx_size;
2992}
2993
Ming Lei08e98fc2014-09-25 23:23:38 +08002994static int blk_mq_init_hctx(struct request_queue *q,
2995 struct blk_mq_tag_set *set,
2996 struct blk_mq_hw_ctx *hctx, unsigned hctx_idx)
2997{
Ming Lei7c6c5b72019-04-30 09:52:26 +08002998 hctx->queue_num = hctx_idx;
Ming Lei08e98fc2014-09-25 23:23:38 +08002999
Ming Leibf0beec2020-05-29 15:53:15 +02003000 if (!(hctx->flags & BLK_MQ_F_STACKING))
3001 cpuhp_state_add_instance_nocalls(CPUHP_AP_BLK_MQ_ONLINE,
3002 &hctx->cpuhp_online);
Ming Lei7c6c5b72019-04-30 09:52:26 +08003003 cpuhp_state_add_instance_nocalls(CPUHP_BLK_MQ_DEAD, &hctx->cpuhp_dead);
3004
3005 hctx->tags = set->tags[hctx_idx];
3006
3007 if (set->ops->init_hctx &&
3008 set->ops->init_hctx(hctx, set->driver_data, hctx_idx))
3009 goto unregister_cpu_notifier;
3010
3011 if (blk_mq_init_request(set, hctx->fq->flush_rq, hctx_idx,
3012 hctx->numa_node))
3013 goto exit_hctx;
3014 return 0;
3015
3016 exit_hctx:
3017 if (set->ops->exit_hctx)
3018 set->ops->exit_hctx(hctx, hctx_idx);
3019 unregister_cpu_notifier:
3020 blk_mq_remove_cpuhp(hctx);
3021 return -1;
3022}
3023
3024static struct blk_mq_hw_ctx *
3025blk_mq_alloc_hctx(struct request_queue *q, struct blk_mq_tag_set *set,
3026 int node)
3027{
3028 struct blk_mq_hw_ctx *hctx;
3029 gfp_t gfp = GFP_NOIO | __GFP_NOWARN | __GFP_NORETRY;
3030
3031 hctx = kzalloc_node(blk_mq_hw_ctx_size(set), gfp, node);
3032 if (!hctx)
3033 goto fail_alloc_hctx;
3034
3035 if (!zalloc_cpumask_var_node(&hctx->cpumask, gfp, node))
3036 goto free_hctx;
3037
3038 atomic_set(&hctx->nr_active, 0);
Ming Lei08e98fc2014-09-25 23:23:38 +08003039 if (node == NUMA_NO_NODE)
Ming Lei7c6c5b72019-04-30 09:52:26 +08003040 node = set->numa_node;
3041 hctx->numa_node = node;
Ming Lei08e98fc2014-09-25 23:23:38 +08003042
Jens Axboe9f993732017-04-10 09:54:54 -06003043 INIT_DELAYED_WORK(&hctx->run_work, blk_mq_run_work_fn);
Ming Lei08e98fc2014-09-25 23:23:38 +08003044 spin_lock_init(&hctx->lock);
3045 INIT_LIST_HEAD(&hctx->dispatch);
3046 hctx->queue = q;
Ming Lei51db1c32020-08-19 23:20:19 +08003047 hctx->flags = set->flags & ~BLK_MQ_F_TAG_QUEUE_SHARED;
Ming Lei08e98fc2014-09-25 23:23:38 +08003048
Ming Lei2f8f1332019-04-30 09:52:27 +08003049 INIT_LIST_HEAD(&hctx->hctx_list);
3050
Ming Lei08e98fc2014-09-25 23:23:38 +08003051 /*
3052 * Allocate space for all possible cpus to avoid allocation at
3053 * runtime
3054 */
Johannes Thumshirnd904bfa2017-11-15 17:32:33 -08003055 hctx->ctxs = kmalloc_array_node(nr_cpu_ids, sizeof(void *),
Ming Lei7c6c5b72019-04-30 09:52:26 +08003056 gfp, node);
Ming Lei08e98fc2014-09-25 23:23:38 +08003057 if (!hctx->ctxs)
Ming Lei7c6c5b72019-04-30 09:52:26 +08003058 goto free_cpumask;
Ming Lei08e98fc2014-09-25 23:23:38 +08003059
Jianchao Wang5b202852018-10-12 18:07:26 +08003060 if (sbitmap_init_node(&hctx->ctx_map, nr_cpu_ids, ilog2(8),
Ming Leic548e622021-01-22 10:33:08 +08003061 gfp, node, false, false))
Ming Lei08e98fc2014-09-25 23:23:38 +08003062 goto free_ctxs;
Ming Lei08e98fc2014-09-25 23:23:38 +08003063 hctx->nr_ctx = 0;
3064
Ming Lei5815839b2018-06-25 19:31:47 +08003065 spin_lock_init(&hctx->dispatch_wait_lock);
Jens Axboeeb619fd2017-11-09 08:32:43 -07003066 init_waitqueue_func_entry(&hctx->dispatch_wait, blk_mq_dispatch_wake);
3067 INIT_LIST_HEAD(&hctx->dispatch_wait.entry);
3068
Guoqing Jiang754a1572020-03-09 22:41:37 +01003069 hctx->fq = blk_alloc_flush_queue(hctx->numa_node, set->cmd_size, gfp);
Ming Leif70ced02014-09-25 23:23:47 +08003070 if (!hctx->fq)
Ming Lei7c6c5b72019-04-30 09:52:26 +08003071 goto free_bitmap;
Ming Leif70ced02014-09-25 23:23:47 +08003072
Bart Van Assche6a83e742016-11-02 10:09:51 -06003073 if (hctx->flags & BLK_MQ_F_BLOCKING)
Tejun Heo05707b62018-01-09 08:29:53 -08003074 init_srcu_struct(hctx->srcu);
Ming Lei7c6c5b72019-04-30 09:52:26 +08003075 blk_mq_hctx_kobj_init(hctx);
Bart Van Assche6a83e742016-11-02 10:09:51 -06003076
Ming Lei7c6c5b72019-04-30 09:52:26 +08003077 return hctx;
Ming Lei08e98fc2014-09-25 23:23:38 +08003078
3079 free_bitmap:
Omar Sandoval88459642016-09-17 08:38:44 -06003080 sbitmap_free(&hctx->ctx_map);
Ming Lei08e98fc2014-09-25 23:23:38 +08003081 free_ctxs:
3082 kfree(hctx->ctxs);
Ming Lei7c6c5b72019-04-30 09:52:26 +08003083 free_cpumask:
3084 free_cpumask_var(hctx->cpumask);
3085 free_hctx:
3086 kfree(hctx);
3087 fail_alloc_hctx:
3088 return NULL;
Ming Lei08e98fc2014-09-25 23:23:38 +08003089}
3090
Jens Axboe320ae512013-10-24 09:20:05 +01003091static void blk_mq_init_cpu_queues(struct request_queue *q,
3092 unsigned int nr_hw_queues)
3093{
Jens Axboeb3c661b2018-10-30 10:36:06 -06003094 struct blk_mq_tag_set *set = q->tag_set;
3095 unsigned int i, j;
Jens Axboe320ae512013-10-24 09:20:05 +01003096
3097 for_each_possible_cpu(i) {
3098 struct blk_mq_ctx *__ctx = per_cpu_ptr(q->queue_ctx, i);
3099 struct blk_mq_hw_ctx *hctx;
Ming Leic16d6b52018-12-17 08:44:05 -07003100 int k;
Jens Axboe320ae512013-10-24 09:20:05 +01003101
Jens Axboe320ae512013-10-24 09:20:05 +01003102 __ctx->cpu = i;
3103 spin_lock_init(&__ctx->lock);
Ming Leic16d6b52018-12-17 08:44:05 -07003104 for (k = HCTX_TYPE_DEFAULT; k < HCTX_MAX_TYPES; k++)
3105 INIT_LIST_HEAD(&__ctx->rq_lists[k]);
3106
Jens Axboe320ae512013-10-24 09:20:05 +01003107 __ctx->queue = q;
3108
Jens Axboe320ae512013-10-24 09:20:05 +01003109 /*
3110 * Set the local node, IFF we have more than one hw queue. If
3111 * not, we remain on the home node of the device.
3112 */
Jens Axboeb3c661b2018-10-30 10:36:06 -06003113 for (j = 0; j < set->nr_maps; j++) {
3114 hctx = blk_mq_map_queue_type(q, j, i);
3115 if (nr_hw_queues > 1 && hctx->numa_node == NUMA_NO_NODE)
Xianting Tian576e85c2020-10-19 16:20:47 +08003116 hctx->numa_node = cpu_to_node(i);
Jens Axboeb3c661b2018-10-30 10:36:06 -06003117 }
Jens Axboe320ae512013-10-24 09:20:05 +01003118 }
3119}
3120
John Garry63064be2021-10-05 18:23:35 +08003121struct blk_mq_tags *blk_mq_alloc_map_and_rqs(struct blk_mq_tag_set *set,
3122 unsigned int hctx_idx,
3123 unsigned int depth)
Jens Axboecc71a6f2017-01-11 14:29:56 -07003124{
John Garry63064be2021-10-05 18:23:35 +08003125 struct blk_mq_tags *tags;
3126 int ret;
Jens Axboecc71a6f2017-01-11 14:29:56 -07003127
John Garrye155b0c2021-10-05 18:23:37 +08003128 tags = blk_mq_alloc_rq_map(set, hctx_idx, depth, set->reserved_tags);
John Garry63064be2021-10-05 18:23:35 +08003129 if (!tags)
3130 return NULL;
Jens Axboecc71a6f2017-01-11 14:29:56 -07003131
John Garry63064be2021-10-05 18:23:35 +08003132 ret = blk_mq_alloc_rqs(set, tags, hctx_idx, depth);
3133 if (ret) {
John Garrye155b0c2021-10-05 18:23:37 +08003134 blk_mq_free_rq_map(tags);
John Garry63064be2021-10-05 18:23:35 +08003135 return NULL;
3136 }
Jens Axboecc71a6f2017-01-11 14:29:56 -07003137
John Garry63064be2021-10-05 18:23:35 +08003138 return tags;
3139}
3140
3141static bool __blk_mq_alloc_map_and_rqs(struct blk_mq_tag_set *set,
3142 int hctx_idx)
3143{
John Garry079a2e32021-10-05 18:23:39 +08003144 if (blk_mq_is_shared_tags(set->flags)) {
3145 set->tags[hctx_idx] = set->shared_tags;
John Garrye155b0c2021-10-05 18:23:37 +08003146
3147 return true;
3148 }
3149
John Garry63064be2021-10-05 18:23:35 +08003150 set->tags[hctx_idx] = blk_mq_alloc_map_and_rqs(set, hctx_idx,
3151 set->queue_depth);
3152
3153 return set->tags[hctx_idx];
Jens Axboecc71a6f2017-01-11 14:29:56 -07003154}
3155
John Garry645db342021-10-05 18:23:36 +08003156void blk_mq_free_map_and_rqs(struct blk_mq_tag_set *set,
3157 struct blk_mq_tags *tags,
3158 unsigned int hctx_idx)
Jens Axboecc71a6f2017-01-11 14:29:56 -07003159{
John Garry645db342021-10-05 18:23:36 +08003160 if (tags) {
3161 blk_mq_free_rqs(set, tags, hctx_idx);
John Garrye155b0c2021-10-05 18:23:37 +08003162 blk_mq_free_rq_map(tags);
Jens Axboebd166ef2017-01-17 06:03:22 -07003163 }
Jens Axboecc71a6f2017-01-11 14:29:56 -07003164}
3165
John Garrye155b0c2021-10-05 18:23:37 +08003166static void __blk_mq_free_map_and_rqs(struct blk_mq_tag_set *set,
3167 unsigned int hctx_idx)
3168{
John Garry079a2e32021-10-05 18:23:39 +08003169 if (!blk_mq_is_shared_tags(set->flags))
John Garrye155b0c2021-10-05 18:23:37 +08003170 blk_mq_free_map_and_rqs(set, set->tags[hctx_idx], hctx_idx);
3171
3172 set->tags[hctx_idx] = NULL;
3173}
3174
Christoph Hellwig4b855ad2017-06-26 12:20:57 +02003175static void blk_mq_map_swqueue(struct request_queue *q)
Jens Axboe320ae512013-10-24 09:20:05 +01003176{
Jens Axboeb3c661b2018-10-30 10:36:06 -06003177 unsigned int i, j, hctx_idx;
Jens Axboe320ae512013-10-24 09:20:05 +01003178 struct blk_mq_hw_ctx *hctx;
3179 struct blk_mq_ctx *ctx;
Ming Lei2a34c082015-04-21 10:00:20 +08003180 struct blk_mq_tag_set *set = q->tag_set;
Jens Axboe320ae512013-10-24 09:20:05 +01003181
3182 queue_for_each_hw_ctx(q, hctx, i) {
Jens Axboee4043dc2014-04-09 10:18:23 -06003183 cpumask_clear(hctx->cpumask);
Jens Axboe320ae512013-10-24 09:20:05 +01003184 hctx->nr_ctx = 0;
huhaid416c922018-05-18 08:32:30 -06003185 hctx->dispatch_from = NULL;
Jens Axboe320ae512013-10-24 09:20:05 +01003186 }
3187
3188 /*
Christoph Hellwig4b855ad2017-06-26 12:20:57 +02003189 * Map software to hardware queues.
Ming Lei4412efe2018-04-25 04:01:44 +08003190 *
3191 * If the cpu isn't present, the cpu is mapped to the first hctx.
Jens Axboe320ae512013-10-24 09:20:05 +01003192 */
Christoph Hellwig20e4d8132018-01-12 10:53:06 +08003193 for_each_possible_cpu(i) {
Ming Lei4412efe2018-04-25 04:01:44 +08003194
Thomas Gleixner897bb0c2016-03-19 11:30:33 +01003195 ctx = per_cpu_ptr(q->queue_ctx, i);
Jens Axboeb3c661b2018-10-30 10:36:06 -06003196 for (j = 0; j < set->nr_maps; j++) {
Jianchao Wangbb94aea2019-01-24 18:25:33 +08003197 if (!set->map[j].nr_queues) {
3198 ctx->hctxs[j] = blk_mq_map_queue_type(q,
3199 HCTX_TYPE_DEFAULT, i);
Ming Leie5edd5f2018-12-18 01:28:56 +08003200 continue;
Jianchao Wangbb94aea2019-01-24 18:25:33 +08003201 }
Ming Leifd689872020-05-07 21:04:08 +08003202 hctx_idx = set->map[j].mq_map[i];
3203 /* unmapped hw queue can be remapped after CPU topo changed */
3204 if (!set->tags[hctx_idx] &&
John Garry63064be2021-10-05 18:23:35 +08003205 !__blk_mq_alloc_map_and_rqs(set, hctx_idx)) {
Ming Leifd689872020-05-07 21:04:08 +08003206 /*
3207 * If tags initialization fails for some hctx,
3208 * that hctx won't be brought online. In this
3209 * case, remap the current ctx to hctx[0] which
3210 * is guaranteed to always have tags allocated
3211 */
3212 set->map[j].mq_map[i] = 0;
3213 }
Ming Leie5edd5f2018-12-18 01:28:56 +08003214
Jens Axboeb3c661b2018-10-30 10:36:06 -06003215 hctx = blk_mq_map_queue_type(q, j, i);
Jianchao Wang8ccdf4a2019-01-24 18:25:32 +08003216 ctx->hctxs[j] = hctx;
Jens Axboeb3c661b2018-10-30 10:36:06 -06003217 /*
3218 * If the CPU is already set in the mask, then we've
3219 * mapped this one already. This can happen if
3220 * devices share queues across queue maps.
3221 */
3222 if (cpumask_test_cpu(i, hctx->cpumask))
3223 continue;
3224
3225 cpumask_set_cpu(i, hctx->cpumask);
3226 hctx->type = j;
3227 ctx->index_hw[hctx->type] = hctx->nr_ctx;
3228 hctx->ctxs[hctx->nr_ctx++] = ctx;
3229
3230 /*
3231 * If the nr_ctx type overflows, we have exceeded the
3232 * number of sw queues we can support.
3233 */
3234 BUG_ON(!hctx->nr_ctx);
3235 }
Jianchao Wangbb94aea2019-01-24 18:25:33 +08003236
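		/* Point any remaining (unused) map types at the default hctx. */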
3237 for (; j < HCTX_MAX_TYPES; j++)
3238 ctx->hctxs[j] = blk_mq_map_queue_type(q,
3239 HCTX_TYPE_DEFAULT, i);
Jens Axboe320ae512013-10-24 09:20:05 +01003240 }
Jens Axboe506e9312014-05-07 10:26:44 -06003241
3242 queue_for_each_hw_ctx(q, hctx, i) {
Ming Lei4412efe2018-04-25 04:01:44 +08003243 /*
3244 * If no software queues are mapped to this hardware queue,
3245 * disable it and free the request entries.
3246 */
3247 if (!hctx->nr_ctx) {
3248 /* Never unmap queue 0. We need it as a
3249 * fallback in case a new remap fails
3250 * allocation.
3251 */
John Garrye155b0c2021-10-05 18:23:37 +08003252 if (i)
3253 __blk_mq_free_map_and_rqs(set, i);
Ming Lei4412efe2018-04-25 04:01:44 +08003254
3255 hctx->tags = NULL;
3256 continue;
3257 }
Jens Axboe484b4062014-05-21 14:01:15 -06003258
Ming Lei2a34c082015-04-21 10:00:20 +08003259 hctx->tags = set->tags[i];
3260 WARN_ON(!hctx->tags);
3261
Jens Axboe484b4062014-05-21 14:01:15 -06003262 /*
Chong Yuan889fa312015-04-15 11:39:29 -06003263 * Set the map size to the number of mapped software queues.
3264 * This is more accurate and more efficient than looping
3265 * over all possibly mapped software queues.
3266 */
Omar Sandoval88459642016-09-17 08:38:44 -06003267 sbitmap_resize(&hctx->ctx_map, hctx->nr_ctx);
Chong Yuan889fa312015-04-15 11:39:29 -06003268
3269 /*
Jens Axboe484b4062014-05-21 14:01:15 -06003270 * Initialize batch roundrobin counts
3271 */
Ming Leif82ddf12018-04-08 17:48:10 +08003272 hctx->next_cpu = blk_mq_first_mapped_cpu(hctx);
Jens Axboe506e9312014-05-07 10:26:44 -06003273 hctx->next_cpu_batch = BLK_MQ_CPU_WORK_BATCH;
3274 }
Jens Axboe320ae512013-10-24 09:20:05 +01003275}
3276
Jens Axboe8e8320c2017-06-20 17:56:13 -06003277/*
3278 * Caller needs to ensure that we're either frozen/quiesced, or that
3279 * the queue isn't live yet.
3280 */
Jeff Moyer2404e602015-11-03 10:40:06 -05003281static void queue_set_hctx_shared(struct request_queue *q, bool shared)
Jens Axboe0d2602c2014-05-13 15:10:52 -06003282{
3283 struct blk_mq_hw_ctx *hctx;
Jens Axboe0d2602c2014-05-13 15:10:52 -06003284 int i;
3285
Jeff Moyer2404e602015-11-03 10:40:06 -05003286 queue_for_each_hw_ctx(q, hctx, i) {
Yu Kuai454bb672021-07-31 14:21:30 +08003287 if (shared) {
Ming Lei51db1c32020-08-19 23:20:19 +08003288 hctx->flags |= BLK_MQ_F_TAG_QUEUE_SHARED;
Yu Kuai454bb672021-07-31 14:21:30 +08003289 } else {
3290 blk_mq_tag_idle(hctx);
Ming Lei51db1c32020-08-19 23:20:19 +08003291 hctx->flags &= ~BLK_MQ_F_TAG_QUEUE_SHARED;
Yu Kuai454bb672021-07-31 14:21:30 +08003292 }
Jeff Moyer2404e602015-11-03 10:40:06 -05003293 }
3294}
3295
Hannes Reinecke655ac302020-08-19 23:20:20 +08003296static void blk_mq_update_tag_set_shared(struct blk_mq_tag_set *set,
3297 bool shared)
Jeff Moyer2404e602015-11-03 10:40:06 -05003298{
3299 struct request_queue *q;
Jens Axboe0d2602c2014-05-13 15:10:52 -06003300
Bart Van Assche705cda92017-04-07 11:16:49 -07003301 lockdep_assert_held(&set->tag_list_lock);
3302
Jens Axboe0d2602c2014-05-13 15:10:52 -06003303 list_for_each_entry(q, &set->tag_list, tag_set_list) {
3304 blk_mq_freeze_queue(q);
Jeff Moyer2404e602015-11-03 10:40:06 -05003305 queue_set_hctx_shared(q, shared);
Jens Axboe0d2602c2014-05-13 15:10:52 -06003306 blk_mq_unfreeze_queue(q);
3307 }
3308}
3309
3310static void blk_mq_del_queue_tag_set(struct request_queue *q)
3311{
3312 struct blk_mq_tag_set *set = q->tag_set;
3313
Jens Axboe0d2602c2014-05-13 15:10:52 -06003314 mutex_lock(&set->tag_list_lock);
Daniel Wagner08c875c2020-07-28 15:29:51 +02003315 list_del(&q->tag_set_list);
Jeff Moyer2404e602015-11-03 10:40:06 -05003316 if (list_is_singular(&set->tag_list)) {
3317 /* just transitioned to unshared */
Ming Lei51db1c32020-08-19 23:20:19 +08003318 set->flags &= ~BLK_MQ_F_TAG_QUEUE_SHARED;
Jeff Moyer2404e602015-11-03 10:40:06 -05003319 /* update existing queue */
Hannes Reinecke655ac302020-08-19 23:20:20 +08003320 blk_mq_update_tag_set_shared(set, false);
Jeff Moyer2404e602015-11-03 10:40:06 -05003321 }
Jens Axboe0d2602c2014-05-13 15:10:52 -06003322 mutex_unlock(&set->tag_list_lock);
Roman Pena347c7a2018-06-10 22:38:24 +02003323 INIT_LIST_HEAD(&q->tag_set_list);
Jens Axboe0d2602c2014-05-13 15:10:52 -06003324}
3325
3326static void blk_mq_add_queue_tag_set(struct blk_mq_tag_set *set,
3327 struct request_queue *q)
3328{
Jens Axboe0d2602c2014-05-13 15:10:52 -06003329 mutex_lock(&set->tag_list_lock);
Jeff Moyer2404e602015-11-03 10:40:06 -05003330
Jens Axboeff821d22017-11-10 22:05:12 -07003331 /*
3332 * Check to see if we're transitioning to shared (from 1 to 2 queues).
3333 */
3334 if (!list_empty(&set->tag_list) &&
Ming Lei51db1c32020-08-19 23:20:19 +08003335 !(set->flags & BLK_MQ_F_TAG_QUEUE_SHARED)) {
3336 set->flags |= BLK_MQ_F_TAG_QUEUE_SHARED;
Jeff Moyer2404e602015-11-03 10:40:06 -05003337 /* update existing queue */
Hannes Reinecke655ac302020-08-19 23:20:20 +08003338 blk_mq_update_tag_set_shared(set, true);
Jeff Moyer2404e602015-11-03 10:40:06 -05003339 }
Ming Lei51db1c32020-08-19 23:20:19 +08003340 if (set->flags & BLK_MQ_F_TAG_QUEUE_SHARED)
Jeff Moyer2404e602015-11-03 10:40:06 -05003341 queue_set_hctx_shared(q, true);
Daniel Wagner08c875c2020-07-28 15:29:51 +02003342 list_add_tail(&q->tag_set_list, &set->tag_list);
Jeff Moyer2404e602015-11-03 10:40:06 -05003343
Jens Axboe0d2602c2014-05-13 15:10:52 -06003344 mutex_unlock(&set->tag_list_lock);
3345}
3346
Ming Lei1db49092018-11-20 09:44:35 +08003347/* All allocations will be freed in release handler of q->mq_kobj */
3348static int blk_mq_alloc_ctxs(struct request_queue *q)
3349{
3350 struct blk_mq_ctxs *ctxs;
3351 int cpu;
3352
3353 ctxs = kzalloc(sizeof(*ctxs), GFP_KERNEL);
3354 if (!ctxs)
3355 return -ENOMEM;
3356
3357 ctxs->queue_ctx = alloc_percpu(struct blk_mq_ctx);
3358 if (!ctxs->queue_ctx)
3359 goto fail;
3360
3361 for_each_possible_cpu(cpu) {
3362 struct blk_mq_ctx *ctx = per_cpu_ptr(ctxs->queue_ctx, cpu);
3363 ctx->ctxs = ctxs;
3364 }
3365
3366 q->mq_kobj = &ctxs->kobj;
3367 q->queue_ctx = ctxs->queue_ctx;
3368
3369 return 0;
3370 fail:
3371 kfree(ctxs);
3372 return -ENOMEM;
3373}
3374
Ming Leie09aae72015-01-29 20:17:27 +08003375/*
3376 * This is the actual release handler for mq, but we do it from the
3377 * request queue's release handler to avoid use-after-free and
3378 * headaches, because q->mq_kobj shouldn't have been introduced,
3379 * but we can't group ctx/kctx kobj without it.
3380 */
3381void blk_mq_release(struct request_queue *q)
3382{
Ming Lei2f8f1332019-04-30 09:52:27 +08003383 struct blk_mq_hw_ctx *hctx, *next;
3384 int i;
Ming Leie09aae72015-01-29 20:17:27 +08003385
Ming Lei2f8f1332019-04-30 09:52:27 +08003386 queue_for_each_hw_ctx(q, hctx, i)
3387 WARN_ON_ONCE(hctx && list_empty(&hctx->hctx_list));
3388
3389 /* all hctx are in .unused_hctx_list now */
3390 list_for_each_entry_safe(hctx, next, &q->unused_hctx_list, hctx_list) {
3391 list_del_init(&hctx->hctx_list);
Ming Lei6c8b2322017-02-22 18:14:01 +08003392 kobject_put(&hctx->kobj);
Ming Leic3b4afc2015-06-04 22:25:04 +08003393 }
Ming Leie09aae72015-01-29 20:17:27 +08003394
3395 kfree(q->queue_hw_ctx);
3396
Ming Lei7ea5fe32017-02-22 18:14:00 +08003397 /*
3398 * Release .mq_kobj and the sw queues' kobjects now because
3399 * both share their lifetime with the request queue.
3400 */
3401 blk_mq_sysfs_deinit(q);
Ming Leie09aae72015-01-29 20:17:27 +08003402}
3403
Christoph Hellwig5ec780a2021-06-24 10:10:12 +02003404static struct request_queue *blk_mq_init_queue_data(struct blk_mq_tag_set *set,
Christoph Hellwig2f227bb2020-03-27 09:30:08 +01003405 void *queuedata)
Jens Axboe320ae512013-10-24 09:20:05 +01003406{
Christoph Hellwig26a97502021-06-02 09:53:17 +03003407 struct request_queue *q;
3408 int ret;
Mike Snitzerb62c21b2015-03-12 23:56:02 -04003409
Christoph Hellwig26a97502021-06-02 09:53:17 +03003410 q = blk_alloc_queue(set->numa_node);
3411 if (!q)
Mike Snitzerb62c21b2015-03-12 23:56:02 -04003412 return ERR_PTR(-ENOMEM);
Christoph Hellwig26a97502021-06-02 09:53:17 +03003413 q->queuedata = queuedata;
3414 ret = blk_mq_init_allocated_queue(set, q);
3415 if (ret) {
3416 blk_cleanup_queue(q);
3417 return ERR_PTR(ret);
3418 }
Mike Snitzerb62c21b2015-03-12 23:56:02 -04003419 return q;
3420}
Christoph Hellwig2f227bb2020-03-27 09:30:08 +01003421
3422struct request_queue *blk_mq_init_queue(struct blk_mq_tag_set *set)
3423{
3424 return blk_mq_init_queue_data(set, NULL);
3425}
Mike Snitzerb62c21b2015-03-12 23:56:02 -04003426EXPORT_SYMBOL(blk_mq_init_queue);
3427
Christoph Hellwig4dcc4872021-08-16 15:19:05 +02003428struct gendisk *__blk_mq_alloc_disk(struct blk_mq_tag_set *set, void *queuedata,
3429 struct lock_class_key *lkclass)
Jens Axboe9316a9e2018-10-15 08:40:37 -06003430{
3431 struct request_queue *q;
Christoph Hellwigb461dfc2021-06-02 09:53:18 +03003432 struct gendisk *disk;
Jens Axboe9316a9e2018-10-15 08:40:37 -06003433
Christoph Hellwigb461dfc2021-06-02 09:53:18 +03003434 q = blk_mq_init_queue_data(set, queuedata);
3435 if (IS_ERR(q))
3436 return ERR_CAST(q);
Jens Axboe9316a9e2018-10-15 08:40:37 -06003437
Christoph Hellwig4a1fa412021-08-16 15:19:08 +02003438 disk = __alloc_disk_node(q, set->numa_node, lkclass);
Christoph Hellwigb461dfc2021-06-02 09:53:18 +03003439 if (!disk) {
3440 blk_cleanup_queue(q);
3441 return ERR_PTR(-ENOMEM);
Jens Axboe9316a9e2018-10-15 08:40:37 -06003442 }
Christoph Hellwigb461dfc2021-06-02 09:53:18 +03003443 return disk;
Jens Axboe9316a9e2018-10-15 08:40:37 -06003444}
Christoph Hellwigb461dfc2021-06-02 09:53:18 +03003445EXPORT_SYMBOL(__blk_mq_alloc_disk);
Jens Axboe9316a9e2018-10-15 08:40:37 -06003446
Jianchao Wang34d11ff2018-10-12 18:07:27 +08003447static struct blk_mq_hw_ctx *blk_mq_alloc_and_init_hctx(
3448 struct blk_mq_tag_set *set, struct request_queue *q,
3449 int hctx_idx, int node)
3450{
Ming Lei2f8f1332019-04-30 09:52:27 +08003451 struct blk_mq_hw_ctx *hctx = NULL, *tmp;
Jianchao Wang34d11ff2018-10-12 18:07:27 +08003452
Ming Lei2f8f1332019-04-30 09:52:27 +08003453 /* reuse dead hctx first */
3454 spin_lock(&q->unused_hctx_lock);
3455 list_for_each_entry(tmp, &q->unused_hctx_list, hctx_list) {
3456 if (tmp->numa_node == node) {
3457 hctx = tmp;
3458 break;
3459 }
3460 }
3461 if (hctx)
3462 list_del_init(&hctx->hctx_list);
3463 spin_unlock(&q->unused_hctx_lock);
3464
3465 if (!hctx)
3466 hctx = blk_mq_alloc_hctx(q, set, node);
Jianchao Wang34d11ff2018-10-12 18:07:27 +08003467 if (!hctx)
Ming Lei7c6c5b72019-04-30 09:52:26 +08003468 goto fail;
Jianchao Wang34d11ff2018-10-12 18:07:27 +08003469
Ming Lei7c6c5b72019-04-30 09:52:26 +08003470 if (blk_mq_init_hctx(q, set, hctx, hctx_idx))
3471 goto free_hctx;
Jianchao Wang34d11ff2018-10-12 18:07:27 +08003472
3473 return hctx;
Ming Lei7c6c5b72019-04-30 09:52:26 +08003474
3475 free_hctx:
3476 kobject_put(&hctx->kobj);
3477 fail:
3478 return NULL;
Jianchao Wang34d11ff2018-10-12 18:07:27 +08003479}
3480
Keith Busch868f2f02015-12-17 17:08:14 -07003481static void blk_mq_realloc_hw_ctxs(struct blk_mq_tag_set *set,
3482 struct request_queue *q)
Mike Snitzerb62c21b2015-03-12 23:56:02 -04003483{
Jianchao Wange01ad462018-10-12 18:07:28 +08003484 int i, j, end;
Keith Busch868f2f02015-12-17 17:08:14 -07003485 struct blk_mq_hw_ctx **hctxs = q->queue_hw_ctx;
Jens Axboe320ae512013-10-24 09:20:05 +01003486
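	/* Grow the hw ctx pointer array first if the set now has more queues. */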
Bart Van Asscheac0d6b92019-10-25 09:50:09 -07003487 if (q->nr_hw_queues < set->nr_hw_queues) {
3488 struct blk_mq_hw_ctx **new_hctxs;
3489
3490 new_hctxs = kcalloc_node(set->nr_hw_queues,
3491 sizeof(*new_hctxs), GFP_KERNEL,
3492 set->numa_node);
3493 if (!new_hctxs)
3494 return;
3495 if (hctxs)
3496 memcpy(new_hctxs, hctxs, q->nr_hw_queues *
3497 sizeof(*hctxs));
3498 q->queue_hw_ctx = new_hctxs;
Bart Van Asscheac0d6b92019-10-25 09:50:09 -07003499 kfree(hctxs);
3500 hctxs = new_hctxs;
3501 }
3502
Ming Leifb350e02018-01-06 16:27:40 +08003503 /* protect against switching io scheduler */
3504 mutex_lock(&q->sysfs_lock);
Christoph Hellwig24d2f902014-04-15 14:14:00 -06003505 for (i = 0; i < set->nr_hw_queues; i++) {
Keith Busch868f2f02015-12-17 17:08:14 -07003506 int node;
Jianchao Wang34d11ff2018-10-12 18:07:27 +08003507 struct blk_mq_hw_ctx *hctx;
Keith Busch868f2f02015-12-17 17:08:14 -07003508
Dongli Zhang7d76f852019-02-27 21:35:01 +08003509 node = blk_mq_hw_queue_to_node(&set->map[HCTX_TYPE_DEFAULT], i);
Jianchao Wang34d11ff2018-10-12 18:07:27 +08003510 /*
3511 * If the hw queue has been mapped to another numa node,
3512 * we need to realloc the hctx. If allocation fails, fall back
3513 * to using the previous one.
3514 */
3515 if (hctxs[i] && (hctxs[i]->numa_node == node))
3516 continue;
Jens Axboe320ae512013-10-24 09:20:05 +01003517
Jianchao Wang34d11ff2018-10-12 18:07:27 +08003518 hctx = blk_mq_alloc_and_init_hctx(set, q, i, node);
3519 if (hctx) {
Ming Lei2f8f1332019-04-30 09:52:27 +08003520 if (hctxs[i])
Jianchao Wang34d11ff2018-10-12 18:07:27 +08003521 blk_mq_exit_hctx(q, set, hctxs[i], i);
Jianchao Wang34d11ff2018-10-12 18:07:27 +08003522 hctxs[i] = hctx;
3523 } else {
3524 if (hctxs[i])
3525 				pr_warn("Allocate new hctx on node %d fails, "
3526 					"fallback to previous one on node %d\n",
3527 					node, hctxs[i]->numa_node);
3528 else
3529 break;
Keith Busch868f2f02015-12-17 17:08:14 -07003530 }
Jens Axboe320ae512013-10-24 09:20:05 +01003531 }
Jianchao Wange01ad462018-10-12 18:07:28 +08003532 /*
3533 * Increasing nr_hw_queues failed. Free the newly allocated
3534 * hctxs and keep the previous q->nr_hw_queues.
3535 */
3536 if (i != set->nr_hw_queues) {
3537 j = q->nr_hw_queues;
3538 end = i;
3539 } else {
3540 j = i;
3541 end = q->nr_hw_queues;
3542 q->nr_hw_queues = set->nr_hw_queues;
3543 }
Jianchao Wang34d11ff2018-10-12 18:07:27 +08003544
Jianchao Wange01ad462018-10-12 18:07:28 +08003545 for (; j < end; j++) {
Keith Busch868f2f02015-12-17 17:08:14 -07003546 struct blk_mq_hw_ctx *hctx = hctxs[j];
3547
3548 if (hctx) {
John Garrye155b0c2021-10-05 18:23:37 +08003549 __blk_mq_free_map_and_rqs(set, j);
Keith Busch868f2f02015-12-17 17:08:14 -07003550 blk_mq_exit_hctx(q, set, hctx, j);
Keith Busch868f2f02015-12-17 17:08:14 -07003551 hctxs[j] = NULL;
Keith Busch868f2f02015-12-17 17:08:14 -07003552 }
3553 }
Ming Leifb350e02018-01-06 16:27:40 +08003554 mutex_unlock(&q->sysfs_lock);
Keith Busch868f2f02015-12-17 17:08:14 -07003555}
3556
Christoph Hellwig26a97502021-06-02 09:53:17 +03003557int blk_mq_init_allocated_queue(struct blk_mq_tag_set *set,
3558 struct request_queue *q)
Keith Busch868f2f02015-12-17 17:08:14 -07003559{
Ming Lei66841672016-02-12 15:27:00 +08003560 /* mark the queue as mq asap */
3561 q->mq_ops = set->ops;
3562
Omar Sandoval34dbad52017-03-21 08:56:08 -07003563 q->poll_cb = blk_stat_alloc_callback(blk_mq_poll_stats_fn,
Stephen Bates720b8cc2017-04-07 06:24:03 -06003564 blk_mq_poll_stats_bkt,
3565 BLK_MQ_POLL_STATS_BKTS, q);
Omar Sandoval34dbad52017-03-21 08:56:08 -07003566 if (!q->poll_cb)
3567 goto err_exit;
3568
Ming Lei1db49092018-11-20 09:44:35 +08003569 if (blk_mq_alloc_ctxs(q))
Jes Sorensen41de54c2019-04-19 16:35:44 -04003570 goto err_poll;
Keith Busch868f2f02015-12-17 17:08:14 -07003571
Ming Lei737f98c2017-02-22 18:13:59 +08003572 /* init q->mq_kobj and sw queues' kobjects */
3573 blk_mq_sysfs_init(q);
3574
Ming Lei2f8f1332019-04-30 09:52:27 +08003575 INIT_LIST_HEAD(&q->unused_hctx_list);
3576 spin_lock_init(&q->unused_hctx_lock);
3577
Keith Busch868f2f02015-12-17 17:08:14 -07003578 blk_mq_realloc_hw_ctxs(set, q);
3579 if (!q->nr_hw_queues)
3580 goto err_hctxs;
Jens Axboe320ae512013-10-24 09:20:05 +01003581
Christoph Hellwig287922e2015-10-30 20:57:30 +08003582 INIT_WORK(&q->timeout_work, blk_mq_timeout_work);
Ming Leie56f6982015-07-16 19:53:22 +08003583 blk_queue_rq_timeout(q, set->timeout ? set->timeout : 30 * HZ);
Jens Axboe320ae512013-10-24 09:20:05 +01003584
Jens Axboea8908932018-10-16 14:23:06 -06003585 q->tag_set = set;
Jens Axboe320ae512013-10-24 09:20:05 +01003586
Jens Axboe94eddfb2013-11-19 09:25:07 -07003587 q->queue_flags |= QUEUE_FLAG_MQ_DEFAULT;
Ming Leicd191812018-12-18 12:15:29 +08003588 if (set->nr_maps > HCTX_TYPE_POLL &&
3589 set->map[HCTX_TYPE_POLL].nr_queues)
Christoph Hellwig6544d222018-12-02 17:46:28 +01003590 blk_queue_flag_set(QUEUE_FLAG_POLL, q);
Jens Axboe320ae512013-10-24 09:20:05 +01003591
Mike Snitzer28494502016-09-14 13:28:30 -04003592 INIT_DELAYED_WORK(&q->requeue_work, blk_mq_requeue_work);
Christoph Hellwig6fca6a62014-05-28 08:08:02 -06003593 INIT_LIST_HEAD(&q->requeue_list);
3594 spin_lock_init(&q->requeue_lock);
3595
Jens Axboeeba71762014-05-20 15:17:27 -06003596 q->nr_requests = set->queue_depth;
3597
Jens Axboe64f1c212016-11-14 13:03:03 -07003598 /*
3599 * Default to classic polling
3600 */
Yufen Yu29ece8b2019-03-18 22:44:41 +08003601 q->poll_nsec = BLK_MQ_POLL_CLASSIC;
Jens Axboe64f1c212016-11-14 13:03:03 -07003602
Christoph Hellwig24d2f902014-04-15 14:14:00 -06003603 blk_mq_init_cpu_queues(q, set->nr_hw_queues);
Jens Axboe0d2602c2014-05-13 15:10:52 -06003604 blk_mq_add_queue_tag_set(set, q);
Christoph Hellwig4b855ad2017-06-26 12:20:57 +02003605 blk_mq_map_swqueue(q);
Christoph Hellwig26a97502021-06-02 09:53:17 +03003606 return 0;
Christoph Hellwig18741982014-02-10 09:29:00 -07003607
Jens Axboe320ae512013-10-24 09:20:05 +01003608err_hctxs:
Keith Busch868f2f02015-12-17 17:08:14 -07003609 kfree(q->queue_hw_ctx);
zhengbin73d9c8d2019-07-23 22:10:42 +08003610 q->nr_hw_queues = 0;
Ming Lei1db49092018-11-20 09:44:35 +08003611 blk_mq_sysfs_deinit(q);
Jes Sorensen41de54c2019-04-19 16:35:44 -04003612err_poll:
3613 blk_stat_free_callback(q->poll_cb);
3614 q->poll_cb = NULL;
Ming Linc7de5722016-05-25 23:23:27 -07003615err_exit:
3616 q->mq_ops = NULL;
Christoph Hellwig26a97502021-06-02 09:53:17 +03003617 return -ENOMEM;
Jens Axboe320ae512013-10-24 09:20:05 +01003618}
Mike Snitzerb62c21b2015-03-12 23:56:02 -04003619EXPORT_SYMBOL(blk_mq_init_allocated_queue);
Jens Axboe320ae512013-10-24 09:20:05 +01003620
Ming Leic7e2d942019-04-30 09:52:25 +08003621/* tags can _not_ be used after returning from blk_mq_exit_queue */
3622void blk_mq_exit_queue(struct request_queue *q)
Jens Axboe320ae512013-10-24 09:20:05 +01003623{
Bart Van Assche630ef622021-05-13 10:15:29 -07003624 struct blk_mq_tag_set *set = q->tag_set;
Jens Axboe320ae512013-10-24 09:20:05 +01003625
Bart Van Assche630ef622021-05-13 10:15:29 -07003626 /* Checks hctx->flags & BLK_MQ_F_TAG_QUEUE_SHARED. */
Ming Lei624dbe42014-05-27 23:35:13 +08003627 blk_mq_exit_hw_queues(q, set, set->nr_hw_queues);
Bart Van Assche630ef622021-05-13 10:15:29 -07003628 /* May clear BLK_MQ_F_TAG_QUEUE_SHARED in hctx->flags. */
3629 blk_mq_del_queue_tag_set(q);
Jens Axboe320ae512013-10-24 09:20:05 +01003630}
Jens Axboe320ae512013-10-24 09:20:05 +01003631
Jens Axboea5164402014-09-10 09:02:03 -06003632static int __blk_mq_alloc_rq_maps(struct blk_mq_tag_set *set)
3633{
3634 int i;
3635
John Garry079a2e32021-10-05 18:23:39 +08003636 if (blk_mq_is_shared_tags(set->flags)) {
3637 set->shared_tags = blk_mq_alloc_map_and_rqs(set,
John Garrye155b0c2021-10-05 18:23:37 +08003638 BLK_MQ_NO_HCTX_IDX,
3639 set->queue_depth);
John Garry079a2e32021-10-05 18:23:39 +08003640 if (!set->shared_tags)
John Garrye155b0c2021-10-05 18:23:37 +08003641 return -ENOMEM;
3642 }
3643
Xianting Tian8229cca2020-09-26 10:39:47 +08003644 for (i = 0; i < set->nr_hw_queues; i++) {
John Garry63064be2021-10-05 18:23:35 +08003645 if (!__blk_mq_alloc_map_and_rqs(set, i))
Jens Axboea5164402014-09-10 09:02:03 -06003646 goto out_unwind;
Xianting Tian8229cca2020-09-26 10:39:47 +08003647 cond_resched();
3648 }
Jens Axboea5164402014-09-10 09:02:03 -06003649
3650 return 0;
3651
3652out_unwind:
John Garrye155b0c2021-10-05 18:23:37 +08003653 while (--i >= 0)
3654 __blk_mq_free_map_and_rqs(set, i);
3655
John Garry079a2e32021-10-05 18:23:39 +08003656 if (blk_mq_is_shared_tags(set->flags)) {
3657 blk_mq_free_map_and_rqs(set, set->shared_tags,
John Garrye155b0c2021-10-05 18:23:37 +08003658 BLK_MQ_NO_HCTX_IDX);
John Garry645db342021-10-05 18:23:36 +08003659 }
Jens Axboea5164402014-09-10 09:02:03 -06003660
Jens Axboea5164402014-09-10 09:02:03 -06003661 return -ENOMEM;
3662}
3663
3664/*
3665 * Allocate the request maps associated with this tag_set. Note that this
3666 * may reduce the depth asked for, if memory is tight. set->queue_depth
3667 * will be updated to reflect the allocated depth.
3668 */
John Garry63064be2021-10-05 18:23:35 +08003669static int blk_mq_alloc_set_map_and_rqs(struct blk_mq_tag_set *set)
Jens Axboea5164402014-09-10 09:02:03 -06003670{
3671 unsigned int depth;
3672 int err;
3673
3674 depth = set->queue_depth;
3675 do {
3676 err = __blk_mq_alloc_rq_maps(set);
3677 if (!err)
3678 break;
3679
3680 set->queue_depth >>= 1;
3681 if (set->queue_depth < set->reserved_tags + BLK_MQ_TAG_MIN) {
3682 err = -ENOMEM;
3683 break;
3684 }
3685 } while (set->queue_depth);
3686
3687 if (!set->queue_depth || err) {
3688 pr_err("blk-mq: failed to allocate request map\n");
3689 return -ENOMEM;
3690 }
3691
3692 if (depth != set->queue_depth)
3693 pr_info("blk-mq: reduced tag depth (%u -> %u)\n",
3694 depth, set->queue_depth);
3695
3696 return 0;
3697}
3698
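/*
 * Worked example of the reduction above, with illustrative numbers: for
 * set->reserved_tags == 0 and an initial set->queue_depth of 1024,
 * repeated allocation failures retry at 512, 256, 128, ... until either
 * an allocation succeeds or the depth drops below BLK_MQ_TAG_MIN, at
 * which point -ENOMEM is returned and "failed to allocate request map"
 * is logged. If it eventually succeeds at, say, 128, the
 * "reduced tag depth (1024 -> 128)" message is printed instead.
 */
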
Omar Sandovalebe8bdd2017-04-07 08:53:11 -06003699static int blk_mq_update_queue_map(struct blk_mq_tag_set *set)
3700{
Bart Van Assche6e66b492020-03-09 21:26:17 -07003701 /*
3702 * blk_mq_map_queues() and multiple .map_queues() implementations
3703 * expect that set->map[HCTX_TYPE_DEFAULT].nr_queues is set to the
3704 * number of hardware queues.
3705 */
3706 if (set->nr_maps == 1)
3707 set->map[HCTX_TYPE_DEFAULT].nr_queues = set->nr_hw_queues;
3708
Ming Lei59388702018-12-07 11:03:53 +08003709 if (set->ops->map_queues && !is_kdump_kernel()) {
Jens Axboeb3c661b2018-10-30 10:36:06 -06003710 int i;
3711
Ming Lei7d4901a2018-01-06 16:27:39 +08003712 /*
 3713	 * A transport's .map_queues callback is usually implemented in
 3714	 * the following way:
3715 *
3716 * for (queue = 0; queue < set->nr_hw_queues; queue++) {
3717 * mask = get_cpu_mask(queue)
3718 * for_each_cpu(cpu, mask)
Jens Axboeb3c661b2018-10-30 10:36:06 -06003719 * set->map[x].mq_map[cpu] = queue;
Ming Lei7d4901a2018-01-06 16:27:39 +08003720 * }
3721 *
 3722	 * When we need to remap, the table has to be cleared to kill
 3723	 * stale mappings, since a CPU may end up not being mapped to
 3724	 * any hw queue.
3725 */
Jens Axboeb3c661b2018-10-30 10:36:06 -06003726 for (i = 0; i < set->nr_maps; i++)
3727 blk_mq_clear_mq_map(&set->map[i]);
Ming Lei7d4901a2018-01-06 16:27:39 +08003728
Omar Sandovalebe8bdd2017-04-07 08:53:11 -06003729 return set->ops->map_queues(set);
Jens Axboeb3c661b2018-10-30 10:36:06 -06003730 } else {
3731 BUG_ON(set->nr_maps > 1);
Dongli Zhang7d76f852019-02-27 21:35:01 +08003732 return blk_mq_map_queues(&set->map[HCTX_TYPE_DEFAULT]);
Jens Axboeb3c661b2018-10-30 10:36:06 -06003733 }
Omar Sandovalebe8bdd2017-04-07 08:53:11 -06003734}
3735
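/*
 * Illustrative sketch of the .map_queues contract, mirroring the
 * pseudo-code in the comment above. "my_map_queues" and
 * my_hw_queue_to_cpumask() belong to a hypothetical transport driver;
 * blk_mq_update_queue_map() has already cleared the map(s) on a remap.
 */
#if 0
static int my_map_queues(struct blk_mq_tag_set *set)
{
	struct blk_mq_queue_map *qmap = &set->map[HCTX_TYPE_DEFAULT];
	unsigned int queue, cpu;

	for (queue = 0; queue < qmap->nr_queues; queue++) {
		const struct cpumask *mask = my_hw_queue_to_cpumask(queue);

		for_each_cpu(cpu, mask)
			qmap->mq_map[cpu] = queue;
	}
	return 0;
}
#endif
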
Bart Van Asschef7e76db2019-10-25 09:50:10 -07003736static int blk_mq_realloc_tag_set_tags(struct blk_mq_tag_set *set,
3737 int cur_nr_hw_queues, int new_nr_hw_queues)
3738{
3739 struct blk_mq_tags **new_tags;
3740
3741 if (cur_nr_hw_queues >= new_nr_hw_queues)
3742 return 0;
3743
3744 new_tags = kcalloc_node(new_nr_hw_queues, sizeof(struct blk_mq_tags *),
3745 GFP_KERNEL, set->numa_node);
3746 if (!new_tags)
3747 return -ENOMEM;
3748
3749 if (set->tags)
3750 memcpy(new_tags, set->tags, cur_nr_hw_queues *
3751 sizeof(*set->tags));
3752 kfree(set->tags);
3753 set->tags = new_tags;
3754 set->nr_hw_queues = new_nr_hw_queues;
3755
3756 return 0;
3757}
3758
Minwoo Im91cdf262020-12-05 00:20:53 +09003759static int blk_mq_alloc_tag_set_tags(struct blk_mq_tag_set *set,
3760 int new_nr_hw_queues)
3761{
3762 return blk_mq_realloc_tag_set_tags(set, 0, new_nr_hw_queues);
3763}
3764
Jens Axboea4391c62014-06-05 15:21:56 -06003765/*
3766 * Alloc a tag set to be associated with one or more request queues.
3767 * May fail with EINVAL for various error conditions. May adjust the
Minwoo Imc018c842018-06-30 22:12:41 +09003768 * requested depth down, if it's too large. In that case, the
Jens Axboea4391c62014-06-05 15:21:56 -06003769 * adjusted value will be stored in set->queue_depth.
3770 */
Christoph Hellwig24d2f902014-04-15 14:14:00 -06003771int blk_mq_alloc_tag_set(struct blk_mq_tag_set *set)
3772{
Jens Axboeb3c661b2018-10-30 10:36:06 -06003773 int i, ret;
Christoph Hellwigda695ba2016-09-14 16:18:55 +02003774
Bart Van Assche205fb5f2014-10-30 14:45:11 +01003775 BUILD_BUG_ON(BLK_MQ_MAX_DEPTH > 1 << BLK_MQ_UNIQUE_TAG_BITS);
3776
Christoph Hellwig24d2f902014-04-15 14:14:00 -06003777 if (!set->nr_hw_queues)
3778 return -EINVAL;
Jens Axboea4391c62014-06-05 15:21:56 -06003779 if (!set->queue_depth)
Christoph Hellwig24d2f902014-04-15 14:14:00 -06003780 return -EINVAL;
3781 if (set->queue_depth < set->reserved_tags + BLK_MQ_TAG_MIN)
3782 return -EINVAL;
3783
Christoph Hellwig7d7e0f92016-09-14 16:18:54 +02003784 if (!set->ops->queue_rq)
Christoph Hellwig24d2f902014-04-15 14:14:00 -06003785 return -EINVAL;
3786
Ming Leide148292017-10-14 17:22:29 +08003787 if (!set->ops->get_budget ^ !set->ops->put_budget)
3788 return -EINVAL;
3789
Jens Axboea4391c62014-06-05 15:21:56 -06003790 if (set->queue_depth > BLK_MQ_MAX_DEPTH) {
3791 pr_info("blk-mq: reduced tag depth to %u\n",
3792 BLK_MQ_MAX_DEPTH);
3793 set->queue_depth = BLK_MQ_MAX_DEPTH;
3794 }
Christoph Hellwig24d2f902014-04-15 14:14:00 -06003795
Jens Axboeb3c661b2018-10-30 10:36:06 -06003796 if (!set->nr_maps)
3797 set->nr_maps = 1;
3798 else if (set->nr_maps > HCTX_MAX_TYPES)
3799 return -EINVAL;
3800
Shaohua Li6637fad2014-11-30 16:00:58 -08003801 /*
3802 * If a crashdump is active, then we are potentially in a very
3803 * memory constrained environment. Limit us to 1 queue and
3804 * 64 tags to prevent using too much memory.
3805 */
3806 if (is_kdump_kernel()) {
3807 set->nr_hw_queues = 1;
Ming Lei59388702018-12-07 11:03:53 +08003808 set->nr_maps = 1;
Shaohua Li6637fad2014-11-30 16:00:58 -08003809 set->queue_depth = min(64U, set->queue_depth);
3810 }
Keith Busch868f2f02015-12-17 17:08:14 -07003811 /*
Jens Axboe392546a2018-10-29 13:25:27 -06003812 * There is no use for more h/w queues than cpus if we just have
3813 * a single map
Keith Busch868f2f02015-12-17 17:08:14 -07003814 */
Jens Axboe392546a2018-10-29 13:25:27 -06003815 if (set->nr_maps == 1 && set->nr_hw_queues > nr_cpu_ids)
Keith Busch868f2f02015-12-17 17:08:14 -07003816 set->nr_hw_queues = nr_cpu_ids;
Shaohua Li6637fad2014-11-30 16:00:58 -08003817
Minwoo Im91cdf262020-12-05 00:20:53 +09003818 if (blk_mq_alloc_tag_set_tags(set, set->nr_hw_queues) < 0)
Jens Axboea5164402014-09-10 09:02:03 -06003819 return -ENOMEM;
Christoph Hellwig24d2f902014-04-15 14:14:00 -06003820
Christoph Hellwigda695ba2016-09-14 16:18:55 +02003821 ret = -ENOMEM;
Jens Axboeb3c661b2018-10-30 10:36:06 -06003822 for (i = 0; i < set->nr_maps; i++) {
3823 set->map[i].mq_map = kcalloc_node(nr_cpu_ids,
Ming Lei07b35eb2018-12-17 18:42:45 +08003824 sizeof(set->map[i].mq_map[0]),
Jens Axboeb3c661b2018-10-30 10:36:06 -06003825 GFP_KERNEL, set->numa_node);
3826 if (!set->map[i].mq_map)
3827 goto out_free_mq_map;
Ming Lei59388702018-12-07 11:03:53 +08003828 set->map[i].nr_queues = is_kdump_kernel() ? 1 : set->nr_hw_queues;
Jens Axboeb3c661b2018-10-30 10:36:06 -06003829 }
Christoph Hellwigbdd17e72016-09-14 16:18:53 +02003830
Omar Sandovalebe8bdd2017-04-07 08:53:11 -06003831 ret = blk_mq_update_queue_map(set);
Christoph Hellwigda695ba2016-09-14 16:18:55 +02003832 if (ret)
3833 goto out_free_mq_map;
3834
John Garry63064be2021-10-05 18:23:35 +08003835 ret = blk_mq_alloc_set_map_and_rqs(set);
Christoph Hellwigda695ba2016-09-14 16:18:55 +02003836 if (ret)
Christoph Hellwigbdd17e72016-09-14 16:18:53 +02003837 goto out_free_mq_map;
Christoph Hellwig24d2f902014-04-15 14:14:00 -06003838
Jens Axboe0d2602c2014-05-13 15:10:52 -06003839 mutex_init(&set->tag_list_lock);
3840 INIT_LIST_HEAD(&set->tag_list);
3841
Christoph Hellwig24d2f902014-04-15 14:14:00 -06003842 return 0;
Christoph Hellwigbdd17e72016-09-14 16:18:53 +02003843
3844out_free_mq_map:
Jens Axboeb3c661b2018-10-30 10:36:06 -06003845 for (i = 0; i < set->nr_maps; i++) {
3846 kfree(set->map[i].mq_map);
3847 set->map[i].mq_map = NULL;
3848 }
Robert Elliott5676e7b2014-09-02 11:38:44 -05003849 kfree(set->tags);
3850 set->tags = NULL;
Christoph Hellwigda695ba2016-09-14 16:18:55 +02003851 return ret;
Christoph Hellwig24d2f902014-04-15 14:14:00 -06003852}
3853EXPORT_SYMBOL(blk_mq_alloc_tag_set);
3854
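/*
 * Illustrative sketch of a caller (hypothetical driver; "my_dev",
 * "my_mq_ops" and the depth/flag values are made up): fill in the tag
 * set fields validated above, then allocate the tags and static
 * requests.
 */
#if 0
	/* e.g. in a driver's probe routine */
	struct blk_mq_tag_set *set = &my_dev->tag_set;
	int ret;

	memset(set, 0, sizeof(*set));
	set->ops		= &my_mq_ops;
	set->nr_hw_queues	= num_online_cpus();
	set->nr_maps		= 1;
	set->queue_depth	= 128;
	set->reserved_tags	= 1;	/* e.g. for internal commands */
	set->numa_node		= NUMA_NO_NODE;
	set->flags		= BLK_MQ_F_SHOULD_MERGE;

	ret = blk_mq_alloc_tag_set(set);
	if (ret)
		return ret;
#endif
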
Christoph Hellwigcdb14e02021-06-02 09:53:16 +03003855/* allocate and initialize a tagset for a simple single-queue device */
3856int blk_mq_alloc_sq_tag_set(struct blk_mq_tag_set *set,
3857 const struct blk_mq_ops *ops, unsigned int queue_depth,
3858 unsigned int set_flags)
3859{
3860 memset(set, 0, sizeof(*set));
3861 set->ops = ops;
3862 set->nr_hw_queues = 1;
3863 set->nr_maps = 1;
3864 set->queue_depth = queue_depth;
3865 set->numa_node = NUMA_NO_NODE;
3866 set->flags = set_flags;
3867 return blk_mq_alloc_tag_set(set);
3868}
3869EXPORT_SYMBOL_GPL(blk_mq_alloc_sq_tag_set);
3870
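/*
 * Illustrative use of the single-queue helper above (hypothetical
 * "my_dev" and "my_mq_ops"; the depth of 64 is arbitrary). It replaces
 * the manual field setup needed for the general case.
 */
#if 0
	err = blk_mq_alloc_sq_tag_set(&my_dev->tag_set, &my_mq_ops, 64,
				      BLK_MQ_F_SHOULD_MERGE);
	if (err)
		return err;
#endif
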
Christoph Hellwig24d2f902014-04-15 14:14:00 -06003871void blk_mq_free_tag_set(struct blk_mq_tag_set *set)
3872{
Jens Axboeb3c661b2018-10-30 10:36:06 -06003873 int i, j;
Christoph Hellwig24d2f902014-04-15 14:14:00 -06003874
John Garrye155b0c2021-10-05 18:23:37 +08003875 for (i = 0; i < set->nr_hw_queues; i++)
3876 __blk_mq_free_map_and_rqs(set, i);
Jens Axboe484b4062014-05-21 14:01:15 -06003877
John Garry079a2e32021-10-05 18:23:39 +08003878 if (blk_mq_is_shared_tags(set->flags)) {
3879 blk_mq_free_map_and_rqs(set, set->shared_tags,
John Garrye155b0c2021-10-05 18:23:37 +08003880 BLK_MQ_NO_HCTX_IDX);
3881 }
John Garry32bc15a2020-08-19 23:20:24 +08003882
Jens Axboeb3c661b2018-10-30 10:36:06 -06003883 for (j = 0; j < set->nr_maps; j++) {
3884 kfree(set->map[j].mq_map);
3885 set->map[j].mq_map = NULL;
3886 }
Christoph Hellwigbdd17e72016-09-14 16:18:53 +02003887
Ming Lei981bd182014-04-24 00:07:34 +08003888 kfree(set->tags);
Robert Elliott5676e7b2014-09-02 11:38:44 -05003889 set->tags = NULL;
Christoph Hellwig24d2f902014-04-15 14:14:00 -06003890}
3891EXPORT_SYMBOL(blk_mq_free_tag_set);
3892
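/*
 * Illustrative teardown ordering for a hypothetical driver: every
 * request queue created from the set must be torn down before the set
 * itself is freed, since freeing the set releases the tags and the
 * statically allocated requests.
 */
#if 0
	del_gendisk(my_dev->disk);
	blk_cleanup_queue(my_dev->queue);	/* exits hctxs for this queue */
	blk_mq_free_tag_set(&my_dev->tag_set);	/* no queue uses the set now */
#endif
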
Jens Axboee3a2b3f2014-05-20 11:49:02 -06003893int blk_mq_update_nr_requests(struct request_queue *q, unsigned int nr)
3894{
3895 struct blk_mq_tag_set *set = q->tag_set;
3896 struct blk_mq_hw_ctx *hctx;
3897 int i, ret;
3898
Jens Axboebd166ef2017-01-17 06:03:22 -07003899 if (!set)
Jens Axboee3a2b3f2014-05-20 11:49:02 -06003900 return -EINVAL;
3901
Aleksei Zakharove5fa8142019-02-08 19:14:05 +03003902 if (q->nr_requests == nr)
3903 return 0;
3904
Jens Axboe70f36b62017-01-19 10:59:07 -07003905 blk_mq_freeze_queue(q);
Ming Lei24f5a902018-01-06 16:27:38 +08003906 blk_mq_quiesce_queue(q);
Jens Axboe70f36b62017-01-19 10:59:07 -07003907
Jens Axboee3a2b3f2014-05-20 11:49:02 -06003908 ret = 0;
3909 queue_for_each_hw_ctx(q, hctx, i) {
Keith Busche9137d42016-02-18 14:56:35 -07003910 if (!hctx->tags)
3911 continue;
Jens Axboebd166ef2017-01-17 06:03:22 -07003912 /*
3913 * If we're using an MQ scheduler, just update the scheduler
3914 * queue depth. This is similar to what the old code would do.
3915 */
John Garryf6adcef2021-10-05 18:23:29 +08003916 if (hctx->sched_tags) {
Jens Axboe70f36b62017-01-19 10:59:07 -07003917 ret = blk_mq_tag_update_depth(hctx, &hctx->sched_tags,
John Garryf6adcef2021-10-05 18:23:29 +08003918 nr, true);
John Garryf6adcef2021-10-05 18:23:29 +08003919 } else {
3920 ret = blk_mq_tag_update_depth(hctx, &hctx->tags, nr,
3921 false);
Jens Axboe70f36b62017-01-19 10:59:07 -07003922 }
Jens Axboee3a2b3f2014-05-20 11:49:02 -06003923 if (ret)
3924 break;
Jens Axboe77f1e0a2019-01-18 10:34:16 -07003925 if (q->elevator && q->elevator->type->ops.depth_updated)
3926 q->elevator->type->ops.depth_updated(hctx);
Jens Axboee3a2b3f2014-05-20 11:49:02 -06003927 }
John Garryd97e5942021-05-13 20:00:58 +08003928 if (!ret) {
Jens Axboee3a2b3f2014-05-20 11:49:02 -06003929 q->nr_requests = nr;
John Garry079a2e32021-10-05 18:23:39 +08003930 if (blk_mq_is_shared_tags(set->flags)) {
John Garry8fa04462021-10-05 18:23:28 +08003931 if (q->elevator)
John Garry079a2e32021-10-05 18:23:39 +08003932 blk_mq_tag_update_sched_shared_tags(q);
John Garry8fa04462021-10-05 18:23:28 +08003933 else
John Garry079a2e32021-10-05 18:23:39 +08003934 blk_mq_tag_resize_shared_tags(set, nr);
John Garry8fa04462021-10-05 18:23:28 +08003935 }
John Garryd97e5942021-05-13 20:00:58 +08003936 }
Jens Axboee3a2b3f2014-05-20 11:49:02 -06003937
Ming Lei24f5a902018-01-06 16:27:38 +08003938 blk_mq_unquiesce_queue(q);
Jens Axboe70f36b62017-01-19 10:59:07 -07003939 blk_mq_unfreeze_queue(q);
Jens Axboe70f36b62017-01-19 10:59:07 -07003940
Jens Axboee3a2b3f2014-05-20 11:49:02 -06003941 return ret;
3942}
3943
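/*
 * Writes to a queue's "nr_requests" sysfs attribute are serviced by the
 * helper above (via the queue sysfs store handler); the effect of
 * "echo 64 > /sys/block/<dev>/queue/nr_requests" is roughly:
 */
#if 0
	err = blk_mq_update_nr_requests(q, 64);
#endif
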
Jianchao Wangd48ece22018-08-21 15:15:03 +08003944/*
3945 * request_queue and elevator_type pair.
3946 * It is just used by __blk_mq_update_nr_hw_queues to cache
3947 * the elevator_type associated with a request_queue.
3948 */
3949struct blk_mq_qe_pair {
3950 struct list_head node;
3951 struct request_queue *q;
3952 struct elevator_type *type;
3953};
3954
3955/*
 3956 * Cache the elevator_type in the qe pair list and switch the
 3957 * io scheduler to 'none'.
3958 */
3959static bool blk_mq_elv_switch_none(struct list_head *head,
3960 struct request_queue *q)
3961{
3962 struct blk_mq_qe_pair *qe;
3963
3964 if (!q->elevator)
3965 return true;
3966
3967 qe = kmalloc(sizeof(*qe), GFP_NOIO | __GFP_NOWARN | __GFP_NORETRY);
3968 if (!qe)
3969 return false;
3970
3971 INIT_LIST_HEAD(&qe->node);
3972 qe->q = q;
3973 qe->type = q->elevator->type;
3974 list_add(&qe->node, head);
3975
3976 mutex_lock(&q->sysfs_lock);
3977 /*
3978 * After elevator_switch_mq, the previous elevator_queue will be
 3979	 * released by elevator_release, and the reference to the io
 3980	 * scheduler module taken by elevator_get will be put. Take an
 3981	 * extra reference to the module here to prevent it from being
 3982	 * removed while we still need it.
3983 */
3984 __module_get(qe->type->elevator_owner);
3985 elevator_switch_mq(q, NULL);
3986 mutex_unlock(&q->sysfs_lock);
3987
3988 return true;
3989}
3990
3991static void blk_mq_elv_switch_back(struct list_head *head,
3992 struct request_queue *q)
3993{
3994 struct blk_mq_qe_pair *qe;
3995 struct elevator_type *t = NULL;
3996
3997 list_for_each_entry(qe, head, node)
3998 if (qe->q == q) {
3999 t = qe->type;
4000 break;
4001 }
4002
4003 if (!t)
4004 return;
4005
4006 list_del(&qe->node);
4007 kfree(qe);
4008
4009 mutex_lock(&q->sysfs_lock);
4010 elevator_switch_mq(q, t);
4011 mutex_unlock(&q->sysfs_lock);
4012}
4013
Keith Busche4dc2b32017-05-30 14:39:11 -04004014static void __blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set,
4015 int nr_hw_queues)
Keith Busch868f2f02015-12-17 17:08:14 -07004016{
4017 struct request_queue *q;
Jianchao Wangd48ece22018-08-21 15:15:03 +08004018 LIST_HEAD(head);
Jianchao Wange01ad462018-10-12 18:07:28 +08004019 int prev_nr_hw_queues;
Keith Busch868f2f02015-12-17 17:08:14 -07004020
Bart Van Assche705cda92017-04-07 11:16:49 -07004021 lockdep_assert_held(&set->tag_list_lock);
4022
Jens Axboe392546a2018-10-29 13:25:27 -06004023 if (set->nr_maps == 1 && nr_hw_queues > nr_cpu_ids)
Keith Busch868f2f02015-12-17 17:08:14 -07004024 nr_hw_queues = nr_cpu_ids;
Weiping Zhangfe35ec52020-06-17 14:18:37 +08004025 if (nr_hw_queues < 1)
4026 return;
4027 if (set->nr_maps == 1 && nr_hw_queues == set->nr_hw_queues)
Keith Busch868f2f02015-12-17 17:08:14 -07004028 return;
4029
4030 list_for_each_entry(q, &set->tag_list, tag_set_list)
4031 blk_mq_freeze_queue(q);
Jianchao Wangd48ece22018-08-21 15:15:03 +08004032 /*
4033 * Switch IO scheduler to 'none', cleaning up the data associated
4034 * with the previous scheduler. We will switch back once we are done
 4035	 * updating the new sw-to-hw queue mappings.
4036 */
4037 list_for_each_entry(q, &set->tag_list, tag_set_list)
4038 if (!blk_mq_elv_switch_none(&head, q))
4039 goto switch_back;
Keith Busch868f2f02015-12-17 17:08:14 -07004040
Jianchao Wang477e19d2018-10-12 18:07:25 +08004041 list_for_each_entry(q, &set->tag_list, tag_set_list) {
4042 blk_mq_debugfs_unregister_hctxs(q);
4043 blk_mq_sysfs_unregister(q);
4044 }
4045
Weiping Zhanga2584e42020-05-07 21:03:56 +08004046 prev_nr_hw_queues = set->nr_hw_queues;
Bart Van Asschef7e76db2019-10-25 09:50:10 -07004047 if (blk_mq_realloc_tag_set_tags(set, set->nr_hw_queues, nr_hw_queues) <
4048 0)
4049 goto reregister;
4050
Keith Busch868f2f02015-12-17 17:08:14 -07004051 set->nr_hw_queues = nr_hw_queues;
Jianchao Wange01ad462018-10-12 18:07:28 +08004052fallback:
Weiping Zhangaa880ad2020-05-13 08:44:05 +08004053 blk_mq_update_queue_map(set);
Keith Busch868f2f02015-12-17 17:08:14 -07004054 list_for_each_entry(q, &set->tag_list, tag_set_list) {
4055 blk_mq_realloc_hw_ctxs(set, q);
Jianchao Wange01ad462018-10-12 18:07:28 +08004056 if (q->nr_hw_queues != set->nr_hw_queues) {
4057 pr_warn("Increasing nr_hw_queues to %d fails, fallback to %d\n",
4058 nr_hw_queues, prev_nr_hw_queues);
4059 set->nr_hw_queues = prev_nr_hw_queues;
Dongli Zhang7d76f852019-02-27 21:35:01 +08004060 blk_mq_map_queues(&set->map[HCTX_TYPE_DEFAULT]);
Jianchao Wange01ad462018-10-12 18:07:28 +08004061 goto fallback;
4062 }
Jianchao Wang477e19d2018-10-12 18:07:25 +08004063 blk_mq_map_swqueue(q);
4064 }
4065
Bart Van Asschef7e76db2019-10-25 09:50:10 -07004066reregister:
Jianchao Wang477e19d2018-10-12 18:07:25 +08004067 list_for_each_entry(q, &set->tag_list, tag_set_list) {
4068 blk_mq_sysfs_register(q);
4069 blk_mq_debugfs_register_hctxs(q);
Keith Busch868f2f02015-12-17 17:08:14 -07004070 }
4071
Jianchao Wangd48ece22018-08-21 15:15:03 +08004072switch_back:
4073 list_for_each_entry(q, &set->tag_list, tag_set_list)
4074 blk_mq_elv_switch_back(&head, q);
4075
Keith Busch868f2f02015-12-17 17:08:14 -07004076 list_for_each_entry(q, &set->tag_list, tag_set_list)
4077 blk_mq_unfreeze_queue(q);
4078}
Keith Busche4dc2b32017-05-30 14:39:11 -04004079
4080void blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set, int nr_hw_queues)
4081{
4082 mutex_lock(&set->tag_list_lock);
4083 __blk_mq_update_nr_hw_queues(set, nr_hw_queues);
4084 mutex_unlock(&set->tag_list_lock);
4085}
Keith Busch868f2f02015-12-17 17:08:14 -07004086EXPORT_SYMBOL_GPL(blk_mq_update_nr_hw_queues);
4087
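/*
 * Illustrative sketch: a driver that learns a new hardware queue count,
 * for example after a controller reset, resizes its queues like this
 * ("my_dev" and new_nr_queues are hypothetical).
 */
#if 0
	blk_mq_update_nr_hw_queues(&my_dev->tag_set, new_nr_queues);
#endif
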
Omar Sandoval34dbad52017-03-21 08:56:08 -07004088/* Enable polling stats and return whether they were already enabled. */
4089static bool blk_poll_stats_enable(struct request_queue *q)
4090{
4091 if (test_bit(QUEUE_FLAG_POLL_STATS, &q->queue_flags) ||
Bart Van Assche7dfdbc72018-03-07 17:10:05 -08004092 blk_queue_flag_test_and_set(QUEUE_FLAG_POLL_STATS, q))
Omar Sandoval34dbad52017-03-21 08:56:08 -07004093 return true;
4094 blk_stat_add_callback(q, q->poll_cb);
4095 return false;
4096}
4097
4098static void blk_mq_poll_stats_start(struct request_queue *q)
4099{
4100 /*
4101 * We don't arm the callback if polling stats are not enabled or the
4102 * callback is already active.
4103 */
4104 if (!test_bit(QUEUE_FLAG_POLL_STATS, &q->queue_flags) ||
4105 blk_stat_is_active(q->poll_cb))
4106 return;
4107
4108 blk_stat_activate_msecs(q->poll_cb, 100);
4109}
4110
4111static void blk_mq_poll_stats_fn(struct blk_stat_callback *cb)
4112{
4113 struct request_queue *q = cb->data;
Stephen Bates720b8cc2017-04-07 06:24:03 -06004114 int bucket;
Omar Sandoval34dbad52017-03-21 08:56:08 -07004115
Stephen Bates720b8cc2017-04-07 06:24:03 -06004116 for (bucket = 0; bucket < BLK_MQ_POLL_STATS_BKTS; bucket++) {
4117 if (cb->stat[bucket].nr_samples)
4118 q->poll_stat[bucket] = cb->stat[bucket];
4119 }
Omar Sandoval34dbad52017-03-21 08:56:08 -07004120}
4121
Jens Axboe64f1c212016-11-14 13:03:03 -07004122static unsigned long blk_mq_poll_nsecs(struct request_queue *q,
Jens Axboe64f1c212016-11-14 13:03:03 -07004123 struct request *rq)
4124{
Jens Axboe64f1c212016-11-14 13:03:03 -07004125 unsigned long ret = 0;
Stephen Bates720b8cc2017-04-07 06:24:03 -06004126 int bucket;
Jens Axboe64f1c212016-11-14 13:03:03 -07004127
4128 /*
4129 * If stats collection isn't on, don't sleep but turn it on for
4130 * future users
4131 */
Omar Sandoval34dbad52017-03-21 08:56:08 -07004132 if (!blk_poll_stats_enable(q))
Jens Axboe64f1c212016-11-14 13:03:03 -07004133 return 0;
4134
4135 /*
Jens Axboe64f1c212016-11-14 13:03:03 -07004136 * As an optimistic guess, use half of the mean service time
4137 * for this type of request. We can (and should) make this smarter.
4138 * For instance, if the completion latencies are tight, we can
4139 * get closer than just half the mean. This is especially
4140 * important on devices where the completion latencies are longer
Stephen Bates720b8cc2017-04-07 06:24:03 -06004141 * than ~10 usec. We do use the stats for the relevant IO size
 4142	 * if available, which does lead to better estimates.
Jens Axboe64f1c212016-11-14 13:03:03 -07004143 */
Stephen Bates720b8cc2017-04-07 06:24:03 -06004144 bucket = blk_mq_poll_stats_bkt(rq);
4145 if (bucket < 0)
4146 return ret;
4147
4148 if (q->poll_stat[bucket].nr_samples)
4149 ret = (q->poll_stat[bucket].mean + 1) / 2;
Jens Axboe64f1c212016-11-14 13:03:03 -07004150
4151 return ret;
4152}
4153
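/*
 * Worked example of the estimate above, with illustrative numbers: if
 * the stats bucket for this request reports a mean completion time of
 * 18000 ns, the hybrid path sleeps for (18000 + 1) / 2 = 9000 ns
 * (roughly 9 us) before falling back to busy polling.
 */
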
Christoph Hellwigc6699d62021-10-12 13:12:16 +02004154static bool blk_mq_poll_hybrid(struct request_queue *q, blk_qc_t qc)
Jens Axboe06426ad2016-11-14 13:01:59 -07004155{
Christoph Hellwigc6699d62021-10-12 13:12:16 +02004156 struct blk_mq_hw_ctx *hctx = blk_qc_to_hctx(q, qc);
4157 struct request *rq = blk_qc_to_rq(hctx, qc);
Jens Axboe06426ad2016-11-14 13:01:59 -07004158 struct hrtimer_sleeper hs;
4159 enum hrtimer_mode mode;
Jens Axboe64f1c212016-11-14 13:03:03 -07004160 unsigned int nsecs;
Jens Axboe06426ad2016-11-14 13:01:59 -07004161 ktime_t kt;
4162
Christoph Hellwigc6699d62021-10-12 13:12:16 +02004163 /*
 4164	 * If a request has completed on a queue that uses an I/O scheduler, we
4165 * won't get back a request from blk_qc_to_rq.
4166 */
4167 if (!rq || (rq->rq_flags & RQF_MQ_POLL_SLEPT))
Jens Axboe64f1c212016-11-14 13:03:03 -07004168 return false;
4169
4170 /*
Jens Axboe1052b8a2018-11-26 08:21:49 -07004171 * If we get here, hybrid polling is enabled. Hence poll_nsec can be:
Jens Axboe64f1c212016-11-14 13:03:03 -07004172 *
Jens Axboe64f1c212016-11-14 13:03:03 -07004173 * 0: use half of prev avg
4174 * >0: use this specific value
4175 */
Jens Axboe1052b8a2018-11-26 08:21:49 -07004176 if (q->poll_nsec > 0)
Jens Axboe64f1c212016-11-14 13:03:03 -07004177 nsecs = q->poll_nsec;
4178 else
John Garrycae740a2020-02-26 20:10:15 +08004179 nsecs = blk_mq_poll_nsecs(q, rq);
Jens Axboe64f1c212016-11-14 13:03:03 -07004180
4181 if (!nsecs)
Jens Axboe06426ad2016-11-14 13:01:59 -07004182 return false;
4183
Jens Axboe76a86f92018-01-10 11:30:56 -07004184 rq->rq_flags |= RQF_MQ_POLL_SLEPT;
Jens Axboe06426ad2016-11-14 13:01:59 -07004185
4186 /*
4187 * This will be replaced with the stats tracking code, using
4188 * 'avg_completion_time / 2' as the pre-sleep target.
4189 */
Thomas Gleixner8b0e1952016-12-25 12:30:41 +01004190 kt = nsecs;
Jens Axboe06426ad2016-11-14 13:01:59 -07004191
4192 mode = HRTIMER_MODE_REL;
Sebastian Andrzej Siewiordbc16252019-07-26 20:30:50 +02004193 hrtimer_init_sleeper_on_stack(&hs, CLOCK_MONOTONIC, mode);
Jens Axboe06426ad2016-11-14 13:01:59 -07004194 hrtimer_set_expires(&hs.timer, kt);
4195
Jens Axboe06426ad2016-11-14 13:01:59 -07004196 do {
Tejun Heo5a61c362018-01-09 08:29:52 -08004197 if (blk_mq_rq_state(rq) == MQ_RQ_COMPLETE)
Jens Axboe06426ad2016-11-14 13:01:59 -07004198 break;
4199 set_current_state(TASK_UNINTERRUPTIBLE);
Thomas Gleixner9dd88132019-07-30 21:16:55 +02004200 hrtimer_sleeper_start_expires(&hs, mode);
Jens Axboe06426ad2016-11-14 13:01:59 -07004201 if (hs.task)
4202 io_schedule();
4203 hrtimer_cancel(&hs.timer);
4204 mode = HRTIMER_MODE_ABS;
4205 } while (hs.task && !signal_pending(current));
4206
4207 __set_current_state(TASK_RUNNING);
4208 destroy_hrtimer_on_stack(&hs.timer);
Christoph Hellwigc6699d62021-10-12 13:12:16 +02004209
4210 /*
4211 * If we sleep, have the caller restart the poll loop to reset the
4212 * state. Like for the other success return cases, the caller is
4213 * responsible for checking if the IO completed. If the IO isn't
4214 * complete, we'll get called again and will go straight to the busy
4215 * poll loop.
4216 */
Jens Axboe06426ad2016-11-14 13:01:59 -07004217 return true;
4218}
4219
Christoph Hellwigc6699d62021-10-12 13:12:16 +02004220static int blk_mq_poll_classic(struct request_queue *q, blk_qc_t cookie,
Jens Axboe5a72e892021-10-12 09:24:29 -06004221 struct io_comp_batch *iob, unsigned int flags)
Jens Axboebbd7bb72016-11-04 09:34:34 -06004222{
Christoph Hellwigc6699d62021-10-12 13:12:16 +02004223 struct blk_mq_hw_ctx *hctx = blk_qc_to_hctx(q, cookie);
4224 long state = get_current_state();
4225 int ret;
Jens Axboe1052b8a2018-11-26 08:21:49 -07004226
Christoph Hellwigc6699d62021-10-12 13:12:16 +02004227 do {
Jens Axboe5a72e892021-10-12 09:24:29 -06004228 ret = q->mq_ops->poll(hctx, iob);
Christoph Hellwigc6699d62021-10-12 13:12:16 +02004229 if (ret > 0) {
Christoph Hellwigc6699d62021-10-12 13:12:16 +02004230 __set_current_state(TASK_RUNNING);
4231 return ret;
4232 }
4233
4234 if (signal_pending_state(state, current))
4235 __set_current_state(TASK_RUNNING);
4236 if (task_is_running(current))
4237 return 1;
4238
Christoph Hellwigef99b2d2021-10-12 13:12:19 +02004239 if (ret < 0 || (flags & BLK_POLL_ONESHOT))
Christoph Hellwigc6699d62021-10-12 13:12:16 +02004240 break;
4241 cpu_relax();
4242 } while (!need_resched());
4243
4244 __set_current_state(TASK_RUNNING);
4245 return 0;
Jens Axboe1052b8a2018-11-26 08:21:49 -07004246}
4247
Jens Axboe5a72e892021-10-12 09:24:29 -06004248int blk_mq_poll(struct request_queue *q, blk_qc_t cookie, struct io_comp_batch *iob,
4249 unsigned int flags)
Jens Axboe1052b8a2018-11-26 08:21:49 -07004250{
Christoph Hellwigd729cf92021-10-12 13:12:20 +02004251 if (!(flags & BLK_POLL_NOSLEEP) &&
4252 q->poll_nsec != BLK_MQ_POLL_CLASSIC) {
Christoph Hellwigc6699d62021-10-12 13:12:16 +02004253 if (blk_mq_poll_hybrid(q, cookie))
Jens Axboe85f4d4b2018-11-06 13:30:55 -07004254 return 1;
Christoph Hellwigc6699d62021-10-12 13:12:16 +02004255 }
Jens Axboe5a72e892021-10-12 09:24:29 -06004256 return blk_mq_poll_classic(q, cookie, iob, flags);
Jens Axboebbd7bb72016-11-04 09:34:34 -06004257}
4258
Jens Axboe9cf2bab2018-10-31 17:01:22 -06004259unsigned int blk_mq_rq_cpu(struct request *rq)
4260{
4261 return rq->mq_ctx->cpu;
4262}
4263EXPORT_SYMBOL(blk_mq_rq_cpu);
4264
Jens Axboe320ae512013-10-24 09:20:05 +01004265static int __init blk_mq_init(void)
4266{
Christoph Hellwigc3077b52020-06-11 08:44:41 +02004267 int i;
4268
4269 for_each_possible_cpu(i)
Sebastian Andrzej Siewiorf9ab4912021-01-23 21:10:27 +01004270 init_llist_head(&per_cpu(blk_cpu_done, i));
Christoph Hellwigc3077b52020-06-11 08:44:41 +02004271 open_softirq(BLOCK_SOFTIRQ, blk_done_softirq);
4272
4273 cpuhp_setup_state_nocalls(CPUHP_BLOCK_SOFTIRQ_DEAD,
4274 "block/softirq:dead", NULL,
4275 blk_softirq_cpu_dead);
Thomas Gleixner9467f852016-09-22 08:05:17 -06004276 cpuhp_setup_state_multi(CPUHP_BLK_MQ_DEAD, "block/mq:dead", NULL,
4277 blk_mq_hctx_notify_dead);
Ming Leibf0beec2020-05-29 15:53:15 +02004278 cpuhp_setup_state_multi(CPUHP_AP_BLK_MQ_ONLINE, "block/mq:online",
4279 blk_mq_hctx_notify_online,
4280 blk_mq_hctx_notify_offline);
Jens Axboe320ae512013-10-24 09:20:05 +01004281 return 0;
4282}
4283subsys_initcall(blk_mq_init);