Blame - block/blk-mq-sched.c - SHIFTPHONES/mainline/linux

blob: 8e525e66a0d971aaa50d5b57cd27471f7cc96e6e [file] [log] [blame]

Jens Axboe	bd166ef	2017-01-17 06:03:22 -0700	[diff] [blame]	1	/*
				2	* blk-mq scheduling framework
				3	*
				4	* Copyright (C) 2016 Jens Axboe
				5	*/
				6	#include <linux/kernel.h>
				7	#include <linux/module.h>
				8	#include <linux/blk-mq.h>
				9
				10	#include <trace/events/block.h>
				11
				12	#include "blk.h"
				13	#include "blk-mq.h"
Omar Sandoval	d332ce0	2017-05-04 08:24:40 -0600	[diff] [blame]	14	#include "blk-mq-debugfs.h"
Jens Axboe	bd166ef	2017-01-17 06:03:22 -0700	[diff] [blame]	15	#include "blk-mq-sched.h"
				16	#include "blk-mq-tag.h"
				17	#include "blk-wbt.h"
				18
				19	void blk_mq_sched_free_hctx_data(struct request_queue *q,
				20	void (exit)(struct blk_mq_hw_ctx ))
				21	{
				22	struct blk_mq_hw_ctx *hctx;
				23	int i;
				24
				25	queue_for_each_hw_ctx(q, hctx, i) {
				26	if (exit && hctx->sched_data)
				27	exit(hctx);
				28	kfree(hctx->sched_data);
				29	hctx->sched_data = NULL;
				30	}
				31	}
				32	EXPORT_SYMBOL_GPL(blk_mq_sched_free_hctx_data);
				33
Christoph Hellwig	44e8c2b	2017-06-16 18:15:25 +0200	[diff] [blame]	34	void blk_mq_sched_assign_ioc(struct request rq, struct bio bio)
Jens Axboe	bd166ef	2017-01-17 06:03:22 -0700	[diff] [blame]	35	{
Christoph Hellwig	44e8c2b	2017-06-16 18:15:25 +0200	[diff] [blame]	36	struct request_queue *q = rq->q;
				37	struct io_context *ioc = rq_ioc(bio);
Jens Axboe	bd166ef	2017-01-17 06:03:22 -0700	[diff] [blame]	38	struct io_cq *icq;
				39
				40	spin_lock_irq(q->queue_lock);
				41	icq = ioc_lookup_icq(ioc, q);
				42	spin_unlock_irq(q->queue_lock);
				43
				44	if (!icq) {
				45	icq = ioc_create_icq(ioc, q, GFP_ATOMIC);
				46	if (!icq)
				47	return;
				48	}
Christoph Hellwig	ea511e3	2017-06-16 18:15:20 +0200	[diff] [blame]	49	get_io_context(icq->ioc);
Christoph Hellwig	44e8c2b	2017-06-16 18:15:25 +0200	[diff] [blame]	50	rq->elv.icq = icq;
Jens Axboe	bd166ef	2017-01-17 06:03:22 -0700	[diff] [blame]	51	}
				52
Jens Axboe	8e8320c	2017-06-20 17:56:13 -0600	[diff] [blame]	53	/*
				54	* Mark a hardware queue as needing a restart. For shared queues, maintain
				55	* a count of how many hardware queues are marked for restart.
				56	*/
				57	static void blk_mq_sched_mark_restart_hctx(struct blk_mq_hw_ctx *hctx)
				58	{
				59	if (test_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state))
				60	return;
				61
				62	if (hctx->flags & BLK_MQ_F_TAG_SHARED) {
				63	struct request_queue *q = hctx->queue;
				64
				65	if (!test_and_set_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state))
				66	atomic_inc(&q->shared_hctx_restart);
				67	} else
				68	set_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state);
				69	}
				70
				71	static bool blk_mq_sched_restart_hctx(struct blk_mq_hw_ctx *hctx)
				72	{
				73	if (!test_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state))
				74	return false;
				75
				76	if (hctx->flags & BLK_MQ_F_TAG_SHARED) {
				77	struct request_queue *q = hctx->queue;
				78
				79	if (test_and_clear_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state))
				80	atomic_dec(&q->shared_hctx_restart);
				81	} else
				82	clear_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state);
				83
				84	if (blk_mq_hctx_has_pending(hctx)) {
				85	blk_mq_run_hw_queue(hctx, true);
				86	return true;
				87	}
				88
				89	return false;
				90	}
				91
Ming Lei	de14829	2017-10-14 17:22:29 +0800	[diff] [blame^]	92	/* return true if hctx need to run again */
				93	static bool blk_mq_do_dispatch_sched(struct blk_mq_hw_ctx *hctx)
Ming Lei	caf8eb0	2017-10-14 17:22:26 +0800	[diff] [blame]	94	{
				95	struct request_queue *q = hctx->queue;
				96	struct elevator_queue *e = q->elevator;
				97	LIST_HEAD(rq_list);
				98
				99	do {
Ming Lei	de14829	2017-10-14 17:22:29 +0800	[diff] [blame^]	100	struct request *rq;
				101	blk_status_t ret;
Ming Lei	caf8eb0	2017-10-14 17:22:26 +0800	[diff] [blame]	102
Ming Lei	de14829	2017-10-14 17:22:29 +0800	[diff] [blame^]	103	if (e->type->ops.mq.has_work &&
				104	!e->type->ops.mq.has_work(hctx))
Ming Lei	caf8eb0	2017-10-14 17:22:26 +0800	[diff] [blame]	105	break;
Ming Lei	de14829	2017-10-14 17:22:29 +0800	[diff] [blame^]	106
				107	ret = blk_mq_get_dispatch_budget(hctx);
				108	if (ret == BLK_STS_RESOURCE)
				109	return true;
				110
				111	rq = e->type->ops.mq.dispatch_request(hctx);
				112	if (!rq) {
				113	blk_mq_put_dispatch_budget(hctx);
				114	break;
				115	} else if (ret != BLK_STS_OK) {
				116	blk_mq_end_request(rq, ret);
				117	continue;
				118	}
				119
				120	/*
				121	* Now this rq owns the budget which has to be released
				122	* if this rq won't be queued to driver via .queue_rq()
				123	* in blk_mq_dispatch_rq_list().
				124	*/
Ming Lei	caf8eb0	2017-10-14 17:22:26 +0800	[diff] [blame]	125	list_add(&rq->queuelist, &rq_list);
Ming Lei	de14829	2017-10-14 17:22:29 +0800	[diff] [blame^]	126	} while (blk_mq_dispatch_rq_list(q, &rq_list, true));
				127
				128	return false;
Ming Lei	caf8eb0	2017-10-14 17:22:26 +0800	[diff] [blame]	129	}
				130
Ming Lei	de14829	2017-10-14 17:22:29 +0800	[diff] [blame^]	131	/* return true if hw queue need to be run again */
				132	bool blk_mq_sched_dispatch_requests(struct blk_mq_hw_ctx *hctx)
Jens Axboe	bd166ef	2017-01-17 06:03:22 -0700	[diff] [blame]	133	{
Omar Sandoval	81380ca	2017-04-07 08:56:26 -0600	[diff] [blame]	134	struct request_queue *q = hctx->queue;
				135	struct elevator_queue *e = q->elevator;
Jens Axboe	64765a7	2017-02-17 11:39:26 -0700	[diff] [blame]	136	const bool has_sched_dispatch = e && e->type->ops.mq.dispatch_request;
Jens Axboe	bd166ef	2017-01-17 06:03:22 -0700	[diff] [blame]	137	LIST_HEAD(rq_list);
Ming Lei	de14829	2017-10-14 17:22:29 +0800	[diff] [blame^]	138	bool run_queue = false;
Jens Axboe	bd166ef	2017-01-17 06:03:22 -0700	[diff] [blame]	139
Ming Lei	f4560ff	2017-06-18 14:24:27 -0600	[diff] [blame]	140	/* RCU or SRCU read lock is needed before checking quiesced flag */
				141	if (unlikely(blk_mq_hctx_stopped(hctx) \|\| blk_queue_quiesced(q)))
Ming Lei	de14829	2017-10-14 17:22:29 +0800	[diff] [blame^]	142	return false;
Jens Axboe	bd166ef	2017-01-17 06:03:22 -0700	[diff] [blame]	143
				144	hctx->run++;
				145
				146	/*
				147	* If we have previous entries on our dispatch list, grab them first for
				148	* more fair dispatch.
				149	*/
				150	if (!list_empty_careful(&hctx->dispatch)) {
				151	spin_lock(&hctx->lock);
				152	if (!list_empty(&hctx->dispatch))
				153	list_splice_init(&hctx->dispatch, &rq_list);
				154	spin_unlock(&hctx->lock);
				155	}
				156
				157	/*
				158	* Only ask the scheduler for requests, if we didn't have residual
				159	* requests from the dispatch list. This is to avoid the case where
				160	* we only ever dispatch a fraction of the requests available because
				161	* of low device queue depth. Once we pull requests out of the IO
				162	* scheduler, we can no longer merge or sort them. So it's best to
				163	* leave them there for as long as we can. Mark the hw queue as
				164	* needing a restart in that case.
Ming Lei	caf8eb0	2017-10-14 17:22:26 +0800	[diff] [blame]	165	*
Ming Lei	5e3d02b	2017-10-14 17:22:25 +0800	[diff] [blame]	166	* We want to dispatch from the scheduler if there was nothing
				167	* on the dispatch list or we were able to dispatch from the
				168	* dispatch list.
Jens Axboe	64765a7	2017-02-17 11:39:26 -0700	[diff] [blame]	169	*/
Ming Lei	caf8eb0	2017-10-14 17:22:26 +0800	[diff] [blame]	170	if (!list_empty(&rq_list)) {
				171	blk_mq_sched_mark_restart_hctx(hctx);
Ming Lei	de14829	2017-10-14 17:22:29 +0800	[diff] [blame^]	172	if (blk_mq_dispatch_rq_list(q, &rq_list, false) &&
				173	has_sched_dispatch)
				174	run_queue = blk_mq_do_dispatch_sched(hctx);
Ming Lei	caf8eb0	2017-10-14 17:22:26 +0800	[diff] [blame]	175	} else if (has_sched_dispatch) {
Ming Lei	de14829	2017-10-14 17:22:29 +0800	[diff] [blame^]	176	run_queue = blk_mq_do_dispatch_sched(hctx);
Ming Lei	caf8eb0	2017-10-14 17:22:26 +0800	[diff] [blame]	177	} else {
				178	blk_mq_flush_busy_ctxs(hctx, &rq_list);
Ming Lei	de14829	2017-10-14 17:22:29 +0800	[diff] [blame^]	179	blk_mq_dispatch_rq_list(q, &rq_list, false);
Jens Axboe	c13660a	2017-01-26 12:40:07 -0700	[diff] [blame]	180	}
Ming Lei	de14829	2017-10-14 17:22:29 +0800	[diff] [blame^]	181
				182	if (run_queue && !blk_mq_sched_needs_restart(hctx) &&
				183	!test_bit(BLK_MQ_S_TAG_WAITING, &hctx->state)) {
				184	blk_mq_sched_mark_restart_hctx(hctx);
				185	return true;
				186	}
				187
				188	return false;
Jens Axboe	bd166ef	2017-01-17 06:03:22 -0700	[diff] [blame]	189	}
				190
Jens Axboe	e4d750c	2017-02-03 09:48:28 -0700	[diff] [blame]	191	bool blk_mq_sched_try_merge(struct request_queue q, struct bio bio,
				192	struct request **merged_request)
Jens Axboe	bd166ef	2017-01-17 06:03:22 -0700	[diff] [blame]	193	{
				194	struct request *rq;
Jens Axboe	bd166ef	2017-01-17 06:03:22 -0700	[diff] [blame]	195
Christoph Hellwig	34fe7c0	2017-02-08 14:46:48 +0100	[diff] [blame]	196	switch (elv_merge(q, &rq, bio)) {
				197	case ELEVATOR_BACK_MERGE:
Jens Axboe	bd166ef	2017-01-17 06:03:22 -0700	[diff] [blame]	198	if (!blk_mq_sched_allow_merge(q, rq, bio))
				199	return false;
Christoph Hellwig	34fe7c0	2017-02-08 14:46:48 +0100	[diff] [blame]	200	if (!bio_attempt_back_merge(q, rq, bio))
				201	return false;
				202	*merged_request = attempt_back_merge(q, rq);
				203	if (!*merged_request)
				204	elv_merged_request(q, rq, ELEVATOR_BACK_MERGE);
				205	return true;
				206	case ELEVATOR_FRONT_MERGE:
Jens Axboe	bd166ef	2017-01-17 06:03:22 -0700	[diff] [blame]	207	if (!blk_mq_sched_allow_merge(q, rq, bio))
				208	return false;
Christoph Hellwig	34fe7c0	2017-02-08 14:46:48 +0100	[diff] [blame]	209	if (!bio_attempt_front_merge(q, rq, bio))
				210	return false;
				211	*merged_request = attempt_front_merge(q, rq);
				212	if (!*merged_request)
				213	elv_merged_request(q, rq, ELEVATOR_FRONT_MERGE);
				214	return true;
				215	default:
				216	return false;
Jens Axboe	bd166ef	2017-01-17 06:03:22 -0700	[diff] [blame]	217	}
Jens Axboe	bd166ef	2017-01-17 06:03:22 -0700	[diff] [blame]	218	}
				219	EXPORT_SYMBOL_GPL(blk_mq_sched_try_merge);
				220
Ming Lei	9bddeb2	2017-05-26 19:53:20 +0800	[diff] [blame]	221	/*
				222	* Reverse check our software queue for entries that we could potentially
				223	* merge with. Currently includes a hand-wavy stop count of 8, to not spend
				224	* too much time checking for merges.
				225	*/
				226	static bool blk_mq_attempt_merge(struct request_queue *q,
				227	struct blk_mq_ctx ctx, struct bio bio)
				228	{
				229	struct request *rq;
				230	int checked = 8;
				231
Bart Van Assche	7b60781	2017-06-20 11:15:47 -0700	[diff] [blame]	232	lockdep_assert_held(&ctx->lock);
				233
Ming Lei	9bddeb2	2017-05-26 19:53:20 +0800	[diff] [blame]	234	list_for_each_entry_reverse(rq, &ctx->rq_list, queuelist) {
				235	bool merged = false;
				236
				237	if (!checked--)
				238	break;
				239
				240	if (!blk_rq_merge_ok(rq, bio))
				241	continue;
				242
				243	switch (blk_try_merge(rq, bio)) {
				244	case ELEVATOR_BACK_MERGE:
				245	if (blk_mq_sched_allow_merge(q, rq, bio))
				246	merged = bio_attempt_back_merge(q, rq, bio);
				247	break;
				248	case ELEVATOR_FRONT_MERGE:
				249	if (blk_mq_sched_allow_merge(q, rq, bio))
				250	merged = bio_attempt_front_merge(q, rq, bio);
				251	break;
				252	case ELEVATOR_DISCARD_MERGE:
				253	merged = bio_attempt_discard_merge(q, rq, bio);
				254	break;
				255	default:
				256	continue;
				257	}
				258
				259	if (merged)
				260	ctx->rq_merged++;
				261	return merged;
				262	}
				263
				264	return false;
				265	}
				266
Jens Axboe	bd166ef	2017-01-17 06:03:22 -0700	[diff] [blame]	267	bool __blk_mq_sched_bio_merge(struct request_queue q, struct bio bio)
				268	{
				269	struct elevator_queue *e = q->elevator;
Ming Lei	9bddeb2	2017-05-26 19:53:20 +0800	[diff] [blame]	270	struct blk_mq_ctx *ctx = blk_mq_get_ctx(q);
				271	struct blk_mq_hw_ctx *hctx = blk_mq_map_queue(q, ctx->cpu);
				272	bool ret = false;
Jens Axboe	bd166ef	2017-01-17 06:03:22 -0700	[diff] [blame]	273
Ming Lei	9bddeb2	2017-05-26 19:53:20 +0800	[diff] [blame]	274	if (e && e->type->ops.mq.bio_merge) {
Jens Axboe	bd166ef	2017-01-17 06:03:22 -0700	[diff] [blame]	275	blk_mq_put_ctx(ctx);
				276	return e->type->ops.mq.bio_merge(hctx, bio);
				277	}
				278
Ming Lei	9bddeb2	2017-05-26 19:53:20 +0800	[diff] [blame]	279	if (hctx->flags & BLK_MQ_F_SHOULD_MERGE) {
				280	/* default per sw-queue merge */
				281	spin_lock(&ctx->lock);
				282	ret = blk_mq_attempt_merge(q, ctx, bio);
				283	spin_unlock(&ctx->lock);
				284	}
				285
				286	blk_mq_put_ctx(ctx);
				287	return ret;
Jens Axboe	bd166ef	2017-01-17 06:03:22 -0700	[diff] [blame]	288	}
				289
				290	bool blk_mq_sched_try_insert_merge(struct request_queue q, struct request rq)
				291	{
				292	return rq_mergeable(rq) && elv_attempt_insert_merge(q, rq);
				293	}
				294	EXPORT_SYMBOL_GPL(blk_mq_sched_try_insert_merge);
				295
				296	void blk_mq_sched_request_inserted(struct request *rq)
				297	{
				298	trace_block_rq_insert(rq->q, rq);
				299	}
				300	EXPORT_SYMBOL_GPL(blk_mq_sched_request_inserted);
				301
Omar Sandoval	0cacba6	2017-02-02 15:42:39 -0800	[diff] [blame]	302	static bool blk_mq_sched_bypass_insert(struct blk_mq_hw_ctx *hctx,
				303	struct request *rq)
Jens Axboe	bd166ef	2017-01-17 06:03:22 -0700	[diff] [blame]	304	{
				305	if (rq->tag == -1) {
				306	rq->rq_flags \|= RQF_SORTED;
				307	return false;
				308	}
				309
				310	/*
				311	* If we already have a real request tag, send directly to
				312	* the dispatch list.
				313	*/
				314	spin_lock(&hctx->lock);
				315	list_add(&rq->queuelist, &hctx->dispatch);
				316	spin_unlock(&hctx->lock);
				317	return true;
				318	}
Jens Axboe	bd166ef	2017-01-17 06:03:22 -0700	[diff] [blame]	319
/**
 * list_for_each_entry_rcu_rr - iterate in a round-robin fashion over rcu list
 * @pos:    loop cursor.
 * @skip:   the list element that will not be examined. Iteration starts at
 *          @skip->next.
 * @head:   head of the list to examine. This list must have at least one
 *          element, namely @skip.
 * @member: name of the list_head structure within typeof(*pos).
 *
 * On every step @pos advances to the following entry; when the following
 * node is @head itself, it is stepped over so that the walk wraps around.
 * The loop terminates once @pos is back at @skip.
 */
#define list_for_each_entry_rcu_rr(pos, skip, head, member)		\
	for ((pos) = (skip);						\
	     ((pos) = (pos)->member.next != (head) ?			\
			list_entry_rcu((pos)->member.next,		\
				       typeof(*(pos)), member) :	\
			list_entry_rcu((pos)->member.next->next,	\
				       typeof(*(pos)), member)),	\
	     (pos) != (skip); )
Jens Axboe	50e1dab	2017-01-26 14:42:34 -0700	[diff] [blame]	335
Bart Van Assche	6d8c6c0	2017-04-07 12:40:09 -0600	[diff] [blame]	336	/*
				337	* Called after a driver tag has been freed to check whether a hctx needs to
				338	* be restarted. Restarts @hctx if its tag set is not shared. Restarts hardware
				339	* queues in a round-robin fashion if the tag set of @hctx is shared with other
				340	* hardware queues.
				341	*/
				342	void blk_mq_sched_restart(struct blk_mq_hw_ctx *const hctx)
				343	{
				344	struct blk_mq_tags *const tags = hctx->tags;
				345	struct blk_mq_tag_set *const set = hctx->queue->tag_set;
				346	struct request_queue const queue = hctx->queue, q;
				347	struct blk_mq_hw_ctx *hctx2;
				348	unsigned int i, j;
				349
				350	if (set->flags & BLK_MQ_F_TAG_SHARED) {
Jens Axboe	8e8320c	2017-06-20 17:56:13 -0600	[diff] [blame]	351	/*
				352	* If this is 0, then we know that no hardware queues
				353	* have RESTART marked. We're done.
				354	*/
				355	if (!atomic_read(&queue->shared_hctx_restart))
				356	return;
				357
Bart Van Assche	6d8c6c0	2017-04-07 12:40:09 -0600	[diff] [blame]	358	rcu_read_lock();
				359	list_for_each_entry_rcu_rr(q, queue, &set->tag_list,
				360	tag_set_list) {
				361	queue_for_each_hw_ctx(q, hctx2, i)
				362	if (hctx2->tags == tags &&
				363	blk_mq_sched_restart_hctx(hctx2))
				364	goto done;
Omar Sandoval	d38d351	2017-02-22 10:58:30 -0800	[diff] [blame]	365	}
Bart Van Assche	6d8c6c0	2017-04-07 12:40:09 -0600	[diff] [blame]	366	j = hctx->queue_num + 1;
				367	for (i = 0; i < queue->nr_hw_queues; i++, j++) {
				368	if (j == queue->nr_hw_queues)
				369	j = 0;
				370	hctx2 = queue->queue_hw_ctx[j];
				371	if (hctx2->tags == tags &&
				372	blk_mq_sched_restart_hctx(hctx2))
				373	break;
				374	}
				375	done:
				376	rcu_read_unlock();
Omar Sandoval	d38d351	2017-02-22 10:58:30 -0800	[diff] [blame]	377	} else {
Jens Axboe	50e1dab	2017-01-26 14:42:34 -0700	[diff] [blame]	378	blk_mq_sched_restart_hctx(hctx);
Jens Axboe	50e1dab	2017-01-26 14:42:34 -0700	[diff] [blame]	379	}
				380	}
				381
Jens Axboe	bd6737f	2017-01-27 01:00:47 -0700	[diff] [blame]	382	/*
				383	* Add flush/fua to the queue. If we fail getting a driver tag, then
				384	* punt to the requeue list. Requeue will re-invoke us from a context
				385	* that's safe to block from.
				386	*/
				387	static void blk_mq_sched_insert_flush(struct blk_mq_hw_ctx *hctx,
				388	struct request *rq, bool can_block)
				389	{
				390	if (blk_mq_get_driver_tag(rq, &hctx, can_block)) {
				391	blk_insert_flush(rq);
				392	blk_mq_run_hw_queue(hctx, true);
				393	} else
Jens Axboe	c7a571b	2017-02-17 11:37:14 -0700	[diff] [blame]	394	blk_mq_add_to_requeue_list(rq, false, true);
Jens Axboe	bd6737f	2017-01-27 01:00:47 -0700	[diff] [blame]	395	}
				396
				397	void blk_mq_sched_insert_request(struct request *rq, bool at_head,
				398	bool run_queue, bool async, bool can_block)
				399	{
				400	struct request_queue *q = rq->q;
				401	struct elevator_queue *e = q->elevator;
				402	struct blk_mq_ctx *ctx = rq->mq_ctx;
				403	struct blk_mq_hw_ctx *hctx = blk_mq_map_queue(q, ctx->cpu);
				404
Jens Axboe	f3a8ab7	2017-01-27 09:08:23 -0700	[diff] [blame]	405	if (rq->tag == -1 && op_is_flush(rq->cmd_flags)) {
Jens Axboe	bd6737f	2017-01-27 01:00:47 -0700	[diff] [blame]	406	blk_mq_sched_insert_flush(hctx, rq, can_block);
				407	return;
				408	}
				409
Omar Sandoval	0cacba6	2017-02-02 15:42:39 -0800	[diff] [blame]	410	if (e && blk_mq_sched_bypass_insert(hctx, rq))
				411	goto run;
				412
Jens Axboe	bd6737f	2017-01-27 01:00:47 -0700	[diff] [blame]	413	if (e && e->type->ops.mq.insert_requests) {
				414	LIST_HEAD(list);
				415
				416	list_add(&rq->queuelist, &list);
				417	e->type->ops.mq.insert_requests(hctx, &list, at_head);
				418	} else {
				419	spin_lock(&ctx->lock);
				420	__blk_mq_insert_request(hctx, rq, at_head);
				421	spin_unlock(&ctx->lock);
				422	}
				423
Omar Sandoval	0cacba6	2017-02-02 15:42:39 -0800	[diff] [blame]	424	run:
Jens Axboe	bd6737f	2017-01-27 01:00:47 -0700	[diff] [blame]	425	if (run_queue)
				426	blk_mq_run_hw_queue(hctx, async);
				427	}
				428
				429	void blk_mq_sched_insert_requests(struct request_queue *q,
				430	struct blk_mq_ctx *ctx,
				431	struct list_head *list, bool run_queue_async)
				432	{
				433	struct blk_mq_hw_ctx *hctx = blk_mq_map_queue(q, ctx->cpu);
				434	struct elevator_queue *e = hctx->queue->elevator;
				435
Omar Sandoval	0cacba6	2017-02-02 15:42:39 -0800	[diff] [blame]	436	if (e) {
				437	struct request rq, next;
				438
				439	/*
				440	* We bypass requests that already have a driver tag assigned,
				441	* which should only be flushes. Flushes are only ever inserted
				442	* as single requests, so we shouldn't ever hit the
				443	* WARN_ON_ONCE() below (but let's handle it just in case).
				444	*/
				445	list_for_each_entry_safe(rq, next, list, queuelist) {
				446	if (WARN_ON_ONCE(rq->tag != -1)) {
				447	list_del_init(&rq->queuelist);
				448	blk_mq_sched_bypass_insert(hctx, rq);
				449	}
				450	}
				451	}
				452
Jens Axboe	bd6737f	2017-01-27 01:00:47 -0700	[diff] [blame]	453	if (e && e->type->ops.mq.insert_requests)
				454	e->type->ops.mq.insert_requests(hctx, list, false);
				455	else
				456	blk_mq_insert_requests(hctx, ctx, list);
				457
				458	blk_mq_run_hw_queue(hctx, run_queue_async);
				459	}
				460
Jens Axboe	bd166ef	2017-01-17 06:03:22 -0700	[diff] [blame]	461	static void blk_mq_sched_free_tags(struct blk_mq_tag_set *set,
				462	struct blk_mq_hw_ctx *hctx,
				463	unsigned int hctx_idx)
				464	{
				465	if (hctx->sched_tags) {
				466	blk_mq_free_rqs(set, hctx->sched_tags, hctx_idx);
				467	blk_mq_free_rq_map(hctx->sched_tags);
				468	hctx->sched_tags = NULL;
				469	}
				470	}
				471
Omar Sandoval	6917ff0	2017-04-05 12:01:30 -0700	[diff] [blame]	472	static int blk_mq_sched_alloc_tags(struct request_queue *q,
				473	struct blk_mq_hw_ctx *hctx,
				474	unsigned int hctx_idx)
Jens Axboe	bd166ef	2017-01-17 06:03:22 -0700	[diff] [blame]	475	{
				476	struct blk_mq_tag_set *set = q->tag_set;
Omar Sandoval	6917ff0	2017-04-05 12:01:30 -0700	[diff] [blame]	477	int ret;
Jens Axboe	bd166ef	2017-01-17 06:03:22 -0700	[diff] [blame]	478
Omar Sandoval	6917ff0	2017-04-05 12:01:30 -0700	[diff] [blame]	479	hctx->sched_tags = blk_mq_alloc_rq_map(set, hctx_idx, q->nr_requests,
				480	set->reserved_tags);
				481	if (!hctx->sched_tags)
				482	return -ENOMEM;
Jens Axboe	bd166ef	2017-01-17 06:03:22 -0700	[diff] [blame]	483
Omar Sandoval	6917ff0	2017-04-05 12:01:30 -0700	[diff] [blame]	484	ret = blk_mq_alloc_rqs(set, hctx->sched_tags, hctx_idx, q->nr_requests);
				485	if (ret)
				486	blk_mq_sched_free_tags(set, hctx, hctx_idx);
Jens Axboe	bd166ef	2017-01-17 06:03:22 -0700	[diff] [blame]	487
Omar Sandoval	6917ff0	2017-04-05 12:01:30 -0700	[diff] [blame]	488	return ret;
Jens Axboe	bd166ef	2017-01-17 06:03:22 -0700	[diff] [blame]	489	}
				490
Omar Sandoval	54d5329	2017-04-07 08:52:27 -0600	[diff] [blame]	491	static void blk_mq_sched_tags_teardown(struct request_queue *q)
Jens Axboe	bd166ef	2017-01-17 06:03:22 -0700	[diff] [blame]	492	{
				493	struct blk_mq_tag_set *set = q->tag_set;
				494	struct blk_mq_hw_ctx *hctx;
				495	int i;
				496
				497	queue_for_each_hw_ctx(q, hctx, i)
				498	blk_mq_sched_free_tags(set, hctx, i);
				499	}
Jens Axboe	d348499	2017-01-13 14:43:58 -0700	[diff] [blame]	500
Omar Sandoval	9325263	2017-04-05 12:01:31 -0700	[diff] [blame]	501	int blk_mq_sched_init_hctx(struct request_queue q, struct blk_mq_hw_ctx hctx,
				502	unsigned int hctx_idx)
				503	{
				504	struct elevator_queue *e = q->elevator;
Omar Sandoval	ee056f9	2017-04-05 12:01:34 -0700	[diff] [blame]	505	int ret;
Omar Sandoval	9325263	2017-04-05 12:01:31 -0700	[diff] [blame]	506
				507	if (!e)
				508	return 0;
				509
Omar Sandoval	ee056f9	2017-04-05 12:01:34 -0700	[diff] [blame]	510	ret = blk_mq_sched_alloc_tags(q, hctx, hctx_idx);
				511	if (ret)
				512	return ret;
				513
				514	if (e->type->ops.mq.init_hctx) {
				515	ret = e->type->ops.mq.init_hctx(hctx, hctx_idx);
				516	if (ret) {
				517	blk_mq_sched_free_tags(q->tag_set, hctx, hctx_idx);
				518	return ret;
				519	}
				520	}
				521
Omar Sandoval	d332ce0	2017-05-04 08:24:40 -0600	[diff] [blame]	522	blk_mq_debugfs_register_sched_hctx(q, hctx);
				523
Omar Sandoval	ee056f9	2017-04-05 12:01:34 -0700	[diff] [blame]	524	return 0;
Omar Sandoval	9325263	2017-04-05 12:01:31 -0700	[diff] [blame]	525	}
				526
				527	void blk_mq_sched_exit_hctx(struct request_queue q, struct blk_mq_hw_ctx hctx,
				528	unsigned int hctx_idx)
				529	{
				530	struct elevator_queue *e = q->elevator;
				531
				532	if (!e)
				533	return;
				534
Omar Sandoval	d332ce0	2017-05-04 08:24:40 -0600	[diff] [blame]	535	blk_mq_debugfs_unregister_sched_hctx(hctx);
				536
Omar Sandoval	ee056f9	2017-04-05 12:01:34 -0700	[diff] [blame]	537	if (e->type->ops.mq.exit_hctx && hctx->sched_data) {
				538	e->type->ops.mq.exit_hctx(hctx, hctx_idx);
				539	hctx->sched_data = NULL;
				540	}
				541
Omar Sandoval	9325263	2017-04-05 12:01:31 -0700	[diff] [blame]	542	blk_mq_sched_free_tags(q->tag_set, hctx, hctx_idx);
				543	}
				544
Omar Sandoval	6917ff0	2017-04-05 12:01:30 -0700	[diff] [blame]	545	int blk_mq_init_sched(struct request_queue q, struct elevator_type e)
				546	{
				547	struct blk_mq_hw_ctx *hctx;
Omar Sandoval	ee056f9	2017-04-05 12:01:34 -0700	[diff] [blame]	548	struct elevator_queue *eq;
Omar Sandoval	6917ff0	2017-04-05 12:01:30 -0700	[diff] [blame]	549	unsigned int i;
				550	int ret;
				551
				552	if (!e) {
				553	q->elevator = NULL;
				554	return 0;
				555	}
				556
				557	/*
Ming Lei	32825c4	2017-07-03 20:37:14 +0800	[diff] [blame]	558	* Default to double of smaller one between hw queue_depth and 128,
				559	* since we don't split into sync/async like the old code did.
				560	* Additionally, this is a per-hw queue depth.
Omar Sandoval	6917ff0	2017-04-05 12:01:30 -0700	[diff] [blame]	561	*/
Ming Lei	32825c4	2017-07-03 20:37:14 +0800	[diff] [blame]	562	q->nr_requests = 2 * min_t(unsigned int, q->tag_set->queue_depth,
				563	BLKDEV_MAX_RQ);
Omar Sandoval	6917ff0	2017-04-05 12:01:30 -0700	[diff] [blame]	564
				565	queue_for_each_hw_ctx(q, hctx, i) {
				566	ret = blk_mq_sched_alloc_tags(q, hctx, i);
				567	if (ret)
				568	goto err;
				569	}
				570
				571	ret = e->ops.mq.init_sched(q, e);
				572	if (ret)
				573	goto err;
				574
Omar Sandoval	d332ce0	2017-05-04 08:24:40 -0600	[diff] [blame]	575	blk_mq_debugfs_register_sched(q);
				576
				577	queue_for_each_hw_ctx(q, hctx, i) {
				578	if (e->ops.mq.init_hctx) {
Omar Sandoval	ee056f9	2017-04-05 12:01:34 -0700	[diff] [blame]	579	ret = e->ops.mq.init_hctx(hctx, i);
				580	if (ret) {
				581	eq = q->elevator;
				582	blk_mq_exit_sched(q, eq);
				583	kobject_put(&eq->kobj);
				584	return ret;
				585	}
				586	}
Omar Sandoval	d332ce0	2017-05-04 08:24:40 -0600	[diff] [blame]	587	blk_mq_debugfs_register_sched_hctx(q, hctx);
Omar Sandoval	ee056f9	2017-04-05 12:01:34 -0700	[diff] [blame]	588	}
				589
Omar Sandoval	6917ff0	2017-04-05 12:01:30 -0700	[diff] [blame]	590	return 0;
				591
				592	err:
Omar Sandoval	54d5329	2017-04-07 08:52:27 -0600	[diff] [blame]	593	blk_mq_sched_tags_teardown(q);
				594	q->elevator = NULL;
Omar Sandoval	6917ff0	2017-04-05 12:01:30 -0700	[diff] [blame]	595	return ret;
				596	}
				597
Omar Sandoval	54d5329	2017-04-07 08:52:27 -0600	[diff] [blame]	598	void blk_mq_exit_sched(struct request_queue q, struct elevator_queue e)
				599	{
Omar Sandoval	ee056f9	2017-04-05 12:01:34 -0700	[diff] [blame]	600	struct blk_mq_hw_ctx *hctx;
				601	unsigned int i;
				602
Omar Sandoval	d332ce0	2017-05-04 08:24:40 -0600	[diff] [blame]	603	queue_for_each_hw_ctx(q, hctx, i) {
				604	blk_mq_debugfs_unregister_sched_hctx(hctx);
				605	if (e->type->ops.mq.exit_hctx && hctx->sched_data) {
				606	e->type->ops.mq.exit_hctx(hctx, i);
				607	hctx->sched_data = NULL;
Omar Sandoval	ee056f9	2017-04-05 12:01:34 -0700	[diff] [blame]	608	}
				609	}
Omar Sandoval	d332ce0	2017-05-04 08:24:40 -0600	[diff] [blame]	610	blk_mq_debugfs_unregister_sched(q);
Omar Sandoval	54d5329	2017-04-07 08:52:27 -0600	[diff] [blame]	611	if (e->type->ops.mq.exit_sched)
				612	e->type->ops.mq.exit_sched(e);
				613	blk_mq_sched_tags_teardown(q);
				614	q->elevator = NULL;
				615	}
				616
Jens Axboe	d348499	2017-01-13 14:43:58 -0700	[diff] [blame]	617	int blk_mq_sched_init(struct request_queue *q)
				618	{
				619	int ret;
				620
Jens Axboe	d348499	2017-01-13 14:43:58 -0700	[diff] [blame]	621	mutex_lock(&q->sysfs_lock);
				622	ret = elevator_init(q, NULL);
				623	mutex_unlock(&q->sysfs_lock);
				624
				625	return ret;
				626	}