Blame - block/blk-mq-sched.c - SHIFTPHONES/kernel/common

blob: df8581bb0a37083d3d579ee5f70b4e653fc1457e [file] [log] [blame]

Jens Axboe	bd166ef	2017-01-17 06:03:22 -0700	[diff] [blame]	1	/*
				2	* blk-mq scheduling framework
				3	*
				4	* Copyright (C) 2016 Jens Axboe
				5	*/
				6	#include <linux/kernel.h>
				7	#include <linux/module.h>
				8	#include <linux/blk-mq.h>
				9
				10	#include <trace/events/block.h>
				11
				12	#include "blk.h"
				13	#include "blk-mq.h"
Omar Sandoval	d332ce0	2017-05-04 08:24:40 -0600	[diff] [blame]	14	#include "blk-mq-debugfs.h"
Jens Axboe	bd166ef	2017-01-17 06:03:22 -0700	[diff] [blame]	15	#include "blk-mq-sched.h"
				16	#include "blk-mq-tag.h"
				17	#include "blk-wbt.h"
				18
				19	void blk_mq_sched_free_hctx_data(struct request_queue *q,
				20	void (exit)(struct blk_mq_hw_ctx ))
				21	{
				22	struct blk_mq_hw_ctx *hctx;
				23	int i;
				24
				25	queue_for_each_hw_ctx(q, hctx, i) {
				26	if (exit && hctx->sched_data)
				27	exit(hctx);
				28	kfree(hctx->sched_data);
				29	hctx->sched_data = NULL;
				30	}
				31	}
				32	EXPORT_SYMBOL_GPL(blk_mq_sched_free_hctx_data);
				33
Christoph Hellwig	44e8c2b	2017-06-16 18:15:25 +0200	[diff] [blame]	34	void blk_mq_sched_assign_ioc(struct request rq, struct bio bio)
Jens Axboe	bd166ef	2017-01-17 06:03:22 -0700	[diff] [blame]	35	{
Christoph Hellwig	44e8c2b	2017-06-16 18:15:25 +0200	[diff] [blame]	36	struct request_queue *q = rq->q;
				37	struct io_context *ioc = rq_ioc(bio);
Jens Axboe	bd166ef	2017-01-17 06:03:22 -0700	[diff] [blame]	38	struct io_cq *icq;
				39
				40	spin_lock_irq(q->queue_lock);
				41	icq = ioc_lookup_icq(ioc, q);
				42	spin_unlock_irq(q->queue_lock);
				43
				44	if (!icq) {
				45	icq = ioc_create_icq(ioc, q, GFP_ATOMIC);
				46	if (!icq)
				47	return;
				48	}
Christoph Hellwig	ea511e3	2017-06-16 18:15:20 +0200	[diff] [blame]	49	get_io_context(icq->ioc);
Christoph Hellwig	44e8c2b	2017-06-16 18:15:25 +0200	[diff] [blame]	50	rq->elv.icq = icq;
Jens Axboe	bd166ef	2017-01-17 06:03:22 -0700	[diff] [blame]	51	}
				52
Jens Axboe	8e8320c	2017-06-20 17:56:13 -0600	[diff] [blame]	53	/*
				54	* Mark a hardware queue as needing a restart. For shared queues, maintain
				55	* a count of how many hardware queues are marked for restart.
				56	*/
				57	static void blk_mq_sched_mark_restart_hctx(struct blk_mq_hw_ctx *hctx)
				58	{
				59	if (test_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state))
				60	return;
				61
				62	if (hctx->flags & BLK_MQ_F_TAG_SHARED) {
				63	struct request_queue *q = hctx->queue;
				64
				65	if (!test_and_set_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state))
				66	atomic_inc(&q->shared_hctx_restart);
				67	} else
				68	set_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state);
				69	}
				70
				71	static bool blk_mq_sched_restart_hctx(struct blk_mq_hw_ctx *hctx)
				72	{
				73	if (!test_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state))
				74	return false;
				75
				76	if (hctx->flags & BLK_MQ_F_TAG_SHARED) {
				77	struct request_queue *q = hctx->queue;
				78
				79	if (test_and_clear_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state))
				80	atomic_dec(&q->shared_hctx_restart);
				81	} else
				82	clear_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state);
				83
				84	if (blk_mq_hctx_has_pending(hctx)) {
				85	blk_mq_run_hw_queue(hctx, true);
				86	return true;
				87	}
				88
				89	return false;
				90	}
				91
Ming Lei	de14829	2017-10-14 17:22:29 +0800	[diff] [blame]	92	/* return true if hctx need to run again */
				93	static bool blk_mq_do_dispatch_sched(struct blk_mq_hw_ctx *hctx)
Ming Lei	caf8eb0	2017-10-14 17:22:26 +0800	[diff] [blame]	94	{
				95	struct request_queue *q = hctx->queue;
				96	struct elevator_queue *e = q->elevator;
				97	LIST_HEAD(rq_list);
				98
				99	do {
Ming Lei	de14829	2017-10-14 17:22:29 +0800	[diff] [blame]	100	struct request *rq;
				101	blk_status_t ret;
Ming Lei	caf8eb0	2017-10-14 17:22:26 +0800	[diff] [blame]	102
Ming Lei	de14829	2017-10-14 17:22:29 +0800	[diff] [blame]	103	if (e->type->ops.mq.has_work &&
				104	!e->type->ops.mq.has_work(hctx))
Ming Lei	caf8eb0	2017-10-14 17:22:26 +0800	[diff] [blame]	105	break;
Ming Lei	de14829	2017-10-14 17:22:29 +0800	[diff] [blame]	106
				107	ret = blk_mq_get_dispatch_budget(hctx);
				108	if (ret == BLK_STS_RESOURCE)
				109	return true;
				110
				111	rq = e->type->ops.mq.dispatch_request(hctx);
				112	if (!rq) {
				113	blk_mq_put_dispatch_budget(hctx);
				114	break;
				115	} else if (ret != BLK_STS_OK) {
				116	blk_mq_end_request(rq, ret);
				117	continue;
				118	}
				119
				120	/*
				121	* Now this rq owns the budget which has to be released
				122	* if this rq won't be queued to driver via .queue_rq()
				123	* in blk_mq_dispatch_rq_list().
				124	*/
Ming Lei	caf8eb0	2017-10-14 17:22:26 +0800	[diff] [blame]	125	list_add(&rq->queuelist, &rq_list);
Ming Lei	de14829	2017-10-14 17:22:29 +0800	[diff] [blame]	126	} while (blk_mq_dispatch_rq_list(q, &rq_list, true));
				127
				128	return false;
Ming Lei	caf8eb0	2017-10-14 17:22:26 +0800	[diff] [blame]	129	}
				130
Ming Lei	b347689	2017-10-14 17:22:30 +0800	[diff] [blame^]	131	static struct blk_mq_ctx blk_mq_next_ctx(struct blk_mq_hw_ctx hctx,
				132	struct blk_mq_ctx *ctx)
				133	{
				134	unsigned idx = ctx->index_hw;
				135
				136	if (++idx == hctx->nr_ctx)
				137	idx = 0;
				138
				139	return hctx->ctxs[idx];
				140	}
				141
				142	/* return true if hctx need to run again */
				143	static bool blk_mq_do_dispatch_ctx(struct blk_mq_hw_ctx *hctx)
				144	{
				145	struct request_queue *q = hctx->queue;
				146	LIST_HEAD(rq_list);
				147	struct blk_mq_ctx *ctx = READ_ONCE(hctx->dispatch_from);
				148
				149	do {
				150	struct request *rq;
				151	blk_status_t ret;
				152
				153	if (!sbitmap_any_bit_set(&hctx->ctx_map))
				154	break;
				155
				156	ret = blk_mq_get_dispatch_budget(hctx);
				157	if (ret == BLK_STS_RESOURCE)
				158	return true;
				159
				160	rq = blk_mq_dequeue_from_ctx(hctx, ctx);
				161	if (!rq) {
				162	blk_mq_put_dispatch_budget(hctx);
				163	break;
				164	} else if (ret != BLK_STS_OK) {
				165	blk_mq_end_request(rq, ret);
				166	continue;
				167	}
				168
				169	/*
				170	* Now this rq owns the budget which has to be released
				171	* if this rq won't be queued to driver via .queue_rq()
				172	* in blk_mq_dispatch_rq_list().
				173	*/
				174	list_add(&rq->queuelist, &rq_list);
				175
				176	/* round robin for fair dispatch */
				177	ctx = blk_mq_next_ctx(hctx, rq->mq_ctx);
				178
				179	} while (blk_mq_dispatch_rq_list(q, &rq_list, true));
				180
				181	WRITE_ONCE(hctx->dispatch_from, ctx);
				182
				183	return false;
				184	}
				185
Ming Lei	de14829	2017-10-14 17:22:29 +0800	[diff] [blame]	186	/* return true if hw queue need to be run again */
				187	bool blk_mq_sched_dispatch_requests(struct blk_mq_hw_ctx *hctx)
Jens Axboe	bd166ef	2017-01-17 06:03:22 -0700	[diff] [blame]	188	{
Omar Sandoval	81380ca	2017-04-07 08:56:26 -0600	[diff] [blame]	189	struct request_queue *q = hctx->queue;
				190	struct elevator_queue *e = q->elevator;
Jens Axboe	64765a7	2017-02-17 11:39:26 -0700	[diff] [blame]	191	const bool has_sched_dispatch = e && e->type->ops.mq.dispatch_request;
Jens Axboe	bd166ef	2017-01-17 06:03:22 -0700	[diff] [blame]	192	LIST_HEAD(rq_list);
Ming Lei	de14829	2017-10-14 17:22:29 +0800	[diff] [blame]	193	bool run_queue = false;
Jens Axboe	bd166ef	2017-01-17 06:03:22 -0700	[diff] [blame]	194
Ming Lei	f4560ff	2017-06-18 14:24:27 -0600	[diff] [blame]	195	/* RCU or SRCU read lock is needed before checking quiesced flag */
				196	if (unlikely(blk_mq_hctx_stopped(hctx) \|\| blk_queue_quiesced(q)))
Ming Lei	de14829	2017-10-14 17:22:29 +0800	[diff] [blame]	197	return false;
Jens Axboe	bd166ef	2017-01-17 06:03:22 -0700	[diff] [blame]	198
				199	hctx->run++;
				200
				201	/*
				202	* If we have previous entries on our dispatch list, grab them first for
				203	* more fair dispatch.
				204	*/
				205	if (!list_empty_careful(&hctx->dispatch)) {
				206	spin_lock(&hctx->lock);
				207	if (!list_empty(&hctx->dispatch))
				208	list_splice_init(&hctx->dispatch, &rq_list);
				209	spin_unlock(&hctx->lock);
				210	}
				211
				212	/*
				213	* Only ask the scheduler for requests, if we didn't have residual
				214	* requests from the dispatch list. This is to avoid the case where
				215	* we only ever dispatch a fraction of the requests available because
				216	* of low device queue depth. Once we pull requests out of the IO
				217	* scheduler, we can no longer merge or sort them. So it's best to
				218	* leave them there for as long as we can. Mark the hw queue as
				219	* needing a restart in that case.
Ming Lei	caf8eb0	2017-10-14 17:22:26 +0800	[diff] [blame]	220	*
Ming Lei	5e3d02b	2017-10-14 17:22:25 +0800	[diff] [blame]	221	* We want to dispatch from the scheduler if there was nothing
				222	* on the dispatch list or we were able to dispatch from the
				223	* dispatch list.
Jens Axboe	64765a7	2017-02-17 11:39:26 -0700	[diff] [blame]	224	*/
Ming Lei	caf8eb0	2017-10-14 17:22:26 +0800	[diff] [blame]	225	if (!list_empty(&rq_list)) {
				226	blk_mq_sched_mark_restart_hctx(hctx);
Ming Lei	b347689	2017-10-14 17:22:30 +0800	[diff] [blame^]	227	if (blk_mq_dispatch_rq_list(q, &rq_list, false)) {
				228	if (has_sched_dispatch)
				229	run_queue = blk_mq_do_dispatch_sched(hctx);
				230	else
				231	run_queue = blk_mq_do_dispatch_ctx(hctx);
				232	}
Ming Lei	caf8eb0	2017-10-14 17:22:26 +0800	[diff] [blame]	233	} else if (has_sched_dispatch) {
Ming Lei	de14829	2017-10-14 17:22:29 +0800	[diff] [blame]	234	run_queue = blk_mq_do_dispatch_sched(hctx);
Ming Lei	b347689	2017-10-14 17:22:30 +0800	[diff] [blame^]	235	} else if (q->mq_ops->get_budget) {
				236	/*
				237	* If we need to get budget before queuing request, we
				238	* dequeue request one by one from sw queue for avoiding
				239	* to mess up I/O merge when dispatch runs out of resource.
				240	*
				241	* TODO: get more budgets, and dequeue more requests in
				242	* one time.
				243	*/
				244	run_queue = blk_mq_do_dispatch_ctx(hctx);
Ming Lei	caf8eb0	2017-10-14 17:22:26 +0800	[diff] [blame]	245	} else {
				246	blk_mq_flush_busy_ctxs(hctx, &rq_list);
Ming Lei	de14829	2017-10-14 17:22:29 +0800	[diff] [blame]	247	blk_mq_dispatch_rq_list(q, &rq_list, false);
Jens Axboe	c13660a	2017-01-26 12:40:07 -0700	[diff] [blame]	248	}
Ming Lei	de14829	2017-10-14 17:22:29 +0800	[diff] [blame]	249
				250	if (run_queue && !blk_mq_sched_needs_restart(hctx) &&
				251	!test_bit(BLK_MQ_S_TAG_WAITING, &hctx->state)) {
				252	blk_mq_sched_mark_restart_hctx(hctx);
				253	return true;
				254	}
				255
				256	return false;
Jens Axboe	bd166ef	2017-01-17 06:03:22 -0700	[diff] [blame]	257	}
				258
Jens Axboe	e4d750c	2017-02-03 09:48:28 -0700	[diff] [blame]	259	bool blk_mq_sched_try_merge(struct request_queue q, struct bio bio,
				260	struct request **merged_request)
Jens Axboe	bd166ef	2017-01-17 06:03:22 -0700	[diff] [blame]	261	{
				262	struct request *rq;
Jens Axboe	bd166ef	2017-01-17 06:03:22 -0700	[diff] [blame]	263
Christoph Hellwig	34fe7c0	2017-02-08 14:46:48 +0100	[diff] [blame]	264	switch (elv_merge(q, &rq, bio)) {
				265	case ELEVATOR_BACK_MERGE:
Jens Axboe	bd166ef	2017-01-17 06:03:22 -0700	[diff] [blame]	266	if (!blk_mq_sched_allow_merge(q, rq, bio))
				267	return false;
Christoph Hellwig	34fe7c0	2017-02-08 14:46:48 +0100	[diff] [blame]	268	if (!bio_attempt_back_merge(q, rq, bio))
				269	return false;
				270	*merged_request = attempt_back_merge(q, rq);
				271	if (!*merged_request)
				272	elv_merged_request(q, rq, ELEVATOR_BACK_MERGE);
				273	return true;
				274	case ELEVATOR_FRONT_MERGE:
Jens Axboe	bd166ef	2017-01-17 06:03:22 -0700	[diff] [blame]	275	if (!blk_mq_sched_allow_merge(q, rq, bio))
				276	return false;
Christoph Hellwig	34fe7c0	2017-02-08 14:46:48 +0100	[diff] [blame]	277	if (!bio_attempt_front_merge(q, rq, bio))
				278	return false;
				279	*merged_request = attempt_front_merge(q, rq);
				280	if (!*merged_request)
				281	elv_merged_request(q, rq, ELEVATOR_FRONT_MERGE);
				282	return true;
				283	default:
				284	return false;
Jens Axboe	bd166ef	2017-01-17 06:03:22 -0700	[diff] [blame]	285	}
Jens Axboe	bd166ef	2017-01-17 06:03:22 -0700	[diff] [blame]	286	}
				287	EXPORT_SYMBOL_GPL(blk_mq_sched_try_merge);
				288
Ming Lei	9bddeb2	2017-05-26 19:53:20 +0800	[diff] [blame]	289	/*
				290	* Reverse check our software queue for entries that we could potentially
				291	* merge with. Currently includes a hand-wavy stop count of 8, to not spend
				292	* too much time checking for merges.
				293	*/
				294	static bool blk_mq_attempt_merge(struct request_queue *q,
				295	struct blk_mq_ctx ctx, struct bio bio)
				296	{
				297	struct request *rq;
				298	int checked = 8;
				299
Bart Van Assche	7b60781	2017-06-20 11:15:47 -0700	[diff] [blame]	300	lockdep_assert_held(&ctx->lock);
				301
Ming Lei	9bddeb2	2017-05-26 19:53:20 +0800	[diff] [blame]	302	list_for_each_entry_reverse(rq, &ctx->rq_list, queuelist) {
				303	bool merged = false;
				304
				305	if (!checked--)
				306	break;
				307
				308	if (!blk_rq_merge_ok(rq, bio))
				309	continue;
				310
				311	switch (blk_try_merge(rq, bio)) {
				312	case ELEVATOR_BACK_MERGE:
				313	if (blk_mq_sched_allow_merge(q, rq, bio))
				314	merged = bio_attempt_back_merge(q, rq, bio);
				315	break;
				316	case ELEVATOR_FRONT_MERGE:
				317	if (blk_mq_sched_allow_merge(q, rq, bio))
				318	merged = bio_attempt_front_merge(q, rq, bio);
				319	break;
				320	case ELEVATOR_DISCARD_MERGE:
				321	merged = bio_attempt_discard_merge(q, rq, bio);
				322	break;
				323	default:
				324	continue;
				325	}
				326
				327	if (merged)
				328	ctx->rq_merged++;
				329	return merged;
				330	}
				331
				332	return false;
				333	}
				334
Jens Axboe	bd166ef	2017-01-17 06:03:22 -0700	[diff] [blame]	335	bool __blk_mq_sched_bio_merge(struct request_queue q, struct bio bio)
				336	{
				337	struct elevator_queue *e = q->elevator;
Ming Lei	9bddeb2	2017-05-26 19:53:20 +0800	[diff] [blame]	338	struct blk_mq_ctx *ctx = blk_mq_get_ctx(q);
				339	struct blk_mq_hw_ctx *hctx = blk_mq_map_queue(q, ctx->cpu);
				340	bool ret = false;
Jens Axboe	bd166ef	2017-01-17 06:03:22 -0700	[diff] [blame]	341
Ming Lei	9bddeb2	2017-05-26 19:53:20 +0800	[diff] [blame]	342	if (e && e->type->ops.mq.bio_merge) {
Jens Axboe	bd166ef	2017-01-17 06:03:22 -0700	[diff] [blame]	343	blk_mq_put_ctx(ctx);
				344	return e->type->ops.mq.bio_merge(hctx, bio);
				345	}
				346
Ming Lei	9bddeb2	2017-05-26 19:53:20 +0800	[diff] [blame]	347	if (hctx->flags & BLK_MQ_F_SHOULD_MERGE) {
				348	/* default per sw-queue merge */
				349	spin_lock(&ctx->lock);
				350	ret = blk_mq_attempt_merge(q, ctx, bio);
				351	spin_unlock(&ctx->lock);
				352	}
				353
				354	blk_mq_put_ctx(ctx);
				355	return ret;
Jens Axboe	bd166ef	2017-01-17 06:03:22 -0700	[diff] [blame]	356	}
				357
				358	bool blk_mq_sched_try_insert_merge(struct request_queue q, struct request rq)
				359	{
				360	return rq_mergeable(rq) && elv_attempt_insert_merge(q, rq);
				361	}
				362	EXPORT_SYMBOL_GPL(blk_mq_sched_try_insert_merge);
				363
				364	void blk_mq_sched_request_inserted(struct request *rq)
				365	{
				366	trace_block_rq_insert(rq->q, rq);
				367	}
				368	EXPORT_SYMBOL_GPL(blk_mq_sched_request_inserted);
				369
Omar Sandoval	0cacba6	2017-02-02 15:42:39 -0800	[diff] [blame]	370	static bool blk_mq_sched_bypass_insert(struct blk_mq_hw_ctx *hctx,
				371	struct request *rq)
Jens Axboe	bd166ef	2017-01-17 06:03:22 -0700	[diff] [blame]	372	{
				373	if (rq->tag == -1) {
				374	rq->rq_flags \|= RQF_SORTED;
				375	return false;
				376	}
				377
				378	/*
				379	* If we already have a real request tag, send directly to
				380	* the dispatch list.
				381	*/
				382	spin_lock(&hctx->lock);
				383	list_add(&rq->queuelist, &hctx->dispatch);
				384	spin_unlock(&hctx->lock);
				385	return true;
				386	}
Jens Axboe	bd166ef	2017-01-17 06:03:22 -0700	[diff] [blame]	387
/**
 * list_for_each_entry_rcu_rr - round-robin walk over an rcu list
 * @pos:	loop cursor; evaluated several times by the macro.
 * @skip:	element that is never visited. The walk begins at the entry
 *		after @skip and ends when the cursor comes back to @skip.
 * @head:	list head. When the next node would be @head itself, the
 *		cursor steps over it to the node after @head. The list must
 *		contain at least @skip.
 * @member:	name of the list_head field inside typeof(*pos).
 */
#define list_for_each_entry_rcu_rr(pos, skip, head, member)		\
	for ((pos) = (skip);						\
	     (pos = (pos)->member.next != (head) ?			\
		list_entry_rcu((pos)->member.next, typeof(*pos), member) : \
		list_entry_rcu((pos)->member.next->next, typeof(*pos), member)), \
	     (pos) != (skip); )
Jens Axboe	50e1dab	2017-01-26 14:42:34 -0700	[diff] [blame]	403
Bart Van Assche	6d8c6c0	2017-04-07 12:40:09 -0600	[diff] [blame]	404	/*
				405	* Called after a driver tag has been freed to check whether a hctx needs to
				406	* be restarted. Restarts @hctx if its tag set is not shared. Restarts hardware
				407	* queues in a round-robin fashion if the tag set of @hctx is shared with other
				408	* hardware queues.
				409	*/
				410	void blk_mq_sched_restart(struct blk_mq_hw_ctx *const hctx)
				411	{
				412	struct blk_mq_tags *const tags = hctx->tags;
				413	struct blk_mq_tag_set *const set = hctx->queue->tag_set;
				414	struct request_queue const queue = hctx->queue, q;
				415	struct blk_mq_hw_ctx *hctx2;
				416	unsigned int i, j;
				417
				418	if (set->flags & BLK_MQ_F_TAG_SHARED) {
Jens Axboe	8e8320c	2017-06-20 17:56:13 -0600	[diff] [blame]	419	/*
				420	* If this is 0, then we know that no hardware queues
				421	* have RESTART marked. We're done.
				422	*/
				423	if (!atomic_read(&queue->shared_hctx_restart))
				424	return;
				425
Bart Van Assche	6d8c6c0	2017-04-07 12:40:09 -0600	[diff] [blame]	426	rcu_read_lock();
				427	list_for_each_entry_rcu_rr(q, queue, &set->tag_list,
				428	tag_set_list) {
				429	queue_for_each_hw_ctx(q, hctx2, i)
				430	if (hctx2->tags == tags &&
				431	blk_mq_sched_restart_hctx(hctx2))
				432	goto done;
Omar Sandoval	d38d351	2017-02-22 10:58:30 -0800	[diff] [blame]	433	}
Bart Van Assche	6d8c6c0	2017-04-07 12:40:09 -0600	[diff] [blame]	434	j = hctx->queue_num + 1;
				435	for (i = 0; i < queue->nr_hw_queues; i++, j++) {
				436	if (j == queue->nr_hw_queues)
				437	j = 0;
				438	hctx2 = queue->queue_hw_ctx[j];
				439	if (hctx2->tags == tags &&
				440	blk_mq_sched_restart_hctx(hctx2))
				441	break;
				442	}
				443	done:
				444	rcu_read_unlock();
Omar Sandoval	d38d351	2017-02-22 10:58:30 -0800	[diff] [blame]	445	} else {
Jens Axboe	50e1dab	2017-01-26 14:42:34 -0700	[diff] [blame]	446	blk_mq_sched_restart_hctx(hctx);
Jens Axboe	50e1dab	2017-01-26 14:42:34 -0700	[diff] [blame]	447	}
				448	}
				449
Jens Axboe	bd6737f	2017-01-27 01:00:47 -0700	[diff] [blame]	450	/*
				451	* Add flush/fua to the queue. If we fail getting a driver tag, then
				452	* punt to the requeue list. Requeue will re-invoke us from a context
				453	* that's safe to block from.
				454	*/
				455	static void blk_mq_sched_insert_flush(struct blk_mq_hw_ctx *hctx,
				456	struct request *rq, bool can_block)
				457	{
				458	if (blk_mq_get_driver_tag(rq, &hctx, can_block)) {
				459	blk_insert_flush(rq);
				460	blk_mq_run_hw_queue(hctx, true);
				461	} else
Jens Axboe	c7a571b	2017-02-17 11:37:14 -0700	[diff] [blame]	462	blk_mq_add_to_requeue_list(rq, false, true);
Jens Axboe	bd6737f	2017-01-27 01:00:47 -0700	[diff] [blame]	463	}
				464
				465	void blk_mq_sched_insert_request(struct request *rq, bool at_head,
				466	bool run_queue, bool async, bool can_block)
				467	{
				468	struct request_queue *q = rq->q;
				469	struct elevator_queue *e = q->elevator;
				470	struct blk_mq_ctx *ctx = rq->mq_ctx;
				471	struct blk_mq_hw_ctx *hctx = blk_mq_map_queue(q, ctx->cpu);
				472
Jens Axboe	f3a8ab7	2017-01-27 09:08:23 -0700	[diff] [blame]	473	if (rq->tag == -1 && op_is_flush(rq->cmd_flags)) {
Jens Axboe	bd6737f	2017-01-27 01:00:47 -0700	[diff] [blame]	474	blk_mq_sched_insert_flush(hctx, rq, can_block);
				475	return;
				476	}
				477
Omar Sandoval	0cacba6	2017-02-02 15:42:39 -0800	[diff] [blame]	478	if (e && blk_mq_sched_bypass_insert(hctx, rq))
				479	goto run;
				480
Jens Axboe	bd6737f	2017-01-27 01:00:47 -0700	[diff] [blame]	481	if (e && e->type->ops.mq.insert_requests) {
				482	LIST_HEAD(list);
				483
				484	list_add(&rq->queuelist, &list);
				485	e->type->ops.mq.insert_requests(hctx, &list, at_head);
				486	} else {
				487	spin_lock(&ctx->lock);
				488	__blk_mq_insert_request(hctx, rq, at_head);
				489	spin_unlock(&ctx->lock);
				490	}
				491
Omar Sandoval	0cacba6	2017-02-02 15:42:39 -0800	[diff] [blame]	492	run:
Jens Axboe	bd6737f	2017-01-27 01:00:47 -0700	[diff] [blame]	493	if (run_queue)
				494	blk_mq_run_hw_queue(hctx, async);
				495	}
				496
				497	void blk_mq_sched_insert_requests(struct request_queue *q,
				498	struct blk_mq_ctx *ctx,
				499	struct list_head *list, bool run_queue_async)
				500	{
				501	struct blk_mq_hw_ctx *hctx = blk_mq_map_queue(q, ctx->cpu);
				502	struct elevator_queue *e = hctx->queue->elevator;
				503
Omar Sandoval	0cacba6	2017-02-02 15:42:39 -0800	[diff] [blame]	504	if (e) {
				505	struct request rq, next;
				506
				507	/*
				508	* We bypass requests that already have a driver tag assigned,
				509	* which should only be flushes. Flushes are only ever inserted
				510	* as single requests, so we shouldn't ever hit the
				511	* WARN_ON_ONCE() below (but let's handle it just in case).
				512	*/
				513	list_for_each_entry_safe(rq, next, list, queuelist) {
				514	if (WARN_ON_ONCE(rq->tag != -1)) {
				515	list_del_init(&rq->queuelist);
				516	blk_mq_sched_bypass_insert(hctx, rq);
				517	}
				518	}
				519	}
				520
Jens Axboe	bd6737f	2017-01-27 01:00:47 -0700	[diff] [blame]	521	if (e && e->type->ops.mq.insert_requests)
				522	e->type->ops.mq.insert_requests(hctx, list, false);
				523	else
				524	blk_mq_insert_requests(hctx, ctx, list);
				525
				526	blk_mq_run_hw_queue(hctx, run_queue_async);
				527	}
				528
Jens Axboe	bd166ef	2017-01-17 06:03:22 -0700	[diff] [blame]	529	static void blk_mq_sched_free_tags(struct blk_mq_tag_set *set,
				530	struct blk_mq_hw_ctx *hctx,
				531	unsigned int hctx_idx)
				532	{
				533	if (hctx->sched_tags) {
				534	blk_mq_free_rqs(set, hctx->sched_tags, hctx_idx);
				535	blk_mq_free_rq_map(hctx->sched_tags);
				536	hctx->sched_tags = NULL;
				537	}
				538	}
				539
Omar Sandoval	6917ff0	2017-04-05 12:01:30 -0700	[diff] [blame]	540	static int blk_mq_sched_alloc_tags(struct request_queue *q,
				541	struct blk_mq_hw_ctx *hctx,
				542	unsigned int hctx_idx)
Jens Axboe	bd166ef	2017-01-17 06:03:22 -0700	[diff] [blame]	543	{
				544	struct blk_mq_tag_set *set = q->tag_set;
Omar Sandoval	6917ff0	2017-04-05 12:01:30 -0700	[diff] [blame]	545	int ret;
Jens Axboe	bd166ef	2017-01-17 06:03:22 -0700	[diff] [blame]	546
Omar Sandoval	6917ff0	2017-04-05 12:01:30 -0700	[diff] [blame]	547	hctx->sched_tags = blk_mq_alloc_rq_map(set, hctx_idx, q->nr_requests,
				548	set->reserved_tags);
				549	if (!hctx->sched_tags)
				550	return -ENOMEM;
Jens Axboe	bd166ef	2017-01-17 06:03:22 -0700	[diff] [blame]	551
Omar Sandoval	6917ff0	2017-04-05 12:01:30 -0700	[diff] [blame]	552	ret = blk_mq_alloc_rqs(set, hctx->sched_tags, hctx_idx, q->nr_requests);
				553	if (ret)
				554	blk_mq_sched_free_tags(set, hctx, hctx_idx);
Jens Axboe	bd166ef	2017-01-17 06:03:22 -0700	[diff] [blame]	555
Omar Sandoval	6917ff0	2017-04-05 12:01:30 -0700	[diff] [blame]	556	return ret;
Jens Axboe	bd166ef	2017-01-17 06:03:22 -0700	[diff] [blame]	557	}
				558
Omar Sandoval	54d5329	2017-04-07 08:52:27 -0600	[diff] [blame]	559	static void blk_mq_sched_tags_teardown(struct request_queue *q)
Jens Axboe	bd166ef	2017-01-17 06:03:22 -0700	[diff] [blame]	560	{
				561	struct blk_mq_tag_set *set = q->tag_set;
				562	struct blk_mq_hw_ctx *hctx;
				563	int i;
				564
				565	queue_for_each_hw_ctx(q, hctx, i)
				566	blk_mq_sched_free_tags(set, hctx, i);
				567	}
Jens Axboe	d348499	2017-01-13 14:43:58 -0700	[diff] [blame]	568
Omar Sandoval	9325263	2017-04-05 12:01:31 -0700	[diff] [blame]	569	int blk_mq_sched_init_hctx(struct request_queue q, struct blk_mq_hw_ctx hctx,
				570	unsigned int hctx_idx)
				571	{
				572	struct elevator_queue *e = q->elevator;
Omar Sandoval	ee056f9	2017-04-05 12:01:34 -0700	[diff] [blame]	573	int ret;
Omar Sandoval	9325263	2017-04-05 12:01:31 -0700	[diff] [blame]	574
				575	if (!e)
				576	return 0;
				577
Omar Sandoval	ee056f9	2017-04-05 12:01:34 -0700	[diff] [blame]	578	ret = blk_mq_sched_alloc_tags(q, hctx, hctx_idx);
				579	if (ret)
				580	return ret;
				581
				582	if (e->type->ops.mq.init_hctx) {
				583	ret = e->type->ops.mq.init_hctx(hctx, hctx_idx);
				584	if (ret) {
				585	blk_mq_sched_free_tags(q->tag_set, hctx, hctx_idx);
				586	return ret;
				587	}
				588	}
				589
Omar Sandoval	d332ce0	2017-05-04 08:24:40 -0600	[diff] [blame]	590	blk_mq_debugfs_register_sched_hctx(q, hctx);
				591
Omar Sandoval	ee056f9	2017-04-05 12:01:34 -0700	[diff] [blame]	592	return 0;
Omar Sandoval	9325263	2017-04-05 12:01:31 -0700	[diff] [blame]	593	}
				594
				595	void blk_mq_sched_exit_hctx(struct request_queue q, struct blk_mq_hw_ctx hctx,
				596	unsigned int hctx_idx)
				597	{
				598	struct elevator_queue *e = q->elevator;
				599
				600	if (!e)
				601	return;
				602
Omar Sandoval	d332ce0	2017-05-04 08:24:40 -0600	[diff] [blame]	603	blk_mq_debugfs_unregister_sched_hctx(hctx);
				604
Omar Sandoval	ee056f9	2017-04-05 12:01:34 -0700	[diff] [blame]	605	if (e->type->ops.mq.exit_hctx && hctx->sched_data) {
				606	e->type->ops.mq.exit_hctx(hctx, hctx_idx);
				607	hctx->sched_data = NULL;
				608	}
				609
Omar Sandoval	9325263	2017-04-05 12:01:31 -0700	[diff] [blame]	610	blk_mq_sched_free_tags(q->tag_set, hctx, hctx_idx);
				611	}
				612
Omar Sandoval	6917ff0	2017-04-05 12:01:30 -0700	[diff] [blame]	613	int blk_mq_init_sched(struct request_queue q, struct elevator_type e)
				614	{
				615	struct blk_mq_hw_ctx *hctx;
Omar Sandoval	ee056f9	2017-04-05 12:01:34 -0700	[diff] [blame]	616	struct elevator_queue *eq;
Omar Sandoval	6917ff0	2017-04-05 12:01:30 -0700	[diff] [blame]	617	unsigned int i;
				618	int ret;
				619
				620	if (!e) {
				621	q->elevator = NULL;
				622	return 0;
				623	}
				624
				625	/*
Ming Lei	32825c4	2017-07-03 20:37:14 +0800	[diff] [blame]	626	* Default to double of smaller one between hw queue_depth and 128,
				627	* since we don't split into sync/async like the old code did.
				628	* Additionally, this is a per-hw queue depth.
Omar Sandoval	6917ff0	2017-04-05 12:01:30 -0700	[diff] [blame]	629	*/
Ming Lei	32825c4	2017-07-03 20:37:14 +0800	[diff] [blame]	630	q->nr_requests = 2 * min_t(unsigned int, q->tag_set->queue_depth,
				631	BLKDEV_MAX_RQ);
Omar Sandoval	6917ff0	2017-04-05 12:01:30 -0700	[diff] [blame]	632
				633	queue_for_each_hw_ctx(q, hctx, i) {
				634	ret = blk_mq_sched_alloc_tags(q, hctx, i);
				635	if (ret)
				636	goto err;
				637	}
				638
				639	ret = e->ops.mq.init_sched(q, e);
				640	if (ret)
				641	goto err;
				642
Omar Sandoval	d332ce0	2017-05-04 08:24:40 -0600	[diff] [blame]	643	blk_mq_debugfs_register_sched(q);
				644
				645	queue_for_each_hw_ctx(q, hctx, i) {
				646	if (e->ops.mq.init_hctx) {
Omar Sandoval	ee056f9	2017-04-05 12:01:34 -0700	[diff] [blame]	647	ret = e->ops.mq.init_hctx(hctx, i);
				648	if (ret) {
				649	eq = q->elevator;
				650	blk_mq_exit_sched(q, eq);
				651	kobject_put(&eq->kobj);
				652	return ret;
				653	}
				654	}
Omar Sandoval	d332ce0	2017-05-04 08:24:40 -0600	[diff] [blame]	655	blk_mq_debugfs_register_sched_hctx(q, hctx);
Omar Sandoval	ee056f9	2017-04-05 12:01:34 -0700	[diff] [blame]	656	}
				657
Omar Sandoval	6917ff0	2017-04-05 12:01:30 -0700	[diff] [blame]	658	return 0;
				659
				660	err:
Omar Sandoval	54d5329	2017-04-07 08:52:27 -0600	[diff] [blame]	661	blk_mq_sched_tags_teardown(q);
				662	q->elevator = NULL;
Omar Sandoval	6917ff0	2017-04-05 12:01:30 -0700	[diff] [blame]	663	return ret;
				664	}
				665
Omar Sandoval	54d5329	2017-04-07 08:52:27 -0600	[diff] [blame]	666	void blk_mq_exit_sched(struct request_queue q, struct elevator_queue e)
				667	{
Omar Sandoval	ee056f9	2017-04-05 12:01:34 -0700	[diff] [blame]	668	struct blk_mq_hw_ctx *hctx;
				669	unsigned int i;
				670
Omar Sandoval	d332ce0	2017-05-04 08:24:40 -0600	[diff] [blame]	671	queue_for_each_hw_ctx(q, hctx, i) {
				672	blk_mq_debugfs_unregister_sched_hctx(hctx);
				673	if (e->type->ops.mq.exit_hctx && hctx->sched_data) {
				674	e->type->ops.mq.exit_hctx(hctx, i);
				675	hctx->sched_data = NULL;
Omar Sandoval	ee056f9	2017-04-05 12:01:34 -0700	[diff] [blame]	676	}
				677	}
Omar Sandoval	d332ce0	2017-05-04 08:24:40 -0600	[diff] [blame]	678	blk_mq_debugfs_unregister_sched(q);
Omar Sandoval	54d5329	2017-04-07 08:52:27 -0600	[diff] [blame]	679	if (e->type->ops.mq.exit_sched)
				680	e->type->ops.mq.exit_sched(e);
				681	blk_mq_sched_tags_teardown(q);
				682	q->elevator = NULL;
				683	}
				684
Jens Axboe	d348499	2017-01-13 14:43:58 -0700	[diff] [blame]	685	int blk_mq_sched_init(struct request_queue *q)
				686	{
				687	int ret;
				688
Jens Axboe	d348499	2017-01-13 14:43:58 -0700	[diff] [blame]	689	mutex_lock(&q->sysfs_lock);
				690	ret = elevator_init(q, NULL);
				691	mutex_unlock(&q->sysfs_lock);
				692
				693	return ret;
				694	}