Blame - block/kyber-iosched.c - SHIFTPHONES/kernel/shift/mainline

blob: a9f6fd3fab8e5d154933c11c5491098c0cce3aa2 [file] [log] [blame]

Omar Sandoval	00e0439	2017-04-14 01:00:02 -0700	[diff] [blame]	1	/*
				2	* The Kyber I/O scheduler. Controls latency by throttling queue depths using
				3	* scalable techniques.
				4	*
				5	* Copyright (C) 2017 Facebook
				6	*
				7	* This program is free software; you can redistribute it and/or
				8	* modify it under the terms of the GNU General Public
				9	* License v2 as published by the Free Software Foundation.
				10	*
				11	* This program is distributed in the hope that it will be useful,
				12	* but WITHOUT ANY WARRANTY; without even the implied warranty of
				13	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
				14	* General Public License for more details.
				15	*
				16	* You should have received a copy of the GNU General Public License
				17	* along with this program. If not, see <https://www.gnu.org/licenses/>.
				18	*/
				19
				20	#include <linux/kernel.h>
				21	#include <linux/blkdev.h>
				22	#include <linux/blk-mq.h>
				23	#include <linux/elevator.h>
				24	#include <linux/module.h>
				25	#include <linux/sbitmap.h>
				26
				27	#include "blk.h"
				28	#include "blk-mq.h"
Omar Sandoval	16b738f	2017-05-04 00:31:33 -0700	[diff] [blame]	29	#include "blk-mq-debugfs.h"
Omar Sandoval	00e0439	2017-04-14 01:00:02 -0700	[diff] [blame]	30	#include "blk-mq-sched.h"
				31	#include "blk-mq-tag.h"
				32	#include "blk-stat.h"
				33
				34	/* Scheduling domains. */
				35	enum {
				36	KYBER_READ,
				37	KYBER_SYNC_WRITE,
				38	KYBER_OTHER, /* Async writes, discard, etc. */
				39	KYBER_NUM_DOMAINS,
				40	};
				41
				42	enum {
				43	KYBER_MIN_DEPTH = 256,
				44
				45	/*
				46	* In order to prevent starvation of synchronous requests by a flood of
				47	* asynchronous requests, we reserve 25% of requests for synchronous
				48	* operations.
				49	*/
				50	KYBER_ASYNC_PERCENT = 75,
				51	};
				52
				53	/*
				54	* Initial device-wide depths for each scheduling domain.
				55	*
				56	* Even for fast devices with lots of tags like NVMe, you can saturate
				57	* the device with only a fraction of the maximum possible queue depth.
				58	* So, we cap these to a reasonable value.
				59	*/
				60	static const unsigned int kyber_depth[] = {
				61	[KYBER_READ] = 256,
				62	[KYBER_SYNC_WRITE] = 128,
				63	[KYBER_OTHER] = 64,
				64	};
				65
				66	/*
				67	* Scheduling domain batch sizes. We favor reads.
				68	*/
				69	static const unsigned int kyber_batch_size[] = {
				70	[KYBER_READ] = 16,
				71	[KYBER_SYNC_WRITE] = 8,
				72	[KYBER_OTHER] = 8,
				73	};
				74
				75	struct kyber_queue_data {
				76	struct request_queue *q;
				77
				78	struct blk_stat_callback *cb;
				79
				80	/*
				81	* The device is divided into multiple scheduling domains based on the
				82	* request type. Each domain has a fixed number of in-flight requests of
				83	* that type device-wide, limited by these tokens.
				84	*/
				85	struct sbitmap_queue domain_tokens[KYBER_NUM_DOMAINS];
				86
				87	/*
				88	* Async request percentage, converted to per-word depth for
				89	* sbitmap_get_shallow().
				90	*/
				91	unsigned int async_depth;
				92
				93	/* Target latencies in nanoseconds. */
				94	u64 read_lat_nsec, write_lat_nsec;
				95	};
				96
				97	struct kyber_hctx_data {
				98	spinlock_t lock;
				99	struct list_head rqs[KYBER_NUM_DOMAINS];
				100	unsigned int cur_domain;
				101	unsigned int batching;
				102	wait_queue_t domain_wait[KYBER_NUM_DOMAINS];
				103	atomic_t wait_index[KYBER_NUM_DOMAINS];
				104	};
				105
Stephen Bates	a37244e	2017-04-20 15:29:16 -0600	[diff] [blame]	106	static int rq_sched_domain(const struct request *rq)
Omar Sandoval	00e0439	2017-04-14 01:00:02 -0700	[diff] [blame]	107	{
				108	unsigned int op = rq->cmd_flags;
				109
				110	if ((op & REQ_OP_MASK) == REQ_OP_READ)
				111	return KYBER_READ;
				112	else if ((op & REQ_OP_MASK) == REQ_OP_WRITE && op_is_sync(op))
				113	return KYBER_SYNC_WRITE;
				114	else
				115	return KYBER_OTHER;
				116	}
				117
				118	enum {
				119	NONE = 0,
				120	GOOD = 1,
				121	GREAT = 2,
				122	BAD = -1,
				123	AWFUL = -2,
				124	};
				125
				126	#define IS_GOOD(status) ((status) > 0)
				127	#define IS_BAD(status) ((status) < 0)
				128
				129	static int kyber_lat_status(struct blk_stat_callback *cb,
				130	unsigned int sched_domain, u64 target)
				131	{
				132	u64 latency;
				133
				134	if (!cb->stat[sched_domain].nr_samples)
				135	return NONE;
				136
				137	latency = cb->stat[sched_domain].mean;
				138	if (latency >= 2 * target)
				139	return AWFUL;
				140	else if (latency > target)
				141	return BAD;
				142	else if (latency <= target / 2)
				143	return GREAT;
				144	else /* (latency <= target) */
				145	return GOOD;
				146	}
				147
				148	/*
				149	* Adjust the read or synchronous write depth given the status of reads and
				150	* writes. The goal is that the latencies of the two domains are fair (i.e., if
				151	* one is good, then the other is good).
				152	*/
				153	static void kyber_adjust_rw_depth(struct kyber_queue_data *kqd,
				154	unsigned int sched_domain, int this_status,
				155	int other_status)
				156	{
				157	unsigned int orig_depth, depth;
				158
				159	/*
				160	* If this domain had no samples, or reads and writes are both good or
				161	* both bad, don't adjust the depth.
				162	*/
				163	if (this_status == NONE \|\|
				164	(IS_GOOD(this_status) && IS_GOOD(other_status)) \|\|
				165	(IS_BAD(this_status) && IS_BAD(other_status)))
				166	return;
				167
				168	orig_depth = depth = kqd->domain_tokens[sched_domain].sb.depth;
				169
				170	if (other_status == NONE) {
				171	depth++;
				172	} else {
				173	switch (this_status) {
				174	case GOOD:
				175	if (other_status == AWFUL)
				176	depth -= max(depth / 4, 1U);
				177	else
				178	depth -= max(depth / 8, 1U);
				179	break;
				180	case GREAT:
				181	if (other_status == AWFUL)
				182	depth /= 2;
				183	else
				184	depth -= max(depth / 4, 1U);
				185	break;
				186	case BAD:
				187	depth++;
				188	break;
				189	case AWFUL:
				190	if (other_status == GREAT)
				191	depth += 2;
				192	else
				193	depth++;
				194	break;
				195	}
				196	}
				197
				198	depth = clamp(depth, 1U, kyber_depth[sched_domain]);
				199	if (depth != orig_depth)
				200	sbitmap_queue_resize(&kqd->domain_tokens[sched_domain], depth);
				201	}
				202
				203	/*
				204	* Adjust the depth of other requests given the status of reads and synchronous
				205	* writes. As long as either domain is doing fine, we don't throttle, but if
				206	* both domains are doing badly, we throttle heavily.
				207	*/
				208	static void kyber_adjust_other_depth(struct kyber_queue_data *kqd,
				209	int read_status, int write_status,
				210	bool have_samples)
				211	{
				212	unsigned int orig_depth, depth;
				213	int status;
				214
				215	orig_depth = depth = kqd->domain_tokens[KYBER_OTHER].sb.depth;
				216
				217	if (read_status == NONE && write_status == NONE) {
				218	depth += 2;
				219	} else if (have_samples) {
				220	if (read_status == NONE)
				221	status = write_status;
				222	else if (write_status == NONE)
				223	status = read_status;
				224	else
				225	status = max(read_status, write_status);
				226	switch (status) {
				227	case GREAT:
				228	depth += 2;
				229	break;
				230	case GOOD:
				231	depth++;
				232	break;
				233	case BAD:
				234	depth -= max(depth / 4, 1U);
				235	break;
				236	case AWFUL:
				237	depth /= 2;
				238	break;
				239	}
				240	}
				241
				242	depth = clamp(depth, 1U, kyber_depth[KYBER_OTHER]);
				243	if (depth != orig_depth)
				244	sbitmap_queue_resize(&kqd->domain_tokens[KYBER_OTHER], depth);
				245	}
				246
				247	/*
				248	* Apply heuristics for limiting queue depths based on gathered latency
				249	* statistics.
				250	*/
				251	static void kyber_stat_timer_fn(struct blk_stat_callback *cb)
				252	{
				253	struct kyber_queue_data *kqd = cb->data;
				254	int read_status, write_status;
				255
				256	read_status = kyber_lat_status(cb, KYBER_READ, kqd->read_lat_nsec);
				257	write_status = kyber_lat_status(cb, KYBER_SYNC_WRITE, kqd->write_lat_nsec);
				258
				259	kyber_adjust_rw_depth(kqd, KYBER_READ, read_status, write_status);
				260	kyber_adjust_rw_depth(kqd, KYBER_SYNC_WRITE, write_status, read_status);
				261	kyber_adjust_other_depth(kqd, read_status, write_status,
				262	cb->stat[KYBER_OTHER].nr_samples != 0);
				263
				264	/*
				265	* Continue monitoring latencies if we aren't hitting the targets or
				266	* we're still throttling other requests.
				267	*/
				268	if (!blk_stat_is_active(kqd->cb) &&
				269	((IS_BAD(read_status) \|\| IS_BAD(write_status) \|\|
				270	kqd->domain_tokens[KYBER_OTHER].sb.depth < kyber_depth[KYBER_OTHER])))
				271	blk_stat_activate_msecs(kqd->cb, 100);
				272	}
				273
				274	static unsigned int kyber_sched_tags_shift(struct kyber_queue_data *kqd)
				275	{
				276	/*
				277	* All of the hardware queues have the same depth, so we can just grab
				278	* the shift of the first one.
				279	*/
				280	return kqd->q->queue_hw_ctx[0]->sched_tags->bitmap_tags.sb.shift;
				281	}
				282
				283	static struct kyber_queue_data kyber_queue_data_alloc(struct request_queue q)
				284	{
				285	struct kyber_queue_data *kqd;
				286	unsigned int max_tokens;
				287	unsigned int shift;
				288	int ret = -ENOMEM;
				289	int i;
				290
				291	kqd = kmalloc_node(sizeof(*kqd), GFP_KERNEL, q->node);
				292	if (!kqd)
				293	goto err;
				294	kqd->q = q;
				295
				296	kqd->cb = blk_stat_alloc_callback(kyber_stat_timer_fn, rq_sched_domain,
				297	KYBER_NUM_DOMAINS, kqd);
				298	if (!kqd->cb)
				299	goto err_kqd;
				300
				301	/*
				302	* The maximum number of tokens for any scheduling domain is at least
				303	* the queue depth of a single hardware queue. If the hardware doesn't
				304	* have many tags, still provide a reasonable number.
				305	*/
				306	max_tokens = max_t(unsigned int, q->tag_set->queue_depth,
				307	KYBER_MIN_DEPTH);
				308	for (i = 0; i < KYBER_NUM_DOMAINS; i++) {
				309	WARN_ON(!kyber_depth[i]);
				310	WARN_ON(!kyber_batch_size[i]);
				311	ret = sbitmap_queue_init_node(&kqd->domain_tokens[i],
				312	max_tokens, -1, false, GFP_KERNEL,
				313	q->node);
				314	if (ret) {
				315	while (--i >= 0)
				316	sbitmap_queue_free(&kqd->domain_tokens[i]);
				317	goto err_cb;
				318	}
				319	sbitmap_queue_resize(&kqd->domain_tokens[i], kyber_depth[i]);
				320	}
				321
				322	shift = kyber_sched_tags_shift(kqd);
				323	kqd->async_depth = (1U << shift) * KYBER_ASYNC_PERCENT / 100U;
				324
				325	kqd->read_lat_nsec = 2000000ULL;
				326	kqd->write_lat_nsec = 10000000ULL;
				327
				328	return kqd;
				329
				330	err_cb:
				331	blk_stat_free_callback(kqd->cb);
				332	err_kqd:
				333	kfree(kqd);
				334	err:
				335	return ERR_PTR(ret);
				336	}
				337
				338	static int kyber_init_sched(struct request_queue q, struct elevator_type e)
				339	{
				340	struct kyber_queue_data *kqd;
				341	struct elevator_queue *eq;
				342
				343	eq = elevator_alloc(q, e);
				344	if (!eq)
				345	return -ENOMEM;
				346
				347	kqd = kyber_queue_data_alloc(q);
				348	if (IS_ERR(kqd)) {
				349	kobject_put(&eq->kobj);
				350	return PTR_ERR(kqd);
				351	}
				352
				353	eq->elevator_data = kqd;
				354	q->elevator = eq;
				355
				356	blk_stat_add_callback(q, kqd->cb);
				357
				358	return 0;
				359	}
				360
				361	static void kyber_exit_sched(struct elevator_queue *e)
				362	{
				363	struct kyber_queue_data *kqd = e->elevator_data;
				364	struct request_queue *q = kqd->q;
				365	int i;
				366
				367	blk_stat_remove_callback(q, kqd->cb);
				368
				369	for (i = 0; i < KYBER_NUM_DOMAINS; i++)
				370	sbitmap_queue_free(&kqd->domain_tokens[i]);
				371	blk_stat_free_callback(kqd->cb);
				372	kfree(kqd);
				373	}
				374
				375	static int kyber_init_hctx(struct blk_mq_hw_ctx *hctx, unsigned int hctx_idx)
				376	{
				377	struct kyber_hctx_data *khd;
				378	int i;
				379
				380	khd = kmalloc_node(sizeof(*khd), GFP_KERNEL, hctx->numa_node);
				381	if (!khd)
				382	return -ENOMEM;
				383
				384	spin_lock_init(&khd->lock);
				385
				386	for (i = 0; i < KYBER_NUM_DOMAINS; i++) {
				387	INIT_LIST_HEAD(&khd->rqs[i]);
				388	INIT_LIST_HEAD(&khd->domain_wait[i].task_list);
				389	atomic_set(&khd->wait_index[i], 0);
				390	}
				391
				392	khd->cur_domain = 0;
				393	khd->batching = 0;
				394
				395	hctx->sched_data = khd;
				396
				397	return 0;
				398	}
				399
				400	static void kyber_exit_hctx(struct blk_mq_hw_ctx *hctx, unsigned int hctx_idx)
				401	{
				402	kfree(hctx->sched_data);
				403	}
				404
				405	static int rq_get_domain_token(struct request *rq)
				406	{
				407	return (long)rq->elv.priv[0];
				408	}
				409
				410	static void rq_set_domain_token(struct request *rq, int token)
				411	{
				412	rq->elv.priv[0] = (void *)(long)token;
				413	}
				414
				415	static void rq_clear_domain_token(struct kyber_queue_data *kqd,
				416	struct request *rq)
				417	{
				418	unsigned int sched_domain;
				419	int nr;
				420
				421	nr = rq_get_domain_token(rq);
				422	if (nr != -1) {
				423	sched_domain = rq_sched_domain(rq);
				424	sbitmap_queue_clear(&kqd->domain_tokens[sched_domain], nr,
				425	rq->mq_ctx->cpu);
				426	}
				427	}
				428
Christoph Hellwig	5bbf4e5	2017-06-16 18:15:26 +0200	[diff] [blame^]	429	static void kyber_limit_depth(unsigned int op, struct blk_mq_alloc_data *data)
Omar Sandoval	00e0439	2017-04-14 01:00:02 -0700	[diff] [blame]	430	{
Omar Sandoval	00e0439	2017-04-14 01:00:02 -0700	[diff] [blame]	431	/*
				432	* We use the scheduler tags as per-hardware queue queueing tokens.
				433	* Async requests can be limited at this stage.
				434	*/
Christoph Hellwig	5bbf4e5	2017-06-16 18:15:26 +0200	[diff] [blame^]	435	if (!op_is_sync(op)) {
				436	struct kyber_queue_data *kqd = data->q->elevator->elevator_data;
Omar Sandoval	00e0439	2017-04-14 01:00:02 -0700	[diff] [blame]	437
Christoph Hellwig	5bbf4e5	2017-06-16 18:15:26 +0200	[diff] [blame^]	438	data->shallow_depth = kqd->async_depth;
				439	}
				440	}
				441
				442	static void kyber_prepare_request(struct request rq, struct bio bio)
				443	{
				444	rq_set_domain_token(rq, -1);
Omar Sandoval	00e0439	2017-04-14 01:00:02 -0700	[diff] [blame]	445	}
				446
Christoph Hellwig	7b9e936	2017-06-16 18:15:21 +0200	[diff] [blame]	447	static void kyber_finish_request(struct request *rq)
Omar Sandoval	00e0439	2017-04-14 01:00:02 -0700	[diff] [blame]	448	{
Christoph Hellwig	7b9e936	2017-06-16 18:15:21 +0200	[diff] [blame]	449	struct kyber_queue_data *kqd = rq->q->elevator->elevator_data;
Omar Sandoval	00e0439	2017-04-14 01:00:02 -0700	[diff] [blame]	450
				451	rq_clear_domain_token(kqd, rq);
Omar Sandoval	00e0439	2017-04-14 01:00:02 -0700	[diff] [blame]	452	}
				453
				454	static void kyber_completed_request(struct request *rq)
				455	{
				456	struct request_queue *q = rq->q;
				457	struct kyber_queue_data *kqd = q->elevator->elevator_data;
				458	unsigned int sched_domain;
				459	u64 now, latency, target;
				460
				461	/*
				462	* Check if this request met our latency goal. If not, quickly gather
				463	* some statistics and start throttling.
				464	*/
				465	sched_domain = rq_sched_domain(rq);
				466	switch (sched_domain) {
				467	case KYBER_READ:
				468	target = kqd->read_lat_nsec;
				469	break;
				470	case KYBER_SYNC_WRITE:
				471	target = kqd->write_lat_nsec;
				472	break;
				473	default:
				474	return;
				475	}
				476
				477	/* If we are already monitoring latencies, don't check again. */
				478	if (blk_stat_is_active(kqd->cb))
				479	return;
				480
				481	now = __blk_stat_time(ktime_to_ns(ktime_get()));
				482	if (now < blk_stat_time(&rq->issue_stat))
				483	return;
				484
				485	latency = now - blk_stat_time(&rq->issue_stat);
				486
				487	if (latency > target)
				488	blk_stat_activate_msecs(kqd->cb, 10);
				489	}
				490
				491	static void kyber_flush_busy_ctxs(struct kyber_hctx_data *khd,
				492	struct blk_mq_hw_ctx *hctx)
				493	{
				494	LIST_HEAD(rq_list);
				495	struct request rq, next;
				496
				497	blk_mq_flush_busy_ctxs(hctx, &rq_list);
				498	list_for_each_entry_safe(rq, next, &rq_list, queuelist) {
				499	unsigned int sched_domain;
				500
				501	sched_domain = rq_sched_domain(rq);
				502	list_move_tail(&rq->queuelist, &khd->rqs[sched_domain]);
				503	}
				504	}
				505
				506	static int kyber_domain_wake(wait_queue_t *wait, unsigned mode, int flags,
				507	void *key)
				508	{
				509	struct blk_mq_hw_ctx *hctx = READ_ONCE(wait->private);
				510
				511	list_del_init(&wait->task_list);
				512	blk_mq_run_hw_queue(hctx, true);
				513	return 1;
				514	}
				515
				516	static int kyber_get_domain_token(struct kyber_queue_data *kqd,
				517	struct kyber_hctx_data *khd,
				518	struct blk_mq_hw_ctx *hctx)
				519	{
				520	unsigned int sched_domain = khd->cur_domain;
				521	struct sbitmap_queue *domain_tokens = &kqd->domain_tokens[sched_domain];
				522	wait_queue_t *wait = &khd->domain_wait[sched_domain];
				523	struct sbq_wait_state *ws;
				524	int nr;
				525
				526	nr = __sbitmap_queue_get(domain_tokens);
				527	if (nr >= 0)
				528	return nr;
				529
				530	/*
				531	* If we failed to get a domain token, make sure the hardware queue is
				532	* run when one becomes available. Note that this is serialized on
				533	* khd->lock, but we still need to be careful about the waker.
				534	*/
				535	if (list_empty_careful(&wait->task_list)) {
				536	init_waitqueue_func_entry(wait, kyber_domain_wake);
				537	wait->private = hctx;
				538	ws = sbq_wait_ptr(domain_tokens,
				539	&khd->wait_index[sched_domain]);
				540	add_wait_queue(&ws->wait, wait);
				541
				542	/*
				543	* Try again in case a token was freed before we got on the wait
				544	* queue.
				545	*/
				546	nr = __sbitmap_queue_get(domain_tokens);
				547	}
				548	return nr;
				549	}
				550
				551	static struct request *
				552	kyber_dispatch_cur_domain(struct kyber_queue_data *kqd,
				553	struct kyber_hctx_data *khd,
				554	struct blk_mq_hw_ctx *hctx,
				555	bool *flushed)
				556	{
				557	struct list_head *rqs;
				558	struct request *rq;
				559	int nr;
				560
				561	rqs = &khd->rqs[khd->cur_domain];
				562	rq = list_first_entry_or_null(rqs, struct request, queuelist);
				563
				564	/*
				565	* If there wasn't already a pending request and we haven't flushed the
				566	* software queues yet, flush the software queues and check again.
				567	*/
				568	if (!rq && !*flushed) {
				569	kyber_flush_busy_ctxs(khd, hctx);
				570	*flushed = true;
				571	rq = list_first_entry_or_null(rqs, struct request, queuelist);
				572	}
				573
				574	if (rq) {
				575	nr = kyber_get_domain_token(kqd, khd, hctx);
				576	if (nr >= 0) {
				577	khd->batching++;
				578	rq_set_domain_token(rq, nr);
				579	list_del_init(&rq->queuelist);
				580	return rq;
				581	}
				582	}
				583
				584	/* There were either no pending requests or no tokens. */
				585	return NULL;
				586	}
				587
				588	static struct request kyber_dispatch_request(struct blk_mq_hw_ctx hctx)
				589	{
				590	struct kyber_queue_data *kqd = hctx->queue->elevator->elevator_data;
				591	struct kyber_hctx_data *khd = hctx->sched_data;
				592	bool flushed = false;
				593	struct request *rq;
				594	int i;
				595
				596	spin_lock(&khd->lock);
				597
				598	/*
				599	* First, if we are still entitled to batch, try to dispatch a request
				600	* from the batch.
				601	*/
				602	if (khd->batching < kyber_batch_size[khd->cur_domain]) {
				603	rq = kyber_dispatch_cur_domain(kqd, khd, hctx, &flushed);
				604	if (rq)
				605	goto out;
				606	}
				607
				608	/*
				609	* Either,
				610	* 1. We were no longer entitled to a batch.
				611	* 2. The domain we were batching didn't have any requests.
				612	* 3. The domain we were batching was out of tokens.
				613	*
				614	* Start another batch. Note that this wraps back around to the original
				615	* domain if no other domains have requests or tokens.
				616	*/
				617	khd->batching = 0;
				618	for (i = 0; i < KYBER_NUM_DOMAINS; i++) {
				619	if (khd->cur_domain == KYBER_NUM_DOMAINS - 1)
				620	khd->cur_domain = 0;
				621	else
				622	khd->cur_domain++;
				623
				624	rq = kyber_dispatch_cur_domain(kqd, khd, hctx, &flushed);
				625	if (rq)
				626	goto out;
				627	}
				628
				629	rq = NULL;
				630	out:
				631	spin_unlock(&khd->lock);
				632	return rq;
				633	}
				634
				635	static bool kyber_has_work(struct blk_mq_hw_ctx *hctx)
				636	{
				637	struct kyber_hctx_data *khd = hctx->sched_data;
				638	int i;
				639
				640	for (i = 0; i < KYBER_NUM_DOMAINS; i++) {
				641	if (!list_empty_careful(&khd->rqs[i]))
				642	return true;
				643	}
				644	return false;
				645	}
				646
				647	#define KYBER_LAT_SHOW_STORE(op) \
				648	static ssize_t kyber_##op##_lat_show(struct elevator_queue *e, \
				649	char *page) \
				650	{ \
				651	struct kyber_queue_data *kqd = e->elevator_data; \
				652	\
				653	return sprintf(page, "%llu\n", kqd->op##_lat_nsec); \
				654	} \
				655	\
				656	static ssize_t kyber_##op##_lat_store(struct elevator_queue *e, \
				657	const char *page, size_t count) \
				658	{ \
				659	struct kyber_queue_data *kqd = e->elevator_data; \
				660	unsigned long long nsec; \
				661	int ret; \
				662	\
				663	ret = kstrtoull(page, 10, &nsec); \
				664	if (ret) \
				665	return ret; \
				666	\
				667	kqd->op##_lat_nsec = nsec; \
				668	\
				669	return count; \
				670	}
				671	KYBER_LAT_SHOW_STORE(read);
				672	KYBER_LAT_SHOW_STORE(write);
				673	#undef KYBER_LAT_SHOW_STORE
				674
				675	#define KYBER_LAT_ATTR(op) __ATTR(op##_lat_nsec, 0644, kyber_##op##_lat_show, kyber_##op##_lat_store)
				676	static struct elv_fs_entry kyber_sched_attrs[] = {
				677	KYBER_LAT_ATTR(read),
				678	KYBER_LAT_ATTR(write),
				679	__ATTR_NULL
				680	};
				681	#undef KYBER_LAT_ATTR
				682
Omar Sandoval	16b738f	2017-05-04 00:31:33 -0700	[diff] [blame]	683	#ifdef CONFIG_BLK_DEBUG_FS
				684	#define KYBER_DEBUGFS_DOMAIN_ATTRS(domain, name) \
				685	static int kyber_##name##_tokens_show(void data, struct seq_file m) \
				686	{ \
				687	struct request_queue *q = data; \
				688	struct kyber_queue_data *kqd = q->elevator->elevator_data; \
				689	\
				690	sbitmap_queue_show(&kqd->domain_tokens[domain], m); \
				691	return 0; \
				692	} \
				693	\
				694	static void kyber_##name##_rqs_start(struct seq_file m, loff_t *pos) \
				695	__acquires(&khd->lock) \
				696	{ \
				697	struct blk_mq_hw_ctx *hctx = m->private; \
				698	struct kyber_hctx_data *khd = hctx->sched_data; \
				699	\
				700	spin_lock(&khd->lock); \
				701	return seq_list_start(&khd->rqs[domain], *pos); \
				702	} \
				703	\
				704	static void kyber_##name##_rqs_next(struct seq_file m, void *v, \
				705	loff_t *pos) \
				706	{ \
				707	struct blk_mq_hw_ctx *hctx = m->private; \
				708	struct kyber_hctx_data *khd = hctx->sched_data; \
				709	\
				710	return seq_list_next(v, &khd->rqs[domain], pos); \
				711	} \
				712	\
				713	static void kyber_##name##_rqs_stop(struct seq_file m, void v) \
				714	__releases(&khd->lock) \
				715	{ \
				716	struct blk_mq_hw_ctx *hctx = m->private; \
				717	struct kyber_hctx_data *khd = hctx->sched_data; \
				718	\
				719	spin_unlock(&khd->lock); \
				720	} \
				721	\
				722	static const struct seq_operations kyber_##name##_rqs_seq_ops = { \
				723	.start = kyber_##name##_rqs_start, \
				724	.next = kyber_##name##_rqs_next, \
				725	.stop = kyber_##name##_rqs_stop, \
				726	.show = blk_mq_debugfs_rq_show, \
				727	}; \
				728	\
				729	static int kyber_##name##_waiting_show(void data, struct seq_file m) \
				730	{ \
				731	struct blk_mq_hw_ctx *hctx = data; \
				732	struct kyber_hctx_data *khd = hctx->sched_data; \
				733	wait_queue_t *wait = &khd->domain_wait[domain]; \
				734	\
				735	seq_printf(m, "%d\n", !list_empty_careful(&wait->task_list)); \
				736	return 0; \
				737	}
				738	KYBER_DEBUGFS_DOMAIN_ATTRS(KYBER_READ, read)
				739	KYBER_DEBUGFS_DOMAIN_ATTRS(KYBER_SYNC_WRITE, sync_write)
				740	KYBER_DEBUGFS_DOMAIN_ATTRS(KYBER_OTHER, other)
				741	#undef KYBER_DEBUGFS_DOMAIN_ATTRS
				742
				743	static int kyber_async_depth_show(void data, struct seq_file m)
				744	{
				745	struct request_queue *q = data;
				746	struct kyber_queue_data *kqd = q->elevator->elevator_data;
				747
				748	seq_printf(m, "%u\n", kqd->async_depth);
				749	return 0;
				750	}
				751
				752	static int kyber_cur_domain_show(void data, struct seq_file m)
				753	{
				754	struct blk_mq_hw_ctx *hctx = data;
				755	struct kyber_hctx_data *khd = hctx->sched_data;
				756
				757	switch (khd->cur_domain) {
				758	case KYBER_READ:
				759	seq_puts(m, "READ\n");
				760	break;
				761	case KYBER_SYNC_WRITE:
				762	seq_puts(m, "SYNC_WRITE\n");
				763	break;
				764	case KYBER_OTHER:
				765	seq_puts(m, "OTHER\n");
				766	break;
				767	default:
				768	seq_printf(m, "%u\n", khd->cur_domain);
				769	break;
				770	}
				771	return 0;
				772	}
				773
				774	static int kyber_batching_show(void data, struct seq_file m)
				775	{
				776	struct blk_mq_hw_ctx *hctx = data;
				777	struct kyber_hctx_data *khd = hctx->sched_data;
				778
				779	seq_printf(m, "%u\n", khd->batching);
				780	return 0;
				781	}
				782
				783	#define KYBER_QUEUE_DOMAIN_ATTRS(name) \
				784	{#name "_tokens", 0400, kyber_##name##_tokens_show}
				785	static const struct blk_mq_debugfs_attr kyber_queue_debugfs_attrs[] = {
				786	KYBER_QUEUE_DOMAIN_ATTRS(read),
				787	KYBER_QUEUE_DOMAIN_ATTRS(sync_write),
				788	KYBER_QUEUE_DOMAIN_ATTRS(other),
				789	{"async_depth", 0400, kyber_async_depth_show},
				790	{},
				791	};
				792	#undef KYBER_QUEUE_DOMAIN_ATTRS
				793
				794	#define KYBER_HCTX_DOMAIN_ATTRS(name) \
				795	{#name "_rqs", 0400, .seq_ops = &kyber_##name##_rqs_seq_ops}, \
				796	{#name "_waiting", 0400, kyber_##name##_waiting_show}
				797	static const struct blk_mq_debugfs_attr kyber_hctx_debugfs_attrs[] = {
				798	KYBER_HCTX_DOMAIN_ATTRS(read),
				799	KYBER_HCTX_DOMAIN_ATTRS(sync_write),
				800	KYBER_HCTX_DOMAIN_ATTRS(other),
				801	{"cur_domain", 0400, kyber_cur_domain_show},
				802	{"batching", 0400, kyber_batching_show},
				803	{},
				804	};
				805	#undef KYBER_HCTX_DOMAIN_ATTRS
				806	#endif
				807
Omar Sandoval	00e0439	2017-04-14 01:00:02 -0700	[diff] [blame]	808	static struct elevator_type kyber_sched = {
				809	.ops.mq = {
				810	.init_sched = kyber_init_sched,
				811	.exit_sched = kyber_exit_sched,
				812	.init_hctx = kyber_init_hctx,
				813	.exit_hctx = kyber_exit_hctx,
Christoph Hellwig	5bbf4e5	2017-06-16 18:15:26 +0200	[diff] [blame^]	814	.limit_depth = kyber_limit_depth,
				815	.prepare_request = kyber_prepare_request,
Christoph Hellwig	7b9e936	2017-06-16 18:15:21 +0200	[diff] [blame]	816	.finish_request = kyber_finish_request,
Omar Sandoval	00e0439	2017-04-14 01:00:02 -0700	[diff] [blame]	817	.completed_request = kyber_completed_request,
				818	.dispatch_request = kyber_dispatch_request,
				819	.has_work = kyber_has_work,
				820	},
				821	.uses_mq = true,
Omar Sandoval	16b738f	2017-05-04 00:31:33 -0700	[diff] [blame]	822	#ifdef CONFIG_BLK_DEBUG_FS
				823	.queue_debugfs_attrs = kyber_queue_debugfs_attrs,
				824	.hctx_debugfs_attrs = kyber_hctx_debugfs_attrs,
				825	#endif
Omar Sandoval	00e0439	2017-04-14 01:00:02 -0700	[diff] [blame]	826	.elevator_attrs = kyber_sched_attrs,
				827	.elevator_name = "kyber",
				828	.elevator_owner = THIS_MODULE,
				829	};
				830
				831	static int __init kyber_init(void)
				832	{
				833	return elv_register(&kyber_sched);
				834	}
				835
				836	static void __exit kyber_exit(void)
				837	{
				838	elv_unregister(&kyber_sched);
				839	}
				840
				841	module_init(kyber_init);
				842	module_exit(kyber_exit);
				843
				844	MODULE_AUTHOR("Omar Sandoval");
				845	MODULE_LICENSE("GPL");
				846	MODULE_DESCRIPTION("Kyber I/O scheduler");