Blame - block/kyber-iosched.c - SHIFTPHONES/mainline/linux

blob: 2557b399f0a8ee2bf87e408f6c191d677a8f4e75 [file] [log] [blame]

Omar Sandoval	00e0439	2017-04-14 01:00:02 -0700	[diff] [blame]	1	/*
				2	* The Kyber I/O scheduler. Controls latency by throttling queue depths using
				3	* scalable techniques.
				4	*
				5	* Copyright (C) 2017 Facebook
				6	*
				7	* This program is free software; you can redistribute it and/or
				8	* modify it under the terms of the GNU General Public
				9	* License v2 as published by the Free Software Foundation.
				10	*
				11	* This program is distributed in the hope that it will be useful,
				12	* but WITHOUT ANY WARRANTY; without even the implied warranty of
				13	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
				14	* General Public License for more details.
				15	*
				16	* You should have received a copy of the GNU General Public License
				17	* along with this program. If not, see <https://www.gnu.org/licenses/>.
				18	*/
				19
				20	#include <linux/kernel.h>
				21	#include <linux/blkdev.h>
				22	#include <linux/blk-mq.h>
				23	#include <linux/elevator.h>
				24	#include <linux/module.h>
				25	#include <linux/sbitmap.h>
				26
				27	#include "blk.h"
				28	#include "blk-mq.h"
Omar Sandoval	16b738f	2017-05-04 00:31:33 -0700	[diff] [blame]	29	#include "blk-mq-debugfs.h"
Omar Sandoval	00e0439	2017-04-14 01:00:02 -0700	[diff] [blame]	30	#include "blk-mq-sched.h"
				31	#include "blk-mq-tag.h"
				32	#include "blk-stat.h"
				33
				34	/* Scheduling domains. */
				35	enum {
				36	KYBER_READ,
				37	KYBER_SYNC_WRITE,
				38	KYBER_OTHER, /* Async writes, discard, etc. */
				39	KYBER_NUM_DOMAINS,
				40	};
				41
				42	enum {
				43	KYBER_MIN_DEPTH = 256,
				44
				45	/*
				46	* In order to prevent starvation of synchronous requests by a flood of
				47	* asynchronous requests, we reserve 25% of requests for synchronous
				48	* operations.
				49	*/
				50	KYBER_ASYNC_PERCENT = 75,
				51	};
				52
				53	/*
				54	* Initial device-wide depths for each scheduling domain.
				55	*
				56	* Even for fast devices with lots of tags like NVMe, you can saturate
				57	* the device with only a fraction of the maximum possible queue depth.
				58	* So, we cap these to a reasonable value.
				59	*/
				60	static const unsigned int kyber_depth[] = {
				61	[KYBER_READ] = 256,
				62	[KYBER_SYNC_WRITE] = 128,
				63	[KYBER_OTHER] = 64,
				64	};
				65
				66	/*
				67	* Scheduling domain batch sizes. We favor reads.
				68	*/
				69	static const unsigned int kyber_batch_size[] = {
				70	[KYBER_READ] = 16,
				71	[KYBER_SYNC_WRITE] = 8,
				72	[KYBER_OTHER] = 8,
				73	};
				74
				75	struct kyber_queue_data {
				76	struct request_queue *q;
				77
				78	struct blk_stat_callback *cb;
				79
				80	/*
				81	* The device is divided into multiple scheduling domains based on the
				82	* request type. Each domain has a fixed number of in-flight requests of
				83	* that type device-wide, limited by these tokens.
				84	*/
				85	struct sbitmap_queue domain_tokens[KYBER_NUM_DOMAINS];
				86
				87	/*
				88	* Async request percentage, converted to per-word depth for
				89	* sbitmap_get_shallow().
				90	*/
				91	unsigned int async_depth;
				92
				93	/* Target latencies in nanoseconds. */
				94	u64 read_lat_nsec, write_lat_nsec;
				95	};
				96
				97	struct kyber_hctx_data {
				98	spinlock_t lock;
				99	struct list_head rqs[KYBER_NUM_DOMAINS];
				100	unsigned int cur_domain;
				101	unsigned int batching;
				102	wait_queue_t domain_wait[KYBER_NUM_DOMAINS];
				103	atomic_t wait_index[KYBER_NUM_DOMAINS];
				104	};
				105
Stephen Bates	a37244e	2017-04-20 15:29:16 -0600	[diff] [blame]	106	static int rq_sched_domain(const struct request *rq)
Omar Sandoval	00e0439	2017-04-14 01:00:02 -0700	[diff] [blame]	107	{
				108	unsigned int op = rq->cmd_flags;
				109
				110	if ((op & REQ_OP_MASK) == REQ_OP_READ)
				111	return KYBER_READ;
				112	else if ((op & REQ_OP_MASK) == REQ_OP_WRITE && op_is_sync(op))
				113	return KYBER_SYNC_WRITE;
				114	else
				115	return KYBER_OTHER;
				116	}
				117
				118	enum {
				119	NONE = 0,
				120	GOOD = 1,
				121	GREAT = 2,
				122	BAD = -1,
				123	AWFUL = -2,
				124	};
				125
				126	#define IS_GOOD(status) ((status) > 0)
				127	#define IS_BAD(status) ((status) < 0)
				128
				129	static int kyber_lat_status(struct blk_stat_callback *cb,
				130	unsigned int sched_domain, u64 target)
				131	{
				132	u64 latency;
				133
				134	if (!cb->stat[sched_domain].nr_samples)
				135	return NONE;
				136
				137	latency = cb->stat[sched_domain].mean;
				138	if (latency >= 2 * target)
				139	return AWFUL;
				140	else if (latency > target)
				141	return BAD;
				142	else if (latency <= target / 2)
				143	return GREAT;
				144	else /* (latency <= target) */
				145	return GOOD;
				146	}
				147
				148	/*
				149	* Adjust the read or synchronous write depth given the status of reads and
				150	* writes. The goal is that the latencies of the two domains are fair (i.e., if
				151	* one is good, then the other is good).
				152	*/
				153	static void kyber_adjust_rw_depth(struct kyber_queue_data *kqd,
				154	unsigned int sched_domain, int this_status,
				155	int other_status)
				156	{
				157	unsigned int orig_depth, depth;
				158
				159	/*
				160	* If this domain had no samples, or reads and writes are both good or
				161	* both bad, don't adjust the depth.
				162	*/
				163	if (this_status == NONE \|\|
				164	(IS_GOOD(this_status) && IS_GOOD(other_status)) \|\|
				165	(IS_BAD(this_status) && IS_BAD(other_status)))
				166	return;
				167
				168	orig_depth = depth = kqd->domain_tokens[sched_domain].sb.depth;
				169
				170	if (other_status == NONE) {
				171	depth++;
				172	} else {
				173	switch (this_status) {
				174	case GOOD:
				175	if (other_status == AWFUL)
				176	depth -= max(depth / 4, 1U);
				177	else
				178	depth -= max(depth / 8, 1U);
				179	break;
				180	case GREAT:
				181	if (other_status == AWFUL)
				182	depth /= 2;
				183	else
				184	depth -= max(depth / 4, 1U);
				185	break;
				186	case BAD:
				187	depth++;
				188	break;
				189	case AWFUL:
				190	if (other_status == GREAT)
				191	depth += 2;
				192	else
				193	depth++;
				194	break;
				195	}
				196	}
				197
				198	depth = clamp(depth, 1U, kyber_depth[sched_domain]);
				199	if (depth != orig_depth)
				200	sbitmap_queue_resize(&kqd->domain_tokens[sched_domain], depth);
				201	}
				202
				203	/*
				204	* Adjust the depth of other requests given the status of reads and synchronous
				205	* writes. As long as either domain is doing fine, we don't throttle, but if
				206	* both domains are doing badly, we throttle heavily.
				207	*/
				208	static void kyber_adjust_other_depth(struct kyber_queue_data *kqd,
				209	int read_status, int write_status,
				210	bool have_samples)
				211	{
				212	unsigned int orig_depth, depth;
				213	int status;
				214
				215	orig_depth = depth = kqd->domain_tokens[KYBER_OTHER].sb.depth;
				216
				217	if (read_status == NONE && write_status == NONE) {
				218	depth += 2;
				219	} else if (have_samples) {
				220	if (read_status == NONE)
				221	status = write_status;
				222	else if (write_status == NONE)
				223	status = read_status;
				224	else
				225	status = max(read_status, write_status);
				226	switch (status) {
				227	case GREAT:
				228	depth += 2;
				229	break;
				230	case GOOD:
				231	depth++;
				232	break;
				233	case BAD:
				234	depth -= max(depth / 4, 1U);
				235	break;
				236	case AWFUL:
				237	depth /= 2;
				238	break;
				239	}
				240	}
				241
				242	depth = clamp(depth, 1U, kyber_depth[KYBER_OTHER]);
				243	if (depth != orig_depth)
				244	sbitmap_queue_resize(&kqd->domain_tokens[KYBER_OTHER], depth);
				245	}
				246
				247	/*
				248	* Apply heuristics for limiting queue depths based on gathered latency
				249	* statistics.
				250	*/
				251	static void kyber_stat_timer_fn(struct blk_stat_callback *cb)
				252	{
				253	struct kyber_queue_data *kqd = cb->data;
				254	int read_status, write_status;
				255
				256	read_status = kyber_lat_status(cb, KYBER_READ, kqd->read_lat_nsec);
				257	write_status = kyber_lat_status(cb, KYBER_SYNC_WRITE, kqd->write_lat_nsec);
				258
				259	kyber_adjust_rw_depth(kqd, KYBER_READ, read_status, write_status);
				260	kyber_adjust_rw_depth(kqd, KYBER_SYNC_WRITE, write_status, read_status);
				261	kyber_adjust_other_depth(kqd, read_status, write_status,
				262	cb->stat[KYBER_OTHER].nr_samples != 0);
				263
				264	/*
				265	* Continue monitoring latencies if we aren't hitting the targets or
				266	* we're still throttling other requests.
				267	*/
				268	if (!blk_stat_is_active(kqd->cb) &&
				269	((IS_BAD(read_status) \|\| IS_BAD(write_status) \|\|
				270	kqd->domain_tokens[KYBER_OTHER].sb.depth < kyber_depth[KYBER_OTHER])))
				271	blk_stat_activate_msecs(kqd->cb, 100);
				272	}
				273
				274	static unsigned int kyber_sched_tags_shift(struct kyber_queue_data *kqd)
				275	{
				276	/*
				277	* All of the hardware queues have the same depth, so we can just grab
				278	* the shift of the first one.
				279	*/
				280	return kqd->q->queue_hw_ctx[0]->sched_tags->bitmap_tags.sb.shift;
				281	}
				282
				283	static struct kyber_queue_data kyber_queue_data_alloc(struct request_queue q)
				284	{
				285	struct kyber_queue_data *kqd;
				286	unsigned int max_tokens;
				287	unsigned int shift;
				288	int ret = -ENOMEM;
				289	int i;
				290
				291	kqd = kmalloc_node(sizeof(*kqd), GFP_KERNEL, q->node);
				292	if (!kqd)
				293	goto err;
				294	kqd->q = q;
				295
				296	kqd->cb = blk_stat_alloc_callback(kyber_stat_timer_fn, rq_sched_domain,
				297	KYBER_NUM_DOMAINS, kqd);
				298	if (!kqd->cb)
				299	goto err_kqd;
				300
				301	/*
				302	* The maximum number of tokens for any scheduling domain is at least
				303	* the queue depth of a single hardware queue. If the hardware doesn't
				304	* have many tags, still provide a reasonable number.
				305	*/
				306	max_tokens = max_t(unsigned int, q->tag_set->queue_depth,
				307	KYBER_MIN_DEPTH);
				308	for (i = 0; i < KYBER_NUM_DOMAINS; i++) {
				309	WARN_ON(!kyber_depth[i]);
				310	WARN_ON(!kyber_batch_size[i]);
				311	ret = sbitmap_queue_init_node(&kqd->domain_tokens[i],
				312	max_tokens, -1, false, GFP_KERNEL,
				313	q->node);
				314	if (ret) {
				315	while (--i >= 0)
				316	sbitmap_queue_free(&kqd->domain_tokens[i]);
				317	goto err_cb;
				318	}
				319	sbitmap_queue_resize(&kqd->domain_tokens[i], kyber_depth[i]);
				320	}
				321
				322	shift = kyber_sched_tags_shift(kqd);
				323	kqd->async_depth = (1U << shift) * KYBER_ASYNC_PERCENT / 100U;
				324
				325	kqd->read_lat_nsec = 2000000ULL;
				326	kqd->write_lat_nsec = 10000000ULL;
				327
				328	return kqd;
				329
				330	err_cb:
				331	blk_stat_free_callback(kqd->cb);
				332	err_kqd:
				333	kfree(kqd);
				334	err:
				335	return ERR_PTR(ret);
				336	}
				337
				338	static int kyber_init_sched(struct request_queue q, struct elevator_type e)
				339	{
				340	struct kyber_queue_data *kqd;
				341	struct elevator_queue *eq;
				342
				343	eq = elevator_alloc(q, e);
				344	if (!eq)
				345	return -ENOMEM;
				346
				347	kqd = kyber_queue_data_alloc(q);
				348	if (IS_ERR(kqd)) {
				349	kobject_put(&eq->kobj);
				350	return PTR_ERR(kqd);
				351	}
				352
				353	eq->elevator_data = kqd;
				354	q->elevator = eq;
				355
				356	blk_stat_add_callback(q, kqd->cb);
				357
				358	return 0;
				359	}
				360
				361	static void kyber_exit_sched(struct elevator_queue *e)
				362	{
				363	struct kyber_queue_data *kqd = e->elevator_data;
				364	struct request_queue *q = kqd->q;
				365	int i;
				366
				367	blk_stat_remove_callback(q, kqd->cb);
				368
				369	for (i = 0; i < KYBER_NUM_DOMAINS; i++)
				370	sbitmap_queue_free(&kqd->domain_tokens[i]);
				371	blk_stat_free_callback(kqd->cb);
				372	kfree(kqd);
				373	}
				374
				375	static int kyber_init_hctx(struct blk_mq_hw_ctx *hctx, unsigned int hctx_idx)
				376	{
				377	struct kyber_hctx_data *khd;
				378	int i;
				379
				380	khd = kmalloc_node(sizeof(*khd), GFP_KERNEL, hctx->numa_node);
				381	if (!khd)
				382	return -ENOMEM;
				383
				384	spin_lock_init(&khd->lock);
				385
				386	for (i = 0; i < KYBER_NUM_DOMAINS; i++) {
				387	INIT_LIST_HEAD(&khd->rqs[i]);
				388	INIT_LIST_HEAD(&khd->domain_wait[i].task_list);
				389	atomic_set(&khd->wait_index[i], 0);
				390	}
				391
				392	khd->cur_domain = 0;
				393	khd->batching = 0;
				394
				395	hctx->sched_data = khd;
				396
				397	return 0;
				398	}
				399
				400	static void kyber_exit_hctx(struct blk_mq_hw_ctx *hctx, unsigned int hctx_idx)
				401	{
				402	kfree(hctx->sched_data);
				403	}
				404
				405	static int rq_get_domain_token(struct request *rq)
				406	{
				407	return (long)rq->elv.priv[0];
				408	}
				409
				410	static void rq_set_domain_token(struct request *rq, int token)
				411	{
				412	rq->elv.priv[0] = (void *)(long)token;
				413	}
				414
				415	static void rq_clear_domain_token(struct kyber_queue_data *kqd,
				416	struct request *rq)
				417	{
				418	unsigned int sched_domain;
				419	int nr;
				420
				421	nr = rq_get_domain_token(rq);
				422	if (nr != -1) {
				423	sched_domain = rq_sched_domain(rq);
				424	sbitmap_queue_clear(&kqd->domain_tokens[sched_domain], nr,
				425	rq->mq_ctx->cpu);
				426	}
				427	}
				428
				429	static struct request kyber_get_request(struct request_queue q,
				430	unsigned int op,
				431	struct blk_mq_alloc_data *data)
				432	{
				433	struct kyber_queue_data *kqd = q->elevator->elevator_data;
				434	struct request *rq;
				435
				436	/*
				437	* We use the scheduler tags as per-hardware queue queueing tokens.
				438	* Async requests can be limited at this stage.
				439	*/
				440	if (!op_is_sync(op))
				441	data->shallow_depth = kqd->async_depth;
				442
				443	rq = __blk_mq_alloc_request(data, op);
				444	if (rq)
				445	rq_set_domain_token(rq, -1);
				446	return rq;
				447	}
				448
Christoph Hellwig	7b9e936	2017-06-16 18:15:21 +0200	[diff] [blame^]	449	static void kyber_finish_request(struct request *rq)
Omar Sandoval	00e0439	2017-04-14 01:00:02 -0700	[diff] [blame]	450	{
Christoph Hellwig	7b9e936	2017-06-16 18:15:21 +0200	[diff] [blame^]	451	struct kyber_queue_data *kqd = rq->q->elevator->elevator_data;
Omar Sandoval	00e0439	2017-04-14 01:00:02 -0700	[diff] [blame]	452
				453	rq_clear_domain_token(kqd, rq);
Omar Sandoval	00e0439	2017-04-14 01:00:02 -0700	[diff] [blame]	454	}
				455
				456	static void kyber_completed_request(struct request *rq)
				457	{
				458	struct request_queue *q = rq->q;
				459	struct kyber_queue_data *kqd = q->elevator->elevator_data;
				460	unsigned int sched_domain;
				461	u64 now, latency, target;
				462
				463	/*
				464	* Check if this request met our latency goal. If not, quickly gather
				465	* some statistics and start throttling.
				466	*/
				467	sched_domain = rq_sched_domain(rq);
				468	switch (sched_domain) {
				469	case KYBER_READ:
				470	target = kqd->read_lat_nsec;
				471	break;
				472	case KYBER_SYNC_WRITE:
				473	target = kqd->write_lat_nsec;
				474	break;
				475	default:
				476	return;
				477	}
				478
				479	/* If we are already monitoring latencies, don't check again. */
				480	if (blk_stat_is_active(kqd->cb))
				481	return;
				482
				483	now = __blk_stat_time(ktime_to_ns(ktime_get()));
				484	if (now < blk_stat_time(&rq->issue_stat))
				485	return;
				486
				487	latency = now - blk_stat_time(&rq->issue_stat);
				488
				489	if (latency > target)
				490	blk_stat_activate_msecs(kqd->cb, 10);
				491	}
				492
				493	static void kyber_flush_busy_ctxs(struct kyber_hctx_data *khd,
				494	struct blk_mq_hw_ctx *hctx)
				495	{
				496	LIST_HEAD(rq_list);
				497	struct request rq, next;
				498
				499	blk_mq_flush_busy_ctxs(hctx, &rq_list);
				500	list_for_each_entry_safe(rq, next, &rq_list, queuelist) {
				501	unsigned int sched_domain;
				502
				503	sched_domain = rq_sched_domain(rq);
				504	list_move_tail(&rq->queuelist, &khd->rqs[sched_domain]);
				505	}
				506	}
				507
				508	static int kyber_domain_wake(wait_queue_t *wait, unsigned mode, int flags,
				509	void *key)
				510	{
				511	struct blk_mq_hw_ctx *hctx = READ_ONCE(wait->private);
				512
				513	list_del_init(&wait->task_list);
				514	blk_mq_run_hw_queue(hctx, true);
				515	return 1;
				516	}
				517
				518	static int kyber_get_domain_token(struct kyber_queue_data *kqd,
				519	struct kyber_hctx_data *khd,
				520	struct blk_mq_hw_ctx *hctx)
				521	{
				522	unsigned int sched_domain = khd->cur_domain;
				523	struct sbitmap_queue *domain_tokens = &kqd->domain_tokens[sched_domain];
				524	wait_queue_t *wait = &khd->domain_wait[sched_domain];
				525	struct sbq_wait_state *ws;
				526	int nr;
				527
				528	nr = __sbitmap_queue_get(domain_tokens);
				529	if (nr >= 0)
				530	return nr;
				531
				532	/*
				533	* If we failed to get a domain token, make sure the hardware queue is
				534	* run when one becomes available. Note that this is serialized on
				535	* khd->lock, but we still need to be careful about the waker.
				536	*/
				537	if (list_empty_careful(&wait->task_list)) {
				538	init_waitqueue_func_entry(wait, kyber_domain_wake);
				539	wait->private = hctx;
				540	ws = sbq_wait_ptr(domain_tokens,
				541	&khd->wait_index[sched_domain]);
				542	add_wait_queue(&ws->wait, wait);
				543
				544	/*
				545	* Try again in case a token was freed before we got on the wait
				546	* queue.
				547	*/
				548	nr = __sbitmap_queue_get(domain_tokens);
				549	}
				550	return nr;
				551	}
				552
				553	static struct request *
				554	kyber_dispatch_cur_domain(struct kyber_queue_data *kqd,
				555	struct kyber_hctx_data *khd,
				556	struct blk_mq_hw_ctx *hctx,
				557	bool *flushed)
				558	{
				559	struct list_head *rqs;
				560	struct request *rq;
				561	int nr;
				562
				563	rqs = &khd->rqs[khd->cur_domain];
				564	rq = list_first_entry_or_null(rqs, struct request, queuelist);
				565
				566	/*
				567	* If there wasn't already a pending request and we haven't flushed the
				568	* software queues yet, flush the software queues and check again.
				569	*/
				570	if (!rq && !*flushed) {
				571	kyber_flush_busy_ctxs(khd, hctx);
				572	*flushed = true;
				573	rq = list_first_entry_or_null(rqs, struct request, queuelist);
				574	}
				575
				576	if (rq) {
				577	nr = kyber_get_domain_token(kqd, khd, hctx);
				578	if (nr >= 0) {
				579	khd->batching++;
				580	rq_set_domain_token(rq, nr);
				581	list_del_init(&rq->queuelist);
				582	return rq;
				583	}
				584	}
				585
				586	/* There were either no pending requests or no tokens. */
				587	return NULL;
				588	}
				589
				590	static struct request kyber_dispatch_request(struct blk_mq_hw_ctx hctx)
				591	{
				592	struct kyber_queue_data *kqd = hctx->queue->elevator->elevator_data;
				593	struct kyber_hctx_data *khd = hctx->sched_data;
				594	bool flushed = false;
				595	struct request *rq;
				596	int i;
				597
				598	spin_lock(&khd->lock);
				599
				600	/*
				601	* First, if we are still entitled to batch, try to dispatch a request
				602	* from the batch.
				603	*/
				604	if (khd->batching < kyber_batch_size[khd->cur_domain]) {
				605	rq = kyber_dispatch_cur_domain(kqd, khd, hctx, &flushed);
				606	if (rq)
				607	goto out;
				608	}
				609
				610	/*
				611	* Either,
				612	* 1. We were no longer entitled to a batch.
				613	* 2. The domain we were batching didn't have any requests.
				614	* 3. The domain we were batching was out of tokens.
				615	*
				616	* Start another batch. Note that this wraps back around to the original
				617	* domain if no other domains have requests or tokens.
				618	*/
				619	khd->batching = 0;
				620	for (i = 0; i < KYBER_NUM_DOMAINS; i++) {
				621	if (khd->cur_domain == KYBER_NUM_DOMAINS - 1)
				622	khd->cur_domain = 0;
				623	else
				624	khd->cur_domain++;
				625
				626	rq = kyber_dispatch_cur_domain(kqd, khd, hctx, &flushed);
				627	if (rq)
				628	goto out;
				629	}
				630
				631	rq = NULL;
				632	out:
				633	spin_unlock(&khd->lock);
				634	return rq;
				635	}
				636
				637	static bool kyber_has_work(struct blk_mq_hw_ctx *hctx)
				638	{
				639	struct kyber_hctx_data *khd = hctx->sched_data;
				640	int i;
				641
				642	for (i = 0; i < KYBER_NUM_DOMAINS; i++) {
				643	if (!list_empty_careful(&khd->rqs[i]))
				644	return true;
				645	}
				646	return false;
				647	}
				648
				649	#define KYBER_LAT_SHOW_STORE(op) \
				650	static ssize_t kyber_##op##_lat_show(struct elevator_queue *e, \
				651	char *page) \
				652	{ \
				653	struct kyber_queue_data *kqd = e->elevator_data; \
				654	\
				655	return sprintf(page, "%llu\n", kqd->op##_lat_nsec); \
				656	} \
				657	\
				658	static ssize_t kyber_##op##_lat_store(struct elevator_queue *e, \
				659	const char *page, size_t count) \
				660	{ \
				661	struct kyber_queue_data *kqd = e->elevator_data; \
				662	unsigned long long nsec; \
				663	int ret; \
				664	\
				665	ret = kstrtoull(page, 10, &nsec); \
				666	if (ret) \
				667	return ret; \
				668	\
				669	kqd->op##_lat_nsec = nsec; \
				670	\
				671	return count; \
				672	}
				673	KYBER_LAT_SHOW_STORE(read);
				674	KYBER_LAT_SHOW_STORE(write);
				675	#undef KYBER_LAT_SHOW_STORE
				676
				677	#define KYBER_LAT_ATTR(op) __ATTR(op##_lat_nsec, 0644, kyber_##op##_lat_show, kyber_##op##_lat_store)
				678	static struct elv_fs_entry kyber_sched_attrs[] = {
				679	KYBER_LAT_ATTR(read),
				680	KYBER_LAT_ATTR(write),
				681	__ATTR_NULL
				682	};
				683	#undef KYBER_LAT_ATTR
				684
Omar Sandoval	16b738f	2017-05-04 00:31:33 -0700	[diff] [blame]	685	#ifdef CONFIG_BLK_DEBUG_FS
				686	#define KYBER_DEBUGFS_DOMAIN_ATTRS(domain, name) \
				687	static int kyber_##name##_tokens_show(void data, struct seq_file m) \
				688	{ \
				689	struct request_queue *q = data; \
				690	struct kyber_queue_data *kqd = q->elevator->elevator_data; \
				691	\
				692	sbitmap_queue_show(&kqd->domain_tokens[domain], m); \
				693	return 0; \
				694	} \
				695	\
				696	static void kyber_##name##_rqs_start(struct seq_file m, loff_t *pos) \
				697	__acquires(&khd->lock) \
				698	{ \
				699	struct blk_mq_hw_ctx *hctx = m->private; \
				700	struct kyber_hctx_data *khd = hctx->sched_data; \
				701	\
				702	spin_lock(&khd->lock); \
				703	return seq_list_start(&khd->rqs[domain], *pos); \
				704	} \
				705	\
				706	static void kyber_##name##_rqs_next(struct seq_file m, void *v, \
				707	loff_t *pos) \
				708	{ \
				709	struct blk_mq_hw_ctx *hctx = m->private; \
				710	struct kyber_hctx_data *khd = hctx->sched_data; \
				711	\
				712	return seq_list_next(v, &khd->rqs[domain], pos); \
				713	} \
				714	\
				715	static void kyber_##name##_rqs_stop(struct seq_file m, void v) \
				716	__releases(&khd->lock) \
				717	{ \
				718	struct blk_mq_hw_ctx *hctx = m->private; \
				719	struct kyber_hctx_data *khd = hctx->sched_data; \
				720	\
				721	spin_unlock(&khd->lock); \
				722	} \
				723	\
				724	static const struct seq_operations kyber_##name##_rqs_seq_ops = { \
				725	.start = kyber_##name##_rqs_start, \
				726	.next = kyber_##name##_rqs_next, \
				727	.stop = kyber_##name##_rqs_stop, \
				728	.show = blk_mq_debugfs_rq_show, \
				729	}; \
				730	\
				731	static int kyber_##name##_waiting_show(void data, struct seq_file m) \
				732	{ \
				733	struct blk_mq_hw_ctx *hctx = data; \
				734	struct kyber_hctx_data *khd = hctx->sched_data; \
				735	wait_queue_t *wait = &khd->domain_wait[domain]; \
				736	\
				737	seq_printf(m, "%d\n", !list_empty_careful(&wait->task_list)); \
				738	return 0; \
				739	}
				740	KYBER_DEBUGFS_DOMAIN_ATTRS(KYBER_READ, read)
				741	KYBER_DEBUGFS_DOMAIN_ATTRS(KYBER_SYNC_WRITE, sync_write)
				742	KYBER_DEBUGFS_DOMAIN_ATTRS(KYBER_OTHER, other)
				743	#undef KYBER_DEBUGFS_DOMAIN_ATTRS
				744
				745	static int kyber_async_depth_show(void data, struct seq_file m)
				746	{
				747	struct request_queue *q = data;
				748	struct kyber_queue_data *kqd = q->elevator->elevator_data;
				749
				750	seq_printf(m, "%u\n", kqd->async_depth);
				751	return 0;
				752	}
				753
				754	static int kyber_cur_domain_show(void data, struct seq_file m)
				755	{
				756	struct blk_mq_hw_ctx *hctx = data;
				757	struct kyber_hctx_data *khd = hctx->sched_data;
				758
				759	switch (khd->cur_domain) {
				760	case KYBER_READ:
				761	seq_puts(m, "READ\n");
				762	break;
				763	case KYBER_SYNC_WRITE:
				764	seq_puts(m, "SYNC_WRITE\n");
				765	break;
				766	case KYBER_OTHER:
				767	seq_puts(m, "OTHER\n");
				768	break;
				769	default:
				770	seq_printf(m, "%u\n", khd->cur_domain);
				771	break;
				772	}
				773	return 0;
				774	}
				775
				776	static int kyber_batching_show(void data, struct seq_file m)
				777	{
				778	struct blk_mq_hw_ctx *hctx = data;
				779	struct kyber_hctx_data *khd = hctx->sched_data;
				780
				781	seq_printf(m, "%u\n", khd->batching);
				782	return 0;
				783	}
				784
				785	#define KYBER_QUEUE_DOMAIN_ATTRS(name) \
				786	{#name "_tokens", 0400, kyber_##name##_tokens_show}
				787	static const struct blk_mq_debugfs_attr kyber_queue_debugfs_attrs[] = {
				788	KYBER_QUEUE_DOMAIN_ATTRS(read),
				789	KYBER_QUEUE_DOMAIN_ATTRS(sync_write),
				790	KYBER_QUEUE_DOMAIN_ATTRS(other),
				791	{"async_depth", 0400, kyber_async_depth_show},
				792	{},
				793	};
				794	#undef KYBER_QUEUE_DOMAIN_ATTRS
				795
				796	#define KYBER_HCTX_DOMAIN_ATTRS(name) \
				797	{#name "_rqs", 0400, .seq_ops = &kyber_##name##_rqs_seq_ops}, \
				798	{#name "_waiting", 0400, kyber_##name##_waiting_show}
				799	static const struct blk_mq_debugfs_attr kyber_hctx_debugfs_attrs[] = {
				800	KYBER_HCTX_DOMAIN_ATTRS(read),
				801	KYBER_HCTX_DOMAIN_ATTRS(sync_write),
				802	KYBER_HCTX_DOMAIN_ATTRS(other),
				803	{"cur_domain", 0400, kyber_cur_domain_show},
				804	{"batching", 0400, kyber_batching_show},
				805	{},
				806	};
				807	#undef KYBER_HCTX_DOMAIN_ATTRS
				808	#endif
				809
Omar Sandoval	00e0439	2017-04-14 01:00:02 -0700	[diff] [blame]	810	static struct elevator_type kyber_sched = {
				811	.ops.mq = {
				812	.init_sched = kyber_init_sched,
				813	.exit_sched = kyber_exit_sched,
				814	.init_hctx = kyber_init_hctx,
				815	.exit_hctx = kyber_exit_hctx,
				816	.get_request = kyber_get_request,
Christoph Hellwig	7b9e936	2017-06-16 18:15:21 +0200	[diff] [blame^]	817	.finish_request = kyber_finish_request,
Omar Sandoval	00e0439	2017-04-14 01:00:02 -0700	[diff] [blame]	818	.completed_request = kyber_completed_request,
				819	.dispatch_request = kyber_dispatch_request,
				820	.has_work = kyber_has_work,
				821	},
				822	.uses_mq = true,
Omar Sandoval	16b738f	2017-05-04 00:31:33 -0700	[diff] [blame]	823	#ifdef CONFIG_BLK_DEBUG_FS
				824	.queue_debugfs_attrs = kyber_queue_debugfs_attrs,
				825	.hctx_debugfs_attrs = kyber_hctx_debugfs_attrs,
				826	#endif
Omar Sandoval	00e0439	2017-04-14 01:00:02 -0700	[diff] [blame]	827	.elevator_attrs = kyber_sched_attrs,
				828	.elevator_name = "kyber",
				829	.elevator_owner = THIS_MODULE,
				830	};
				831
				832	static int __init kyber_init(void)
				833	{
				834	return elv_register(&kyber_sched);
				835	}
				836
				837	static void __exit kyber_exit(void)
				838	{
				839	elv_unregister(&kyber_sched);
				840	}
				841
				842	module_init(kyber_init);
				843	module_exit(kyber_exit);
				844
				845	MODULE_AUTHOR("Omar Sandoval");
				846	MODULE_LICENSE("GPL");
				847	MODULE_DESCRIPTION("Kyber I/O scheduler");