blk-mq: support per-dispatch_queue flush machinery

This patch runs a separate flush machinery for each blk-mq
dispatch queue, so that:

- the existing init_request and exit_request callbacks now cover
the flush request too, which lets us replace the buggy approach of
initializing the flush request's pdu by copying it from the
original request (see the sketch after this list)

- flush performance improves in the multi hw-queue case
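
As a concrete illustration of the first point, a driver's
init_request callback now sees the preallocated flush request like
any other request. The following is only a minimal sketch; the
my_init_request function, struct my_dev and struct my_cmd are
hypothetical and not part of this patch:

	struct my_dev;
	struct my_cmd {
		struct my_dev *dev;
	};

	static int my_init_request(void *data, struct request *rq,
				   unsigned int hctx_idx,
				   unsigned int request_idx,
				   unsigned int numa_node)
	{
		/*
		 * rq may be a regular request or the per-hctx flush
		 * request (the latter is passed in with request_idx >=
		 * queue_depth, see flush_start_tag below); both get
		 * their pdu set up here, so nothing needs to be copied
		 * at flush time.
		 */
		struct my_cmd *cmd = blk_mq_rq_to_pdu(rq);

		cmd->dev = data;
		return 0;
	}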

In an fio sync write test over virtio-blk (4 hw queues, ioengine=sync,
iodepth=64, numjobs=4, bs=4K), throughput increases substantially in my
test environment (an illustrative fio invocation is shown after the
results):
	- throughput: +70% for virtio-blk over null_blk
	- throughput: +30% for virtio-blk over an SSD image
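
For reference, the workload corresponds roughly to an fio command line
like the one below; the target device /dev/vdb and the --direct=1 flag
are illustrative assumptions, not recorded parts of the original setup:

	fio --name=syncwrite --filename=/dev/vdb --rw=write --direct=1 \
	    --ioengine=sync --iodepth=64 --numjobs=4 --bs=4K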

The multi virtqueue feature hasn't been merged into QEMU yet; patches
for it can be found in the tree below:

	git://kernel.ubuntu.com/ming/qemu.git  	v2.1.0-mq.4

Simply passing 'num_queues=4 vectors=5' to the virtio-blk device should
be enough to enable the multi queue (quad queue) feature in QEMU; a
hypothetical invocation is sketched below.
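
As an illustration, an invocation against that patched QEMU tree could
look like the following (the image file and drive id are placeholders,
and the property names follow the out-of-tree patches above):

	qemu-system-x86_64 ... \
		-drive file=test.img,if=none,id=drive0 \
		-device virtio-blk-pci,drive=drive0,num_queues=4,vectors=5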

Suggested-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Ming Lei <ming.lei@canonical.com>
Signed-off-by: Jens Axboe <axboe@fb.com>
diff --git a/block/blk-mq.c b/block/blk-mq.c
index 53b6def1..4e7a314 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -281,26 +281,6 @@
 	__blk_mq_free_request(hctx, ctx, rq);
 }
 
-/*
- * Clone all relevant state from a request that has been put on hold in
- * the flush state machine into the preallocated flush request that hangs
- * off the request queue.
- *
- * For a driver the flush request should be invisible, that's why we are
- * impersonating the original request here.
- */
-void blk_mq_clone_flush_request(struct request *flush_rq,
-		struct request *orig_rq)
-{
-	struct blk_mq_hw_ctx *hctx =
-		orig_rq->q->mq_ops->map_queue(orig_rq->q, orig_rq->mq_ctx->cpu);
-
-	flush_rq->mq_ctx = orig_rq->mq_ctx;
-	flush_rq->tag = orig_rq->tag;
-	memcpy(blk_mq_rq_to_pdu(flush_rq), blk_mq_rq_to_pdu(orig_rq),
-		hctx->cmd_size);
-}
-
 inline void __blk_mq_end_request(struct request *rq, int error)
 {
 	blk_account_io_done(rq);
@@ -1516,12 +1496,20 @@
 		struct blk_mq_tag_set *set,
 		struct blk_mq_hw_ctx *hctx, unsigned int hctx_idx)
 {
+	unsigned flush_start_tag = set->queue_depth;
+
 	blk_mq_tag_idle(hctx);
 
+	if (set->ops->exit_request)
+		set->ops->exit_request(set->driver_data,
+				       hctx->fq->flush_rq, hctx_idx,
+				       flush_start_tag + hctx_idx);
+
 	if (set->ops->exit_hctx)
 		set->ops->exit_hctx(hctx, hctx_idx);
 
 	blk_mq_unregister_cpu_notifier(&hctx->cpu_notifier);
+	blk_free_flush_queue(hctx->fq);
 	kfree(hctx->ctxs);
 	blk_mq_free_bitmap(&hctx->ctx_map);
 }
@@ -1556,6 +1544,7 @@
 		struct blk_mq_hw_ctx *hctx, unsigned hctx_idx)
 {
 	int node;
+	unsigned flush_start_tag = set->queue_depth;
 
 	node = hctx->numa_node;
 	if (node == NUMA_NO_NODE)
@@ -1594,8 +1583,23 @@
 	    set->ops->init_hctx(hctx, set->driver_data, hctx_idx))
 		goto free_bitmap;
 
+	hctx->fq = blk_alloc_flush_queue(q, hctx->numa_node, set->cmd_size);
+	if (!hctx->fq)
+		goto exit_hctx;
+
+	if (set->ops->init_request &&
+	    set->ops->init_request(set->driver_data,
+				   hctx->fq->flush_rq, hctx_idx,
+				   flush_start_tag + hctx_idx, node))
+		goto free_fq;
+
 	return 0;
 
+ free_fq:
+	blk_free_flush_queue(hctx->fq);
+ exit_hctx:
+	if (set->ops->exit_hctx)
+		set->ops->exit_hctx(hctx, hctx_idx);
  free_bitmap:
 	blk_mq_free_bitmap(&hctx->ctx_map);
  free_ctxs:
@@ -1862,16 +1866,10 @@
 
 	blk_mq_add_queue_tag_set(set, q);
 
-	q->fq = blk_alloc_flush_queue(q);
-	if (!q->fq)
-		goto err_hw_queues;
-
 	blk_mq_map_swqueue(q);
 
 	return q;
 
-err_hw_queues:
-	blk_mq_exit_hw_queues(q, set, set->nr_hw_queues);
 err_hw:
 	blk_cleanup_queue(q);
 err_hctxs: