blk-mq: support per-dispatch_queue flush machinery

This patch runs a separate flush machinery for each blk-mq
dispatch queue, so that:

- the existing init_request and exit_request callbacks can
cover the flush request too, so the buggy approach of
initializing the flush request's pdu by copying can be
removed (see the sketch after this list)

- flush performance is improved when multiple hardware queues
are in use
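
As an illustration of the intent, here is a minimal sketch of a hw
queue's setup path, assuming the per-queue flush queue is stored in
hctx->fq; the surrounding names, including flush_tag, are made up for
the sketch, and only the blk_alloc_flush_queue() signature comes from
the patch below:

	hctx->fq = blk_alloc_flush_queue(q, hctx->numa_node,
					 set->cmd_size);
	if (!hctx->fq)
		return -ENOMEM;

	/*
	 * The driver's init_request callback can now set up the flush
	 * request's pdu directly, instead of the pdu being copied from
	 * the original request at flush time.
	 */
	if (set->ops->init_request &&
	    set->ops->init_request(set->driver_data, hctx->fq->flush_rq,
				   hctx_idx, flush_tag, hctx->numa_node))
		goto free_flush_queue;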

In a fio sync write test over virtio-blk (4 hw queues, ioengine=sync,
iodepth=64, numjobs=4, bs=4K), throughput increases significantly in
my test environment:
	- throughput: +70% in case of virtio-blk over null_blk
	- throughput: +30% in case of virtio-blk over SSD image

The multi virtqueue feature hasn't been merged into QEMU yet; patches
for the feature can be found in the tree below:

	git://kernel.ubuntu.com/ming/qemu.git  	v2.1.0-mq.4

Simply passing 'num_queues=4 vectors=5' should be enough to enable
the multi-queue (quad-queue) feature for QEMU virtio-blk, for example
as shown below.
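
The exact command line is an assumption tied to the out-of-tree tree
above (only the num_queues and vectors properties are named in this
message), along these lines:

	qemu-system-x86_64 ... \
		-drive file=disk.img,if=none,id=drive0 \
		-device virtio-blk-pci,drive=drive0,num_queues=4,vectors=5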

Suggested-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Ming Lei <ming.lei@canonical.com>
Signed-off-by: Jens Axboe <axboe@fb.com>
diff --git a/block/blk-flush.c b/block/blk-flush.c
index 004d95e..20badd7 100644
--- a/block/blk-flush.c
+++ b/block/blk-flush.c
@@ -305,8 +305,15 @@
 	fq->flush_pending_idx ^= 1;
 
 	blk_rq_init(q, flush_rq);
-	if (q->mq_ops)
-		blk_mq_clone_flush_request(flush_rq, first_rq);
+
+	/*
+	 * Borrow tag from the first request since they can't
+	 * be in flight at the same time.
+	 */
+	if (q->mq_ops) {
+		flush_rq->mq_ctx = first_rq->mq_ctx;
+		flush_rq->tag = first_rq->tag;
+	}
 
 	flush_rq->cmd_type = REQ_TYPE_FS;
 	flush_rq->cmd_flags = WRITE_FLUSH | REQ_FLUSH_SEQ;
@@ -480,22 +487,22 @@
 }
 EXPORT_SYMBOL(blkdev_issue_flush);
 
-struct blk_flush_queue *blk_alloc_flush_queue(struct request_queue *q)
+struct blk_flush_queue *blk_alloc_flush_queue(struct request_queue *q,
+		int node, int cmd_size)
 {
 	struct blk_flush_queue *fq;
 	int rq_sz = sizeof(struct request);
 
-	fq = kzalloc(sizeof(*fq), GFP_KERNEL);
+	fq = kzalloc_node(sizeof(*fq), GFP_KERNEL, node);
 	if (!fq)
 		goto fail;
 
 	if (q->mq_ops) {
 		spin_lock_init(&fq->mq_flush_lock);
-		rq_sz = round_up(rq_sz + q->tag_set->cmd_size,
-				cache_line_size());
+		rq_sz = round_up(rq_sz + cmd_size, cache_line_size());
 	}
 
-	fq->flush_rq = kzalloc(rq_sz, GFP_KERNEL);
+	fq->flush_rq = kzalloc_node(rq_sz, GFP_KERNEL, node);
 	if (!fq->flush_rq)
 		goto fail_rq;