blk-mq: fix schedule from atomic context

blk_mq_put_ctx() has to be called before io_schedule() in bt_get():
the ctx is obtained with get_cpu(), so sleeping while still holding it
means scheduling with preemption disabled, i.e. from atomic context.
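
The blk-mq-tag.c side of the change is not included in the hunks
below.  Sketched roughly (helper names such as __bt_get(), bt and
last_tag are assumed from that file and not shown here), the reworked
wait loop in bt_get() releases the ctx before sleeping and re-acquires
it afterwards:

    do {
        tag = __bt_get(hctx, bt, last_tag);	/* try to grab a tag */
        if (tag != -1)
            break;

        /* release ctx (and the implicit get_cpu()) before sleeping */
        blk_mq_put_ctx(data->ctx);

        io_schedule();

        /* may wake up on another CPU: re-grab ctx and re-map hctx */
        data->ctx = blk_mq_get_ctx(data->q);
        data->hctx = data->q->mq_ops->map_queue(data->q,
                data->ctx->cpu);
    } while (1);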

This patch fixes the problem by taking an approach similar to the one
used in percpu_ida allocation: the allocation parameters (queue, gfp
flags, reserved flag, ctx and hctx) are packed into a
struct blk_mq_alloc_data that is passed down to the tag allocator, so
bt_get() can drop the current ctx before sleeping and pick up a fresh
one after waking up.
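
In the full patch the struct and the blk_mq_set_alloc_data() helper
live in blk-mq.h; reconstructed from their use in the blk-mq.c hunks
below, they look roughly like:

    struct blk_mq_alloc_data {
        /* input parameters */
        struct request_queue *q;
        gfp_t gfp;
        bool reserved;

        /* input & output: may be updated if the allocation slept */
        struct blk_mq_ctx *ctx;
        struct blk_mq_hw_ctx *hctx;
    };

    static inline void blk_mq_set_alloc_data(struct blk_mq_alloc_data *data,
            struct request_queue *q, gfp_t gfp, bool reserved,
            struct blk_mq_ctx *ctx, struct blk_mq_hw_ctx *hctx)
    {
        data->q = q;
        data->gfp = gfp;
        data->reserved = reserved;
        data->ctx = ctx;
        data->hctx = hctx;
    }

The callers in blk-mq.c (below) read ctx and hctx back out of
alloc_data after a blocking allocation, since the sleeping path may
have migrated to a different CPU.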

Signed-off-by: Ming Lei <tom.leiming@gmail.com>
Signed-off-by: Jens Axboe <axboe@fb.com>
diff --git a/block/blk-mq.c b/block/blk-mq.c
index b9230c5..43eb315 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -210,24 +210,23 @@
 }
 
 static struct request *
-__blk_mq_alloc_request(struct request_queue *q, struct blk_mq_hw_ctx *hctx,
-		struct blk_mq_ctx *ctx, int rw, gfp_t gfp, bool reserved)
+__blk_mq_alloc_request(struct blk_mq_alloc_data *data, int rw)
 {
 	struct request *rq;
 	unsigned int tag;
 
-	tag = blk_mq_get_tag(hctx, &ctx->last_tag, gfp, reserved);
+	tag = blk_mq_get_tag(data);
 	if (tag != BLK_MQ_TAG_FAIL) {
-		rq = hctx->tags->rqs[tag];
+		rq = data->hctx->tags->rqs[tag];
 
 		rq->cmd_flags = 0;
-		if (blk_mq_tag_busy(hctx)) {
+		if (blk_mq_tag_busy(data->hctx)) {
 			rq->cmd_flags = REQ_MQ_INFLIGHT;
-			atomic_inc(&hctx->nr_active);
+			atomic_inc(&data->hctx->nr_active);
 		}
 
 		rq->tag = tag;
-		blk_mq_rq_ctx_init(q, ctx, rq, rw);
+		blk_mq_rq_ctx_init(data->q, data->ctx, rq, rw);
 		return rq;
 	}
 
@@ -240,22 +239,27 @@
 	struct blk_mq_ctx *ctx;
 	struct blk_mq_hw_ctx *hctx;
 	struct request *rq;
+	struct blk_mq_alloc_data alloc_data;
 
 	if (blk_mq_queue_enter(q))
 		return NULL;
 
 	ctx = blk_mq_get_ctx(q);
 	hctx = q->mq_ops->map_queue(q, ctx->cpu);
+	blk_mq_set_alloc_data(&alloc_data, q, gfp & ~__GFP_WAIT,
+			reserved, ctx, hctx);
 
-	rq = __blk_mq_alloc_request(q, hctx, ctx, rw, gfp & ~__GFP_WAIT,
-				    reserved);
+	rq = __blk_mq_alloc_request(&alloc_data, rw);
 	if (!rq && (gfp & __GFP_WAIT)) {
 		__blk_mq_run_hw_queue(hctx);
 		blk_mq_put_ctx(ctx);
 
 		ctx = blk_mq_get_ctx(q);
 		hctx = q->mq_ops->map_queue(q, ctx->cpu);
-		rq =  __blk_mq_alloc_request(q, hctx, ctx, rw, gfp, reserved);
+		blk_mq_set_alloc_data(&alloc_data, q, gfp, reserved, ctx,
+				hctx);
+		rq =  __blk_mq_alloc_request(&alloc_data, rw);
+		ctx = alloc_data.ctx;
 	}
 	blk_mq_put_ctx(ctx);
 	return rq;
@@ -1136,6 +1140,7 @@
 	struct blk_mq_ctx *ctx;
 	struct request *rq;
 	int rw = bio_data_dir(bio);
+	struct blk_mq_alloc_data alloc_data;
 
 	if (unlikely(blk_mq_queue_enter(q))) {
 		bio_endio(bio, -EIO);
@@ -1149,7 +1154,9 @@
 		rw |= REQ_SYNC;
 
 	trace_block_getrq(q, bio, rw);
-	rq = __blk_mq_alloc_request(q, hctx, ctx, rw, GFP_ATOMIC, false);
+	blk_mq_set_alloc_data(&alloc_data, q, GFP_ATOMIC, false, ctx,
+			hctx);
+	rq = __blk_mq_alloc_request(&alloc_data, rw);
 	if (unlikely(!rq)) {
 		__blk_mq_run_hw_queue(hctx);
 		blk_mq_put_ctx(ctx);
@@ -1157,8 +1164,11 @@
 
 		ctx = blk_mq_get_ctx(q);
 		hctx = q->mq_ops->map_queue(q, ctx->cpu);
-		rq = __blk_mq_alloc_request(q, hctx, ctx, rw,
-					    __GFP_WAIT|GFP_ATOMIC, false);
+		blk_mq_set_alloc_data(&alloc_data, q,
+				__GFP_WAIT|GFP_ATOMIC, false, ctx, hctx);
+		rq = __blk_mq_alloc_request(&alloc_data, rw);
+		ctx = alloc_data.ctx;
+		hctx = alloc_data.hctx;
 	}
 
 	hctx->queued++;