block: optimize __blk_segment_map_sg() for single-page bvec

Introduce a fast path for single-page bvec IO, then blk_bvec_map_sg()
can be avoided.

Signed-off-by: Ming Lei <ming.lei@redhat.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
diff --git a/block/blk-merge.c b/block/blk-merge.c
index c7e8a82..c1ad8ab 100644
--- a/block/blk-merge.c
+++ b/block/blk-merge.c
@@ -447,7 +447,7 @@ static int blk_phys_contig_segment(struct request_queue *q, struct bio *bio,
 	return biovec_phys_mergeable(q, &end_bv, &nxt_bv);
 }
 
-static struct scatterlist *blk_next_sg(struct scatterlist **sg,
+static inline struct scatterlist *blk_next_sg(struct scatterlist **sg,
 		struct scatterlist *sglist)
 {
 	if (!*sg)
@@ -512,7 +512,12 @@ __blk_segment_map_sg(struct request_queue *q, struct bio_vec *bvec,
 		(*sg)->length += nbytes;
 	} else {
 new_segment:
-		(*nsegs) += blk_bvec_map_sg(q, bvec, sglist, sg);
+		if (bvec->bv_offset + bvec->bv_len <= PAGE_SIZE) {
+			*sg = blk_next_sg(sg, sglist);
+			sg_set_page(*sg, bvec->bv_page, nbytes, bvec->bv_offset);
+			(*nsegs) += 1;
+		} else
+			(*nsegs) += blk_bvec_map_sg(q, bvec, sglist, sg);
 	}
 	*bvprv = *bvec;
 }