net/9p: switch the guts of p9_client_{read,write}() to iov_iter

... and have get_user_pages_fast() mapping fewer pages than requested
to generate a short read/write.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
diff --git a/include/net/9p/transport.h b/include/net/9p/transport.h
index 2a25dec..5122b5e 100644
--- a/include/net/9p/transport.h
+++ b/include/net/9p/transport.h
@@ -61,7 +61,7 @@
 	int (*cancel) (struct p9_client *, struct p9_req_t *req);
 	int (*cancelled)(struct p9_client *, struct p9_req_t *req);
 	int (*zc_request)(struct p9_client *, struct p9_req_t *,
-			  char *, char *, int , int, int, int);
+			  struct iov_iter *, struct iov_iter *, int , int, int);
 };
 
 void v9fs_register_trans(struct p9_trans_module *m);
diff --git a/net/9p/client.c b/net/9p/client.c
index e86a9be..9ef5d85 100644
--- a/net/9p/client.c
+++ b/net/9p/client.c
@@ -34,6 +34,7 @@
 #include <linux/slab.h>
 #include <linux/sched.h>
 #include <linux/uaccess.h>
+#include <linux/uio.h>
 #include <net/9p/9p.h>
 #include <linux/parser.h>
 #include <net/9p/client.h>
@@ -555,7 +556,7 @@
  */
 
 static int p9_check_zc_errors(struct p9_client *c, struct p9_req_t *req,
-			      char *uidata, int in_hdrlen, int kern_buf)
+			      struct iov_iter *uidata, int in_hdrlen)
 {
 	int err;
 	int ecode;
@@ -591,16 +592,11 @@
 		ename = &req->rc->sdata[req->rc->offset];
 		if (len > inline_len) {
 			/* We have error in external buffer */
-			if (kern_buf) {
-				memcpy(ename + inline_len, uidata,
-				       len - inline_len);
-			} else {
-				err = copy_from_user(ename + inline_len,
-						     uidata, len - inline_len);
-				if (err) {
-					err = -EFAULT;
-					goto out_err;
-				}
+			err = copy_from_iter(ename + inline_len,
+					     len - inline_len, uidata);
+			if (err != len - inline_len) {
+				err = -EFAULT;
+				goto out_err;
 			}
 		}
 		ename = NULL;
@@ -806,8 +802,8 @@
  * p9_client_zc_rpc - issue a request and wait for a response
  * @c: client session
  * @type: type of request
- * @uidata: user bffer that should be ued for zero copy read
- * @uodata: user buffer that shoud be user for zero copy write
+ * @uidata: destination for zero copy read
+ * @uodata: source for zero copy write
  * @inlen: read buffer size
  * @olen: write buffer size
  * @hdrlen: reader header size, This is the size of response protocol data
@@ -816,9 +812,10 @@
  * Returns request structure (which client must free using p9_free_req)
  */
 static struct p9_req_t *p9_client_zc_rpc(struct p9_client *c, int8_t type,
-					 char *uidata, char *uodata,
+					 struct iov_iter *uidata,
+					 struct iov_iter *uodata,
 					 int inlen, int olen, int in_hdrlen,
-					 int kern_buf, const char *fmt, ...)
+					 const char *fmt, ...)
 {
 	va_list ap;
 	int sigpending, err;
@@ -841,12 +838,8 @@
 	} else
 		sigpending = 0;
 
-	/* If we are called with KERNEL_DS force kern_buf */
-	if (segment_eq(get_fs(), KERNEL_DS))
-		kern_buf = 1;
-
 	err = c->trans_mod->zc_request(c, req, uidata, uodata,
-				       inlen, olen, in_hdrlen, kern_buf);
+				       inlen, olen, in_hdrlen);
 	if (err < 0) {
 		if (err == -EIO)
 			c->status = Disconnected;
@@ -876,7 +869,7 @@
 	if (err < 0)
 		goto reterr;
 
-	err = p9_check_zc_errors(c, req, uidata, in_hdrlen, kern_buf);
+	err = p9_check_zc_errors(c, req, uidata, in_hdrlen);
 	trace_9p_client_res(c, type, req->rc->tag, err);
 	if (!err)
 		return req;
@@ -1545,11 +1538,24 @@
 								u32 count)
 {
 	char *dataptr;
-	int kernel_buf = 0;
 	struct p9_req_t *req;
 	struct p9_client *clnt;
 	int err, rsize, non_zc = 0;
+	struct iov_iter to;
+	union {
+		struct kvec kv;
+		struct iovec iov;
+	} v;
 
+	if (data) {
+		v.kv.iov_base = data;
+		v.kv.iov_len = count;
+		iov_iter_kvec(&to, ITER_KVEC | READ, &v.kv, 1, count);
+	} else {
+		v.iov.iov_base = udata;
+		v.iov.iov_len = count;
+		iov_iter_init(&to, READ, &v.iov, 1, count);
+	}
 
 	p9_debug(P9_DEBUG_9P, ">>> TREAD fid %d offset %llu %d\n",
 		   fid->fid, (unsigned long long) offset, count);
@@ -1565,18 +1571,12 @@
 
 	/* Don't bother zerocopy for small IO (< 1024) */
 	if (clnt->trans_mod->zc_request && rsize > 1024) {
-		char *indata;
-		if (data) {
-			kernel_buf = 1;
-			indata = data;
-		} else
-			indata = (__force char *)udata;
 		/*
 		 * response header len is 11
 		 * PDU Header(7) + IO Size (4)
 		 */
-		req = p9_client_zc_rpc(clnt, P9_TREAD, indata, NULL, rsize, 0,
-				       11, kernel_buf, "dqd", fid->fid,
+		req = p9_client_zc_rpc(clnt, P9_TREAD, &to, NULL, rsize, 0,
+				       11, "dqd", fid->fid,
 				       offset, rsize);
 	} else {
 		non_zc = 1;
@@ -1596,16 +1596,9 @@
 
 	p9_debug(P9_DEBUG_9P, "<<< RREAD count %d\n", count);
 
-	if (non_zc) {
-		if (data) {
-			memmove(data, dataptr, count);
-		} else {
-			err = copy_to_user(udata, dataptr, count);
-			if (err) {
-				err = -EFAULT;
-				goto free_and_error;
-			}
-		}
+	if (non_zc && copy_to_iter(dataptr, count, &to) != count) {
+		err = -EFAULT;
+		goto free_and_error;
 	}
 	p9_free_req(clnt, req);
 	return count;
@@ -1622,9 +1615,23 @@
 							u64 offset, u32 count)
 {
 	int err, rsize;
-	int kernel_buf = 0;
 	struct p9_client *clnt;
 	struct p9_req_t *req;
+	struct iov_iter from;
+	union {
+		struct kvec kv;
+		struct iovec iov;
+	} v;
+
+	if (data) {
+		v.kv.iov_base = data;
+		v.kv.iov_len = count;
+		iov_iter_kvec(&from, ITER_KVEC | WRITE, &v.kv, 1, count);
+	} else {
+		v.iov.iov_base = udata;
+		v.iov.iov_len = count;
+		iov_iter_init(&from, WRITE, &v.iov, 1, count);
+	}
 
 	p9_debug(P9_DEBUG_9P, ">>> TWRITE fid %d offset %llu count %d\n",
 				fid->fid, (unsigned long long) offset, count);
@@ -1640,22 +1647,12 @@
 
 	/* Don't bother zerocopy for small IO (< 1024) */
 	if (clnt->trans_mod->zc_request && rsize > 1024) {
-		char *odata;
-		if (data) {
-			kernel_buf = 1;
-			odata = data;
-		} else
-			odata = (char *)udata;
-		req = p9_client_zc_rpc(clnt, P9_TWRITE, NULL, odata, 0, rsize,
-				       P9_ZC_HDR_SZ, kernel_buf, "dqd",
+		req = p9_client_zc_rpc(clnt, P9_TWRITE, NULL, &from, 0, rsize,
+				       P9_ZC_HDR_SZ, "dqd",
 				       fid->fid, offset, rsize);
 	} else {
-		if (data)
-			req = p9_client_rpc(clnt, P9_TWRITE, "dqD", fid->fid,
-					    offset, rsize, data);
-		else
-			req = p9_client_rpc(clnt, P9_TWRITE, "dqU", fid->fid,
-					    offset, rsize, udata);
+		req = p9_client_rpc(clnt, P9_TWRITE, "dqV", fid->fid,
+					    offset, rsize, &from);
 	}
 	if (IS_ERR(req)) {
 		err = PTR_ERR(req);
@@ -2068,6 +2065,10 @@
 	struct p9_client *clnt;
 	struct p9_req_t *req;
 	char *dataptr;
+	struct kvec kv = {.iov_base = data, .iov_len = count};
+	struct iov_iter to;
+
+	iov_iter_kvec(&to, READ | ITER_KVEC, &kv, 1, count);
 
 	p9_debug(P9_DEBUG_9P, ">>> TREADDIR fid %d offset %llu count %d\n",
 				fid->fid, (unsigned long long) offset, count);
@@ -2088,8 +2089,8 @@
 		 * response header len is 11
 		 * PDU Header(7) + IO Size (4)
 		 */
-		req = p9_client_zc_rpc(clnt, P9_TREADDIR, data, NULL, rsize, 0,
-				       11, 1, "dqd", fid->fid, offset, rsize);
+		req = p9_client_zc_rpc(clnt, P9_TREADDIR, &to, NULL, rsize, 0,
+				       11, "dqd", fid->fid, offset, rsize);
 	} else {
 		non_zc = 1;
 		req = p9_client_rpc(clnt, P9_TREADDIR, "dqd", fid->fid,
diff --git a/net/9p/protocol.c b/net/9p/protocol.c
index ab9127e..e9d0f0c 100644
--- a/net/9p/protocol.c
+++ b/net/9p/protocol.c
@@ -33,6 +33,7 @@
 #include <linux/sched.h>
 #include <linux/stddef.h>
 #include <linux/types.h>
+#include <linux/uio.h>
 #include <net/9p/9p.h>
 #include <net/9p/client.h>
 #include "protocol.h"
@@ -69,10 +70,11 @@
 }
 
 static size_t
-pdu_write_u(struct p9_fcall *pdu, const char __user *udata, size_t size)
+pdu_write_u(struct p9_fcall *pdu, struct iov_iter *from, size_t size)
 {
 	size_t len = min(pdu->capacity - pdu->size, size);
-	if (copy_from_user(&pdu->sdata[pdu->size], udata, len))
+	struct iov_iter i = *from;
+	if (copy_from_iter(&pdu->sdata[pdu->size], len, &i) != len)
 		len = 0;
 
 	pdu->size += len;
@@ -437,23 +439,13 @@
 						 stbuf->extension, stbuf->n_uid,
 						 stbuf->n_gid, stbuf->n_muid);
 			} break;
-		case 'D':{
-				uint32_t count = va_arg(ap, uint32_t);
-				const void *data = va_arg(ap, const void *);
-
-				errcode = p9pdu_writef(pdu, proto_version, "d",
-									count);
-				if (!errcode && pdu_write(pdu, data, count))
-					errcode = -EFAULT;
-			}
-			break;
-		case 'U':{
+		case 'V':{
 				int32_t count = va_arg(ap, int32_t);
-				const char __user *udata =
-						va_arg(ap, const void __user *);
+				struct iov_iter *from =
+						va_arg(ap, struct iov_iter *);
 				errcode = p9pdu_writef(pdu, proto_version, "d",
 									count);
-				if (!errcode && pdu_write_u(pdu, udata, count))
+				if (!errcode && pdu_write_u(pdu, from, count))
 					errcode = -EFAULT;
 			}
 			break;
diff --git a/net/9p/trans_virtio.c b/net/9p/trans_virtio.c
index 36a1a73..e62bcbb 100644
--- a/net/9p/trans_virtio.c
+++ b/net/9p/trans_virtio.c
@@ -217,15 +217,15 @@
  * @start: which segment of the sg_list to start at
  * @pdata: a list of pages to add into sg.
  * @nr_pages: number of pages to pack into the scatter/gather list
- * @data: data to pack into scatter/gather list
+ * @offs: amount of data in the beginning of first page _not_ to pack
  * @count: amount of data to pack into the scatter/gather list
  */
 static int
 pack_sg_list_p(struct scatterlist *sg, int start, int limit,
-	       struct page **pdata, int nr_pages, char *data, int count)
+	       struct page **pdata, int nr_pages, size_t offs, int count)
 {
 	int i = 0, s;
-	int data_off;
+	int data_off = offs;
 	int index = start;
 
 	BUG_ON(nr_pages > (limit - start));
@@ -233,16 +233,14 @@
 	 * if the first page doesn't start at
 	 * page boundary find the offset
 	 */
-	data_off = offset_in_page(data);
 	while (nr_pages) {
-		s = rest_of_page(data);
+		s = PAGE_SIZE - data_off;
 		if (s > count)
 			s = count;
 		/* Make sure we don't terminate early. */
 		sg_unmark_end(&sg[index]);
 		sg_set_page(&sg[index++], pdata[i++], s, data_off);
 		data_off = 0;
-		data += s;
 		count -= s;
 		nr_pages--;
 	}
@@ -314,11 +312,20 @@
 }
 
 static int p9_get_mapped_pages(struct virtio_chan *chan,
-			       struct page **pages, char *data,
-			       int nr_pages, int write, int kern_buf)
+			       struct page ***pages,
+			       struct iov_iter *data,
+			       int count,
+			       size_t *offs,
+			       int *need_drop)
 {
+	int nr_pages;
 	int err;
-	if (!kern_buf) {
+
+	if (!iov_iter_count(data))
+		return 0;
+
+	if (!(data->type & ITER_KVEC)) {
+		int n;
 		/*
 		 * We allow only p9_max_pages pinned. We wait for the
 		 * Other zc request to finish here
@@ -329,26 +336,49 @@
 			if (err == -ERESTARTSYS)
 				return err;
 		}
-		err = p9_payload_gup(data, &nr_pages, pages, write);
-		if (err < 0)
-			return err;
+		n = iov_iter_get_pages_alloc(data, pages, count, offs);
+		if (n < 0)
+			return n;
+		*need_drop = 1;
+		nr_pages = DIV_ROUND_UP(n + *offs, PAGE_SIZE);
 		atomic_add(nr_pages, &vp_pinned);
+		return n;
 	} else {
 		/* kernel buffer, no need to pin pages */
-		int s, index = 0;
-		int count = nr_pages;
-		while (nr_pages) {
-			s = rest_of_page(data);
-			if (is_vmalloc_addr(data))
-				pages[index++] = vmalloc_to_page(data);
-			else
-				pages[index++] = kmap_to_page(data);
-			data += s;
-			nr_pages--;
+		int index;
+		size_t len;
+		void *p;
+
+		/* we'd already checked that it's non-empty */
+		while (1) {
+			len = iov_iter_single_seg_count(data);
+			if (likely(len)) {
+				p = data->kvec->iov_base + data->iov_offset;
+				break;
+			}
+			iov_iter_advance(data, 0);
 		}
-		nr_pages = count;
+		if (len > count)
+			len = count;
+
+		nr_pages = DIV_ROUND_UP((unsigned long)p + len, PAGE_SIZE) -
+			   (unsigned long)p / PAGE_SIZE;
+
+		*pages = kmalloc(sizeof(struct page *) * nr_pages, GFP_NOFS);
+		if (!*pages)
+			return -ENOMEM;
+
+		*need_drop = 0;
+		p -= (*offs = (unsigned long)p % PAGE_SIZE);
+		for (index = 0; index < nr_pages; index++) {
+			if (is_vmalloc_addr(p))
+				(*pages)[index] = vmalloc_to_page(p);
+			else
+				(*pages)[index] = kmap_to_page(p);
+			p += PAGE_SIZE;
+		}
+		return len;
 	}
-	return nr_pages;
 }
 
 /**
@@ -364,8 +394,8 @@
  */
 static int
 p9_virtio_zc_request(struct p9_client *client, struct p9_req_t *req,
-		     char *uidata, char *uodata, int inlen,
-		     int outlen, int in_hdr_len, int kern_buf)
+		     struct iov_iter *uidata, struct iov_iter *uodata,
+		     int inlen, int outlen, int in_hdr_len)
 {
 	int in, out, err, out_sgs, in_sgs;
 	unsigned long flags;
@@ -373,41 +403,32 @@
 	struct page **in_pages = NULL, **out_pages = NULL;
 	struct virtio_chan *chan = client->trans;
 	struct scatterlist *sgs[4];
+	size_t offs;
+	int need_drop = 0;
 
 	p9_debug(P9_DEBUG_TRANS, "virtio request\n");
 
 	if (uodata) {
-		out_nr_pages = p9_nr_pages(uodata, outlen);
-		out_pages = kmalloc(sizeof(struct page *) * out_nr_pages,
-				    GFP_NOFS);
-		if (!out_pages) {
-			err = -ENOMEM;
-			goto err_out;
+		int n = p9_get_mapped_pages(chan, &out_pages, uodata,
+					    outlen, &offs, &need_drop);
+		if (n < 0)
+			return n;
+		out_nr_pages = DIV_ROUND_UP(n + offs, PAGE_SIZE);
+		if (n != outlen) {
+			__le32 v = cpu_to_le32(n);
+			memcpy(&req->tc->sdata[req->tc->size - 4], &v, 4);
+			outlen = n;
 		}
-		out_nr_pages = p9_get_mapped_pages(chan, out_pages, uodata,
-						   out_nr_pages, 0, kern_buf);
-		if (out_nr_pages < 0) {
-			err = out_nr_pages;
-			kfree(out_pages);
-			out_pages = NULL;
-			goto err_out;
-		}
-	}
-	if (uidata) {
-		in_nr_pages = p9_nr_pages(uidata, inlen);
-		in_pages = kmalloc(sizeof(struct page *) * in_nr_pages,
-				   GFP_NOFS);
-		if (!in_pages) {
-			err = -ENOMEM;
-			goto err_out;
-		}
-		in_nr_pages = p9_get_mapped_pages(chan, in_pages, uidata,
-						  in_nr_pages, 1, kern_buf);
-		if (in_nr_pages < 0) {
-			err = in_nr_pages;
-			kfree(in_pages);
-			in_pages = NULL;
-			goto err_out;
+	} else if (uidata) {
+		int n = p9_get_mapped_pages(chan, &in_pages, uidata,
+					    inlen, &offs, &need_drop);
+		if (n < 0)
+			return n;
+		in_nr_pages = DIV_ROUND_UP(n + offs, PAGE_SIZE);
+		if (n != inlen) {
+			__le32 v = cpu_to_le32(n);
+			memcpy(&req->tc->sdata[req->tc->size - 4], &v, 4);
+			inlen = n;
 		}
 	}
 	req->status = REQ_STATUS_SENT;
@@ -426,7 +447,7 @@
 	if (out_pages) {
 		sgs[out_sgs++] = chan->sg + out;
 		out += pack_sg_list_p(chan->sg, out, VIRTQUEUE_NUM,
-				      out_pages, out_nr_pages, uodata, outlen);
+				      out_pages, out_nr_pages, offs, outlen);
 	}
 		
 	/*
@@ -444,7 +465,7 @@
 	if (in_pages) {
 		sgs[out_sgs + in_sgs++] = chan->sg + out + in;
 		in += pack_sg_list_p(chan->sg, out + in, VIRTQUEUE_NUM,
-				     in_pages, in_nr_pages, uidata, inlen);
+				     in_pages, in_nr_pages, offs, inlen);
 	}
 
 	BUG_ON(out_sgs + in_sgs > ARRAY_SIZE(sgs));
@@ -478,7 +499,7 @@
 	 * Non kernel buffers are pinned, unpin them
 	 */
 err_out:
-	if (!kern_buf) {
+	if (need_drop) {
 		if (in_pages) {
 			p9_release_pages(in_pages, in_nr_pages);
 			atomic_sub(in_nr_pages, &vp_pinned);