nfs: enable swap on NFS
Implement the new swapfile a_ops for NFS and hook up ->direct_IO. This
will set the NFS socket to SOCK_MEMALLOC and run socket reconnect under
PF_MEMALLOC as well as reset SOCK_MEMALLOC before engaging the protocol
->connect() method.
PF_MEMALLOC should allow the allocation of struct socket and related
objects and the early (re)setting of SOCK_MEMALLOC should allow us to
receive the packets required for the TCP connection buildup.
[jlayton@redhat.com: Restore PF_MEMALLOC task flags in all cases]
[dfeng@redhat.com: Fix handling of multiple swap files]
[a.p.zijlstra@chello.nl: Original patch]
Signed-off-by: Mel Gorman <mgorman@suse.de>
Acked-by: Rik van Riel <riel@redhat.com>
Cc: Christoph Hellwig <hch@infradead.org>
Cc: David S. Miller <davem@davemloft.net>
Cc: Eric B Munson <emunson@mgebm.net>
Cc: Eric Paris <eparis@redhat.com>
Cc: James Morris <jmorris@namei.org>
Cc: Mel Gorman <mgorman@suse.de>
Cc: Mike Christie <michaelc@cs.wisc.edu>
Cc: Neil Brown <neilb@suse.de>
Cc: Sebastian Andrzej Siewior <sebastian@breakpoint.cc>
Cc: Trond Myklebust <Trond.Myklebust@netapp.com>
Cc: Xiaotian Feng <dfeng@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index 42dce909..bf9c8d0 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -115,17 +115,28 @@
* @nr_segs: size of iovec array
*
* The presence of this routine in the address space ops vector means
- * the NFS client supports direct I/O. However, we shunt off direct
- * read and write requests before the VFS gets them, so this method
- * should never be called.
+ * the NFS client supports direct I/O. However, for most direct IO, we
+ * shunt off direct read and write requests before the VFS gets them,
+ * so this method is only ever called for swap.
*/
ssize_t nfs_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov, loff_t pos, unsigned long nr_segs)
{
+#ifndef CONFIG_NFS_SWAP
dprintk("NFS: nfs_direct_IO (%s) off/no(%Ld/%lu) EINVAL\n",
iocb->ki_filp->f_path.dentry->d_name.name,
(long long) pos, nr_segs);
return -EINVAL;
+#else
+ VM_BUG_ON(iocb->ki_left != PAGE_SIZE);
+ VM_BUG_ON(iocb->ki_nbytes != PAGE_SIZE);
+
+ if (rw == READ || rw == KERNEL_READ)
+ return nfs_file_direct_read(iocb, iov, nr_segs, pos,
+ rw == READ ? true : false);
+ return nfs_file_direct_write(iocb, iov, nr_segs, pos,
+ rw == WRITE ? true : false);
+#endif /* CONFIG_NFS_SWAP */
}
static void nfs_direct_release_pages(struct page **pages, unsigned int npages)
@@ -303,7 +314,7 @@
*/
static ssize_t nfs_direct_read_schedule_segment(struct nfs_pageio_descriptor *desc,
const struct iovec *iov,
- loff_t pos)
+ loff_t pos, bool uio)
{
struct nfs_direct_req *dreq = desc->pg_dreq;
struct nfs_open_context *ctx = dreq->ctx;
@@ -331,12 +342,20 @@
GFP_KERNEL);
if (!pagevec)
break;
- down_read(¤t->mm->mmap_sem);
- result = get_user_pages(current, current->mm, user_addr,
+ if (uio) {
+ down_read(¤t->mm->mmap_sem);
+ result = get_user_pages(current, current->mm, user_addr,
npages, 1, 0, pagevec, NULL);
- up_read(¤t->mm->mmap_sem);
- if (result < 0)
- break;
+ up_read(¤t->mm->mmap_sem);
+ if (result < 0)
+ break;
+ } else {
+ WARN_ON(npages != 1);
+ result = get_kernel_page(user_addr, 1, pagevec);
+ if (WARN_ON(result != 1))
+ break;
+ }
+
if ((unsigned)result < npages) {
bytes = result * PAGE_SIZE;
if (bytes <= pgbase) {
@@ -386,7 +405,7 @@
static ssize_t nfs_direct_read_schedule_iovec(struct nfs_direct_req *dreq,
const struct iovec *iov,
unsigned long nr_segs,
- loff_t pos)
+ loff_t pos, bool uio)
{
struct nfs_pageio_descriptor desc;
ssize_t result = -EINVAL;
@@ -400,7 +419,7 @@
for (seg = 0; seg < nr_segs; seg++) {
const struct iovec *vec = &iov[seg];
- result = nfs_direct_read_schedule_segment(&desc, vec, pos);
+ result = nfs_direct_read_schedule_segment(&desc, vec, pos, uio);
if (result < 0)
break;
requested_bytes += result;
@@ -426,7 +445,7 @@
}
static ssize_t nfs_direct_read(struct kiocb *iocb, const struct iovec *iov,
- unsigned long nr_segs, loff_t pos)
+ unsigned long nr_segs, loff_t pos, bool uio)
{
ssize_t result = -ENOMEM;
struct inode *inode = iocb->ki_filp->f_mapping->host;
@@ -444,7 +463,7 @@
if (!is_sync_kiocb(iocb))
dreq->iocb = iocb;
- result = nfs_direct_read_schedule_iovec(dreq, iov, nr_segs, pos);
+ result = nfs_direct_read_schedule_iovec(dreq, iov, nr_segs, pos, uio);
if (!result)
result = nfs_direct_wait(dreq);
NFS_I(inode)->read_io += result;
@@ -610,7 +629,7 @@
*/
static ssize_t nfs_direct_write_schedule_segment(struct nfs_pageio_descriptor *desc,
const struct iovec *iov,
- loff_t pos)
+ loff_t pos, bool uio)
{
struct nfs_direct_req *dreq = desc->pg_dreq;
struct nfs_open_context *ctx = dreq->ctx;
@@ -638,12 +657,19 @@
if (!pagevec)
break;
- down_read(¤t->mm->mmap_sem);
- result = get_user_pages(current, current->mm, user_addr,
- npages, 0, 0, pagevec, NULL);
- up_read(¤t->mm->mmap_sem);
- if (result < 0)
- break;
+ if (uio) {
+ down_read(¤t->mm->mmap_sem);
+ result = get_user_pages(current, current->mm, user_addr,
+ npages, 0, 0, pagevec, NULL);
+ up_read(¤t->mm->mmap_sem);
+ if (result < 0)
+ break;
+ } else {
+ WARN_ON(npages != 1);
+ result = get_kernel_page(user_addr, 0, pagevec);
+ if (WARN_ON(result != 1))
+ break;
+ }
if ((unsigned)result < npages) {
bytes = result * PAGE_SIZE;
@@ -774,7 +800,7 @@
static ssize_t nfs_direct_write_schedule_iovec(struct nfs_direct_req *dreq,
const struct iovec *iov,
unsigned long nr_segs,
- loff_t pos)
+ loff_t pos, bool uio)
{
struct nfs_pageio_descriptor desc;
struct inode *inode = dreq->inode;
@@ -790,7 +816,7 @@
for (seg = 0; seg < nr_segs; seg++) {
const struct iovec *vec = &iov[seg];
- result = nfs_direct_write_schedule_segment(&desc, vec, pos);
+ result = nfs_direct_write_schedule_segment(&desc, vec, pos, uio);
if (result < 0)
break;
requested_bytes += result;
@@ -818,7 +844,7 @@
static ssize_t nfs_direct_write(struct kiocb *iocb, const struct iovec *iov,
unsigned long nr_segs, loff_t pos,
- size_t count)
+ size_t count, bool uio)
{
ssize_t result = -ENOMEM;
struct inode *inode = iocb->ki_filp->f_mapping->host;
@@ -836,7 +862,7 @@
if (!is_sync_kiocb(iocb))
dreq->iocb = iocb;
- result = nfs_direct_write_schedule_iovec(dreq, iov, nr_segs, pos);
+ result = nfs_direct_write_schedule_iovec(dreq, iov, nr_segs, pos, uio);
if (!result)
result = nfs_direct_wait(dreq);
out_release:
@@ -867,7 +893,7 @@
* cache.
*/
ssize_t nfs_file_direct_read(struct kiocb *iocb, const struct iovec *iov,
- unsigned long nr_segs, loff_t pos)
+ unsigned long nr_segs, loff_t pos, bool uio)
{
ssize_t retval = -EINVAL;
struct file *file = iocb->ki_filp;
@@ -892,7 +918,7 @@
task_io_account_read(count);
- retval = nfs_direct_read(iocb, iov, nr_segs, pos);
+ retval = nfs_direct_read(iocb, iov, nr_segs, pos, uio);
if (retval > 0)
iocb->ki_pos = pos + retval;
@@ -923,7 +949,7 @@
* is no atomic O_APPEND write facility in the NFS protocol.
*/
ssize_t nfs_file_direct_write(struct kiocb *iocb, const struct iovec *iov,
- unsigned long nr_segs, loff_t pos)
+ unsigned long nr_segs, loff_t pos, bool uio)
{
ssize_t retval = -EINVAL;
struct file *file = iocb->ki_filp;
@@ -955,7 +981,7 @@
task_io_account_write(count);
- retval = nfs_direct_write(iocb, iov, nr_segs, pos, count);
+ retval = nfs_direct_write(iocb, iov, nr_segs, pos, count, uio);
if (retval > 0) {
struct inode *inode = mapping->host;