NFSv4.2: Fix writeback races in nfs4_copy_file_range
We need to ensure that any writes to the destination file are serialised
with the copy, meaning that the writeback has to occur under the inode lock.
Also relax the writeback requirement on the source, and rely on the
stateid checking to tell us if the source rebooted. Add the helper
nfs_filemap_write_and_wait_range() to call pnfs_sync_inode() as
is appropriate for pNFS servers that may need a layoutcommit.
Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index 01dccf18..3b01c91 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -512,6 +512,9 @@
bool nfs_ctx_key_to_expire(struct nfs_open_context *ctx);
void nfs_pageio_stop_mirroring(struct nfs_pageio_descriptor *pgio);
+int nfs_filemap_write_and_wait_range(struct address_space *mapping,
+ loff_t lstart, loff_t lend);
+
#ifdef CONFIG_NFS_V4_1
static inline
void nfs_clear_pnfs_ds_commit_verifiers(struct pnfs_ds_commit_info *cinfo)
diff --git a/fs/nfs/nfs42proc.c b/fs/nfs/nfs42proc.c
index 0f9f536..b7d457c 100644
--- a/fs/nfs/nfs42proc.c
+++ b/fs/nfs/nfs42proc.c
@@ -156,11 +156,20 @@
if (status)
return status;
+ status = nfs_filemap_write_and_wait_range(file_inode(src)->i_mapping,
+ pos_src, pos_src + (loff_t)count - 1);
+ if (status)
+ return status;
+
status = nfs4_set_rw_stateid(&args.dst_stateid, dst_lock->open_context,
dst_lock, FMODE_WRITE);
if (status)
return status;
+ status = nfs_sync_inode(dst_inode);
+ if (status)
+ return status;
+
status = nfs4_call_sync(server->client, server, &msg,
&args.seq_args, &res.seq_res, 0);
if (status == -ENOTSUPP)
diff --git a/fs/nfs/nfs4file.c b/fs/nfs/nfs4file.c
index 014b0e4..7cdc0ab 100644
--- a/fs/nfs/nfs4file.c
+++ b/fs/nfs/nfs4file.c
@@ -133,21 +133,9 @@
struct file *file_out, loff_t pos_out,
size_t count, unsigned int flags)
{
- struct inode *in_inode = file_inode(file_in);
- struct inode *out_inode = file_inode(file_out);
- int ret;
-
- if (in_inode == out_inode)
+ if (file_inode(file_in) == file_inode(file_out))
return -EINVAL;
- /* flush any pending writes */
- ret = nfs_sync_inode(in_inode);
- if (ret)
- return ret;
- ret = nfs_sync_inode(out_inode);
- if (ret)
- return ret;
-
return nfs42_proc_copy(file_in, pos_in, file_out, pos_out, count);
}
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 3087fb6..538a473 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -1913,6 +1913,24 @@
EXPORT_SYMBOL_GPL(nfs_write_inode);
/*
+ * Wrapper for filemap_write_and_wait_range()
+ *
+ * Needed for pNFS in order to ensure data becomes visible to the
+ * client.
+ */
+int nfs_filemap_write_and_wait_range(struct address_space *mapping,
+ loff_t lstart, loff_t lend)
+{
+ int ret;
+
+ ret = filemap_write_and_wait_range(mapping, lstart, lend);
+ if (ret == 0)
+ ret = pnfs_sync_inode(mapping->host, true);
+ return ret;
+}
+EXPORT_SYMBOL_GPL(nfs_filemap_write_and_wait_range);
+
+/*
* flush the inode to disk.
*/
int nfs_wb_all(struct inode *inode)