->splice_write() via ->write_iter()
iter_file_splice_write() - a ->splice_write() instance that gathers the
pipe buffers, builds a bio_vec-based iov_iter covering those and feeds
it to ->write_iter(). A bunch of simple cases coverted to that...
[AV: fixed the braino spotted by Cyrill]
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
diff --git a/fs/block_dev.c b/fs/block_dev.c
index 4e36b8e..e68e150 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -1583,7 +1583,7 @@
.compat_ioctl = compat_blkdev_ioctl,
#endif
.splice_read = generic_file_splice_read,
- .splice_write = generic_file_splice_write,
+ .splice_write = iter_file_splice_write,
};
int ioctl_by_bdev(struct block_device *bdev, unsigned cmd, unsigned long arg)
diff --git a/fs/exofs/file.c b/fs/exofs/file.c
index 5b7f6be..71bf8e4 100644
--- a/fs/exofs/file.c
+++ b/fs/exofs/file.c
@@ -77,7 +77,7 @@
.fsync = exofs_file_fsync,
.flush = exofs_flush,
.splice_read = generic_file_splice_read,
- .splice_write = generic_file_splice_write,
+ .splice_write = iter_file_splice_write,
};
const struct inode_operations exofs_file_inode_operations = {
diff --git a/fs/ext2/file.c b/fs/ext2/file.c
index 970c6ac..7c87b22 100644
--- a/fs/ext2/file.c
+++ b/fs/ext2/file.c
@@ -75,7 +75,7 @@
.release = ext2_release_file,
.fsync = ext2_fsync,
.splice_read = generic_file_splice_read,
- .splice_write = generic_file_splice_write,
+ .splice_write = iter_file_splice_write,
};
#ifdef CONFIG_EXT2_FS_XIP
diff --git a/fs/ext3/file.c b/fs/ext3/file.c
index c833b12..a062fa1 100644
--- a/fs/ext3/file.c
+++ b/fs/ext3/file.c
@@ -63,7 +63,7 @@
.release = ext3_release_file,
.fsync = ext3_sync_file,
.splice_read = generic_file_splice_read,
- .splice_write = generic_file_splice_write,
+ .splice_write = iter_file_splice_write,
};
const struct inode_operations ext3_file_inode_operations = {
diff --git a/fs/ext4/file.c b/fs/ext4/file.c
index 48383a5..708aad7 100644
--- a/fs/ext4/file.c
+++ b/fs/ext4/file.c
@@ -599,7 +599,7 @@
.release = ext4_release_file,
.fsync = ext4_sync_file,
.splice_read = generic_file_splice_read,
- .splice_write = generic_file_splice_write,
+ .splice_write = iter_file_splice_write,
.fallocate = ext4_fallocate,
};
diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
index 22f4900..e4ba4b9 100644
--- a/fs/f2fs/file.c
+++ b/fs/f2fs/file.c
@@ -692,5 +692,5 @@
.compat_ioctl = f2fs_compat_ioctl,
#endif
.splice_read = generic_file_splice_read,
- .splice_write = generic_file_splice_write,
+ .splice_write = iter_file_splice_write,
};
diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c
index ca932cd..01b4c5b 100644
--- a/fs/gfs2/file.c
+++ b/fs/gfs2/file.c
@@ -1068,7 +1068,7 @@
.lock = gfs2_lock,
.flock = gfs2_flock,
.splice_read = generic_file_splice_read,
- .splice_write = generic_file_splice_write,
+ .splice_write = iter_file_splice_write,
.setlease = gfs2_setlease,
.fallocate = gfs2_fallocate,
};
@@ -1098,7 +1098,7 @@
.release = gfs2_release,
.fsync = gfs2_fsync,
.splice_read = generic_file_splice_read,
- .splice_write = generic_file_splice_write,
+ .splice_write = iter_file_splice_write,
.setlease = generic_setlease,
.fallocate = gfs2_fallocate,
};
diff --git a/fs/jfs/file.c b/fs/jfs/file.c
index cc744ec..33aa0cc 100644
--- a/fs/jfs/file.c
+++ b/fs/jfs/file.c
@@ -157,7 +157,7 @@
.write_iter = generic_file_write_iter,
.mmap = generic_file_mmap,
.splice_read = generic_file_splice_read,
- .splice_write = generic_file_splice_write,
+ .splice_write = iter_file_splice_write,
.fsync = jfs_fsync,
.release = jfs_release,
.unlocked_ioctl = jfs_ioctl,
diff --git a/fs/ramfs/file-mmu.c b/fs/ramfs/file-mmu.c
index 6ea0b97..4f56de8 100644
--- a/fs/ramfs/file-mmu.c
+++ b/fs/ramfs/file-mmu.c
@@ -38,7 +38,7 @@
.mmap = generic_file_mmap,
.fsync = noop_fsync,
.splice_read = generic_file_splice_read,
- .splice_write = generic_file_splice_write,
+ .splice_write = iter_file_splice_write,
.llseek = generic_file_llseek,
};
diff --git a/fs/ramfs/file-nommu.c b/fs/ramfs/file-nommu.c
index 9ed420f..dda012a 100644
--- a/fs/ramfs/file-nommu.c
+++ b/fs/ramfs/file-nommu.c
@@ -43,7 +43,7 @@
.write_iter = generic_file_write_iter,
.fsync = noop_fsync,
.splice_read = generic_file_splice_read,
- .splice_write = generic_file_splice_write,
+ .splice_write = iter_file_splice_write,
.llseek = generic_file_llseek,
};
diff --git a/fs/reiserfs/file.c b/fs/reiserfs/file.c
index 7c8ecd6..f070cc8 100644
--- a/fs/reiserfs/file.c
+++ b/fs/reiserfs/file.c
@@ -248,7 +248,7 @@
.read_iter = generic_file_read_iter,
.write_iter = generic_file_write_iter,
.splice_read = generic_file_splice_read,
- .splice_write = generic_file_splice_write,
+ .splice_write = iter_file_splice_write,
.llseek = generic_file_llseek,
};
diff --git a/fs/splice.c b/fs/splice.c
index f99e420..f195a9b 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -32,6 +32,7 @@
#include <linux/gfp.h>
#include <linux/socket.h>
#include <linux/compat.h>
+#include <linux/aio.h>
#include "internal.h"
/*
@@ -1052,6 +1053,145 @@
EXPORT_SYMBOL(generic_file_splice_write);
+/**
+ * iter_file_splice_write - splice data from a pipe to a file
+ * @pipe: pipe info
+ * @out: file to write to
+ * @ppos: position in @out
+ * @len: number of bytes to splice
+ * @flags: splice modifier flags
+ *
+ * Description:
+ * Will either move or copy pages (determined by @flags options) from
+ * the given pipe inode to the given file.
+ * This one is ->write_iter-based.
+ *
+ */
+ssize_t
+iter_file_splice_write(struct pipe_inode_info *pipe, struct file *out,
+ loff_t *ppos, size_t len, unsigned int flags)
+{
+ struct splice_desc sd = {
+ .total_len = len,
+ .flags = flags,
+ .pos = *ppos,
+ .u.file = out,
+ };
+ int nbufs = pipe->buffers;
+ struct bio_vec *array = kcalloc(nbufs, sizeof(struct bio_vec),
+ GFP_KERNEL);
+ ssize_t ret;
+
+ if (unlikely(!array))
+ return -ENOMEM;
+
+ pipe_lock(pipe);
+
+ splice_from_pipe_begin(&sd);
+ while (sd.total_len) {
+ struct iov_iter from;
+ struct kiocb kiocb;
+ size_t left;
+ int n, idx;
+
+ ret = splice_from_pipe_next(pipe, &sd);
+ if (ret <= 0)
+ break;
+
+ if (unlikely(nbufs < pipe->buffers)) {
+ kfree(array);
+ nbufs = pipe->buffers;
+ array = kcalloc(nbufs, sizeof(struct bio_vec),
+ GFP_KERNEL);
+ if (!array) {
+ ret = -ENOMEM;
+ break;
+ }
+ }
+
+ /* build the vector */
+ left = sd.total_len;
+ for (n = 0, idx = pipe->curbuf; left && n < pipe->nrbufs; n++, idx++) {
+ struct pipe_buffer *buf = pipe->bufs + idx;
+ size_t this_len = buf->len;
+
+ if (this_len > left)
+ this_len = left;
+
+ if (idx == pipe->buffers - 1)
+ idx = -1;
+
+ ret = buf->ops->confirm(pipe, buf);
+ if (unlikely(ret)) {
+ if (ret == -ENODATA)
+ ret = 0;
+ goto done;
+ }
+
+ array[n].bv_page = buf->page;
+ array[n].bv_len = this_len;
+ array[n].bv_offset = buf->offset;
+ left -= this_len;
+ }
+
+ /* ... iov_iter */
+ from.type = ITER_BVEC | WRITE;
+ from.bvec = array;
+ from.nr_segs = n;
+ from.count = sd.total_len - left;
+ from.iov_offset = 0;
+
+ /* ... and iocb */
+ init_sync_kiocb(&kiocb, out);
+ kiocb.ki_pos = sd.pos;
+ kiocb.ki_nbytes = sd.total_len - left;
+
+ /* now, send it */
+ ret = out->f_op->write_iter(&kiocb, &from);
+ if (-EIOCBQUEUED == ret)
+ ret = wait_on_sync_kiocb(&kiocb);
+
+ if (ret <= 0)
+ break;
+
+ sd.num_spliced += ret;
+ sd.total_len -= ret;
+ *ppos = sd.pos = kiocb.ki_pos;
+
+ /* dismiss the fully eaten buffers, adjust the partial one */
+ while (ret) {
+ struct pipe_buffer *buf = pipe->bufs + pipe->curbuf;
+ if (ret >= buf->len) {
+ const struct pipe_buf_operations *ops = buf->ops;
+ ret -= buf->len;
+ buf->len = 0;
+ buf->ops = NULL;
+ ops->release(pipe, buf);
+ pipe->curbuf = (pipe->curbuf + 1) & (pipe->buffers - 1);
+ pipe->nrbufs--;
+ if (pipe->files)
+ sd.need_wakeup = true;
+ } else {
+ buf->offset += ret;
+ buf->len -= ret;
+ ret = 0;
+ }
+ }
+ }
+done:
+ kfree(array);
+ splice_from_pipe_end(pipe, &sd);
+
+ pipe_unlock(pipe);
+
+ if (sd.num_spliced)
+ ret = sd.num_spliced;
+
+ return ret;
+}
+
+EXPORT_SYMBOL(iter_file_splice_write);
+
static int write_pipe_buf(struct pipe_inode_info *pipe, struct pipe_buffer *buf,
struct splice_desc *sd)
{
diff --git a/fs/ubifs/file.c b/fs/ubifs/file.c
index 6bc4e8e..0888502 100644
--- a/fs/ubifs/file.c
+++ b/fs/ubifs/file.c
@@ -1585,7 +1585,7 @@
.fsync = ubifs_fsync,
.unlocked_ioctl = ubifs_ioctl,
.splice_read = generic_file_splice_read,
- .splice_write = generic_file_splice_write,
+ .splice_write = iter_file_splice_write,
#ifdef CONFIG_COMPAT
.compat_ioctl = ubifs_compat_ioctl,
#endif
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index 5446e86..b1c489c 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -343,47 +343,6 @@
}
/*
- * xfs_file_splice_write() does not use xfs_rw_ilock() because
- * generic_file_splice_write() takes the i_mutex itself. This, in theory,
- * couuld cause lock inversions between the aio_write path and the splice path
- * if someone is doing concurrent splice(2) based writes and write(2) based
- * writes to the same inode. The only real way to fix this is to re-implement
- * the generic code here with correct locking orders.
- */
-STATIC ssize_t
-xfs_file_splice_write(
- struct pipe_inode_info *pipe,
- struct file *outfilp,
- loff_t *ppos,
- size_t count,
- unsigned int flags)
-{
- struct inode *inode = outfilp->f_mapping->host;
- struct xfs_inode *ip = XFS_I(inode);
- int ioflags = 0;
- ssize_t ret;
-
- XFS_STATS_INC(xs_write_calls);
-
- if (outfilp->f_mode & FMODE_NOCMTIME)
- ioflags |= IO_INVIS;
-
- if (XFS_FORCED_SHUTDOWN(ip->i_mount))
- return -EIO;
-
- xfs_ilock(ip, XFS_IOLOCK_EXCL);
-
- trace_xfs_file_splice_write(ip, count, *ppos, ioflags);
-
- ret = generic_file_splice_write(pipe, outfilp, ppos, count, flags);
- if (ret > 0)
- XFS_STATS_ADD(xs_write_bytes, ret);
-
- xfs_iunlock(ip, XFS_IOLOCK_EXCL);
- return ret;
-}
-
-/*
* This routine is called to handle zeroing any space in the last block of the
* file that is beyond the EOF. We do this since the size is being increased
* without writing anything to that block and we don't want to read the
@@ -1442,7 +1401,7 @@
.read_iter = xfs_file_read_iter,
.write_iter = xfs_file_write_iter,
.splice_read = xfs_file_splice_read,
- .splice_write = xfs_file_splice_write,
+ .splice_write = iter_file_splice_write,
.unlocked_ioctl = xfs_file_ioctl,
#ifdef CONFIG_COMPAT
.compat_ioctl = xfs_file_compat_ioctl,
diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h
index 65d8c79..53182f9 100644
--- a/fs/xfs/xfs_trace.h
+++ b/fs/xfs/xfs_trace.h
@@ -1060,7 +1060,6 @@
DEFINE_RW_EVENT(xfs_file_buffered_write);
DEFINE_RW_EVENT(xfs_file_direct_write);
DEFINE_RW_EVENT(xfs_file_splice_read);
-DEFINE_RW_EVENT(xfs_file_splice_write);
DECLARE_EVENT_CLASS(xfs_page_class,
TP_PROTO(struct inode *inode, struct page *page, unsigned long off,
diff --git a/include/linux/fs.h b/include/linux/fs.h
index a644884..8bd8ed3 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -2434,6 +2434,8 @@
struct pipe_inode_info *, size_t, unsigned int);
extern ssize_t generic_file_splice_write(struct pipe_inode_info *,
struct file *, loff_t *, size_t, unsigned int);
+extern ssize_t iter_file_splice_write(struct pipe_inode_info *,
+ struct file *, loff_t *, size_t, unsigned int);
extern ssize_t generic_splice_sendpage(struct pipe_inode_info *pipe,
struct file *out, loff_t *, size_t len, unsigned int flags);