xfs: don't allocate an ioend for direct I/O completions
Back in the days when the direct I/O ->end_io callback could be called
from interrupt context for AIO we needed a structure to hand off to the
workqueue, and reused the ioend structure for this purpose. These days
->end_io is always called from user or workqueue context, which allows us
to avoid this memory allocation and simplify the code significantly.
[dchinner: removed now unused xfs_finish_ioend_sync() function after
Brian Foster did an initial review. ]
Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Dave Chinner <dchinner@redhat.com>
Signed-off-by: Dave Chinner <david@fromorbit.com>
diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
index 18e2f3b..3a9b7a1 100644
--- a/fs/xfs/xfs_aops.c
+++ b/fs/xfs/xfs_aops.c
@@ -135,30 +135,22 @@
*/
STATIC int
xfs_setfilesize(
- struct xfs_ioend *ioend)
+ struct xfs_inode *ip,
+ struct xfs_trans *tp,
+ xfs_off_t offset,
+ size_t size)
{
- struct xfs_inode *ip = XFS_I(ioend->io_inode);
- struct xfs_trans *tp = ioend->io_append_trans;
xfs_fsize_t isize;
- /*
- * The transaction may have been allocated in the I/O submission thread,
- * thus we need to mark ourselves as beeing in a transaction manually.
- * Similarly for freeze protection.
- */
- current_set_flags_nested(&tp->t_pflags, PF_FSTRANS);
- rwsem_acquire_read(&VFS_I(ip)->i_sb->s_writers.lock_map[SB_FREEZE_FS-1],
- 0, 1, _THIS_IP_);
-
xfs_ilock(ip, XFS_ILOCK_EXCL);
- isize = xfs_new_eof(ip, ioend->io_offset + ioend->io_size);
+ isize = xfs_new_eof(ip, offset + size);
if (!isize) {
xfs_iunlock(ip, XFS_ILOCK_EXCL);
xfs_trans_cancel(tp, 0);
return 0;
}
- trace_xfs_setfilesize(ip, ioend->io_offset, ioend->io_size);
+ trace_xfs_setfilesize(ip, offset, size);
ip->i_d.di_size = isize;
xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
@@ -167,6 +159,25 @@
return xfs_trans_commit(tp, 0);
}
+STATIC int
+xfs_setfilesize_ioend(
+ struct xfs_ioend *ioend)
+{
+ struct xfs_inode *ip = XFS_I(ioend->io_inode);
+ struct xfs_trans *tp = ioend->io_append_trans;
+
+ /*
+ * The transaction may have been allocated in the I/O submission thread,
+ * thus we need to mark ourselves as being in a transaction manually.
+ * Similarly for freeze protection.
+ */
+ current_set_flags_nested(&tp->t_pflags, PF_FSTRANS);
+ rwsem_acquire_read(&VFS_I(ip)->i_sb->s_writers.lock_map[SB_FREEZE_FS-1],
+ 0, 1, _THIS_IP_);
+
+ return xfs_setfilesize(ip, tp, ioend->io_offset, ioend->io_size);
+}
+
/*
* Schedule IO completion handling on the final put of an ioend.
*
@@ -182,8 +193,7 @@
if (ioend->io_type == XFS_IO_UNWRITTEN)
queue_work(mp->m_unwritten_workqueue, &ioend->io_work);
- else if (ioend->io_append_trans ||
- (ioend->io_isdirect && xfs_ioend_is_append(ioend)))
+ else if (ioend->io_append_trans)
queue_work(mp->m_data_workqueue, &ioend->io_work);
else
xfs_destroy_ioend(ioend);
@@ -215,22 +225,8 @@
if (ioend->io_type == XFS_IO_UNWRITTEN) {
error = xfs_iomap_write_unwritten(ip, ioend->io_offset,
ioend->io_size);
- } else if (ioend->io_isdirect && xfs_ioend_is_append(ioend)) {
- /*
- * For direct I/O we do not know if we need to allocate blocks
- * or not so we can't preallocate an append transaction as that
- * results in nested reservations and log space deadlocks. Hence
- * allocate the transaction here. While this is sub-optimal and
- * can block IO completion for some time, we're stuck with doing
- * it this way until we can pass the ioend to the direct IO
- * allocation callbacks and avoid nesting that way.
- */
- error = xfs_setfilesize_trans_alloc(ioend);
- if (error)
- goto done;
- error = xfs_setfilesize(ioend);
} else if (ioend->io_append_trans) {
- error = xfs_setfilesize(ioend);
+ error = xfs_setfilesize_ioend(ioend);
} else {
ASSERT(!xfs_ioend_is_append(ioend));
}
@@ -242,17 +238,6 @@
}
/*
- * Call IO completion handling in caller context on the final put of an ioend.
- */
-STATIC void
-xfs_finish_ioend_sync(
- struct xfs_ioend *ioend)
-{
- if (atomic_dec_and_test(&ioend->io_remaining))
- xfs_end_io(&ioend->io_work);
-}
-
-/*
* Allocate and initialise an IO completion structure.
* We need to track unwritten extent write completion here initially.
* We'll need to extend this for updating the ondisk inode size later
@@ -273,7 +258,6 @@
* all the I/O from calling the completion routine too early.
*/
atomic_set(&ioend->io_remaining, 1);
- ioend->io_isdirect = 0;
ioend->io_error = 0;
ioend->io_list = NULL;
ioend->io_type = type;
@@ -1459,11 +1443,7 @@
*
* If the private argument is non-NULL __xfs_get_blocks signals us that we
* need to issue a transaction to convert the range from unwritten to written
- * extents. In case this is regular synchronous I/O we just call xfs_end_io
- * to do this and we are done. But in case this was a successful AIO
- * request this handler is called from interrupt context, from which we
- * can't start transactions. In that case offload the I/O completion to
- * the workqueues we also use for buffered I/O completion.
+ * extents.
*/
STATIC void
xfs_end_io_direct_write(
@@ -1472,7 +1452,12 @@
ssize_t size,
void *private)
{
- struct xfs_ioend *ioend = iocb->private;
+ struct inode *inode = file_inode(iocb->ki_filp);
+ struct xfs_inode *ip = XFS_I(inode);
+ struct xfs_mount *mp = ip->i_mount;
+
+ if (XFS_FORCED_SHUTDOWN(mp))
+ return;
/*
* While the generic direct I/O code updates the inode size, it does
@@ -1480,22 +1465,33 @@
* end_io handler thinks the on-disk size is outside the in-core
* size. To prevent this just update it a little bit earlier here.
*/
- if (offset + size > i_size_read(ioend->io_inode))
- i_size_write(ioend->io_inode, offset + size);
+ if (offset + size > i_size_read(inode))
+ i_size_write(inode, offset + size);
/*
- * blockdev_direct_IO can return an error even after the I/O
- * completion handler was called. Thus we need to protect
- * against double-freeing.
+ * For direct I/O we do not know if we need to allocate blocks or not,
+ * so we can't preallocate an append transaction, as that results in
+ * nested reservations and log space deadlocks. Hence allocate the
+ * transaction here. While this is sub-optimal and can block IO
+ * completion for some time, we're stuck with doing it this way until
+ * we can pass the ioend to the direct IO allocation callbacks and
+ * avoid nesting that way.
*/
- iocb->private = NULL;
+ if (private && size > 0) {
+ xfs_iomap_write_unwritten(ip, offset, size);
+ } else if (offset + size > ip->i_d.di_size) {
+ struct xfs_trans *tp;
+ int error;
- ioend->io_offset = offset;
- ioend->io_size = size;
- if (private && size > 0)
- ioend->io_type = XFS_IO_UNWRITTEN;
+ tp = xfs_trans_alloc(mp, XFS_TRANS_FSYNC_TS);
+ error = xfs_trans_reserve(tp, &M_RES(mp)->tr_fsyncts, 0, 0);
+ if (error) {
+ xfs_trans_cancel(tp, 0);
+ return;
+ }
- xfs_finish_ioend_sync(ioend);
+ xfs_setfilesize(ip, tp, offset, size);
+ }
}
STATIC ssize_t
@@ -1507,39 +1503,16 @@
{
struct inode *inode = iocb->ki_filp->f_mapping->host;
struct block_device *bdev = xfs_find_bdev_for_inode(inode);
- struct xfs_ioend *ioend = NULL;
- ssize_t ret;
if (rw & WRITE) {
- size_t size = iov_iter_count(iter);
-
- /*
- * We cannot preallocate a size update transaction here as we
- * don't know whether allocation is necessary or not. Hence we
- * can only tell IO completion that one is necessary if we are
- * not doing unwritten extent conversion.
- */
- iocb->private = ioend = xfs_alloc_ioend(inode, XFS_IO_DIRECT);
- if (offset + size > XFS_I(inode)->i_d.di_size)
- ioend->io_isdirect = 1;
-
- ret = __blockdev_direct_IO(rw, iocb, inode, bdev, iter,
+ return __blockdev_direct_IO(rw, iocb, inode, bdev, iter,
offset, xfs_get_blocks_direct,
xfs_end_io_direct_write, NULL,
DIO_ASYNC_EXTEND);
- if (ret != -EIOCBQUEUED && iocb->private)
- goto out_destroy_ioend;
- } else {
- ret = __blockdev_direct_IO(rw, iocb, inode, bdev, iter,
- offset, xfs_get_blocks_direct,
- NULL, NULL, 0);
}
-
- return ret;
-
-out_destroy_ioend:
- xfs_destroy_ioend(ioend);
- return ret;
+ return __blockdev_direct_IO(rw, iocb, inode, bdev, iter,
+ offset, xfs_get_blocks_direct,
+ NULL, NULL, 0);
}
/*