Blame - fs/xfs/xfs_file.c - SHIFTPHONES/mainline/linux

blob: 38528e59030eefeca188d65a7c27e11f5e4632f7 [file] [log] [blame]

Dave Chinner	0b61f8a	2018-06-05 19:42:14 -0700	[diff] [blame]	1	// SPDX-License-Identifier: GPL-2.0
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	2	/*
Nathan Scott	7b71876	2005-11-02 14:58:39 +1100	[diff] [blame]	3	* Copyright (c) 2000-2005 Silicon Graphics, Inc.
				4	* All Rights Reserved.
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	5	*/
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	6	#include "xfs.h"
Christoph Hellwig	dda35b8	2010-02-15 09:44:46 +0000	[diff] [blame]	7	#include "xfs_fs.h"
Dave Chinner	70a9883	2013-10-23 10:36:05 +1100	[diff] [blame]	8	#include "xfs_shared.h"
Dave Chinner	a4fbe6a	2013-10-23 10:51:50 +1100	[diff] [blame]	9	#include "xfs_format.h"
Dave Chinner	239880e	2013-10-23 10:50:10 +1100	[diff] [blame]	10	#include "xfs_log_format.h"
				11	#include "xfs_trans_resv.h"
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	12	#include "xfs_mount.h"
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	13	#include "xfs_inode.h"
Dave Chinner	239880e	2013-10-23 10:50:10 +1100	[diff] [blame]	14	#include "xfs_trans.h"
Christoph Hellwig	fd3200b	2010-02-15 09:44:48 +0000	[diff] [blame]	15	#include "xfs_inode_item.h"
Christoph Hellwig	dda35b8	2010-02-15 09:44:46 +0000	[diff] [blame]	16	#include "xfs_bmap.h"
Dave Chinner	c24b5df	2013-08-12 20:49:45 +1000	[diff] [blame]	17	#include "xfs_bmap_util.h"
Dave Chinner	2b9ab5a	2013-08-12 20:49:37 +1000	[diff] [blame]	18	#include "xfs_dir2.h"
Dave Chinner	c24b5df	2013-08-12 20:49:45 +1000	[diff] [blame]	19	#include "xfs_dir2_priv.h"
Christoph Hellwig	ddcd856	2008-12-03 07:55:34 -0500	[diff] [blame]	20	#include "xfs_ioctl.h"
Christoph Hellwig	dda35b8	2010-02-15 09:44:46 +0000	[diff] [blame]	21	#include "xfs_trace.h"
Dave Chinner	239880e	2013-10-23 10:50:10 +1100	[diff] [blame]	22	#include "xfs_log.h"
Brian Foster	dc06f398	2014-07-24 19:49:28 +1000	[diff] [blame]	23	#include "xfs_icache.h"
Christoph Hellwig	781355c	2015-02-16 11:59:50 +1100	[diff] [blame]	24	#include "xfs_pnfs.h"
Christoph Hellwig	68a9f5e	2016-06-21 09:53:44 +1000	[diff] [blame]	25	#include "xfs_iomap.h"
Darrick J. Wong	0613f16	2016-10-03 09:11:37 -0700	[diff] [blame]	26	#include "xfs_reflink.h"
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	27
Christoph Hellwig	2fe17c1	2011-01-14 13:07:43 +0100	[diff] [blame]	28	#include <linux/falloc.h>
Tejun Heo	66114ca	2015-05-22 17:13:32 -0400	[diff] [blame]	29	#include <linux/backing-dev.h>
Christoph Hellwig	a39e596	2017-11-01 16:36:47 +0100	[diff] [blame]	30	#include <linux/mman.h>
Jan Kara	40144e4	2019-08-29 09:04:12 -0700	[diff] [blame]	31	#include <linux/fadvise.h>
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	32
Alexey Dobriyan	f0f37e2f	2009-09-27 22:29:37 +0400	[diff] [blame]	33	static const struct vm_operations_struct xfs_file_vm_ops;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	34
Darrick J. Wong	25219db	2020-10-09 16:42:59 -0700	[diff] [blame]	35	/*
				36	* Decide if the given file range is aligned to the size of the fundamental
				37	* allocation unit for the file.
				38	*/
				39	static bool
				40	xfs_is_falloc_aligned(
				41	struct xfs_inode *ip,
				42	loff_t pos,
				43	long long int len)
				44	{
				45	struct xfs_mount *mp = ip->i_mount;
				46	uint64_t mask;
				47
				48	if (XFS_IS_REALTIME_INODE(ip)) {
				49	if (!is_power_of_2(mp->m_sb.sb_rextsize)) {
				50	u64 rextbytes;
				51	u32 mod;
				52
				53	rextbytes = XFS_FSB_TO_B(mp, mp->m_sb.sb_rextsize);
				54	div_u64_rem(pos, rextbytes, &mod);
				55	if (mod)
				56	return false;
				57	div_u64_rem(len, rextbytes, &mod);
				58	return mod == 0;
				59	}
				60	mask = XFS_FSB_TO_B(mp, mp->m_sb.sb_rextsize) - 1;
				61	} else {
				62	mask = mp->m_sb.sb_blocksize - 1;
				63	}
				64
				65	return !((pos \| len) & mask);
				66	}
				67
Christoph Hellwig	8add71c	2015-02-02 09:53:56 +1100	[diff] [blame]	68	int
				69	xfs_update_prealloc_flags(
				70	struct xfs_inode *ip,
				71	enum xfs_prealloc_flags flags)
				72	{
				73	struct xfs_trans *tp;
				74	int error;
				75
Christoph Hellwig	253f491	2016-04-06 09:19:55 +1000	[diff] [blame]	76	error = xfs_trans_alloc(ip->i_mount, &M_RES(ip->i_mount)->tr_writeid,
				77	0, 0, 0, &tp);
				78	if (error)
Christoph Hellwig	8add71c	2015-02-02 09:53:56 +1100	[diff] [blame]	79	return error;
Christoph Hellwig	8add71c	2015-02-02 09:53:56 +1100	[diff] [blame]	80
				81	xfs_ilock(ip, XFS_ILOCK_EXCL);
				82	xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
				83
				84	if (!(flags & XFS_PREALLOC_INVISIBLE)) {
Dave Chinner	c19b3b05	2016-02-09 16:54:58 +1100	[diff] [blame]	85	VFS_I(ip)->i_mode &= ~S_ISUID;
				86	if (VFS_I(ip)->i_mode & S_IXGRP)
				87	VFS_I(ip)->i_mode &= ~S_ISGID;
Christoph Hellwig	8add71c	2015-02-02 09:53:56 +1100	[diff] [blame]	88	xfs_trans_ichgtime(tp, ip, XFS_ICHGTIME_MOD \| XFS_ICHGTIME_CHG);
				89	}
				90
				91	if (flags & XFS_PREALLOC_SET)
				92	ip->i_d.di_flags \|= XFS_DIFLAG_PREALLOC;
				93	if (flags & XFS_PREALLOC_CLEAR)
				94	ip->i_d.di_flags &= ~XFS_DIFLAG_PREALLOC;
				95
				96	xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
				97	if (flags & XFS_PREALLOC_SYNC)
				98	xfs_trans_set_sync(tp);
Christoph Hellwig	7039331	2015-06-04 13:48:08 +1000	[diff] [blame]	99	return xfs_trans_commit(tp);
Christoph Hellwig	8add71c	2015-02-02 09:53:56 +1100	[diff] [blame]	100	}
				101
Christoph Hellwig	1da2f2d	2011-10-02 14:25:16 +0000	[diff] [blame]	102	/*
				103	* Fsync operations on directories are much simpler than on regular files,
				104	* as there is no file data to flush, and thus also no need for explicit
				105	* cache flush operations, and there are no non-transaction metadata updates
				106	* on directories either.
				107	*/
				108	STATIC int
				109	xfs_dir_fsync(
				110	struct file *file,
				111	loff_t start,
				112	loff_t end,
				113	int datasync)
				114	{
				115	struct xfs_inode *ip = XFS_I(file->f_mapping->host);
Christoph Hellwig	1da2f2d	2011-10-02 14:25:16 +0000	[diff] [blame]	116
				117	trace_xfs_dir_fsync(ip);
Christoph Hellwig	54fbdd1	2020-04-03 11:45:37 -0700	[diff] [blame]	118	return xfs_log_force_inode(ip);
Christoph Hellwig	1da2f2d	2011-10-02 14:25:16 +0000	[diff] [blame]	119	}
				120
Christoph Hellwig	f22c7f8	2021-01-22 16:48:25 -0800	[diff] [blame]	121	static xfs_lsn_t
				122	xfs_fsync_lsn(
				123	struct xfs_inode *ip,
				124	bool datasync)
				125	{
				126	if (!xfs_ipincount(ip))
				127	return 0;
				128	if (datasync && !(ip->i_itemp->ili_fsync_fields & ~XFS_ILOG_TIMESTAMP))
				129	return 0;
				130	return ip->i_itemp->ili_last_lsn;
				131	}
				132
				133	/*
				134	* All metadata updates are logged, which means that we just have to flush the
				135	* log up to the latest LSN that touched the inode.
				136	*
				137	* If we have concurrent fsync/fdatasync() calls, we need them to all block on
				138	* the log force before we clear the ili_fsync_fields field. This ensures that
				139	* we don't get a racing sync operation that does not wait for the metadata to
				140	* hit the journal before returning. If we race with clearing ili_fsync_fields,
				141	* then all that will happen is the log force will do nothing as the lsn will
				142	* already be on disk. We can't race with setting ili_fsync_fields because that
				143	* is done under XFS_ILOCK_EXCL, and that can't happen because we hold the lock
				144	* shared until after the ili_fsync_fields is cleared.
				145	*/
				146	static int
				147	xfs_fsync_flush_log(
				148	struct xfs_inode *ip,
				149	bool datasync,
				150	int *log_flushed)
				151	{
				152	int error = 0;
				153	xfs_lsn_t lsn;
				154
				155	xfs_ilock(ip, XFS_ILOCK_SHARED);
				156	lsn = xfs_fsync_lsn(ip, datasync);
				157	if (lsn) {
				158	error = xfs_log_force_lsn(ip->i_mount, lsn, XFS_LOG_SYNC,
				159	log_flushed);
				160
				161	spin_lock(&ip->i_itemp->ili_lock);
				162	ip->i_itemp->ili_fsync_fields = 0;
				163	spin_unlock(&ip->i_itemp->ili_lock);
				164	}
				165	xfs_iunlock(ip, XFS_ILOCK_SHARED);
				166	return error;
				167	}
				168
Christoph Hellwig	fd3200b	2010-02-15 09:44:48 +0000	[diff] [blame]	169	STATIC int
				170	xfs_file_fsync(
				171	struct file *file,
Josef Bacik	02c24a8	2011-07-16 20:44:56 -0400	[diff] [blame]	172	loff_t start,
				173	loff_t end,
Christoph Hellwig	fd3200b	2010-02-15 09:44:48 +0000	[diff] [blame]	174	int datasync)
				175	{
Christoph Hellwig	f22c7f8	2021-01-22 16:48:25 -0800	[diff] [blame]	176	struct xfs_inode *ip = XFS_I(file->f_mapping->host);
Christoph Hellwig	a27a263	2011-06-16 12:02:23 +0000	[diff] [blame]	177	struct xfs_mount *mp = ip->i_mount;
Christoph Hellwig	fd3200b	2010-02-15 09:44:48 +0000	[diff] [blame]	178	int error = 0;
				179	int log_flushed = 0;
				180
Christoph Hellwig	cca28fb	2010-06-24 11:57:09 +1000	[diff] [blame]	181	trace_xfs_file_fsync(ip);
Christoph Hellwig	fd3200b	2010-02-15 09:44:48 +0000	[diff] [blame]	182
Jeff Layton	1b18027	2017-07-06 07:02:30 -0400	[diff] [blame]	183	error = file_write_and_wait_range(file, start, end);
Josef Bacik	02c24a8	2011-07-16 20:44:56 -0400	[diff] [blame]	184	if (error)
				185	return error;
				186
Christoph Hellwig	a27a263	2011-06-16 12:02:23 +0000	[diff] [blame]	187	if (XFS_FORCED_SHUTDOWN(mp))
Eric Sandeen	b474c7a	2014-06-22 15:04:54 +1000	[diff] [blame]	188	return -EIO;
Christoph Hellwig	fd3200b	2010-02-15 09:44:48 +0000	[diff] [blame]	189
				190	xfs_iflags_clear(ip, XFS_ITRUNCATED);
				191
Dave Chinner	2291dab	2016-12-09 16:49:54 +1100	[diff] [blame]	192	/*
				193	* If we have an RT and/or log subvolume we need to make sure to flush
				194	* the write cache the device used for file data first. This is to
				195	* ensure newly written file data make it to disk before logging the new
				196	* inode size in case of an extending write.
				197	*/
				198	if (XFS_IS_REALTIME_INODE(ip))
				199	xfs_blkdev_issue_flush(mp->m_rtdev_targp);
				200	else if (mp->m_logdev_targp != mp->m_ddev_targp)
				201	xfs_blkdev_issue_flush(mp->m_ddev_targp);
Christoph Hellwig	a27a263	2011-06-16 12:02:23 +0000	[diff] [blame]	202
Christoph Hellwig	ae29e42	2021-01-22 16:48:25 -0800	[diff] [blame]	203	/*
				204	* Any inode that has dirty modifications in the log is pinned. The
				205	* racy check here for a pinned inode while not catch modifications
				206	* that happen concurrently to the fsync call, but fsync semantics
				207	* only require to sync previously completed I/O.
				208	*/
				209	if (xfs_ipincount(ip))
				210	error = xfs_fsync_flush_log(ip, datasync, &log_flushed);
Christoph Hellwig	b103705	2011-09-19 14:55:51 +0000	[diff] [blame]	211
Christoph Hellwig	a27a263	2011-06-16 12:02:23 +0000	[diff] [blame]	212	/*
				213	* If we only have a single device, and the log force about was
				214	* a no-op we might have to flush the data device cache here.
				215	* This can only happen for fdatasync/O_DSYNC if we were overwriting
				216	* an already allocated file and thus do not have any metadata to
				217	* commit.
				218	*/
Dave Chinner	2291dab	2016-12-09 16:49:54 +1100	[diff] [blame]	219	if (!log_flushed && !XFS_IS_REALTIME_INODE(ip) &&
				220	mp->m_logdev_targp == mp->m_ddev_targp)
Christoph Hellwig	a27a263	2011-06-16 12:02:23 +0000	[diff] [blame]	221	xfs_blkdev_issue_flush(mp->m_ddev_targp);
Christoph Hellwig	fd3200b	2010-02-15 09:44:48 +0000	[diff] [blame]	222
Dave Chinner	2451337	2014-06-25 14:58:08 +1000	[diff] [blame]	223	return error;
Christoph Hellwig	fd3200b	2010-02-15 09:44:48 +0000	[diff] [blame]	224	}
				225
Christoph Hellwig	00258e3	2010-02-15 09:44:47 +0000	[diff] [blame]	226	STATIC ssize_t
Christoph Hellwig	bbc5a74	2016-07-20 11:35:42 +1000	[diff] [blame]	227	xfs_file_dio_aio_read(
Christoph Hellwig	dda35b8	2010-02-15 09:44:46 +0000	[diff] [blame]	228	struct kiocb *iocb,
Al Viro	b4f5d2c	2014-04-02 14:37:59 -0400	[diff] [blame]	229	struct iov_iter *to)
Christoph Hellwig	dda35b8	2010-02-15 09:44:46 +0000	[diff] [blame]	230	{
Christoph Hellwig	acdda3a	2016-11-30 14:37:15 +1100	[diff] [blame]	231	struct xfs_inode *ip = XFS_I(file_inode(iocb->ki_filp));
Christoph Hellwig	bbc5a74	2016-07-20 11:35:42 +1000	[diff] [blame]	232	size_t count = iov_iter_count(to);
Christoph Hellwig	acdda3a	2016-11-30 14:37:15 +1100	[diff] [blame]	233	ssize_t ret;
Christoph Hellwig	dda35b8	2010-02-15 09:44:46 +0000	[diff] [blame]	234
Christoph Hellwig	bbc5a74	2016-07-20 11:35:42 +1000	[diff] [blame]	235	trace_xfs_file_direct_read(ip, count, iocb->ki_pos);
Christoph Hellwig	dda35b8	2010-02-15 09:44:46 +0000	[diff] [blame]	236
Christoph Hellwig	f1285ff	2016-07-20 11:36:57 +1000	[diff] [blame]	237	if (!count)
				238	return 0; /* skip atime */
Christoph Hellwig	00258e3	2010-02-15 09:44:47 +0000	[diff] [blame]	239
Christoph Hellwig	a447d7c	2016-10-03 09:47:34 +1100	[diff] [blame]	240	file_accessed(iocb->ki_filp);
				241
Christoph Hellwig	7b53b86	2020-01-15 09:11:17 -0800	[diff] [blame]	242	if (iocb->ki_flags & IOCB_NOWAIT) {
				243	if (!xfs_ilock_nowait(ip, XFS_IOLOCK_SHARED))
				244	return -EAGAIN;
				245	} else {
				246	xfs_ilock(ip, XFS_IOLOCK_SHARED);
				247	}
Christoph Hellwig	690c2a3	2019-10-19 09:09:45 -0700	[diff] [blame]	248	ret = iomap_dio_rw(iocb, to, &xfs_read_iomap_ops, NULL,
				249	is_sync_kiocb(iocb));
Christoph Hellwig	6552321	2016-11-30 14:33:25 +1100	[diff] [blame]	250	xfs_iunlock(ip, XFS_IOLOCK_SHARED);
Christoph Hellwig	acdda3a	2016-11-30 14:37:15 +1100	[diff] [blame]	251
Christoph Hellwig	16d4d43	2016-07-20 11:38:55 +1000	[diff] [blame]	252	return ret;
				253	}
				254
Arnd Bergmann	f021bd0	2016-07-22 09:50:55 +1000	[diff] [blame]	255	static noinline ssize_t
Christoph Hellwig	16d4d43	2016-07-20 11:38:55 +1000	[diff] [blame]	256	xfs_file_dax_read(
				257	struct kiocb *iocb,
				258	struct iov_iter *to)
				259	{
Christoph Hellwig	6c31f495	2016-09-19 11:28:38 +1000	[diff] [blame]	260	struct xfs_inode *ip = XFS_I(iocb->ki_filp->f_mapping->host);
Christoph Hellwig	16d4d43	2016-07-20 11:38:55 +1000	[diff] [blame]	261	size_t count = iov_iter_count(to);
				262	ssize_t ret = 0;
				263
				264	trace_xfs_file_dax_read(ip, count, iocb->ki_pos);
				265
				266	if (!count)
				267	return 0; /* skip atime */
				268
Christoph Hellwig	942491c	2017-10-23 18:31:50 -0700	[diff] [blame]	269	if (iocb->ki_flags & IOCB_NOWAIT) {
				270	if (!xfs_ilock_nowait(ip, XFS_IOLOCK_SHARED))
Goldwyn Rodrigues	29a5d29	2017-06-20 07:05:48 -0500	[diff] [blame]	271	return -EAGAIN;
Christoph Hellwig	942491c	2017-10-23 18:31:50 -0700	[diff] [blame]	272	} else {
Goldwyn Rodrigues	29a5d29	2017-06-20 07:05:48 -0500	[diff] [blame]	273	xfs_ilock(ip, XFS_IOLOCK_SHARED);
				274	}
Christoph Hellwig	942491c	2017-10-23 18:31:50 -0700	[diff] [blame]	275
Christoph Hellwig	690c2a3	2019-10-19 09:09:45 -0700	[diff] [blame]	276	ret = dax_iomap_rw(iocb, to, &xfs_read_iomap_ops);
Christoph Hellwig	6552321	2016-11-30 14:33:25 +1100	[diff] [blame]	277	xfs_iunlock(ip, XFS_IOLOCK_SHARED);
Christoph Hellwig	bbc5a74	2016-07-20 11:35:42 +1000	[diff] [blame]	278
Christoph Hellwig	f1285ff	2016-07-20 11:36:57 +1000	[diff] [blame]	279	file_accessed(iocb->ki_filp);
Christoph Hellwig	bbc5a74	2016-07-20 11:35:42 +1000	[diff] [blame]	280	return ret;
				281	}
				282
				283	STATIC ssize_t
				284	xfs_file_buffered_aio_read(
				285	struct kiocb *iocb,
				286	struct iov_iter *to)
				287	{
				288	struct xfs_inode *ip = XFS_I(file_inode(iocb->ki_filp));
				289	ssize_t ret;
				290
				291	trace_xfs_file_buffered_read(ip, iov_iter_count(to), iocb->ki_pos);
				292
Christoph Hellwig	942491c	2017-10-23 18:31:50 -0700	[diff] [blame]	293	if (iocb->ki_flags & IOCB_NOWAIT) {
				294	if (!xfs_ilock_nowait(ip, XFS_IOLOCK_SHARED))
Christoph Hellwig	91f9943	2017-08-29 16:13:20 +0200	[diff] [blame]	295	return -EAGAIN;
Christoph Hellwig	942491c	2017-10-23 18:31:50 -0700	[diff] [blame]	296	} else {
Christoph Hellwig	91f9943	2017-08-29 16:13:20 +0200	[diff] [blame]	297	xfs_ilock(ip, XFS_IOLOCK_SHARED);
				298	}
Al Viro	b4f5d2c	2014-04-02 14:37:59 -0400	[diff] [blame]	299	ret = generic_file_read_iter(iocb, to);
Christoph Hellwig	6552321	2016-11-30 14:33:25 +1100	[diff] [blame]	300	xfs_iunlock(ip, XFS_IOLOCK_SHARED);
Christoph Hellwig	bbc5a74	2016-07-20 11:35:42 +1000	[diff] [blame]	301
				302	return ret;
				303	}
				304
				305	STATIC ssize_t
				306	xfs_file_read_iter(
				307	struct kiocb *iocb,
				308	struct iov_iter *to)
				309	{
Christoph Hellwig	16d4d43	2016-07-20 11:38:55 +1000	[diff] [blame]	310	struct inode *inode = file_inode(iocb->ki_filp);
				311	struct xfs_mount *mp = XFS_I(inode)->i_mount;
Christoph Hellwig	bbc5a74	2016-07-20 11:35:42 +1000	[diff] [blame]	312	ssize_t ret = 0;
				313
				314	XFS_STATS_INC(mp, xs_read_calls);
				315
				316	if (XFS_FORCED_SHUTDOWN(mp))
				317	return -EIO;
				318
Christoph Hellwig	16d4d43	2016-07-20 11:38:55 +1000	[diff] [blame]	319	if (IS_DAX(inode))
				320	ret = xfs_file_dax_read(iocb, to);
				321	else if (iocb->ki_flags & IOCB_DIRECT)
Christoph Hellwig	bbc5a74	2016-07-20 11:35:42 +1000	[diff] [blame]	322	ret = xfs_file_dio_aio_read(iocb, to);
				323	else
				324	ret = xfs_file_buffered_aio_read(iocb, to);
				325
Christoph Hellwig	dda35b8	2010-02-15 09:44:46 +0000	[diff] [blame]	326	if (ret > 0)
Bill O'Donnell	ff6d6af	2015-10-12 18:21:22 +1100	[diff] [blame]	327	XFS_STATS_ADD(mp, xs_read_bytes, ret);
Christoph Hellwig	dda35b8	2010-02-15 09:44:46 +0000	[diff] [blame]	328	return ret;
				329	}
				330
Dave Chinner	4c5cfd1	2011-01-11 10:14:16 +1100	[diff] [blame]	331	/*
Dave Chinner	4d8d158	2011-01-11 10:23:42 +1100	[diff] [blame]	332	* Common pre-write limit and setup checks.
				333	*
Christoph Hellwig	5bf1f26	2011-12-18 20:00:13 +0000	[diff] [blame]	334	* Called with the iolocked held either shared and exclusive according to
				335	* @iolock, and returns with it held. Might upgrade the iolock to exclusive
				336	* if called for a direct write beyond i_size.
Dave Chinner	4d8d158	2011-01-11 10:23:42 +1100	[diff] [blame]	337	*/
				338	STATIC ssize_t
				339	xfs_file_aio_write_checks(
Al Viro	99733fa	2015-04-07 14:25:18 -0400	[diff] [blame]	340	struct kiocb *iocb,
				341	struct iov_iter *from,
Dave Chinner	4d8d158	2011-01-11 10:23:42 +1100	[diff] [blame]	342	int *iolock)
				343	{
Al Viro	99733fa	2015-04-07 14:25:18 -0400	[diff] [blame]	344	struct file *file = iocb->ki_filp;
Dave Chinner	4d8d158	2011-01-11 10:23:42 +1100	[diff] [blame]	345	struct inode *inode = file->f_mapping->host;
				346	struct xfs_inode *ip = XFS_I(inode);
Al Viro	3309dd0	2015-04-09 12:55:47 -0400	[diff] [blame]	347	ssize_t error = 0;
Al Viro	99733fa	2015-04-07 14:25:18 -0400	[diff] [blame]	348	size_t count = iov_iter_count(from);
Brian Foster	3136e8b	2015-10-12 16:02:05 +1100	[diff] [blame]	349	bool drained_dio = false;
Christoph Hellwig	f5c54717	2018-03-13 23:15:32 -0700	[diff] [blame]	350	loff_t isize;
Dave Chinner	4d8d158	2011-01-11 10:23:42 +1100	[diff] [blame]	351
Dave Chinner	7271d24	2011-08-25 07:17:02 +0000	[diff] [blame]	352	restart:
Al Viro	3309dd0	2015-04-09 12:55:47 -0400	[diff] [blame]	353	error = generic_write_checks(iocb, from);
				354	if (error <= 0)
Dave Chinner	4d8d158	2011-01-11 10:23:42 +1100	[diff] [blame]	355	return error;
Dave Chinner	4d8d158	2011-01-11 10:23:42 +1100	[diff] [blame]	356
Dan Williams	69eb5fa	2018-03-20 14:42:38 -0700	[diff] [blame]	357	error = xfs_break_layouts(inode, iolock, BREAK_WRITE);
Christoph Hellwig	781355c	2015-02-16 11:59:50 +1100	[diff] [blame]	358	if (error)
				359	return error;
				360
Christoph Hellwig	6552321	2016-11-30 14:33:25 +1100	[diff] [blame]	361	/*
				362	* For changing security info in file_remove_privs() we need i_rwsem
				363	* exclusively.
				364	*/
Jan Kara	a6de82c	2015-05-21 16:05:56 +0200	[diff] [blame]	365	if (*iolock == XFS_IOLOCK_SHARED && !IS_NOSEC(inode)) {
Christoph Hellwig	6552321	2016-11-30 14:33:25 +1100	[diff] [blame]	366	xfs_iunlock(ip, *iolock);
Jan Kara	a6de82c	2015-05-21 16:05:56 +0200	[diff] [blame]	367	*iolock = XFS_IOLOCK_EXCL;
Christoph Hellwig	6552321	2016-11-30 14:33:25 +1100	[diff] [blame]	368	xfs_ilock(ip, *iolock);
Jan Kara	a6de82c	2015-05-21 16:05:56 +0200	[diff] [blame]	369	goto restart;
				370	}
Dave Chinner	4d8d158	2011-01-11 10:23:42 +1100	[diff] [blame]	371	/*
				372	* If the offset is beyond the size of the file, we need to zero any
				373	* blocks that fall between the existing EOF and the start of this
Christoph Hellwig	2813d68	2011-12-18 20:00:12 +0000	[diff] [blame]	374	* write. If zeroing is needed and we are currently holding the
Christoph Hellwig	467f789	2012-03-27 10:34:47 -0400	[diff] [blame]	375	* iolock shared, we need to update it to exclusive which implies
				376	* having to redo all checks before.
Dave Chinner	b9d5984	2015-04-16 22:03:07 +1000	[diff] [blame]	377	*
				378	* We need to serialise against EOF updates that occur in IO
				379	* completions here. We want to make sure that nobody is changing the
				380	* size while we do this check until we have placed an IO barrier (i.e.
				381	* hold the XFS_IOLOCK_EXCL) that prevents new IO from being dispatched.
				382	* The spinlock effectively forms a memory barrier once we have the
				383	* XFS_IOLOCK_EXCL so we are guaranteed to see the latest EOF value
				384	* and hence be able to correctly determine if we need to run zeroing.
Dave Chinner	4d8d158	2011-01-11 10:23:42 +1100	[diff] [blame]	385	*/
Dave Chinner	b9d5984	2015-04-16 22:03:07 +1000	[diff] [blame]	386	spin_lock(&ip->i_flags_lock);
Christoph Hellwig	f5c54717	2018-03-13 23:15:32 -0700	[diff] [blame]	387	isize = i_size_read(inode);
				388	if (iocb->ki_pos > isize) {
Dave Chinner	b9d5984	2015-04-16 22:03:07 +1000	[diff] [blame]	389	spin_unlock(&ip->i_flags_lock);
Brian Foster	3136e8b	2015-10-12 16:02:05 +1100	[diff] [blame]	390	if (!drained_dio) {
				391	if (*iolock == XFS_IOLOCK_SHARED) {
Christoph Hellwig	6552321	2016-11-30 14:33:25 +1100	[diff] [blame]	392	xfs_iunlock(ip, *iolock);
Brian Foster	3136e8b	2015-10-12 16:02:05 +1100	[diff] [blame]	393	*iolock = XFS_IOLOCK_EXCL;
Christoph Hellwig	6552321	2016-11-30 14:33:25 +1100	[diff] [blame]	394	xfs_ilock(ip, *iolock);
Brian Foster	3136e8b	2015-10-12 16:02:05 +1100	[diff] [blame]	395	iov_iter_reexpand(from, count);
				396	}
Dave Chinner	40c63fb	2015-04-16 22:03:17 +1000	[diff] [blame]	397	/*
				398	* We now have an IO submission barrier in place, but
				399	* AIO can do EOF updates during IO completion and hence
				400	* we now need to wait for all of them to drain. Non-AIO
				401	* DIO will have drained before we are given the
				402	* XFS_IOLOCK_EXCL, and so for most cases this wait is a
				403	* no-op.
				404	*/
				405	inode_dio_wait(inode);
Brian Foster	3136e8b	2015-10-12 16:02:05 +1100	[diff] [blame]	406	drained_dio = true;
Dave Chinner	7271d24	2011-08-25 07:17:02 +0000	[diff] [blame]	407	goto restart;
				408	}
Christoph Hellwig	f5c54717	2018-03-13 23:15:32 -0700	[diff] [blame]	409
				410	trace_xfs_zero_eof(ip, isize, iocb->ki_pos - isize);
				411	error = iomap_zero_range(inode, isize, iocb->ki_pos - isize,
Christoph Hellwig	f150b42	2019-10-19 09:09:46 -0700	[diff] [blame]	412	NULL, &xfs_buffered_write_iomap_ops);
Christoph Hellwig	467f789	2012-03-27 10:34:47 -0400	[diff] [blame]	413	if (error)
				414	return error;
Dave Chinner	b9d5984	2015-04-16 22:03:07 +1000	[diff] [blame]	415	} else
				416	spin_unlock(&ip->i_flags_lock);
Dave Chinner	4d8d158	2011-01-11 10:23:42 +1100	[diff] [blame]	417
Amir Goldstein	8c3f406	2019-06-05 08:04:50 -0700	[diff] [blame]	418	return file_modified(file);
Dave Chinner	4d8d158	2011-01-11 10:23:42 +1100	[diff] [blame]	419	}
				420
Christoph Hellwig	acdda3a	2016-11-30 14:37:15 +1100	[diff] [blame]	421	static int
				422	xfs_dio_write_end_io(
				423	struct kiocb *iocb,
				424	ssize_t size,
Matthew Bobrowski	6fe7b99	2019-09-19 15:32:44 -0700	[diff] [blame]	425	int error,
Christoph Hellwig	acdda3a	2016-11-30 14:37:15 +1100	[diff] [blame]	426	unsigned flags)
				427	{
				428	struct inode *inode = file_inode(iocb->ki_filp);
				429	struct xfs_inode *ip = XFS_I(inode);
				430	loff_t offset = iocb->ki_pos;
Christoph Hellwig	73d30d4	2019-06-28 19:31:38 -0700	[diff] [blame]	431	unsigned int nofs_flag;
Christoph Hellwig	acdda3a	2016-11-30 14:37:15 +1100	[diff] [blame]	432
				433	trace_xfs_end_io_direct_write(ip, offset, size);
				434
				435	if (XFS_FORCED_SHUTDOWN(ip->i_mount))
				436	return -EIO;
				437
Matthew Bobrowski	6fe7b99	2019-09-19 15:32:44 -0700	[diff] [blame]	438	if (error)
				439	return error;
				440	if (!size)
				441	return 0;
Christoph Hellwig	acdda3a	2016-11-30 14:37:15 +1100	[diff] [blame]	442
Dave Chinner	ed5c3e6	2018-05-02 12:54:52 -0700	[diff] [blame]	443	/*
				444	* Capture amount written on completion as we can't reliably account
				445	* for it on submission.
				446	*/
				447	XFS_STATS_ADD(ip->i_mount, xs_write_bytes, size);
				448
Christoph Hellwig	73d30d4	2019-06-28 19:31:38 -0700	[diff] [blame]	449	/*
				450	* We can allocate memory here while doing writeback on behalf of
				451	* memory reclaim. To avoid memory allocation deadlocks set the
				452	* task-wide nofs context for the following operations.
				453	*/
				454	nofs_flag = memalloc_nofs_save();
				455
Eryu Guan	ee70daa	2017-09-21 11:26:18 -0700	[diff] [blame]	456	if (flags & IOMAP_DIO_COW) {
				457	error = xfs_reflink_end_cow(ip, offset, size);
				458	if (error)
Christoph Hellwig	73d30d4	2019-06-28 19:31:38 -0700	[diff] [blame]	459	goto out;
Eryu Guan	ee70daa	2017-09-21 11:26:18 -0700	[diff] [blame]	460	}
				461
				462	/*
				463	* Unwritten conversion updates the in-core isize after extent
				464	* conversion but before updating the on-disk size. Updating isize any
				465	* earlier allows a racing dio read to find unwritten extents before
				466	* they are converted.
				467	*/
Christoph Hellwig	73d30d4	2019-06-28 19:31:38 -0700	[diff] [blame]	468	if (flags & IOMAP_DIO_UNWRITTEN) {
				469	error = xfs_iomap_write_unwritten(ip, offset, size, true);
				470	goto out;
				471	}
Eryu Guan	ee70daa	2017-09-21 11:26:18 -0700	[diff] [blame]	472
Christoph Hellwig	acdda3a	2016-11-30 14:37:15 +1100	[diff] [blame]	473	/*
				474	* We need to update the in-core inode size here so that we don't end up
				475	* with the on-disk inode size being outside the in-core inode size. We
				476	* have no other method of updating EOF for AIO, so always do it here
				477	* if necessary.
				478	*
				479	* We need to lock the test/set EOF update as we can be racing with
				480	* other IO completions here to update the EOF. Failing to serialise
				481	* here can result in EOF moving backwards and Bad Things Happen when
				482	* that occurs.
				483	*/
				484	spin_lock(&ip->i_flags_lock);
				485	if (offset + size > i_size_read(inode)) {
				486	i_size_write(inode, offset + size);
Eryu Guan	ee70daa	2017-09-21 11:26:18 -0700	[diff] [blame]	487	spin_unlock(&ip->i_flags_lock);
Christoph Hellwig	acdda3a	2016-11-30 14:37:15 +1100	[diff] [blame]	488	error = xfs_setfilesize(ip, offset, size);
Eryu Guan	ee70daa	2017-09-21 11:26:18 -0700	[diff] [blame]	489	} else {
				490	spin_unlock(&ip->i_flags_lock);
				491	}
Christoph Hellwig	acdda3a	2016-11-30 14:37:15 +1100	[diff] [blame]	492
Christoph Hellwig	73d30d4	2019-06-28 19:31:38 -0700	[diff] [blame]	493	out:
				494	memalloc_nofs_restore(nofs_flag);
Christoph Hellwig	acdda3a	2016-11-30 14:37:15 +1100	[diff] [blame]	495	return error;
				496	}
				497
Christoph Hellwig	838c4f3	2019-09-19 15:32:45 -0700	[diff] [blame]	498	static const struct iomap_dio_ops xfs_dio_write_ops = {
				499	.end_io = xfs_dio_write_end_io,
				500	};
				501
Dave Chinner	4d8d158	2011-01-11 10:23:42 +1100	[diff] [blame]	502	/*
Dave Chinner	f0d26e8	2011-01-11 10:15:36 +1100	[diff] [blame]	503	* xfs_file_dio_aio_write - handle direct IO writes
				504	*
				505	* Lock the inode appropriately to prepare for and issue a direct IO write.
Dave Chinner	eda7798	2011-01-11 10:22:40 +1100	[diff] [blame]	506	* By separating it from the buffered write path we remove all the tricky to
Dave Chinner	f0d26e8	2011-01-11 10:15:36 +1100	[diff] [blame]	507	* follow locking changes and looping.
				508	*
Dave Chinner	eda7798	2011-01-11 10:22:40 +1100	[diff] [blame]	509	* If there are cached pages or we're extending the file, we need IOLOCK_EXCL
				510	* until we're sure the bytes at the new EOF have been zeroed and/or the cached
				511	* pages are flushed out.
				512	*
				513	* In most cases the direct IO writes will be done holding IOLOCK_SHARED
				514	* allowing them to be done in parallel with reads and other direct IO writes.
				515	* However, if the IO is not aligned to filesystem blocks, the direct IO layer
				516	* needs to do sub-block zeroing and that requires serialisation against other
				517	* direct IOs to the same block. In this case we need to serialise the
				518	* submission of the unaligned IOs so that we don't get racing block zeroing in
				519	* the dio layer. To avoid the problem with aio, we also need to wait for
				520	* outstanding IOs to complete so that unwritten extent conversion is completed
				521	* before we try to map the overlapping block. This is currently implemented by
Christoph Hellwig	4a06fd2	2011-08-23 08:28:13 +0000	[diff] [blame]	522	* hitting it with a big hammer (i.e. inode_dio_wait()).
Dave Chinner	eda7798	2011-01-11 10:22:40 +1100	[diff] [blame]	523	*
Dave Chinner	f0d26e8	2011-01-11 10:15:36 +1100	[diff] [blame]	524	* Returns with locks held indicated by @iolock and errors indicated by
				525	* negative return values.
				526	*/
				527	STATIC ssize_t
				528	xfs_file_dio_aio_write(
				529	struct kiocb *iocb,
Al Viro	b318891	2014-04-02 07:06:30 -0400	[diff] [blame]	530	struct iov_iter *from)
Dave Chinner	f0d26e8	2011-01-11 10:15:36 +1100	[diff] [blame]	531	{
				532	struct file *file = iocb->ki_filp;
				533	struct address_space *mapping = file->f_mapping;
				534	struct inode *inode = mapping->host;
				535	struct xfs_inode *ip = XFS_I(inode);
				536	struct xfs_mount *mp = ip->i_mount;
				537	ssize_t ret = 0;
Dave Chinner	eda7798	2011-01-11 10:22:40 +1100	[diff] [blame]	538	int unaligned_io = 0;
Christoph Hellwig	d060646	2011-12-18 20:00:14 +0000	[diff] [blame]	539	int iolock;
Al Viro	b318891	2014-04-02 07:06:30 -0400	[diff] [blame]	540	size_t count = iov_iter_count(from);
Christoph Hellwig	f9acc19	2019-10-24 22:25:38 -0700	[diff] [blame]	541	struct xfs_buftarg *target = xfs_inode_buftarg(ip);
Dave Chinner	f0d26e8	2011-01-11 10:15:36 +1100	[diff] [blame]	542
Eric Sandeen	7c71ee7	2014-01-21 16:46:23 -0600	[diff] [blame]	543	/* DIO must be aligned to device logical sector size */
Christoph Hellwig	16d4d43	2016-07-20 11:38:55 +1000	[diff] [blame]	544	if ((iocb->ki_pos \| count) & target->bt_logical_sectormask)
Eric Sandeen	b474c7a	2014-06-22 15:04:54 +1000	[diff] [blame]	545	return -EINVAL;
Dave Chinner	f0d26e8	2011-01-11 10:15:36 +1100	[diff] [blame]	546
Christoph Hellwig	0ee7a3f	2016-10-20 15:44:14 +1100	[diff] [blame]	547	/*
				548	* Don't take the exclusive iolock here unless the I/O is unaligned to
				549	* the file system block size. We don't need to consider the EOF
				550	* extension case here because xfs_file_aio_write_checks() will relock
				551	* the inode as necessary for EOF zeroing cases and fill out the new
				552	* inode size as appropriate.
				553	*/
Christoph Hellwig	1371271	2016-04-07 08:51:57 -0700	[diff] [blame]	554	if ((iocb->ki_pos & mp->m_blockmask) \|\|
Christoph Hellwig	0ee7a3f	2016-10-20 15:44:14 +1100	[diff] [blame]	555	((iocb->ki_pos + count) & mp->m_blockmask)) {
Dave Chinner	eda7798	2011-01-11 10:22:40 +1100	[diff] [blame]	556	unaligned_io = 1;
Christoph Hellwig	54a4ef8	2017-02-06 13:00:54 -0800	[diff] [blame]	557
				558	/*
				559	* We can't properly handle unaligned direct I/O to reflink
				560	* files yet, as we can't unshare a partial block.
				561	*/
Christoph Hellwig	66ae56a	2019-02-18 09:38:49 -0800	[diff] [blame]	562	if (xfs_is_cow_inode(ip)) {
Christoph Hellwig	54a4ef8	2017-02-06 13:00:54 -0800	[diff] [blame]	563	trace_xfs_reflink_bounce_dio_write(ip, iocb->ki_pos, count);
Christoph Hellwig	80e543a	2020-07-23 22:45:58 -0700	[diff] [blame]	564	return -ENOTBLK;
Christoph Hellwig	54a4ef8	2017-02-06 13:00:54 -0800	[diff] [blame]	565	}
Christoph Hellwig	d060646	2011-12-18 20:00:14 +0000	[diff] [blame]	566	iolock = XFS_IOLOCK_EXCL;
Christoph Hellwig	0ee7a3f	2016-10-20 15:44:14 +1100	[diff] [blame]	567	} else {
Christoph Hellwig	d060646	2011-12-18 20:00:14 +0000	[diff] [blame]	568	iolock = XFS_IOLOCK_SHARED;
Christoph Hellwig	c58cb16	2011-08-27 14:42:53 +0000	[diff] [blame]	569	}
Dave Chinner	f0d26e8	2011-01-11 10:15:36 +1100	[diff] [blame]	570
Christoph Hellwig	942491c	2017-10-23 18:31:50 -0700	[diff] [blame]	571	if (iocb->ki_flags & IOCB_NOWAIT) {
Darrick J. Wong	1fdeaea	2019-04-17 08:49:36 -0700	[diff] [blame]	572	/* unaligned dio always waits, bail */
				573	if (unaligned_io)
				574	return -EAGAIN;
Christoph Hellwig	942491c	2017-10-23 18:31:50 -0700	[diff] [blame]	575	if (!xfs_ilock_nowait(ip, iolock))
Goldwyn Rodrigues	29a5d29	2017-06-20 07:05:48 -0500	[diff] [blame]	576	return -EAGAIN;
Christoph Hellwig	942491c	2017-10-23 18:31:50 -0700	[diff] [blame]	577	} else {
Goldwyn Rodrigues	29a5d29	2017-06-20 07:05:48 -0500	[diff] [blame]	578	xfs_ilock(ip, iolock);
				579	}
Christoph Hellwig	0ee7a3f	2016-10-20 15:44:14 +1100	[diff] [blame]	580
Al Viro	99733fa	2015-04-07 14:25:18 -0400	[diff] [blame]	581	ret = xfs_file_aio_write_checks(iocb, from, &iolock);
Dave Chinner	4d8d158	2011-01-11 10:23:42 +1100	[diff] [blame]	582	if (ret)
Christoph Hellwig	d060646	2011-12-18 20:00:14 +0000	[diff] [blame]	583	goto out;
Al Viro	99733fa	2015-04-07 14:25:18 -0400	[diff] [blame]	584	count = iov_iter_count(from);
Dave Chinner	f0d26e8	2011-01-11 10:15:36 +1100	[diff] [blame]	585
Dave Chinner	eda7798	2011-01-11 10:22:40 +1100	[diff] [blame]	586	/*
Brian Foster	2032a8a	2019-03-25 17:01:45 -0700	[diff] [blame]	587	* If we are doing unaligned IO, we can't allow any other overlapping IO
				588	* in-flight at the same time or we risk data corruption. Wait for all
				589	* other IO to drain before we submit. If the IO is aligned, demote the
				590	* iolock if we had to take the exclusive lock in
				591	* xfs_file_aio_write_checks() for other reasons.
Dave Chinner	eda7798	2011-01-11 10:22:40 +1100	[diff] [blame]	592	*/
Goldwyn Rodrigues	29a5d29	2017-06-20 07:05:48 -0500	[diff] [blame]	593	if (unaligned_io) {
Brian Foster	2032a8a	2019-03-25 17:01:45 -0700	[diff] [blame]	594	inode_dio_wait(inode);
Goldwyn Rodrigues	29a5d29	2017-06-20 07:05:48 -0500	[diff] [blame]	595	} else if (iolock == XFS_IOLOCK_EXCL) {
Christoph Hellwig	6552321	2016-11-30 14:33:25 +1100	[diff] [blame]	596	xfs_ilock_demote(ip, XFS_IOLOCK_EXCL);
Christoph Hellwig	d060646	2011-12-18 20:00:14 +0000	[diff] [blame]	597	iolock = XFS_IOLOCK_SHARED;
Dave Chinner	f0d26e8	2011-01-11 10:15:36 +1100	[diff] [blame]	598	}
				599
Christoph Hellwig	3176c3e	2016-07-20 11:31:42 +1000	[diff] [blame]	600	trace_xfs_file_direct_write(ip, count, iocb->ki_pos);
Brian Foster	2032a8a	2019-03-25 17:01:45 -0700	[diff] [blame]	601	/*
Jan Kara	906753b	2019-10-15 08:43:43 -0700	[diff] [blame]	602	* If unaligned, this is the only IO in-flight. Wait on it before we
				603	* release the iolock to prevent subsequent overlapping IO.
Brian Foster	2032a8a	2019-03-25 17:01:45 -0700	[diff] [blame]	604	*/
Christoph Hellwig	f150b42	2019-10-19 09:09:46 -0700	[diff] [blame]	605	ret = iomap_dio_rw(iocb, from, &xfs_direct_write_iomap_ops,
				606	&xfs_dio_write_ops,
Jan Kara	906753b	2019-10-15 08:43:43 -0700	[diff] [blame]	607	is_sync_kiocb(iocb) \|\| unaligned_io);
Christoph Hellwig	d060646	2011-12-18 20:00:14 +0000	[diff] [blame]	608	out:
Christoph Hellwig	6552321	2016-11-30 14:33:25 +1100	[diff] [blame]	609	xfs_iunlock(ip, iolock);
Christoph Hellwig	d060646	2011-12-18 20:00:14 +0000	[diff] [blame]	610
Dave Chinner	6b698ed	2015-06-04 09:18:53 +1000	[diff] [blame]	611	/*
Christoph Hellwig	60263d5	2020-07-23 22:45:59 -0700	[diff] [blame]	612	* No fallback to buffered IO after short writes for XFS, direct I/O
				613	* will either complete fully or return an error.
Dave Chinner	6b698ed	2015-06-04 09:18:53 +1000	[diff] [blame]	614	*/
Christoph Hellwig	16d4d43	2016-07-20 11:38:55 +1000	[diff] [blame]	615	ASSERT(ret < 0 \|\| ret == count);
				616	return ret;
				617	}
				618
Arnd Bergmann	f021bd0	2016-07-22 09:50:55 +1000	[diff] [blame]	619	static noinline ssize_t
Christoph Hellwig	16d4d43	2016-07-20 11:38:55 +1000	[diff] [blame]	620	xfs_file_dax_write(
				621	struct kiocb *iocb,
				622	struct iov_iter *from)
				623	{
Christoph Hellwig	6c31f495	2016-09-19 11:28:38 +1000	[diff] [blame]	624	struct inode *inode = iocb->ki_filp->f_mapping->host;
Christoph Hellwig	16d4d43	2016-07-20 11:38:55 +1000	[diff] [blame]	625	struct xfs_inode *ip = XFS_I(inode);
Christoph Hellwig	17879e8	2016-09-19 11:24:50 +1000	[diff] [blame]	626	int iolock = XFS_IOLOCK_EXCL;
Christoph Hellwig	6c31f495	2016-09-19 11:28:38 +1000	[diff] [blame]	627	ssize_t ret, error = 0;
				628	size_t count;
				629	loff_t pos;
Christoph Hellwig	16d4d43	2016-07-20 11:38:55 +1000	[diff] [blame]	630
Christoph Hellwig	942491c	2017-10-23 18:31:50 -0700	[diff] [blame]	631	if (iocb->ki_flags & IOCB_NOWAIT) {
				632	if (!xfs_ilock_nowait(ip, iolock))
Goldwyn Rodrigues	29a5d29	2017-06-20 07:05:48 -0500	[diff] [blame]	633	return -EAGAIN;
Christoph Hellwig	942491c	2017-10-23 18:31:50 -0700	[diff] [blame]	634	} else {
Goldwyn Rodrigues	29a5d29	2017-06-20 07:05:48 -0500	[diff] [blame]	635	xfs_ilock(ip, iolock);
				636	}
				637
Christoph Hellwig	16d4d43	2016-07-20 11:38:55 +1000	[diff] [blame]	638	ret = xfs_file_aio_write_checks(iocb, from, &iolock);
				639	if (ret)
				640	goto out;
				641
Christoph Hellwig	6c31f495	2016-09-19 11:28:38 +1000	[diff] [blame]	642	pos = iocb->ki_pos;
				643	count = iov_iter_count(from);
Dave Chinner	8b2180b	2016-08-17 08:31:33 +1000	[diff] [blame]	644
Christoph Hellwig	6c31f495	2016-09-19 11:28:38 +1000	[diff] [blame]	645	trace_xfs_file_dax_write(ip, count, pos);
Christoph Hellwig	f150b42	2019-10-19 09:09:46 -0700	[diff] [blame]	646	ret = dax_iomap_rw(iocb, from, &xfs_direct_write_iomap_ops);
Christoph Hellwig	6c31f495	2016-09-19 11:28:38 +1000	[diff] [blame]	647	if (ret > 0 && iocb->ki_pos > i_size_read(inode)) {
				648	i_size_write(inode, iocb->ki_pos);
				649	error = xfs_setfilesize(ip, pos, ret);
Christoph Hellwig	16d4d43	2016-07-20 11:38:55 +1000	[diff] [blame]	650	}
Christoph Hellwig	16d4d43	2016-07-20 11:38:55 +1000	[diff] [blame]	651	out:
Christoph Hellwig	6552321	2016-11-30 14:33:25 +1100	[diff] [blame]	652	xfs_iunlock(ip, iolock);
Dave Chinner	ed5c3e6	2018-05-02 12:54:52 -0700	[diff] [blame]	653	if (error)
				654	return error;
				655
				656	if (ret > 0) {
				657	XFS_STATS_ADD(ip->i_mount, xs_write_bytes, ret);
				658
				659	/* Handle various SYNC-type writes */
				660	ret = generic_write_sync(iocb, ret);
				661	}
				662	return ret;
Dave Chinner	f0d26e8	2011-01-11 10:15:36 +1100	[diff] [blame]	663	}
				664
Christoph Hellwig	00258e3	2010-02-15 09:44:47 +0000	[diff] [blame]	665	STATIC ssize_t
Dave Chinner	637bbc7	2011-01-11 10:17:30 +1100	[diff] [blame]	666	xfs_file_buffered_aio_write(
				667	struct kiocb *iocb,
Al Viro	b318891	2014-04-02 07:06:30 -0400	[diff] [blame]	668	struct iov_iter *from)
Dave Chinner	637bbc7	2011-01-11 10:17:30 +1100	[diff] [blame]	669	{
				670	struct file *file = iocb->ki_filp;
				671	struct address_space *mapping = file->f_mapping;
				672	struct inode *inode = mapping->host;
				673	struct xfs_inode *ip = XFS_I(inode);
				674	ssize_t ret;
Darrick J. Wong	a636b1d	2021-01-22 16:48:34 -0800	[diff] [blame]	675	bool cleared_space = false;
Brian Foster	c315509	2017-01-27 23:22:56 -0800	[diff] [blame]	676	int iolock;
Dave Chinner	637bbc7	2011-01-11 10:17:30 +1100	[diff] [blame]	677
Christoph Hellwig	91f9943	2017-08-29 16:13:20 +0200	[diff] [blame]	678	if (iocb->ki_flags & IOCB_NOWAIT)
				679	return -EOPNOTSUPP;
				680
Brian Foster	c315509	2017-01-27 23:22:56 -0800	[diff] [blame]	681	write_retry:
				682	iolock = XFS_IOLOCK_EXCL;
Christoph Hellwig	6552321	2016-11-30 14:33:25 +1100	[diff] [blame]	683	xfs_ilock(ip, iolock);
Dave Chinner	637bbc7	2011-01-11 10:17:30 +1100	[diff] [blame]	684
Al Viro	99733fa	2015-04-07 14:25:18 -0400	[diff] [blame]	685	ret = xfs_file_aio_write_checks(iocb, from, &iolock);
Dave Chinner	4d8d158	2011-01-11 10:23:42 +1100	[diff] [blame]	686	if (ret)
Christoph Hellwig	d060646	2011-12-18 20:00:14 +0000	[diff] [blame]	687	goto out;
Dave Chinner	637bbc7	2011-01-11 10:17:30 +1100	[diff] [blame]	688
				689	/* We can write back this queue in page reclaim */
Christoph Hellwig	de1414a	2015-01-14 10:42:36 +0100	[diff] [blame]	690	current->backing_dev_info = inode_to_bdi(inode);
Dave Chinner	637bbc7	2011-01-11 10:17:30 +1100	[diff] [blame]	691
Christoph Hellwig	3176c3e	2016-07-20 11:31:42 +1000	[diff] [blame]	692	trace_xfs_file_buffered_write(ip, iov_iter_count(from), iocb->ki_pos);
Christoph Hellwig	f150b42	2019-10-19 09:09:46 -0700	[diff] [blame]	693	ret = iomap_file_buffered_write(iocb, from,
				694	&xfs_buffered_write_iomap_ops);
Al Viro	0a64bc2	2014-02-11 22:25:22 -0500	[diff] [blame]	695	if (likely(ret >= 0))
Al Viro	99733fa	2015-04-07 14:25:18 -0400	[diff] [blame]	696	iocb->ki_pos += ret;
Brian Foster	dc06f398	2014-07-24 19:49:28 +1000	[diff] [blame]	697
Dave Chinner	637bbc7	2011-01-11 10:17:30 +1100	[diff] [blame]	698	/*
Brian Foster	dc06f398	2014-07-24 19:49:28 +1000	[diff] [blame]	699	* If we hit a space limit, try to free up some lingering preallocated
				700	* space before returning an error. In the case of ENOSPC, first try to
				701	* write back all dirty inodes to free up some of the excess reserved
				702	* metadata space. This reduces the chances that the eofblocks scan
				703	* waits on dirty mappings. Since xfs_flush_inodes() is serialized, this
				704	* also behaves as a filter to prevent too many eofblocks scans from
Darrick J. Wong	111068f	2021-01-22 16:48:36 -0800	[diff] [blame]	705	* running at the same time. Use a synchronous scan to increase the
				706	* effectiveness of the scan.
Dave Chinner	637bbc7	2011-01-11 10:17:30 +1100	[diff] [blame]	707	*/
Darrick J. Wong	a636b1d	2021-01-22 16:48:34 -0800	[diff] [blame]	708	if (ret == -EDQUOT && !cleared_space) {
Brian Foster	c315509	2017-01-27 23:22:56 -0800	[diff] [blame]	709	xfs_iunlock(ip, iolock);
Darrick J. Wong	111068f	2021-01-22 16:48:36 -0800	[diff] [blame]	710	xfs_blockgc_free_quota(ip, XFS_EOF_FLAGS_SYNC);
				711	cleared_space = true;
				712	goto write_retry;
Darrick J. Wong	a636b1d	2021-01-22 16:48:34 -0800	[diff] [blame]	713	} else if (ret == -ENOSPC && !cleared_space) {
Brian Foster	dc06f398	2014-07-24 19:49:28 +1000	[diff] [blame]	714	struct xfs_eofblocks eofb = {0};
				715
Darrick J. Wong	a636b1d	2021-01-22 16:48:34 -0800	[diff] [blame]	716	cleared_space = true;
Dave Chinner	9aa0500	2012-10-08 21:56:04 +1100	[diff] [blame]	717	xfs_flush_inodes(ip->i_mount);
Brian Foster	c315509	2017-01-27 23:22:56 -0800	[diff] [blame]	718
				719	xfs_iunlock(ip, iolock);
Brian Foster	dc06f398	2014-07-24 19:49:28 +1000	[diff] [blame]	720	eofb.eof_flags = XFS_EOF_FLAGS_SYNC;
Darrick J. Wong	85c5b27	2021-01-22 16:48:39 -0800	[diff] [blame^]	721	xfs_blockgc_free_space(ip->i_mount, &eofb);
Dave Chinner	9aa0500	2012-10-08 21:56:04 +1100	[diff] [blame]	722	goto write_retry;
Dave Chinner	637bbc7	2011-01-11 10:17:30 +1100	[diff] [blame]	723	}
Christoph Hellwig	d060646	2011-12-18 20:00:14 +0000	[diff] [blame]	724
Dave Chinner	637bbc7	2011-01-11 10:17:30 +1100	[diff] [blame]	725	current->backing_dev_info = NULL;
Christoph Hellwig	d060646	2011-12-18 20:00:14 +0000	[diff] [blame]	726	out:
Brian Foster	c315509	2017-01-27 23:22:56 -0800	[diff] [blame]	727	if (iolock)
				728	xfs_iunlock(ip, iolock);
Dave Chinner	ed5c3e6	2018-05-02 12:54:52 -0700	[diff] [blame]	729
				730	if (ret > 0) {
				731	XFS_STATS_ADD(ip->i_mount, xs_write_bytes, ret);
				732	/* Handle various SYNC-type writes */
				733	ret = generic_write_sync(iocb, ret);
				734	}
Dave Chinner	637bbc7	2011-01-11 10:17:30 +1100	[diff] [blame]	735	return ret;
				736	}
				737
				738	STATIC ssize_t
Al Viro	bf97f3bc	2014-04-03 14:20:23 -0400	[diff] [blame]	739	xfs_file_write_iter(
Christoph Hellwig	dda35b8	2010-02-15 09:44:46 +0000	[diff] [blame]	740	struct kiocb *iocb,
Al Viro	bf97f3bc	2014-04-03 14:20:23 -0400	[diff] [blame]	741	struct iov_iter *from)
Christoph Hellwig	dda35b8	2010-02-15 09:44:46 +0000	[diff] [blame]	742	{
				743	struct file *file = iocb->ki_filp;
				744	struct address_space *mapping = file->f_mapping;
				745	struct inode *inode = mapping->host;
Christoph Hellwig	00258e3	2010-02-15 09:44:47 +0000	[diff] [blame]	746	struct xfs_inode *ip = XFS_I(inode);
Dave Chinner	637bbc7	2011-01-11 10:17:30 +1100	[diff] [blame]	747	ssize_t ret;
Al Viro	bf97f3bc	2014-04-03 14:20:23 -0400	[diff] [blame]	748	size_t ocount = iov_iter_count(from);
Christoph Hellwig	dda35b8	2010-02-15 09:44:46 +0000	[diff] [blame]	749
Bill O'Donnell	ff6d6af	2015-10-12 18:21:22 +1100	[diff] [blame]	750	XFS_STATS_INC(ip->i_mount, xs_write_calls);
Christoph Hellwig	dda35b8	2010-02-15 09:44:46 +0000	[diff] [blame]	751
Dave Chinner	637bbc7	2011-01-11 10:17:30 +1100	[diff] [blame]	752	if (ocount == 0)
Christoph Hellwig	dda35b8	2010-02-15 09:44:46 +0000	[diff] [blame]	753	return 0;
				754
Al Viro	bf97f3bc	2014-04-03 14:20:23 -0400	[diff] [blame]	755	if (XFS_FORCED_SHUTDOWN(ip->i_mount))
				756	return -EIO;
Christoph Hellwig	dda35b8	2010-02-15 09:44:46 +0000	[diff] [blame]	757
Christoph Hellwig	16d4d43	2016-07-20 11:38:55 +1000	[diff] [blame]	758	if (IS_DAX(inode))
Dave Chinner	ed5c3e6	2018-05-02 12:54:52 -0700	[diff] [blame]	759	return xfs_file_dax_write(iocb, from);
				760
				761	if (iocb->ki_flags & IOCB_DIRECT) {
Darrick J. Wong	0613f16	2016-10-03 09:11:37 -0700	[diff] [blame]	762	/*
				763	* Allow a directio write to fall back to a buffered
				764	* write only in the case that we're doing a reflink
				765	* CoW. In all other directio scenarios we do not
				766	* allow an operation to fall back to buffered mode.
				767	*/
Al Viro	bf97f3bc	2014-04-03 14:20:23 -0400	[diff] [blame]	768	ret = xfs_file_dio_aio_write(iocb, from);
Christoph Hellwig	80e543a	2020-07-23 22:45:58 -0700	[diff] [blame]	769	if (ret != -ENOTBLK)
Dave Chinner	ed5c3e6	2018-05-02 12:54:52 -0700	[diff] [blame]	770	return ret;
Darrick J. Wong	0613f16	2016-10-03 09:11:37 -0700	[diff] [blame]	771	}
Christoph Hellwig	dda35b8	2010-02-15 09:44:46 +0000	[diff] [blame]	772
Dave Chinner	ed5c3e6	2018-05-02 12:54:52 -0700	[diff] [blame]	773	return xfs_file_buffered_aio_write(iocb, from);
Christoph Hellwig	dda35b8	2010-02-15 09:44:46 +0000	[diff] [blame]	774	}
				775
Dan Williams	d6dc57e	2018-05-09 15:47:49 -0700	[diff] [blame]	776	static void
				777	xfs_wait_dax_page(
Dave Jiang	e25ff83	2018-08-10 08:48:18 -0700	[diff] [blame]	778	struct inode *inode)
Dan Williams	d6dc57e	2018-05-09 15:47:49 -0700	[diff] [blame]	779	{
				780	struct xfs_inode *ip = XFS_I(inode);
				781
Dan Williams	d6dc57e	2018-05-09 15:47:49 -0700	[diff] [blame]	782	xfs_iunlock(ip, XFS_MMAPLOCK_EXCL);
				783	schedule();
				784	xfs_ilock(ip, XFS_MMAPLOCK_EXCL);
				785	}
				786
				787	static int
				788	xfs_break_dax_layouts(
				789	struct inode *inode,
Dave Jiang	e25ff83	2018-08-10 08:48:18 -0700	[diff] [blame]	790	bool *retry)
Dan Williams	d6dc57e	2018-05-09 15:47:49 -0700	[diff] [blame]	791	{
				792	struct page *page;
				793
				794	ASSERT(xfs_isilocked(XFS_I(inode), XFS_MMAPLOCK_EXCL));
				795
				796	page = dax_layout_busy_page(inode->i_mapping);
				797	if (!page)
				798	return 0;
				799
Dave Jiang	e25ff83	2018-08-10 08:48:18 -0700	[diff] [blame]	800	*retry = true;
Dan Williams	d6dc57e	2018-05-09 15:47:49 -0700	[diff] [blame]	801	return ___wait_var_event(&page->_refcount,
				802	atomic_read(&page->_refcount) == 1, TASK_INTERRUPTIBLE,
Dave Jiang	e25ff83	2018-08-10 08:48:18 -0700	[diff] [blame]	803	0, 0, xfs_wait_dax_page(inode));
Dan Williams	d6dc57e	2018-05-09 15:47:49 -0700	[diff] [blame]	804	}
				805
Dan Williams	69eb5fa	2018-03-20 14:42:38 -0700	[diff] [blame]	806	int
				807	xfs_break_layouts(
				808	struct inode *inode,
				809	uint *iolock,
				810	enum layout_break_reason reason)
				811	{
				812	bool retry;
Dan Williams	d6dc57e	2018-05-09 15:47:49 -0700	[diff] [blame]	813	int error;
Dan Williams	69eb5fa	2018-03-20 14:42:38 -0700	[diff] [blame]	814
				815	ASSERT(xfs_isilocked(XFS_I(inode), XFS_IOLOCK_SHARED\|XFS_IOLOCK_EXCL));
				816
Dan Williams	d6dc57e	2018-05-09 15:47:49 -0700	[diff] [blame]	817	do {
				818	retry = false;
				819	switch (reason) {
				820	case BREAK_UNMAP:
Eric Sandeen	a4722a6	2018-07-11 22:26:36 -0700	[diff] [blame]	821	error = xfs_break_dax_layouts(inode, &retry);
Dan Williams	d6dc57e	2018-05-09 15:47:49 -0700	[diff] [blame]	822	if (error \|\| retry)
				823	break;
				824	/* fall through */
				825	case BREAK_WRITE:
				826	error = xfs_break_leased_layouts(inode, iolock, &retry);
				827	break;
				828	default:
				829	WARN_ON_ONCE(1);
				830	error = -EINVAL;
				831	}
				832	} while (error == 0 && retry);
				833
				834	return error;
Dan Williams	69eb5fa	2018-03-20 14:42:38 -0700	[diff] [blame]	835	}
				836
/* Every fallocate mode bit that xfs_file_fallocate() accepts. */
#define XFS_FALLOC_FL_SUPPORTED						\
		(FALLOC_FL_KEEP_SIZE |					\
		 FALLOC_FL_PUNCH_HOLE |					\
		 FALLOC_FL_COLLAPSE_RANGE |				\
		 FALLOC_FL_ZERO_RANGE |					\
		 FALLOC_FL_INSERT_RANGE |				\
		 FALLOC_FL_UNSHARE_RANGE)
Namjae Jeon	a904b1c	2015-03-25 15:08:56 +1100	[diff] [blame]	841
Christoph Hellwig	2fe17c1	2011-01-14 13:07:43 +0100	[diff] [blame]	842	STATIC long
				843	xfs_file_fallocate(
Christoph Hellwig	83aee9e	2013-10-12 00:55:07 -0700	[diff] [blame]	844	struct file *file,
				845	int mode,
				846	loff_t offset,
				847	loff_t len)
Christoph Hellwig	2fe17c1	2011-01-14 13:07:43 +0100	[diff] [blame]	848	{
Christoph Hellwig	83aee9e	2013-10-12 00:55:07 -0700	[diff] [blame]	849	struct inode *inode = file_inode(file);
				850	struct xfs_inode *ip = XFS_I(inode);
Christoph Hellwig	83aee9e	2013-10-12 00:55:07 -0700	[diff] [blame]	851	long error;
Christoph Hellwig	8add71c	2015-02-02 09:53:56 +1100	[diff] [blame]	852	enum xfs_prealloc_flags flags = 0;
Dan Williams	c63a8ea	2018-03-12 14:12:29 -0700	[diff] [blame]	853	uint iolock = XFS_IOLOCK_EXCL \| XFS_MMAPLOCK_EXCL;
Christoph Hellwig	83aee9e	2013-10-12 00:55:07 -0700	[diff] [blame]	854	loff_t new_size = 0;
Thomas Meyer	749f24f	2017-10-09 11:38:54 -0700	[diff] [blame]	855	bool do_file_insert = false;
Christoph Hellwig	2fe17c1	2011-01-14 13:07:43 +0100	[diff] [blame]	856
Christoph Hellwig	83aee9e	2013-10-12 00:55:07 -0700	[diff] [blame]	857	if (!S_ISREG(inode->i_mode))
				858	return -EINVAL;
Namjae Jeon	a904b1c	2015-03-25 15:08:56 +1100	[diff] [blame]	859	if (mode & ~XFS_FALLOC_FL_SUPPORTED)
Christoph Hellwig	2fe17c1	2011-01-14 13:07:43 +0100	[diff] [blame]	860	return -EOPNOTSUPP;
				861
Christoph Hellwig	781355c	2015-02-16 11:59:50 +1100	[diff] [blame]	862	xfs_ilock(ip, iolock);
Dan Williams	69eb5fa	2018-03-20 14:42:38 -0700	[diff] [blame]	863	error = xfs_break_layouts(inode, &iolock, BREAK_UNMAP);
Christoph Hellwig	781355c	2015-02-16 11:59:50 +1100	[diff] [blame]	864	if (error)
				865	goto out_unlock;
				866
Dave Chinner	249bd90	2019-10-29 13:04:32 -0700	[diff] [blame]	867	/*
				868	* Must wait for all AIO to complete before we continue as AIO can
				869	* change the file size on completion without holding any locks we
				870	* currently hold. We must do this first because AIO can update both
				871	* the on disk and in memory inode sizes, and the operations that follow
				872	* require the in-memory size to be fully up-to-date.
				873	*/
				874	inode_dio_wait(inode);
				875
				876	/*
				877	* Now AIO and DIO has drained we flush and (if necessary) invalidate
				878	* the cached range over the first operation we are about to run.
				879	*
				880	* We care about zero and collapse here because they both run a hole
				881	* punch over the range first. Because that can zero data, and the range
				882	* of invalidation for the shift operations is much larger, we still do
				883	* the required flush for collapse in xfs_prepare_shift().
				884	*
				885	* Insert has the same range requirements as collapse, and we extend the
				886	* file first which can zero data. Hence insert has the same
				887	* flush/invalidate requirements as collapse and so they are both
				888	* handled at the right time by xfs_prepare_shift().
				889	*/
				890	if (mode & (FALLOC_FL_PUNCH_HOLE \| FALLOC_FL_ZERO_RANGE \|
				891	FALLOC_FL_COLLAPSE_RANGE)) {
				892	error = xfs_flush_unmap_range(ip, offset, len);
				893	if (error)
				894	goto out_unlock;
				895	}
				896
Christoph Hellwig	83aee9e	2013-10-12 00:55:07 -0700	[diff] [blame]	897	if (mode & FALLOC_FL_PUNCH_HOLE) {
				898	error = xfs_free_file_space(ip, offset, len);
				899	if (error)
				900	goto out_unlock;
Namjae Jeon	e1d8fb8	2014-02-24 10:58:19 +1100	[diff] [blame]	901	} else if (mode & FALLOC_FL_COLLAPSE_RANGE) {
Darrick J. Wong	25219db	2020-10-09 16:42:59 -0700	[diff] [blame]	902	if (!xfs_is_falloc_aligned(ip, offset, len)) {
Dave Chinner	2451337	2014-06-25 14:58:08 +1000	[diff] [blame]	903	error = -EINVAL;
Namjae Jeon	e1d8fb8	2014-02-24 10:58:19 +1100	[diff] [blame]	904	goto out_unlock;
				905	}
				906
Lukas Czerner	23fffa9	2014-04-12 09:56:41 -0400	[diff] [blame]	907	/*
				908	* There is no need to overlap collapse range with EOF,
				909	* in which case it is effectively a truncate operation
				910	*/
				911	if (offset + len >= i_size_read(inode)) {
Dave Chinner	2451337	2014-06-25 14:58:08 +1000	[diff] [blame]	912	error = -EINVAL;
Lukas Czerner	23fffa9	2014-04-12 09:56:41 -0400	[diff] [blame]	913	goto out_unlock;
				914	}
				915
Namjae Jeon	e1d8fb8	2014-02-24 10:58:19 +1100	[diff] [blame]	916	new_size = i_size_read(inode) - len;
				917
				918	error = xfs_collapse_file_space(ip, offset, len);
				919	if (error)
				920	goto out_unlock;
Namjae Jeon	a904b1c	2015-03-25 15:08:56 +1100	[diff] [blame]	921	} else if (mode & FALLOC_FL_INSERT_RANGE) {
Darrick J. Wong	7d83fb1	2018-04-16 23:07:45 -0700	[diff] [blame]	922	loff_t isize = i_size_read(inode);
Namjae Jeon	a904b1c	2015-03-25 15:08:56 +1100	[diff] [blame]	923
Darrick J. Wong	25219db	2020-10-09 16:42:59 -0700	[diff] [blame]	924	if (!xfs_is_falloc_aligned(ip, offset, len)) {
Namjae Jeon	a904b1c	2015-03-25 15:08:56 +1100	[diff] [blame]	925	error = -EINVAL;
				926	goto out_unlock;
				927	}
				928
Darrick J. Wong	7d83fb1	2018-04-16 23:07:45 -0700	[diff] [blame]	929	/*
				930	* New inode size must not exceed ->s_maxbytes, accounting for
				931	* possible signed overflow.
				932	*/
				933	if (inode->i_sb->s_maxbytes - isize < len) {
Namjae Jeon	a904b1c	2015-03-25 15:08:56 +1100	[diff] [blame]	934	error = -EFBIG;
				935	goto out_unlock;
				936	}
Darrick J. Wong	7d83fb1	2018-04-16 23:07:45 -0700	[diff] [blame]	937	new_size = isize + len;
Namjae Jeon	a904b1c	2015-03-25 15:08:56 +1100	[diff] [blame]	938
				939	/* Offset should be less than i_size */
Darrick J. Wong	7d83fb1	2018-04-16 23:07:45 -0700	[diff] [blame]	940	if (offset >= isize) {
Namjae Jeon	a904b1c	2015-03-25 15:08:56 +1100	[diff] [blame]	941	error = -EINVAL;
				942	goto out_unlock;
				943	}
Thomas Meyer	749f24f	2017-10-09 11:38:54 -0700	[diff] [blame]	944	do_file_insert = true;
Christoph Hellwig	83aee9e	2013-10-12 00:55:07 -0700	[diff] [blame]	945	} else {
Christoph Hellwig	8add71c	2015-02-02 09:53:56 +1100	[diff] [blame]	946	flags \|= XFS_PREALLOC_SET;
				947
Christoph Hellwig	83aee9e	2013-10-12 00:55:07 -0700	[diff] [blame]	948	if (!(mode & FALLOC_FL_KEEP_SIZE) &&
				949	offset + len > i_size_read(inode)) {
				950	new_size = offset + len;
Dave Chinner	2451337	2014-06-25 14:58:08 +1000	[diff] [blame]	951	error = inode_newsize_ok(inode, new_size);
Christoph Hellwig	83aee9e	2013-10-12 00:55:07 -0700	[diff] [blame]	952	if (error)
				953	goto out_unlock;
				954	}
Christoph Hellwig	2fe17c1	2011-01-14 13:07:43 +0100	[diff] [blame]	955
Christoph Hellwig	66ae56a	2019-02-18 09:38:49 -0800	[diff] [blame]	956	if (mode & FALLOC_FL_ZERO_RANGE) {
Christoph Hellwig	360c09c	2019-10-24 22:26:27 -0700	[diff] [blame]	957	/*
				958	* Punch a hole and prealloc the range. We use a hole
				959	* punch rather than unwritten extent conversion for two
				960	* reasons:
				961	*
				962	* 1.) Hole punch handles partial block zeroing for us.
				963	* 2.) If prealloc returns ENOSPC, the file range is
				964	* still zero-valued by virtue of the hole punch.
				965	*/
				966	unsigned int blksize = i_blocksize(inode);
				967
				968	trace_xfs_zero_file_space(ip);
				969
				970	error = xfs_free_file_space(ip, offset, len);
				971	if (error)
				972	goto out_unlock;
				973
				974	len = round_up(offset + len, blksize) -
				975	round_down(offset, blksize);
				976	offset = round_down(offset, blksize);
Christoph Hellwig	66ae56a	2019-02-18 09:38:49 -0800	[diff] [blame]	977	} else if (mode & FALLOC_FL_UNSHARE_RANGE) {
				978	error = xfs_reflink_unshare(ip, offset, len);
				979	if (error)
				980	goto out_unlock;
Christoph Hellwig	66ae56a	2019-02-18 09:38:49 -0800	[diff] [blame]	981	} else {
				982	/*
				983	* If always_cow mode we can't use preallocations and
				984	* thus should not create them.
				985	*/
				986	if (xfs_is_always_cow_inode(ip)) {
				987	error = -EOPNOTSUPP;
				988	goto out_unlock;
				989	}
Christoph Hellwig	360c09c	2019-10-24 22:26:27 -0700	[diff] [blame]	990	}
Christoph Hellwig	66ae56a	2019-02-18 09:38:49 -0800	[diff] [blame]	991
Christoph Hellwig	360c09c	2019-10-24 22:26:27 -0700	[diff] [blame]	992	if (!xfs_is_always_cow_inode(ip)) {
Lukas Czerner	376ba31	2014-03-13 19:07:58 +1100	[diff] [blame]	993	error = xfs_alloc_file_space(ip, offset, len,
				994	XFS_BMAPI_PREALLOC);
Christoph Hellwig	360c09c	2019-10-24 22:26:27 -0700	[diff] [blame]	995	if (error)
				996	goto out_unlock;
Darrick J. Wong	98cc2db	2016-10-03 09:11:43 -0700	[diff] [blame]	997	}
Christoph Hellwig	2fe17c1	2011-01-14 13:07:43 +0100	[diff] [blame]	998	}
				999
Christoph Hellwig	83aee9e	2013-10-12 00:55:07 -0700	[diff] [blame]	1000	if (file->f_flags & O_DSYNC)
Christoph Hellwig	8add71c	2015-02-02 09:53:56 +1100	[diff] [blame]	1001	flags \|= XFS_PREALLOC_SYNC;
				1002
				1003	error = xfs_update_prealloc_flags(ip, flags);
Christoph Hellwig	2fe17c1	2011-01-14 13:07:43 +0100	[diff] [blame]	1004	if (error)
				1005	goto out_unlock;
				1006
				1007	/* Change file size if needed */
				1008	if (new_size) {
				1009	struct iattr iattr;
				1010
				1011	iattr.ia_valid = ATTR_SIZE;
				1012	iattr.ia_size = new_size;
Jan Kara	69bca80	2016-05-26 14:46:43 +0200	[diff] [blame]	1013	error = xfs_vn_setattr_size(file_dentry(file), &iattr);
Namjae Jeon	a904b1c	2015-03-25 15:08:56 +1100	[diff] [blame]	1014	if (error)
				1015	goto out_unlock;
Christoph Hellwig	2fe17c1	2011-01-14 13:07:43 +0100	[diff] [blame]	1016	}
				1017
Namjae Jeon	a904b1c	2015-03-25 15:08:56 +1100	[diff] [blame]	1018	/*
				1019	* Perform hole insertion now that the file size has been
				1020	* updated so that if we crash during the operation we don't
				1021	* leave shifted extents past EOF and hence losing access to
				1022	* the data that is contained within them.
				1023	*/
				1024	if (do_file_insert)
				1025	error = xfs_insert_file_space(ip, offset, len);
				1026
Christoph Hellwig	2fe17c1	2011-01-14 13:07:43 +0100	[diff] [blame]	1027	out_unlock:
Christoph Hellwig	781355c	2015-02-16 11:59:50 +1100	[diff] [blame]	1028	xfs_iunlock(ip, iolock);
Dave Chinner	2451337	2014-06-25 14:58:08 +1000	[diff] [blame]	1029	return error;
Christoph Hellwig	2fe17c1	2011-01-14 13:07:43 +0100	[diff] [blame]	1030	}
				1031
Jan Kara	40144e4	2019-08-29 09:04:12 -0700	[diff] [blame]	1032	STATIC int
				1033	xfs_file_fadvise(
				1034	struct file *file,
				1035	loff_t start,
				1036	loff_t end,
				1037	int advice)
				1038	{
				1039	struct xfs_inode *ip = XFS_I(file_inode(file));
				1040	int ret;
				1041	int lockflags = 0;
				1042
				1043	/*
				1044	* Operations creating pages in page cache need protection from hole
				1045	* punching and similar ops
				1046	*/
				1047	if (advice == POSIX_FADV_WILLNEED) {
				1048	lockflags = XFS_IOLOCK_SHARED;
				1049	xfs_ilock(ip, lockflags);
				1050	}
				1051	ret = generic_fadvise(file, start, end, advice);
				1052	if (lockflags)
				1053	xfs_iunlock(ip, lockflags);
				1054	return ret;
				1055	}
Darrick J. Wong	3fc9f5e	2018-10-30 10:47:26 +1100	[diff] [blame]	1056
Darrick J. Wong	5ffce3c	2020-09-04 10:20:16 -0700	[diff] [blame]	1057	/* Does this file, inode, or mount want synchronous writes? */
				1058	static inline bool xfs_file_sync_writes(struct file *filp)
				1059	{
				1060	struct xfs_inode *ip = XFS_I(file_inode(filp));
				1061
				1062	if (ip->i_mount->m_flags & XFS_MOUNT_WSYNC)
				1063	return true;
				1064	if (filp->f_flags & (__O_SYNC \| O_DSYNC))
				1065	return true;
				1066	if (IS_SYNC(file_inode(filp)))
				1067	return true;
				1068
				1069	return false;
				1070	}
				1071
Eric Biggers	da034bc	2018-11-14 21:48:18 -0800	[diff] [blame]	1072	STATIC loff_t
Darrick J. Wong	2e5dfc9	2018-10-30 10:41:21 +1100	[diff] [blame]	1073	xfs_file_remap_range(
Darrick J. Wong	3fc9f5e	2018-10-30 10:47:26 +1100	[diff] [blame]	1074	struct file *file_in,
				1075	loff_t pos_in,
				1076	struct file *file_out,
				1077	loff_t pos_out,
				1078	loff_t len,
				1079	unsigned int remap_flags)
Darrick J. Wong	9fe2604	2016-10-03 09:11:40 -0700	[diff] [blame]	1080	{
Darrick J. Wong	3fc9f5e	2018-10-30 10:47:26 +1100	[diff] [blame]	1081	struct inode *inode_in = file_inode(file_in);
				1082	struct xfs_inode *src = XFS_I(inode_in);
				1083	struct inode *inode_out = file_inode(file_out);
				1084	struct xfs_inode *dest = XFS_I(inode_out);
				1085	struct xfs_mount *mp = src->i_mount;
				1086	loff_t remapped = 0;
				1087	xfs_extlen_t cowextsize;
				1088	int ret;
				1089
Darrick J. Wong	2e5dfc9	2018-10-30 10:41:21 +1100	[diff] [blame]	1090	if (remap_flags & ~(REMAP_FILE_DEDUP \| REMAP_FILE_ADVISORY))
				1091	return -EINVAL;
Darrick J. Wong	cc71466	2016-10-03 09:11:41 -0700	[diff] [blame]	1092
Darrick J. Wong	3fc9f5e	2018-10-30 10:47:26 +1100	[diff] [blame]	1093	if (!xfs_sb_version_hasreflink(&mp->m_sb))
				1094	return -EOPNOTSUPP;
				1095
				1096	if (XFS_FORCED_SHUTDOWN(mp))
				1097	return -EIO;
				1098
				1099	/* Prepare and then clone file data. */
				1100	ret = xfs_reflink_remap_prep(file_in, pos_in, file_out, pos_out,
				1101	&len, remap_flags);
Darrick J. Wong	451d34e	2020-06-29 14:47:20 -0700	[diff] [blame]	1102	if (ret \|\| len == 0)
Darrick J. Wong	3fc9f5e	2018-10-30 10:47:26 +1100	[diff] [blame]	1103	return ret;
				1104
				1105	trace_xfs_reflink_remap_range(src, pos_in, len, dest, pos_out);
				1106
				1107	ret = xfs_reflink_remap_blocks(src, pos_in, dest, pos_out, len,
				1108	&remapped);
				1109	if (ret)
				1110	goto out_unlock;
				1111
				1112	/*
				1113	* Carry the cowextsize hint from src to dest if we're sharing the
				1114	* entire source file to the entire destination file, the source file
				1115	* has a cowextsize hint, and the destination file does not.
				1116	*/
				1117	cowextsize = 0;
				1118	if (pos_in == 0 && len == i_size_read(inode_in) &&
				1119	(src->i_d.di_flags2 & XFS_DIFLAG2_COWEXTSIZE) &&
				1120	pos_out == 0 && len >= i_size_read(inode_out) &&
				1121	!(dest->i_d.di_flags2 & XFS_DIFLAG2_COWEXTSIZE))
				1122	cowextsize = src->i_d.di_cowextsize;
				1123
				1124	ret = xfs_reflink_update_dest(dest, pos_out + len, cowextsize,
				1125	remap_flags);
Christoph Hellwig	5833112	2020-04-03 11:45:37 -0700	[diff] [blame]	1126	if (ret)
				1127	goto out_unlock;
Darrick J. Wong	3fc9f5e	2018-10-30 10:47:26 +1100	[diff] [blame]	1128
Darrick J. Wong	5ffce3c	2020-09-04 10:20:16 -0700	[diff] [blame]	1129	if (xfs_file_sync_writes(file_in) \|\| xfs_file_sync_writes(file_out))
Christoph Hellwig	5833112	2020-04-03 11:45:37 -0700	[diff] [blame]	1130	xfs_log_force_inode(dest);
Darrick J. Wong	3fc9f5e	2018-10-30 10:47:26 +1100	[diff] [blame]	1131	out_unlock:
Darrick J. Wong	e2aaee9	2020-06-29 14:47:20 -0700	[diff] [blame]	1132	xfs_iunlock2_io_mmap(src, dest);
Darrick J. Wong	3fc9f5e	2018-10-30 10:47:26 +1100	[diff] [blame]	1133	if (ret)
				1134	trace_xfs_reflink_remap_range_error(dest, ret, _RET_IP_);
				1135	return remapped > 0 ? remapped : ret;
Darrick J. Wong	9fe2604	2016-10-03 09:11:40 -0700	[diff] [blame]	1136	}
Christoph Hellwig	2fe17c1	2011-01-14 13:07:43 +0100	[diff] [blame]	1137
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1138	STATIC int
Nathan Scott	3562fd4	2006-03-14 14:00:35 +1100	[diff] [blame]	1139	xfs_file_open(
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1140	struct inode *inode,
Christoph Hellwig	f999a5b	2008-11-28 14:23:32 +1100	[diff] [blame]	1141	struct file *file)
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1142	{
Christoph Hellwig	f999a5b	2008-11-28 14:23:32 +1100	[diff] [blame]	1143	if (!(file->f_flags & O_LARGEFILE) && i_size_read(inode) > MAX_NON_LFS)
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1144	return -EFBIG;
Christoph Hellwig	f999a5b	2008-11-28 14:23:32 +1100	[diff] [blame]	1145	if (XFS_FORCED_SHUTDOWN(XFS_M(inode->i_sb)))
				1146	return -EIO;
Jens Axboe	f89fb73	2020-05-22 09:27:33 -0600	[diff] [blame]	1147	file->f_mode \|= FMODE_NOWAIT \| FMODE_BUF_RASYNC;
Christoph Hellwig	f999a5b	2008-11-28 14:23:32 +1100	[diff] [blame]	1148	return 0;
				1149	}
				1150
				1151	STATIC int
				1152	xfs_dir_open(
				1153	struct inode *inode,
				1154	struct file *file)
				1155	{
				1156	struct xfs_inode *ip = XFS_I(inode);
				1157	int mode;
				1158	int error;
				1159
				1160	error = xfs_file_open(inode, file);
				1161	if (error)
				1162	return error;
				1163
				1164	/*
				1165	* If there are any blocks, read-ahead block 0 as we're almost
				1166	* certain to have the next operation be a read there.
				1167	*/
Christoph Hellwig	309ecac8	2013-12-06 12:30:09 -0800	[diff] [blame]	1168	mode = xfs_ilock_data_map_shared(ip);
Christoph Hellwig	daf8396	2020-05-18 10:27:22 -0700	[diff] [blame]	1169	if (ip->i_df.if_nextents > 0)
Christoph Hellwig	06566fd	2019-11-20 09:46:02 -0800	[diff] [blame]	1170	error = xfs_dir3_data_readahead(ip, 0, 0);
Christoph Hellwig	f999a5b	2008-11-28 14:23:32 +1100	[diff] [blame]	1171	xfs_iunlock(ip, mode);
Darrick J. Wong	7a652bb	2017-02-02 15:13:58 -0800	[diff] [blame]	1172	return error;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1173	}
				1174
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1175	STATIC int
Nathan Scott	3562fd4	2006-03-14 14:00:35 +1100	[diff] [blame]	1176	xfs_file_release(
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1177	struct inode *inode,
				1178	struct file *filp)
				1179	{
Dave Chinner	2451337	2014-06-25 14:58:08 +1000	[diff] [blame]	1180	return xfs_release(XFS_I(inode));
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1181	}
				1182
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1183	STATIC int
Nathan Scott	3562fd4	2006-03-14 14:00:35 +1100	[diff] [blame]	1184	xfs_file_readdir(
Al Viro	b822755	2013-05-22 17:07:56 -0400	[diff] [blame]	1185	struct file *file,
				1186	struct dir_context *ctx)
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1187	{
Al Viro	b822755	2013-05-22 17:07:56 -0400	[diff] [blame]	1188	struct inode *inode = file_inode(file);
Christoph Hellwig	739bfb2	2007-08-29 10:58:01 +1000	[diff] [blame]	1189	xfs_inode_t *ip = XFS_I(inode);
Christoph Hellwig	051e7cd	2007-08-28 13:58:24 +1000	[diff] [blame]	1190	size_t bufsize;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1191
Christoph Hellwig	051e7cd	2007-08-28 13:58:24 +1000	[diff] [blame]	1192	/*
				1193	* The Linux API doesn't pass down the total size of the buffer
				1194	* we read into down to the filesystem. With the filldir concept
				1195	* it's not needed for correct information, but the XFS dir2 leaf
				1196	* code wants an estimate of the buffer size to calculate it's
				1197	* readahead window and size the buffers used for mapping to
				1198	* physical blocks.
				1199	*
				1200	* Try to give it an estimate that's good enough, maybe at some
				1201	* point we can change the ->readdir prototype to include the
Eric Sandeen	a9cc799	2010-02-03 17:50:13 +0000	[diff] [blame]	1202	* buffer size. For now we use the current glibc buffer size.
Christoph Hellwig	051e7cd	2007-08-28 13:58:24 +1000	[diff] [blame]	1203	*/
Darrick J. Wong	a5c46e5	2017-10-17 21:37:44 -0700	[diff] [blame]	1204	bufsize = (size_t)min_t(loff_t, XFS_READDIR_BUFSIZE, ip->i_d.di_size);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1205
Darrick J. Wong	acb9553	2017-06-16 11:00:14 -0700	[diff] [blame]	1206	return xfs_readdir(NULL, ip, ctx, bufsize);
Jeff Liu	3fe3e6b	2012-05-10 21:29:17 +0800	[diff] [blame]	1207	}
				1208
				1209	STATIC loff_t
				1210	xfs_file_llseek(
				1211	struct file *file,
				1212	loff_t offset,
Eric Sandeen	59f9c00	2014-09-09 11:57:10 +1000	[diff] [blame]	1213	int whence)
Jeff Liu	3fe3e6b	2012-05-10 21:29:17 +0800	[diff] [blame]	1214	{
Christoph Hellwig	9b2970a	2017-06-29 11:43:21 -0700	[diff] [blame]	1215	struct inode *inode = file->f_mapping->host;
				1216
				1217	if (XFS_FORCED_SHUTDOWN(XFS_I(inode)->i_mount))
				1218	return -EIO;
				1219
Eric Sandeen	59f9c00	2014-09-09 11:57:10 +1000	[diff] [blame]	1220	switch (whence) {
Christoph Hellwig	9b2970a	2017-06-29 11:43:21 -0700	[diff] [blame]	1221	default:
Eric Sandeen	59f9c00	2014-09-09 11:57:10 +1000	[diff] [blame]	1222	return generic_file_llseek(file, offset, whence);
Jeff Liu	3fe3e6b	2012-05-10 21:29:17 +0800	[diff] [blame]	1223	case SEEK_HOLE:
Christoph Hellwig	60271ab7	2019-02-18 09:38:46 -0800	[diff] [blame]	1224	offset = iomap_seek_hole(inode, offset, &xfs_seek_iomap_ops);
Christoph Hellwig	9b2970a	2017-06-29 11:43:21 -0700	[diff] [blame]	1225	break;
Eric Sandeen	49c6959	2014-09-09 11:56:48 +1000	[diff] [blame]	1226	case SEEK_DATA:
Christoph Hellwig	60271ab7	2019-02-18 09:38:46 -0800	[diff] [blame]	1227	offset = iomap_seek_data(inode, offset, &xfs_seek_iomap_ops);
Christoph Hellwig	9b2970a	2017-06-29 11:43:21 -0700	[diff] [blame]	1228	break;
Jeff Liu	3fe3e6b	2012-05-10 21:29:17 +0800	[diff] [blame]	1229	}
Christoph Hellwig	9b2970a	2017-06-29 11:43:21 -0700	[diff] [blame]	1230
				1231	if (offset < 0)
				1232	return offset;
				1233	return vfs_setpos(file, offset, inode->i_sb->s_maxbytes);
Jeff Liu	3fe3e6b	2012-05-10 21:29:17 +0800	[diff] [blame]	1234	}
				1235
Dave Chinner	de0e8c2	2015-02-23 21:44:19 +1100	[diff] [blame]	1236	/*
				1237	* Locking for serialisation of IO during page faults. This results in a lock
				1238	* ordering of:
				1239	*
Michel Lespinasse	c1e8d7c	2020-06-08 21:33:54 -0700	[diff] [blame]	1240	* mmap_lock (MM)
Dave Chinner	6b698ed	2015-06-04 09:18:53 +1000	[diff] [blame]	1241	* sb_start_pagefault(vfs, freeze)
Dave Chinner	13ad4fe	2015-11-03 12:37:02 +1100	[diff] [blame]	1242	* i_mmaplock (XFS - truncate serialisation)
Dave Chinner	6b698ed	2015-06-04 09:18:53 +1000	[diff] [blame]	1243	* page_lock (MM)
				1244	* i_lock (XFS - extent map serialisation)
Dave Chinner	de0e8c2	2015-02-23 21:44:19 +1100	[diff] [blame]	1245	*/
Souptick Joarder	05edd88	2018-05-29 10:39:03 -0700	[diff] [blame]	1246	static vm_fault_t
Christoph Hellwig	d522d56	2017-08-29 10:08:41 -0700	[diff] [blame]	1247	__xfs_filemap_fault(
Dave Jiang	c791ace	2017-02-24 14:57:08 -0800	[diff] [blame]	1248	struct vm_fault *vmf,
Christoph Hellwig	d522d56	2017-08-29 10:08:41 -0700	[diff] [blame]	1249	enum page_entry_size pe_size,
				1250	bool write_fault)
Matthew Wilcox	acd76e7	2015-09-08 14:59:06 -0700	[diff] [blame]	1251	{
Dave Jiang	f420039	2017-02-22 15:40:06 -0800	[diff] [blame]	1252	struct inode *inode = file_inode(vmf->vma->vm_file);
Matthew Wilcox	acd76e7	2015-09-08 14:59:06 -0700	[diff] [blame]	1253	struct xfs_inode *ip = XFS_I(inode);
Souptick Joarder	05edd88	2018-05-29 10:39:03 -0700	[diff] [blame]	1254	vm_fault_t ret;
Matthew Wilcox	acd76e7	2015-09-08 14:59:06 -0700	[diff] [blame]	1255
Christoph Hellwig	d522d56	2017-08-29 10:08:41 -0700	[diff] [blame]	1256	trace_xfs_filemap_fault(ip, pe_size, write_fault);
Matthew Wilcox	acd76e7	2015-09-08 14:59:06 -0700	[diff] [blame]	1257
Christoph Hellwig	d522d56	2017-08-29 10:08:41 -0700	[diff] [blame]	1258	if (write_fault) {
Dave Chinner	13ad4fe	2015-11-03 12:37:02 +1100	[diff] [blame]	1259	sb_start_pagefault(inode->i_sb);
Dave Jiang	f420039	2017-02-22 15:40:06 -0800	[diff] [blame]	1260	file_update_time(vmf->vma->vm_file);
Dave Chinner	13ad4fe	2015-11-03 12:37:02 +1100	[diff] [blame]	1261	}
				1262
Matthew Wilcox	acd76e7	2015-09-08 14:59:06 -0700	[diff] [blame]	1263	xfs_ilock(XFS_I(inode), XFS_MMAPLOCK_SHARED);
Christoph Hellwig	d522d56	2017-08-29 10:08:41 -0700	[diff] [blame]	1264	if (IS_DAX(inode)) {
Christoph Hellwig	a39e596	2017-11-01 16:36:47 +0100	[diff] [blame]	1265	pfn_t pfn;
				1266
Christoph Hellwig	690c2a3	2019-10-19 09:09:45 -0700	[diff] [blame]	1267	ret = dax_iomap_fault(vmf, pe_size, &pfn, NULL,
				1268	(write_fault && !vmf->cow_page) ?
Christoph Hellwig	f150b42	2019-10-19 09:09:46 -0700	[diff] [blame]	1269	&xfs_direct_write_iomap_ops :
				1270	&xfs_read_iomap_ops);
Christoph Hellwig	a39e596	2017-11-01 16:36:47 +0100	[diff] [blame]	1271	if (ret & VM_FAULT_NEEDDSYNC)
				1272	ret = dax_finish_sync_fault(vmf, pe_size, pfn);
Christoph Hellwig	d522d56	2017-08-29 10:08:41 -0700	[diff] [blame]	1273	} else {
				1274	if (write_fault)
Christoph Hellwig	f150b42	2019-10-19 09:09:46 -0700	[diff] [blame]	1275	ret = iomap_page_mkwrite(vmf,
				1276	&xfs_buffered_write_iomap_ops);
Christoph Hellwig	d522d56	2017-08-29 10:08:41 -0700	[diff] [blame]	1277	else
				1278	ret = filemap_fault(vmf);
				1279	}
Matthew Wilcox	acd76e7	2015-09-08 14:59:06 -0700	[diff] [blame]	1280	xfs_iunlock(XFS_I(inode), XFS_MMAPLOCK_SHARED);
Dave Chinner	13ad4fe	2015-11-03 12:37:02 +1100	[diff] [blame]	1281
Christoph Hellwig	d522d56	2017-08-29 10:08:41 -0700	[diff] [blame]	1282	if (write_fault)
Dave Chinner	13ad4fe	2015-11-03 12:37:02 +1100	[diff] [blame]	1283	sb_end_pagefault(inode->i_sb);
Matthew Wilcox	acd76e7	2015-09-08 14:59:06 -0700	[diff] [blame]	1284	return ret;
				1285	}
				1286
Mikulas Patocka	b17164e	2020-09-05 08:13:02 -0400	[diff] [blame]	1287	static inline bool
				1288	xfs_is_write_fault(
				1289	struct vm_fault *vmf)
				1290	{
				1291	return (vmf->flags & FAULT_FLAG_WRITE) &&
				1292	(vmf->vma->vm_flags & VM_SHARED);
				1293	}
				1294
Souptick Joarder	05edd88	2018-05-29 10:39:03 -0700	[diff] [blame]	1295	static vm_fault_t
Christoph Hellwig	d522d56	2017-08-29 10:08:41 -0700	[diff] [blame]	1296	xfs_filemap_fault(
				1297	struct vm_fault *vmf)
				1298	{
				1299	/* DAX can shortcut the normal fault path on write faults! */
				1300	return __xfs_filemap_fault(vmf, PE_SIZE_PTE,
				1301	IS_DAX(file_inode(vmf->vma->vm_file)) &&
Mikulas Patocka	b17164e	2020-09-05 08:13:02 -0400	[diff] [blame]	1302	xfs_is_write_fault(vmf));
Christoph Hellwig	d522d56	2017-08-29 10:08:41 -0700	[diff] [blame]	1303	}
				1304
Souptick Joarder	05edd88	2018-05-29 10:39:03 -0700	[diff] [blame]	1305	static vm_fault_t
Christoph Hellwig	d522d56	2017-08-29 10:08:41 -0700	[diff] [blame]	1306	xfs_filemap_huge_fault(
				1307	struct vm_fault *vmf,
				1308	enum page_entry_size pe_size)
				1309	{
				1310	if (!IS_DAX(file_inode(vmf->vma->vm_file)))
				1311	return VM_FAULT_FALLBACK;
				1312
				1313	/* DAX can shortcut the normal fault path on write faults! */
				1314	return __xfs_filemap_fault(vmf, pe_size,
Mikulas Patocka	b17164e	2020-09-05 08:13:02 -0400	[diff] [blame]	1315	xfs_is_write_fault(vmf));
Christoph Hellwig	d522d56	2017-08-29 10:08:41 -0700	[diff] [blame]	1316	}
				1317
Souptick Joarder	05edd88	2018-05-29 10:39:03 -0700	[diff] [blame]	1318	static vm_fault_t
Christoph Hellwig	d522d56	2017-08-29 10:08:41 -0700	[diff] [blame]	1319	xfs_filemap_page_mkwrite(
				1320	struct vm_fault *vmf)
				1321	{
				1322	return __xfs_filemap_fault(vmf, PE_SIZE_PTE, true);
				1323	}
				1324
Dave Chinner	3af4928	2015-11-03 12:37:02 +1100	[diff] [blame]	1325	/*
Jan Kara	7b565c9	2017-11-01 16:36:46 +0100	[diff] [blame]	1326	* pfn_mkwrite was originally intended to ensure we capture time stamp updates
				1327	* on write faults. In reality, it needs to serialise against truncate and
				1328	* prepare memory for writing so handle is as standard write fault.
Dave Chinner	3af4928	2015-11-03 12:37:02 +1100	[diff] [blame]	1329	*/
Souptick Joarder	05edd88	2018-05-29 10:39:03 -0700	[diff] [blame]	1330	static vm_fault_t
Dave Chinner	3af4928	2015-11-03 12:37:02 +1100	[diff] [blame]	1331	xfs_filemap_pfn_mkwrite(
Dave Chinner	3af4928	2015-11-03 12:37:02 +1100	[diff] [blame]	1332	struct vm_fault *vmf)
				1333	{
				1334
Jan Kara	7b565c9	2017-11-01 16:36:46 +0100	[diff] [blame]	1335	return __xfs_filemap_fault(vmf, PE_SIZE_PTE, true);
Dave Chinner	3af4928	2015-11-03 12:37:02 +1100	[diff] [blame]	1336	}
				1337
Dave Chinner	cd647d5	2020-06-30 11:28:53 -0700	[diff] [blame]	1338	static void
				1339	xfs_filemap_map_pages(
				1340	struct vm_fault *vmf,
				1341	pgoff_t start_pgoff,
				1342	pgoff_t end_pgoff)
				1343	{
				1344	struct inode *inode = file_inode(vmf->vma->vm_file);
				1345
				1346	xfs_ilock(XFS_I(inode), XFS_MMAPLOCK_SHARED);
				1347	filemap_map_pages(vmf, start_pgoff, end_pgoff);
				1348	xfs_iunlock(XFS_I(inode), XFS_MMAPLOCK_SHARED);
				1349	}
				1350
Dave Chinner	6b698ed	2015-06-04 09:18:53 +1000	[diff] [blame]	1351	static const struct vm_operations_struct xfs_file_vm_ops = {
				1352	.fault = xfs_filemap_fault,
Dave Jiang	a2d5816	2017-02-24 14:56:59 -0800	[diff] [blame]	1353	.huge_fault = xfs_filemap_huge_fault,
Dave Chinner	cd647d5	2020-06-30 11:28:53 -0700	[diff] [blame]	1354	.map_pages = xfs_filemap_map_pages,
Dave Chinner	6b698ed	2015-06-04 09:18:53 +1000	[diff] [blame]	1355	.page_mkwrite = xfs_filemap_page_mkwrite,
Dave Chinner	3af4928	2015-11-03 12:37:02 +1100	[diff] [blame]	1356	.pfn_mkwrite = xfs_filemap_pfn_mkwrite,
Dave Chinner	6b698ed	2015-06-04 09:18:53 +1000	[diff] [blame]	1357	};
				1358
				1359	STATIC int
				1360	xfs_file_mmap(
Christoph Hellwig	30fa529	2019-10-24 22:25:38 -0700	[diff] [blame]	1361	struct file *file,
				1362	struct vm_area_struct *vma)
Dave Chinner	6b698ed	2015-06-04 09:18:53 +1000	[diff] [blame]	1363	{
Christoph Hellwig	30fa529	2019-10-24 22:25:38 -0700	[diff] [blame]	1364	struct inode *inode = file_inode(file);
				1365	struct xfs_buftarg *target = xfs_inode_buftarg(XFS_I(inode));
Pankaj Gupta	b21fec4	2019-07-05 19:33:28 +0530	[diff] [blame]	1366
Christoph Hellwig	a39e596	2017-11-01 16:36:47 +0100	[diff] [blame]	1367	/*
Pankaj Gupta	b21fec4	2019-07-05 19:33:28 +0530	[diff] [blame]	1368	* We don't support synchronous mappings for non-DAX files and
				1369	* for DAX files if underneath dax_device is not synchronous.
Christoph Hellwig	a39e596	2017-11-01 16:36:47 +0100	[diff] [blame]	1370	*/
Christoph Hellwig	30fa529	2019-10-24 22:25:38 -0700	[diff] [blame]	1371	if (!daxdev_mapping_supported(vma, target->bt_daxdev))
Christoph Hellwig	a39e596	2017-11-01 16:36:47 +0100	[diff] [blame]	1372	return -EOPNOTSUPP;
				1373
Christoph Hellwig	30fa529	2019-10-24 22:25:38 -0700	[diff] [blame]	1374	file_accessed(file);
Dave Chinner	6b698ed	2015-06-04 09:18:53 +1000	[diff] [blame]	1375	vma->vm_ops = &xfs_file_vm_ops;
Christoph Hellwig	30fa529	2019-10-24 22:25:38 -0700	[diff] [blame]	1376	if (IS_DAX(inode))
Dave Jiang	e1fb4a0	2018-08-17 15:43:40 -0700	[diff] [blame]	1377	vma->vm_flags \|= VM_HUGEPAGE;
Dave Chinner	6b698ed	2015-06-04 09:18:53 +1000	[diff] [blame]	1378	return 0;
Dave Chinner	075a924	2015-02-23 21:44:54 +1100	[diff] [blame]	1379	}
				1380
Arjan van de Ven	4b6f5d2	2006-03-28 01:56:42 -0800	[diff] [blame]	1381	const struct file_operations xfs_file_operations = {
Jeff Liu	3fe3e6b	2012-05-10 21:29:17 +0800	[diff] [blame]	1382	.llseek = xfs_file_llseek,
Al Viro	b4f5d2c	2014-04-02 14:37:59 -0400	[diff] [blame]	1383	.read_iter = xfs_file_read_iter,
Al Viro	bf97f3bc	2014-04-03 14:20:23 -0400	[diff] [blame]	1384	.write_iter = xfs_file_write_iter,
Al Viro	82c156f	2016-09-22 23:35:42 -0400	[diff] [blame]	1385	.splice_read = generic_file_splice_read,
Al Viro	8d02076	2014-04-05 04:27:08 -0400	[diff] [blame]	1386	.splice_write = iter_file_splice_write,
Christoph Hellwig	81214ba	2018-12-04 11:12:08 -0700	[diff] [blame]	1387	.iopoll = iomap_dio_iopoll,
Nathan Scott	3562fd4	2006-03-14 14:00:35 +1100	[diff] [blame]	1388	.unlocked_ioctl = xfs_file_ioctl,
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1389	#ifdef CONFIG_COMPAT
Nathan Scott	3562fd4	2006-03-14 14:00:35 +1100	[diff] [blame]	1390	.compat_ioctl = xfs_file_compat_ioctl,
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1391	#endif
Nathan Scott	3562fd4	2006-03-14 14:00:35 +1100	[diff] [blame]	1392	.mmap = xfs_file_mmap,
Christoph Hellwig	a39e596	2017-11-01 16:36:47 +0100	[diff] [blame]	1393	.mmap_supported_flags = MAP_SYNC,
Nathan Scott	3562fd4	2006-03-14 14:00:35 +1100	[diff] [blame]	1394	.open = xfs_file_open,
				1395	.release = xfs_file_release,
				1396	.fsync = xfs_file_fsync,
Toshi Kani	dbe6ec8	2016-10-07 16:59:59 -0700	[diff] [blame]	1397	.get_unmapped_area = thp_get_unmapped_area,
Christoph Hellwig	2fe17c1	2011-01-14 13:07:43 +0100	[diff] [blame]	1398	.fallocate = xfs_file_fallocate,
Jan Kara	40144e4	2019-08-29 09:04:12 -0700	[diff] [blame]	1399	.fadvise = xfs_file_fadvise,
Darrick J. Wong	2e5dfc9	2018-10-30 10:41:21 +1100	[diff] [blame]	1400	.remap_file_range = xfs_file_remap_range,
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1401	};
				1402
Arjan van de Ven	4b6f5d2	2006-03-28 01:56:42 -0800	[diff] [blame]	1403	const struct file_operations xfs_dir_file_operations = {
Christoph Hellwig	f999a5b	2008-11-28 14:23:32 +1100	[diff] [blame]	1404	.open = xfs_dir_open,
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1405	.read = generic_read_dir,
Al Viro	3b0a3c1	2016-04-20 23:42:46 -0400	[diff] [blame]	1406	.iterate_shared = xfs_file_readdir,
Al Viro	59af158	2008-08-24 07:24:41 -0400	[diff] [blame]	1407	.llseek = generic_file_llseek,
Nathan Scott	3562fd4	2006-03-14 14:00:35 +1100	[diff] [blame]	1408	.unlocked_ioctl = xfs_file_ioctl,
Nathan Scott	d387039	2005-05-06 06:44:46 -0700	[diff] [blame]	1409	#ifdef CONFIG_COMPAT
Nathan Scott	3562fd4	2006-03-14 14:00:35 +1100	[diff] [blame]	1410	.compat_ioctl = xfs_file_compat_ioctl,
Nathan Scott	d387039	2005-05-06 06:44:46 -0700	[diff] [blame]	1411	#endif
Christoph Hellwig	1da2f2d	2011-10-02 14:25:16 +0000	[diff] [blame]	1412	.fsync = xfs_dir_fsync,
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1413	};