Blame - fs/ext4/file.c - SHIFTPHONES/mainline/linux

blob: 4c5f410523514a8370ab63a120f81b26a36ba80b [file] [log] [blame]

Greg Kroah-Hartman	b244131	2017-11-01 15:07:57 +0100	[diff] [blame]	1	// SPDX-License-Identifier: GPL-2.0
Dave Kleikamp	ac27a0e	2006-10-11 01:20:50 -0700	[diff] [blame]	2	/*
Mingming Cao	617ba13	2006-10-11 01:20:53 -0700	[diff] [blame]	3	* linux/fs/ext4/file.c
Dave Kleikamp	ac27a0e	2006-10-11 01:20:50 -0700	[diff] [blame]	4	*
				5	* Copyright (C) 1992, 1993, 1994, 1995
				6	* Remy Card (card@masi.ibp.fr)
				7	* Laboratoire MASI - Institut Blaise Pascal
				8	* Universite Pierre et Marie Curie (Paris VI)
				9	*
				10	* from
				11	*
				12	* linux/fs/minix/file.c
				13	*
				14	* Copyright (C) 1991, 1992 Linus Torvalds
				15	*
Mingming Cao	617ba13	2006-10-11 01:20:53 -0700	[diff] [blame]	16	* ext4 fs regular file handling primitives
Dave Kleikamp	ac27a0e	2006-10-11 01:20:50 -0700	[diff] [blame]	17	*
				18	* 64-bit file support on 64-bit platforms by Jakub Jelinek
				19	* (jj@sunsite.ms.mff.cuni.cz)
				20	*/
				21
				22	#include <linux/time.h>
				23	#include <linux/fs.h>
Christoph Hellwig	545052e	2017-10-01 17:58:54 -0400	[diff] [blame]	24	#include <linux/iomap.h>
Theodore Ts'o	bc0b0d6	2009-06-13 10:09:48 -0400	[diff] [blame]	25	#include <linux/mount.h>
				26	#include <linux/path.h>
Matthew Wilcox	c94c2ac	2015-09-08 14:58:40 -0700	[diff] [blame]	27	#include <linux/dax.h>
Christoph Hellwig	871a293	2010-03-03 09:05:07 -0500	[diff] [blame]	28	#include <linux/quotaops.h>
Zheng Liu	c8c0df2	2012-11-08 21:57:40 -0500	[diff] [blame]	29	#include <linux/pagevec.h>
Christoph Hellwig	e2e40f2	2015-02-22 08:58:50 -0800	[diff] [blame]	30	#include <linux/uio.h>
Jan Kara	b8a6176	2017-11-01 16:36:45 +0100	[diff] [blame]	31	#include <linux/mman.h>
Matthew Bobrowski	378f32b	2019-11-05 23:02:39 +1100	[diff] [blame]	32	#include <linux/backing-dev.h>
Christoph Hellwig	3dcf545	2008-04-29 18:13:32 -0400	[diff] [blame]	33	#include "ext4.h"
				34	#include "ext4_jbd2.h"
Dave Kleikamp	ac27a0e	2006-10-11 01:20:50 -0700	[diff] [blame]	35	#include "xattr.h"
				36	#include "acl.h"
Matthew Bobrowski	569342d	2019-11-05 23:01:51 +1100	[diff] [blame]	37	#include "truncate.h"
Dave Kleikamp	ac27a0e	2006-10-11 01:20:50 -0700	[diff] [blame]	38
Matthew Bobrowski	b1b4705	2019-11-05 23:01:37 +1100	[diff] [blame]	39	static bool ext4_dio_supported(struct inode *inode)
				40	{
				41	if (IS_ENABLED(CONFIG_FS_ENCRYPTION) && IS_ENCRYPTED(inode))
				42	return false;
				43	if (fsverity_active(inode))
				44	return false;
				45	if (ext4_should_journal_data(inode))
				46	return false;
				47	if (ext4_has_inline_data(inode))
				48	return false;
				49	return true;
				50	}
				51
				52	static ssize_t ext4_dio_read_iter(struct kiocb iocb, struct iov_iter to)
				53	{
				54	ssize_t ret;
				55	struct inode *inode = file_inode(iocb->ki_filp);
				56
				57	if (iocb->ki_flags & IOCB_NOWAIT) {
				58	if (!inode_trylock_shared(inode))
				59	return -EAGAIN;
				60	} else {
				61	inode_lock_shared(inode);
				62	}
				63
				64	if (!ext4_dio_supported(inode)) {
				65	inode_unlock_shared(inode);
				66	/*
				67	* Fallback to buffered I/O if the operation being performed on
				68	* the inode is not supported by direct I/O. The IOCB_DIRECT
				69	* flag needs to be cleared here in order to ensure that the
				70	* direct I/O path within generic_file_read_iter() is not
				71	* taken.
				72	*/
				73	iocb->ki_flags &= ~IOCB_DIRECT;
				74	return generic_file_read_iter(iocb, to);
				75	}
				76
Andreas Gruenbacher	4fdccaa	2021-07-24 12:26:41 +0200	[diff] [blame]	77	ret = iomap_dio_rw(iocb, to, &ext4_iomap_ops, NULL, 0, 0);
Matthew Bobrowski	b1b4705	2019-11-05 23:01:37 +1100	[diff] [blame]	78	inode_unlock_shared(inode);
				79
				80	file_accessed(iocb->ki_filp);
				81	return ret;
				82	}
				83
#ifdef CONFIG_FS_DAX
/*
 * DAX read path.
 *
 * Takes the inode lock shared (try-lock only when IOCB_NOWAIT is set,
 * returning -EAGAIN on contention).  If the inode turns out not to be DAX
 * once the lock is held, the read is handed to generic_file_read_iter();
 * otherwise it goes through dax_iomap_rw().
 */
static ssize_t ext4_dax_read_iter(struct kiocb *iocb, struct iov_iter *to)
{
	struct inode *inode = file_inode(iocb->ki_filp);
	ssize_t ret;

	if (iocb->ki_flags & IOCB_NOWAIT) {
		if (!inode_trylock_shared(inode))
			return -EAGAIN;
	} else {
		inode_lock_shared(inode);
	}
	/*
	 * Recheck under inode lock - at this point we are sure it cannot
	 * change anymore
	 */
	if (!IS_DAX(inode)) {
		inode_unlock_shared(inode);
		/* Fallback to buffered IO in case we cannot support DAX */
		return generic_file_read_iter(iocb, to);
	}
	ret = dax_iomap_rw(iocb, to, &ext4_iomap_ops);
	inode_unlock_shared(inode);

	file_accessed(iocb->ki_filp);
	return ret;
}
#endif
				112
				113	static ssize_t ext4_file_read_iter(struct kiocb iocb, struct iov_iter to)
				114	{
Matthew Bobrowski	b1b4705	2019-11-05 23:01:37 +1100	[diff] [blame]	115	struct inode *inode = file_inode(iocb->ki_filp);
				116
				117	if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb))))
Theodore Ts'o	0db1ff2	2017-02-05 01:28:48 -0500	[diff] [blame]	118	return -EIO;
				119
Jan Kara	364443c	2016-11-20 17:36:06 -0500	[diff] [blame]	120	if (!iov_iter_count(to))
				121	return 0; /* skip atime */
				122
				123	#ifdef CONFIG_FS_DAX
Matthew Bobrowski	b1b4705	2019-11-05 23:01:37 +1100	[diff] [blame]	124	if (IS_DAX(inode))
Jan Kara	364443c	2016-11-20 17:36:06 -0500	[diff] [blame]	125	return ext4_dax_read_iter(iocb, to);
				126	#endif
Matthew Bobrowski	b1b4705	2019-11-05 23:01:37 +1100	[diff] [blame]	127	if (iocb->ki_flags & IOCB_DIRECT)
				128	return ext4_dio_read_iter(iocb, to);
				129
Jan Kara	364443c	2016-11-20 17:36:06 -0500	[diff] [blame]	130	return generic_file_read_iter(iocb, to);
				131	}
				132
Dave Kleikamp	ac27a0e	2006-10-11 01:20:50 -0700	[diff] [blame]	133	/*
				134	* Called when an inode is released. Note that this is different
Mingming Cao	617ba13	2006-10-11 01:20:53 -0700	[diff] [blame]	135	* from ext4_file_open: open gets called at every open, but release
Dave Kleikamp	ac27a0e	2006-10-11 01:20:50 -0700	[diff] [blame]	136	* gets called only when /all/ the files are closed.
				137	*/
Theodore Ts'o	af5bc92	2008-09-08 22:25:24 -0400	[diff] [blame]	138	static int ext4_release_file(struct inode inode, struct file filp)
Dave Kleikamp	ac27a0e	2006-10-11 01:20:50 -0700	[diff] [blame]	139	{
Theodore Ts'o	19f5fb7	2010-01-24 14:34:07 -0500	[diff] [blame]	140	if (ext4_test_inode_state(inode, EXT4_STATE_DA_ALLOC_CLOSE)) {
Theodore Ts'o	7d8f9f7	2009-02-24 08:21:14 -0500	[diff] [blame]	141	ext4_alloc_da_blocks(inode);
Theodore Ts'o	19f5fb7	2010-01-24 14:34:07 -0500	[diff] [blame]	142	ext4_clear_inode_state(inode, EXT4_STATE_DA_ALLOC_CLOSE);
Theodore Ts'o	7d8f9f7	2009-02-24 08:21:14 -0500	[diff] [blame]	143	}
Dave Kleikamp	ac27a0e	2006-10-11 01:20:50 -0700	[diff] [blame]	144	/* if we are the last writer on the inode, drop the block reservation */
				145	if ((filp->f_mode & FMODE_WRITE) &&
Aneesh Kumar K.V	d601430	2009-03-27 22:36:43 -0400	[diff] [blame]	146	(atomic_read(&inode->i_writecount) == 1) &&
Dio Putra	e030a28	2020-06-14 11:45:44 +0700	[diff] [blame]	147	!EXT4_I(inode)->i_reserved_data_blocks) {
Aneesh Kumar K.V	0e855ac	2008-01-28 23:58:26 -0500	[diff] [blame]	148	down_write(&EXT4_I(inode)->i_data_sem);
brookxu	27bc446	2020-08-17 15:36:15 +0800	[diff] [blame]	149	ext4_discard_preallocations(inode, 0);
Aneesh Kumar K.V	0e855ac	2008-01-28 23:58:26 -0500	[diff] [blame]	150	up_write(&EXT4_I(inode)->i_data_sem);
Dave Kleikamp	ac27a0e	2006-10-11 01:20:50 -0700	[diff] [blame]	151	}
				152	if (is_dx(inode) && filp->private_data)
Mingming Cao	617ba13	2006-10-11 01:20:53 -0700	[diff] [blame]	153	ext4_htree_free_dir_info(filp->private_data);
Dave Kleikamp	ac27a0e	2006-10-11 01:20:50 -0700	[diff] [blame]	154
				155	return 0;
				156	}
				157
Eric Sandeen	e9e3bce	2011-02-12 08:17:34 -0500	[diff] [blame]	158	/*
				159	* This tests whether the IO in question is block-aligned or not.
				160	* Ext4 utilizes unwritten extents when hole-filling during direct IO, and they
				161	* are converted to written only after the IO is complete. Until they are
				162	* mapped, these blocks appear as holes, so dio_zero_block() will assume that
				163	* it needs to zero out portions of the start and/or end block. If 2 AIO
				164	* threads are at work on the same unwritten block, they must be synchronized
				165	* or one thread will zero the other's data, causing corruption.
				166	*/
Ritesh Harjani	aa9714d0	2019-12-12 11:25:56 +0530	[diff] [blame]	167	static bool
				168	ext4_unaligned_io(struct inode inode, struct iov_iter from, loff_t pos)
Eric Sandeen	e9e3bce	2011-02-12 08:17:34 -0500	[diff] [blame]	169	{
				170	struct super_block *sb = inode->i_sb;
Ritesh Harjani	aa9714d0	2019-12-12 11:25:56 +0530	[diff] [blame]	171	unsigned long blockmask = sb->s_blocksize - 1;
Eric Sandeen	e9e3bce	2011-02-12 08:17:34 -0500	[diff] [blame]	172
Al Viro	9b88416	2014-04-17 16:09:22 -0400	[diff] [blame]	173	if ((pos \| iov_iter_alignment(from)) & blockmask)
Ritesh Harjani	aa9714d0	2019-12-12 11:25:56 +0530	[diff] [blame]	174	return true;
Eric Sandeen	e9e3bce	2011-02-12 08:17:34 -0500	[diff] [blame]	175
Ritesh Harjani	aa9714d0	2019-12-12 11:25:56 +0530	[diff] [blame]	176	return false;
				177	}
				178
				179	static bool
				180	ext4_extending_io(struct inode *inode, loff_t offset, size_t len)
				181	{
				182	if (offset + len > i_size_read(inode) \|\|
				183	offset + len > EXT4_I(inode)->i_disksize)
				184	return true;
				185	return false;
Eric Sandeen	e9e3bce	2011-02-12 08:17:34 -0500	[diff] [blame]	186	}
				187
Jan Kara	213bcd9	2016-11-20 17:29:51 -0500	[diff] [blame]	188	/* Is IO overwriting allocated and initialized blocks? */
				189	static bool ext4_overwrite_io(struct inode *inode, loff_t pos, loff_t len)
				190	{
				191	struct ext4_map_blocks map;
				192	unsigned int blkbits = inode->i_blkbits;
				193	int err, blklen;
				194
				195	if (pos + len > i_size_read(inode))
				196	return false;
				197
				198	map.m_lblk = pos >> blkbits;
				199	map.m_len = EXT4_MAX_BLOCKS(len, pos, blkbits);
				200	blklen = map.m_len;
				201
				202	err = ext4_map_blocks(NULL, inode, &map, 0);
				203	/*
				204	* 'err==len' means that all of the blocks have been preallocated,
				205	* regardless of whether they have been initialized or not. To exclude
				206	* unwritten extents, we need to check m_flags.
				207	*/
				208	return err == blklen && (map.m_flags & EXT4_MAP_MAPPED);
				209	}
				210
Ritesh Harjani	aa9714d0	2019-12-12 11:25:56 +0530	[diff] [blame]	211	static ssize_t ext4_generic_write_checks(struct kiocb *iocb,
				212	struct iov_iter *from)
Jan Kara	213bcd9	2016-11-20 17:29:51 -0500	[diff] [blame]	213	{
				214	struct inode *inode = file_inode(iocb->ki_filp);
				215	ssize_t ret;
				216
Matthew Bobrowski	378f32b	2019-11-05 23:02:39 +1100	[diff] [blame]	217	if (unlikely(IS_IMMUTABLE(inode)))
				218	return -EPERM;
				219
Jan Kara	213bcd9	2016-11-20 17:29:51 -0500	[diff] [blame]	220	ret = generic_write_checks(iocb, from);
				221	if (ret <= 0)
				222	return ret;
Theodore Ts'o	02b016c	2019-06-09 22:04:33 -0400	[diff] [blame]	223
Jan Kara	213bcd9	2016-11-20 17:29:51 -0500	[diff] [blame]	224	/*
				225	* If we have encountered a bitmap-format file, the size limit
				226	* is smaller than s_maxbytes, which is for extent-mapped files.
				227	*/
				228	if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) {
				229	struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
				230
				231	if (iocb->ki_pos >= sbi->s_bitmap_maxbytes)
				232	return -EFBIG;
				233	iov_iter_truncate(from, sbi->s_bitmap_maxbytes - iocb->ki_pos);
				234	}
Matthew Bobrowski	378f32b	2019-11-05 23:02:39 +1100	[diff] [blame]	235
Ritesh Harjani	aa9714d0	2019-12-12 11:25:56 +0530	[diff] [blame]	236	return iov_iter_count(from);
				237	}
				238
				239	static ssize_t ext4_write_checks(struct kiocb iocb, struct iov_iter from)
				240	{
				241	ssize_t ret, count;
				242
				243	count = ext4_generic_write_checks(iocb, from);
				244	if (count <= 0)
				245	return count;
				246
Matthew Bobrowski	378f32b	2019-11-05 23:02:39 +1100	[diff] [blame]	247	ret = file_modified(iocb->ki_filp);
				248	if (ret)
				249	return ret;
Ritesh Harjani	aa9714d0	2019-12-12 11:25:56 +0530	[diff] [blame]	250	return count;
Jan Kara	213bcd9	2016-11-20 17:29:51 -0500	[diff] [blame]	251	}
				252
Matthew Bobrowski	378f32b	2019-11-05 23:02:39 +1100	[diff] [blame]	253	static ssize_t ext4_buffered_write_iter(struct kiocb *iocb,
				254	struct iov_iter *from)
				255	{
				256	ssize_t ret;
				257	struct inode *inode = file_inode(iocb->ki_filp);
				258
				259	if (iocb->ki_flags & IOCB_NOWAIT)
				260	return -EOPNOTSUPP;
				261
Harshad Shirwadkar	aa75f4d	2020-10-15 13:37:57 -0700	[diff] [blame]	262	ext4_fc_start_update(inode);
Matthew Bobrowski	378f32b	2019-11-05 23:02:39 +1100	[diff] [blame]	263	inode_lock(inode);
				264	ret = ext4_write_checks(iocb, from);
				265	if (ret <= 0)
				266	goto out;
				267
				268	current->backing_dev_info = inode_to_bdi(inode);
				269	ret = generic_perform_write(iocb->ki_filp, from, iocb->ki_pos);
				270	current->backing_dev_info = NULL;
				271
				272	out:
				273	inode_unlock(inode);
Harshad Shirwadkar	aa75f4d	2020-10-15 13:37:57 -0700	[diff] [blame]	274	ext4_fc_stop_update(inode);
Matthew Bobrowski	378f32b	2019-11-05 23:02:39 +1100	[diff] [blame]	275	if (likely(ret > 0)) {
				276	iocb->ki_pos += ret;
				277	ret = generic_write_sync(iocb, ret);
				278	}
				279
				280	return ret;
				281	}
				282
Matthew Bobrowski	569342d	2019-11-05 23:01:51 +1100	[diff] [blame]	283	static ssize_t ext4_handle_inode_extension(struct inode *inode, loff_t offset,
				284	ssize_t written, size_t count)
				285	{
				286	handle_t *handle;
				287	bool truncate = false;
				288	u8 blkbits = inode->i_blkbits;
				289	ext4_lblk_t written_blk, end_blk;
Harshad Shirwadkar	4209ae1	2020-04-26 18:34:37 -0700	[diff] [blame]	290	int ret;
Matthew Bobrowski	569342d	2019-11-05 23:01:51 +1100	[diff] [blame]	291
				292	/*
				293	* Note that EXT4_I(inode)->i_disksize can get extended up to
				294	* inode->i_size while the I/O was running due to writeback of delalloc
				295	* blocks. But, the code in ext4_iomap_alloc() is careful to use
				296	* zeroed/unwritten extents if this is possible; thus we won't leave
				297	* uninitialized blocks in a file even if we didn't succeed in writing
				298	* as much as we intended.
				299	*/
				300	WARN_ON_ONCE(i_size_read(inode) < EXT4_I(inode)->i_disksize);
				301	if (offset + count <= EXT4_I(inode)->i_disksize) {
				302	/*
				303	* We need to ensure that the inode is removed from the orphan
				304	* list if it has been added prematurely, due to writeback of
				305	* delalloc blocks.
				306	*/
				307	if (!list_empty(&EXT4_I(inode)->i_orphan) && inode->i_nlink) {
				308	handle = ext4_journal_start(inode, EXT4_HT_INODE, 2);
				309
				310	if (IS_ERR(handle)) {
				311	ext4_orphan_del(NULL, inode);
				312	return PTR_ERR(handle);
				313	}
				314
				315	ext4_orphan_del(handle, inode);
				316	ext4_journal_stop(handle);
				317	}
				318
				319	return written;
				320	}
				321
				322	if (written < 0)
				323	goto truncate;
				324
				325	handle = ext4_journal_start(inode, EXT4_HT_INODE, 2);
				326	if (IS_ERR(handle)) {
				327	written = PTR_ERR(handle);
				328	goto truncate;
				329	}
				330
Harshad Shirwadkar	4209ae1	2020-04-26 18:34:37 -0700	[diff] [blame]	331	if (ext4_update_inode_size(inode, offset + written)) {
				332	ret = ext4_mark_inode_dirty(handle, inode);
				333	if (unlikely(ret)) {
				334	written = ret;
				335	ext4_journal_stop(handle);
				336	goto truncate;
				337	}
				338	}
Matthew Bobrowski	569342d	2019-11-05 23:01:51 +1100	[diff] [blame]	339
				340	/*
				341	* We may need to truncate allocated but not written blocks beyond EOF.
				342	*/
				343	written_blk = ALIGN(offset + written, 1 << blkbits);
				344	end_blk = ALIGN(offset + count, 1 << blkbits);
				345	if (written_blk < end_blk && ext4_can_truncate(inode))
				346	truncate = true;
				347
				348	/*
				349	* Remove the inode from the orphan list if it has been extended and
				350	* everything went OK.
				351	*/
				352	if (!truncate && inode->i_nlink)
				353	ext4_orphan_del(handle, inode);
				354	ext4_journal_stop(handle);
				355
				356	if (truncate) {
				357	truncate:
				358	ext4_truncate_failed_write(inode);
				359	/*
				360	* If the truncate operation failed early, then the inode may
				361	* still be on the orphan list. In that case, we need to try
				362	* remove the inode from the in-memory linked list.
				363	*/
				364	if (inode->i_nlink)
				365	ext4_orphan_del(NULL, inode);
				366	}
				367
				368	return written;
				369	}
				370
Matthew Bobrowski	378f32b	2019-11-05 23:02:39 +1100	[diff] [blame]	371	static int ext4_dio_write_end_io(struct kiocb *iocb, ssize_t size,
				372	int error, unsigned int flags)
				373	{
Jan Kara	5899593	2021-04-15 17:54:17 +0200	[diff] [blame]	374	loff_t pos = iocb->ki_pos;
Matthew Bobrowski	378f32b	2019-11-05 23:02:39 +1100	[diff] [blame]	375	struct inode *inode = file_inode(iocb->ki_filp);
				376
				377	if (error)
				378	return error;
				379
Jan Kara	5899593	2021-04-15 17:54:17 +0200	[diff] [blame]	380	if (size && flags & IOMAP_DIO_UNWRITTEN) {
				381	error = ext4_convert_unwritten_extents(NULL, inode, pos, size);
				382	if (error < 0)
				383	return error;
				384	}
				385	/*
				386	* If we are extending the file, we have to update i_size here before
				387	* page cache gets invalidated in iomap_dio_rw(). Otherwise racing
				388	* buffered reads could zero out too much from page cache pages. Update
				389	* of on-disk size will happen later in ext4_dio_write_iter() where
				390	* we have enough information to also perform orphan list handling etc.
				391	* Note that we perform all extending writes synchronously under
				392	* i_rwsem held exclusively so i_size update is safe here in that case.
				393	* If the write was not extending, we cannot see pos > i_size here
				394	* because operations reducing i_size like truncate wait for all
				395	* outstanding DIO before updating i_size.
				396	*/
				397	pos += size;
				398	if (pos > i_size_read(inode))
				399	i_size_write(inode, pos);
Matthew Bobrowski	378f32b	2019-11-05 23:02:39 +1100	[diff] [blame]	400
				401	return 0;
				402	}
				403
				404	static const struct iomap_dio_ops ext4_dio_write_ops = {
				405	.end_io = ext4_dio_write_end_io,
				406	};
				407
Ritesh Harjani	aa9714d0	2019-12-12 11:25:56 +0530	[diff] [blame]	408	/*
				409	* The intention here is to start with shared lock acquired then see if any
				410	* condition requires an exclusive inode lock. If yes, then we restart the
				411	* whole operation by releasing the shared lock and acquiring exclusive lock.
				412	*
				413	* - For unaligned_io we never take shared lock as it may cause data corruption
				414	* when two unaligned IO tries to modify the same block e.g. while zeroing.
				415	*
				416	* - For extending writes case we don't take the shared lock, since it requires
				417	* updating inode i_disksize and/or orphan handling with exclusive lock.
				418	*
Ritesh Harjani	bc6385d	2019-12-12 11:25:57 +0530	[diff] [blame]	419	* - shared locking will only be true mostly with overwrites. Otherwise we will
				420	* switch to exclusive i_rwsem lock.
Ritesh Harjani	aa9714d0	2019-12-12 11:25:56 +0530	[diff] [blame]	421	*/
				422	static ssize_t ext4_dio_write_checks(struct kiocb iocb, struct iov_iter from,
				423	bool ilock_shared, bool extend)
				424	{
				425	struct file *file = iocb->ki_filp;
				426	struct inode *inode = file_inode(file);
				427	loff_t offset;
				428	size_t count;
				429	ssize_t ret;
				430
				431	restart:
				432	ret = ext4_generic_write_checks(iocb, from);
				433	if (ret <= 0)
				434	goto out;
				435
				436	offset = iocb->ki_pos;
				437	count = ret;
				438	if (ext4_extending_io(inode, offset, count))
				439	*extend = true;
				440	/*
				441	* Determine whether the IO operation will overwrite allocated
Ritesh Harjani	bc6385d	2019-12-12 11:25:57 +0530	[diff] [blame]	442	* and initialized blocks.
Ritesh Harjani	aa9714d0	2019-12-12 11:25:56 +0530	[diff] [blame]	443	* We need exclusive i_rwsem for changing security info
				444	* in file_modified().
				445	*/
				446	if (ilock_shared && (!IS_NOSEC(inode) \|\| extend \|\|
Ritesh Harjani	aa9714d0	2019-12-12 11:25:56 +0530	[diff] [blame]	447	!ext4_overwrite_io(inode, offset, count))) {
Jan Kara	0b3171b	2020-07-08 17:35:16 +0200	[diff] [blame]	448	if (iocb->ki_flags & IOCB_NOWAIT) {
				449	ret = -EAGAIN;
				450	goto out;
				451	}
Ritesh Harjani	aa9714d0	2019-12-12 11:25:56 +0530	[diff] [blame]	452	inode_unlock_shared(inode);
				453	*ilock_shared = false;
				454	inode_lock(inode);
				455	goto restart;
				456	}
				457
				458	ret = file_modified(file);
				459	if (ret < 0)
				460	goto out;
				461
				462	return count;
				463	out:
				464	if (*ilock_shared)
				465	inode_unlock_shared(inode);
				466	else
				467	inode_unlock(inode);
				468	return ret;
				469	}
				470
Matthew Bobrowski	378f32b	2019-11-05 23:02:39 +1100	[diff] [blame]	471	static ssize_t ext4_dio_write_iter(struct kiocb iocb, struct iov_iter from)
				472	{
				473	ssize_t ret;
Matthew Bobrowski	378f32b	2019-11-05 23:02:39 +1100	[diff] [blame]	474	handle_t *handle;
				475	struct inode *inode = file_inode(iocb->ki_filp);
Ritesh Harjani	aa9714d0	2019-12-12 11:25:56 +0530	[diff] [blame]	476	loff_t offset = iocb->ki_pos;
				477	size_t count = iov_iter_count(from);
Jan Kara	8cd115b	2019-12-18 18:44:33 +0100	[diff] [blame]	478	const struct iomap_ops *iomap_ops = &ext4_iomap_ops;
Ritesh Harjani	aa9714d0	2019-12-12 11:25:56 +0530	[diff] [blame]	479	bool extend = false, unaligned_io = false;
				480	bool ilock_shared = true;
				481
				482	/*
				483	* We initially start with shared inode lock unless it is
				484	* unaligned IO which needs exclusive lock anyways.
				485	*/
				486	if (ext4_unaligned_io(inode, from, offset)) {
				487	unaligned_io = true;
				488	ilock_shared = false;
				489	}
				490	/*
				491	* Quick check here without any i_rwsem lock to see if it is extending
				492	* IO. A more reliable check is done in ext4_dio_write_checks() with
				493	* proper locking in place.
				494	*/
				495	if (offset + count > i_size_read(inode))
				496	ilock_shared = false;
Matthew Bobrowski	378f32b	2019-11-05 23:02:39 +1100	[diff] [blame]	497
				498	if (iocb->ki_flags & IOCB_NOWAIT) {
Ritesh Harjani	aa9714d0	2019-12-12 11:25:56 +0530	[diff] [blame]	499	if (ilock_shared) {
				500	if (!inode_trylock_shared(inode))
				501	return -EAGAIN;
				502	} else {
				503	if (!inode_trylock(inode))
				504	return -EAGAIN;
				505	}
Matthew Bobrowski	378f32b	2019-11-05 23:02:39 +1100	[diff] [blame]	506	} else {
Ritesh Harjani	aa9714d0	2019-12-12 11:25:56 +0530	[diff] [blame]	507	if (ilock_shared)
				508	inode_lock_shared(inode);
				509	else
				510	inode_lock(inode);
Matthew Bobrowski	378f32b	2019-11-05 23:02:39 +1100	[diff] [blame]	511	}
				512
Ritesh Harjani	aa9714d0	2019-12-12 11:25:56 +0530	[diff] [blame]	513	/* Fallback to buffered I/O if the inode does not support direct I/O. */
Matthew Bobrowski	378f32b	2019-11-05 23:02:39 +1100	[diff] [blame]	514	if (!ext4_dio_supported(inode)) {
Ritesh Harjani	aa9714d0	2019-12-12 11:25:56 +0530	[diff] [blame]	515	if (ilock_shared)
				516	inode_unlock_shared(inode);
				517	else
				518	inode_unlock(inode);
Matthew Bobrowski	378f32b	2019-11-05 23:02:39 +1100	[diff] [blame]	519	return ext4_buffered_write_iter(iocb, from);
				520	}
				521
Ritesh Harjani	aa9714d0	2019-12-12 11:25:56 +0530	[diff] [blame]	522	ret = ext4_dio_write_checks(iocb, from, &ilock_shared, &extend);
				523	if (ret <= 0)
Matthew Bobrowski	378f32b	2019-11-05 23:02:39 +1100	[diff] [blame]	524	return ret;
Matthew Bobrowski	378f32b	2019-11-05 23:02:39 +1100	[diff] [blame]	525
Jens Axboe	6e014c6	2020-05-24 16:53:16 -0600	[diff] [blame]	526	/* if we're going to block and IOCB_NOWAIT is set, return -EAGAIN */
				527	if ((iocb->ki_flags & IOCB_NOWAIT) && (unaligned_io \|\| extend)) {
				528	ret = -EAGAIN;
				529	goto out;
				530	}
				531
Matthew Bobrowski	378f32b	2019-11-05 23:02:39 +1100	[diff] [blame]	532	offset = iocb->ki_pos;
Ritesh Harjani	aa9714d0	2019-12-12 11:25:56 +0530	[diff] [blame]	533	count = ret;
Matthew Bobrowski	378f32b	2019-11-05 23:02:39 +1100	[diff] [blame]	534
				535	/*
Ritesh Harjani	aa9714d0	2019-12-12 11:25:56 +0530	[diff] [blame]	536	* Unaligned direct IO must be serialized among each other as zeroing
				537	* of partial blocks of two competing unaligned IOs can result in data
				538	* corruption.
				539	*
				540	* So we make sure we don't allow any unaligned IO in flight.
				541	* For IOs where we need not wait (like unaligned non-AIO DIO),
				542	* below inode_dio_wait() may anyway become a no-op, since we start
				543	* with exclusive lock.
Matthew Bobrowski	378f32b	2019-11-05 23:02:39 +1100	[diff] [blame]	544	*/
Ritesh Harjani	aa9714d0	2019-12-12 11:25:56 +0530	[diff] [blame]	545	if (unaligned_io)
				546	inode_dio_wait(inode);
Matthew Bobrowski	378f32b	2019-11-05 23:02:39 +1100	[diff] [blame]	547
Ritesh Harjani	aa9714d0	2019-12-12 11:25:56 +0530	[diff] [blame]	548	if (extend) {
Matthew Bobrowski	378f32b	2019-11-05 23:02:39 +1100	[diff] [blame]	549	handle = ext4_journal_start(inode, EXT4_HT_INODE, 2);
				550	if (IS_ERR(handle)) {
				551	ret = PTR_ERR(handle);
				552	goto out;
				553	}
				554
Harshad Shirwadkar	aa75f4d	2020-10-15 13:37:57 -0700	[diff] [blame]	555	ext4_fc_start_update(inode);
Matthew Bobrowski	378f32b	2019-11-05 23:02:39 +1100	[diff] [blame]	556	ret = ext4_orphan_add(handle, inode);
Harshad Shirwadkar	aa75f4d	2020-10-15 13:37:57 -0700	[diff] [blame]	557	ext4_fc_stop_update(inode);
Matthew Bobrowski	378f32b	2019-11-05 23:02:39 +1100	[diff] [blame]	558	if (ret) {
				559	ext4_journal_stop(handle);
				560	goto out;
				561	}
				562
Matthew Bobrowski	378f32b	2019-11-05 23:02:39 +1100	[diff] [blame]	563	ext4_journal_stop(handle);
				564	}
				565
Jan Kara	8cd115b	2019-12-18 18:44:33 +0100	[diff] [blame]	566	if (ilock_shared)
				567	iomap_ops = &ext4_iomap_overwrite_ops;
				568	ret = iomap_dio_rw(iocb, from, iomap_ops, &ext4_dio_write_ops,
Andreas Gruenbacher	4fdccaa	2021-07-24 12:26:41 +0200	[diff] [blame]	569	(unaligned_io \|\| extend) ? IOMAP_DIO_FORCE_WAIT : 0,
				570	0);
Christoph Hellwig	60263d5	2020-07-23 22:45:59 -0700	[diff] [blame]	571	if (ret == -ENOTBLK)
				572	ret = 0;
Matthew Bobrowski	378f32b	2019-11-05 23:02:39 +1100	[diff] [blame]	573
				574	if (extend)
				575	ret = ext4_handle_inode_extension(inode, offset, ret, count);
				576
				577	out:
Ritesh Harjani	aa9714d0	2019-12-12 11:25:56 +0530	[diff] [blame]	578	if (ilock_shared)
Matthew Bobrowski	378f32b	2019-11-05 23:02:39 +1100	[diff] [blame]	579	inode_unlock_shared(inode);
				580	else
				581	inode_unlock(inode);
				582
				583	if (ret >= 0 && iov_iter_count(from)) {
				584	ssize_t err;
				585	loff_t endbyte;
				586
				587	offset = iocb->ki_pos;
				588	err = ext4_buffered_write_iter(iocb, from);
				589	if (err < 0)
				590	return err;
				591
				592	/*
				593	* We need to ensure that the pages within the page cache for
				594	* the range covered by this I/O are written to disk and
				595	* invalidated. This is in attempt to preserve the expected
				596	* direct I/O semantics in the case we fallback to buffered I/O
				597	* to complete off the I/O request.
				598	*/
				599	ret += err;
				600	endbyte = offset + err - 1;
				601	err = filemap_write_and_wait_range(iocb->ki_filp->f_mapping,
				602	offset, endbyte);
				603	if (!err)
				604	invalidate_mapping_pages(iocb->ki_filp->f_mapping,
				605	offset >> PAGE_SHIFT,
				606	endbyte >> PAGE_SHIFT);
				607	}
				608
				609	return ret;
				610	}
				611
#ifdef CONFIG_FS_DAX
/*
 * DAX write path.
 *
 * Takes the inode lock exclusively (try-lock only when IOCB_NOWAIT is set,
 * returning -EAGAIN on contention), runs the write checks, puts the inode on
 * the orphan list when the write ends beyond i_disksize, performs the write
 * via dax_iomap_rw() and then finishes any extension.  A positive result is
 * passed through generic_write_sync().
 *
 * Returns the number of bytes written or a negative error.
 */
static ssize_t
ext4_dax_write_iter(struct kiocb *iocb, struct iov_iter *from)
{
	ssize_t ret;
	size_t count;
	loff_t offset;
	handle_t *handle;
	bool extend = false;
	struct inode *inode = file_inode(iocb->ki_filp);

	if (iocb->ki_flags & IOCB_NOWAIT) {
		if (!inode_trylock(inode))
			return -EAGAIN;
	} else {
		inode_lock(inode);
	}

	ret = ext4_write_checks(iocb, from);
	if (ret <= 0)
		goto out;

	offset = iocb->ki_pos;
	count = iov_iter_count(from);

	if (offset + count > EXT4_I(inode)->i_disksize) {
		handle = ext4_journal_start(inode, EXT4_HT_INODE, 2);
		if (IS_ERR(handle)) {
			ret = PTR_ERR(handle);
			goto out;
		}

		ret = ext4_orphan_add(handle, inode);
		if (ret) {
			ext4_journal_stop(handle);
			goto out;
		}

		extend = true;
		ext4_journal_stop(handle);
	}

	ret = dax_iomap_rw(iocb, from, &ext4_iomap_ops);

	if (extend)
		ret = ext4_handle_inode_extension(inode, offset, ret, count);
out:
	inode_unlock(inode);
	if (ret > 0)
		ret = generic_write_sync(iocb, ret);
	return ret;
}
#endif
				665
Dave Kleikamp	ac27a0e	2006-10-11 01:20:50 -0700	[diff] [blame]	666	static ssize_t
Al Viro	9b88416	2014-04-17 16:09:22 -0400	[diff] [blame]	667	ext4_file_write_iter(struct kiocb iocb, struct iov_iter from)
Dave Kleikamp	ac27a0e	2006-10-11 01:20:50 -0700	[diff] [blame]	668	{
Al Viro	496ad9a	2013-01-23 17:07:38 -0500	[diff] [blame]	669	struct inode *inode = file_inode(iocb->ki_filp);
Theodore Ts'o	7608e61	2014-04-21 14:26:28 -0400	[diff] [blame]	670
Theodore Ts'o	0db1ff2	2017-02-05 01:28:48 -0500	[diff] [blame]	671	if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb))))
				672	return -EIO;
				673
Jan Kara	776722e	2016-11-20 18:09:11 -0500	[diff] [blame]	674	#ifdef CONFIG_FS_DAX
				675	if (IS_DAX(inode))
				676	return ext4_dax_write_iter(iocb, from);
				677	#endif
Matthew Bobrowski	378f32b	2019-11-05 23:02:39 +1100	[diff] [blame]	678	if (iocb->ki_flags & IOCB_DIRECT)
				679	return ext4_dio_write_iter(iocb, from);
Harshad Shirwadkar	aa75f4d	2020-10-15 13:37:57 -0700	[diff] [blame]	680	else
				681	return ext4_buffered_write_iter(iocb, from);
Dave Kleikamp	ac27a0e	2006-10-11 01:20:50 -0700	[diff] [blame]	682	}
				683
Ross Zwisler	923ae0f	2015-02-16 15:59:38 -0800	[diff] [blame]	684	#ifdef CONFIG_FS_DAX
Souptick Joarder	71fe9899	2018-05-13 16:01:49 -0400	[diff] [blame]	685	static vm_fault_t ext4_dax_huge_fault(struct vm_fault *vmf,
Dave Jiang	c791ace	2017-02-24 14:57:08 -0800	[diff] [blame]	686	enum page_entry_size pe_size)
Ross Zwisler	923ae0f	2015-02-16 15:59:38 -0800	[diff] [blame]	687	{
Souptick Joarder	71fe9899	2018-05-13 16:01:49 -0400	[diff] [blame]	688	int error = 0;
				689	vm_fault_t result;
Jan Kara	2244642	2018-01-07 16:41:01 -0500	[diff] [blame]	690	int retries = 0;
Jan Kara	fb26a1c	2017-05-12 15:46:54 -0700	[diff] [blame]	691	handle_t *handle = NULL;
Dave Jiang	11bac80	2017-02-24 14:56:41 -0800	[diff] [blame]	692	struct inode *inode = file_inode(vmf->vma->vm_file);
Jan Kara	ea3d720	2015-12-07 14:28:03 -0500	[diff] [blame]	693	struct super_block *sb = inode->i_sb;
Randy Dodgen	fd96b8d	2017-08-24 15:26:01 -0400	[diff] [blame]	694
				695	/*
				696	* We have to distinguish real writes from writes which will result in a
				697	* COW page; COW writes should not poke the journal (the file will not
				698	* be changed). Doing so would cause unintended failures when mounted
				699	* read-only.
				700	*
				701	* We check for VM_SHARED rather than vmf->cow_page since the latter is
				702	* unset for pe_size != PE_SIZE_PTE (i.e. only in do_cow_fault); for
				703	* other sizes, dax_iomap_fault will handle splitting / fallback so that
				704	* we eventually come back with a COW page.
				705	*/
				706	bool write = (vmf->flags & FAULT_FLAG_WRITE) &&
				707	(vmf->vma->vm_flags & VM_SHARED);
Jan Kara	d4f5258	2021-02-04 18:05:42 +0100	[diff] [blame]	708	struct address_space *mapping = vmf->vma->vm_file->f_mapping;
Jan Kara	b8a6176	2017-11-01 16:36:45 +0100	[diff] [blame]	709	pfn_t pfn;
Matthew Wilcox	01a33b4	2015-09-08 14:59:22 -0700	[diff] [blame]	710
				711	if (write) {
				712	sb_start_pagefault(sb);
Dave Jiang	11bac80	2017-02-24 14:56:41 -0800	[diff] [blame]	713	file_update_time(vmf->vma->vm_file);
Jan Kara	d4f5258	2021-02-04 18:05:42 +0100	[diff] [blame]	714	filemap_invalidate_lock_shared(mapping);
Jan Kara	2244642	2018-01-07 16:41:01 -0500	[diff] [blame]	715	retry:
Jan Kara	fb26a1c	2017-05-12 15:46:54 -0700	[diff] [blame]	716	handle = ext4_journal_start_sb(sb, EXT4_HT_WRITE_PAGE,
				717	EXT4_DATA_TRANS_BLOCKS(sb));
Jan Kara	497f692	2017-11-01 16:36:44 +0100	[diff] [blame]	718	if (IS_ERR(handle)) {
Jan Kara	d4f5258	2021-02-04 18:05:42 +0100	[diff] [blame]	719	filemap_invalidate_unlock_shared(mapping);
Jan Kara	497f692	2017-11-01 16:36:44 +0100	[diff] [blame]	720	sb_end_pagefault(sb);
				721	return VM_FAULT_SIGBUS;
				722	}
Jan Kara	fb26a1c	2017-05-12 15:46:54 -0700	[diff] [blame]	723	} else {
Jan Kara	d4f5258	2021-02-04 18:05:42 +0100	[diff] [blame]	724	filemap_invalidate_lock_shared(mapping);
Jan Kara	1db1754	2016-10-21 11:33:49 +0200	[diff] [blame]	725	}
Jan Kara	2244642	2018-01-07 16:41:01 -0500	[diff] [blame]	726	result = dax_iomap_fault(vmf, pe_size, &pfn, &error, &ext4_iomap_ops);
Jan Kara	fb26a1c	2017-05-12 15:46:54 -0700	[diff] [blame]	727	if (write) {
Jan Kara	497f692	2017-11-01 16:36:44 +0100	[diff] [blame]	728	ext4_journal_stop(handle);
Jan Kara	2244642	2018-01-07 16:41:01 -0500	[diff] [blame]	729
				730	if ((result & VM_FAULT_ERROR) && error == -ENOSPC &&
				731	ext4_should_retry_alloc(sb, &retries))
				732	goto retry;
Jan Kara	b8a6176	2017-11-01 16:36:45 +0100	[diff] [blame]	733	/* Handling synchronous page fault? */
				734	if (result & VM_FAULT_NEEDDSYNC)
				735	result = dax_finish_sync_fault(vmf, pe_size, pfn);
Jan Kara	d4f5258	2021-02-04 18:05:42 +0100	[diff] [blame]	736	filemap_invalidate_unlock_shared(mapping);
Matthew Wilcox	01a33b4	2015-09-08 14:59:22 -0700	[diff] [blame]	737	sb_end_pagefault(sb);
Jan Kara	fb26a1c	2017-05-12 15:46:54 -0700	[diff] [blame]	738	} else {
Jan Kara	d4f5258	2021-02-04 18:05:42 +0100	[diff] [blame]	739	filemap_invalidate_unlock_shared(mapping);
Jan Kara	fb26a1c	2017-05-12 15:46:54 -0700	[diff] [blame]	740	}
Matthew Wilcox	01a33b4	2015-09-08 14:59:22 -0700	[diff] [blame]	741
				742	return result;
Ross Zwisler	923ae0f	2015-02-16 15:59:38 -0800	[diff] [blame]	743	}
				744
Souptick Joarder	71fe9899	2018-05-13 16:01:49 -0400	[diff] [blame]	745	static vm_fault_t ext4_dax_fault(struct vm_fault *vmf)
Dave Jiang	c791ace	2017-02-24 14:57:08 -0800	[diff] [blame]	746	{
				747	return ext4_dax_huge_fault(vmf, PE_SIZE_PTE);
				748	}
				749
Ross Zwisler	923ae0f	2015-02-16 15:59:38 -0800	[diff] [blame]	750	static const struct vm_operations_struct ext4_dax_vm_ops = {
				751	.fault = ext4_dax_fault,
Dave Jiang	c791ace	2017-02-24 14:57:08 -0800	[diff] [blame]	752	.huge_fault = ext4_dax_huge_fault,
Ross Zwisler	1e9d180	2016-02-27 14:01:13 -0500	[diff] [blame]	753	.page_mkwrite = ext4_dax_fault,
Ross Zwisler	91d25ba	2017-09-06 16:18:43 -0700	[diff] [blame]	754	.pfn_mkwrite = ext4_dax_fault,
Ross Zwisler	923ae0f	2015-02-16 15:59:38 -0800	[diff] [blame]	755	};
				756	#else
				757	#define ext4_dax_vm_ops ext4_file_vm_ops
				758	#endif
				759
Alexey Dobriyan	f0f37e2f	2009-09-27 22:29:37 +0400	[diff] [blame]	760	static const struct vm_operations_struct ext4_file_vm_ops = {
Jan Kara	d4f5258	2021-02-04 18:05:42 +0100	[diff] [blame]	761	.fault = filemap_fault,
Kirill A. Shutemov	f182036	2014-04-07 15:37:19 -0700	[diff] [blame]	762	.map_pages = filemap_map_pages,
Aneesh Kumar K.V	2e9ee85	2008-07-11 19:27:31 -0400	[diff] [blame]	763	.page_mkwrite = ext4_page_mkwrite,
				764	};
				765
				766	static int ext4_file_mmap(struct file file, struct vm_area_struct vma)
				767	{
Michael Halcrow	c9c7429	2015-04-12 00:56:10 -0400	[diff] [blame]	768	struct inode *inode = file->f_mapping->host;
Pankaj Gupta	e46bfc3	2019-07-05 19:33:27 +0530	[diff] [blame]	769	struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
				770	struct dax_device *dax_dev = sbi->s_daxdev;
Michael Halcrow	c9c7429	2015-04-12 00:56:10 -0400	[diff] [blame]	771
Pankaj Gupta	e46bfc3	2019-07-05 19:33:27 +0530	[diff] [blame]	772	if (unlikely(ext4_forced_shutdown(sbi)))
Theodore Ts'o	0db1ff2	2017-02-05 01:28:48 -0500	[diff] [blame]	773	return -EIO;
				774
Jan Kara	b8a6176	2017-11-01 16:36:45 +0100	[diff] [blame]	775	/*
Pankaj Gupta	e46bfc3	2019-07-05 19:33:27 +0530	[diff] [blame]	776	* We don't support synchronous mappings for non-DAX files and
				777	* for DAX files if underneath dax_device is not synchronous.
Jan Kara	b8a6176	2017-11-01 16:36:45 +0100	[diff] [blame]	778	*/
Pankaj Gupta	e46bfc3	2019-07-05 19:33:27 +0530	[diff] [blame]	779	if (!daxdev_mapping_supported(vma, dax_dev))
Jan Kara	b8a6176	2017-11-01 16:36:45 +0100	[diff] [blame]	780	return -EOPNOTSUPP;
				781
Aneesh Kumar K.V	2e9ee85	2008-07-11 19:27:31 -0400	[diff] [blame]	782	file_accessed(file);
Ross Zwisler	923ae0f	2015-02-16 15:59:38 -0800	[diff] [blame]	783	if (IS_DAX(file_inode(file))) {
				784	vma->vm_ops = &ext4_dax_vm_ops;
Dave Jiang	e1fb4a0	2018-08-17 15:43:40 -0700	[diff] [blame]	785	vma->vm_flags \|= VM_HUGEPAGE;
Ross Zwisler	923ae0f	2015-02-16 15:59:38 -0800	[diff] [blame]	786	} else {
				787	vma->vm_ops = &ext4_file_vm_ops;
				788	}
Aneesh Kumar K.V	2e9ee85	2008-07-11 19:27:31 -0400	[diff] [blame]	789	return 0;
				790	}
				791
Amir Goldstein	833a950	2018-05-13 22:44:23 -0400	[diff] [blame]	792	static int ext4_sample_last_mounted(struct super_block *sb,
				793	struct vfsmount *mnt)
Theodore Ts'o	bc0b0d6	2009-06-13 10:09:48 -0400	[diff] [blame]	794	{
Amir Goldstein	833a950	2018-05-13 22:44:23 -0400	[diff] [blame]	795	struct ext4_sb_info *sbi = EXT4_SB(sb);
Theodore Ts'o	bc0b0d6	2009-06-13 10:09:48 -0400	[diff] [blame]	796	struct path path;
				797	char buf[64], *cp;
Amir Goldstein	833a950	2018-05-13 22:44:23 -0400	[diff] [blame]	798	handle_t *handle;
				799	int err;
				800
Harshad Shirwadkar	9b5f6c9	2020-11-05 19:59:09 -0800	[diff] [blame]	801	if (likely(ext4_test_mount_flag(sb, EXT4_MF_MNTDIR_SAMPLED)))
Amir Goldstein	833a950	2018-05-13 22:44:23 -0400	[diff] [blame]	802	return 0;
				803
Amir Goldstein	db6516a	2018-05-13 22:54:44 -0400	[diff] [blame]	804	if (sb_rdonly(sb) \|\| !sb_start_intwrite_trylock(sb))
Amir Goldstein	833a950	2018-05-13 22:44:23 -0400	[diff] [blame]	805	return 0;
				806
Harshad Shirwadkar	9b5f6c9	2020-11-05 19:59:09 -0800	[diff] [blame]	807	ext4_set_mount_flag(sb, EXT4_MF_MNTDIR_SAMPLED);
Amir Goldstein	833a950	2018-05-13 22:44:23 -0400	[diff] [blame]	808	/*
				809	* Sample where the filesystem has been mounted and
				810	* store it in the superblock for sysadmin convenience
				811	* when trying to sort through large numbers of block
				812	* devices or filesystem images.
				813	*/
				814	memset(buf, 0, sizeof(buf));
				815	path.mnt = mnt;
				816	path.dentry = mnt->mnt_root;
				817	cp = d_path(&path, buf, sizeof(buf));
Amir Goldstein	db6516a	2018-05-13 22:54:44 -0400	[diff] [blame]	818	err = 0;
Amir Goldstein	833a950	2018-05-13 22:44:23 -0400	[diff] [blame]	819	if (IS_ERR(cp))
Amir Goldstein	db6516a	2018-05-13 22:54:44 -0400	[diff] [blame]	820	goto out;
Amir Goldstein	833a950	2018-05-13 22:44:23 -0400	[diff] [blame]	821
				822	handle = ext4_journal_start_sb(sb, EXT4_HT_MISC, 1);
Amir Goldstein	db6516a	2018-05-13 22:54:44 -0400	[diff] [blame]	823	err = PTR_ERR(handle);
Amir Goldstein	833a950	2018-05-13 22:44:23 -0400	[diff] [blame]	824	if (IS_ERR(handle))
Amir Goldstein	db6516a	2018-05-13 22:54:44 -0400	[diff] [blame]	825	goto out;
Amir Goldstein	833a950	2018-05-13 22:44:23 -0400	[diff] [blame]	826	BUFFER_TRACE(sbi->s_sbh, "get_write_access");
Jan Kara	188c299	2021-08-16 11:57:04 +0200	[diff] [blame]	827	err = ext4_journal_get_write_access(handle, sb, sbi->s_sbh,
				828	EXT4_JTR_NONE);
Amir Goldstein	833a950	2018-05-13 22:44:23 -0400	[diff] [blame]	829	if (err)
Amir Goldstein	db6516a	2018-05-13 22:54:44 -0400	[diff] [blame]	830	goto out_journal;
Jan Kara	05c2c00	2020-12-16 11:18:39 +0100	[diff] [blame]	831	lock_buffer(sbi->s_sbh);
Theodore Ts'o	5a3b590	2020-12-17 13:24:15 -0500	[diff] [blame]	832	strncpy(sbi->s_es->s_last_mounted, cp,
Amir Goldstein	833a950	2018-05-13 22:44:23 -0400	[diff] [blame]	833	sizeof(sbi->s_es->s_last_mounted));
Jan Kara	05c2c00	2020-12-16 11:18:39 +0100	[diff] [blame]	834	ext4_superblock_csum_set(sb);
				835	unlock_buffer(sbi->s_sbh);
Jan Kara	a3f5cf1	2020-12-16 11:18:44 +0100	[diff] [blame]	836	ext4_handle_dirty_metadata(handle, NULL, sbi->s_sbh);
Amir Goldstein	db6516a	2018-05-13 22:54:44 -0400	[diff] [blame]	837	out_journal:
Amir Goldstein	833a950	2018-05-13 22:44:23 -0400	[diff] [blame]	838	ext4_journal_stop(handle);
Amir Goldstein	db6516a	2018-05-13 22:54:44 -0400	[diff] [blame]	839	out:
				840	sb_end_intwrite(sb);
Amir Goldstein	833a950	2018-05-13 22:44:23 -0400	[diff] [blame]	841	return err;
				842	}
				843
Dio Putra	e030a28	2020-06-14 11:45:44 +0700	[diff] [blame]	844	static int ext4_file_open(struct inode inode, struct file filp)
Amir Goldstein	833a950	2018-05-13 22:44:23 -0400	[diff] [blame]	845	{
Michael Halcrow	c9c7429	2015-04-12 00:56:10 -0400	[diff] [blame]	846	int ret;
Theodore Ts'o	bc0b0d6	2009-06-13 10:09:48 -0400	[diff] [blame]	847
Theodore Ts'o	0db1ff2	2017-02-05 01:28:48 -0500	[diff] [blame]	848	if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb))))
				849	return -EIO;
				850
Amir Goldstein	833a950	2018-05-13 22:44:23 -0400	[diff] [blame]	851	ret = ext4_sample_last_mounted(inode->i_sb, filp->f_path.mnt);
				852	if (ret)
				853	return ret;
Miklos Szeredi	9dd78d8	2016-03-26 16:14:41 -0400	[diff] [blame]	854
Eric Biggers	09a5c31	2017-10-18 20:21:57 -0400	[diff] [blame]	855	ret = fscrypt_file_open(inode, filp);
				856	if (ret)
				857	return ret;
				858
Eric Biggers	c93d8f8	2019-07-22 09:26:24 -0700	[diff] [blame]	859	ret = fsverity_file_open(inode, filp);
				860	if (ret)
				861	return ret;
				862
Theodore Ts'o	8aefcd5	2011-01-10 12:29:43 -0500	[diff] [blame]	863	/*
				864	* Set up the jbd2_inode if we are opening the inode for
				865	* writing and the journal is present
				866	*/
Jan Kara	a361293	2013-08-16 21:19:41 -0400	[diff] [blame]	867	if (filp->f_mode & FMODE_WRITE) {
Michael Halcrow	c9c7429	2015-04-12 00:56:10 -0400	[diff] [blame]	868	ret = ext4_inode_attach_jinode(inode);
Jan Kara	a361293	2013-08-16 21:19:41 -0400	[diff] [blame]	869	if (ret < 0)
				870	return ret;
Theodore Ts'o	8aefcd5	2011-01-10 12:29:43 -0500	[diff] [blame]	871	}
Goldwyn Rodrigues	728fbc0	2017-06-20 07:05:47 -0500	[diff] [blame]	872
Jens Axboe	766ef1e	2020-08-03 17:02:11 -0600	[diff] [blame]	873	filp->f_mode \|= FMODE_NOWAIT \| FMODE_BUF_RASYNC;
Theodore Ts'o	abdd438	2015-05-31 13:35:39 -0400	[diff] [blame]	874	return dquot_file_open(inode, filp);
Theodore Ts'o	bc0b0d6	2009-06-13 10:09:48 -0400	[diff] [blame]	875	}
				876
Toshiyuki Okajima	e0d10bf	2010-10-27 21:30:06 -0400	[diff] [blame]	877	/*
Eric Sandeen	ec7268c	2012-04-30 13:14:03 -0500	[diff] [blame]	878	* ext4_llseek() handles both block-mapped and extent-mapped maxbytes values
				879	* by calling generic_file_llseek_size() with the appropriate maxbytes
				880	* value for each.
Toshiyuki Okajima	e0d10bf	2010-10-27 21:30:06 -0400	[diff] [blame]	881	*/
Andrew Morton	965c8e5	2012-12-17 15:59:39 -0800	[diff] [blame]	882	loff_t ext4_llseek(struct file *file, loff_t offset, int whence)
Toshiyuki Okajima	e0d10bf	2010-10-27 21:30:06 -0400	[diff] [blame]	883	{
				884	struct inode *inode = file->f_mapping->host;
				885	loff_t maxbytes;
				886
				887	if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)))
				888	maxbytes = EXT4_SB(inode->i_sb)->s_bitmap_maxbytes;
				889	else
				890	maxbytes = inode->i_sb->s_maxbytes;
Toshiyuki Okajima	e0d10bf	2010-10-27 21:30:06 -0400	[diff] [blame]	891
Andrew Morton	965c8e5	2012-12-17 15:59:39 -0800	[diff] [blame]	892	switch (whence) {
Christoph Hellwig	545052e	2017-10-01 17:58:54 -0400	[diff] [blame]	893	default:
Andrew Morton	965c8e5	2012-12-17 15:59:39 -0800	[diff] [blame]	894	return generic_file_llseek_size(file, offset, whence,
Zheng Liu	c8c0df2	2012-11-08 21:57:40 -0500	[diff] [blame]	895	maxbytes, i_size_read(inode));
Zheng Liu	c8c0df2	2012-11-08 21:57:40 -0500	[diff] [blame]	896	case SEEK_HOLE:
Christoph Hellwig	545052e	2017-10-01 17:58:54 -0400	[diff] [blame]	897	inode_lock_shared(inode);
Matthew Bobrowski	09edf4d	2019-11-05 23:03:31 +1100	[diff] [blame]	898	offset = iomap_seek_hole(inode, offset,
				899	&ext4_iomap_report_ops);
Christoph Hellwig	545052e	2017-10-01 17:58:54 -0400	[diff] [blame]	900	inode_unlock_shared(inode);
				901	break;
				902	case SEEK_DATA:
				903	inode_lock_shared(inode);
Matthew Bobrowski	09edf4d	2019-11-05 23:03:31 +1100	[diff] [blame]	904	offset = iomap_seek_data(inode, offset,
				905	&ext4_iomap_report_ops);
Christoph Hellwig	545052e	2017-10-01 17:58:54 -0400	[diff] [blame]	906	inode_unlock_shared(inode);
				907	break;
Zheng Liu	c8c0df2	2012-11-08 21:57:40 -0500	[diff] [blame]	908	}
				909
Christoph Hellwig	545052e	2017-10-01 17:58:54 -0400	[diff] [blame]	910	if (offset < 0)
				911	return offset;
				912	return vfs_setpos(file, offset, maxbytes);
Toshiyuki Okajima	e0d10bf	2010-10-27 21:30:06 -0400	[diff] [blame]	913	}
				914
Mingming Cao	617ba13	2006-10-11 01:20:53 -0700	[diff] [blame]	915	const struct file_operations ext4_file_operations = {
Toshiyuki Okajima	e0d10bf	2010-10-27 21:30:06 -0400	[diff] [blame]	916	.llseek = ext4_llseek,
Jan Kara	364443c	2016-11-20 17:36:06 -0500	[diff] [blame]	917	.read_iter = ext4_file_read_iter,
Al Viro	9b88416	2014-04-17 16:09:22 -0400	[diff] [blame]	918	.write_iter = ext4_file_write_iter,
Christoph Hellwig	3e08773	2021-10-12 13:12:24 +0200	[diff] [blame]	919	.iopoll = iocb_bio_iopoll,
Andi Kleen	5cdd7b2	2008-04-29 22:03:54 -0400	[diff] [blame]	920	.unlocked_ioctl = ext4_ioctl,
Dave Kleikamp	ac27a0e	2006-10-11 01:20:50 -0700	[diff] [blame]	921	#ifdef CONFIG_COMPAT
Mingming Cao	617ba13	2006-10-11 01:20:53 -0700	[diff] [blame]	922	.compat_ioctl = ext4_compat_ioctl,
Dave Kleikamp	ac27a0e	2006-10-11 01:20:50 -0700	[diff] [blame]	923	#endif
Aneesh Kumar K.V	2e9ee85	2008-07-11 19:27:31 -0400	[diff] [blame]	924	.mmap = ext4_file_mmap,
Jan Kara	b8a6176	2017-11-01 16:36:45 +0100	[diff] [blame]	925	.mmap_supported_flags = MAP_SYNC,
Theodore Ts'o	bc0b0d6	2009-06-13 10:09:48 -0400	[diff] [blame]	926	.open = ext4_file_open,
Mingming Cao	617ba13	2006-10-11 01:20:53 -0700	[diff] [blame]	927	.release = ext4_release_file,
				928	.fsync = ext4_sync_file,
Toshi Kani	dbe6ec8	2016-10-07 16:59:59 -0700	[diff] [blame]	929	.get_unmapped_area = thp_get_unmapped_area,
Dave Kleikamp	ac27a0e	2006-10-11 01:20:50 -0700	[diff] [blame]	930	.splice_read = generic_file_splice_read,
Al Viro	8d02076	2014-04-05 04:27:08 -0400	[diff] [blame]	931	.splice_write = iter_file_splice_write,
Christoph Hellwig	2fe17c1	2011-01-14 13:07:43 +0100	[diff] [blame]	932	.fallocate = ext4_fallocate,
Dave Kleikamp	ac27a0e	2006-10-11 01:20:50 -0700	[diff] [blame]	933	};
				934
Arjan van de Ven	754661f	2007-02-12 00:55:38 -0800	[diff] [blame]	935	const struct inode_operations ext4_file_inode_operations = {
Mingming Cao	617ba13	2006-10-11 01:20:53 -0700	[diff] [blame]	936	.setattr = ext4_setattr,
David Howells	99652ea	2017-03-31 18:31:56 +0100	[diff] [blame]	937	.getattr = ext4_file_getattr,
Mingming Cao	617ba13	2006-10-11 01:20:53 -0700	[diff] [blame]	938	.listxattr = ext4_listxattr,
Christoph Hellwig	4e34e71	2011-07-23 17:37:31 +0200	[diff] [blame]	939	.get_acl = ext4_get_acl,
Christoph Hellwig	64e178a	2013-12-20 05:16:44 -0800	[diff] [blame]	940	.set_acl = ext4_set_acl,
Eric Sandeen	6873fa0	2008-10-07 00:46:36 -0400	[diff] [blame]	941	.fiemap = ext4_fiemap,
Miklos Szeredi	4db5c2e	2021-04-07 14:36:43 +0200	[diff] [blame]	942	.fileattr_get = ext4_fileattr_get,
				943	.fileattr_set = ext4_fileattr_set,
Dave Kleikamp	ac27a0e	2006-10-11 01:20:50 -0700	[diff] [blame]	944	};
				945