Dave Chinner0b61f8a2018-06-05 19:42:14 -07001// SPDX-License-Identifier: GPL-2.0
Linus Torvalds1da177e2005-04-16 15:20:36 -07002/*
Olaf Weber3e57ecf2006-06-09 14:48:12 +10003 * Copyright (c) 2000-2006 Silicon Graphics, Inc.
Nathan Scott7b718762005-11-02 14:58:39 +11004 * All Rights Reserved.
Linus Torvalds1da177e2005-04-16 15:20:36 -07005 */
Jeff Laytonf0e28282017-12-11 06:35:19 -05006#include <linux/iversion.h>
Robert P. J. Day40ebd812007-11-23 16:30:51 +11007
Linus Torvalds1da177e2005-04-16 15:20:36 -07008#include "xfs.h"
Nathan Scotta844f452005-11-02 14:38:42 +11009#include "xfs_fs.h"
Dave Chinner70a98832013-10-23 10:36:05 +110010#include "xfs_shared.h"
Dave Chinner239880e2013-10-23 10:50:10 +110011#include "xfs_format.h"
12#include "xfs_log_format.h"
13#include "xfs_trans_resv.h"
Linus Torvalds1da177e2005-04-16 15:20:36 -070014#include "xfs_mount.h"
Darrick J. Wong3ab78df2016-08-03 11:15:38 +100015#include "xfs_defer.h"
Dave Chinnera4fbe6a2013-10-23 10:51:50 +110016#include "xfs_inode.h"
Dave Chinnerc24b5df2013-08-12 20:49:45 +100017#include "xfs_dir2.h"
Dave Chinnerc24b5df2013-08-12 20:49:45 +100018#include "xfs_attr.h"
Dave Chinner239880e2013-10-23 10:50:10 +110019#include "xfs_trans_space.h"
20#include "xfs_trans.h"
Linus Torvalds1da177e2005-04-16 15:20:36 -070021#include "xfs_buf_item.h"
Nathan Scotta844f452005-11-02 14:38:42 +110022#include "xfs_inode_item.h"
Nathan Scotta844f452005-11-02 14:38:42 +110023#include "xfs_ialloc.h"
24#include "xfs_bmap.h"
Dave Chinner68988112013-08-12 20:49:42 +100025#include "xfs_bmap_util.h"
Darrick J. Wonge9e899a2017-10-31 12:04:49 -070026#include "xfs_errortag.h"
Linus Torvalds1da177e2005-04-16 15:20:36 -070027#include "xfs_error.h"
Linus Torvalds1da177e2005-04-16 15:20:36 -070028#include "xfs_quota.h"
David Chinner2a82b8b2007-07-11 11:09:12 +100029#include "xfs_filestream.h"
Christoph Hellwig0b1b2132009-12-14 23:14:59 +000030#include "xfs_trace.h"
Dave Chinner33479e02012-10-08 21:56:11 +110031#include "xfs_icache.h"
Dave Chinnerc24b5df2013-08-12 20:49:45 +100032#include "xfs_symlink.h"
Dave Chinner239880e2013-10-23 10:50:10 +110033#include "xfs_trans_priv.h"
34#include "xfs_log.h"
Dave Chinnera4fbe6a2013-10-23 10:51:50 +110035#include "xfs_bmap_btree.h"
Darrick J. Wongaa8968f2016-10-03 09:11:38 -070036#include "xfs_reflink.h"
Dave Chinner9bbafc712021-06-02 10:48:24 +100037#include "xfs_ag.h"
Linus Torvalds1da177e2005-04-16 15:20:36 -070038
Linus Torvalds1da177e2005-04-16 15:20:36 -070039kmem_zone_t *xfs_inode_zone;
Linus Torvalds1da177e2005-04-16 15:20:36 -070040
41/*
Christoph Hellwig8f04c472011-07-08 14:34:34 +020042 * Used in xfs_itruncate_extents(). This is the maximum number of extents
Linus Torvalds1da177e2005-04-16 15:20:36 -070043 * freed from a file in a single transaction.
44 */
45#define XFS_ITRUNC_MAX_EXTENTS 2
46
Dave Chinner54d7b5c2016-02-09 16:54:58 +110047STATIC int xfs_iunlink(struct xfs_trans *, struct xfs_inode *);
48STATIC int xfs_iunlink_remove(struct xfs_trans *, struct xfs_inode *);
Zhi Yong Wuab297432013-12-18 08:22:41 +080049
Dave Chinner2a0ec1d2012-04-23 15:59:02 +100050/*
51 * Helper function to extract the extent size hint from an inode
52 */
53xfs_extlen_t
54xfs_get_extsz_hint(
55 struct xfs_inode *ip)
56{
Christoph Hellwigbdb2ed22019-10-14 10:07:21 -070057 /*
58 * No point in aligning allocations if we need to COW to actually
59 * write to them.
60 */
61 if (xfs_is_always_cow_inode(ip))
62 return 0;
Christoph Hellwigdb073492021-03-29 11:11:44 -070063 if ((ip->i_diflags & XFS_DIFLAG_EXTSIZE) && ip->i_extsize)
Christoph Hellwig031474c2021-03-29 11:11:41 -070064 return ip->i_extsize;
Dave Chinner2a0ec1d2012-04-23 15:59:02 +100065 if (XFS_IS_REALTIME_INODE(ip))
66 return ip->i_mount->m_sb.sb_rextsize;
67 return 0;
68}
69
Dave Chinnerfa96aca2012-10-08 21:56:10 +110070/*
Darrick J. Wongf7ca3522016-10-03 09:11:43 -070071 * Helper function to extract CoW extent size hint from inode.
72 * Between the extent size hint and the CoW extent size hint, we
Darrick J. Wonge153aa72016-10-03 09:11:49 -070073 * return the greater of the two. If the value is zero (automatic),
74 * use the default size.
Darrick J. Wongf7ca3522016-10-03 09:11:43 -070075 */
76xfs_extlen_t
77xfs_get_cowextsz_hint(
78 struct xfs_inode *ip)
79{
80 xfs_extlen_t a, b;
81
82 a = 0;
Christoph Hellwig3e09ab82021-03-29 11:11:45 -070083 if (ip->i_diflags2 & XFS_DIFLAG2_COWEXTSIZE)
Christoph Hellwigb33ce572021-03-29 11:11:42 -070084 a = ip->i_cowextsize;
Darrick J. Wongf7ca3522016-10-03 09:11:43 -070085 b = xfs_get_extsz_hint(ip);
86
Darrick J. Wonge153aa72016-10-03 09:11:49 -070087 a = max(a, b);
88 if (a == 0)
89 return XFS_DEFAULT_COWEXTSZ_HINT;
90 return a;
Darrick J. Wongf7ca3522016-10-03 09:11:43 -070091}
92
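/*
 * Usage sketch (illustrative, not a caller in this file): allocation code
 * that wants to align a new reservation simply asks for the hint and treats
 * zero as "no alignment requested":
 *
 *	xfs_extlen_t	extsz = xfs_get_extsz_hint(ip);
 *	xfs_extlen_t	cowextsz = xfs_get_cowextsz_hint(ip);
 *
 * extsz may legitimately be zero, while the CoW hint falls back to
 * XFS_DEFAULT_COWEXTSZ_HINT in the automatic (zero) case, as above.
 */
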
93/*
Christoph Hellwigefa70be2013-12-18 02:14:39 -080094 * These two are wrapper routines around the xfs_ilock() routine used to
95 * centralize some grungy code. They are used in places that wish to lock the
96 * inode solely for reading the extents. The reason these places can't just
97 * call xfs_ilock(ip, XFS_ILOCK_SHARED) is that the inode lock also guards to
98 * bringing in of the extents from disk for a file in b-tree format. If the
99 * inode is in b-tree format, then we need to lock the inode exclusively until
100 * the extents are read in. Locking it exclusively all the time would limit
101 * our parallelism unnecessarily, though. What we do instead is check to see
102 * if the extents have been read in yet, and only lock the inode exclusively
103 * if they have not.
Dave Chinnerfa96aca2012-10-08 21:56:10 +1100104 *
Christoph Hellwigefa70be2013-12-18 02:14:39 -0800105 * The functions return a value which should be given to the corresponding
Christoph Hellwig01f4f322013-12-06 12:30:08 -0800106 * xfs_iunlock() call.
Dave Chinnerfa96aca2012-10-08 21:56:10 +1100107 */
108uint
Christoph Hellwig309ecac82013-12-06 12:30:09 -0800109xfs_ilock_data_map_shared(
110 struct xfs_inode *ip)
Dave Chinnerfa96aca2012-10-08 21:56:10 +1100111{
Christoph Hellwig309ecac82013-12-06 12:30:09 -0800112 uint lock_mode = XFS_ILOCK_SHARED;
Dave Chinnerfa96aca2012-10-08 21:56:10 +1100113
Christoph Hellwigb2197a32021-04-13 11:15:12 -0700114 if (xfs_need_iread_extents(&ip->i_df))
Dave Chinnerfa96aca2012-10-08 21:56:10 +1100115 lock_mode = XFS_ILOCK_EXCL;
Dave Chinnerfa96aca2012-10-08 21:56:10 +1100116 xfs_ilock(ip, lock_mode);
Dave Chinnerfa96aca2012-10-08 21:56:10 +1100117 return lock_mode;
118}
119
Christoph Hellwigefa70be2013-12-18 02:14:39 -0800120uint
121xfs_ilock_attr_map_shared(
122 struct xfs_inode *ip)
Dave Chinnerfa96aca2012-10-08 21:56:10 +1100123{
Christoph Hellwigefa70be2013-12-18 02:14:39 -0800124 uint lock_mode = XFS_ILOCK_SHARED;
125
Christoph Hellwigb2197a32021-04-13 11:15:12 -0700126 if (ip->i_afp && xfs_need_iread_extents(ip->i_afp))
Christoph Hellwigefa70be2013-12-18 02:14:39 -0800127 lock_mode = XFS_ILOCK_EXCL;
128 xfs_ilock(ip, lock_mode);
129 return lock_mode;
Dave Chinnerfa96aca2012-10-08 21:56:10 +1100130}
131
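/*
 * Usage sketch (illustrative): callers that only need to walk an extent
 * list pair the helpers above with xfs_iunlock(), handing back whichever
 * lock mode was actually taken:
 *
 *	uint	lock_mode = xfs_ilock_data_map_shared(ip);
 *
 *	... read the data fork extent list ...
 *
 *	xfs_iunlock(ip, lock_mode);
 */
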
132/*
Christoph Hellwig65523212016-11-30 14:33:25 +1100133 * In addition to i_rwsem in the VFS inode, the xfs inode contains 2
134 * multi-reader locks: i_mmap_lock and the i_lock. This routine allows
135 * various combinations of the locks to be obtained.
Dave Chinnerfa96aca2012-10-08 21:56:10 +1100136 *
Dave Chinner653c60b2015-02-23 21:43:37 +1100137 * The 3 locks should always be ordered so that the IO lock is obtained first,
138 * the mmap lock second and the ilock last in order to prevent deadlock.
Dave Chinnerfa96aca2012-10-08 21:56:10 +1100139 *
Dave Chinner653c60b2015-02-23 21:43:37 +1100140 * Basic locking order:
141 *
Christoph Hellwig65523212016-11-30 14:33:25 +1100142 * i_rwsem -> i_mmap_lock -> page_lock -> i_ilock
Dave Chinner653c60b2015-02-23 21:43:37 +1100143 *
Michel Lespinassec1e8d7c2020-06-08 21:33:54 -0700144 * mmap_lock locking order:
Dave Chinner653c60b2015-02-23 21:43:37 +1100145 *
Michel Lespinassec1e8d7c2020-06-08 21:33:54 -0700146 * i_rwsem -> page lock -> mmap_lock
147 * mmap_lock -> i_mmap_lock -> page_lock
Dave Chinner653c60b2015-02-23 21:43:37 +1100148 *
Michel Lespinassec1e8d7c2020-06-08 21:33:54 -0700149 * The difference in mmap_lock locking order means that we cannot hold the
Dave Chinner653c60b2015-02-23 21:43:37 +1100150 * i_mmap_lock over syscall based read(2)/write(2) based IO. These IO paths can
Michel Lespinassec1e8d7c2020-06-08 21:33:54 -0700151 * fault in pages during copy in/out (for buffered IO) or require the mmap_lock
Dave Chinner653c60b2015-02-23 21:43:37 +1100152 * in get_user_pages() to map the user pages into the kernel address space for
Christoph Hellwig65523212016-11-30 14:33:25 +1100153 * direct IO. Similarly the i_rwsem cannot be taken inside a page fault because
Michel Lespinassec1e8d7c2020-06-08 21:33:54 -0700154 * page faults already hold the mmap_lock.
Dave Chinner653c60b2015-02-23 21:43:37 +1100155 *
156 * Hence to serialise fully against both syscall and mmap based IO, we need to
Christoph Hellwig65523212016-11-30 14:33:25 +1100157 * take both the i_rwsem and the i_mmap_lock. These locks should *only* be both
Dave Chinner653c60b2015-02-23 21:43:37 +1100158 * taken in places where we need to invalidate the page cache in a race
159 * free manner (e.g. truncate, hole punch and other extent manipulation
160 * functions).
Dave Chinnerfa96aca2012-10-08 21:56:10 +1100161 */
162void
163xfs_ilock(
164 xfs_inode_t *ip,
165 uint lock_flags)
166{
167 trace_xfs_ilock(ip, lock_flags, _RET_IP_);
168
169 /*
170 * You can't set both SHARED and EXCL for the same lock, and only
171 * XFS_IOLOCK_SHARED/EXCL, XFS_MMAPLOCK_SHARED/EXCL, XFS_ILOCK_SHARED
172 * and XFS_ILOCK_EXCL are valid values to set in lock_flags.
173 */
174 ASSERT((lock_flags & (XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL)) !=
175 (XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL));
Dave Chinner653c60b2015-02-23 21:43:37 +1100176 ASSERT((lock_flags & (XFS_MMAPLOCK_SHARED | XFS_MMAPLOCK_EXCL)) !=
177 (XFS_MMAPLOCK_SHARED | XFS_MMAPLOCK_EXCL));
Dave Chinnerfa96aca2012-10-08 21:56:10 +1100178 ASSERT((lock_flags & (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)) !=
179 (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL));
Dave Chinner0952c812015-08-19 10:32:49 +1000180 ASSERT((lock_flags & ~(XFS_LOCK_MASK | XFS_LOCK_SUBCLASS_MASK)) == 0);
Dave Chinnerfa96aca2012-10-08 21:56:10 +1100181
Christoph Hellwig65523212016-11-30 14:33:25 +1100182 if (lock_flags & XFS_IOLOCK_EXCL) {
183 down_write_nested(&VFS_I(ip)->i_rwsem,
184 XFS_IOLOCK_DEP(lock_flags));
185 } else if (lock_flags & XFS_IOLOCK_SHARED) {
186 down_read_nested(&VFS_I(ip)->i_rwsem,
187 XFS_IOLOCK_DEP(lock_flags));
188 }
Dave Chinnerfa96aca2012-10-08 21:56:10 +1100189
Dave Chinner653c60b2015-02-23 21:43:37 +1100190 if (lock_flags & XFS_MMAPLOCK_EXCL)
191 mrupdate_nested(&ip->i_mmaplock, XFS_MMAPLOCK_DEP(lock_flags));
192 else if (lock_flags & XFS_MMAPLOCK_SHARED)
193 mraccess_nested(&ip->i_mmaplock, XFS_MMAPLOCK_DEP(lock_flags));
194
Dave Chinnerfa96aca2012-10-08 21:56:10 +1100195 if (lock_flags & XFS_ILOCK_EXCL)
196 mrupdate_nested(&ip->i_lock, XFS_ILOCK_DEP(lock_flags));
197 else if (lock_flags & XFS_ILOCK_SHARED)
198 mraccess_nested(&ip->i_lock, XFS_ILOCK_DEP(lock_flags));
199}
200
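/*
 * Usage sketch (illustrative): an extent-manipulation path that must keep
 * out both syscall and mmap based IO, as described above, takes the two
 * outer locks together and later drops them with the same flags:
 *
 *	xfs_ilock(ip, XFS_IOLOCK_EXCL | XFS_MMAPLOCK_EXCL);
 *
 *	... invalidate the page cache and modify extents ...
 *
 *	xfs_iunlock(ip, XFS_IOLOCK_EXCL | XFS_MMAPLOCK_EXCL);
 */
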
201/*
202 * This is just like xfs_ilock(), except that the caller
203 * is guaranteed not to sleep. It returns 1 if it gets
204 * the requested locks and 0 otherwise. If the IO lock is
205 * obtained but the inode lock cannot be, then the IO lock
206 * is dropped before returning.
207 *
208 * ip -- the inode being locked
209 * lock_flags -- this parameter indicates the inode's locks
210 * to be locked. See the comment for xfs_ilock() for a list
211 * of valid values.
212 */
213int
214xfs_ilock_nowait(
215 xfs_inode_t *ip,
216 uint lock_flags)
217{
218 trace_xfs_ilock_nowait(ip, lock_flags, _RET_IP_);
219
220 /*
221 * You can't set both SHARED and EXCL for the same lock, and only
222 * XFS_IOLOCK_SHARED/EXCL, XFS_MMAPLOCK_SHARED/EXCL, XFS_ILOCK_SHARED
223 * and XFS_ILOCK_EXCL are valid values to set in lock_flags.
224 */
225 ASSERT((lock_flags & (XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL)) !=
226 (XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL));
Dave Chinner653c60b2015-02-23 21:43:37 +1100227 ASSERT((lock_flags & (XFS_MMAPLOCK_SHARED | XFS_MMAPLOCK_EXCL)) !=
228 (XFS_MMAPLOCK_SHARED | XFS_MMAPLOCK_EXCL));
Dave Chinnerfa96aca2012-10-08 21:56:10 +1100229 ASSERT((lock_flags & (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)) !=
230 (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL));
Dave Chinner0952c812015-08-19 10:32:49 +1000231 ASSERT((lock_flags & ~(XFS_LOCK_MASK | XFS_LOCK_SUBCLASS_MASK)) == 0);
Dave Chinnerfa96aca2012-10-08 21:56:10 +1100232
233 if (lock_flags & XFS_IOLOCK_EXCL) {
Christoph Hellwig65523212016-11-30 14:33:25 +1100234 if (!down_write_trylock(&VFS_I(ip)->i_rwsem))
Dave Chinnerfa96aca2012-10-08 21:56:10 +1100235 goto out;
236 } else if (lock_flags & XFS_IOLOCK_SHARED) {
Christoph Hellwig65523212016-11-30 14:33:25 +1100237 if (!down_read_trylock(&VFS_I(ip)->i_rwsem))
Dave Chinnerfa96aca2012-10-08 21:56:10 +1100238 goto out;
239 }
Dave Chinner653c60b2015-02-23 21:43:37 +1100240
241 if (lock_flags & XFS_MMAPLOCK_EXCL) {
242 if (!mrtryupdate(&ip->i_mmaplock))
243 goto out_undo_iolock;
244 } else if (lock_flags & XFS_MMAPLOCK_SHARED) {
245 if (!mrtryaccess(&ip->i_mmaplock))
246 goto out_undo_iolock;
247 }
248
Dave Chinnerfa96aca2012-10-08 21:56:10 +1100249 if (lock_flags & XFS_ILOCK_EXCL) {
250 if (!mrtryupdate(&ip->i_lock))
Dave Chinner653c60b2015-02-23 21:43:37 +1100251 goto out_undo_mmaplock;
Dave Chinnerfa96aca2012-10-08 21:56:10 +1100252 } else if (lock_flags & XFS_ILOCK_SHARED) {
253 if (!mrtryaccess(&ip->i_lock))
Dave Chinner653c60b2015-02-23 21:43:37 +1100254 goto out_undo_mmaplock;
Dave Chinnerfa96aca2012-10-08 21:56:10 +1100255 }
256 return 1;
257
Dave Chinner653c60b2015-02-23 21:43:37 +1100258out_undo_mmaplock:
259 if (lock_flags & XFS_MMAPLOCK_EXCL)
260 mrunlock_excl(&ip->i_mmaplock);
261 else if (lock_flags & XFS_MMAPLOCK_SHARED)
262 mrunlock_shared(&ip->i_mmaplock);
263out_undo_iolock:
Dave Chinnerfa96aca2012-10-08 21:56:10 +1100264 if (lock_flags & XFS_IOLOCK_EXCL)
Christoph Hellwig65523212016-11-30 14:33:25 +1100265 up_write(&VFS_I(ip)->i_rwsem);
Dave Chinnerfa96aca2012-10-08 21:56:10 +1100266 else if (lock_flags & XFS_IOLOCK_SHARED)
Christoph Hellwig65523212016-11-30 14:33:25 +1100267 up_read(&VFS_I(ip)->i_rwsem);
Dave Chinner653c60b2015-02-23 21:43:37 +1100268out:
Dave Chinnerfa96aca2012-10-08 21:56:10 +1100269 return 0;
270}
271
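/*
 * Usage sketch (illustrative): non-blocking callers check the return value
 * and back off instead of sleeping; on a zero return no new locks are held:
 *
 *	if (!xfs_ilock_nowait(ip, XFS_ILOCK_EXCL))
 *		return -EAGAIN;	/* or requeue - the caller's choice */
 *	... do the work ...
 *	xfs_iunlock(ip, XFS_ILOCK_EXCL);
 */
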
272/*
273 * xfs_iunlock() is used to drop the inode locks acquired with
274 * xfs_ilock() and xfs_ilock_nowait(). The caller must pass
275 * in the flags given to xfs_ilock() or xfs_ilock_nowait() so
276 * that we know which locks to drop.
277 *
278 * ip -- the inode being unlocked
279 * lock_flags -- this parameter indicates the inode's locks
280 * to be unlocked. See the comment for xfs_ilock() for a list
281 * of valid values for this parameter.
282 *
283 */
284void
285xfs_iunlock(
286 xfs_inode_t *ip,
287 uint lock_flags)
288{
289 /*
290 * You can't set both SHARED and EXCL for the same lock, and only
291 * XFS_IOLOCK_SHARED/EXCL, XFS_MMAPLOCK_SHARED/EXCL, XFS_ILOCK_SHARED
292 * and XFS_ILOCK_EXCL are valid values to set in lock_flags.
293 */
294 ASSERT((lock_flags & (XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL)) !=
295 (XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL));
Dave Chinner653c60b2015-02-23 21:43:37 +1100296 ASSERT((lock_flags & (XFS_MMAPLOCK_SHARED | XFS_MMAPLOCK_EXCL)) !=
297 (XFS_MMAPLOCK_SHARED | XFS_MMAPLOCK_EXCL));
Dave Chinnerfa96aca2012-10-08 21:56:10 +1100298 ASSERT((lock_flags & (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)) !=
299 (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL));
Dave Chinner0952c812015-08-19 10:32:49 +1000300 ASSERT((lock_flags & ~(XFS_LOCK_MASK | XFS_LOCK_SUBCLASS_MASK)) == 0);
Dave Chinnerfa96aca2012-10-08 21:56:10 +1100301 ASSERT(lock_flags != 0);
302
303 if (lock_flags & XFS_IOLOCK_EXCL)
Christoph Hellwig65523212016-11-30 14:33:25 +1100304 up_write(&VFS_I(ip)->i_rwsem);
Dave Chinnerfa96aca2012-10-08 21:56:10 +1100305 else if (lock_flags & XFS_IOLOCK_SHARED)
Christoph Hellwig65523212016-11-30 14:33:25 +1100306 up_read(&VFS_I(ip)->i_rwsem);
Dave Chinnerfa96aca2012-10-08 21:56:10 +1100307
Dave Chinner653c60b2015-02-23 21:43:37 +1100308 if (lock_flags & XFS_MMAPLOCK_EXCL)
309 mrunlock_excl(&ip->i_mmaplock);
310 else if (lock_flags & XFS_MMAPLOCK_SHARED)
311 mrunlock_shared(&ip->i_mmaplock);
312
Dave Chinnerfa96aca2012-10-08 21:56:10 +1100313 if (lock_flags & XFS_ILOCK_EXCL)
314 mrunlock_excl(&ip->i_lock);
315 else if (lock_flags & XFS_ILOCK_SHARED)
316 mrunlock_shared(&ip->i_lock);
317
318 trace_xfs_iunlock(ip, lock_flags, _RET_IP_);
319}
320
321/*
322 * Give up write locks. The I/O lock cannot be held nested
323 * if it is being demoted.
324 */
325void
326xfs_ilock_demote(
327 xfs_inode_t *ip,
328 uint lock_flags)
329{
Dave Chinner653c60b2015-02-23 21:43:37 +1100330 ASSERT(lock_flags & (XFS_IOLOCK_EXCL|XFS_MMAPLOCK_EXCL|XFS_ILOCK_EXCL));
331 ASSERT((lock_flags &
332 ~(XFS_IOLOCK_EXCL|XFS_MMAPLOCK_EXCL|XFS_ILOCK_EXCL)) == 0);
Dave Chinnerfa96aca2012-10-08 21:56:10 +1100333
334 if (lock_flags & XFS_ILOCK_EXCL)
335 mrdemote(&ip->i_lock);
Dave Chinner653c60b2015-02-23 21:43:37 +1100336 if (lock_flags & XFS_MMAPLOCK_EXCL)
337 mrdemote(&ip->i_mmaplock);
Dave Chinnerfa96aca2012-10-08 21:56:10 +1100338 if (lock_flags & XFS_IOLOCK_EXCL)
Christoph Hellwig65523212016-11-30 14:33:25 +1100339 downgrade_write(&VFS_I(ip)->i_rwsem);
Dave Chinnerfa96aca2012-10-08 21:56:10 +1100340
341 trace_xfs_ilock_demote(ip, lock_flags, _RET_IP_);
342}
343
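/*
 * Usage sketch (illustrative): a writer that only needs exclusivity for an
 * initial setup phase can downgrade rather than unlock and relock:
 *
 *	xfs_ilock(ip, XFS_IOLOCK_EXCL);
 *	... work that requires the exclusive lock ...
 *	xfs_ilock_demote(ip, XFS_IOLOCK_EXCL);
 *	... continue under XFS_IOLOCK_SHARED ...
 *	xfs_iunlock(ip, XFS_IOLOCK_SHARED);
 */
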
Dave Chinner742ae1e2013-04-30 21:39:34 +1000344#if defined(DEBUG) || defined(XFS_WARN)
Dave Chinnerfa96aca2012-10-08 21:56:10 +1100345int
346xfs_isilocked(
347 xfs_inode_t *ip,
348 uint lock_flags)
349{
350 if (lock_flags & (XFS_ILOCK_EXCL|XFS_ILOCK_SHARED)) {
351 if (!(lock_flags & XFS_ILOCK_SHARED))
352 return !!ip->i_lock.mr_writer;
353 return rwsem_is_locked(&ip->i_lock.mr_lock);
354 }
355
Dave Chinner653c60b2015-02-23 21:43:37 +1100356 if (lock_flags & (XFS_MMAPLOCK_EXCL|XFS_MMAPLOCK_SHARED)) {
357 if (!(lock_flags & XFS_MMAPLOCK_SHARED))
358 return !!ip->i_mmaplock.mr_writer;
359 return rwsem_is_locked(&ip->i_mmaplock.mr_lock);
360 }
361
Dave Chinnerfa96aca2012-10-08 21:56:10 +1100362 if (lock_flags & (XFS_IOLOCK_EXCL|XFS_IOLOCK_SHARED)) {
363 if (!(lock_flags & XFS_IOLOCK_SHARED))
Christoph Hellwig65523212016-11-30 14:33:25 +1100364 return !debug_locks ||
365 lockdep_is_held_type(&VFS_I(ip)->i_rwsem, 0);
366 return rwsem_is_locked(&VFS_I(ip)->i_rwsem);
Dave Chinnerfa96aca2012-10-08 21:56:10 +1100367 }
368
369 ASSERT(0);
370 return 0;
371}
372#endif
373
Dave Chinnerb6a99472015-08-25 10:05:13 +1000374/*
375 * xfs_lockdep_subclass_ok() is only used in an ASSERT, so is only called when
376 * DEBUG or XFS_WARN is set. And MAX_LOCKDEP_SUBCLASSES is then only defined
377 * when CONFIG_LOCKDEP is set. Hence the complex define below to avoid build
378 * errors and warnings.
379 */
380#if (defined(DEBUG) || defined(XFS_WARN)) && defined(CONFIG_LOCKDEP)
Dave Chinner3403ccc2015-08-20 09:27:49 +1000381static bool
382xfs_lockdep_subclass_ok(
383 int subclass)
384{
385 return subclass < MAX_LOCKDEP_SUBCLASSES;
386}
387#else
388#define xfs_lockdep_subclass_ok(subclass) (true)
389#endif
390
Dave Chinnerc24b5df2013-08-12 20:49:45 +1000391/*
Dave Chinner653c60b2015-02-23 21:43:37 +1100392 * Bump the subclass so xfs_lock_inodes() acquires each lock with a different
Dave Chinner0952c812015-08-19 10:32:49 +1000393 * value. This can be called for any type of inode lock combination, including
394 * parent locking. Care must be taken to ensure we don't overrun the subclass
395 * storage fields in the class mask we build.
Dave Chinnerc24b5df2013-08-12 20:49:45 +1000396 */
397static inline int
398xfs_lock_inumorder(int lock_mode, int subclass)
399{
Dave Chinner0952c812015-08-19 10:32:49 +1000400 int class = 0;
401
402 ASSERT(!(lock_mode & (XFS_ILOCK_PARENT | XFS_ILOCK_RTBITMAP |
403 XFS_ILOCK_RTSUM)));
Dave Chinner3403ccc2015-08-20 09:27:49 +1000404 ASSERT(xfs_lockdep_subclass_ok(subclass));
Dave Chinner0952c812015-08-19 10:32:49 +1000405
Dave Chinner653c60b2015-02-23 21:43:37 +1100406 if (lock_mode & (XFS_IOLOCK_SHARED|XFS_IOLOCK_EXCL)) {
Dave Chinner0952c812015-08-19 10:32:49 +1000407 ASSERT(subclass <= XFS_IOLOCK_MAX_SUBCLASS);
Dave Chinner0952c812015-08-19 10:32:49 +1000408 class += subclass << XFS_IOLOCK_SHIFT;
Dave Chinner653c60b2015-02-23 21:43:37 +1100409 }
410
411 if (lock_mode & (XFS_MMAPLOCK_SHARED|XFS_MMAPLOCK_EXCL)) {
Dave Chinner0952c812015-08-19 10:32:49 +1000412 ASSERT(subclass <= XFS_MMAPLOCK_MAX_SUBCLASS);
413 class += subclass << XFS_MMAPLOCK_SHIFT;
Dave Chinner653c60b2015-02-23 21:43:37 +1100414 }
415
Dave Chinner0952c812015-08-19 10:32:49 +1000416 if (lock_mode & (XFS_ILOCK_SHARED|XFS_ILOCK_EXCL)) {
417 ASSERT(subclass <= XFS_ILOCK_MAX_SUBCLASS);
418 class += subclass << XFS_ILOCK_SHIFT;
419 }
Dave Chinnerc24b5df2013-08-12 20:49:45 +1000420
Dave Chinner0952c812015-08-19 10:32:49 +1000421 return (lock_mode & ~XFS_LOCK_SUBCLASS_MASK) | class;
Dave Chinnerc24b5df2013-08-12 20:49:45 +1000422}
423
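/*
 * Worked example (illustrative): locking the third inode of a batch (i.e.
 * subclass 2) with the ilock encodes the subclass into the mode bits, so
 *
 *	xfs_lock_inumorder(XFS_ILOCK_EXCL, 2)
 *
 * returns XFS_ILOCK_EXCL with (2 << XFS_ILOCK_SHIFT) set in the subclass
 * field, which xfs_ilock() later extracts via XFS_ILOCK_DEP() for its
 * lockdep annotation.
 */
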
424/*
Dave Chinner95afcf52015-03-25 14:03:32 +1100425 * The following routine will lock n inodes in exclusive mode. We assume the
426 * caller calls us with the inodes in i_ino order.
Dave Chinnerc24b5df2013-08-12 20:49:45 +1000427 *
Dave Chinner95afcf52015-03-25 14:03:32 +1100428 * We need to detect deadlock where an inode that we lock is in the AIL and we
429 * start waiting for another inode that is locked by a thread in a long running
430 * transaction (such as truncate). This can result in deadlock since the long
431 * running trans might need to wait for the inode we just locked in order to
432 * push the tail and free space in the log.
Dave Chinner0952c812015-08-19 10:32:49 +1000433 *
434 * xfs_lock_inodes() can only be used to lock one type of lock at a time -
435 * the iolock, the mmaplock or the ilock, but not more than one at a time. If we
436 * lock more than one at a time, lockdep will report false positives saying we
437 * have violated locking orders.
Dave Chinnerc24b5df2013-08-12 20:49:45 +1000438 */
Eric Sandeen0d5a75e2016-06-01 17:38:15 +1000439static void
Dave Chinnerc24b5df2013-08-12 20:49:45 +1000440xfs_lock_inodes(
Christoph Hellwigefe23302019-06-28 19:27:33 -0700441 struct xfs_inode **ips,
442 int inodes,
443 uint lock_mode)
Dave Chinnerc24b5df2013-08-12 20:49:45 +1000444{
Christoph Hellwigefe23302019-06-28 19:27:33 -0700445 int attempts = 0, i, j, try_lock;
446 struct xfs_log_item *lp;
Dave Chinnerc24b5df2013-08-12 20:49:45 +1000447
Dave Chinner0952c812015-08-19 10:32:49 +1000448 /*
449 * Currently supports between 2 and 5 inodes with exclusive locking. We
450 * support an arbitrary depth of locking here, but absolute limits on
Randy Dunlapb63da6c2020-08-05 08:49:58 -0700451 * inodes depend on the type of locking and the limits placed by
Dave Chinner0952c812015-08-19 10:32:49 +1000452 * lockdep annotations in xfs_lock_inumorder. These are all checked by
453 * the asserts.
454 */
Dave Chinner95afcf52015-03-25 14:03:32 +1100455 ASSERT(ips && inodes >= 2 && inodes <= 5);
Dave Chinner0952c812015-08-19 10:32:49 +1000456 ASSERT(lock_mode & (XFS_IOLOCK_EXCL | XFS_MMAPLOCK_EXCL |
457 XFS_ILOCK_EXCL));
458 ASSERT(!(lock_mode & (XFS_IOLOCK_SHARED | XFS_MMAPLOCK_SHARED |
459 XFS_ILOCK_SHARED)));
Dave Chinner0952c812015-08-19 10:32:49 +1000460 ASSERT(!(lock_mode & XFS_MMAPLOCK_EXCL) ||
461 inodes <= XFS_MMAPLOCK_MAX_SUBCLASS + 1);
462 ASSERT(!(lock_mode & XFS_ILOCK_EXCL) ||
463 inodes <= XFS_ILOCK_MAX_SUBCLASS + 1);
464
465 if (lock_mode & XFS_IOLOCK_EXCL) {
466 ASSERT(!(lock_mode & (XFS_MMAPLOCK_EXCL | XFS_ILOCK_EXCL)));
467 } else if (lock_mode & XFS_MMAPLOCK_EXCL)
468 ASSERT(!(lock_mode & XFS_ILOCK_EXCL));
Dave Chinnerc24b5df2013-08-12 20:49:45 +1000469
470 try_lock = 0;
471 i = 0;
Dave Chinnerc24b5df2013-08-12 20:49:45 +1000472again:
473 for (; i < inodes; i++) {
474 ASSERT(ips[i]);
475
Dave Chinner95afcf52015-03-25 14:03:32 +1100476 if (i && (ips[i] == ips[i - 1])) /* Already locked */
Dave Chinnerc24b5df2013-08-12 20:49:45 +1000477 continue;
478
479 /*
Dave Chinner95afcf52015-03-25 14:03:32 +1100480 * If try_lock is not set yet, make sure all locked inodes are
481 * not in the AIL. If any are, set try_lock to be used later.
Dave Chinnerc24b5df2013-08-12 20:49:45 +1000482 */
Dave Chinnerc24b5df2013-08-12 20:49:45 +1000483 if (!try_lock) {
484 for (j = (i - 1); j >= 0 && !try_lock; j--) {
Christoph Hellwigb3b14aa2019-06-28 19:27:33 -0700485 lp = &ips[j]->i_itemp->ili_item;
Dave Chinner22525c12018-05-09 07:47:34 -0700486 if (lp && test_bit(XFS_LI_IN_AIL, &lp->li_flags))
Dave Chinnerc24b5df2013-08-12 20:49:45 +1000487 try_lock++;
Dave Chinnerc24b5df2013-08-12 20:49:45 +1000488 }
489 }
490
491 /*
492 * If any of the previous locks we have locked is in the AIL,
493 * we must TRY to get the second and subsequent locks. If
494 * we can't get any, we must release all we have
495 * and try again.
496 */
Dave Chinner95afcf52015-03-25 14:03:32 +1100497 if (!try_lock) {
Dave Chinnerc24b5df2013-08-12 20:49:45 +1000498 xfs_ilock(ips[i], xfs_lock_inumorder(lock_mode, i));
Dave Chinner95afcf52015-03-25 14:03:32 +1100499 continue;
Dave Chinnerc24b5df2013-08-12 20:49:45 +1000500 }
Dave Chinner95afcf52015-03-25 14:03:32 +1100501
502 /* try_lock means we have an inode locked that is in the AIL. */
503 ASSERT(i != 0);
504 if (xfs_ilock_nowait(ips[i], xfs_lock_inumorder(lock_mode, i)))
505 continue;
506
507 /*
508 * Unlock all previous guys and try again. xfs_iunlock will try
509 * to push the tail if the inode is in the AIL.
510 */
511 attempts++;
512 for (j = i - 1; j >= 0; j--) {
513 /*
514 * Check to see if we've already unlocked this one. Not
515 * the first one going back, and the inode ptr is the
516 * same.
517 */
518 if (j != (i - 1) && ips[j] == ips[j + 1])
519 continue;
520
521 xfs_iunlock(ips[j], lock_mode);
522 }
523
524 if ((attempts % 5) == 0) {
525 delay(1); /* Don't just spin the CPU */
Dave Chinner95afcf52015-03-25 14:03:32 +1100526 }
527 i = 0;
528 try_lock = 0;
529 goto again;
Dave Chinnerc24b5df2013-08-12 20:49:45 +1000530 }
Dave Chinnerc24b5df2013-08-12 20:49:45 +1000531}
532
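/*
 * Usage sketch (illustrative; ip_a..ip_d are placeholders): callers sort
 * the pointers by i_ino first (duplicates are tolerated) and then lock the
 * whole batch with a single exclusive lock type:
 *
 *	struct xfs_inode	*ips[4] = { ip_a, ip_b, ip_c, ip_d };
 *
 *	... sort ips[] by i_ino ...
 *	xfs_lock_inodes(ips, 4, XFS_ILOCK_EXCL);
 */
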
533/*
Dave Chinner653c60b2015-02-23 21:43:37 +1100534 * xfs_lock_two_inodes() can only be used to lock one type of lock at a time -
Darrick J. Wong7c2d2382018-01-26 15:27:33 -0800535 * the mmaplock or the ilock, but not more than one type at a time. If we lock
536 * more than one at a time, lockdep will report false positives saying we have
537 * violated locking orders. The iolock must be double-locked separately since
538 * we use i_rwsem for that. We now support taking one lock EXCL and the other
539 * SHARED.
Dave Chinnerc24b5df2013-08-12 20:49:45 +1000540 */
541void
542xfs_lock_two_inodes(
Darrick J. Wong7c2d2382018-01-26 15:27:33 -0800543 struct xfs_inode *ip0,
544 uint ip0_mode,
545 struct xfs_inode *ip1,
546 uint ip1_mode)
Dave Chinnerc24b5df2013-08-12 20:49:45 +1000547{
Darrick J. Wong7c2d2382018-01-26 15:27:33 -0800548 struct xfs_inode *temp;
549 uint mode_temp;
Dave Chinnerc24b5df2013-08-12 20:49:45 +1000550 int attempts = 0;
Christoph Hellwigefe23302019-06-28 19:27:33 -0700551 struct xfs_log_item *lp;
Dave Chinnerc24b5df2013-08-12 20:49:45 +1000552
Darrick J. Wong7c2d2382018-01-26 15:27:33 -0800553 ASSERT(hweight32(ip0_mode) == 1);
554 ASSERT(hweight32(ip1_mode) == 1);
555 ASSERT(!(ip0_mode & (XFS_IOLOCK_SHARED|XFS_IOLOCK_EXCL)));
556 ASSERT(!(ip1_mode & (XFS_IOLOCK_SHARED|XFS_IOLOCK_EXCL)));
557 ASSERT(!(ip0_mode & (XFS_MMAPLOCK_SHARED|XFS_MMAPLOCK_EXCL)) ||
558 !(ip0_mode & (XFS_ILOCK_SHARED|XFS_ILOCK_EXCL)));
559 ASSERT(!(ip1_mode & (XFS_MMAPLOCK_SHARED|XFS_MMAPLOCK_EXCL)) ||
560 !(ip1_mode & (XFS_ILOCK_SHARED|XFS_ILOCK_EXCL)));
561 ASSERT(!(ip1_mode & (XFS_MMAPLOCK_SHARED|XFS_MMAPLOCK_EXCL)) ||
562 !(ip0_mode & (XFS_ILOCK_SHARED|XFS_ILOCK_EXCL)));
563 ASSERT(!(ip0_mode & (XFS_MMAPLOCK_SHARED|XFS_MMAPLOCK_EXCL)) ||
564 !(ip1_mode & (XFS_ILOCK_SHARED|XFS_ILOCK_EXCL)));
Dave Chinner653c60b2015-02-23 21:43:37 +1100565
Dave Chinnerc24b5df2013-08-12 20:49:45 +1000566 ASSERT(ip0->i_ino != ip1->i_ino);
567
568 if (ip0->i_ino > ip1->i_ino) {
569 temp = ip0;
570 ip0 = ip1;
571 ip1 = temp;
Darrick J. Wong7c2d2382018-01-26 15:27:33 -0800572 mode_temp = ip0_mode;
573 ip0_mode = ip1_mode;
574 ip1_mode = mode_temp;
Dave Chinnerc24b5df2013-08-12 20:49:45 +1000575 }
576
577 again:
Darrick J. Wong7c2d2382018-01-26 15:27:33 -0800578 xfs_ilock(ip0, xfs_lock_inumorder(ip0_mode, 0));
Dave Chinnerc24b5df2013-08-12 20:49:45 +1000579
580 /*
581 * If the first lock we have locked is in the AIL, we must TRY to get
582 * the second lock. If we can't get it, we must release the first one
583 * and try again.
584 */
Christoph Hellwigb3b14aa2019-06-28 19:27:33 -0700585 lp = &ip0->i_itemp->ili_item;
Dave Chinner22525c12018-05-09 07:47:34 -0700586 if (lp && test_bit(XFS_LI_IN_AIL, &lp->li_flags)) {
Darrick J. Wong7c2d2382018-01-26 15:27:33 -0800587 if (!xfs_ilock_nowait(ip1, xfs_lock_inumorder(ip1_mode, 1))) {
588 xfs_iunlock(ip0, ip0_mode);
Dave Chinnerc24b5df2013-08-12 20:49:45 +1000589 if ((++attempts % 5) == 0)
590 delay(1); /* Don't just spin the CPU */
591 goto again;
592 }
593 } else {
Darrick J. Wong7c2d2382018-01-26 15:27:33 -0800594 xfs_ilock(ip1, xfs_lock_inumorder(ip1_mode, 1));
Dave Chinnerc24b5df2013-08-12 20:49:45 +1000595 }
596}
597
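/*
 * Usage sketch (illustrative): xfs_link() below uses this helper to take
 * the ilocks of the source inode and the target directory together:
 *
 *	xfs_lock_two_inodes(sip, XFS_ILOCK_EXCL, tdp, XFS_ILOCK_EXCL);
 */
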
Linus Torvalds1da177e2005-04-16 15:20:36 -0700598uint
599xfs_ip2xflags(
Dave Chinner58f88ca2016-01-04 16:44:15 +1100600 struct xfs_inode *ip)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700601{
Christoph Hellwig44225012021-03-29 11:11:46 -0700602 uint flags = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700603
Christoph Hellwig44225012021-03-29 11:11:46 -0700604 if (ip->i_diflags & XFS_DIFLAG_ANY) {
605 if (ip->i_diflags & XFS_DIFLAG_REALTIME)
606 flags |= FS_XFLAG_REALTIME;
607 if (ip->i_diflags & XFS_DIFLAG_PREALLOC)
608 flags |= FS_XFLAG_PREALLOC;
609 if (ip->i_diflags & XFS_DIFLAG_IMMUTABLE)
610 flags |= FS_XFLAG_IMMUTABLE;
611 if (ip->i_diflags & XFS_DIFLAG_APPEND)
612 flags |= FS_XFLAG_APPEND;
613 if (ip->i_diflags & XFS_DIFLAG_SYNC)
614 flags |= FS_XFLAG_SYNC;
615 if (ip->i_diflags & XFS_DIFLAG_NOATIME)
616 flags |= FS_XFLAG_NOATIME;
617 if (ip->i_diflags & XFS_DIFLAG_NODUMP)
618 flags |= FS_XFLAG_NODUMP;
619 if (ip->i_diflags & XFS_DIFLAG_RTINHERIT)
620 flags |= FS_XFLAG_RTINHERIT;
621 if (ip->i_diflags & XFS_DIFLAG_PROJINHERIT)
622 flags |= FS_XFLAG_PROJINHERIT;
623 if (ip->i_diflags & XFS_DIFLAG_NOSYMLINKS)
624 flags |= FS_XFLAG_NOSYMLINKS;
625 if (ip->i_diflags & XFS_DIFLAG_EXTSIZE)
626 flags |= FS_XFLAG_EXTSIZE;
627 if (ip->i_diflags & XFS_DIFLAG_EXTSZINHERIT)
628 flags |= FS_XFLAG_EXTSZINHERIT;
629 if (ip->i_diflags & XFS_DIFLAG_NODEFRAG)
630 flags |= FS_XFLAG_NODEFRAG;
631 if (ip->i_diflags & XFS_DIFLAG_FILESTREAM)
632 flags |= FS_XFLAG_FILESTREAM;
633 }
634
635 if (ip->i_diflags2 & XFS_DIFLAG2_ANY) {
636 if (ip->i_diflags2 & XFS_DIFLAG2_DAX)
637 flags |= FS_XFLAG_DAX;
638 if (ip->i_diflags2 & XFS_DIFLAG2_COWEXTSIZE)
639 flags |= FS_XFLAG_COWEXTSIZE;
640 }
641
642 if (XFS_IFORK_Q(ip))
643 flags |= FS_XFLAG_HASATTR;
644 return flags;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700645}
646
Linus Torvalds1da177e2005-04-16 15:20:36 -0700647/*
Dave Chinnerc24b5df2013-08-12 20:49:45 +1000648 * Looks up an inode from "name". If ci_name is not NULL, then a CI match
649 * is allowed, otherwise it has to be an exact match. If a CI match is found,
650 * ci_name->name will point to the actual name (caller must free) or
651 * will be set to NULL if an exact match is found.
652 */
653int
654xfs_lookup(
655 xfs_inode_t *dp,
656 struct xfs_name *name,
657 xfs_inode_t **ipp,
658 struct xfs_name *ci_name)
659{
660 xfs_ino_t inum;
661 int error;
Dave Chinnerc24b5df2013-08-12 20:49:45 +1000662
663 trace_xfs_lookup(dp, name);
664
665 if (XFS_FORCED_SHUTDOWN(dp->i_mount))
Dave Chinner24513372014-06-25 14:58:08 +1000666 return -EIO;
Dave Chinnerc24b5df2013-08-12 20:49:45 +1000667
Dave Chinnerc24b5df2013-08-12 20:49:45 +1000668 error = xfs_dir_lookup(NULL, dp, name, &inum, ci_name);
Dave Chinnerc24b5df2013-08-12 20:49:45 +1000669 if (error)
Dave Chinnerdbad7c92015-08-19 10:33:00 +1000670 goto out_unlock;
Dave Chinnerc24b5df2013-08-12 20:49:45 +1000671
672 error = xfs_iget(dp->i_mount, NULL, inum, 0, 0, ipp);
673 if (error)
674 goto out_free_name;
675
676 return 0;
677
678out_free_name:
679 if (ci_name)
680 kmem_free(ci_name->name);
Dave Chinnerdbad7c92015-08-19 10:33:00 +1000681out_unlock:
Dave Chinnerc24b5df2013-08-12 20:49:45 +1000682 *ipp = NULL;
683 return error;
684}
685
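/*
 * Usage sketch (illustrative; xname is a placeholder struct xfs_name): an
 * exact-match lookup passes a NULL ci_name and, on success, receives a
 * referenced but unlocked inode:
 *
 *	error = xfs_lookup(dp, &xname, &ip, NULL);
 *	if (error)
 *		return error;
 *	... use ip, then drop the reference with xfs_irele(ip) ...
 */
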
Darrick J. Wong8a569d72020-09-13 10:16:40 -0700686/* Propagate di_flags from a parent inode to a child inode. */
687static void
688xfs_inode_inherit_flags(
689 struct xfs_inode *ip,
690 const struct xfs_inode *pip)
691{
692 unsigned int di_flags = 0;
693 umode_t mode = VFS_I(ip)->i_mode;
694
695 if (S_ISDIR(mode)) {
Christoph Hellwigdb073492021-03-29 11:11:44 -0700696 if (pip->i_diflags & XFS_DIFLAG_RTINHERIT)
Darrick J. Wong8a569d72020-09-13 10:16:40 -0700697 di_flags |= XFS_DIFLAG_RTINHERIT;
Christoph Hellwigdb073492021-03-29 11:11:44 -0700698 if (pip->i_diflags & XFS_DIFLAG_EXTSZINHERIT) {
Darrick J. Wong8a569d72020-09-13 10:16:40 -0700699 di_flags |= XFS_DIFLAG_EXTSZINHERIT;
Christoph Hellwig031474c2021-03-29 11:11:41 -0700700 ip->i_extsize = pip->i_extsize;
Darrick J. Wong8a569d72020-09-13 10:16:40 -0700701 }
Christoph Hellwigdb073492021-03-29 11:11:44 -0700702 if (pip->i_diflags & XFS_DIFLAG_PROJINHERIT)
Darrick J. Wong8a569d72020-09-13 10:16:40 -0700703 di_flags |= XFS_DIFLAG_PROJINHERIT;
704 } else if (S_ISREG(mode)) {
Christoph Hellwigdb073492021-03-29 11:11:44 -0700705 if ((pip->i_diflags & XFS_DIFLAG_RTINHERIT) &&
Darrick J. Wongd4f2c142020-09-13 10:16:41 -0700706 xfs_sb_version_hasrealtime(&ip->i_mount->m_sb))
Darrick J. Wong8a569d72020-09-13 10:16:40 -0700707 di_flags |= XFS_DIFLAG_REALTIME;
Christoph Hellwigdb073492021-03-29 11:11:44 -0700708 if (pip->i_diflags & XFS_DIFLAG_EXTSZINHERIT) {
Darrick J. Wong8a569d72020-09-13 10:16:40 -0700709 di_flags |= XFS_DIFLAG_EXTSIZE;
Christoph Hellwig031474c2021-03-29 11:11:41 -0700710 ip->i_extsize = pip->i_extsize;
Darrick J. Wong8a569d72020-09-13 10:16:40 -0700711 }
712 }
Christoph Hellwigdb073492021-03-29 11:11:44 -0700713 if ((pip->i_diflags & XFS_DIFLAG_NOATIME) &&
Darrick J. Wong8a569d72020-09-13 10:16:40 -0700714 xfs_inherit_noatime)
715 di_flags |= XFS_DIFLAG_NOATIME;
Christoph Hellwigdb073492021-03-29 11:11:44 -0700716 if ((pip->i_diflags & XFS_DIFLAG_NODUMP) &&
Darrick J. Wong8a569d72020-09-13 10:16:40 -0700717 xfs_inherit_nodump)
718 di_flags |= XFS_DIFLAG_NODUMP;
Christoph Hellwigdb073492021-03-29 11:11:44 -0700719 if ((pip->i_diflags & XFS_DIFLAG_SYNC) &&
Darrick J. Wong8a569d72020-09-13 10:16:40 -0700720 xfs_inherit_sync)
721 di_flags |= XFS_DIFLAG_SYNC;
Christoph Hellwigdb073492021-03-29 11:11:44 -0700722 if ((pip->i_diflags & XFS_DIFLAG_NOSYMLINKS) &&
Darrick J. Wong8a569d72020-09-13 10:16:40 -0700723 xfs_inherit_nosymlinks)
724 di_flags |= XFS_DIFLAG_NOSYMLINKS;
Christoph Hellwigdb073492021-03-29 11:11:44 -0700725 if ((pip->i_diflags & XFS_DIFLAG_NODEFRAG) &&
Darrick J. Wong8a569d72020-09-13 10:16:40 -0700726 xfs_inherit_nodefrag)
727 di_flags |= XFS_DIFLAG_NODEFRAG;
Christoph Hellwigdb073492021-03-29 11:11:44 -0700728 if (pip->i_diflags & XFS_DIFLAG_FILESTREAM)
Darrick J. Wong8a569d72020-09-13 10:16:40 -0700729 di_flags |= XFS_DIFLAG_FILESTREAM;
730
Christoph Hellwigdb073492021-03-29 11:11:44 -0700731 ip->i_diflags |= di_flags;
Darrick J. Wong8a569d72020-09-13 10:16:40 -0700732}
733
734/* Propagate di_flags2 from a parent inode to a child inode. */
735static void
736xfs_inode_inherit_flags2(
737 struct xfs_inode *ip,
738 const struct xfs_inode *pip)
739{
Christoph Hellwig3e09ab82021-03-29 11:11:45 -0700740 if (pip->i_diflags2 & XFS_DIFLAG2_COWEXTSIZE) {
741 ip->i_diflags2 |= XFS_DIFLAG2_COWEXTSIZE;
Christoph Hellwigb33ce572021-03-29 11:11:42 -0700742 ip->i_cowextsize = pip->i_cowextsize;
Darrick J. Wong8a569d72020-09-13 10:16:40 -0700743 }
Christoph Hellwig3e09ab82021-03-29 11:11:45 -0700744 if (pip->i_diflags2 & XFS_DIFLAG2_DAX)
745 ip->i_diflags2 |= XFS_DIFLAG2_DAX;
Darrick J. Wong8a569d72020-09-13 10:16:40 -0700746}
747
Dave Chinnerc24b5df2013-08-12 20:49:45 +1000748/*
Dave Chinner1abcf262020-12-09 10:05:15 -0800749 * Initialise a newly allocated inode and return the in-core inode to the
750 * caller locked exclusively.
Linus Torvalds1da177e2005-04-16 15:20:36 -0700751 */
Dave Chinnerb652afd2021-06-02 10:48:24 +1000752int
Dave Chinner1abcf262020-12-09 10:05:15 -0800753xfs_init_new_inode(
Christoph Hellwigf736d932021-01-21 14:19:58 +0100754 struct user_namespace *mnt_userns,
Dave Chinner1abcf262020-12-09 10:05:15 -0800755 struct xfs_trans *tp,
756 struct xfs_inode *pip,
757 xfs_ino_t ino,
758 umode_t mode,
759 xfs_nlink_t nlink,
760 dev_t rdev,
761 prid_t prid,
Dave Chinnere6a688c2021-03-22 09:52:03 -0700762 bool init_xattrs,
Dave Chinner1abcf262020-12-09 10:05:15 -0800763 struct xfs_inode **ipp)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700764{
Christoph Hellwig01ea1732021-01-22 16:48:18 -0800765 struct inode *dir = pip ? VFS_I(pip) : NULL;
Dave Chinner1abcf262020-12-09 10:05:15 -0800766 struct xfs_mount *mp = tp->t_mountp;
767 struct xfs_inode *ip;
768 unsigned int flags;
769 int error;
770 struct timespec64 tv;
771 struct inode *inode;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700772
773 /*
Dave Chinner8b269842018-04-17 17:17:35 -0700774 * Protect against obviously corrupt allocation btree records. Later
775 * xfs_iget checks will catch re-allocation of other active in-memory
776 * and on-disk inodes. If we don't catch reallocating the parent inode
777 * here we will deadlock in xfs_iget() so we have to do these checks
778 * first.
779 */
780 if ((pip && ino == pip->i_ino) || !xfs_verify_dir_ino(mp, ino)) {
781 xfs_alert(mp, "Allocated a known in-use inode 0x%llx!", ino);
782 return -EFSCORRUPTED;
783 }
784
785 /*
Dave Chinner1abcf262020-12-09 10:05:15 -0800786 * Get the in-core inode with the lock held exclusively to prevent
787 * others from looking at until we're done.
Linus Torvalds1da177e2005-04-16 15:20:36 -0700788 */
Dave Chinner1abcf262020-12-09 10:05:15 -0800789 error = xfs_iget(mp, tp, ino, XFS_IGET_CREATE, XFS_ILOCK_EXCL, &ip);
David Chinnerbf904242008-10-30 17:36:14 +1100790 if (error)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700791 return error;
Dave Chinner1abcf262020-12-09 10:05:15 -0800792
Linus Torvalds1da177e2005-04-16 15:20:36 -0700793 ASSERT(ip != NULL);
Dave Chinner39878482016-02-09 16:54:58 +1100794 inode = VFS_I(ip);
Dave Chinner54d7b5c2016-02-09 16:54:58 +1100795 set_nlink(inode, nlink);
Christoph Hellwig66f36462017-10-19 11:07:09 -0700796 inode->i_rdev = rdev;
Christoph Hellwigceaf6032021-03-29 11:11:39 -0700797 ip->i_projid = prid;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700798
Christoph Hellwig01ea1732021-01-22 16:48:18 -0800799 if (dir && !(dir->i_mode & S_ISGID) &&
800 (mp->m_flags & XFS_MOUNT_GRPID)) {
Christian Braunerdb998552021-03-20 13:26:24 +0100801 inode_fsuid_set(inode, mnt_userns);
Christoph Hellwig01ea1732021-01-22 16:48:18 -0800802 inode->i_gid = dir->i_gid;
803 inode->i_mode = mode;
Christoph Hellwig3d8f2822020-02-21 08:31:26 -0800804 } else {
Linus Torvalds7d6beb72021-02-23 13:39:45 -0800805 inode_init_owner(mnt_userns, inode, dir, mode);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700806 }
807
808 /*
809 * If the group ID of the new file does not match the effective group
810 * ID or one of the supplementary group IDs, the S_ISGID bit is cleared
811 * (and only if the irix_sgid_inherit compatibility variable is set).
812 */
Christoph Hellwig54295152020-02-21 08:31:27 -0800813 if (irix_sgid_inherit &&
Christoph Hellwigf736d932021-01-21 14:19:58 +0100814 (inode->i_mode & S_ISGID) &&
815 !in_group_p(i_gid_into_mnt(mnt_userns, inode)))
Dave Chinnerc19b3b052016-02-09 16:54:58 +1100816 inode->i_mode &= ~S_ISGID;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700817
Christoph Hellwig13d2c102021-03-29 11:11:40 -0700818 ip->i_disk_size = 0;
Christoph Hellwigdaf83962020-05-18 10:27:22 -0700819 ip->i_df.if_nextents = 0;
Christoph Hellwig6e73a542021-03-29 11:11:40 -0700820 ASSERT(ip->i_nblocks == 0);
Christoph Hellwigdff35fd2008-08-13 16:44:15 +1000821
Deepa Dinamanic2050a42016-09-14 07:48:06 -0700822 tv = current_time(inode);
Dave Chinner39878482016-02-09 16:54:58 +1100823 inode->i_mtime = tv;
824 inode->i_atime = tv;
825 inode->i_ctime = tv;
Christoph Hellwigdff35fd2008-08-13 16:44:15 +1000826
Christoph Hellwig031474c2021-03-29 11:11:41 -0700827 ip->i_extsize = 0;
Christoph Hellwigdb073492021-03-29 11:11:44 -0700828 ip->i_diflags = 0;
Christoph Hellwig93848a92013-04-03 16:11:17 +1100829
Christoph Hellwig6471e9c2020-03-18 08:15:11 -0700830 if (xfs_sb_version_has_v3inode(&mp->m_sb)) {
Jeff Laytonf0e28282017-12-11 06:35:19 -0500831 inode_set_iversion(inode, 1);
Christoph Hellwigb33ce572021-03-29 11:11:42 -0700832 ip->i_cowextsize = 0;
Christoph Hellwige98d5e82021-03-29 11:11:45 -0700833 ip->i_crtime = tv;
Christoph Hellwig93848a92013-04-03 16:11:17 +1100834 }
835
Linus Torvalds1da177e2005-04-16 15:20:36 -0700836 flags = XFS_ILOG_CORE;
837 switch (mode & S_IFMT) {
838 case S_IFIFO:
839 case S_IFCHR:
840 case S_IFBLK:
841 case S_IFSOCK:
Christoph Hellwigf7e67b22020-05-18 10:28:05 -0700842 ip->i_df.if_format = XFS_DINODE_FMT_DEV;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700843 flags |= XFS_ILOG_DEV;
844 break;
845 case S_IFREG:
846 case S_IFDIR:
Christoph Hellwigdb073492021-03-29 11:11:44 -0700847 if (pip && (pip->i_diflags & XFS_DIFLAG_ANY))
Darrick J. Wong8a569d72020-09-13 10:16:40 -0700848 xfs_inode_inherit_flags(ip, pip);
Christoph Hellwig3e09ab82021-03-29 11:11:45 -0700849 if (pip && (pip->i_diflags2 & XFS_DIFLAG2_ANY))
Darrick J. Wong8a569d72020-09-13 10:16:40 -0700850 xfs_inode_inherit_flags2(ip, pip);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700851 /* FALLTHROUGH */
852 case S_IFLNK:
Christoph Hellwigf7e67b22020-05-18 10:28:05 -0700853 ip->i_df.if_format = XFS_DINODE_FMT_EXTENTS;
Christoph Hellwigfcacbc32018-07-17 16:51:50 -0700854 ip->i_df.if_bytes = 0;
Christoph Hellwig6bdcf262017-11-03 10:34:46 -0700855 ip->i_df.if_u1.if_root = NULL;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700856 break;
857 default:
858 ASSERT(0);
859 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700860
861 /*
Dave Chinnere6a688c2021-03-22 09:52:03 -0700862 * If we need to create attributes immediately after allocating the
863 * inode, initialise an empty attribute fork right now. We use the
864 * default fork offset for attributes here as we don't know exactly what
865 * size or how many attributes we might be adding. We can do this
866 * safely here because we know the data fork is completely empty and
867 * this saves us from needing to run a separate transaction to set the
868 * fork offset in the immediate future.
869 */
Dave Chinner2442ee12021-04-06 07:01:00 -0700870 if (init_xattrs && xfs_sb_version_hasattr(&mp->m_sb)) {
Christoph Hellwig7821ea32021-03-29 11:11:44 -0700871 ip->i_forkoff = xfs_default_attroffset(ip) >> 3;
Dave Chinnere6a688c2021-03-22 09:52:03 -0700872 ip->i_afp = xfs_ifork_alloc(XFS_DINODE_FMT_EXTENTS, 0);
873 }
874
875 /*
Linus Torvalds1da177e2005-04-16 15:20:36 -0700876 * Log the new values stuffed into the inode.
877 */
Christoph Hellwigddc34152011-09-19 15:00:54 +0000878 xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700879 xfs_trans_log_inode(tp, ip, flags);
880
Dave Chinner58c90472015-02-23 22:38:08 +1100881 /* now that we have an i_mode we can setup the inode structure */
Christoph Hellwig41be8be2008-08-13 16:23:13 +1000882 xfs_setup_inode(ip);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700883
884 *ipp = ip;
885 return 0;
886}
887
Dave Chinnere546cb72013-08-12 20:49:47 +1000888/*
Dave Chinner54d7b5c2016-02-09 16:54:58 +1100889 * Decrement the link count on an inode & log the change. If this causes the
890 * link count to go to zero, move the inode to the AGI unlinked list so that it can
891 * be freed when the last active reference goes away via xfs_inactive().
Dave Chinnere546cb72013-08-12 20:49:47 +1000892 */
Eric Sandeen0d5a75e2016-06-01 17:38:15 +1000893static int /* error */
Dave Chinnere546cb72013-08-12 20:49:47 +1000894xfs_droplink(
895 xfs_trans_t *tp,
896 xfs_inode_t *ip)
897{
Dave Chinnere546cb72013-08-12 20:49:47 +1000898 xfs_trans_ichgtime(tp, ip, XFS_ICHGTIME_CHG);
899
Dave Chinnere546cb72013-08-12 20:49:47 +1000900 drop_nlink(VFS_I(ip));
901 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
902
Dave Chinner54d7b5c2016-02-09 16:54:58 +1100903 if (VFS_I(ip)->i_nlink)
904 return 0;
905
906 return xfs_iunlink(tp, ip);
Dave Chinnere546cb72013-08-12 20:49:47 +1000907}
908
909/*
Dave Chinnere546cb72013-08-12 20:49:47 +1000910 * Increment the link count on an inode & log the change.
911 */
Eric Sandeen91083262019-05-01 20:26:30 -0700912static void
Dave Chinnere546cb72013-08-12 20:49:47 +1000913xfs_bumplink(
914 xfs_trans_t *tp,
915 xfs_inode_t *ip)
916{
917 xfs_trans_ichgtime(tp, ip, XFS_ICHGTIME_CHG);
918
Dave Chinnere546cb72013-08-12 20:49:47 +1000919 inc_nlink(VFS_I(ip));
Dave Chinnere546cb72013-08-12 20:49:47 +1000920 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
Dave Chinnere546cb72013-08-12 20:49:47 +1000921}
922
Dave Chinnerc24b5df2013-08-12 20:49:45 +1000923int
924xfs_create(
Christoph Hellwigf736d932021-01-21 14:19:58 +0100925 struct user_namespace *mnt_userns,
Dave Chinnerc24b5df2013-08-12 20:49:45 +1000926 xfs_inode_t *dp,
927 struct xfs_name *name,
928 umode_t mode,
Christoph Hellwig66f36462017-10-19 11:07:09 -0700929 dev_t rdev,
Dave Chinnere6a688c2021-03-22 09:52:03 -0700930 bool init_xattrs,
Dave Chinnerc24b5df2013-08-12 20:49:45 +1000931 xfs_inode_t **ipp)
932{
933 int is_dir = S_ISDIR(mode);
934 struct xfs_mount *mp = dp->i_mount;
935 struct xfs_inode *ip = NULL;
936 struct xfs_trans *tp = NULL;
937 int error;
Dave Chinnerc24b5df2013-08-12 20:49:45 +1000938 bool unlock_dp_on_error = false;
Dave Chinnerc24b5df2013-08-12 20:49:45 +1000939 prid_t prid;
940 struct xfs_dquot *udqp = NULL;
941 struct xfs_dquot *gdqp = NULL;
942 struct xfs_dquot *pdqp = NULL;
Brian Foster062647a2014-11-28 14:00:16 +1100943 struct xfs_trans_res *tres;
Dave Chinnerc24b5df2013-08-12 20:49:45 +1000944 uint resblks;
Dave Chinnerb652afd2021-06-02 10:48:24 +1000945 xfs_ino_t ino;
Dave Chinnerc24b5df2013-08-12 20:49:45 +1000946
947 trace_xfs_create(dp, name);
948
949 if (XFS_FORCED_SHUTDOWN(mp))
Dave Chinner24513372014-06-25 14:58:08 +1000950 return -EIO;
Dave Chinnerc24b5df2013-08-12 20:49:45 +1000951
Zhi Yong Wu163467d2013-12-18 08:22:39 +0800952 prid = xfs_get_initial_prid(dp);
Dave Chinnerc24b5df2013-08-12 20:49:45 +1000953
954 /*
955 * Make sure that we have allocated dquot(s) on disk.
956 */
Christian Braunera65e58e2021-03-20 13:26:22 +0100957 error = xfs_qm_vop_dqalloc(dp, mapped_fsuid(mnt_userns),
958 mapped_fsgid(mnt_userns), prid,
Darrick J. Wongb5a08422021-03-02 09:32:52 -0800959 XFS_QMOPT_QUOTALL | XFS_QMOPT_INHERIT,
960 &udqp, &gdqp, &pdqp);
Dave Chinnerc24b5df2013-08-12 20:49:45 +1000961 if (error)
962 return error;
963
964 if (is_dir) {
Dave Chinnerc24b5df2013-08-12 20:49:45 +1000965 resblks = XFS_MKDIR_SPACE_RES(mp, name->len);
Brian Foster062647a2014-11-28 14:00:16 +1100966 tres = &M_RES(mp)->tr_mkdir;
Dave Chinnerc24b5df2013-08-12 20:49:45 +1000967 } else {
968 resblks = XFS_CREATE_SPACE_RES(mp, name->len);
Brian Foster062647a2014-11-28 14:00:16 +1100969 tres = &M_RES(mp)->tr_create;
Dave Chinnerc24b5df2013-08-12 20:49:45 +1000970 }
971
Dave Chinnerc24b5df2013-08-12 20:49:45 +1000972 /*
973 * Initially assume that the file does not exist and
974 * reserve the resources for that case. If that is not
975 * the case we'll drop the one we have and get a more
976 * appropriate transaction later.
977 */
Darrick J. Wongf2f7b9f2021-01-27 12:07:57 -0800978 error = xfs_trans_alloc_icreate(mp, tres, udqp, gdqp, pdqp, resblks,
979 &tp);
Dave Chinner24513372014-06-25 14:58:08 +1000980 if (error == -ENOSPC) {
Dave Chinnerc24b5df2013-08-12 20:49:45 +1000981 /* flush outstanding delalloc blocks and retry */
982 xfs_flush_inodes(mp);
Darrick J. Wongf2f7b9f2021-01-27 12:07:57 -0800983 error = xfs_trans_alloc_icreate(mp, tres, udqp, gdqp, pdqp,
984 resblks, &tp);
Dave Chinnerc24b5df2013-08-12 20:49:45 +1000985 }
Christoph Hellwig4906e212015-06-04 13:47:56 +1000986 if (error)
Darrick J. Wongf2f7b9f2021-01-27 12:07:57 -0800987 goto out_release_dquots;
Dave Chinnerc24b5df2013-08-12 20:49:45 +1000988
Christoph Hellwig65523212016-11-30 14:33:25 +1100989 xfs_ilock(dp, XFS_ILOCK_EXCL | XFS_ILOCK_PARENT);
Dave Chinnerc24b5df2013-08-12 20:49:45 +1000990 unlock_dp_on_error = true;
991
Chandan Babu Rf5d92742021-01-22 16:48:12 -0800992 error = xfs_iext_count_may_overflow(dp, XFS_DATA_FORK,
993 XFS_IEXT_DIR_MANIP_CNT(mp));
994 if (error)
995 goto out_trans_cancel;
996
Dave Chinnerc24b5df2013-08-12 20:49:45 +1000997 /*
998 * A newly created regular or special file just has one directory
999 * entry pointing to it, but a directory also has the "." entry
1000 * pointing to itself.
1001 */
Dave Chinnerb652afd2021-06-02 10:48:24 +10001002 error = xfs_dialloc(&tp, dp->i_ino, mode, &ino);
1003 if (!error)
1004 error = xfs_init_new_inode(mnt_userns, tp, dp, ino, mode,
1005 is_dir ? 2 : 1, rdev, prid, init_xattrs, &ip);
Jan Karad6077aa2015-07-29 11:52:08 +10001006 if (error)
Christoph Hellwig4906e212015-06-04 13:47:56 +10001007 goto out_trans_cancel;
Dave Chinnerc24b5df2013-08-12 20:49:45 +10001008
1009 /*
1010 * Now we join the directory inode to the transaction. We do not do it
Dave Chinnerb652afd2021-06-02 10:48:24 +10001011 * earlier because xfs_dialloc might commit the previous transaction
Dave Chinnerc24b5df2013-08-12 20:49:45 +10001012 * (and release all the locks). An error from here on will result in
1013 * the transaction cancel unlocking dp so don't do it explicitly in the
1014 * error path.
1015 */
Christoph Hellwig65523212016-11-30 14:33:25 +11001016 xfs_trans_ijoin(tp, dp, XFS_ILOCK_EXCL);
Dave Chinnerc24b5df2013-08-12 20:49:45 +10001017 unlock_dp_on_error = false;
1018
Brian Foster381eee62018-07-11 22:26:21 -07001019 error = xfs_dir_createname(tp, dp, name, ip->i_ino,
Kaixu Xia63337b62020-03-27 08:28:39 -07001020 resblks - XFS_IALLOC_SPACE_RES(mp));
Dave Chinnerc24b5df2013-08-12 20:49:45 +10001021 if (error) {
Dave Chinner24513372014-06-25 14:58:08 +10001022 ASSERT(error != -ENOSPC);
Christoph Hellwig4906e212015-06-04 13:47:56 +10001023 goto out_trans_cancel;
Dave Chinnerc24b5df2013-08-12 20:49:45 +10001024 }
1025 xfs_trans_ichgtime(tp, dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
1026 xfs_trans_log_inode(tp, dp, XFS_ILOG_CORE);
1027
1028 if (is_dir) {
1029 error = xfs_dir_init(tp, ip, dp);
1030 if (error)
Brian Fosterc8eac492018-07-24 13:43:13 -07001031 goto out_trans_cancel;
Dave Chinnerc24b5df2013-08-12 20:49:45 +10001032
Eric Sandeen91083262019-05-01 20:26:30 -07001033 xfs_bumplink(tp, dp);
Dave Chinnerc24b5df2013-08-12 20:49:45 +10001034 }
1035
1036 /*
1037 * If this is a synchronous mount, make sure that the
1038 * create transaction goes to disk before returning to
1039 * the user.
1040 */
1041 if (mp->m_flags & (XFS_MOUNT_WSYNC|XFS_MOUNT_DIRSYNC))
1042 xfs_trans_set_sync(tp);
1043
1044 /*
1045 * Attach the dquot(s) to the inodes and modify them incore.
1046 * The IDs of the inode couldn't have changed since the new
1047 * inode has been locked ever since it was created.
1048 */
1049 xfs_qm_vop_create_dqattach(tp, ip, udqp, gdqp, pdqp);
1050
Christoph Hellwig70393312015-06-04 13:48:08 +10001051 error = xfs_trans_commit(tp);
Dave Chinnerc24b5df2013-08-12 20:49:45 +10001052 if (error)
1053 goto out_release_inode;
1054
1055 xfs_qm_dqrele(udqp);
1056 xfs_qm_dqrele(gdqp);
1057 xfs_qm_dqrele(pdqp);
1058
1059 *ipp = ip;
1060 return 0;
1061
Dave Chinnerc24b5df2013-08-12 20:49:45 +10001062 out_trans_cancel:
Christoph Hellwig4906e212015-06-04 13:47:56 +10001063 xfs_trans_cancel(tp);
Dave Chinnerc24b5df2013-08-12 20:49:45 +10001064 out_release_inode:
1065 /*
Dave Chinner58c90472015-02-23 22:38:08 +11001066 * Wait until after the current transaction is aborted to finish the
1067 * setup of the inode and release the inode. This prevents recursive
1068 * transactions and deadlocks from xfs_inactive.
Dave Chinnerc24b5df2013-08-12 20:49:45 +10001069 */
Dave Chinner58c90472015-02-23 22:38:08 +11001070 if (ip) {
1071 xfs_finish_inode_setup(ip);
Darrick J. Wong44a87362018-07-25 12:52:32 -07001072 xfs_irele(ip);
Dave Chinner58c90472015-02-23 22:38:08 +11001073 }
Darrick J. Wongf2f7b9f2021-01-27 12:07:57 -08001074 out_release_dquots:
Dave Chinnerc24b5df2013-08-12 20:49:45 +10001075 xfs_qm_dqrele(udqp);
1076 xfs_qm_dqrele(gdqp);
1077 xfs_qm_dqrele(pdqp);
1078
1079 if (unlock_dp_on_error)
Christoph Hellwig65523212016-11-30 14:33:25 +11001080 xfs_iunlock(dp, XFS_ILOCK_EXCL);
Dave Chinnerc24b5df2013-08-12 20:49:45 +10001081 return error;
1082}
1083
1084int
Zhi Yong Wu99b64362013-12-18 08:22:40 +08001085xfs_create_tmpfile(
Christoph Hellwigf736d932021-01-21 14:19:58 +01001086 struct user_namespace *mnt_userns,
Zhi Yong Wu99b64362013-12-18 08:22:40 +08001087 struct xfs_inode *dp,
Brian Foster330033d2014-04-17 08:15:30 +10001088 umode_t mode,
1089 struct xfs_inode **ipp)
Zhi Yong Wu99b64362013-12-18 08:22:40 +08001090{
1091 struct xfs_mount *mp = dp->i_mount;
1092 struct xfs_inode *ip = NULL;
1093 struct xfs_trans *tp = NULL;
1094 int error;
Zhi Yong Wu99b64362013-12-18 08:22:40 +08001095 prid_t prid;
1096 struct xfs_dquot *udqp = NULL;
1097 struct xfs_dquot *gdqp = NULL;
1098 struct xfs_dquot *pdqp = NULL;
1099 struct xfs_trans_res *tres;
1100 uint resblks;
Dave Chinnerb652afd2021-06-02 10:48:24 +10001101 xfs_ino_t ino;
Zhi Yong Wu99b64362013-12-18 08:22:40 +08001102
1103 if (XFS_FORCED_SHUTDOWN(mp))
Dave Chinner24513372014-06-25 14:58:08 +10001104 return -EIO;
Zhi Yong Wu99b64362013-12-18 08:22:40 +08001105
1106 prid = xfs_get_initial_prid(dp);
1107
1108 /*
1109 * Make sure that we have allocated dquot(s) on disk.
1110 */
Christian Braunera65e58e2021-03-20 13:26:22 +01001111 error = xfs_qm_vop_dqalloc(dp, mapped_fsuid(mnt_userns),
1112 mapped_fsgid(mnt_userns), prid,
Darrick J. Wongb5a08422021-03-02 09:32:52 -08001113 XFS_QMOPT_QUOTALL | XFS_QMOPT_INHERIT,
1114 &udqp, &gdqp, &pdqp);
Zhi Yong Wu99b64362013-12-18 08:22:40 +08001115 if (error)
1116 return error;
1117
1118 resblks = XFS_IALLOC_SPACE_RES(mp);
Zhi Yong Wu99b64362013-12-18 08:22:40 +08001119 tres = &M_RES(mp)->tr_create_tmpfile;
Christoph Hellwig253f4912016-04-06 09:19:55 +10001120
Darrick J. Wongf2f7b9f2021-01-27 12:07:57 -08001121 error = xfs_trans_alloc_icreate(mp, tres, udqp, gdqp, pdqp, resblks,
1122 &tp);
Christoph Hellwig4906e212015-06-04 13:47:56 +10001123 if (error)
Darrick J. Wongf2f7b9f2021-01-27 12:07:57 -08001124 goto out_release_dquots;
Zhi Yong Wu99b64362013-12-18 08:22:40 +08001125
Dave Chinnerb652afd2021-06-02 10:48:24 +10001126 error = xfs_dialloc(&tp, dp->i_ino, mode, &ino);
1127 if (!error)
1128 error = xfs_init_new_inode(mnt_userns, tp, dp, ino, mode,
1129 0, 0, prid, false, &ip);
Jan Karad6077aa2015-07-29 11:52:08 +10001130 if (error)
Christoph Hellwig4906e212015-06-04 13:47:56 +10001131 goto out_trans_cancel;
Zhi Yong Wu99b64362013-12-18 08:22:40 +08001132
1133 if (mp->m_flags & XFS_MOUNT_WSYNC)
1134 xfs_trans_set_sync(tp);
1135
1136 /*
1137 * Attach the dquot(s) to the inodes and modify them incore.
1138 * The IDs of the inode couldn't have changed since the new
1139 * inode has been locked ever since it was created.
1140 */
1141 xfs_qm_vop_create_dqattach(tp, ip, udqp, gdqp, pdqp);
1142
Zhi Yong Wu99b64362013-12-18 08:22:40 +08001143 error = xfs_iunlink(tp, ip);
1144 if (error)
Christoph Hellwig4906e212015-06-04 13:47:56 +10001145 goto out_trans_cancel;
Zhi Yong Wu99b64362013-12-18 08:22:40 +08001146
Christoph Hellwig70393312015-06-04 13:48:08 +10001147 error = xfs_trans_commit(tp);
Zhi Yong Wu99b64362013-12-18 08:22:40 +08001148 if (error)
1149 goto out_release_inode;
1150
1151 xfs_qm_dqrele(udqp);
1152 xfs_qm_dqrele(gdqp);
1153 xfs_qm_dqrele(pdqp);
1154
Brian Foster330033d2014-04-17 08:15:30 +10001155 *ipp = ip;
Zhi Yong Wu99b64362013-12-18 08:22:40 +08001156 return 0;
1157
Zhi Yong Wu99b64362013-12-18 08:22:40 +08001158 out_trans_cancel:
Christoph Hellwig4906e212015-06-04 13:47:56 +10001159 xfs_trans_cancel(tp);
Zhi Yong Wu99b64362013-12-18 08:22:40 +08001160 out_release_inode:
1161 /*
Dave Chinner58c90472015-02-23 22:38:08 +11001162 * Wait until after the current transaction is aborted to finish the
1163 * setup of the inode and release the inode. This prevents recursive
1164 * transactions and deadlocks from xfs_inactive.
Zhi Yong Wu99b64362013-12-18 08:22:40 +08001165 */
Dave Chinner58c90472015-02-23 22:38:08 +11001166 if (ip) {
1167 xfs_finish_inode_setup(ip);
Darrick J. Wong44a87362018-07-25 12:52:32 -07001168 xfs_irele(ip);
Dave Chinner58c90472015-02-23 22:38:08 +11001169 }
Darrick J. Wongf2f7b9f2021-01-27 12:07:57 -08001170 out_release_dquots:
Zhi Yong Wu99b64362013-12-18 08:22:40 +08001171 xfs_qm_dqrele(udqp);
1172 xfs_qm_dqrele(gdqp);
1173 xfs_qm_dqrele(pdqp);
1174
1175 return error;
1176}
1177
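/*
 * Link the existing inode @sip into the directory @tdp under @target_name:
 * add a new directory entry and bump the inode's link count. If @sip is an
 * O_TMPFILE inode that has never been linked, it is also removed from the
 * AGI unlinked list here.
 */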
1178int
Dave Chinnerc24b5df2013-08-12 20:49:45 +10001179xfs_link(
1180 xfs_inode_t *tdp,
1181 xfs_inode_t *sip,
1182 struct xfs_name *target_name)
1183{
1184 xfs_mount_t *mp = tdp->i_mount;
1185 xfs_trans_t *tp;
1186 int error;
Dave Chinnerc24b5df2013-08-12 20:49:45 +10001187 int resblks;
1188
1189 trace_xfs_link(tdp, target_name);
1190
Dave Chinnerc19b3b052016-02-09 16:54:58 +11001191 ASSERT(!S_ISDIR(VFS_I(sip)->i_mode));
Dave Chinnerc24b5df2013-08-12 20:49:45 +10001192
1193 if (XFS_FORCED_SHUTDOWN(mp))
Dave Chinner24513372014-06-25 14:58:08 +10001194 return -EIO;
Dave Chinnerc24b5df2013-08-12 20:49:45 +10001195
Darrick J. Wongc14cfcc2018-05-04 15:30:21 -07001196 error = xfs_qm_dqattach(sip);
Dave Chinnerc24b5df2013-08-12 20:49:45 +10001197 if (error)
1198 goto std_return;
1199
Darrick J. Wongc14cfcc2018-05-04 15:30:21 -07001200 error = xfs_qm_dqattach(tdp);
Dave Chinnerc24b5df2013-08-12 20:49:45 +10001201 if (error)
1202 goto std_return;
1203
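	/*
	 * Reserve enough blocks for the new directory entry. If the
	 * filesystem is out of space, retry without a block reservation and
	 * rely on the xfs_dir_canenter() check below to confirm the entry
	 * can be added without allocating new blocks.
	 */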
Dave Chinnerc24b5df2013-08-12 20:49:45 +10001204 resblks = XFS_LINK_SPACE_RES(mp, target_name->len);
Christoph Hellwig253f4912016-04-06 09:19:55 +10001205 error = xfs_trans_alloc(mp, &M_RES(mp)->tr_link, resblks, 0, 0, &tp);
Dave Chinner24513372014-06-25 14:58:08 +10001206 if (error == -ENOSPC) {
Dave Chinnerc24b5df2013-08-12 20:49:45 +10001207 resblks = 0;
Christoph Hellwig253f4912016-04-06 09:19:55 +10001208 error = xfs_trans_alloc(mp, &M_RES(mp)->tr_link, 0, 0, 0, &tp);
Dave Chinnerc24b5df2013-08-12 20:49:45 +10001209 }
Christoph Hellwig4906e212015-06-04 13:47:56 +10001210 if (error)
Christoph Hellwig253f4912016-04-06 09:19:55 +10001211 goto std_return;
Dave Chinnerc24b5df2013-08-12 20:49:45 +10001212
Darrick J. Wong7c2d2382018-01-26 15:27:33 -08001213 xfs_lock_two_inodes(sip, XFS_ILOCK_EXCL, tdp, XFS_ILOCK_EXCL);
Dave Chinnerc24b5df2013-08-12 20:49:45 +10001214
1215 xfs_trans_ijoin(tp, sip, XFS_ILOCK_EXCL);
Christoph Hellwig65523212016-11-30 14:33:25 +11001216 xfs_trans_ijoin(tp, tdp, XFS_ILOCK_EXCL);
Dave Chinnerc24b5df2013-08-12 20:49:45 +10001217
Chandan Babu Rf5d92742021-01-22 16:48:12 -08001218 error = xfs_iext_count_may_overflow(tdp, XFS_DATA_FORK,
1219 XFS_IEXT_DIR_MANIP_CNT(mp));
1220 if (error)
1221 goto error_return;
1222
Dave Chinnerc24b5df2013-08-12 20:49:45 +10001223 /*
1224 * If we are using project inheritance, we only allow hard link
1225 * creation in our tree when the project IDs are the same; else
1226 * the tree quota mechanism could be circumvented.
1227 */
Christoph Hellwigdb073492021-03-29 11:11:44 -07001228 if (unlikely((tdp->i_diflags & XFS_DIFLAG_PROJINHERIT) &&
Christoph Hellwigceaf6032021-03-29 11:11:39 -07001229 tdp->i_projid != sip->i_projid)) {
Dave Chinner24513372014-06-25 14:58:08 +10001230 error = -EXDEV;
Dave Chinnerc24b5df2013-08-12 20:49:45 +10001231 goto error_return;
1232 }
1233
Eric Sandeen94f3cad2014-09-09 11:57:52 +10001234 if (!resblks) {
1235 error = xfs_dir_canenter(tp, tdp, target_name);
1236 if (error)
1237 goto error_return;
1238 }
Dave Chinnerc24b5df2013-08-12 20:49:45 +10001239
Dave Chinner54d7b5c2016-02-09 16:54:58 +11001240 /*
 1241	 * Handle the initial link state of an O_TMPFILE inode: it was created
	 * with nlink == 0 and placed on the AGI unlinked list, so take it off
	 * that list before it gains its first link.
1242 */
1243 if (VFS_I(sip)->i_nlink == 0) {
Zhi Yong Wuab297432013-12-18 08:22:41 +08001244 error = xfs_iunlink_remove(tp, sip);
1245 if (error)
Christoph Hellwig4906e212015-06-04 13:47:56 +10001246 goto error_return;
Zhi Yong Wuab297432013-12-18 08:22:41 +08001247 }
1248
Dave Chinnerc24b5df2013-08-12 20:49:45 +10001249 error = xfs_dir_createname(tp, tdp, target_name, sip->i_ino,
Brian Foster381eee62018-07-11 22:26:21 -07001250 resblks);
Dave Chinnerc24b5df2013-08-12 20:49:45 +10001251 if (error)
Christoph Hellwig4906e212015-06-04 13:47:56 +10001252 goto error_return;
Dave Chinnerc24b5df2013-08-12 20:49:45 +10001253 xfs_trans_ichgtime(tp, tdp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
1254 xfs_trans_log_inode(tp, tdp, XFS_ILOG_CORE);
1255
Eric Sandeen91083262019-05-01 20:26:30 -07001256 xfs_bumplink(tp, sip);
Dave Chinnerc24b5df2013-08-12 20:49:45 +10001257
1258 /*
1259 * If this is a synchronous mount, make sure that the
1260 * link transaction goes to disk before returning to
1261 * the user.
1262 */
Eric Sandeenf6106ef2016-01-11 11:34:01 +11001263 if (mp->m_flags & (XFS_MOUNT_WSYNC|XFS_MOUNT_DIRSYNC))
Dave Chinnerc24b5df2013-08-12 20:49:45 +10001264 xfs_trans_set_sync(tp);
Dave Chinnerc24b5df2013-08-12 20:49:45 +10001265
Christoph Hellwig70393312015-06-04 13:48:08 +10001266 return xfs_trans_commit(tp);
Dave Chinnerc24b5df2013-08-12 20:49:45 +10001267
Dave Chinnerc24b5df2013-08-12 20:49:45 +10001268 error_return:
Christoph Hellwig4906e212015-06-04 13:47:56 +10001269 xfs_trans_cancel(tp);
Dave Chinnerc24b5df2013-08-12 20:49:45 +10001270 std_return:
1271 return error;
1272}
1273
Darrick J. Wong363e59b2017-12-14 15:42:59 -08001274/* Clear the reflink flag and the cowblocks tag if possible. */
1275static void
1276xfs_itruncate_clear_reflink_flags(
1277 struct xfs_inode *ip)
1278{
1279 struct xfs_ifork *dfork;
1280 struct xfs_ifork *cfork;
1281
1282 if (!xfs_is_reflink_inode(ip))
1283 return;
1284 dfork = XFS_IFORK_PTR(ip, XFS_DATA_FORK);
1285 cfork = XFS_IFORK_PTR(ip, XFS_COW_FORK);
1286 if (dfork->if_bytes == 0 && cfork->if_bytes == 0)
Christoph Hellwig3e09ab82021-03-29 11:11:45 -07001287 ip->i_diflags2 &= ~XFS_DIFLAG2_REFLINK;
Darrick J. Wong363e59b2017-12-14 15:42:59 -08001288 if (cfork->if_bytes == 0)
1289 xfs_inode_clear_cowblocks_tag(ip);
1290}
1291
Linus Torvalds1da177e2005-04-16 15:20:36 -07001292/*
Christoph Hellwig8f04c472011-07-08 14:34:34 +02001293 * Free up the underlying blocks past new_size. The new size must be smaller
1294 * than the current size. This routine can be used both for the attribute and
1295 * data fork, and does not modify the inode size, which is left to the caller.
Linus Torvalds1da177e2005-04-16 15:20:36 -07001296 *
David Chinnerf6485052008-04-17 16:50:04 +10001297 * The transaction passed to this routine must have made a permanent log
1298 * reservation of at least XFS_ITRUNCATE_LOG_RES. This routine may commit the
1299 * given transaction and start new ones, so make sure everything involved in
 1300 * the transaction is tidy before calling here. A transaction, possibly a new
 1301 * one, will be returned to the caller to be committed. The incoming transaction must
1302 * already include the inode, and both inode locks must be held exclusively.
1303 * The inode must also be "held" within the transaction. On return the inode
1304 * will be "held" within the returned transaction. This routine does NOT
1305 * require any disk space to be reserved for it within the transaction.
Linus Torvalds1da177e2005-04-16 15:20:36 -07001306 *
David Chinnerf6485052008-04-17 16:50:04 +10001307 * If we get an error, we must return with the inode locked and linked into the
1308 * current transaction. This keeps things simple for the higher level code,
1309 * because it always knows that the inode is locked and held in the transaction
1310 * that returns to it whether errors occur or not. We don't mark the inode
1311 * dirty on error so that transactions can be easily aborted if possible.
Linus Torvalds1da177e2005-04-16 15:20:36 -07001312 */
1313int
Brian Foster4e529332018-05-10 09:35:42 -07001314xfs_itruncate_extents_flags(
Christoph Hellwig8f04c472011-07-08 14:34:34 +02001315 struct xfs_trans **tpp,
1316 struct xfs_inode *ip,
1317 int whichfork,
Brian Foster13b86fc2018-05-09 08:45:04 -07001318 xfs_fsize_t new_size,
Brian Foster4e529332018-05-10 09:35:42 -07001319 int flags)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001320{
Christoph Hellwig8f04c472011-07-08 14:34:34 +02001321 struct xfs_mount *mp = ip->i_mount;
1322 struct xfs_trans *tp = *tpp;
Christoph Hellwig8f04c472011-07-08 14:34:34 +02001323 xfs_fileoff_t first_unmap_block;
Christoph Hellwig8f04c472011-07-08 14:34:34 +02001324 xfs_filblks_t unmap_len;
Christoph Hellwig8f04c472011-07-08 14:34:34 +02001325 int error = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001326
Christoph Hellwig0b561852012-07-04 11:13:31 -04001327 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
1328 ASSERT(!atomic_read(&VFS_I(ip)->i_count) ||
1329 xfs_isilocked(ip, XFS_IOLOCK_EXCL));
Christoph Hellwigce7ae1512011-12-18 20:00:11 +00001330 ASSERT(new_size <= XFS_ISIZE(ip));
Christoph Hellwig8f04c472011-07-08 14:34:34 +02001331 ASSERT(tp->t_flags & XFS_TRANS_PERM_LOG_RES);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001332 ASSERT(ip->i_itemp != NULL);
Christoph Hellwig898621d2010-06-24 11:36:58 +10001333 ASSERT(ip->i_itemp->ili_lock_flags == 0);
Christoph Hellwig8f04c472011-07-08 14:34:34 +02001334 ASSERT(!XFS_NOT_DQATTACHED(mp, ip));
Linus Torvalds1da177e2005-04-16 15:20:36 -07001335
Christoph Hellwig673e8e52011-12-18 20:00:04 +00001336 trace_xfs_itruncate_extents_start(ip, new_size);
1337
Brian Foster4e529332018-05-10 09:35:42 -07001338 flags |= xfs_bmapi_aflag(whichfork);
Brian Foster13b86fc2018-05-09 08:45:04 -07001339
Linus Torvalds1da177e2005-04-16 15:20:36 -07001340 /*
1341 * Since it is possible for space to become allocated beyond
1342 * the end of the file (in a crash where the space is allocated
1343 * but the inode size is not yet updated), simply remove any
1344 * blocks which show up between the new EOF and the maximum
Darrick J. Wong4bbb04a2020-01-02 13:20:13 -08001345 * possible file size.
1346 *
1347 * We have to free all the blocks to the bmbt maximum offset, even if
1348 * the page cache can't scale that far.
Linus Torvalds1da177e2005-04-16 15:20:36 -07001349 */
Christoph Hellwig8f04c472011-07-08 14:34:34 +02001350 first_unmap_block = XFS_B_TO_FSB(mp, (xfs_ufsize_t)new_size);
Darrick J. Wong33005fd2020-12-04 13:28:35 -08001351 if (!xfs_verify_fileoff(mp, first_unmap_block)) {
Darrick J. Wong4bbb04a2020-01-02 13:20:13 -08001352 WARN_ON_ONCE(first_unmap_block > XFS_MAX_FILEOFF);
Christoph Hellwig8f04c472011-07-08 14:34:34 +02001353 return 0;
Darrick J. Wong4bbb04a2020-01-02 13:20:13 -08001354 }
Christoph Hellwig8f04c472011-07-08 14:34:34 +02001355
Darrick J. Wong4bbb04a2020-01-02 13:20:13 -08001356 unmap_len = XFS_MAX_FILEOFF - first_unmap_block + 1;
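	/*
	 * Unmap at most XFS_ITRUNC_MAX_EXTENTS extents per pass and finish
	 * the deferred extent-free work (rolling the permanent transaction)
	 * after each call, so that no single transaction exceeds its log
	 * reservation.
	 */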
1357 while (unmap_len > 0) {
Brian Foster02dff7b2018-07-24 13:43:07 -07001358 ASSERT(tp->t_firstblock == NULLFSBLOCK);
Darrick J. Wong4bbb04a2020-01-02 13:20:13 -08001359 error = __xfs_bunmapi(tp, ip, first_unmap_block, &unmap_len,
1360 flags, XFS_ITRUNC_MAX_EXTENTS);
Christoph Hellwig8f04c472011-07-08 14:34:34 +02001361 if (error)
Brian Fosterd5a2e282018-09-29 13:41:58 +10001362 goto out;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001363
Brian Foster6dd379c2020-09-15 20:44:46 -07001364 /* free the just unmapped extents */
Brian Foster9e28a242018-07-24 13:43:15 -07001365 error = xfs_defer_finish(&tp);
Christoph Hellwig8f04c472011-07-08 14:34:34 +02001366 if (error)
Brian Foster9b1f4e92018-08-01 07:20:33 -07001367 goto out;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001368 }
Christoph Hellwig8f04c472011-07-08 14:34:34 +02001369
Darrick J. Wong4919d422018-04-10 08:28:33 -07001370 if (whichfork == XFS_DATA_FORK) {
1371 /* Remove all pending CoW reservations. */
1372 error = xfs_reflink_cancel_cow_blocks(ip, &tp,
Darrick J. Wong4bbb04a2020-01-02 13:20:13 -08001373 first_unmap_block, XFS_MAX_FILEOFF, true);
Darrick J. Wong4919d422018-04-10 08:28:33 -07001374 if (error)
1375 goto out;
Darrick J. Wongaa8968f2016-10-03 09:11:38 -07001376
Darrick J. Wong4919d422018-04-10 08:28:33 -07001377 xfs_itruncate_clear_reflink_flags(ip);
1378 }
Darrick J. Wongaa8968f2016-10-03 09:11:38 -07001379
Christoph Hellwig673e8e52011-12-18 20:00:04 +00001380 /*
1381 * Always re-log the inode so that our permanent transaction can keep
1382 * on rolling it forward in the log.
1383 */
1384 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
1385
1386 trace_xfs_itruncate_extents_end(ip, new_size);
1387
Christoph Hellwig8f04c472011-07-08 14:34:34 +02001388out:
1389 *tpp = tp;
1390 return error;
Christoph Hellwig8f04c472011-07-08 14:34:34 +02001391}
1392
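/*
 * Called when a file is closed. The main job here is to trim speculative
 * post-EOF preallocation when it is safe to do so; the checks below try to
 * avoid penalising workloads that repeatedly open, write and close the same
 * file.
 */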
Dave Chinnerc24b5df2013-08-12 20:49:45 +10001393int
1394xfs_release(
1395 xfs_inode_t *ip)
1396{
1397 xfs_mount_t *mp = ip->i_mount;
Darrick J. Wong7d883292021-03-23 16:59:31 -07001398 int error = 0;
Dave Chinnerc24b5df2013-08-12 20:49:45 +10001399
Dave Chinnerc19b3b052016-02-09 16:54:58 +11001400 if (!S_ISREG(VFS_I(ip)->i_mode) || (VFS_I(ip)->i_mode == 0))
Dave Chinnerc24b5df2013-08-12 20:49:45 +10001401 return 0;
1402
1403 /* If this is a read-only mount, don't do this (would generate I/O) */
1404 if (mp->m_flags & XFS_MOUNT_RDONLY)
1405 return 0;
1406
1407 if (!XFS_FORCED_SHUTDOWN(mp)) {
1408 int truncated;
1409
1410 /*
Dave Chinnerc24b5df2013-08-12 20:49:45 +10001411 * If we previously truncated this file and removed old data
1412 * in the process, we want to initiate "early" writeout on
1413 * the last close. This is an attempt to combat the notorious
1414 * NULL files problem which is particularly noticeable from a
1415 * truncate down, buffered (re-)write (delalloc), followed by
1416 * a crash. What we are effectively doing here is
1417 * significantly reducing the time window where we'd otherwise
1418 * be exposed to that problem.
1419 */
1420 truncated = xfs_iflags_test_and_clear(ip, XFS_ITRUNCATED);
1421 if (truncated) {
1422 xfs_iflags_clear(ip, XFS_IDIRTY_RELEASE);
Dave Chinnereac152b2014-08-04 13:22:49 +10001423 if (ip->i_delayed_blks > 0) {
Dave Chinner24513372014-06-25 14:58:08 +10001424 error = filemap_flush(VFS_I(ip)->i_mapping);
Dave Chinnerc24b5df2013-08-12 20:49:45 +10001425 if (error)
1426 return error;
1427 }
1428 }
1429 }
1430
Dave Chinner54d7b5c2016-02-09 16:54:58 +11001431 if (VFS_I(ip)->i_nlink == 0)
Dave Chinnerc24b5df2013-08-12 20:49:45 +10001432 return 0;
1433
Darrick J. Wong7d883292021-03-23 16:59:31 -07001434 /*
1435 * If we can't get the iolock just skip truncating the blocks past EOF
1436 * because we could deadlock with the mmap_lock otherwise. We'll get
1437 * another chance to drop them once the last reference to the inode is
1438 * dropped, so we'll never leak blocks permanently.
1439 */
1440 if (!xfs_ilock_nowait(ip, XFS_IOLOCK_EXCL))
1441 return 0;
Dave Chinnerc24b5df2013-08-12 20:49:45 +10001442
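	/*
	 * xfs_can_free_eofblocks() checks whether there are post-EOF blocks
	 * worth freeing. Note that "force" is false here; contrast this with
	 * the eviction path in xfs_inactive(), where force is true because
	 * post-EOF blocks must always be freed on eviction.
	 */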
Darrick J. Wong7d883292021-03-23 16:59:31 -07001443 if (xfs_can_free_eofblocks(ip, false)) {
Dave Chinnerc24b5df2013-08-12 20:49:45 +10001444 /*
Brian Fostera36b9262017-01-27 23:22:55 -08001445		 * If the inode is being opened, written and closed
1446 * frequently and we have delayed allocation blocks outstanding
1447 * (e.g. streaming writes from the NFS server), truncating the
1448 * blocks past EOF will cause fragmentation to occur.
Dave Chinnerc24b5df2013-08-12 20:49:45 +10001449 *
Brian Fostera36b9262017-01-27 23:22:55 -08001450 * In this case don't do the truncation, but we have to be
1451 * careful how we detect this case. Blocks beyond EOF show up as
1452 * i_delayed_blks even when the inode is clean, so we need to
1453 * truncate them away first before checking for a dirty release.
1454 * Hence on the first dirty close we will still remove the
1455 * speculative allocation, but after that we will leave it in
1456 * place.
Dave Chinnerc24b5df2013-08-12 20:49:45 +10001457 */
1458 if (xfs_iflags_test(ip, XFS_IDIRTY_RELEASE))
Darrick J. Wong7d883292021-03-23 16:59:31 -07001459 goto out_unlock;
1460
1461 error = xfs_free_eofblocks(ip);
1462 if (error)
1463 goto out_unlock;
Dave Chinnerc24b5df2013-08-12 20:49:45 +10001464
1465 /* delalloc blocks after truncation means it really is dirty */
1466 if (ip->i_delayed_blks)
1467 xfs_iflags_set(ip, XFS_IDIRTY_RELEASE);
1468 }
Darrick J. Wong7d883292021-03-23 16:59:31 -07001469
1470out_unlock:
1471 xfs_iunlock(ip, XFS_IOLOCK_EXCL);
1472 return error;
Dave Chinnerc24b5df2013-08-12 20:49:45 +10001473}
1474
1475/*
Brian Fosterf7be2d72013-09-20 11:06:10 -04001476 * xfs_inactive_truncate
1477 *
1478 * Called to perform a truncate when an inode becomes unlinked.
1479 */
1480STATIC int
1481xfs_inactive_truncate(
1482 struct xfs_inode *ip)
1483{
1484 struct xfs_mount *mp = ip->i_mount;
1485 struct xfs_trans *tp;
1486 int error;
1487
Christoph Hellwig253f4912016-04-06 09:19:55 +10001488 error = xfs_trans_alloc(mp, &M_RES(mp)->tr_itruncate, 0, 0, 0, &tp);
Brian Fosterf7be2d72013-09-20 11:06:10 -04001489 if (error) {
1490 ASSERT(XFS_FORCED_SHUTDOWN(mp));
Brian Fosterf7be2d72013-09-20 11:06:10 -04001491 return error;
1492 }
Brian Fosterf7be2d72013-09-20 11:06:10 -04001493 xfs_ilock(ip, XFS_ILOCK_EXCL);
1494 xfs_trans_ijoin(tp, ip, 0);
1495
1496 /*
1497 * Log the inode size first to prevent stale data exposure in the event
1498 * of a system crash before the truncate completes. See the related
Jan Kara69bca802016-05-26 14:46:43 +02001499 * comment in xfs_vn_setattr_size() for details.
Brian Fosterf7be2d72013-09-20 11:06:10 -04001500 */
Christoph Hellwig13d2c102021-03-29 11:11:40 -07001501 ip->i_disk_size = 0;
Brian Fosterf7be2d72013-09-20 11:06:10 -04001502 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
1503
1504 error = xfs_itruncate_extents(&tp, ip, XFS_DATA_FORK, 0);
1505 if (error)
1506 goto error_trans_cancel;
1507
Christoph Hellwigdaf83962020-05-18 10:27:22 -07001508 ASSERT(ip->i_df.if_nextents == 0);
Brian Fosterf7be2d72013-09-20 11:06:10 -04001509
Christoph Hellwig70393312015-06-04 13:48:08 +10001510 error = xfs_trans_commit(tp);
Brian Fosterf7be2d72013-09-20 11:06:10 -04001511 if (error)
1512 goto error_unlock;
1513
1514 xfs_iunlock(ip, XFS_ILOCK_EXCL);
1515 return 0;
1516
1517error_trans_cancel:
Christoph Hellwig4906e212015-06-04 13:47:56 +10001518 xfs_trans_cancel(tp);
Brian Fosterf7be2d72013-09-20 11:06:10 -04001519error_unlock:
1520 xfs_iunlock(ip, XFS_ILOCK_EXCL);
1521 return error;
1522}
1523
1524/*
Brian Foster88877d22013-09-20 11:06:11 -04001525 * xfs_inactive_ifree()
1526 *
1527 * Perform the inode free when an inode is unlinked.
1528 */
1529STATIC int
1530xfs_inactive_ifree(
1531 struct xfs_inode *ip)
1532{
Brian Foster88877d22013-09-20 11:06:11 -04001533 struct xfs_mount *mp = ip->i_mount;
1534 struct xfs_trans *tp;
1535 int error;
1536
Brian Foster9d43b182014-04-24 16:00:52 +10001537 /*
Christoph Hellwig76d771b2017-01-25 07:49:35 -08001538 * We try to use a per-AG reservation for any block needed by the finobt
1539 * tree, but as the finobt feature predates the per-AG reservation
1540 * support a degraded file system might not have enough space for the
1541 * reservation at mount time. In that case try to dip into the reserved
1542 * pool and pray.
Brian Foster9d43b182014-04-24 16:00:52 +10001543 *
1544 * Send a warning if the reservation does happen to fail, as the inode
1545 * now remains allocated and sits on the unlinked list until the fs is
1546 * repaired.
1547 */
Darrick J. Wonge1f6ca12019-02-14 09:33:15 -08001548 if (unlikely(mp->m_finobt_nores)) {
Christoph Hellwig76d771b2017-01-25 07:49:35 -08001549 error = xfs_trans_alloc(mp, &M_RES(mp)->tr_ifree,
1550 XFS_IFREE_SPACE_RES(mp), 0, XFS_TRANS_RESERVE,
1551 &tp);
1552 } else {
1553 error = xfs_trans_alloc(mp, &M_RES(mp)->tr_ifree, 0, 0, 0, &tp);
1554 }
Brian Foster88877d22013-09-20 11:06:11 -04001555 if (error) {
Dave Chinner24513372014-06-25 14:58:08 +10001556 if (error == -ENOSPC) {
Brian Foster9d43b182014-04-24 16:00:52 +10001557 xfs_warn_ratelimited(mp,
1558 "Failed to remove inode(s) from unlinked list. "
1559 "Please free space, unmount and run xfs_repair.");
1560 } else {
1561 ASSERT(XFS_FORCED_SHUTDOWN(mp));
1562 }
Brian Foster88877d22013-09-20 11:06:11 -04001563 return error;
1564 }
1565
Dave Chinner96355d5a2020-06-29 14:48:45 -07001566 /*
1567 * We do not hold the inode locked across the entire rolling transaction
1568 * here. We only need to hold it for the first transaction that
1569 * xfs_ifree() builds, which may mark the inode XFS_ISTALE if the
1570 * underlying cluster buffer is freed. Relogging an XFS_ISTALE inode
1571 * here breaks the relationship between cluster buffer invalidation and
1572 * stale inode invalidation on cluster buffer item journal commit
1573 * completion, and can result in leaving dirty stale inodes hanging
1574 * around in memory.
1575 *
1576 * We have no need for serialising this inode operation against other
1577 * operations - we freed the inode and hence reallocation is required
1578 * and that will serialise on reallocating the space the deferops need
1579 * to free. Hence we can unlock the inode on the first commit of
1580 * the transaction rather than roll it right through the deferops. This
1581 * avoids relogging the XFS_ISTALE inode.
1582 *
1583 * We check that xfs_ifree() hasn't grown an internal transaction roll
1584 * by asserting that the inode is still locked when it returns.
1585 */
Brian Foster88877d22013-09-20 11:06:11 -04001586 xfs_ilock(ip, XFS_ILOCK_EXCL);
Dave Chinner96355d5a2020-06-29 14:48:45 -07001587 xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
Brian Foster88877d22013-09-20 11:06:11 -04001588
Brian Foster0e0417f2018-07-11 22:26:07 -07001589 error = xfs_ifree(tp, ip);
Dave Chinner96355d5a2020-06-29 14:48:45 -07001590 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
Brian Foster88877d22013-09-20 11:06:11 -04001591 if (error) {
1592 /*
1593 * If we fail to free the inode, shut down. The cancel
1594 * might do that, we need to make sure. Otherwise the
1595 * inode might be lost for a long time or forever.
1596 */
1597 if (!XFS_FORCED_SHUTDOWN(mp)) {
1598 xfs_notice(mp, "%s: xfs_ifree returned error %d",
1599 __func__, error);
1600 xfs_force_shutdown(mp, SHUTDOWN_META_IO_ERROR);
1601 }
Christoph Hellwig4906e212015-06-04 13:47:56 +10001602 xfs_trans_cancel(tp);
Brian Foster88877d22013-09-20 11:06:11 -04001603 return error;
1604 }
1605
1606 /*
1607 * Credit the quota account(s). The inode is gone.
1608 */
1609 xfs_trans_mod_dquot_byino(tp, ip, XFS_TRANS_DQ_ICOUNT, -1);
1610
1611 /*
Brian Fosterd4a97a02015-08-19 10:01:40 +10001612 * Just ignore errors at this point. There is nothing we can do except
1613 * to try to keep going. Make sure it's not a silent error.
Brian Foster88877d22013-09-20 11:06:11 -04001614 */
Christoph Hellwig70393312015-06-04 13:48:08 +10001615 error = xfs_trans_commit(tp);
Brian Foster88877d22013-09-20 11:06:11 -04001616 if (error)
1617 xfs_notice(mp, "%s: xfs_trans_commit returned error %d",
1618 __func__, error);
1619
Brian Foster88877d22013-09-20 11:06:11 -04001620 return 0;
1621}
1622
1623/*
Dave Chinnerc24b5df2013-08-12 20:49:45 +10001624 * xfs_inactive
1625 *
 1626 * This is called when the reference count for the vnode
1627 * goes to zero. If the file has been unlinked, then it must
1628 * now be truncated. Also, we clear all of the read-ahead state
1629 * kept for the inode here since the file is now closed.
1630 */
Brian Foster74564fb2013-09-20 11:06:12 -04001631void
Dave Chinnerc24b5df2013-08-12 20:49:45 +10001632xfs_inactive(
1633 xfs_inode_t *ip)
1634{
Jie Liu3d3c8b52013-08-12 20:49:59 +10001635 struct xfs_mount *mp;
Jie Liu3d3c8b52013-08-12 20:49:59 +10001636 int error;
1637 int truncate = 0;
Dave Chinnerc24b5df2013-08-12 20:49:45 +10001638
1639 /*
1640 * If the inode is already free, then there can be nothing
1641 * to clean up here.
1642 */
Dave Chinnerc19b3b052016-02-09 16:54:58 +11001643 if (VFS_I(ip)->i_mode == 0) {
Dave Chinnerc24b5df2013-08-12 20:49:45 +10001644 ASSERT(ip->i_df.if_broot_bytes == 0);
Brian Foster74564fb2013-09-20 11:06:12 -04001645 return;
Dave Chinnerc24b5df2013-08-12 20:49:45 +10001646 }
1647
1648 mp = ip->i_mount;
Darrick J. Wong17c12bc2016-10-03 09:11:29 -07001649 ASSERT(!xfs_iflags_test(ip, XFS_IRECOVERY));
Dave Chinnerc24b5df2013-08-12 20:49:45 +10001650
Dave Chinnerc24b5df2013-08-12 20:49:45 +10001651 /* If this is a read-only mount, don't do this (would generate I/O) */
1652 if (mp->m_flags & XFS_MOUNT_RDONLY)
Brian Foster74564fb2013-09-20 11:06:12 -04001653 return;
Dave Chinnerc24b5df2013-08-12 20:49:45 +10001654
Darrick J. Wong383e32b2021-03-22 09:51:54 -07001655 /* Metadata inodes require explicit resource cleanup. */
1656 if (xfs_is_metadata_inode(ip))
1657 return;
1658
Darrick J. Wong62318482018-03-06 17:08:31 -08001659 /* Try to clean out the cow blocks if there are any. */
Christoph Hellwig51d62692018-07-17 16:51:51 -07001660 if (xfs_inode_has_cow_data(ip))
Darrick J. Wong62318482018-03-06 17:08:31 -08001661 xfs_reflink_cancel_cow_range(ip, 0, NULLFILEOFF, true);
1662
Dave Chinner54d7b5c2016-02-09 16:54:58 +11001663 if (VFS_I(ip)->i_nlink != 0) {
Dave Chinnerc24b5df2013-08-12 20:49:45 +10001664 /*
1665 * force is true because we are evicting an inode from the
1666 * cache. Post-eof blocks must be freed, lest we end up with
1667 * broken free space accounting.
Brian Foster3b4683c2017-04-11 10:50:05 -07001668 *
1669 * Note: don't bother with iolock here since lockdep complains
1670 * about acquiring it in reclaim context. We have the only
1671 * reference to the inode at this point anyways.
Dave Chinnerc24b5df2013-08-12 20:49:45 +10001672 */
Brian Foster3b4683c2017-04-11 10:50:05 -07001673 if (xfs_can_free_eofblocks(ip, true))
Brian Fostera36b9262017-01-27 23:22:55 -08001674 xfs_free_eofblocks(ip);
Brian Foster74564fb2013-09-20 11:06:12 -04001675
1676 return;
Dave Chinnerc24b5df2013-08-12 20:49:45 +10001677 }
1678
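	/*
	 * An unlinked regular file needs a truncate pass if it still has an
	 * on-disk or incore size, mapped data extents, or outstanding
	 * delalloc blocks; otherwise there is nothing in the data fork to
	 * tear down before freeing the inode.
	 */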
Dave Chinnerc19b3b052016-02-09 16:54:58 +11001679 if (S_ISREG(VFS_I(ip)->i_mode) &&
Christoph Hellwig13d2c102021-03-29 11:11:40 -07001680 (ip->i_disk_size != 0 || XFS_ISIZE(ip) != 0 ||
Christoph Hellwigdaf83962020-05-18 10:27:22 -07001681 ip->i_df.if_nextents > 0 || ip->i_delayed_blks > 0))
Dave Chinnerc24b5df2013-08-12 20:49:45 +10001682 truncate = 1;
1683
Darrick J. Wongc14cfcc2018-05-04 15:30:21 -07001684 error = xfs_qm_dqattach(ip);
Dave Chinnerc24b5df2013-08-12 20:49:45 +10001685 if (error)
Brian Foster74564fb2013-09-20 11:06:12 -04001686 return;
Dave Chinnerc24b5df2013-08-12 20:49:45 +10001687
Dave Chinnerc19b3b052016-02-09 16:54:58 +11001688 if (S_ISLNK(VFS_I(ip)->i_mode))
Brian Foster36b21dd2013-09-20 11:06:09 -04001689 error = xfs_inactive_symlink(ip);
Brian Fosterf7be2d72013-09-20 11:06:10 -04001690 else if (truncate)
1691 error = xfs_inactive_truncate(ip);
1692 if (error)
Brian Foster74564fb2013-09-20 11:06:12 -04001693 return;
Dave Chinnerc24b5df2013-08-12 20:49:45 +10001694
1695 /*
1696 * If there are attributes associated with the file then blow them away
1697 * now. The code calls a routine that recursively deconstructs the
Dave Chinner6dfe5a02015-05-29 07:40:08 +10001698	 * attribute fork. It also blows away the in-core attribute fork.
Dave Chinnerc24b5df2013-08-12 20:49:45 +10001699 */
Dave Chinner6dfe5a02015-05-29 07:40:08 +10001700 if (XFS_IFORK_Q(ip)) {
Dave Chinnerc24b5df2013-08-12 20:49:45 +10001701 error = xfs_attr_inactive(ip);
1702 if (error)
Brian Foster74564fb2013-09-20 11:06:12 -04001703 return;
Dave Chinnerc24b5df2013-08-12 20:49:45 +10001704 }
1705
Dave Chinner6dfe5a02015-05-29 07:40:08 +10001706 ASSERT(!ip->i_afp);
Christoph Hellwig7821ea32021-03-29 11:11:44 -07001707 ASSERT(ip->i_forkoff == 0);
Dave Chinnerc24b5df2013-08-12 20:49:45 +10001708
1709 /*
1710 * Free the inode.
1711 */
Brian Foster88877d22013-09-20 11:06:11 -04001712 error = xfs_inactive_ifree(ip);
1713 if (error)
Brian Foster74564fb2013-09-20 11:06:12 -04001714 return;
Dave Chinnerc24b5df2013-08-12 20:49:45 +10001715
1716 /*
1717 * Release the dquots held by inode, if any.
1718 */
1719 xfs_qm_dqdetach(ip);
Dave Chinnerc24b5df2013-08-12 20:49:45 +10001720}
1721
Linus Torvalds1da177e2005-04-16 15:20:36 -07001722/*
Darrick J. Wong9b247172019-02-07 10:37:16 -08001723 * In-Core Unlinked List Lookups
1724 * =============================
1725 *
1726 * Every inode is supposed to be reachable from some other piece of metadata
1727 * with the exception of the root directory. Inodes with a connection to a
1728 * file descriptor but not linked from anywhere in the on-disk directory tree
1729 * are collectively known as unlinked inodes, though the filesystem itself
1730 * maintains links to these inodes so that on-disk metadata are consistent.
1731 *
1732 * XFS implements a per-AG on-disk hash table of unlinked inodes. The AGI
1733 * header contains a number of buckets that point to an inode, and each inode
1734 * record has a pointer to the next inode in the hash chain. This
1735 * singly-linked list causes scaling problems in the iunlink remove function
1736 * because we must walk that list to find the inode that points to the inode
1737 * being removed from the unlinked hash bucket list.
1738 *
1739 * What if we modelled the unlinked list as a collection of records capturing
1740 * "X.next_unlinked = Y" relations? If we indexed those records on Y, we'd
1741 * have a fast way to look up unlinked list predecessors, which avoids the
1742 * slow list walk. That's exactly what we do here (in-core) with a per-AG
1743 * rhashtable.
1744 *
1745 * Because this is a backref cache, we ignore operational failures since the
1746 * iunlink code can fall back to the slow bucket walk. The only errors that
1747 * should bubble out are for obviously incorrect situations.
1748 *
1749 * All users of the backref cache MUST hold the AGI buffer lock to serialize
1750 * access or have otherwise provided for concurrency control.
1751 */
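/*
 * For example, if a bucket heads the chain A -> B -> C (that is,
 * A.next_unlinked == B and B.next_unlinked == C), the cache holds the
 * records (A, B) and (B, C) keyed on the second value, so a lookup for C
 * finds its predecessor B and a lookup for B finds A, with no need to walk
 * the bucket list.
 */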
1752
1753/* Capture a "X.next_unlinked = Y" relationship. */
1754struct xfs_iunlink {
1755 struct rhash_head iu_rhash_head;
1756 xfs_agino_t iu_agino; /* X */
1757 xfs_agino_t iu_next_unlinked; /* Y */
1758};
1759
1760/* Unlinked list predecessor lookup hashtable construction */
1761static int
1762xfs_iunlink_obj_cmpfn(
1763 struct rhashtable_compare_arg *arg,
1764 const void *obj)
1765{
1766 const xfs_agino_t *key = arg->key;
1767 const struct xfs_iunlink *iu = obj;
1768
1769 if (iu->iu_next_unlinked != *key)
1770 return 1;
1771 return 0;
1772}
1773
1774static const struct rhashtable_params xfs_iunlink_hash_params = {
1775 .min_size = XFS_AGI_UNLINKED_BUCKETS,
1776 .key_len = sizeof(xfs_agino_t),
1777 .key_offset = offsetof(struct xfs_iunlink,
1778 iu_next_unlinked),
1779 .head_offset = offsetof(struct xfs_iunlink, iu_rhash_head),
1780 .automatic_shrinking = true,
1781 .obj_cmpfn = xfs_iunlink_obj_cmpfn,
1782};
1783
1784/*
1785 * Return X, where X.next_unlinked == @agino. Returns NULLAGINO if no such
1786 * relation is found.
1787 */
1788static xfs_agino_t
1789xfs_iunlink_lookup_backref(
1790 struct xfs_perag *pag,
1791 xfs_agino_t agino)
1792{
1793 struct xfs_iunlink *iu;
1794
1795 iu = rhashtable_lookup_fast(&pag->pagi_unlinked_hash, &agino,
1796 xfs_iunlink_hash_params);
1797 return iu ? iu->iu_agino : NULLAGINO;
1798}
1799
1800/*
1801 * Take ownership of an iunlink cache entry and insert it into the hash table.
1802 * If successful, the entry will be owned by the cache; if not, it is freed.
1803 * Either way, the caller does not own @iu after this call.
1804 */
1805static int
1806xfs_iunlink_insert_backref(
1807 struct xfs_perag *pag,
1808 struct xfs_iunlink *iu)
1809{
1810 int error;
1811
1812 error = rhashtable_insert_fast(&pag->pagi_unlinked_hash,
1813 &iu->iu_rhash_head, xfs_iunlink_hash_params);
1814 /*
1815 * Fail loudly if there already was an entry because that's a sign of
1816 * corruption of in-memory data. Also fail loudly if we see an error
1817 * code we didn't anticipate from the rhashtable code. Currently we
1818 * only anticipate ENOMEM.
1819 */
1820 if (error) {
1821 WARN(error != -ENOMEM, "iunlink cache insert error %d", error);
1822 kmem_free(iu);
1823 }
1824 /*
1825 * Absorb any runtime errors that aren't a result of corruption because
1826 * this is a cache and we can always fall back to bucket list scanning.
1827 */
1828 if (error != 0 && error != -EEXIST)
1829 error = 0;
1830 return error;
1831}
1832
1833/* Remember that @prev_agino.next_unlinked = @this_agino. */
1834static int
1835xfs_iunlink_add_backref(
1836 struct xfs_perag *pag,
1837 xfs_agino_t prev_agino,
1838 xfs_agino_t this_agino)
1839{
1840 struct xfs_iunlink *iu;
1841
1842 if (XFS_TEST_ERROR(false, pag->pag_mount, XFS_ERRTAG_IUNLINK_FALLBACK))
1843 return 0;
1844
Tetsuo Handa707e0dd2019-08-26 12:06:22 -07001845 iu = kmem_zalloc(sizeof(*iu), KM_NOFS);
Darrick J. Wong9b247172019-02-07 10:37:16 -08001846 iu->iu_agino = prev_agino;
1847 iu->iu_next_unlinked = this_agino;
1848
1849 return xfs_iunlink_insert_backref(pag, iu);
1850}
1851
1852/*
1853 * Replace X.next_unlinked = @agino with X.next_unlinked = @next_unlinked.
1854 * If @next_unlinked is NULLAGINO, we drop the backref and exit. If there
1855 * wasn't any such entry then we don't bother.
1856 */
1857static int
1858xfs_iunlink_change_backref(
1859 struct xfs_perag *pag,
1860 xfs_agino_t agino,
1861 xfs_agino_t next_unlinked)
1862{
1863 struct xfs_iunlink *iu;
1864 int error;
1865
1866 /* Look up the old entry; if there wasn't one then exit. */
1867 iu = rhashtable_lookup_fast(&pag->pagi_unlinked_hash, &agino,
1868 xfs_iunlink_hash_params);
1869 if (!iu)
1870 return 0;
1871
1872 /*
1873 * Remove the entry. This shouldn't ever return an error, but if we
1874 * couldn't remove the old entry we don't want to add it again to the
1875 * hash table, and if the entry disappeared on us then someone's
1876 * violated the locking rules and we need to fail loudly. Either way
1877 * we cannot remove the inode because internal state is or would have
1878 * been corrupt.
1879 */
1880 error = rhashtable_remove_fast(&pag->pagi_unlinked_hash,
1881 &iu->iu_rhash_head, xfs_iunlink_hash_params);
1882 if (error)
1883 return error;
1884
1885 /* If there is no new next entry just free our item and return. */
1886 if (next_unlinked == NULLAGINO) {
1887 kmem_free(iu);
1888 return 0;
1889 }
1890
1891 /* Update the entry and re-add it to the hash table. */
1892 iu->iu_next_unlinked = next_unlinked;
1893 return xfs_iunlink_insert_backref(pag, iu);
1894}
1895
1896/* Set up the in-core predecessor structures. */
1897int
1898xfs_iunlink_init(
1899 struct xfs_perag *pag)
1900{
1901 return rhashtable_init(&pag->pagi_unlinked_hash,
1902 &xfs_iunlink_hash_params);
1903}
1904
1905/* Free the in-core predecessor structures. */
1906static void
1907xfs_iunlink_free_item(
1908 void *ptr,
1909 void *arg)
1910{
1911 struct xfs_iunlink *iu = ptr;
1912 bool *freed_anything = arg;
1913
1914 *freed_anything = true;
1915 kmem_free(iu);
1916}
1917
1918void
1919xfs_iunlink_destroy(
1920 struct xfs_perag *pag)
1921{
1922 bool freed_anything = false;
1923
1924 rhashtable_free_and_destroy(&pag->pagi_unlinked_hash,
1925 xfs_iunlink_free_item, &freed_anything);
1926
1927 ASSERT(freed_anything == false || XFS_FORCED_SHUTDOWN(pag->pag_mount));
1928}
1929
1930/*
Darrick J. Wong9a4a5112019-02-07 10:37:14 -08001931 * Point the AGI unlinked bucket at an inode and log the results. The caller
1932 * is responsible for validating the old value.
1933 */
1934STATIC int
1935xfs_iunlink_update_bucket(
1936 struct xfs_trans *tp,
1937 xfs_agnumber_t agno,
1938 struct xfs_buf *agibp,
1939 unsigned int bucket_index,
1940 xfs_agino_t new_agino)
1941{
Christoph Hellwig370c7822020-03-10 08:57:29 -07001942 struct xfs_agi *agi = agibp->b_addr;
Darrick J. Wong9a4a5112019-02-07 10:37:14 -08001943 xfs_agino_t old_value;
1944 int offset;
1945
1946 ASSERT(xfs_verify_agino_or_null(tp->t_mountp, agno, new_agino));
1947
1948 old_value = be32_to_cpu(agi->agi_unlinked[bucket_index]);
1949 trace_xfs_iunlink_update_bucket(tp->t_mountp, agno, bucket_index,
1950 old_value, new_agino);
1951
1952 /*
1953 * We should never find the head of the list already set to the value
1954 * passed in because either we're adding or removing ourselves from the
1955 * head of the list.
1956 */
Darrick J. Wonga5155b82019-11-02 09:40:53 -07001957 if (old_value == new_agino) {
Darrick J. Wong8d57c212020-03-11 10:37:54 -07001958 xfs_buf_mark_corrupt(agibp);
Darrick J. Wong9a4a5112019-02-07 10:37:14 -08001959 return -EFSCORRUPTED;
Darrick J. Wonga5155b82019-11-02 09:40:53 -07001960 }
Darrick J. Wong9a4a5112019-02-07 10:37:14 -08001961
1962 agi->agi_unlinked[bucket_index] = cpu_to_be32(new_agino);
1963 offset = offsetof(struct xfs_agi, agi_unlinked) +
1964 (sizeof(xfs_agino_t) * bucket_index);
1965 xfs_trans_log_buf(tp, agibp, offset, offset + sizeof(xfs_agino_t) - 1);
1966 return 0;
1967}
1968
Darrick J. Wongf2fc16a2019-02-07 10:37:15 -08001969/* Set an on-disk inode's next_unlinked pointer. */
1970STATIC void
1971xfs_iunlink_update_dinode(
1972 struct xfs_trans *tp,
1973 xfs_agnumber_t agno,
1974 xfs_agino_t agino,
1975 struct xfs_buf *ibp,
1976 struct xfs_dinode *dip,
1977 struct xfs_imap *imap,
1978 xfs_agino_t next_agino)
1979{
1980 struct xfs_mount *mp = tp->t_mountp;
1981 int offset;
1982
1983 ASSERT(xfs_verify_agino_or_null(mp, agno, next_agino));
1984
1985 trace_xfs_iunlink_update_dinode(mp, agno, agino,
1986 be32_to_cpu(dip->di_next_unlinked), next_agino);
1987
1988 dip->di_next_unlinked = cpu_to_be32(next_agino);
1989 offset = imap->im_boffset +
1990 offsetof(struct xfs_dinode, di_next_unlinked);
1991
1992 /* need to recalc the inode CRC if appropriate */
1993 xfs_dinode_calc_crc(mp, dip);
1994 xfs_trans_inode_buf(tp, ibp);
1995 xfs_trans_log_buf(tp, ibp, offset, offset + sizeof(xfs_agino_t) - 1);
Darrick J. Wongf2fc16a2019-02-07 10:37:15 -08001996}
1997
1998/* Set an in-core inode's unlinked pointer and return the old value. */
1999STATIC int
2000xfs_iunlink_update_inode(
2001 struct xfs_trans *tp,
2002 struct xfs_inode *ip,
2003 xfs_agnumber_t agno,
2004 xfs_agino_t next_agino,
2005 xfs_agino_t *old_next_agino)
2006{
2007 struct xfs_mount *mp = tp->t_mountp;
2008 struct xfs_dinode *dip;
2009 struct xfs_buf *ibp;
2010 xfs_agino_t old_value;
2011 int error;
2012
2013 ASSERT(xfs_verify_agino_or_null(mp, agno, next_agino));
2014
Christoph Hellwigaf9dcdd2021-03-29 11:11:37 -07002015 error = xfs_imap_to_bp(mp, tp, &ip->i_imap, &ibp);
Darrick J. Wongf2fc16a2019-02-07 10:37:15 -08002016 if (error)
2017 return error;
Christoph Hellwigaf9dcdd2021-03-29 11:11:37 -07002018 dip = xfs_buf_offset(ibp, ip->i_imap.im_boffset);
Darrick J. Wongf2fc16a2019-02-07 10:37:15 -08002019
2020 /* Make sure the old pointer isn't garbage. */
2021 old_value = be32_to_cpu(dip->di_next_unlinked);
2022 if (!xfs_verify_agino_or_null(mp, agno, old_value)) {
Darrick J. Wonga5155b82019-11-02 09:40:53 -07002023 xfs_inode_verifier_error(ip, -EFSCORRUPTED, __func__, dip,
2024 sizeof(*dip), __this_address);
Darrick J. Wongf2fc16a2019-02-07 10:37:15 -08002025 error = -EFSCORRUPTED;
2026 goto out;
2027 }
2028
2029 /*
2030 * Since we're updating a linked list, we should never find that the
2031 * current pointer is the same as the new value, unless we're
2032 * terminating the list.
2033 */
2034 *old_next_agino = old_value;
2035 if (old_value == next_agino) {
Darrick J. Wonga5155b82019-11-02 09:40:53 -07002036 if (next_agino != NULLAGINO) {
2037 xfs_inode_verifier_error(ip, -EFSCORRUPTED, __func__,
2038 dip, sizeof(*dip), __this_address);
Darrick J. Wongf2fc16a2019-02-07 10:37:15 -08002039 error = -EFSCORRUPTED;
Darrick J. Wonga5155b82019-11-02 09:40:53 -07002040 }
Darrick J. Wongf2fc16a2019-02-07 10:37:15 -08002041 goto out;
2042 }
2043
2044 /* Ok, update the new pointer. */
2045 xfs_iunlink_update_dinode(tp, agno, XFS_INO_TO_AGINO(mp, ip->i_ino),
2046 ibp, dip, &ip->i_imap, next_agino);
2047 return 0;
2048out:
2049 xfs_trans_brelse(tp, ibp);
2050 return error;
2051}
2052
Darrick J. Wong9a4a5112019-02-07 10:37:14 -08002053/*
Darrick J. Wongc4a6bf72019-02-13 11:15:17 -08002054 * This is called when the inode's link count has gone to 0 or we are creating
2055 * a tmpfile via O_TMPFILE. The inode @ip must have nlink == 0.
Dave Chinner54d7b5c2016-02-09 16:54:58 +11002056 *
2057 * We place the on-disk inode on a list in the AGI. It will be pulled from this
2058 * list when the inode is freed.
Linus Torvalds1da177e2005-04-16 15:20:36 -07002059 */
Dave Chinner54d7b5c2016-02-09 16:54:58 +11002060STATIC int
Linus Torvalds1da177e2005-04-16 15:20:36 -07002061xfs_iunlink(
Darrick J. Wong5837f622019-02-07 10:37:13 -08002062 struct xfs_trans *tp,
2063 struct xfs_inode *ip)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002064{
Darrick J. Wong5837f622019-02-07 10:37:13 -08002065 struct xfs_mount *mp = tp->t_mountp;
2066 struct xfs_agi *agi;
Darrick J. Wong5837f622019-02-07 10:37:13 -08002067 struct xfs_buf *agibp;
Darrick J. Wong86bfd372019-02-07 10:37:14 -08002068 xfs_agino_t next_agino;
Darrick J. Wong5837f622019-02-07 10:37:13 -08002069 xfs_agnumber_t agno = XFS_INO_TO_AGNO(mp, ip->i_ino);
2070 xfs_agino_t agino = XFS_INO_TO_AGINO(mp, ip->i_ino);
2071 short bucket_index = agino % XFS_AGI_UNLINKED_BUCKETS;
Darrick J. Wong5837f622019-02-07 10:37:13 -08002072 int error;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002073
Darrick J. Wongc4a6bf72019-02-13 11:15:17 -08002074 ASSERT(VFS_I(ip)->i_nlink == 0);
Dave Chinnerc19b3b052016-02-09 16:54:58 +11002075 ASSERT(VFS_I(ip)->i_mode != 0);
Darrick J. Wong4664c662019-02-07 10:37:16 -08002076 trace_xfs_iunlink(ip);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002077
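	/*
	 * The inode is inserted at the head of its AGI hash bucket: the
	 * bucket is selected by agino modulo XFS_AGI_UNLINKED_BUCKETS, the
	 * new inode's next_unlinked pointer is set to the old bucket head,
	 * and the bucket head is then pointed at this inode.
	 */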
Darrick J. Wong5837f622019-02-07 10:37:13 -08002078 /* Get the agi buffer first. It ensures lock ordering on the list. */
2079 error = xfs_read_agi(mp, tp, agno, &agibp);
Vlad Apostolov859d7182007-10-11 17:44:18 +10002080 if (error)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002081 return error;
Christoph Hellwig370c7822020-03-10 08:57:29 -07002082 agi = agibp->b_addr;
Christoph Hellwig5e1be0f2008-11-28 14:23:37 +11002083
Linus Torvalds1da177e2005-04-16 15:20:36 -07002084 /*
Darrick J. Wong86bfd372019-02-07 10:37:14 -08002085 * Get the index into the agi hash table for the list this inode will
2086 * go on. Make sure the pointer isn't garbage and that this inode
2087 * isn't already on the list.
Linus Torvalds1da177e2005-04-16 15:20:36 -07002088 */
Darrick J. Wong86bfd372019-02-07 10:37:14 -08002089 next_agino = be32_to_cpu(agi->agi_unlinked[bucket_index]);
2090 if (next_agino == agino ||
Darrick J. Wonga5155b82019-11-02 09:40:53 -07002091 !xfs_verify_agino_or_null(mp, agno, next_agino)) {
Darrick J. Wong8d57c212020-03-11 10:37:54 -07002092 xfs_buf_mark_corrupt(agibp);
Darrick J. Wong86bfd372019-02-07 10:37:14 -08002093 return -EFSCORRUPTED;
Darrick J. Wonga5155b82019-11-02 09:40:53 -07002094 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07002095
Darrick J. Wong86bfd372019-02-07 10:37:14 -08002096 if (next_agino != NULLAGINO) {
Darrick J. Wong9b247172019-02-07 10:37:16 -08002097 xfs_agino_t old_agino;
Darrick J. Wongf2fc16a2019-02-07 10:37:15 -08002098
Linus Torvalds1da177e2005-04-16 15:20:36 -07002099 /*
Darrick J. Wongf2fc16a2019-02-07 10:37:15 -08002100 * There is already another inode in the bucket, so point this
2101 * inode to the current head of the list.
Linus Torvalds1da177e2005-04-16 15:20:36 -07002102 */
Darrick J. Wongf2fc16a2019-02-07 10:37:15 -08002103 error = xfs_iunlink_update_inode(tp, ip, agno, next_agino,
2104 &old_agino);
Vlad Apostolovc319b582007-11-23 16:27:51 +11002105 if (error)
2106 return error;
Darrick J. Wongf2fc16a2019-02-07 10:37:15 -08002107 ASSERT(old_agino == NULLAGINO);
Darrick J. Wong9b247172019-02-07 10:37:16 -08002108
2109 /*
2110 * agino has been unlinked, add a backref from the next inode
2111 * back to agino.
2112 */
Gao Xiang92a00542020-07-13 09:13:00 -07002113 error = xfs_iunlink_add_backref(agibp->b_pag, agino, next_agino);
Darrick J. Wong9b247172019-02-07 10:37:16 -08002114 if (error)
2115 return error;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002116 }
2117
Darrick J. Wong9a4a5112019-02-07 10:37:14 -08002118 /* Point the head of the list to point to this inode. */
2119 return xfs_iunlink_update_bucket(tp, agno, agibp, bucket_index, agino);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002120}
2121
Darrick J. Wong23ffa522019-02-07 10:37:15 -08002122/* Return the imap, dinode pointer, and buffer for an inode. */
2123STATIC int
2124xfs_iunlink_map_ino(
2125 struct xfs_trans *tp,
2126 xfs_agnumber_t agno,
2127 xfs_agino_t agino,
2128 struct xfs_imap *imap,
2129 struct xfs_dinode **dipp,
2130 struct xfs_buf **bpp)
2131{
2132 struct xfs_mount *mp = tp->t_mountp;
2133 int error;
2134
2135 imap->im_blkno = 0;
2136 error = xfs_imap(mp, tp, XFS_AGINO_TO_INO(mp, agno, agino), imap, 0);
2137 if (error) {
2138 xfs_warn(mp, "%s: xfs_imap returned error %d.",
2139 __func__, error);
2140 return error;
2141 }
2142
Christoph Hellwigaf9dcdd2021-03-29 11:11:37 -07002143 error = xfs_imap_to_bp(mp, tp, imap, bpp);
Darrick J. Wong23ffa522019-02-07 10:37:15 -08002144 if (error) {
2145 xfs_warn(mp, "%s: xfs_imap_to_bp returned error %d.",
2146 __func__, error);
2147 return error;
2148 }
2149
Christoph Hellwigaf9dcdd2021-03-29 11:11:37 -07002150 *dipp = xfs_buf_offset(*bpp, imap->im_boffset);
Darrick J. Wong23ffa522019-02-07 10:37:15 -08002151 return 0;
2152}
2153
2154/*
2155 * Walk the unlinked chain from @head_agino until we find the inode that
2156 * points to @target_agino. Return the inode number, map, dinode pointer,
2157 * and inode cluster buffer of that inode as @agino, @imap, @dipp, and @bpp.
2158 *
2159 * @tp, @pag, @head_agino, and @target_agino are input parameters.
2160 * @agino, @imap, @dipp, and @bpp are all output parameters.
2161 *
2162 * Do not call this function if @target_agino is the head of the list.
2163 */
2164STATIC int
2165xfs_iunlink_map_prev(
2166 struct xfs_trans *tp,
2167 xfs_agnumber_t agno,
2168 xfs_agino_t head_agino,
2169 xfs_agino_t target_agino,
2170 xfs_agino_t *agino,
2171 struct xfs_imap *imap,
2172 struct xfs_dinode **dipp,
Darrick J. Wong9b247172019-02-07 10:37:16 -08002173 struct xfs_buf **bpp,
2174 struct xfs_perag *pag)
Darrick J. Wong23ffa522019-02-07 10:37:15 -08002175{
2176 struct xfs_mount *mp = tp->t_mountp;
2177 xfs_agino_t next_agino;
2178 int error;
2179
2180 ASSERT(head_agino != target_agino);
2181 *bpp = NULL;
2182
Darrick J. Wong9b247172019-02-07 10:37:16 -08002183 /* See if our backref cache can find it faster. */
2184 *agino = xfs_iunlink_lookup_backref(pag, target_agino);
2185 if (*agino != NULLAGINO) {
2186 error = xfs_iunlink_map_ino(tp, agno, *agino, imap, dipp, bpp);
2187 if (error)
2188 return error;
2189
2190 if (be32_to_cpu((*dipp)->di_next_unlinked) == target_agino)
2191 return 0;
2192
2193 /*
2194 * If we get here the cache contents were corrupt, so drop the
2195 * buffer and fall back to walking the bucket list.
2196 */
2197 xfs_trans_brelse(tp, *bpp);
2198 *bpp = NULL;
2199 WARN_ON_ONCE(1);
2200 }
2201
2202 trace_xfs_iunlink_map_prev_fallback(mp, agno);
2203
2204 /* Otherwise, walk the entire bucket until we find it. */
Darrick J. Wong23ffa522019-02-07 10:37:15 -08002205 next_agino = head_agino;
2206 while (next_agino != target_agino) {
2207 xfs_agino_t unlinked_agino;
2208
2209 if (*bpp)
2210 xfs_trans_brelse(tp, *bpp);
2211
2212 *agino = next_agino;
2213 error = xfs_iunlink_map_ino(tp, agno, next_agino, imap, dipp,
2214 bpp);
2215 if (error)
2216 return error;
2217
2218 unlinked_agino = be32_to_cpu((*dipp)->di_next_unlinked);
2219 /*
2220 * Make sure this pointer is valid and isn't an obvious
2221 * infinite loop.
2222 */
2223 if (!xfs_verify_agino(mp, agno, unlinked_agino) ||
2224 next_agino == unlinked_agino) {
2225 XFS_CORRUPTION_ERROR(__func__,
2226 XFS_ERRLEVEL_LOW, mp,
2227 *dipp, sizeof(**dipp));
2228 error = -EFSCORRUPTED;
2229 return error;
2230 }
2231 next_agino = unlinked_agino;
2232 }
2233
2234 return 0;
2235}
2236
Linus Torvalds1da177e2005-04-16 15:20:36 -07002237/*
2238 * Pull the on-disk inode from the AGI unlinked list.
2239 */
2240STATIC int
2241xfs_iunlink_remove(
Darrick J. Wong5837f622019-02-07 10:37:13 -08002242 struct xfs_trans *tp,
2243 struct xfs_inode *ip)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002244{
Darrick J. Wong5837f622019-02-07 10:37:13 -08002245 struct xfs_mount *mp = tp->t_mountp;
2246 struct xfs_agi *agi;
Darrick J. Wong5837f622019-02-07 10:37:13 -08002247 struct xfs_buf *agibp;
Darrick J. Wong5837f622019-02-07 10:37:13 -08002248 struct xfs_buf *last_ibp;
2249 struct xfs_dinode *last_dip = NULL;
Darrick J. Wong5837f622019-02-07 10:37:13 -08002250 xfs_agnumber_t agno = XFS_INO_TO_AGNO(mp, ip->i_ino);
2251 xfs_agino_t agino = XFS_INO_TO_AGINO(mp, ip->i_ino);
2252 xfs_agino_t next_agino;
Darrick J. Wongb1d2a062019-02-07 10:37:15 -08002253 xfs_agino_t head_agino;
Darrick J. Wong5837f622019-02-07 10:37:13 -08002254 short bucket_index = agino % XFS_AGI_UNLINKED_BUCKETS;
Darrick J. Wong5837f622019-02-07 10:37:13 -08002255 int error;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002256
Darrick J. Wong4664c662019-02-07 10:37:16 -08002257 trace_xfs_iunlink_remove(ip);
2258
Darrick J. Wong5837f622019-02-07 10:37:13 -08002259 /* Get the agi buffer first. It ensures lock ordering on the list. */
Christoph Hellwig5e1be0f2008-11-28 14:23:37 +11002260 error = xfs_read_agi(mp, tp, agno, &agibp);
2261 if (error)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002262 return error;
Christoph Hellwig370c7822020-03-10 08:57:29 -07002263 agi = agibp->b_addr;
Christoph Hellwig5e1be0f2008-11-28 14:23:37 +11002264
Linus Torvalds1da177e2005-04-16 15:20:36 -07002265 /*
Darrick J. Wong86bfd372019-02-07 10:37:14 -08002266 * Get the index into the agi hash table for the list this inode will
2267 * go on. Make sure the head pointer isn't garbage.
Linus Torvalds1da177e2005-04-16 15:20:36 -07002268 */
Darrick J. Wongb1d2a062019-02-07 10:37:15 -08002269 head_agino = be32_to_cpu(agi->agi_unlinked[bucket_index]);
2270 if (!xfs_verify_agino(mp, agno, head_agino)) {
Darrick J. Wongd2e73662018-06-04 11:27:51 -07002271 XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp,
2272 agi, sizeof(*agi));
2273 return -EFSCORRUPTED;
2274 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07002275
Darrick J. Wongb1d2a062019-02-07 10:37:15 -08002276 /*
2277 * Set our inode's next_unlinked pointer to NULL and then return
2278 * the old pointer value so that we can update whatever was previous
2279 * to us in the list to point to whatever was next in the list.
2280 */
2281 error = xfs_iunlink_update_inode(tp, ip, agno, NULLAGINO, &next_agino);
2282 if (error)
2283 return error;
Darrick J. Wong9a4a5112019-02-07 10:37:14 -08002284
Darrick J. Wong9b247172019-02-07 10:37:16 -08002285 /*
2286 * If there was a backref pointing from the next inode back to this
2287 * one, remove it because we've removed this inode from the list.
2288 *
2289 * Later, if this inode was in the middle of the list we'll update
2290 * this inode's backref to point from the next inode.
2291 */
2292 if (next_agino != NULLAGINO) {
Gao Xiang92a00542020-07-13 09:13:00 -07002293 error = xfs_iunlink_change_backref(agibp->b_pag, next_agino,
Darrick J. Wong9b247172019-02-07 10:37:16 -08002294 NULLAGINO);
2295 if (error)
Gao Xiang92a00542020-07-13 09:13:00 -07002296 return error;
Darrick J. Wong9b247172019-02-07 10:37:16 -08002297 }
2298
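	/*
	 * Two cases remain: if this inode was not at the head of its bucket,
	 * find its predecessor (via the backref cache or a bucket walk) and
	 * repoint that inode at our old next_unlinked value; if it was the
	 * head, simply repoint the bucket at the next inode in the chain.
	 */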
Gao Xiang92a00542020-07-13 09:13:00 -07002299 if (head_agino != agino) {
Darrick J. Wongf2fc16a2019-02-07 10:37:15 -08002300 struct xfs_imap imap;
2301 xfs_agino_t prev_agino;
2302
Darrick J. Wong23ffa522019-02-07 10:37:15 -08002303 /* We need to search the list for the inode being freed. */
Darrick J. Wongb1d2a062019-02-07 10:37:15 -08002304 error = xfs_iunlink_map_prev(tp, agno, head_agino, agino,
Darrick J. Wong9b247172019-02-07 10:37:16 -08002305 &prev_agino, &imap, &last_dip, &last_ibp,
Gao Xiang92a00542020-07-13 09:13:00 -07002306 agibp->b_pag);
Darrick J. Wong23ffa522019-02-07 10:37:15 -08002307 if (error)
Gao Xiang92a00542020-07-13 09:13:00 -07002308 return error;
Christoph Hellwig475ee412012-07-03 12:21:22 -04002309
Darrick J. Wongf2fc16a2019-02-07 10:37:15 -08002310 /* Point the previous inode on the list to the next inode. */
2311 xfs_iunlink_update_dinode(tp, agno, prev_agino, last_ibp,
2312 last_dip, &imap, next_agino);
Darrick J. Wong9b247172019-02-07 10:37:16 -08002313
2314 /*
2315 * Now we deal with the backref for this inode. If this inode
2316 * pointed at a real inode, change the backref that pointed to
2317 * us to point to our old next. If this inode was the end of
2318 * the list, delete the backref that pointed to us. Note that
2319 * change_backref takes care of deleting the backref if
2320 * next_agino is NULLAGINO.
2321 */
Gao Xiang92a00542020-07-13 09:13:00 -07002322 return xfs_iunlink_change_backref(agibp->b_pag, agino,
2323 next_agino);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002324 }
Darrick J. Wong9b247172019-02-07 10:37:16 -08002325
Gao Xiang92a00542020-07-13 09:13:00 -07002326 /* Point the head of the list to the next unlinked inode. */
2327 return xfs_iunlink_update_bucket(tp, agno, agibp, bucket_index,
2328 next_agino);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002329}
2330
Dave Chinner5b3eed72010-08-24 11:42:41 +10002331/*
Dave Chinner71e3e352020-06-29 14:49:18 -07002332 * Look up the specified inode number and, if it is not already marked
 2333 * XFS_ISTALE, mark it stale. We should only find clean inodes here that aren't
2334 * already stale.
Dave Chinner58061652020-03-24 20:10:30 -07002335 */
Dave Chinner71e3e352020-06-29 14:49:18 -07002336static void
2337xfs_ifree_mark_inode_stale(
2338 struct xfs_buf *bp,
Dave Chinner58061652020-03-24 20:10:30 -07002339 struct xfs_inode *free_ip,
Brian Fosterd9fdd0a2020-04-02 08:18:57 -07002340 xfs_ino_t inum)
Dave Chinner58061652020-03-24 20:10:30 -07002341{
Dave Chinner71e3e352020-06-29 14:49:18 -07002342 struct xfs_mount *mp = bp->b_mount;
2343 struct xfs_perag *pag = bp->b_pag;
2344 struct xfs_inode_log_item *iip;
Dave Chinner58061652020-03-24 20:10:30 -07002345 struct xfs_inode *ip;
2346
2347retry:
2348 rcu_read_lock();
2349 ip = radix_tree_lookup(&pag->pag_ici_root, XFS_INO_TO_AGINO(mp, inum));
2350
2351 /* Inode not in memory, nothing to do */
Dave Chinner71e3e352020-06-29 14:49:18 -07002352 if (!ip) {
2353 rcu_read_unlock();
2354 return;
2355 }
Dave Chinner58061652020-03-24 20:10:30 -07002356
2357 /*
 2358	 * Because this is an RCU-protected lookup, we could find a recently
2359 * freed or even reallocated inode during the lookup. We need to check
2360 * under the i_flags_lock for a valid inode here. Skip it if it is not
2361 * valid, the wrong inode or stale.
2362 */
2363 spin_lock(&ip->i_flags_lock);
Dave Chinner718ecc52020-08-17 16:41:01 -07002364 if (ip->i_ino != inum || __xfs_iflags_test(ip, XFS_ISTALE))
2365 goto out_iflags_unlock;
Dave Chinner58061652020-03-24 20:10:30 -07002366
2367 /*
2368 * Don't try to lock/unlock the current inode, but we _cannot_ skip the
2369 * other inodes that we did not find in the list attached to the buffer
2370 * and are not already marked stale. If we can't lock it, back off and
2371 * retry.
2372 */
2373 if (ip != free_ip) {
2374 if (!xfs_ilock_nowait(ip, XFS_ILOCK_EXCL)) {
Dave Chinner71e3e352020-06-29 14:49:18 -07002375 spin_unlock(&ip->i_flags_lock);
Dave Chinner58061652020-03-24 20:10:30 -07002376 rcu_read_unlock();
2377 delay(1);
2378 goto retry;
2379 }
Dave Chinner58061652020-03-24 20:10:30 -07002380 }
Dave Chinner71e3e352020-06-29 14:49:18 -07002381 ip->i_flags |= XFS_ISTALE;
Dave Chinner58061652020-03-24 20:10:30 -07002382
Dave Chinner58061652020-03-24 20:10:30 -07002383 /*
Dave Chinner718ecc52020-08-17 16:41:01 -07002384 * If the inode is flushing, it is already attached to the buffer. All
Dave Chinner71e3e352020-06-29 14:49:18 -07002385	 * we need to do here is mark the inode stale so buffer IO completion
2386 * will remove it from the AIL.
Dave Chinner58061652020-03-24 20:10:30 -07002387 */
Dave Chinner71e3e352020-06-29 14:49:18 -07002388 iip = ip->i_itemp;
Dave Chinner718ecc52020-08-17 16:41:01 -07002389 if (__xfs_iflags_test(ip, XFS_IFLUSHING)) {
Dave Chinner71e3e352020-06-29 14:49:18 -07002390 ASSERT(!list_empty(&iip->ili_item.li_bio_list));
2391 ASSERT(iip->ili_last_fields);
2392 goto out_iunlock;
Dave Chinner58061652020-03-24 20:10:30 -07002393 }
Dave Chinner58061652020-03-24 20:10:30 -07002394
Dave Chinner71e3e352020-06-29 14:49:18 -07002395 /*
Dave Chinner48d55e22020-06-29 14:49:18 -07002396 * Inodes not attached to the buffer can be released immediately.
2397 * Everything else has to go through xfs_iflush_abort() on journal
2398 * commit as the flock synchronises removal of the inode from the
2399 * cluster buffer against inode reclaim.
Dave Chinner71e3e352020-06-29 14:49:18 -07002400 */
Dave Chinner718ecc52020-08-17 16:41:01 -07002401 if (!iip || list_empty(&iip->ili_item.li_bio_list))
Dave Chinner71e3e352020-06-29 14:49:18 -07002402 goto out_iunlock;
Dave Chinner718ecc52020-08-17 16:41:01 -07002403
2404 __xfs_iflags_set(ip, XFS_IFLUSHING);
2405 spin_unlock(&ip->i_flags_lock);
2406 rcu_read_unlock();
Dave Chinner71e3e352020-06-29 14:49:18 -07002407
2408 /* we have a dirty inode in memory that has not yet been flushed. */
Dave Chinner71e3e352020-06-29 14:49:18 -07002409 spin_lock(&iip->ili_lock);
2410 iip->ili_last_fields = iip->ili_fields;
2411 iip->ili_fields = 0;
2412 iip->ili_fsync_fields = 0;
2413 spin_unlock(&iip->ili_lock);
Dave Chinner71e3e352020-06-29 14:49:18 -07002414 ASSERT(iip->ili_last_fields);
2415
Dave Chinner718ecc52020-08-17 16:41:01 -07002416 if (ip != free_ip)
2417 xfs_iunlock(ip, XFS_ILOCK_EXCL);
2418 return;
2419
Dave Chinner71e3e352020-06-29 14:49:18 -07002420out_iunlock:
2421 if (ip != free_ip)
2422 xfs_iunlock(ip, XFS_ILOCK_EXCL);
Dave Chinner718ecc52020-08-17 16:41:01 -07002423out_iflags_unlock:
2424 spin_unlock(&ip->i_flags_lock);
2425 rcu_read_unlock();
Dave Chinner58061652020-03-24 20:10:30 -07002426}
2427
2428/*
Zhi Yong Wu0b8182d2013-08-12 03:14:59 +00002429 * A big issue when freeing the inode cluster is that we _cannot_ skip any
Dave Chinner5b3eed72010-08-24 11:42:41 +10002430 * inodes that are in memory - they all must be marked stale and attached to
2431 * the cluster buffer.
2432 */
Chandra Seetharaman2a30f36d2011-09-20 13:56:55 +00002433STATIC int
Linus Torvalds1da177e2005-04-16 15:20:36 -07002434xfs_ifree_cluster(
Dave Chinner71e3e352020-06-29 14:49:18 -07002435 struct xfs_inode *free_ip,
2436 struct xfs_trans *tp,
Brian Foster09b56602015-05-29 09:26:03 +10002437 struct xfs_icluster *xic)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002438{
Dave Chinner71e3e352020-06-29 14:49:18 -07002439 struct xfs_mount *mp = free_ip->i_mount;
2440 struct xfs_ino_geometry *igeo = M_IGEO(mp);
2441 struct xfs_buf *bp;
2442 xfs_daddr_t blkno;
2443 xfs_ino_t inum = xic->first_ino;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002444 int nbufs;
Dave Chinner5b257b42010-06-03 16:22:29 +10002445 int i, j;
Brian Foster3cdaa182015-06-04 13:03:34 +10002446 int ioffset;
Darrick J. Wongce924642020-01-23 17:01:18 -08002447 int error;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002448
Darrick J. Wongef325952019-06-05 11:19:34 -07002449 nbufs = igeo->ialloc_blks / igeo->blocks_per_cluster;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002450
Darrick J. Wongef325952019-06-05 11:19:34 -07002451 for (j = 0; j < nbufs; j++, inum += igeo->inodes_per_cluster) {
Brian Foster09b56602015-05-29 09:26:03 +10002452 /*
2453 * The allocation bitmap tells us which inodes of the chunk were
2454 * physically allocated. Skip the cluster if an inode falls into
2455 * a sparse region.
2456 */
Brian Foster3cdaa182015-06-04 13:03:34 +10002457 ioffset = inum - xic->first_ino;
2458 if ((xic->alloc & XFS_INOBT_MASK(ioffset)) == 0) {
Darrick J. Wongef325952019-06-05 11:19:34 -07002459 ASSERT(ioffset % igeo->inodes_per_cluster == 0);
Brian Foster09b56602015-05-29 09:26:03 +10002460 continue;
2461 }
2462
Linus Torvalds1da177e2005-04-16 15:20:36 -07002463 blkno = XFS_AGB_TO_DADDR(mp, XFS_INO_TO_AGNO(mp, inum),
2464 XFS_INO_TO_AGBNO(mp, inum));
2465
Linus Torvalds1da177e2005-04-16 15:20:36 -07002466 /*
Dave Chinner5b257b42010-06-03 16:22:29 +10002467 * We obtain and lock the backing buffer first in the process
Dave Chinner718ecc52020-08-17 16:41:01 -07002468 * here to ensure dirty inodes attached to the buffer remain in
2469 * the flushing state while we mark them stale.
2470 *
Dave Chinner5b257b42010-06-03 16:22:29 +10002471 * If we scan the in-memory inodes first, then buffer IO can
2472 * complete before we get a lock on it, and hence we may fail
2473 * to mark all the active inodes on the buffer stale.
Linus Torvalds1da177e2005-04-16 15:20:36 -07002474 */
Darrick J. Wongce924642020-01-23 17:01:18 -08002475 error = xfs_trans_get_buf(tp, mp->m_ddev_targp, blkno,
2476 mp->m_bsize * igeo->blocks_per_cluster,
2477 XBF_UNMAPPED, &bp);
Dave Chinner71e3e352020-06-29 14:49:18 -07002478 if (error)
Darrick J. Wongce924642020-01-23 17:01:18 -08002479 return error;
Dave Chinnerb0f539d2012-11-14 17:53:49 +11002480
2481 /*
2482 * This buffer may not have been correctly initialised as we
2483 * didn't read it from disk. That's not important because we are
 2484	 * only using it to mark the buffer as stale in the log, and to
2485 * attach stale cached inodes on it. That means it will never be
2486 * dispatched for IO. If it is, we want to know about it, and we
 2487	 * want it to fail. We can achieve this by adding a write
2488 * verifier to the buffer.
2489 */
Colin Ian King8c4ce792018-12-12 08:46:20 -08002490 bp->b_ops = &xfs_inode_buf_ops;
Dave Chinnerb0f539d2012-11-14 17:53:49 +11002491
Dave Chinner5b257b42010-06-03 16:22:29 +10002492 /*
Dave Chinner71e3e352020-06-29 14:49:18 -07002493 * Now we need to set all the cached clean inodes as XFS_ISTALE,
2494 * too. This requires lookups, and will skip inodes that we've
2495 * already marked XFS_ISTALE.
Dave Chinner5b257b42010-06-03 16:22:29 +10002496 */
Dave Chinner71e3e352020-06-29 14:49:18 -07002497 for (i = 0; i < igeo->inodes_per_cluster; i++)
2498 xfs_ifree_mark_inode_stale(bp, free_ip, inum + i);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002499
Dave Chinner5b3eed72010-08-24 11:42:41 +10002500 xfs_trans_stale_inode_buf(tp, bp);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002501 xfs_trans_binval(tp, bp);
2502 }
Chandra Seetharaman2a30f36d2011-09-20 13:56:55 +00002503 return 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002504}
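The skip above hinges on the chunk's allocation bitmap: one bit per inode, sampled at each cluster boundary. A minimal userspace sketch of that arithmetic, assuming an invented bitmap and geometry (INOBT_MASK is a local stand-in for the kernel's XFS_INOBT_MASK(); nothing here is read from a real filesystem):

#include <stdio.h>
#include <stdint.h>

#define INOBT_MASK(i)	((uint64_t)1 << (i))

int main(void)
{
	uint64_t alloc = 0xffff0000ffffffffULL;	/* invented allocation bitmap */
	unsigned int inodes_per_chunk = 64;	/* one bit per inode */
	unsigned int inodes_per_cluster = 16;	/* assumed cluster geometry */
	unsigned int ioffset;

	/* One pass per cluster buffer, mirroring the loop in xfs_ifree_cluster(). */
	for (ioffset = 0; ioffset < inodes_per_chunk; ioffset += inodes_per_cluster) {
		if (!(alloc & INOBT_MASK(ioffset))) {
			printf("offset %2u: sparse region, cluster skipped\n", ioffset);
			continue;
		}
		printf("offset %2u: allocated, cluster buffer invalidated\n", ioffset);
	}
	return 0;
}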
2505
2506/*
2507 * This is called to return an inode to the inode free list.
2508 * The inode should already be truncated to 0 length and have
2509 * no pages associated with it. This routine also assumes that
2510 * the inode is already a part of the transaction.
2511 *
2512 * The on-disk copy of the inode will have been added to the list
2513 * of unlinked inodes in the AGI. We need to remove the inode from
2514 * that list atomically with respect to freeing it here.
2515 */
2516int
2517xfs_ifree(
Brian Foster0e0417f2018-07-11 22:26:07 -07002518 struct xfs_trans *tp,
2519 struct xfs_inode *ip)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002520{
2521 int error;
Brian Foster09b56602015-05-29 09:26:03 +10002522 struct xfs_icluster xic = { 0 };
Dave Chinner1319ebe2020-06-29 14:48:46 -07002523 struct xfs_inode_log_item *iip = ip->i_itemp;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002524
Christoph Hellwig579aa9c2008-04-22 17:34:00 +10002525 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
Dave Chinner54d7b5c2016-02-09 16:54:58 +11002526 ASSERT(VFS_I(ip)->i_nlink == 0);
Christoph Hellwigdaf83962020-05-18 10:27:22 -07002527 ASSERT(ip->i_df.if_nextents == 0);
Christoph Hellwig13d2c102021-03-29 11:11:40 -07002528 ASSERT(ip->i_disk_size == 0 || !S_ISREG(VFS_I(ip)->i_mode));
Christoph Hellwig6e73a542021-03-29 11:11:40 -07002529 ASSERT(ip->i_nblocks == 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002530
2531 /*
2532 * Pull the on-disk inode from the AGI unlinked list.
2533 */
2534 error = xfs_iunlink_remove(tp, ip);
Dave Chinner1baaed82013-06-27 16:04:50 +10002535 if (error)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002536 return error;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002537
Brian Foster0e0417f2018-07-11 22:26:07 -07002538 error = xfs_difree(tp, ip->i_ino, &xic);
Dave Chinner1baaed82013-06-27 16:04:50 +10002539 if (error)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002540 return error;
Dave Chinner1baaed82013-06-27 16:04:50 +10002541
Christoph Hellwigb2c20042020-05-18 10:27:21 -07002542 /*
2543 * Free any local-format data sitting around before we reset the
2544 * data fork to extents format. Note that the attr fork data has
2545 * already been freed by xfs_attr_inactive.
2546 */
Christoph Hellwigf7e67b22020-05-18 10:28:05 -07002547 if (ip->i_df.if_format == XFS_DINODE_FMT_LOCAL) {
Christoph Hellwigb2c20042020-05-18 10:27:21 -07002548 kmem_free(ip->i_df.if_u1.if_data);
2549 ip->i_df.if_u1.if_data = NULL;
2550 ip->i_df.if_bytes = 0;
2551 }
Darrick J. Wong98c4f782017-11-22 12:21:07 -08002552
Dave Chinnerc19b3b052016-02-09 16:54:58 +11002553 VFS_I(ip)->i_mode = 0; /* mark incore inode as free */
Christoph Hellwigdb073492021-03-29 11:11:44 -07002554 ip->i_diflags = 0;
Christoph Hellwig3e09ab82021-03-29 11:11:45 -07002555 ip->i_diflags2 = ip->i_mount->m_ino_geo.new_diflags2;
Christoph Hellwig7821ea32021-03-29 11:11:44 -07002556 ip->i_forkoff = 0; /* mark the attr fork not in use */
Christoph Hellwigf7e67b22020-05-18 10:28:05 -07002557 ip->i_df.if_format = XFS_DINODE_FMT_EXTENTS;
Christoph Hellwig9b3beb02021-03-29 11:11:38 -07002558 if (xfs_iflags_test(ip, XFS_IPRESERVE_DM_FIELDS))
2559 xfs_iflags_clear(ip, XFS_IPRESERVE_DM_FIELDS);
Eric Sandeendc1baa72018-03-28 17:48:08 -07002560
2561 /* Don't attempt to replay owner changes for a deleted inode */
Dave Chinner1319ebe2020-06-29 14:48:46 -07002562 spin_lock(&iip->ili_lock);
2563 iip->ili_fields &= ~(XFS_ILOG_AOWNER | XFS_ILOG_DOWNER);
2564 spin_unlock(&iip->ili_lock);
Eric Sandeendc1baa72018-03-28 17:48:08 -07002565
Linus Torvalds1da177e2005-04-16 15:20:36 -07002566 /*
2567 * Bump the generation count so no one will be confused
2568 * by reincarnations of this inode.
2569 */
Dave Chinner9e9a2672016-02-09 16:54:58 +11002570 VFS_I(ip)->i_generation++;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002571 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
2572
Brian Foster09b56602015-05-29 09:26:03 +10002573 if (xic.deleted)
2574 error = xfs_ifree_cluster(ip, tp, &xic);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002575
Chandra Seetharaman2a30f36d2011-09-20 13:56:55 +00002576 return error;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002577}
2578
2579/*
Christoph Hellwig60ec6782010-02-17 19:43:56 +00002580 * This is called to unpin an inode. The caller must have the inode locked
2581 * in at least shared mode so that the buffer cannot be subsequently pinned
2582 * once someone is waiting for it to be unpinned.
Linus Torvalds1da177e2005-04-16 15:20:36 -07002583 */
Christoph Hellwig60ec6782010-02-17 19:43:56 +00002584static void
Christoph Hellwigf392e632011-12-18 20:00:10 +00002585xfs_iunpin(
Christoph Hellwig60ec6782010-02-17 19:43:56 +00002586 struct xfs_inode *ip)
David Chinnera3f74ff2008-03-06 13:43:42 +11002587{
Christoph Hellwig579aa9c2008-04-22 17:34:00 +10002588 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_ILOCK_SHARED));
David Chinnera3f74ff2008-03-06 13:43:42 +11002589
Dave Chinner4aaf15d2010-03-08 11:24:07 +11002590 trace_xfs_inode_unpin_nowait(ip, _RET_IP_);
2591
David Chinnera3f74ff2008-03-06 13:43:42 +11002592 /* Give the log a push to start the unpinning I/O */
Christoph Hellwig656de4f2018-03-13 23:15:28 -07002593 xfs_log_force_lsn(ip->i_mount, ip->i_itemp->ili_last_lsn, 0, NULL);
Christoph Hellwiga14a3482010-01-19 09:56:46 +00002594
David Chinnera3f74ff2008-03-06 13:43:42 +11002595}
2596
Christoph Hellwigf392e632011-12-18 20:00:10 +00002597static void
2598__xfs_iunpin_wait(
2599 struct xfs_inode *ip)
2600{
2601 wait_queue_head_t *wq = bit_waitqueue(&ip->i_flags, __XFS_IPINNED_BIT);
2602 DEFINE_WAIT_BIT(wait, &ip->i_flags, __XFS_IPINNED_BIT);
2603
2604 xfs_iunpin(ip);
2605
2606 do {
Ingo Molnar21417132017-03-05 11:25:39 +01002607 prepare_to_wait(wq, &wait.wq_entry, TASK_UNINTERRUPTIBLE);
Christoph Hellwigf392e632011-12-18 20:00:10 +00002608 if (xfs_ipincount(ip))
2609 io_schedule();
2610 } while (xfs_ipincount(ip));
Ingo Molnar21417132017-03-05 11:25:39 +01002611 finish_wait(wq, &wait.wq_entry);
Christoph Hellwigf392e632011-12-18 20:00:10 +00002612}
2613
Dave Chinner777df5a2010-02-06 12:37:26 +11002614void
Linus Torvalds1da177e2005-04-16 15:20:36 -07002615xfs_iunpin_wait(
Christoph Hellwig60ec6782010-02-17 19:43:56 +00002616 struct xfs_inode *ip)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002617{
Christoph Hellwigf392e632011-12-18 20:00:10 +00002618 if (xfs_ipincount(ip))
2619 __xfs_iunpin_wait(ip);
David Chinnera3f74ff2008-03-06 13:43:42 +11002620}
Linus Torvalds1da177e2005-04-16 15:20:36 -07002621
Dave Chinner27320362013-10-29 22:11:44 +11002622/*
2623 * Removing an inode from the namespace involves removing the directory entry
2624 * and dropping the link count on the inode. Removing the directory entry can
2625 * result in locking an AGF (directory blocks were freed) and removing a link
2626 * count can result in placing the inode on an unlinked list which results in
2627 * locking an AGI.
2628 *
2629 * The big problem here is that we have an ordering constraint on AGF and AGI
2630 * locking - inode allocation locks the AGI, then can allocate a new extent for
2631 * new inodes, locking the AGF after the AGI. Similarly, freeing the inode
2632 * removes the inode from the unlinked list, requiring that we lock the AGI
2633 * first, and then freeing the inode can result in an inode chunk being freed
2634 * and hence freeing disk space requiring that we lock an AGF.
2635 *
2636 * Hence the ordering that is imposed by other parts of the code is AGI before
2637 * AGF. This means we cannot remove the directory entry before we drop the inode
2638 * reference count and put it on the unlinked list as this results in a lock
2639 * order of AGF then AGI, and this can deadlock against inode allocation and
2640 * freeing. Therefore we must drop the link counts before we remove the
2641 * directory entry.
2642 *
2643 * This is still safe from a transactional point of view - it is not until we
Darrick J. Wong310a75a2016-08-03 11:18:10 +10002644 * get to xfs_defer_finish() that we have the possibility of multiple
Dave Chinner27320362013-10-29 22:11:44 +11002645 * transactions in this operation. Hence as long as we remove the directory
2646 * entry and drop the link count in the first transaction of the remove
2647 * operation, there are no transactional constraints on the ordering here.
2648 */
Dave Chinnerc24b5df2013-08-12 20:49:45 +10002649int
2650xfs_remove(
2651 xfs_inode_t *dp,
2652 struct xfs_name *name,
2653 xfs_inode_t *ip)
2654{
2655 xfs_mount_t *mp = dp->i_mount;
2656 xfs_trans_t *tp = NULL;
Dave Chinnerc19b3b052016-02-09 16:54:58 +11002657 int is_dir = S_ISDIR(VFS_I(ip)->i_mode);
Dave Chinnerc24b5df2013-08-12 20:49:45 +10002658 int error = 0;
Dave Chinnerc24b5df2013-08-12 20:49:45 +10002659 uint resblks;
Dave Chinnerc24b5df2013-08-12 20:49:45 +10002660
2661 trace_xfs_remove(dp, name);
2662
2663 if (XFS_FORCED_SHUTDOWN(mp))
Dave Chinner24513372014-06-25 14:58:08 +10002664 return -EIO;
Dave Chinnerc24b5df2013-08-12 20:49:45 +10002665
Darrick J. Wongc14cfcc2018-05-04 15:30:21 -07002666 error = xfs_qm_dqattach(dp);
Dave Chinnerc24b5df2013-08-12 20:49:45 +10002667 if (error)
2668 goto std_return;
2669
Darrick J. Wongc14cfcc2018-05-04 15:30:21 -07002670 error = xfs_qm_dqattach(ip);
Dave Chinnerc24b5df2013-08-12 20:49:45 +10002671 if (error)
2672 goto std_return;
2673
Dave Chinnerc24b5df2013-08-12 20:49:45 +10002674 /*
2675 * We try to get the real space reservation first,
2676 * allowing for directory btree deletion(s) implying
2677 * possible bmap insert(s). If we can't get the space
 2678	 * reservation then we use 0 instead, and avoid the bmap
 2679	 * btree insert(s) in the directory code: if a bmap insert
 2680	 * would otherwise be needed, the directory code trims the
 2681	 * LAST block from the directory instead.
2682 */
2683 resblks = XFS_REMOVE_SPACE_RES(mp);
Christoph Hellwig253f4912016-04-06 09:19:55 +10002684 error = xfs_trans_alloc(mp, &M_RES(mp)->tr_remove, resblks, 0, 0, &tp);
Dave Chinner24513372014-06-25 14:58:08 +10002685 if (error == -ENOSPC) {
Dave Chinnerc24b5df2013-08-12 20:49:45 +10002686 resblks = 0;
Christoph Hellwig253f4912016-04-06 09:19:55 +10002687 error = xfs_trans_alloc(mp, &M_RES(mp)->tr_remove, 0, 0, 0,
2688 &tp);
Dave Chinnerc24b5df2013-08-12 20:49:45 +10002689 }
2690 if (error) {
Dave Chinner24513372014-06-25 14:58:08 +10002691 ASSERT(error != -ENOSPC);
Christoph Hellwig253f4912016-04-06 09:19:55 +10002692 goto std_return;
Dave Chinnerc24b5df2013-08-12 20:49:45 +10002693 }
2694
Darrick J. Wong7c2d2382018-01-26 15:27:33 -08002695 xfs_lock_two_inodes(dp, XFS_ILOCK_EXCL, ip, XFS_ILOCK_EXCL);
Dave Chinnerc24b5df2013-08-12 20:49:45 +10002696
Christoph Hellwig65523212016-11-30 14:33:25 +11002697 xfs_trans_ijoin(tp, dp, XFS_ILOCK_EXCL);
Dave Chinnerc24b5df2013-08-12 20:49:45 +10002698 xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
2699
2700 /*
2701 * If we're removing a directory perform some additional validation.
2702 */
2703 if (is_dir) {
Dave Chinner54d7b5c2016-02-09 16:54:58 +11002704 ASSERT(VFS_I(ip)->i_nlink >= 2);
2705 if (VFS_I(ip)->i_nlink != 2) {
Dave Chinner24513372014-06-25 14:58:08 +10002706 error = -ENOTEMPTY;
Dave Chinnerc24b5df2013-08-12 20:49:45 +10002707 goto out_trans_cancel;
2708 }
2709 if (!xfs_dir_isempty(ip)) {
Dave Chinner24513372014-06-25 14:58:08 +10002710 error = -ENOTEMPTY;
Dave Chinnerc24b5df2013-08-12 20:49:45 +10002711 goto out_trans_cancel;
2712 }
Dave Chinnerc24b5df2013-08-12 20:49:45 +10002713
Dave Chinner27320362013-10-29 22:11:44 +11002714 /* Drop the link from ip's "..". */
Dave Chinnerc24b5df2013-08-12 20:49:45 +10002715 error = xfs_droplink(tp, dp);
2716 if (error)
Dave Chinner27320362013-10-29 22:11:44 +11002717 goto out_trans_cancel;
Dave Chinnerc24b5df2013-08-12 20:49:45 +10002718
Dave Chinner27320362013-10-29 22:11:44 +11002719 /* Drop the "." link from ip to self. */
Dave Chinnerc24b5df2013-08-12 20:49:45 +10002720 error = xfs_droplink(tp, ip);
2721 if (error)
Dave Chinner27320362013-10-29 22:11:44 +11002722 goto out_trans_cancel;
Dave Chinnerc24b5df2013-08-12 20:49:45 +10002723 } else {
2724 /*
2725 * When removing a non-directory we need to log the parent
2726 * inode here. For a directory this is done implicitly
2727 * by the xfs_droplink call for the ".." entry.
2728 */
2729 xfs_trans_log_inode(tp, dp, XFS_ILOG_CORE);
2730 }
Dave Chinner27320362013-10-29 22:11:44 +11002731 xfs_trans_ichgtime(tp, dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
Dave Chinnerc24b5df2013-08-12 20:49:45 +10002732
Dave Chinner27320362013-10-29 22:11:44 +11002733 /* Drop the link from dp to ip. */
Dave Chinnerc24b5df2013-08-12 20:49:45 +10002734 error = xfs_droplink(tp, ip);
2735 if (error)
Dave Chinner27320362013-10-29 22:11:44 +11002736 goto out_trans_cancel;
Dave Chinnerc24b5df2013-08-12 20:49:45 +10002737
Brian Foster381eee62018-07-11 22:26:21 -07002738 error = xfs_dir_removename(tp, dp, name, ip->i_ino, resblks);
Dave Chinner27320362013-10-29 22:11:44 +11002739 if (error) {
Dave Chinner24513372014-06-25 14:58:08 +10002740 ASSERT(error != -ENOENT);
Brian Fosterc8eac492018-07-24 13:43:13 -07002741 goto out_trans_cancel;
Dave Chinner27320362013-10-29 22:11:44 +11002742 }
2743
Dave Chinnerc24b5df2013-08-12 20:49:45 +10002744 /*
2745 * If this is a synchronous mount, make sure that the
2746 * remove transaction goes to disk before returning to
2747 * the user.
2748 */
2749 if (mp->m_flags & (XFS_MOUNT_WSYNC|XFS_MOUNT_DIRSYNC))
2750 xfs_trans_set_sync(tp);
2751
Christoph Hellwig70393312015-06-04 13:48:08 +10002752 error = xfs_trans_commit(tp);
Dave Chinnerc24b5df2013-08-12 20:49:45 +10002753 if (error)
2754 goto std_return;
2755
Christoph Hellwig2cd2ef62014-04-23 07:11:51 +10002756 if (is_dir && xfs_inode_is_filestream(ip))
Dave Chinnerc24b5df2013-08-12 20:49:45 +10002757 xfs_filestream_deassociate(ip);
2758
2759 return 0;
2760
Dave Chinnerc24b5df2013-08-12 20:49:45 +10002761 out_trans_cancel:
Christoph Hellwig4906e212015-06-04 13:47:56 +10002762 xfs_trans_cancel(tp);
Dave Chinnerc24b5df2013-08-12 20:49:45 +10002763 std_return:
2764 return error;
2765}
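The AGI-before-AGF rule described in the comment above xfs_remove() is deadlock avoidance by a single global lock order. The sketch below is only a userspace analogy of that principle; the two mutexes stand in for the AGI and AGF buffer locks and are not the kernel objects.

#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t agi_lock = PTHREAD_MUTEX_INITIALIZER;	/* stand-in for the AGI buffer lock */
static pthread_mutex_t agf_lock = PTHREAD_MUTEX_INITIALIZER;	/* stand-in for the AGF buffer lock */

/* Every path that needs both locks takes them in the same order: AGI, then AGF. */
static void do_locked(const char *op)
{
	pthread_mutex_lock(&agi_lock);
	pthread_mutex_lock(&agf_lock);
	printf("%s ran under AGI then AGF\n", op);
	pthread_mutex_unlock(&agf_lock);
	pthread_mutex_unlock(&agi_lock);
}

int main(void)
{
	/* Allocation and free paths agree on the order, so neither can deadlock the other. */
	do_locked("inode allocation");
	do_locked("inode free");
	return 0;
}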
2766
Dave Chinnerf6bba202013-08-12 20:49:46 +10002767/*
2768 * Enter all inodes for a rename transaction into a sorted array.
2769 */
Dave Chinner95afcf52015-03-25 14:03:32 +11002770#define __XFS_SORT_INODES 5
Dave Chinnerf6bba202013-08-12 20:49:46 +10002771STATIC void
2772xfs_sort_for_rename(
Dave Chinner95afcf52015-03-25 14:03:32 +11002773 struct xfs_inode *dp1, /* in: old (source) directory inode */
2774 struct xfs_inode *dp2, /* in: new (target) directory inode */
2775 struct xfs_inode *ip1, /* in: inode of old entry */
2776 struct xfs_inode *ip2, /* in: inode of new entry */
2777 struct xfs_inode *wip, /* in: whiteout inode */
2778 struct xfs_inode **i_tab,/* out: sorted array of inodes */
2779 int *num_inodes) /* in/out: inodes in array */
Dave Chinnerf6bba202013-08-12 20:49:46 +10002780{
Dave Chinnerf6bba202013-08-12 20:49:46 +10002781 int i, j;
2782
Dave Chinner95afcf52015-03-25 14:03:32 +11002783 ASSERT(*num_inodes == __XFS_SORT_INODES);
2784 memset(i_tab, 0, *num_inodes * sizeof(struct xfs_inode *));
2785
Dave Chinnerf6bba202013-08-12 20:49:46 +10002786 /*
2787 * i_tab contains a list of pointers to inodes. We initialize
2788 * the table here & we'll sort it. We will then use it to
2789 * order the acquisition of the inode locks.
2790 *
2791 * Note that the table may contain duplicates. e.g., dp1 == dp2.
2792 */
Dave Chinner95afcf52015-03-25 14:03:32 +11002793 i = 0;
2794 i_tab[i++] = dp1;
2795 i_tab[i++] = dp2;
2796 i_tab[i++] = ip1;
2797 if (ip2)
2798 i_tab[i++] = ip2;
2799 if (wip)
2800 i_tab[i++] = wip;
2801 *num_inodes = i;
Dave Chinnerf6bba202013-08-12 20:49:46 +10002802
2803 /*
2804 * Sort the elements via bubble sort. (Remember, there are at
Dave Chinner95afcf52015-03-25 14:03:32 +11002805 * most 5 elements to sort, so this is adequate.)
Dave Chinnerf6bba202013-08-12 20:49:46 +10002806 */
2807 for (i = 0; i < *num_inodes; i++) {
2808 for (j = 1; j < *num_inodes; j++) {
2809 if (i_tab[j]->i_ino < i_tab[j-1]->i_ino) {
Dave Chinner95afcf52015-03-25 14:03:32 +11002810 struct xfs_inode *temp = i_tab[j];
Dave Chinnerf6bba202013-08-12 20:49:46 +10002811 i_tab[j] = i_tab[j-1];
2812 i_tab[j-1] = temp;
2813 }
2814 }
2815 }
2816}
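A standalone sketch of what the sort buys: lock acquisition in ascending inode-number order, with a duplicate entry (e.g. dp1 == dp2 for a rename within one directory) taken only once. The inode numbers are invented, and the duplicate skip reflects how the sorted table is consumed when the inodes are later locked.

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	/* dp1, dp2, ip1, ip2, wip by invented inode number; dp1 == dp2 here. */
	uint64_t ino[5] = { 128, 128, 3075, 512, 9001 };
	int n = 5, i, j;

	/* Bubble sort, as in xfs_sort_for_rename() above. */
	for (i = 0; i < n; i++)
		for (j = 1; j < n; j++)
			if (ino[j] < ino[j - 1]) {
				uint64_t tmp = ino[j];
				ino[j] = ino[j - 1];
				ino[j - 1] = tmp;
			}

	/* Take locks in ascending order; a repeated inode is locked only once. */
	for (i = 0; i < n; i++) {
		if (i && ino[i] == ino[i - 1])
			continue;
		printf("lock inode %llu\n", (unsigned long long)ino[i]);
	}
	return 0;
}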
2817
Dave Chinner310606b2015-03-25 14:06:07 +11002818static int
2819xfs_finish_rename(
Brian Fosterc9cfdb32018-07-11 22:26:08 -07002820 struct xfs_trans *tp)
Dave Chinner310606b2015-03-25 14:06:07 +11002821{
Dave Chinner310606b2015-03-25 14:06:07 +11002822 /*
2823 * If this is a synchronous mount, make sure that the rename transaction
2824 * goes to disk before returning to the user.
2825 */
2826 if (tp->t_mountp->m_flags & (XFS_MOUNT_WSYNC|XFS_MOUNT_DIRSYNC))
2827 xfs_trans_set_sync(tp);
2828
Christoph Hellwig70393312015-06-04 13:48:08 +10002829 return xfs_trans_commit(tp);
Dave Chinner310606b2015-03-25 14:06:07 +11002830}
2831
Dave Chinnerf6bba202013-08-12 20:49:46 +10002832/*
Carlos Maiolinod31a1822014-12-24 08:51:42 +11002833 * xfs_cross_rename()
2834 *
Bhaskar Chowdhury01452252021-03-23 16:59:30 -07002835 * responsible for handling the RENAME_EXCHANGE flag in the renameat2() syscall
Carlos Maiolinod31a1822014-12-24 08:51:42 +11002836 */
2837STATIC int
2838xfs_cross_rename(
2839 struct xfs_trans *tp,
2840 struct xfs_inode *dp1,
2841 struct xfs_name *name1,
2842 struct xfs_inode *ip1,
2843 struct xfs_inode *dp2,
2844 struct xfs_name *name2,
2845 struct xfs_inode *ip2,
Carlos Maiolinod31a1822014-12-24 08:51:42 +11002846 int spaceres)
2847{
2848 int error = 0;
2849 int ip1_flags = 0;
2850 int ip2_flags = 0;
2851 int dp2_flags = 0;
2852
2853 /* Swap inode number for dirent in first parent */
Brian Foster381eee62018-07-11 22:26:21 -07002854 error = xfs_dir_replace(tp, dp1, name1, ip2->i_ino, spaceres);
Carlos Maiolinod31a1822014-12-24 08:51:42 +11002855 if (error)
Dave Chinnereeacd322015-03-25 14:08:07 +11002856 goto out_trans_abort;
Carlos Maiolinod31a1822014-12-24 08:51:42 +11002857
2858 /* Swap inode number for dirent in second parent */
Brian Foster381eee62018-07-11 22:26:21 -07002859 error = xfs_dir_replace(tp, dp2, name2, ip1->i_ino, spaceres);
Carlos Maiolinod31a1822014-12-24 08:51:42 +11002860 if (error)
Dave Chinnereeacd322015-03-25 14:08:07 +11002861 goto out_trans_abort;
Carlos Maiolinod31a1822014-12-24 08:51:42 +11002862
2863 /*
2864 * If we're renaming one or more directories across different parents,
2865 * update the respective ".." entries (and link counts) to match the new
2866 * parents.
2867 */
2868 if (dp1 != dp2) {
2869 dp2_flags = XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG;
2870
Dave Chinnerc19b3b052016-02-09 16:54:58 +11002871 if (S_ISDIR(VFS_I(ip2)->i_mode)) {
Carlos Maiolinod31a1822014-12-24 08:51:42 +11002872 error = xfs_dir_replace(tp, ip2, &xfs_name_dotdot,
Brian Foster381eee62018-07-11 22:26:21 -07002873 dp1->i_ino, spaceres);
Carlos Maiolinod31a1822014-12-24 08:51:42 +11002874 if (error)
Dave Chinnereeacd322015-03-25 14:08:07 +11002875 goto out_trans_abort;
Carlos Maiolinod31a1822014-12-24 08:51:42 +11002876
2877 /* transfer ip2 ".." reference to dp1 */
Dave Chinnerc19b3b052016-02-09 16:54:58 +11002878 if (!S_ISDIR(VFS_I(ip1)->i_mode)) {
Carlos Maiolinod31a1822014-12-24 08:51:42 +11002879 error = xfs_droplink(tp, dp2);
2880 if (error)
Dave Chinnereeacd322015-03-25 14:08:07 +11002881 goto out_trans_abort;
Eric Sandeen91083262019-05-01 20:26:30 -07002882 xfs_bumplink(tp, dp1);
Carlos Maiolinod31a1822014-12-24 08:51:42 +11002883 }
2884
2885 /*
2886 * Although ip1 isn't changed here, userspace needs
2887 * to be warned about the change, so that applications
 2888	 * relying on it (like backup ones) will properly
 2889	 * notice the change.
2890 */
2891 ip1_flags |= XFS_ICHGTIME_CHG;
2892 ip2_flags |= XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG;
2893 }
2894
Dave Chinnerc19b3b052016-02-09 16:54:58 +11002895 if (S_ISDIR(VFS_I(ip1)->i_mode)) {
Carlos Maiolinod31a1822014-12-24 08:51:42 +11002896 error = xfs_dir_replace(tp, ip1, &xfs_name_dotdot,
Brian Foster381eee62018-07-11 22:26:21 -07002897 dp2->i_ino, spaceres);
Carlos Maiolinod31a1822014-12-24 08:51:42 +11002898 if (error)
Dave Chinnereeacd322015-03-25 14:08:07 +11002899 goto out_trans_abort;
Carlos Maiolinod31a1822014-12-24 08:51:42 +11002900
2901 /* transfer ip1 ".." reference to dp2 */
Dave Chinnerc19b3b052016-02-09 16:54:58 +11002902 if (!S_ISDIR(VFS_I(ip2)->i_mode)) {
Carlos Maiolinod31a1822014-12-24 08:51:42 +11002903 error = xfs_droplink(tp, dp1);
2904 if (error)
Dave Chinnereeacd322015-03-25 14:08:07 +11002905 goto out_trans_abort;
Eric Sandeen91083262019-05-01 20:26:30 -07002906 xfs_bumplink(tp, dp2);
Carlos Maiolinod31a1822014-12-24 08:51:42 +11002907 }
2908
2909 /*
2910 * Although ip2 isn't changed here, userspace needs
2911 * to be warned about the change, so that applications
 2912	 * relying on it (like backup ones) will properly
 2913	 * notice the change.
2914 */
2915 ip1_flags |= XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG;
2916 ip2_flags |= XFS_ICHGTIME_CHG;
2917 }
2918 }
2919
2920 if (ip1_flags) {
2921 xfs_trans_ichgtime(tp, ip1, ip1_flags);
2922 xfs_trans_log_inode(tp, ip1, XFS_ILOG_CORE);
2923 }
2924 if (ip2_flags) {
2925 xfs_trans_ichgtime(tp, ip2, ip2_flags);
2926 xfs_trans_log_inode(tp, ip2, XFS_ILOG_CORE);
2927 }
2928 if (dp2_flags) {
2929 xfs_trans_ichgtime(tp, dp2, dp2_flags);
2930 xfs_trans_log_inode(tp, dp2, XFS_ILOG_CORE);
2931 }
2932 xfs_trans_ichgtime(tp, dp1, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
2933 xfs_trans_log_inode(tp, dp1, XFS_ILOG_CORE);
Brian Fosterc9cfdb32018-07-11 22:26:08 -07002934 return xfs_finish_rename(tp);
Dave Chinnereeacd322015-03-25 14:08:07 +11002935
2936out_trans_abort:
Christoph Hellwig4906e212015-06-04 13:47:56 +10002937 xfs_trans_cancel(tp);
Carlos Maiolinod31a1822014-12-24 08:51:42 +11002938 return error;
2939}
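xfs_cross_rename() is the XFS side of an atomic swap requested from userspace. A minimal caller might look like the sketch below: renameat2() has a glibc wrapper since 2.28 (older systems can go through syscall(SYS_renameat2, ...)), RENAME_EXCHANGE is defined locally in case the libc headers do not expose it, and the path names are placeholders.

#define _GNU_SOURCE
#include <stdio.h>
#include <fcntl.h>	/* AT_FDCWD */
#include <string.h>
#include <errno.h>

#ifndef RENAME_EXCHANGE
#define RENAME_EXCHANGE	(1 << 1)	/* from include/uapi/linux/fs.h */
#endif

int main(void)
{
	/* Atomically swap the two names; both paths must already exist. */
	if (renameat2(AT_FDCWD, "config.new", AT_FDCWD, "config.current",
		      RENAME_EXCHANGE) < 0) {
		fprintf(stderr, "renameat2: %s\n", strerror(errno));
		return 1;
	}
	return 0;
}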
2940
2941/*
Dave Chinner7dcf5c32015-03-25 14:08:08 +11002942 * xfs_rename_alloc_whiteout()
2943 *
Randy Dunlapb63da6c2020-08-05 08:49:58 -07002944 * Return a referenced, unlinked, unlocked inode that can be used as a
Dave Chinner7dcf5c32015-03-25 14:08:08 +11002945 * whiteout in a rename transaction. We use a tmpfile inode here so that if we
 2946	 * crash between allocating the inode and linking it into the rename transaction,
2947 * recovery will free the inode and we won't leak it.
2948 */
2949static int
2950xfs_rename_alloc_whiteout(
Christoph Hellwigf736d932021-01-21 14:19:58 +01002951 struct user_namespace *mnt_userns,
Dave Chinner7dcf5c32015-03-25 14:08:08 +11002952 struct xfs_inode *dp,
2953 struct xfs_inode **wip)
2954{
2955 struct xfs_inode *tmpfile;
2956 int error;
2957
Christoph Hellwigf736d932021-01-21 14:19:58 +01002958 error = xfs_create_tmpfile(mnt_userns, dp, S_IFCHR | WHITEOUT_MODE,
2959 &tmpfile);
Dave Chinner7dcf5c32015-03-25 14:08:08 +11002960 if (error)
2961 return error;
2962
Brian Foster22419ac2015-05-29 08:14:55 +10002963 /*
2964 * Prepare the tmpfile inode as if it were created through the VFS.
Darrick J. Wongc4a6bf72019-02-13 11:15:17 -08002965 * Complete the inode setup and flag it as linkable. nlink is already
2966 * zero, so we can skip the drop_nlink.
Brian Foster22419ac2015-05-29 08:14:55 +10002967 */
Christoph Hellwig2b3d1d42016-04-06 07:48:27 +10002968 xfs_setup_iops(tmpfile);
Dave Chinner7dcf5c32015-03-25 14:08:08 +11002969 xfs_finish_inode_setup(tmpfile);
2970 VFS_I(tmpfile)->i_state |= I_LINKABLE;
2971
2972 *wip = tmpfile;
2973 return 0;
2974}
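The allocate-unlinked-then-link pattern used for the whiteout has a close userspace analogue in O_TMPFILE: the inode lives only on the unlinked list (and is reclaimed after a crash) until it is explicitly given a name. A rough sketch, assuming /proc is mounted and using placeholder paths:

#define _GNU_SOURCE
#include <stdio.h>
#include <fcntl.h>
#include <unistd.h>
#include <string.h>
#include <errno.h>

int main(void)
{
	char path[64];
	int fd;

	/* Anonymous inode: allocated and writable, but with no directory entry yet. */
	fd = open("/tmp", O_TMPFILE | O_WRONLY, 0600);
	if (fd < 0) {
		fprintf(stderr, "open(O_TMPFILE): %s\n", strerror(errno));
		return 1;
	}
	if (write(fd, "data\n", 5) != 5)
		perror("write");

	/* Publish a name only once the file is fully set up; a crash before this leaks nothing. */
	snprintf(path, sizeof(path), "/proc/self/fd/%d", fd);
	if (linkat(AT_FDCWD, path, AT_FDCWD, "/tmp/published", AT_SYMLINK_FOLLOW) < 0)
		fprintf(stderr, "linkat: %s\n", strerror(errno));

	close(fd);
	return 0;
}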
2975
2976/*
Dave Chinnerf6bba202013-08-12 20:49:46 +10002977 * xfs_rename
2978 */
2979int
2980xfs_rename(
Christoph Hellwigf736d932021-01-21 14:19:58 +01002981 struct user_namespace *mnt_userns,
Dave Chinner7dcf5c32015-03-25 14:08:08 +11002982 struct xfs_inode *src_dp,
2983 struct xfs_name *src_name,
2984 struct xfs_inode *src_ip,
2985 struct xfs_inode *target_dp,
2986 struct xfs_name *target_name,
2987 struct xfs_inode *target_ip,
2988 unsigned int flags)
Dave Chinnerf6bba202013-08-12 20:49:46 +10002989{
Dave Chinner7dcf5c32015-03-25 14:08:08 +11002990 struct xfs_mount *mp = src_dp->i_mount;
2991 struct xfs_trans *tp;
Dave Chinner7dcf5c32015-03-25 14:08:08 +11002992 struct xfs_inode *wip = NULL; /* whiteout inode */
2993 struct xfs_inode *inodes[__XFS_SORT_INODES];
Darrick J. Wong6da1b4b2021-01-22 16:48:32 -08002994 int i;
Dave Chinner7dcf5c32015-03-25 14:08:08 +11002995 int num_inodes = __XFS_SORT_INODES;
Dave Chinner2b936812015-03-25 15:12:30 +11002996 bool new_parent = (src_dp != target_dp);
Dave Chinnerc19b3b052016-02-09 16:54:58 +11002997 bool src_is_directory = S_ISDIR(VFS_I(src_ip)->i_mode);
Dave Chinner7dcf5c32015-03-25 14:08:08 +11002998 int spaceres;
2999 int error;
Dave Chinnerf6bba202013-08-12 20:49:46 +10003000
3001 trace_xfs_rename(src_dp, target_dp, src_name, target_name);
3002
Dave Chinnereeacd322015-03-25 14:08:07 +11003003 if ((flags & RENAME_EXCHANGE) && !target_ip)
3004 return -EINVAL;
Dave Chinnerf6bba202013-08-12 20:49:46 +10003005
Dave Chinner7dcf5c32015-03-25 14:08:08 +11003006 /*
3007 * If we are doing a whiteout operation, allocate the whiteout inode
3008 * we will be placing at the target and ensure the type is set
3009 * appropriately.
3010 */
3011 if (flags & RENAME_WHITEOUT) {
3012 ASSERT(!(flags & (RENAME_NOREPLACE | RENAME_EXCHANGE)));
Christoph Hellwigf736d932021-01-21 14:19:58 +01003013 error = xfs_rename_alloc_whiteout(mnt_userns, target_dp, &wip);
Dave Chinner7dcf5c32015-03-25 14:08:08 +11003014 if (error)
3015 return error;
Dave Chinnerf6bba202013-08-12 20:49:46 +10003016
Dave Chinner7dcf5c32015-03-25 14:08:08 +11003017 /* setup target dirent info as whiteout */
3018 src_name->type = XFS_DIR3_FT_CHRDEV;
3019 }
3020
3021 xfs_sort_for_rename(src_dp, target_dp, src_ip, target_ip, wip,
Dave Chinnerf6bba202013-08-12 20:49:46 +10003022 inodes, &num_inodes);
3023
Dave Chinnerf6bba202013-08-12 20:49:46 +10003024 spaceres = XFS_RENAME_SPACE_RES(mp, target_name->len);
Christoph Hellwig253f4912016-04-06 09:19:55 +10003025 error = xfs_trans_alloc(mp, &M_RES(mp)->tr_rename, spaceres, 0, 0, &tp);
Dave Chinner24513372014-06-25 14:58:08 +10003026 if (error == -ENOSPC) {
Dave Chinnerf6bba202013-08-12 20:49:46 +10003027 spaceres = 0;
Christoph Hellwig253f4912016-04-06 09:19:55 +10003028 error = xfs_trans_alloc(mp, &M_RES(mp)->tr_rename, 0, 0, 0,
3029 &tp);
Dave Chinnerf6bba202013-08-12 20:49:46 +10003030 }
Dave Chinner445883e2015-03-25 14:05:43 +11003031 if (error)
Christoph Hellwig253f4912016-04-06 09:19:55 +10003032 goto out_release_wip;
Dave Chinnerf6bba202013-08-12 20:49:46 +10003033
3034 /*
3035 * Attach the dquots to the inodes
3036 */
3037 error = xfs_qm_vop_rename_dqattach(inodes);
Dave Chinner445883e2015-03-25 14:05:43 +11003038 if (error)
3039 goto out_trans_cancel;
Dave Chinnerf6bba202013-08-12 20:49:46 +10003040
3041 /*
3042 * Lock all the participating inodes. Depending upon whether
3043 * the target_name exists in the target directory, and
3044 * whether the target directory is the same as the source
3045 * directory, we can lock from 2 to 4 inodes.
3046 */
3047 xfs_lock_inodes(inodes, num_inodes, XFS_ILOCK_EXCL);
3048
3049 /*
3050 * Join all the inodes to the transaction. From this point on,
3051 * we can rely on either trans_commit or trans_cancel to unlock
3052 * them.
3053 */
Christoph Hellwig65523212016-11-30 14:33:25 +11003054 xfs_trans_ijoin(tp, src_dp, XFS_ILOCK_EXCL);
Dave Chinnerf6bba202013-08-12 20:49:46 +10003055 if (new_parent)
Christoph Hellwig65523212016-11-30 14:33:25 +11003056 xfs_trans_ijoin(tp, target_dp, XFS_ILOCK_EXCL);
Dave Chinnerf6bba202013-08-12 20:49:46 +10003057 xfs_trans_ijoin(tp, src_ip, XFS_ILOCK_EXCL);
3058 if (target_ip)
3059 xfs_trans_ijoin(tp, target_ip, XFS_ILOCK_EXCL);
Dave Chinner7dcf5c32015-03-25 14:08:08 +11003060 if (wip)
3061 xfs_trans_ijoin(tp, wip, XFS_ILOCK_EXCL);
Dave Chinnerf6bba202013-08-12 20:49:46 +10003062
3063 /*
3064 * If we are using project inheritance, we only allow renames
3065 * into our tree when the project IDs are the same; else the
3066 * tree quota mechanism would be circumvented.
3067 */
Christoph Hellwigdb073492021-03-29 11:11:44 -07003068 if (unlikely((target_dp->i_diflags & XFS_DIFLAG_PROJINHERIT) &&
Christoph Hellwigceaf6032021-03-29 11:11:39 -07003069 target_dp->i_projid != src_ip->i_projid)) {
Dave Chinner24513372014-06-25 14:58:08 +10003070 error = -EXDEV;
Dave Chinner445883e2015-03-25 14:05:43 +11003071 goto out_trans_cancel;
Dave Chinnerf6bba202013-08-12 20:49:46 +10003072 }
3073
Dave Chinnereeacd322015-03-25 14:08:07 +11003074 /* RENAME_EXCHANGE is unique from here on. */
3075 if (flags & RENAME_EXCHANGE)
3076 return xfs_cross_rename(tp, src_dp, src_name, src_ip,
3077 target_dp, target_name, target_ip,
Brian Fosterf16dea52018-07-11 22:26:20 -07003078 spaceres);
Carlos Maiolinod31a1822014-12-24 08:51:42 +11003079
3080 /*
kaixuxiabc56ad82019-09-03 21:06:50 -07003081 * Check for expected errors before we dirty the transaction
3082 * so we can return an error without a transaction abort.
Chandan Babu R02092a22021-01-22 16:48:13 -08003083 *
3084 * Extent count overflow check:
3085 *
3086 * From the perspective of src_dp, a rename operation is essentially a
3087 * directory entry remove operation. Hence the only place where we check
3088 * for extent count overflow for src_dp is in
3089 * xfs_bmap_del_extent_real(). xfs_bmap_del_extent_real() returns
3090 * -ENOSPC when it detects a possible extent count overflow and in
3091 * response, the higher layers of directory handling code do the
3092 * following:
3093 * 1. Data/Free blocks: XFS lets these blocks linger until a
3094 * future remove operation removes them.
3095 * 2. Dabtree blocks: XFS swaps the blocks with the last block in the
3096 * Leaf space and unmaps the last block.
3097 *
3098 * For target_dp, there are two cases depending on whether the
3099 * destination directory entry exists or not.
3100 *
 3101	 * When the destination directory entry does not exist (i.e. target_ip ==
 3102	 * NULL), the extent count overflow check is performed only when the
 3103	 * transaction has a non-zero sized space reservation associated with it. With a
3104 * zero-sized space reservation, XFS allows a rename operation to
3105 * continue only when the directory has sufficient free space in its
3106 * data/leaf/free space blocks to hold the new entry.
3107 *
 3108	 * When the destination directory entry exists (i.e. target_ip != NULL), all
3109 * we need to do is change the inode number associated with the already
3110 * existing entry. Hence there is no need to perform an extent count
3111 * overflow check.
Dave Chinnerf6bba202013-08-12 20:49:46 +10003112 */
3113 if (target_ip == NULL) {
3114 /*
3115 * If there's no space reservation, check the entry will
3116 * fit before actually inserting it.
3117 */
Eric Sandeen94f3cad2014-09-09 11:57:52 +10003118 if (!spaceres) {
3119 error = xfs_dir_canenter(tp, target_dp, target_name);
3120 if (error)
Dave Chinner445883e2015-03-25 14:05:43 +11003121 goto out_trans_cancel;
Chandan Babu R02092a22021-01-22 16:48:13 -08003122 } else {
3123 error = xfs_iext_count_may_overflow(target_dp,
3124 XFS_DATA_FORK,
3125 XFS_IEXT_DIR_MANIP_CNT(mp));
3126 if (error)
3127 goto out_trans_cancel;
Eric Sandeen94f3cad2014-09-09 11:57:52 +10003128 }
kaixuxiabc56ad82019-09-03 21:06:50 -07003129 } else {
3130 /*
 3131	 * If target exists and it's a directory, check whether
3132 * it can be destroyed.
3133 */
3134 if (S_ISDIR(VFS_I(target_ip)->i_mode) &&
3135 (!xfs_dir_isempty(target_ip) ||
3136 (VFS_I(target_ip)->i_nlink > 2))) {
3137 error = -EEXIST;
3138 goto out_trans_cancel;
3139 }
3140 }
3141
3142 /*
Darrick J. Wong6da1b4b2021-01-22 16:48:32 -08003143 * Lock the AGI buffers we need to handle bumping the nlink of the
3144 * whiteout inode off the unlinked list and to handle dropping the
3145 * nlink of the target inode. Per locking order rules, do this in
3146 * increasing AG order and before directory block allocation tries to
3147 * grab AGFs because we grab AGIs before AGFs.
3148 *
3149 * The (vfs) caller must ensure that if src is a directory then
3150 * target_ip is either null or an empty directory.
3151 */
3152 for (i = 0; i < num_inodes && inodes[i] != NULL; i++) {
3153 if (inodes[i] == wip ||
3154 (inodes[i] == target_ip &&
3155 (VFS_I(target_ip)->i_nlink == 1 || src_is_directory))) {
3156 struct xfs_buf *bp;
3157 xfs_agnumber_t agno;
3158
3159 agno = XFS_INO_TO_AGNO(mp, inodes[i]->i_ino);
3160 error = xfs_read_agi(mp, tp, agno, &bp);
3161 if (error)
3162 goto out_trans_cancel;
3163 }
3164 }
3165
3166 /*
kaixuxiabc56ad82019-09-03 21:06:50 -07003167 * Directory entry creation below may acquire the AGF. Remove
3168 * the whiteout from the unlinked list first to preserve correct
3169 * AGI/AGF locking order. This dirties the transaction so failures
3170 * after this point will abort and log recovery will clean up the
3171 * mess.
3172 *
3173 * For whiteouts, we need to bump the link count on the whiteout
3174 * inode. After this point, we have a real link, clear the tmpfile
3175 * state flag from the inode so it doesn't accidentally get misused
3176 * in future.
3177 */
3178 if (wip) {
3179 ASSERT(VFS_I(wip)->i_nlink == 0);
3180 error = xfs_iunlink_remove(tp, wip);
3181 if (error)
3182 goto out_trans_cancel;
3183
3184 xfs_bumplink(tp, wip);
kaixuxiabc56ad82019-09-03 21:06:50 -07003185 VFS_I(wip)->i_state &= ~I_LINKABLE;
3186 }
3187
3188 /*
3189 * Set up the target.
3190 */
3191 if (target_ip == NULL) {
Dave Chinnerf6bba202013-08-12 20:49:46 +10003192 /*
3193 * If target does not exist and the rename crosses
3194 * directories, adjust the target directory link count
3195 * to account for the ".." reference from the new entry.
3196 */
3197 error = xfs_dir_createname(tp, target_dp, target_name,
Brian Foster381eee62018-07-11 22:26:21 -07003198 src_ip->i_ino, spaceres);
Dave Chinnerf6bba202013-08-12 20:49:46 +10003199 if (error)
Brian Fosterc8eac492018-07-24 13:43:13 -07003200 goto out_trans_cancel;
Dave Chinnerf6bba202013-08-12 20:49:46 +10003201
3202 xfs_trans_ichgtime(tp, target_dp,
3203 XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
3204
3205 if (new_parent && src_is_directory) {
Eric Sandeen91083262019-05-01 20:26:30 -07003206 xfs_bumplink(tp, target_dp);
Dave Chinnerf6bba202013-08-12 20:49:46 +10003207 }
3208 } else { /* target_ip != NULL */
3209 /*
Dave Chinnerf6bba202013-08-12 20:49:46 +10003210 * Link the source inode under the target name.
3211 * If the source inode is a directory and we are moving
3212 * it across directories, its ".." entry will be
3213 * inconsistent until we replace that down below.
3214 *
3215 * In case there is already an entry with the same
3216 * name at the destination directory, remove it first.
3217 */
3218 error = xfs_dir_replace(tp, target_dp, target_name,
Brian Foster381eee62018-07-11 22:26:21 -07003219 src_ip->i_ino, spaceres);
Dave Chinnerf6bba202013-08-12 20:49:46 +10003220 if (error)
Brian Fosterc8eac492018-07-24 13:43:13 -07003221 goto out_trans_cancel;
Dave Chinnerf6bba202013-08-12 20:49:46 +10003222
3223 xfs_trans_ichgtime(tp, target_dp,
3224 XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
3225
3226 /*
3227 * Decrement the link count on the target since the target
3228 * dir no longer points to it.
3229 */
3230 error = xfs_droplink(tp, target_ip);
3231 if (error)
Brian Fosterc8eac492018-07-24 13:43:13 -07003232 goto out_trans_cancel;
Dave Chinnerf6bba202013-08-12 20:49:46 +10003233
3234 if (src_is_directory) {
3235 /*
3236 * Drop the link from the old "." entry.
3237 */
3238 error = xfs_droplink(tp, target_ip);
3239 if (error)
Brian Fosterc8eac492018-07-24 13:43:13 -07003240 goto out_trans_cancel;
Dave Chinnerf6bba202013-08-12 20:49:46 +10003241 }
3242 } /* target_ip != NULL */
3243
3244 /*
3245 * Remove the source.
3246 */
3247 if (new_parent && src_is_directory) {
3248 /*
3249 * Rewrite the ".." entry to point to the new
3250 * directory.
3251 */
3252 error = xfs_dir_replace(tp, src_ip, &xfs_name_dotdot,
Brian Foster381eee62018-07-11 22:26:21 -07003253 target_dp->i_ino, spaceres);
Dave Chinner24513372014-06-25 14:58:08 +10003254 ASSERT(error != -EEXIST);
Dave Chinnerf6bba202013-08-12 20:49:46 +10003255 if (error)
Brian Fosterc8eac492018-07-24 13:43:13 -07003256 goto out_trans_cancel;
Dave Chinnerf6bba202013-08-12 20:49:46 +10003257 }
3258
3259 /*
3260 * We always want to hit the ctime on the source inode.
3261 *
3262 * This isn't strictly required by the standards since the source
3263 * inode isn't really being changed, but old unix file systems did
3264 * it and some incremental backup programs won't work without it.
3265 */
3266 xfs_trans_ichgtime(tp, src_ip, XFS_ICHGTIME_CHG);
3267 xfs_trans_log_inode(tp, src_ip, XFS_ILOG_CORE);
3268
3269 /*
3270 * Adjust the link count on src_dp. This is necessary when
3271 * renaming a directory, either within one parent when
3272 * the target existed, or across two parent directories.
3273 */
3274 if (src_is_directory && (new_parent || target_ip != NULL)) {
3275
3276 /*
3277 * Decrement link count on src_directory since the
3278 * entry that's moved no longer points to it.
3279 */
3280 error = xfs_droplink(tp, src_dp);
3281 if (error)
Brian Fosterc8eac492018-07-24 13:43:13 -07003282 goto out_trans_cancel;
Dave Chinnerf6bba202013-08-12 20:49:46 +10003283 }
3284
Dave Chinner7dcf5c32015-03-25 14:08:08 +11003285 /*
3286 * For whiteouts, we only need to update the source dirent with the
3287 * inode number of the whiteout inode rather than removing it
3288 * altogether.
3289 */
3290 if (wip) {
3291 error = xfs_dir_replace(tp, src_dp, src_name, wip->i_ino,
Brian Foster381eee62018-07-11 22:26:21 -07003292 spaceres);
Chandan Babu R02092a22021-01-22 16:48:13 -08003293 } else {
3294 /*
3295 * NOTE: We don't need to check for extent count overflow here
3296 * because the dir remove name code will leave the dir block in
3297 * place if the extent count would overflow.
3298 */
Dave Chinner7dcf5c32015-03-25 14:08:08 +11003299 error = xfs_dir_removename(tp, src_dp, src_name, src_ip->i_ino,
Brian Foster381eee62018-07-11 22:26:21 -07003300 spaceres);
Chandan Babu R02092a22021-01-22 16:48:13 -08003301 }
3302
Dave Chinnerf6bba202013-08-12 20:49:46 +10003303 if (error)
Brian Fosterc8eac492018-07-24 13:43:13 -07003304 goto out_trans_cancel;
Dave Chinnerf6bba202013-08-12 20:49:46 +10003305
3306 xfs_trans_ichgtime(tp, src_dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
3307 xfs_trans_log_inode(tp, src_dp, XFS_ILOG_CORE);
3308 if (new_parent)
3309 xfs_trans_log_inode(tp, target_dp, XFS_ILOG_CORE);
3310
Brian Fosterc9cfdb32018-07-11 22:26:08 -07003311 error = xfs_finish_rename(tp);
Dave Chinner7dcf5c32015-03-25 14:08:08 +11003312 if (wip)
Darrick J. Wong44a87362018-07-25 12:52:32 -07003313 xfs_irele(wip);
Dave Chinner7dcf5c32015-03-25 14:08:08 +11003314 return error;
Dave Chinnerf6bba202013-08-12 20:49:46 +10003315
Dave Chinner445883e2015-03-25 14:05:43 +11003316out_trans_cancel:
Christoph Hellwig4906e212015-06-04 13:47:56 +10003317 xfs_trans_cancel(tp);
Christoph Hellwig253f4912016-04-06 09:19:55 +10003318out_release_wip:
Dave Chinner7dcf5c32015-03-25 14:08:08 +11003319 if (wip)
Darrick J. Wong44a87362018-07-25 12:52:32 -07003320 xfs_irele(wip);
Dave Chinnerf6bba202013-08-12 20:49:46 +10003321 return error;
3322}
3323
Dave Chinnere6187b32020-06-29 14:49:19 -07003324static int
3325xfs_iflush(
Christoph Hellwig93848a92013-04-03 16:11:17 +11003326 struct xfs_inode *ip,
3327 struct xfs_buf *bp)
Linus Torvalds1da177e2005-04-16 15:20:36 -07003328{
Christoph Hellwig93848a92013-04-03 16:11:17 +11003329 struct xfs_inode_log_item *iip = ip->i_itemp;
3330 struct xfs_dinode *dip;
3331 struct xfs_mount *mp = ip->i_mount;
Brian Fosterf2019292020-05-06 13:25:20 -07003332 int error;
Linus Torvalds1da177e2005-04-16 15:20:36 -07003333
Christoph Hellwig579aa9c2008-04-22 17:34:00 +10003334 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_ILOCK_SHARED));
Dave Chinner718ecc52020-08-17 16:41:01 -07003335 ASSERT(xfs_iflags_test(ip, XFS_IFLUSHING));
Christoph Hellwigf7e67b22020-05-18 10:28:05 -07003336 ASSERT(ip->i_df.if_format != XFS_DINODE_FMT_BTREE ||
Christoph Hellwigdaf83962020-05-18 10:27:22 -07003337 ip->i_df.if_nextents > XFS_IFORK_MAXEXT(ip, XFS_DATA_FORK));
Dave Chinner90c60e12020-06-29 14:49:19 -07003338 ASSERT(iip->ili_item.li_buf == bp);
Linus Torvalds1da177e2005-04-16 15:20:36 -07003339
Christoph Hellwig88ee2df2015-06-22 09:44:29 +10003340 dip = xfs_buf_offset(bp, ip->i_imap.im_boffset);
Linus Torvalds1da177e2005-04-16 15:20:36 -07003341
Brian Fosterf2019292020-05-06 13:25:20 -07003342 /*
3343 * We don't flush the inode if any of the following checks fail, but we
3344 * do still update the log item and attach to the backing buffer as if
3345 * the flush happened. This is a formality to facilitate predictable
 3346	 * error handling as the caller will shut down and fail the buffer.
3347 */
3348 error = -EFSCORRUPTED;
Christoph Hellwig69ef9212011-07-08 14:36:05 +02003349 if (XFS_TEST_ERROR(dip->di_magic != cpu_to_be16(XFS_DINODE_MAGIC),
Darrick J. Wong9e24cfd2017-06-20 17:54:47 -07003350 mp, XFS_ERRTAG_IFLUSH_1)) {
Dave Chinner6a19d932011-03-07 10:02:35 +11003351 xfs_alert_tag(mp, XFS_PTAG_IFLUSH,
Darrick J. Wongc9690042018-01-09 12:02:55 -08003352 "%s: Bad inode %Lu magic number 0x%x, ptr "PTR_FMT,
Dave Chinner6a19d932011-03-07 10:02:35 +11003353 __func__, ip->i_ino, be16_to_cpu(dip->di_magic), dip);
Brian Fosterf2019292020-05-06 13:25:20 -07003354 goto flush_out;
Linus Torvalds1da177e2005-04-16 15:20:36 -07003355 }
Dave Chinnerc19b3b052016-02-09 16:54:58 +11003356 if (S_ISREG(VFS_I(ip)->i_mode)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07003357 if (XFS_TEST_ERROR(
Christoph Hellwigf7e67b22020-05-18 10:28:05 -07003358 ip->i_df.if_format != XFS_DINODE_FMT_EXTENTS &&
3359 ip->i_df.if_format != XFS_DINODE_FMT_BTREE,
Darrick J. Wong9e24cfd2017-06-20 17:54:47 -07003360 mp, XFS_ERRTAG_IFLUSH_3)) {
Dave Chinner6a19d932011-03-07 10:02:35 +11003361 xfs_alert_tag(mp, XFS_PTAG_IFLUSH,
Darrick J. Wongc9690042018-01-09 12:02:55 -08003362 "%s: Bad regular inode %Lu, ptr "PTR_FMT,
Dave Chinner6a19d932011-03-07 10:02:35 +11003363 __func__, ip->i_ino, ip);
Brian Fosterf2019292020-05-06 13:25:20 -07003364 goto flush_out;
Linus Torvalds1da177e2005-04-16 15:20:36 -07003365 }
Dave Chinnerc19b3b052016-02-09 16:54:58 +11003366 } else if (S_ISDIR(VFS_I(ip)->i_mode)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07003367 if (XFS_TEST_ERROR(
Christoph Hellwigf7e67b22020-05-18 10:28:05 -07003368 ip->i_df.if_format != XFS_DINODE_FMT_EXTENTS &&
3369 ip->i_df.if_format != XFS_DINODE_FMT_BTREE &&
3370 ip->i_df.if_format != XFS_DINODE_FMT_LOCAL,
Darrick J. Wong9e24cfd2017-06-20 17:54:47 -07003371 mp, XFS_ERRTAG_IFLUSH_4)) {
Dave Chinner6a19d932011-03-07 10:02:35 +11003372 xfs_alert_tag(mp, XFS_PTAG_IFLUSH,
Darrick J. Wongc9690042018-01-09 12:02:55 -08003373 "%s: Bad directory inode %Lu, ptr "PTR_FMT,
Dave Chinner6a19d932011-03-07 10:02:35 +11003374 __func__, ip->i_ino, ip);
Brian Fosterf2019292020-05-06 13:25:20 -07003375 goto flush_out;
Linus Torvalds1da177e2005-04-16 15:20:36 -07003376 }
3377 }
Christoph Hellwigdaf83962020-05-18 10:27:22 -07003378 if (XFS_TEST_ERROR(ip->i_df.if_nextents + xfs_ifork_nextents(ip->i_afp) >
Christoph Hellwig6e73a542021-03-29 11:11:40 -07003379 ip->i_nblocks, mp, XFS_ERRTAG_IFLUSH_5)) {
Dave Chinner6a19d932011-03-07 10:02:35 +11003380 xfs_alert_tag(mp, XFS_PTAG_IFLUSH,
3381 "%s: detected corrupt incore inode %Lu, "
Darrick J. Wongc9690042018-01-09 12:02:55 -08003382 "total extents = %d, nblocks = %Ld, ptr "PTR_FMT,
Dave Chinner6a19d932011-03-07 10:02:35 +11003383 __func__, ip->i_ino,
Christoph Hellwigdaf83962020-05-18 10:27:22 -07003384 ip->i_df.if_nextents + xfs_ifork_nextents(ip->i_afp),
Christoph Hellwig6e73a542021-03-29 11:11:40 -07003385 ip->i_nblocks, ip);
Brian Fosterf2019292020-05-06 13:25:20 -07003386 goto flush_out;
Linus Torvalds1da177e2005-04-16 15:20:36 -07003387 }
Christoph Hellwig7821ea32021-03-29 11:11:44 -07003388 if (XFS_TEST_ERROR(ip->i_forkoff > mp->m_sb.sb_inodesize,
Darrick J. Wong9e24cfd2017-06-20 17:54:47 -07003389 mp, XFS_ERRTAG_IFLUSH_6)) {
Dave Chinner6a19d932011-03-07 10:02:35 +11003390 xfs_alert_tag(mp, XFS_PTAG_IFLUSH,
Darrick J. Wongc9690042018-01-09 12:02:55 -08003391 "%s: bad inode %Lu, forkoff 0x%x, ptr "PTR_FMT,
Christoph Hellwig7821ea32021-03-29 11:11:44 -07003392 __func__, ip->i_ino, ip->i_forkoff, ip);
Brian Fosterf2019292020-05-06 13:25:20 -07003393 goto flush_out;
Linus Torvalds1da177e2005-04-16 15:20:36 -07003394 }
Dave Chinnere60896d2013-07-24 15:47:30 +10003395
Linus Torvalds1da177e2005-04-16 15:20:36 -07003396 /*
Christoph Hellwig965e0a12021-03-29 11:11:42 -07003397	 * Inode item log recovery for v2 inodes is dependent on the flushiter
3398 * count for correct sequencing. We bump the flush iteration count so
3399 * we can detect flushes which postdate a log record during recovery.
3400 * This is redundant as we now log every change and hence this can't
3401 * happen but we need to still do it to ensure backwards compatibility
3402 * with old kernels that predate logging all inode changes.
Linus Torvalds1da177e2005-04-16 15:20:36 -07003403 */
Christoph Hellwig6471e9c2020-03-18 08:15:11 -07003404 if (!xfs_sb_version_has_v3inode(&mp->m_sb))
Christoph Hellwig965e0a12021-03-29 11:11:42 -07003405 ip->i_flushiter++;
Linus Torvalds1da177e2005-04-16 15:20:36 -07003406
Christoph Hellwig0f45a1b2020-05-14 14:01:31 -07003407 /*
3408 * If there are inline format data / attr forks attached to this inode,
3409 * make sure they are not corrupt.
3410 */
Christoph Hellwigf7e67b22020-05-18 10:28:05 -07003411 if (ip->i_df.if_format == XFS_DINODE_FMT_LOCAL &&
Christoph Hellwig0f45a1b2020-05-14 14:01:31 -07003412 xfs_ifork_verify_local_data(ip))
3413 goto flush_out;
Christoph Hellwigf7e67b22020-05-18 10:28:05 -07003414 if (ip->i_afp && ip->i_afp->if_format == XFS_DINODE_FMT_LOCAL &&
Christoph Hellwig0f45a1b2020-05-14 14:01:31 -07003415 xfs_ifork_verify_local_attr(ip))
Brian Fosterf2019292020-05-06 13:25:20 -07003416 goto flush_out;
Darrick J. Wong005c5db2017-03-28 14:51:10 -07003417
Linus Torvalds1da177e2005-04-16 15:20:36 -07003418 /*
Dave Chinner39878482016-02-09 16:54:58 +11003419 * Copy the dirty parts of the inode into the on-disk inode. We always
3420 * copy out the core of the inode, because if the inode is dirty at all
3421 * the core must be.
Linus Torvalds1da177e2005-04-16 15:20:36 -07003422 */
Dave Chinner93f958f2016-02-09 16:54:58 +11003423 xfs_inode_to_disk(ip, dip, iip->ili_item.li_lsn);
Linus Torvalds1da177e2005-04-16 15:20:36 -07003424
3425 /* Wrap, we never let the log put out DI_MAX_FLUSH */
Christoph Hellwigee7b83f2021-03-29 11:11:43 -07003426 if (!xfs_sb_version_has_v3inode(&mp->m_sb)) {
3427 if (ip->i_flushiter == DI_MAX_FLUSH)
3428 ip->i_flushiter = 0;
3429 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07003430
Darrick J. Wong005c5db2017-03-28 14:51:10 -07003431 xfs_iflush_fork(ip, dip, iip, XFS_DATA_FORK);
3432 if (XFS_IFORK_Q(ip))
3433 xfs_iflush_fork(ip, dip, iip, XFS_ATTR_FORK);
Linus Torvalds1da177e2005-04-16 15:20:36 -07003434
3435 /*
Christoph Hellwigf5d8d5c2012-02-29 09:53:54 +00003436 * We've recorded everything logged in the inode, so we'd like to clear
3437 * the ili_fields bits so we don't log and flush things unnecessarily.
3438 * However, we can't stop logging all this information until the data
3439 * we've copied into the disk buffer is written to disk. If we did we
3440 * might overwrite the copy of the inode in the log with all the data
3441 * after re-logging only part of it, and in the face of a crash we
3442 * wouldn't have all the data we need to recover.
Linus Torvalds1da177e2005-04-16 15:20:36 -07003443 *
Christoph Hellwigf5d8d5c2012-02-29 09:53:54 +00003444 * What we do is move the bits to the ili_last_fields field. When
3445 * logging the inode, these bits are moved back to the ili_fields field.
Christoph Hellwig664ffb82020-09-01 10:55:29 -07003446 * In the xfs_buf_inode_iodone() routine we clear ili_last_fields, since
3447 * we know that the information those bits represent is permanently on
Christoph Hellwigf5d8d5c2012-02-29 09:53:54 +00003448 * disk. As long as the flush completes before the inode is logged
3449 * again, then both ili_fields and ili_last_fields will be cleared.
Linus Torvalds1da177e2005-04-16 15:20:36 -07003450 */
Brian Fosterf2019292020-05-06 13:25:20 -07003451 error = 0;
3452flush_out:
Dave Chinner1319ebe2020-06-29 14:48:46 -07003453 spin_lock(&iip->ili_lock);
Christoph Hellwig93848a92013-04-03 16:11:17 +11003454 iip->ili_last_fields = iip->ili_fields;
3455 iip->ili_fields = 0;
Dave Chinnerfc0561c2015-11-03 13:14:59 +11003456 iip->ili_fsync_fields = 0;
Dave Chinner1319ebe2020-06-29 14:48:46 -07003457 spin_unlock(&iip->ili_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07003458
Dave Chinner1319ebe2020-06-29 14:48:46 -07003459 /*
3460 * Store the current LSN of the inode so that we can tell whether the
Christoph Hellwig664ffb82020-09-01 10:55:29 -07003461 * item has moved in the AIL from xfs_buf_inode_iodone().
Dave Chinner1319ebe2020-06-29 14:48:46 -07003462 */
Christoph Hellwig93848a92013-04-03 16:11:17 +11003463 xfs_trans_ail_copy_lsn(mp->m_ail, &iip->ili_flush_lsn,
3464 &iip->ili_item.li_lsn);
Linus Torvalds1da177e2005-04-16 15:20:36 -07003465
Christoph Hellwig93848a92013-04-03 16:11:17 +11003466 /* generate the checksum. */
3467 xfs_dinode_calc_crc(mp, dip);
Brian Fosterf2019292020-05-06 13:25:20 -07003468 return error;
Linus Torvalds1da177e2005-04-16 15:20:36 -07003469}
Darrick J. Wong44a87362018-07-25 12:52:32 -07003470
Dave Chinnere6187b32020-06-29 14:49:19 -07003471/*
3472 * Non-blocking flush of dirty inode metadata into the backing buffer.
3473 *
3474 * The caller must have a reference to the inode and hold the cluster buffer
3475 * locked. The function will walk across all the inodes on the cluster buffer it
3476 * can find and lock without blocking, and flush them to the cluster buffer.
3477 *
Dave Chinner5717ea42020-06-29 14:49:20 -07003478 * On successful flushing of at least one inode, the caller must write out the
3479 * buffer and release it. If no inodes are flushed, -EAGAIN will be returned and
3480 * the caller needs to release the buffer. On failure, the filesystem will be
3481 * shut down, the buffer will have been unlocked and released, and EFSCORRUPTED
3482 * will be returned.
Dave Chinnere6187b32020-06-29 14:49:19 -07003483 */
3484int
3485xfs_iflush_cluster(
Dave Chinnere6187b32020-06-29 14:49:19 -07003486 struct xfs_buf *bp)
3487{
Dave Chinner5717ea42020-06-29 14:49:20 -07003488 struct xfs_mount *mp = bp->b_mount;
3489 struct xfs_log_item *lip, *n;
3490 struct xfs_inode *ip;
3491 struct xfs_inode_log_item *iip;
Dave Chinnere6187b32020-06-29 14:49:19 -07003492 int clcount = 0;
Dave Chinner5717ea42020-06-29 14:49:20 -07003493 int error = 0;
Dave Chinnere6187b32020-06-29 14:49:19 -07003494
Dave Chinner5717ea42020-06-29 14:49:20 -07003495 /*
3496 * We must use the safe variant here as on shutdown xfs_iflush_abort()
3497 * can remove itself from the list.
3498 */
3499 list_for_each_entry_safe(lip, n, &bp->b_li_list, li_bio_list) {
3500 iip = (struct xfs_inode_log_item *)lip;
3501 ip = iip->ili_inode;
Dave Chinnere6187b32020-06-29 14:49:19 -07003502
3503 /*
Dave Chinner5717ea42020-06-29 14:49:20 -07003504 * Quick and dirty check to avoid locks if possible.
Dave Chinnere6187b32020-06-29 14:49:19 -07003505 */
Dave Chinner718ecc52020-08-17 16:41:01 -07003506 if (__xfs_iflags_test(ip, XFS_IRECLAIM | XFS_IFLUSHING))
Dave Chinner5717ea42020-06-29 14:49:20 -07003507 continue;
3508 if (xfs_ipincount(ip))
3509 continue;
3510
3511 /*
3512 * The inode is still attached to the buffer, which means it is
3513 * dirty but reclaim might try to grab it. Check carefully for
3514 * that, and grab the ilock while still holding the i_flags_lock
3515 * to guarantee reclaim will not be able to reclaim this inode
3516 * once we drop the i_flags_lock.
3517 */
3518 spin_lock(&ip->i_flags_lock);
3519 ASSERT(!__xfs_iflags_test(ip, XFS_ISTALE));
Dave Chinner718ecc52020-08-17 16:41:01 -07003520 if (__xfs_iflags_test(ip, XFS_IRECLAIM | XFS_IFLUSHING)) {
Dave Chinner5717ea42020-06-29 14:49:20 -07003521 spin_unlock(&ip->i_flags_lock);
Dave Chinnere6187b32020-06-29 14:49:19 -07003522 continue;
3523 }
3524
3525 /*
Dave Chinner5717ea42020-06-29 14:49:20 -07003526 * ILOCK will pin the inode against reclaim and prevent
3527		 * concurrent transactions from modifying the inode while we are
Dave Chinner718ecc52020-08-17 16:41:01 -07003528		 * flushing it. If we get the lock, set the flushing
3529 * state before we drop the i_flags_lock.
Dave Chinnere6187b32020-06-29 14:49:19 -07003530 */
Dave Chinner5717ea42020-06-29 14:49:20 -07003531 if (!xfs_ilock_nowait(ip, XFS_ILOCK_SHARED)) {
3532 spin_unlock(&ip->i_flags_lock);
3533 continue;
3534 }
Dave Chinner718ecc52020-08-17 16:41:01 -07003535 __xfs_iflags_set(ip, XFS_IFLUSHING);
Dave Chinner5717ea42020-06-29 14:49:20 -07003536 spin_unlock(&ip->i_flags_lock);
3537
3538 /*
Dave Chinner5717ea42020-06-29 14:49:20 -07003539 * Abort flushing this inode if we are shut down because the
3540 * inode may not currently be in the AIL. This can occur when
3541		 * a log I/O failure unpins the inode without inserting it into the
3542 * AIL, leaving a dirty/unpinned inode attached to the buffer
3543 * that otherwise looks like it should be flushed.
3544 */
3545 if (XFS_FORCED_SHUTDOWN(mp)) {
3546 xfs_iunpin_wait(ip);
Dave Chinner5717ea42020-06-29 14:49:20 -07003547 xfs_iflush_abort(ip);
3548 xfs_iunlock(ip, XFS_ILOCK_SHARED);
3549 error = -EIO;
3550 continue;
3551 }
3552
3553 /* don't block waiting on a log force to unpin dirty inodes */
3554 if (xfs_ipincount(ip)) {
Dave Chinner718ecc52020-08-17 16:41:01 -07003555 xfs_iflags_clear(ip, XFS_IFLUSHING);
Dave Chinner5717ea42020-06-29 14:49:20 -07003556 xfs_iunlock(ip, XFS_ILOCK_SHARED);
3557 continue;
3558 }
3559
3560 if (!xfs_inode_clean(ip))
3561 error = xfs_iflush(ip, bp);
3562 else
Dave Chinner718ecc52020-08-17 16:41:01 -07003563 xfs_iflags_clear(ip, XFS_IFLUSHING);
Dave Chinner5717ea42020-06-29 14:49:20 -07003564 xfs_iunlock(ip, XFS_ILOCK_SHARED);
3565 if (error)
Dave Chinnere6187b32020-06-29 14:49:19 -07003566 break;
Dave Chinner5717ea42020-06-29 14:49:20 -07003567 clcount++;
Dave Chinnere6187b32020-06-29 14:49:19 -07003568 }
3569
Dave Chinnere6187b32020-06-29 14:49:19 -07003570 if (error) {
3571 bp->b_flags |= XBF_ASYNC;
3572 xfs_buf_ioend_fail(bp);
3573 xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
Dave Chinner5717ea42020-06-29 14:49:20 -07003574 return error;
Dave Chinnere6187b32020-06-29 14:49:19 -07003575 }
Dave Chinner5717ea42020-06-29 14:49:20 -07003576
3577 if (!clcount)
3578 return -EAGAIN;
3579
3580 XFS_STATS_INC(mp, xs_icluster_flushcnt);
3581 XFS_STATS_ADD(mp, xs_icluster_flushinode, clcount);
3582 return 0;
3583
Dave Chinnere6187b32020-06-29 14:49:19 -07003584}
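/*
 * Illustrative sketch only (not existing code in this file): one way a caller
 * could consume the xfs_iflush_cluster() return contract documented above.
 * The function name and the buffer_list parameter are made up for the
 * example; the real AIL push path does something very similar from
 * xfs_inode_item.c.
 */
#if 0
static int
example_flush_inode_cluster(
	struct xfs_buf		*bp,
	struct list_head	*buffer_list)
{
	int			error;

	/* The caller must hold the buffer lock and a reference to it. */
	error = xfs_iflush_cluster(bp);
	if (!error) {
		/* At least one inode was flushed; queue the buffer for write. */
		xfs_buf_delwri_queue(bp, buffer_list);
		xfs_buf_relse(bp);
	} else if (error == -EAGAIN) {
		/* Nothing could be flushed without blocking; just release. */
		xfs_buf_relse(bp);
	}
	/* On any other error the buffer has already been released for us. */
	return error;
}
#endif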
3585
Darrick J. Wong44a87362018-07-25 12:52:32 -07003586/* Release an inode. */
3587void
3588xfs_irele(
3589 struct xfs_inode *ip)
3590{
3591 trace_xfs_irele(ip, _RET_IP_);
3592 iput(VFS_I(ip));
3593}
Christoph Hellwig54fbdd12020-04-03 11:45:37 -07003594
3595/*
3596 * Ensure all committed transactions touching the inode are written to the log.
3597 */
3598int
3599xfs_log_force_inode(
3600 struct xfs_inode *ip)
3601{
3602 xfs_lsn_t lsn = 0;
3603
3604 xfs_ilock(ip, XFS_ILOCK_SHARED);
3605 if (xfs_ipincount(ip))
3606 lsn = ip->i_itemp->ili_last_lsn;
3607 xfs_iunlock(ip, XFS_ILOCK_SHARED);
3608
3609 if (!lsn)
3610 return 0;
3611 return xfs_log_force_lsn(ip->i_mount, lsn, XFS_LOG_SYNC, NULL);
3612}
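/*
 * Illustrative sketch only (not existing code in this file): making a
 * just-committed change to an inode durable with xfs_log_force_inode()
 * above. The function name and the transaction argument are made up for
 * the example.
 */
#if 0
static int
example_commit_and_force(
	struct xfs_trans	*tp,
	struct xfs_inode	*ip)
{
	int			error;

	/* Commit the transaction; the change now lives in the in-memory log. */
	error = xfs_trans_commit(tp);
	if (error)
		return error;

	/*
	 * If the inode is still pinned, its last commit has not reached
	 * stable storage yet, so force the log up to that LSN.
	 */
	return xfs_log_force_inode(ip);
}
#endif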
Darrick J. Wonge2aaee92020-06-29 14:47:20 -07003613
3614/*
3615 * Grab the exclusive iolock for a data copy from src to dest, making sure to
3616 * abide by the vfs locking order (lowest pointer value goes first) and to
3617 * break the layout leases before proceeding. The loop is needed because we
3618 * cannot call the blocking break_layout() with the iolocks held, and
3619 * therefore have to back out both locks.
3620 */
3621static int
3622xfs_iolock_two_inodes_and_break_layout(
3623 struct inode *src,
3624 struct inode *dest)
3625{
3626 int error;
3627
3628 if (src > dest)
3629 swap(src, dest);
3630
3631retry:
3632 /* Wait to break both inodes' layouts before we start locking. */
3633 error = break_layout(src, true);
3634 if (error)
3635 return error;
3636 if (src != dest) {
3637 error = break_layout(dest, true);
3638 if (error)
3639 return error;
3640 }
3641
3642 /* Lock one inode and make sure nobody got in and leased it. */
3643 inode_lock(src);
3644 error = break_layout(src, false);
3645 if (error) {
3646 inode_unlock(src);
3647 if (error == -EWOULDBLOCK)
3648 goto retry;
3649 return error;
3650 }
3651
3652 if (src == dest)
3653 return 0;
3654
3655 /* Lock the other inode and make sure nobody got in and leased it. */
3656 inode_lock_nested(dest, I_MUTEX_NONDIR2);
3657 error = break_layout(dest, false);
3658 if (error) {
3659 inode_unlock(src);
3660 inode_unlock(dest);
3661 if (error == -EWOULDBLOCK)
3662 goto retry;
3663 return error;
3664 }
3665
3666 return 0;
3667}
3668
3669/*
3670 * Lock two inodes so that userspace cannot initiate I/O via file syscalls or
3671 * mmap activity.
3672 */
3673int
3674xfs_ilock2_io_mmap(
3675 struct xfs_inode *ip1,
3676 struct xfs_inode *ip2)
3677{
3678 int ret;
3679
3680 ret = xfs_iolock_two_inodes_and_break_layout(VFS_I(ip1), VFS_I(ip2));
3681 if (ret)
3682 return ret;
3683 if (ip1 == ip2)
3684 xfs_ilock(ip1, XFS_MMAPLOCK_EXCL);
3685 else
3686 xfs_lock_two_inodes(ip1, XFS_MMAPLOCK_EXCL,
3687 ip2, XFS_MMAPLOCK_EXCL);
3688 return 0;
3689}
3690
3691/* Unlock both inodes to allow I/O and mmap activity. */
3692void
3693xfs_iunlock2_io_mmap(
3694 struct xfs_inode *ip1,
3695 struct xfs_inode *ip2)
3696{
3697 bool same_inode = (ip1 == ip2);
3698
3699 xfs_iunlock(ip2, XFS_MMAPLOCK_EXCL);
3700 if (!same_inode)
3701 xfs_iunlock(ip1, XFS_MMAPLOCK_EXCL);
3702 inode_unlock(VFS_I(ip2));
3703 if (!same_inode)
3704 inode_unlock(VFS_I(ip1));
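3705}

/*
 * Illustrative sketch only (not existing code in this file): bracketing a
 * two-inode data operation with the lock helpers above. The callback and
 * the function name are made up for the example; real users take these
 * locks around remap/extent-sharing style operations.
 */
#if 0
static int
example_two_inode_operation(
	struct xfs_inode	*ip1,
	struct xfs_inode	*ip2,
	int			(*op)(struct xfs_inode *, struct xfs_inode *))
{
	int			error;

	/* Block new file I/O and page faults on both inodes. */
	error = xfs_ilock2_io_mmap(ip1, ip2);
	if (error)
		return error;

	error = op(ip1, ip2);

	/* Drop the MMAPLOCKs and inode locks taken above. */
	xfs_iunlock2_io_mmap(ip1, ip2);
	return error;
}
#endif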