Blame - fs/xfs/xfs_inode.c - SHIFTPHONES/mainline/linux

blob: 822f73d60f92af7af90a5945bc7d7f2684328779 [file] [log] [blame]

Dave Chinner	0b61f8a	2018-06-05 19:42:14 -0700	[diff] [blame]	1	// SPDX-License-Identifier: GPL-2.0
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	2	/*
Olaf Weber	3e57ecf	2006-06-09 14:48:12 +1000	[diff] [blame]	3	* Copyright (c) 2000-2006 Silicon Graphics, Inc.
Nathan Scott	7b71876	2005-11-02 14:58:39 +1100	[diff] [blame]	4	* All Rights Reserved.
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	5	*/
Jeff Layton	f0e2828	2017-12-11 06:35:19 -0500	[diff] [blame]	6	#include <linux/iversion.h>
Robert P. J. Day	40ebd81	2007-11-23 16:30:51 +1100	[diff] [blame]	7
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	8	#include "xfs.h"
Nathan Scott	a844f45	2005-11-02 14:38:42 +1100	[diff] [blame]	9	#include "xfs_fs.h"
Dave Chinner	70a9883	2013-10-23 10:36:05 +1100	[diff] [blame]	10	#include "xfs_shared.h"
Dave Chinner	239880e	2013-10-23 10:50:10 +1100	[diff] [blame]	11	#include "xfs_format.h"
				12	#include "xfs_log_format.h"
				13	#include "xfs_trans_resv.h"
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	14	#include "xfs_mount.h"
Darrick J. Wong	3ab78df	2016-08-03 11:15:38 +1000	[diff] [blame]	15	#include "xfs_defer.h"
Dave Chinner	a4fbe6a	2013-10-23 10:51:50 +1100	[diff] [blame]	16	#include "xfs_inode.h"
Dave Chinner	c24b5df	2013-08-12 20:49:45 +1000	[diff] [blame]	17	#include "xfs_dir2.h"
Dave Chinner	c24b5df	2013-08-12 20:49:45 +1000	[diff] [blame]	18	#include "xfs_attr.h"
Dave Chinner	239880e	2013-10-23 10:50:10 +1100	[diff] [blame]	19	#include "xfs_trans_space.h"
				20	#include "xfs_trans.h"
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	21	#include "xfs_buf_item.h"
Nathan Scott	a844f45	2005-11-02 14:38:42 +1100	[diff] [blame]	22	#include "xfs_inode_item.h"
Nathan Scott	a844f45	2005-11-02 14:38:42 +1100	[diff] [blame]	23	#include "xfs_ialloc.h"
				24	#include "xfs_bmap.h"
Dave Chinner	6898811	2013-08-12 20:49:42 +1000	[diff] [blame]	25	#include "xfs_bmap_util.h"
Darrick J. Wong	e9e899a	2017-10-31 12:04:49 -0700	[diff] [blame]	26	#include "xfs_errortag.h"
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	27	#include "xfs_error.h"
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	28	#include "xfs_quota.h"
David Chinner	2a82b8b	2007-07-11 11:09:12 +1000	[diff] [blame]	29	#include "xfs_filestream.h"
Christoph Hellwig	0b1b213	2009-12-14 23:14:59 +0000	[diff] [blame]	30	#include "xfs_trace.h"
Dave Chinner	33479e0	2012-10-08 21:56:11 +1100	[diff] [blame]	31	#include "xfs_icache.h"
Dave Chinner	c24b5df	2013-08-12 20:49:45 +1000	[diff] [blame]	32	#include "xfs_symlink.h"
Dave Chinner	239880e	2013-10-23 10:50:10 +1100	[diff] [blame]	33	#include "xfs_trans_priv.h"
				34	#include "xfs_log.h"
Dave Chinner	a4fbe6a	2013-10-23 10:51:50 +1100	[diff] [blame]	35	#include "xfs_bmap_btree.h"
Darrick J. Wong	aa8968f	2016-10-03 09:11:38 -0700	[diff] [blame]	36	#include "xfs_reflink.h"
Dave Chinner	9bbafc71	2021-06-02 10:48:24 +1000	[diff] [blame]	37	#include "xfs_ag.h"
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	38
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	39	kmem_zone_t *xfs_inode_zone;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	40
				41	/*
Christoph Hellwig	8f04c47	2011-07-08 14:34:34 +0200	[diff] [blame]	42	* Used in xfs_itruncate_extents(). This is the maximum number of extents
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	43	* freed from a file in a single transaction.
				44	*/
				45	#define XFS_ITRUNC_MAX_EXTENTS 2
				46
Dave Chinner	54d7b5c	2016-02-09 16:54:58 +1100	[diff] [blame]	47	STATIC int xfs_iunlink(struct xfs_trans , struct xfs_inode );
Dave Chinner	f40aadb	2021-06-02 10:48:51 +1000	[diff] [blame]	48	STATIC int xfs_iunlink_remove(struct xfs_trans tp, struct xfs_perag pag,
				49	struct xfs_inode *);
Zhi Yong Wu	ab29743	2013-12-18 08:22:41 +0800	[diff] [blame]	50
Dave Chinner	2a0ec1d	2012-04-23 15:59:02 +1000	[diff] [blame]	51	/*
				52	* helper function to extract extent size hint from inode
				53	*/
				54	xfs_extlen_t
				55	xfs_get_extsz_hint(
				56	struct xfs_inode *ip)
				57	{
Christoph Hellwig	bdb2ed2	2019-10-14 10:07:21 -0700	[diff] [blame]	58	/*
				59	* No point in aligning allocations if we need to COW to actually
				60	* write to them.
				61	*/
				62	if (xfs_is_always_cow_inode(ip))
				63	return 0;
Christoph Hellwig	db07349	2021-03-29 11:11:44 -0700	[diff] [blame]	64	if ((ip->i_diflags & XFS_DIFLAG_EXTSIZE) && ip->i_extsize)
Christoph Hellwig	031474c	2021-03-29 11:11:41 -0700	[diff] [blame]	65	return ip->i_extsize;
Dave Chinner	2a0ec1d	2012-04-23 15:59:02 +1000	[diff] [blame]	66	if (XFS_IS_REALTIME_INODE(ip))
				67	return ip->i_mount->m_sb.sb_rextsize;
				68	return 0;
				69	}
				70
Dave Chinner	fa96aca	2012-10-08 21:56:10 +1100	[diff] [blame]	71	/*
Darrick J. Wong	f7ca352	2016-10-03 09:11:43 -0700	[diff] [blame]	72	* Helper function to extract CoW extent size hint from inode.
				73	* Between the extent size hint and the CoW extent size hint, we
Darrick J. Wong	e153aa7	2016-10-03 09:11:49 -0700	[diff] [blame]	74	* return the greater of the two. If the value is zero (automatic),
				75	* use the default size.
Darrick J. Wong	f7ca352	2016-10-03 09:11:43 -0700	[diff] [blame]	76	*/
				77	xfs_extlen_t
				78	xfs_get_cowextsz_hint(
				79	struct xfs_inode *ip)
				80	{
				81	xfs_extlen_t a, b;
				82
				83	a = 0;
Christoph Hellwig	3e09ab8	2021-03-29 11:11:45 -0700	[diff] [blame]	84	if (ip->i_diflags2 & XFS_DIFLAG2_COWEXTSIZE)
Christoph Hellwig	b33ce57	2021-03-29 11:11:42 -0700	[diff] [blame]	85	a = ip->i_cowextsize;
Darrick J. Wong	f7ca352	2016-10-03 09:11:43 -0700	[diff] [blame]	86	b = xfs_get_extsz_hint(ip);
				87
Darrick J. Wong	e153aa7	2016-10-03 09:11:49 -0700	[diff] [blame]	88	a = max(a, b);
				89	if (a == 0)
				90	return XFS_DEFAULT_COWEXTSZ_HINT;
				91	return a;
Darrick J. Wong	f7ca352	2016-10-03 09:11:43 -0700	[diff] [blame]	92	}
				93
				94	/*
Christoph Hellwig	efa70be	2013-12-18 02:14:39 -0800	[diff] [blame]	95	* These two are wrapper routines around the xfs_ilock() routine used to
				96	* centralize some grungy code. They are used in places that wish to lock the
				97	* inode solely for reading the extents. The reason these places can't just
				98	* call xfs_ilock(ip, XFS_ILOCK_SHARED) is that the inode lock also guards to
				99	* bringing in of the extents from disk for a file in b-tree format. If the
				100	* inode is in b-tree format, then we need to lock the inode exclusively until
				101	* the extents are read in. Locking it exclusively all the time would limit
				102	* our parallelism unnecessarily, though. What we do instead is check to see
				103	* if the extents have been read in yet, and only lock the inode exclusively
				104	* if they have not.
Dave Chinner	fa96aca	2012-10-08 21:56:10 +1100	[diff] [blame]	105	*
Christoph Hellwig	efa70be	2013-12-18 02:14:39 -0800	[diff] [blame]	106	* The functions return a value which should be given to the corresponding
Christoph Hellwig	01f4f32	2013-12-06 12:30:08 -0800	[diff] [blame]	107	* xfs_iunlock() call.
Dave Chinner	fa96aca	2012-10-08 21:56:10 +1100	[diff] [blame]	108	*/
				109	uint
Christoph Hellwig	309ecac8	2013-12-06 12:30:09 -0800	[diff] [blame]	110	xfs_ilock_data_map_shared(
				111	struct xfs_inode *ip)
Dave Chinner	fa96aca	2012-10-08 21:56:10 +1100	[diff] [blame]	112	{
Christoph Hellwig	309ecac8	2013-12-06 12:30:09 -0800	[diff] [blame]	113	uint lock_mode = XFS_ILOCK_SHARED;
Dave Chinner	fa96aca	2012-10-08 21:56:10 +1100	[diff] [blame]	114
Christoph Hellwig	b2197a3	2021-04-13 11:15:12 -0700	[diff] [blame]	115	if (xfs_need_iread_extents(&ip->i_df))
Dave Chinner	fa96aca	2012-10-08 21:56:10 +1100	[diff] [blame]	116	lock_mode = XFS_ILOCK_EXCL;
Dave Chinner	fa96aca	2012-10-08 21:56:10 +1100	[diff] [blame]	117	xfs_ilock(ip, lock_mode);
Dave Chinner	fa96aca	2012-10-08 21:56:10 +1100	[diff] [blame]	118	return lock_mode;
				119	}
				120
Christoph Hellwig	efa70be	2013-12-18 02:14:39 -0800	[diff] [blame]	121	uint
				122	xfs_ilock_attr_map_shared(
				123	struct xfs_inode *ip)
Dave Chinner	fa96aca	2012-10-08 21:56:10 +1100	[diff] [blame]	124	{
Christoph Hellwig	efa70be	2013-12-18 02:14:39 -0800	[diff] [blame]	125	uint lock_mode = XFS_ILOCK_SHARED;
				126
Christoph Hellwig	b2197a3	2021-04-13 11:15:12 -0700	[diff] [blame]	127	if (ip->i_afp && xfs_need_iread_extents(ip->i_afp))
Christoph Hellwig	efa70be	2013-12-18 02:14:39 -0800	[diff] [blame]	128	lock_mode = XFS_ILOCK_EXCL;
				129	xfs_ilock(ip, lock_mode);
				130	return lock_mode;
Dave Chinner	fa96aca	2012-10-08 21:56:10 +1100	[diff] [blame]	131	}
				132
				133	/*
Christoph Hellwig	6552321	2016-11-30 14:33:25 +1100	[diff] [blame]	134	* In addition to i_rwsem in the VFS inode, the xfs inode contains 2
				135	* multi-reader locks: i_mmap_lock and the i_lock. This routine allows
				136	* various combinations of the locks to be obtained.
Dave Chinner	fa96aca	2012-10-08 21:56:10 +1100	[diff] [blame]	137	*
Dave Chinner	653c60b	2015-02-23 21:43:37 +1100	[diff] [blame]	138	* The 3 locks should always be ordered so that the IO lock is obtained first,
				139	* the mmap lock second and the ilock last in order to prevent deadlock.
Dave Chinner	fa96aca	2012-10-08 21:56:10 +1100	[diff] [blame]	140	*
Dave Chinner	653c60b	2015-02-23 21:43:37 +1100	[diff] [blame]	141	* Basic locking order:
				142	*
Christoph Hellwig	6552321	2016-11-30 14:33:25 +1100	[diff] [blame]	143	* i_rwsem -> i_mmap_lock -> page_lock -> i_ilock
Dave Chinner	653c60b	2015-02-23 21:43:37 +1100	[diff] [blame]	144	*
Michel Lespinasse	c1e8d7c	2020-06-08 21:33:54 -0700	[diff] [blame]	145	* mmap_lock locking order:
Dave Chinner	653c60b	2015-02-23 21:43:37 +1100	[diff] [blame]	146	*
Michel Lespinasse	c1e8d7c	2020-06-08 21:33:54 -0700	[diff] [blame]	147	* i_rwsem -> page lock -> mmap_lock
				148	* mmap_lock -> i_mmap_lock -> page_lock
Dave Chinner	653c60b	2015-02-23 21:43:37 +1100	[diff] [blame]	149	*
Michel Lespinasse	c1e8d7c	2020-06-08 21:33:54 -0700	[diff] [blame]	150	* The difference in mmap_lock locking order mean that we cannot hold the
Dave Chinner	653c60b	2015-02-23 21:43:37 +1100	[diff] [blame]	151	* i_mmap_lock over syscall based read(2)/write(2) based IO. These IO paths can
Michel Lespinasse	c1e8d7c	2020-06-08 21:33:54 -0700	[diff] [blame]	152	* fault in pages during copy in/out (for buffered IO) or require the mmap_lock
Dave Chinner	653c60b	2015-02-23 21:43:37 +1100	[diff] [blame]	153	* in get_user_pages() to map the user pages into the kernel address space for
Christoph Hellwig	6552321	2016-11-30 14:33:25 +1100	[diff] [blame]	154	* direct IO. Similarly the i_rwsem cannot be taken inside a page fault because
Michel Lespinasse	c1e8d7c	2020-06-08 21:33:54 -0700	[diff] [blame]	155	* page faults already hold the mmap_lock.
Dave Chinner	653c60b	2015-02-23 21:43:37 +1100	[diff] [blame]	156	*
				157	* Hence to serialise fully against both syscall and mmap based IO, we need to
Christoph Hellwig	6552321	2016-11-30 14:33:25 +1100	[diff] [blame]	158	* take both the i_rwsem and the i_mmap_lock. These locks should only be both
Dave Chinner	653c60b	2015-02-23 21:43:37 +1100	[diff] [blame]	159	* taken in places where we need to invalidate the page cache in a race
				160	* free manner (e.g. truncate, hole punch and other extent manipulation
				161	* functions).
Dave Chinner	fa96aca	2012-10-08 21:56:10 +1100	[diff] [blame]	162	*/
				163	void
				164	xfs_ilock(
				165	xfs_inode_t *ip,
				166	uint lock_flags)
				167	{
				168	trace_xfs_ilock(ip, lock_flags, _RET_IP_);
				169
				170	/*
				171	* You can't set both SHARED and EXCL for the same lock,
				172	* and only XFS_IOLOCK_SHARED, XFS_IOLOCK_EXCL, XFS_ILOCK_SHARED,
				173	* and XFS_ILOCK_EXCL are valid values to set in lock_flags.
				174	*/
				175	ASSERT((lock_flags & (XFS_IOLOCK_SHARED \| XFS_IOLOCK_EXCL)) !=
				176	(XFS_IOLOCK_SHARED \| XFS_IOLOCK_EXCL));
Dave Chinner	653c60b	2015-02-23 21:43:37 +1100	[diff] [blame]	177	ASSERT((lock_flags & (XFS_MMAPLOCK_SHARED \| XFS_MMAPLOCK_EXCL)) !=
				178	(XFS_MMAPLOCK_SHARED \| XFS_MMAPLOCK_EXCL));
Dave Chinner	fa96aca	2012-10-08 21:56:10 +1100	[diff] [blame]	179	ASSERT((lock_flags & (XFS_ILOCK_SHARED \| XFS_ILOCK_EXCL)) !=
				180	(XFS_ILOCK_SHARED \| XFS_ILOCK_EXCL));
Dave Chinner	0952c81	2015-08-19 10:32:49 +1000	[diff] [blame]	181	ASSERT((lock_flags & ~(XFS_LOCK_MASK \| XFS_LOCK_SUBCLASS_MASK)) == 0);
Dave Chinner	fa96aca	2012-10-08 21:56:10 +1100	[diff] [blame]	182
Christoph Hellwig	6552321	2016-11-30 14:33:25 +1100	[diff] [blame]	183	if (lock_flags & XFS_IOLOCK_EXCL) {
				184	down_write_nested(&VFS_I(ip)->i_rwsem,
				185	XFS_IOLOCK_DEP(lock_flags));
				186	} else if (lock_flags & XFS_IOLOCK_SHARED) {
				187	down_read_nested(&VFS_I(ip)->i_rwsem,
				188	XFS_IOLOCK_DEP(lock_flags));
				189	}
Dave Chinner	fa96aca	2012-10-08 21:56:10 +1100	[diff] [blame]	190
Dave Chinner	653c60b	2015-02-23 21:43:37 +1100	[diff] [blame]	191	if (lock_flags & XFS_MMAPLOCK_EXCL)
				192	mrupdate_nested(&ip->i_mmaplock, XFS_MMAPLOCK_DEP(lock_flags));
				193	else if (lock_flags & XFS_MMAPLOCK_SHARED)
				194	mraccess_nested(&ip->i_mmaplock, XFS_MMAPLOCK_DEP(lock_flags));
				195
Dave Chinner	fa96aca	2012-10-08 21:56:10 +1100	[diff] [blame]	196	if (lock_flags & XFS_ILOCK_EXCL)
				197	mrupdate_nested(&ip->i_lock, XFS_ILOCK_DEP(lock_flags));
				198	else if (lock_flags & XFS_ILOCK_SHARED)
				199	mraccess_nested(&ip->i_lock, XFS_ILOCK_DEP(lock_flags));
				200	}
				201
				202	/*
				203	* This is just like xfs_ilock(), except that the caller
				204	* is guaranteed not to sleep. It returns 1 if it gets
				205	* the requested locks and 0 otherwise. If the IO lock is
				206	* obtained but the inode lock cannot be, then the IO lock
				207	* is dropped before returning.
				208	*
				209	* ip -- the inode being locked
				210	* lock_flags -- this parameter indicates the inode's locks to be
				211	* to be locked. See the comment for xfs_ilock() for a list
				212	* of valid values.
				213	*/
				214	int
				215	xfs_ilock_nowait(
				216	xfs_inode_t *ip,
				217	uint lock_flags)
				218	{
				219	trace_xfs_ilock_nowait(ip, lock_flags, _RET_IP_);
				220
				221	/*
				222	* You can't set both SHARED and EXCL for the same lock,
				223	* and only XFS_IOLOCK_SHARED, XFS_IOLOCK_EXCL, XFS_ILOCK_SHARED,
				224	* and XFS_ILOCK_EXCL are valid values to set in lock_flags.
				225	*/
				226	ASSERT((lock_flags & (XFS_IOLOCK_SHARED \| XFS_IOLOCK_EXCL)) !=
				227	(XFS_IOLOCK_SHARED \| XFS_IOLOCK_EXCL));
Dave Chinner	653c60b	2015-02-23 21:43:37 +1100	[diff] [blame]	228	ASSERT((lock_flags & (XFS_MMAPLOCK_SHARED \| XFS_MMAPLOCK_EXCL)) !=
				229	(XFS_MMAPLOCK_SHARED \| XFS_MMAPLOCK_EXCL));
Dave Chinner	fa96aca	2012-10-08 21:56:10 +1100	[diff] [blame]	230	ASSERT((lock_flags & (XFS_ILOCK_SHARED \| XFS_ILOCK_EXCL)) !=
				231	(XFS_ILOCK_SHARED \| XFS_ILOCK_EXCL));
Dave Chinner	0952c81	2015-08-19 10:32:49 +1000	[diff] [blame]	232	ASSERT((lock_flags & ~(XFS_LOCK_MASK \| XFS_LOCK_SUBCLASS_MASK)) == 0);
Dave Chinner	fa96aca	2012-10-08 21:56:10 +1100	[diff] [blame]	233
				234	if (lock_flags & XFS_IOLOCK_EXCL) {
Christoph Hellwig	6552321	2016-11-30 14:33:25 +1100	[diff] [blame]	235	if (!down_write_trylock(&VFS_I(ip)->i_rwsem))
Dave Chinner	fa96aca	2012-10-08 21:56:10 +1100	[diff] [blame]	236	goto out;
				237	} else if (lock_flags & XFS_IOLOCK_SHARED) {
Christoph Hellwig	6552321	2016-11-30 14:33:25 +1100	[diff] [blame]	238	if (!down_read_trylock(&VFS_I(ip)->i_rwsem))
Dave Chinner	fa96aca	2012-10-08 21:56:10 +1100	[diff] [blame]	239	goto out;
				240	}
Dave Chinner	653c60b	2015-02-23 21:43:37 +1100	[diff] [blame]	241
				242	if (lock_flags & XFS_MMAPLOCK_EXCL) {
				243	if (!mrtryupdate(&ip->i_mmaplock))
				244	goto out_undo_iolock;
				245	} else if (lock_flags & XFS_MMAPLOCK_SHARED) {
				246	if (!mrtryaccess(&ip->i_mmaplock))
				247	goto out_undo_iolock;
				248	}
				249
Dave Chinner	fa96aca	2012-10-08 21:56:10 +1100	[diff] [blame]	250	if (lock_flags & XFS_ILOCK_EXCL) {
				251	if (!mrtryupdate(&ip->i_lock))
Dave Chinner	653c60b	2015-02-23 21:43:37 +1100	[diff] [blame]	252	goto out_undo_mmaplock;
Dave Chinner	fa96aca	2012-10-08 21:56:10 +1100	[diff] [blame]	253	} else if (lock_flags & XFS_ILOCK_SHARED) {
				254	if (!mrtryaccess(&ip->i_lock))
Dave Chinner	653c60b	2015-02-23 21:43:37 +1100	[diff] [blame]	255	goto out_undo_mmaplock;
Dave Chinner	fa96aca	2012-10-08 21:56:10 +1100	[diff] [blame]	256	}
				257	return 1;
				258
Dave Chinner	653c60b	2015-02-23 21:43:37 +1100	[diff] [blame]	259	out_undo_mmaplock:
				260	if (lock_flags & XFS_MMAPLOCK_EXCL)
				261	mrunlock_excl(&ip->i_mmaplock);
				262	else if (lock_flags & XFS_MMAPLOCK_SHARED)
				263	mrunlock_shared(&ip->i_mmaplock);
				264	out_undo_iolock:
Dave Chinner	fa96aca	2012-10-08 21:56:10 +1100	[diff] [blame]	265	if (lock_flags & XFS_IOLOCK_EXCL)
Christoph Hellwig	6552321	2016-11-30 14:33:25 +1100	[diff] [blame]	266	up_write(&VFS_I(ip)->i_rwsem);
Dave Chinner	fa96aca	2012-10-08 21:56:10 +1100	[diff] [blame]	267	else if (lock_flags & XFS_IOLOCK_SHARED)
Christoph Hellwig	6552321	2016-11-30 14:33:25 +1100	[diff] [blame]	268	up_read(&VFS_I(ip)->i_rwsem);
Dave Chinner	653c60b	2015-02-23 21:43:37 +1100	[diff] [blame]	269	out:
Dave Chinner	fa96aca	2012-10-08 21:56:10 +1100	[diff] [blame]	270	return 0;
				271	}
				272
				273	/*
				274	* xfs_iunlock() is used to drop the inode locks acquired with
				275	* xfs_ilock() and xfs_ilock_nowait(). The caller must pass
				276	* in the flags given to xfs_ilock() or xfs_ilock_nowait() so
				277	* that we know which locks to drop.
				278	*
				279	* ip -- the inode being unlocked
				280	* lock_flags -- this parameter indicates the inode's locks to be
				281	* to be unlocked. See the comment for xfs_ilock() for a list
				282	* of valid values for this parameter.
				283	*
				284	*/
				285	void
				286	xfs_iunlock(
				287	xfs_inode_t *ip,
				288	uint lock_flags)
				289	{
				290	/*
				291	* You can't set both SHARED and EXCL for the same lock,
				292	* and only XFS_IOLOCK_SHARED, XFS_IOLOCK_EXCL, XFS_ILOCK_SHARED,
				293	* and XFS_ILOCK_EXCL are valid values to set in lock_flags.
				294	*/
				295	ASSERT((lock_flags & (XFS_IOLOCK_SHARED \| XFS_IOLOCK_EXCL)) !=
				296	(XFS_IOLOCK_SHARED \| XFS_IOLOCK_EXCL));
Dave Chinner	653c60b	2015-02-23 21:43:37 +1100	[diff] [blame]	297	ASSERT((lock_flags & (XFS_MMAPLOCK_SHARED \| XFS_MMAPLOCK_EXCL)) !=
				298	(XFS_MMAPLOCK_SHARED \| XFS_MMAPLOCK_EXCL));
Dave Chinner	fa96aca	2012-10-08 21:56:10 +1100	[diff] [blame]	299	ASSERT((lock_flags & (XFS_ILOCK_SHARED \| XFS_ILOCK_EXCL)) !=
				300	(XFS_ILOCK_SHARED \| XFS_ILOCK_EXCL));
Dave Chinner	0952c81	2015-08-19 10:32:49 +1000	[diff] [blame]	301	ASSERT((lock_flags & ~(XFS_LOCK_MASK \| XFS_LOCK_SUBCLASS_MASK)) == 0);
Dave Chinner	fa96aca	2012-10-08 21:56:10 +1100	[diff] [blame]	302	ASSERT(lock_flags != 0);
				303
				304	if (lock_flags & XFS_IOLOCK_EXCL)
Christoph Hellwig	6552321	2016-11-30 14:33:25 +1100	[diff] [blame]	305	up_write(&VFS_I(ip)->i_rwsem);
Dave Chinner	fa96aca	2012-10-08 21:56:10 +1100	[diff] [blame]	306	else if (lock_flags & XFS_IOLOCK_SHARED)
Christoph Hellwig	6552321	2016-11-30 14:33:25 +1100	[diff] [blame]	307	up_read(&VFS_I(ip)->i_rwsem);
Dave Chinner	fa96aca	2012-10-08 21:56:10 +1100	[diff] [blame]	308
Dave Chinner	653c60b	2015-02-23 21:43:37 +1100	[diff] [blame]	309	if (lock_flags & XFS_MMAPLOCK_EXCL)
				310	mrunlock_excl(&ip->i_mmaplock);
				311	else if (lock_flags & XFS_MMAPLOCK_SHARED)
				312	mrunlock_shared(&ip->i_mmaplock);
				313
Dave Chinner	fa96aca	2012-10-08 21:56:10 +1100	[diff] [blame]	314	if (lock_flags & XFS_ILOCK_EXCL)
				315	mrunlock_excl(&ip->i_lock);
				316	else if (lock_flags & XFS_ILOCK_SHARED)
				317	mrunlock_shared(&ip->i_lock);
				318
				319	trace_xfs_iunlock(ip, lock_flags, _RET_IP_);
				320	}
				321
				322	/*
				323	* give up write locks. the i/o lock cannot be held nested
				324	* if it is being demoted.
				325	*/
				326	void
				327	xfs_ilock_demote(
				328	xfs_inode_t *ip,
				329	uint lock_flags)
				330	{
Dave Chinner	653c60b	2015-02-23 21:43:37 +1100	[diff] [blame]	331	ASSERT(lock_flags & (XFS_IOLOCK_EXCL\|XFS_MMAPLOCK_EXCL\|XFS_ILOCK_EXCL));
				332	ASSERT((lock_flags &
				333	~(XFS_IOLOCK_EXCL\|XFS_MMAPLOCK_EXCL\|XFS_ILOCK_EXCL)) == 0);
Dave Chinner	fa96aca	2012-10-08 21:56:10 +1100	[diff] [blame]	334
				335	if (lock_flags & XFS_ILOCK_EXCL)
				336	mrdemote(&ip->i_lock);
Dave Chinner	653c60b	2015-02-23 21:43:37 +1100	[diff] [blame]	337	if (lock_flags & XFS_MMAPLOCK_EXCL)
				338	mrdemote(&ip->i_mmaplock);
Dave Chinner	fa96aca	2012-10-08 21:56:10 +1100	[diff] [blame]	339	if (lock_flags & XFS_IOLOCK_EXCL)
Christoph Hellwig	6552321	2016-11-30 14:33:25 +1100	[diff] [blame]	340	downgrade_write(&VFS_I(ip)->i_rwsem);
Dave Chinner	fa96aca	2012-10-08 21:56:10 +1100	[diff] [blame]	341
				342	trace_xfs_ilock_demote(ip, lock_flags, _RET_IP_);
				343	}
				344
#if defined(DEBUG) || defined(XFS_WARN)
/*
 * Debug-only check of whether one of the inode's locks is held.
 *
 * Only one lock class is examined per call: the ilock flags are looked at
 * first, then the mmaplock flags, then the iolock flags, and the first class
 * that has a flag set decides the result.  Within a class, asking for EXCL
 * alone tests for a writer, while any request that includes SHARED is
 * satisfied by the lock being held in either mode.
 *
 * For the exclusive iolock test the answer comes from lockdep, and is
 * reported as "held" whenever debug_locks is zero.
 *
 * Returns non-zero if the lock is considered held, 0 otherwise.  Calling this
 * with no lock flag set trips an assert and returns 0.
 */
int
xfs_isilocked(
	xfs_inode_t		*ip,
	uint			lock_flags)
{
	if (lock_flags & (XFS_ILOCK_EXCL | XFS_ILOCK_SHARED)) {
		if (!(lock_flags & XFS_ILOCK_SHARED))
			return !!ip->i_lock.mr_writer;
		return rwsem_is_locked(&ip->i_lock.mr_lock);
	}

	if (lock_flags & (XFS_MMAPLOCK_EXCL | XFS_MMAPLOCK_SHARED)) {
		if (!(lock_flags & XFS_MMAPLOCK_SHARED))
			return !!ip->i_mmaplock.mr_writer;
		return rwsem_is_locked(&ip->i_mmaplock.mr_lock);
	}

	if (lock_flags & (XFS_IOLOCK_EXCL | XFS_IOLOCK_SHARED)) {
		if (!(lock_flags & XFS_IOLOCK_SHARED))
			return !debug_locks ||
				lockdep_is_held_type(&VFS_I(ip)->i_rwsem, 0);
		return rwsem_is_locked(&VFS_I(ip)->i_rwsem);
	}

	ASSERT(0);
	return 0;
}
#endif
				374
/*
 * xfs_lockdep_subclass_ok() is only used in an ASSERT, so is only called when
 * DEBUG or XFS_WARN is set.  And MAX_LOCKDEP_SUBCLASSES is then only defined
 * when CONFIG_LOCKDEP is set.  Hence the complex define below to avoid build
 * errors and warnings.
 *
 * With all of those enabled it reports whether @subclass is below
 * MAX_LOCKDEP_SUBCLASSES; in every other configuration it is a macro that is
 * always true.
 */
#if (defined(DEBUG) || defined(XFS_WARN)) && defined(CONFIG_LOCKDEP)
static bool
xfs_lockdep_subclass_ok(
	int			subclass)
{
	return subclass < MAX_LOCKDEP_SUBCLASSES;
}
#else
#define xfs_lockdep_subclass_ok(subclass)	(true)
#endif
				391
Dave Chinner	c24b5df	2013-08-12 20:49:45 +1000	[diff] [blame]	392	/*
Dave Chinner	653c60b	2015-02-23 21:43:37 +1100	[diff] [blame]	393	* Bump the subclass so xfs_lock_inodes() acquires each lock with a different
Dave Chinner	0952c81	2015-08-19 10:32:49 +1000	[diff] [blame]	394	* value. This can be called for any type of inode lock combination, including
				395	* parent locking. Care must be taken to ensure we don't overrun the subclass
				396	* storage fields in the class mask we build.
Dave Chinner	c24b5df	2013-08-12 20:49:45 +1000	[diff] [blame]	397	*/
				398	static inline int
				399	xfs_lock_inumorder(int lock_mode, int subclass)
				400	{
Dave Chinner	0952c81	2015-08-19 10:32:49 +1000	[diff] [blame]	401	int class = 0;
				402
				403	ASSERT(!(lock_mode & (XFS_ILOCK_PARENT \| XFS_ILOCK_RTBITMAP \|
				404	XFS_ILOCK_RTSUM)));
Dave Chinner	3403ccc	2015-08-20 09:27:49 +1000	[diff] [blame]	405	ASSERT(xfs_lockdep_subclass_ok(subclass));
Dave Chinner	0952c81	2015-08-19 10:32:49 +1000	[diff] [blame]	406
Dave Chinner	653c60b	2015-02-23 21:43:37 +1100	[diff] [blame]	407	if (lock_mode & (XFS_IOLOCK_SHARED\|XFS_IOLOCK_EXCL)) {
Dave Chinner	0952c81	2015-08-19 10:32:49 +1000	[diff] [blame]	408	ASSERT(subclass <= XFS_IOLOCK_MAX_SUBCLASS);
Dave Chinner	0952c81	2015-08-19 10:32:49 +1000	[diff] [blame]	409	class += subclass << XFS_IOLOCK_SHIFT;
Dave Chinner	653c60b	2015-02-23 21:43:37 +1100	[diff] [blame]	410	}
				411
				412	if (lock_mode & (XFS_MMAPLOCK_SHARED\|XFS_MMAPLOCK_EXCL)) {
Dave Chinner	0952c81	2015-08-19 10:32:49 +1000	[diff] [blame]	413	ASSERT(subclass <= XFS_MMAPLOCK_MAX_SUBCLASS);
				414	class += subclass << XFS_MMAPLOCK_SHIFT;
Dave Chinner	653c60b	2015-02-23 21:43:37 +1100	[diff] [blame]	415	}
				416
Dave Chinner	0952c81	2015-08-19 10:32:49 +1000	[diff] [blame]	417	if (lock_mode & (XFS_ILOCK_SHARED\|XFS_ILOCK_EXCL)) {
				418	ASSERT(subclass <= XFS_ILOCK_MAX_SUBCLASS);
				419	class += subclass << XFS_ILOCK_SHIFT;
				420	}
Dave Chinner	c24b5df	2013-08-12 20:49:45 +1000	[diff] [blame]	421
Dave Chinner	0952c81	2015-08-19 10:32:49 +1000	[diff] [blame]	422	return (lock_mode & ~XFS_LOCK_SUBCLASS_MASK) \| class;
Dave Chinner	c24b5df	2013-08-12 20:49:45 +1000	[diff] [blame]	423	}
				424
				425	/*
Dave Chinner	95afcf5	2015-03-25 14:03:32 +1100	[diff] [blame]	426	* The following routine will lock n inodes in exclusive mode. We assume the
				427	* caller calls us with the inodes in i_ino order.
Dave Chinner	c24b5df	2013-08-12 20:49:45 +1000	[diff] [blame]	428	*
Dave Chinner	95afcf5	2015-03-25 14:03:32 +1100	[diff] [blame]	429	* We need to detect deadlock where an inode that we lock is in the AIL and we
				430	* start waiting for another inode that is locked by a thread in a long running
				431	* transaction (such as truncate). This can result in deadlock since the long
				432	* running trans might need to wait for the inode we just locked in order to
				433	* push the tail and free space in the log.
Dave Chinner	0952c81	2015-08-19 10:32:49 +1000	[diff] [blame]	434	*
				435	* xfs_lock_inodes() can only be used to lock one type of lock at a time -
				436	* the iolock, the mmaplock or the ilock, but not more than one at a time. If we
				437	* lock more than one at a time, lockdep will report false positives saying we
				438	* have violated locking orders.
Dave Chinner	c24b5df	2013-08-12 20:49:45 +1000	[diff] [blame]	439	*/
Eric Sandeen	0d5a75e	2016-06-01 17:38:15 +1000	[diff] [blame]	440	static void
Dave Chinner	c24b5df	2013-08-12 20:49:45 +1000	[diff] [blame]	441	xfs_lock_inodes(
Christoph Hellwig	efe2330	2019-06-28 19:27:33 -0700	[diff] [blame]	442	struct xfs_inode **ips,
				443	int inodes,
				444	uint lock_mode)
Dave Chinner	c24b5df	2013-08-12 20:49:45 +1000	[diff] [blame]	445	{
Christoph Hellwig	efe2330	2019-06-28 19:27:33 -0700	[diff] [blame]	446	int attempts = 0, i, j, try_lock;
				447	struct xfs_log_item *lp;
Dave Chinner	c24b5df	2013-08-12 20:49:45 +1000	[diff] [blame]	448
Dave Chinner	0952c81	2015-08-19 10:32:49 +1000	[diff] [blame]	449	/*
				450	* Currently supports between 2 and 5 inodes with exclusive locking. We
				451	* support an arbitrary depth of locking here, but absolute limits on
Randy Dunlap	b63da6c	2020-08-05 08:49:58 -0700	[diff] [blame]	452	* inodes depend on the type of locking and the limits placed by
Dave Chinner	0952c81	2015-08-19 10:32:49 +1000	[diff] [blame]	453	* lockdep annotations in xfs_lock_inumorder. These are all checked by
				454	* the asserts.
				455	*/
Dave Chinner	95afcf5	2015-03-25 14:03:32 +1100	[diff] [blame]	456	ASSERT(ips && inodes >= 2 && inodes <= 5);
Dave Chinner	0952c81	2015-08-19 10:32:49 +1000	[diff] [blame]	457	ASSERT(lock_mode & (XFS_IOLOCK_EXCL \| XFS_MMAPLOCK_EXCL \|
				458	XFS_ILOCK_EXCL));
				459	ASSERT(!(lock_mode & (XFS_IOLOCK_SHARED \| XFS_MMAPLOCK_SHARED \|
				460	XFS_ILOCK_SHARED)));
Dave Chinner	0952c81	2015-08-19 10:32:49 +1000	[diff] [blame]	461	ASSERT(!(lock_mode & XFS_MMAPLOCK_EXCL) \|\|
				462	inodes <= XFS_MMAPLOCK_MAX_SUBCLASS + 1);
				463	ASSERT(!(lock_mode & XFS_ILOCK_EXCL) \|\|
				464	inodes <= XFS_ILOCK_MAX_SUBCLASS + 1);
				465
				466	if (lock_mode & XFS_IOLOCK_EXCL) {
				467	ASSERT(!(lock_mode & (XFS_MMAPLOCK_EXCL \| XFS_ILOCK_EXCL)));
				468	} else if (lock_mode & XFS_MMAPLOCK_EXCL)
				469	ASSERT(!(lock_mode & XFS_ILOCK_EXCL));
Dave Chinner	c24b5df	2013-08-12 20:49:45 +1000	[diff] [blame]	470
				471	try_lock = 0;
				472	i = 0;
Dave Chinner	c24b5df	2013-08-12 20:49:45 +1000	[diff] [blame]	473	again:
				474	for (; i < inodes; i++) {
				475	ASSERT(ips[i]);
				476
Dave Chinner	95afcf5	2015-03-25 14:03:32 +1100	[diff] [blame]	477	if (i && (ips[i] == ips[i - 1])) /* Already locked */
Dave Chinner	c24b5df	2013-08-12 20:49:45 +1000	[diff] [blame]	478	continue;
				479
				480	/*
Dave Chinner	95afcf5	2015-03-25 14:03:32 +1100	[diff] [blame]	481	* If try_lock is not set yet, make sure all locked inodes are
				482	* not in the AIL. If any are, set try_lock to be used later.
Dave Chinner	c24b5df	2013-08-12 20:49:45 +1000	[diff] [blame]	483	*/
Dave Chinner	c24b5df	2013-08-12 20:49:45 +1000	[diff] [blame]	484	if (!try_lock) {
				485	for (j = (i - 1); j >= 0 && !try_lock; j--) {
Christoph Hellwig	b3b14aa	2019-06-28 19:27:33 -0700	[diff] [blame]	486	lp = &ips[j]->i_itemp->ili_item;
Dave Chinner	22525c1	2018-05-09 07:47:34 -0700	[diff] [blame]	487	if (lp && test_bit(XFS_LI_IN_AIL, &lp->li_flags))
Dave Chinner	c24b5df	2013-08-12 20:49:45 +1000	[diff] [blame]	488	try_lock++;
Dave Chinner	c24b5df	2013-08-12 20:49:45 +1000	[diff] [blame]	489	}
				490	}
				491
				492	/*
				493	* If any of the previous locks we have locked is in the AIL,
				494	* we must TRY to get the second and subsequent locks. If
				495	* we can't get any, we must release all we have
				496	* and try again.
				497	*/
Dave Chinner	95afcf5	2015-03-25 14:03:32 +1100	[diff] [blame]	498	if (!try_lock) {
Dave Chinner	c24b5df	2013-08-12 20:49:45 +1000	[diff] [blame]	499	xfs_ilock(ips[i], xfs_lock_inumorder(lock_mode, i));
Dave Chinner	95afcf5	2015-03-25 14:03:32 +1100	[diff] [blame]	500	continue;
Dave Chinner	c24b5df	2013-08-12 20:49:45 +1000	[diff] [blame]	501	}
Dave Chinner	95afcf5	2015-03-25 14:03:32 +1100	[diff] [blame]	502
				503	/* try_lock means we have an inode locked that is in the AIL. */
				504	ASSERT(i != 0);
				505	if (xfs_ilock_nowait(ips[i], xfs_lock_inumorder(lock_mode, i)))
				506	continue;
				507
				508	/*
				509	* Unlock all previous guys and try again. xfs_iunlock will try
				510	* to push the tail if the inode is in the AIL.
				511	*/
				512	attempts++;
				513	for (j = i - 1; j >= 0; j--) {
				514	/*
				515	* Check to see if we've already unlocked this one. Not
				516	* the first one going back, and the inode ptr is the
				517	* same.
				518	*/
				519	if (j != (i - 1) && ips[j] == ips[j + 1])
				520	continue;
				521
				522	xfs_iunlock(ips[j], lock_mode);
				523	}
				524
				525	if ((attempts % 5) == 0) {
				526	delay(1); /* Don't just spin the CPU */
Dave Chinner	95afcf5	2015-03-25 14:03:32 +1100	[diff] [blame]	527	}
				528	i = 0;
				529	try_lock = 0;
				530	goto again;
Dave Chinner	c24b5df	2013-08-12 20:49:45 +1000	[diff] [blame]	531	}
Dave Chinner	c24b5df	2013-08-12 20:49:45 +1000	[diff] [blame]	532	}
				533
				534	/*
Dave Chinner	653c60b	2015-02-23 21:43:37 +1100	[diff] [blame]	535	* xfs_lock_two_inodes() can only be used to lock one type of lock at a time -
Darrick J. Wong	7c2d238	2018-01-26 15:27:33 -0800	[diff] [blame]	536	* the mmaplock or the ilock, but not more than one type at a time. If we lock
				537	* more than one at a time, lockdep will report false positives saying we have
				538	* violated locking orders. The iolock must be double-locked separately since
				539	* we use i_rwsem for that. We now support taking one lock EXCL and the other
				540	* SHARED.
					*
					* The two inodes must have different inode numbers. They are locked in
					* ascending inode number order, so ip0/ip1 (and their modes) are swapped
					* internally when the caller passes the higher-numbered inode first.
Dave Chinner	c24b5df	2013-08-12 20:49:45 +1000	[diff] [blame]	541	*/
				542	void
				543	xfs_lock_two_inodes(
Darrick J. Wong	7c2d238	2018-01-26 15:27:33 -0800	[diff] [blame]	544	struct xfs_inode *ip0,
				545	uint ip0_mode,
				546	struct xfs_inode *ip1,
				547	uint ip1_mode)
Dave Chinner	c24b5df	2013-08-12 20:49:45 +1000	[diff] [blame]	548	{
Darrick J. Wong	7c2d238	2018-01-26 15:27:33 -0800	[diff] [blame]	549	struct xfs_inode *temp;
				550	uint mode_temp;
Dave Chinner	c24b5df	2013-08-12 20:49:45 +1000	[diff] [blame]	551	int attempts = 0;
Christoph Hellwig	efe2330	2019-06-28 19:27:33 -0700	[diff] [blame]	552	struct xfs_log_item *lp;
Dave Chinner	c24b5df	2013-08-12 20:49:45 +1000	[diff] [blame]	553
					/* Each mode must have exactly one bit set, and it must not be an iolock bit. */
Darrick J. Wong	7c2d238	2018-01-26 15:27:33 -0800	[diff] [blame]	554	ASSERT(hweight32(ip0_mode) == 1);
				555	ASSERT(hweight32(ip1_mode) == 1);
				556	ASSERT(!(ip0_mode & (XFS_IOLOCK_SHARED\|XFS_IOLOCK_EXCL)));
				557	ASSERT(!(ip1_mode & (XFS_IOLOCK_SHARED\|XFS_IOLOCK_EXCL)));
					/* Neither mode, alone or combined with the other, may mix mmaplock and ilock. */
				558	ASSERT(!(ip0_mode & (XFS_MMAPLOCK_SHARED\|XFS_MMAPLOCK_EXCL)) \|\|
				559	!(ip0_mode & (XFS_ILOCK_SHARED\|XFS_ILOCK_EXCL)));
				560	ASSERT(!(ip1_mode & (XFS_MMAPLOCK_SHARED\|XFS_MMAPLOCK_EXCL)) \|\|
				561	!(ip1_mode & (XFS_ILOCK_SHARED\|XFS_ILOCK_EXCL)));
				562	ASSERT(!(ip1_mode & (XFS_MMAPLOCK_SHARED\|XFS_MMAPLOCK_EXCL)) \|\|
				563	!(ip0_mode & (XFS_ILOCK_SHARED\|XFS_ILOCK_EXCL)));
				564	ASSERT(!(ip0_mode & (XFS_MMAPLOCK_SHARED\|XFS_MMAPLOCK_EXCL)) \|\|
				565	!(ip1_mode & (XFS_ILOCK_SHARED\|XFS_ILOCK_EXCL)));
Dave Chinner	653c60b	2015-02-23 21:43:37 +1100	[diff] [blame]	566
Dave Chinner	c24b5df	2013-08-12 20:49:45 +1000	[diff] [blame]	567	ASSERT(ip0->i_ino != ip1->i_ino);
				568
					/* Sort so that ip0 is the lower-numbered inode; it is locked first. */
				569	if (ip0->i_ino > ip1->i_ino) {
				570	temp = ip0;
				571	ip0 = ip1;
				572	ip1 = temp;
Darrick J. Wong	7c2d238	2018-01-26 15:27:33 -0800	[diff] [blame]	573	mode_temp = ip0_mode;
				574	ip0_mode = ip1_mode;
				575	ip1_mode = mode_temp;
Dave Chinner	c24b5df	2013-08-12 20:49:45 +1000	[diff] [blame]	576	}
				577
				578	again:
Darrick J. Wong	7c2d238	2018-01-26 15:27:33 -0800	[diff] [blame]	579	xfs_ilock(ip0, xfs_lock_inumorder(ip0_mode, 0));
Dave Chinner	c24b5df	2013-08-12 20:49:45 +1000	[diff] [blame]	580
				581	/*
				582	* If the first lock we have locked is in the AIL, we must TRY to get
				583	* the second lock. If we can't get it, we must release the first one
				584	* and try again. Every fifth failed attempt we delay(1) before retrying.
				585	*/
Christoph Hellwig	b3b14aa	2019-06-28 19:27:33 -0700	[diff] [blame]	586	lp = &ip0->i_itemp->ili_item;
Dave Chinner	22525c1	2018-05-09 07:47:34 -0700	[diff] [blame]	587	if (lp && test_bit(XFS_LI_IN_AIL, &lp->li_flags)) {
Darrick J. Wong	7c2d238	2018-01-26 15:27:33 -0800	[diff] [blame]	588	if (!xfs_ilock_nowait(ip1, xfs_lock_inumorder(ip1_mode, 1))) {
				589	xfs_iunlock(ip0, ip0_mode);
Dave Chinner	c24b5df	2013-08-12 20:49:45 +1000	[diff] [blame]	590	if ((++attempts % 5) == 0)
				591	delay(1); /* Don't just spin the CPU */
				592	goto again;
				593	}
				594	} else {
Darrick J. Wong	7c2d238	2018-01-26 15:27:33 -0800	[diff] [blame]	595	xfs_ilock(ip1, xfs_lock_inumorder(ip1_mode, 1));
Dave Chinner	c24b5df	2013-08-12 20:49:45 +1000	[diff] [blame]	596	}
				597	}
				598
					/*
					* Translate the XFS_DIFLAG_* bits in ip->i_diflags and the XFS_DIFLAG2_* bits
					* in ip->i_diflags2 into the corresponding FS_XFLAG_* bits and return them.
					* FS_XFLAG_HASATTR is additionally set when XFS_IFORK_Q(ip) is true.
					*/
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	599	uint
				600	xfs_ip2xflags(
Dave Chinner	58f88ca	2016-01-04 16:44:15 +1100	[diff] [blame]	601	struct xfs_inode *ip)
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	602	{
Christoph Hellwig	4422501	2021-03-29 11:11:46 -0700	[diff] [blame]	603	uint flags = 0;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	604
					/* Skip the per-flag tests entirely when no XFS_DIFLAG_ANY bit is set. */
Christoph Hellwig	4422501	2021-03-29 11:11:46 -0700	[diff] [blame]	605	if (ip->i_diflags & XFS_DIFLAG_ANY) {
				606	if (ip->i_diflags & XFS_DIFLAG_REALTIME)
				607	flags \|= FS_XFLAG_REALTIME;
				608	if (ip->i_diflags & XFS_DIFLAG_PREALLOC)
				609	flags \|= FS_XFLAG_PREALLOC;
				610	if (ip->i_diflags & XFS_DIFLAG_IMMUTABLE)
				611	flags \|= FS_XFLAG_IMMUTABLE;
				612	if (ip->i_diflags & XFS_DIFLAG_APPEND)
				613	flags \|= FS_XFLAG_APPEND;
				614	if (ip->i_diflags & XFS_DIFLAG_SYNC)
				615	flags \|= FS_XFLAG_SYNC;
				616	if (ip->i_diflags & XFS_DIFLAG_NOATIME)
				617	flags \|= FS_XFLAG_NOATIME;
				618	if (ip->i_diflags & XFS_DIFLAG_NODUMP)
				619	flags \|= FS_XFLAG_NODUMP;
				620	if (ip->i_diflags & XFS_DIFLAG_RTINHERIT)
				621	flags \|= FS_XFLAG_RTINHERIT;
				622	if (ip->i_diflags & XFS_DIFLAG_PROJINHERIT)
				623	flags \|= FS_XFLAG_PROJINHERIT;
				624	if (ip->i_diflags & XFS_DIFLAG_NOSYMLINKS)
				625	flags \|= FS_XFLAG_NOSYMLINKS;
				626	if (ip->i_diflags & XFS_DIFLAG_EXTSIZE)
				627	flags \|= FS_XFLAG_EXTSIZE;
				628	if (ip->i_diflags & XFS_DIFLAG_EXTSZINHERIT)
				629	flags \|= FS_XFLAG_EXTSZINHERIT;
				630	if (ip->i_diflags & XFS_DIFLAG_NODEFRAG)
				631	flags \|= FS_XFLAG_NODEFRAG;
				632	if (ip->i_diflags & XFS_DIFLAG_FILESTREAM)
				633	flags \|= FS_XFLAG_FILESTREAM;
				634	}
				635
					/* Same shortcut for the second flags word. */
				636	if (ip->i_diflags2 & XFS_DIFLAG2_ANY) {
				637	if (ip->i_diflags2 & XFS_DIFLAG2_DAX)
				638	flags \|= FS_XFLAG_DAX;
				639	if (ip->i_diflags2 & XFS_DIFLAG2_COWEXTSIZE)
				640	flags \|= FS_XFLAG_COWEXTSIZE;
				641	}
				642
				643	if (XFS_IFORK_Q(ip))
				644	flags \|= FS_XFLAG_HASATTR;
				645	return flags;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	646	}
				647
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	648	/*
Dave Chinner	c24b5df	2013-08-12 20:49:45 +1000	[diff] [blame]	649	* Look up an inode from "name". If ci_name is not NULL, then a CI match
				650	* is allowed, otherwise it has to be an exact match. If a CI match is found,
				651	* ci_name->name will point to the actual name (caller must free) or
				652	* will be set to NULL if an exact match is found.
					*
					* Returns 0 with the inode stored in *ipp on success. Returns -EIO without
					* touching *ipp if XFS_FORCED_SHUTDOWN() is true. If xfs_dir_lookup() or
					* xfs_iget() fails, *ipp is set to NULL and that error is returned.
				653	*/
				654	int
				655	xfs_lookup(
				656	xfs_inode_t *dp,
				657	struct xfs_name *name,
				658	xfs_inode_t **ipp,
				659	struct xfs_name *ci_name)
				660	{
				661	xfs_ino_t inum;
				662	int error;
Dave Chinner	c24b5df	2013-08-12 20:49:45 +1000	[diff] [blame]	663
				664	trace_xfs_lookup(dp, name);
				665
				666	if (XFS_FORCED_SHUTDOWN(dp->i_mount))
Dave Chinner	2451337	2014-06-25 14:58:08 +1000	[diff] [blame]	667	return -EIO;
Dave Chinner	c24b5df	2013-08-12 20:49:45 +1000	[diff] [blame]	668
Dave Chinner	c24b5df	2013-08-12 20:49:45 +1000	[diff] [blame]	669	error = xfs_dir_lookup(NULL, dp, name, &inum, ci_name);
Dave Chinner	c24b5df	2013-08-12 20:49:45 +1000	[diff] [blame]	670	if (error)
Dave Chinner	dbad7c9	2015-08-19 10:33:00 +1000	[diff] [blame]	671	goto out_unlock;
Dave Chinner	c24b5df	2013-08-12 20:49:45 +1000	[diff] [blame]	672
				673	error = xfs_iget(dp->i_mount, NULL, inum, 0, 0, ipp);
				674	if (error)
				675	goto out_free_name;
				676
				677	return 0;
				678
					/* xfs_iget() failed after the lookup succeeded: release ci_name->name. */
				679	out_free_name:
				680	if (ci_name)
				681	kmem_free(ci_name->name);
Dave Chinner	dbad7c9	2015-08-19 10:33:00 +1000	[diff] [blame]	682	out_unlock:
Dave Chinner	c24b5df	2013-08-12 20:49:45 +1000	[diff] [blame]	683	*ipp = NULL;
				684	return error;
				685	}
				686
Darrick J. Wong	8a569d7	2020-09-13 10:16:40 -0700	[diff] [blame]	687	/*
					* Propagate di_flags from a parent inode to a child inode.
					*
					* The flags chosen from pip->i_diflags are OR-ed into ip->i_diflags, and
					* ip->i_extsize is copied from the parent when XFS_DIFLAG_EXTSZINHERIT is set
					* there. The resulting extent size hint is then validated and cleared again
					* if xfs_inode_validate_extsize() rejects it.
					*/
				688	static void
				689	xfs_inode_inherit_flags(
				690	struct xfs_inode *ip,
				691	const struct xfs_inode *pip)
				692	{
				693	unsigned int di_flags = 0;
Darrick J. Wong	603f000	2021-05-12 12:51:26 -0700	[diff] [blame]	694	xfs_failaddr_t failaddr;
Darrick J. Wong	8a569d7	2020-09-13 10:16:40 -0700	[diff] [blame]	695	umode_t mode = VFS_I(ip)->i_mode;
				696
					/*
					* A directory child keeps the *INHERIT flags themselves; a regular file
					* child instead gets REALTIME (only if xfs_has_realtime()) and EXTSIZE.
					* Other file types take neither branch.
					*/
				697	if (S_ISDIR(mode)) {
Christoph Hellwig	db07349	2021-03-29 11:11:44 -0700	[diff] [blame]	698	if (pip->i_diflags & XFS_DIFLAG_RTINHERIT)
Darrick J. Wong	8a569d7	2020-09-13 10:16:40 -0700	[diff] [blame]	699	di_flags \|= XFS_DIFLAG_RTINHERIT;
Christoph Hellwig	db07349	2021-03-29 11:11:44 -0700	[diff] [blame]	700	if (pip->i_diflags & XFS_DIFLAG_EXTSZINHERIT) {
Darrick J. Wong	8a569d7	2020-09-13 10:16:40 -0700	[diff] [blame]	701	di_flags \|= XFS_DIFLAG_EXTSZINHERIT;
Christoph Hellwig	031474c	2021-03-29 11:11:41 -0700	[diff] [blame]	702	ip->i_extsize = pip->i_extsize;
Darrick J. Wong	8a569d7	2020-09-13 10:16:40 -0700	[diff] [blame]	703	}
Christoph Hellwig	db07349	2021-03-29 11:11:44 -0700	[diff] [blame]	704	if (pip->i_diflags & XFS_DIFLAG_PROJINHERIT)
Darrick J. Wong	8a569d7	2020-09-13 10:16:40 -0700	[diff] [blame]	705	di_flags \|= XFS_DIFLAG_PROJINHERIT;
				706	} else if (S_ISREG(mode)) {
Christoph Hellwig	db07349	2021-03-29 11:11:44 -0700	[diff] [blame]	707	if ((pip->i_diflags & XFS_DIFLAG_RTINHERIT) &&
Dave Chinner	38c26bf	2021-08-18 18:46:37 -0700	[diff] [blame]	708	xfs_has_realtime(ip->i_mount))
Darrick J. Wong	8a569d7	2020-09-13 10:16:40 -0700	[diff] [blame]	709	di_flags \|= XFS_DIFLAG_REALTIME;
Christoph Hellwig	db07349	2021-03-29 11:11:44 -0700	[diff] [blame]	710	if (pip->i_diflags & XFS_DIFLAG_EXTSZINHERIT) {
Darrick J. Wong	8a569d7	2020-09-13 10:16:40 -0700	[diff] [blame]	711	di_flags \|= XFS_DIFLAG_EXTSIZE;
Christoph Hellwig	031474c	2021-03-29 11:11:41 -0700	[diff] [blame]	712	ip->i_extsize = pip->i_extsize;
Darrick J. Wong	8a569d7	2020-09-13 10:16:40 -0700	[diff] [blame]	713	}
				714	}
					/*
					* The next five flags are inherited only when the matching xfs_inherit_*
					* variable is nonzero; FILESTREAM below is inherited unconditionally.
					*/
Christoph Hellwig	db07349	2021-03-29 11:11:44 -0700	[diff] [blame]	715	if ((pip->i_diflags & XFS_DIFLAG_NOATIME) &&
Darrick J. Wong	8a569d7	2020-09-13 10:16:40 -0700	[diff] [blame]	716	xfs_inherit_noatime)
				717	di_flags \|= XFS_DIFLAG_NOATIME;
Christoph Hellwig	db07349	2021-03-29 11:11:44 -0700	[diff] [blame]	718	if ((pip->i_diflags & XFS_DIFLAG_NODUMP) &&
Darrick J. Wong	8a569d7	2020-09-13 10:16:40 -0700	[diff] [blame]	719	xfs_inherit_nodump)
				720	di_flags \|= XFS_DIFLAG_NODUMP;
Christoph Hellwig	db07349	2021-03-29 11:11:44 -0700	[diff] [blame]	721	if ((pip->i_diflags & XFS_DIFLAG_SYNC) &&
Darrick J. Wong	8a569d7	2020-09-13 10:16:40 -0700	[diff] [blame]	722	xfs_inherit_sync)
				723	di_flags \|= XFS_DIFLAG_SYNC;
Christoph Hellwig	db07349	2021-03-29 11:11:44 -0700	[diff] [blame]	724	if ((pip->i_diflags & XFS_DIFLAG_NOSYMLINKS) &&
Darrick J. Wong	8a569d7	2020-09-13 10:16:40 -0700	[diff] [blame]	725	xfs_inherit_nosymlinks)
				726	di_flags \|= XFS_DIFLAG_NOSYMLINKS;
Christoph Hellwig	db07349	2021-03-29 11:11:44 -0700	[diff] [blame]	727	if ((pip->i_diflags & XFS_DIFLAG_NODEFRAG) &&
Darrick J. Wong	8a569d7	2020-09-13 10:16:40 -0700	[diff] [blame]	728	xfs_inherit_nodefrag)
				729	di_flags \|= XFS_DIFLAG_NODEFRAG;
Christoph Hellwig	db07349	2021-03-29 11:11:44 -0700	[diff] [blame]	730	if (pip->i_diflags & XFS_DIFLAG_FILESTREAM)
Darrick J. Wong	8a569d7	2020-09-13 10:16:40 -0700	[diff] [blame]	731	di_flags \|= XFS_DIFLAG_FILESTREAM;
				732
Christoph Hellwig	db07349	2021-03-29 11:11:44 -0700	[diff] [blame]	733	ip->i_diflags \|= di_flags;
Darrick J. Wong	603f000	2021-05-12 12:51:26 -0700	[diff] [blame]	734
				735	/*
				736	* Inode verifiers on older kernels only check that the extent size
				737	* hint is an integer multiple of the rt extent size on realtime files.
				738	* They did not check the hint alignment on a directory with both
				739	* rtinherit and extszinherit flags set. If the misaligned hint is
				740	* propagated from a directory into a new realtime file, new file
				741	* allocations will fail due to math errors in the rt allocator and/or
				742	* trip the verifiers. Validate the hint settings in the new file so
				743	* that we don't let broken hints propagate.
				744	*/
				745	failaddr = xfs_inode_validate_extsize(ip->i_mount, ip->i_extsize,
				746	VFS_I(ip)->i_mode, ip->i_diflags);
				747	if (failaddr) {
				748	ip->i_diflags &= ~(XFS_DIFLAG_EXTSIZE \|
				749	XFS_DIFLAG_EXTSZINHERIT);
				750	ip->i_extsize = 0;
				751	}
Darrick J. Wong	8a569d7	2020-09-13 10:16:40 -0700	[diff] [blame]	752	}
				753
				754	/*
					* Propagate di_flags2 from a parent inode to a child inode.
					*
					* Only XFS_DIFLAG2_COWEXTSIZE (together with i_cowextsize) and
					* XFS_DIFLAG2_DAX are copied. The CoW extent size hint is dropped again if
					* xfs_inode_validate_cowextsize() rejects it.
					*/
				755	static void
				756	xfs_inode_inherit_flags2(
				757	struct xfs_inode *ip,
				758	const struct xfs_inode *pip)
				759	{
Darrick J. Wong	603f000	2021-05-12 12:51:26 -0700	[diff] [blame]	760	xfs_failaddr_t failaddr;
				761
					/* The hint value travels with its flag. */
Christoph Hellwig	3e09ab8	2021-03-29 11:11:45 -0700	[diff] [blame]	762	if (pip->i_diflags2 & XFS_DIFLAG2_COWEXTSIZE) {
				763	ip->i_diflags2 \|= XFS_DIFLAG2_COWEXTSIZE;
Christoph Hellwig	b33ce57	2021-03-29 11:11:42 -0700	[diff] [blame]	764	ip->i_cowextsize = pip->i_cowextsize;
Darrick J. Wong	8a569d7	2020-09-13 10:16:40 -0700	[diff] [blame]	765	}
Christoph Hellwig	3e09ab8	2021-03-29 11:11:45 -0700	[diff] [blame]	766	if (pip->i_diflags2 & XFS_DIFLAG2_DAX)
				767	ip->i_diflags2 \|= XFS_DIFLAG2_DAX;
Darrick J. Wong	603f000	2021-05-12 12:51:26 -0700	[diff] [blame]	768
				769	/* Don't let invalid cowextsize hints propagate. */
				770	failaddr = xfs_inode_validate_cowextsize(ip->i_mount, ip->i_cowextsize,
				771	VFS_I(ip)->i_mode, ip->i_diflags, ip->i_diflags2);
				772	if (failaddr) {
				773	ip->i_diflags2 &= ~XFS_DIFLAG2_COWEXTSIZE;
				774	ip->i_cowextsize = 0;
				775	}
Darrick J. Wong	8a569d7	2020-09-13 10:16:40 -0700	[diff] [blame]	776	}
				777
Dave Chinner	c24b5df	2013-08-12 20:49:45 +1000	[diff] [blame]	778	/*
Dave Chinner	1abcf26	2020-12-09 10:05:15 -0800	[diff] [blame]	779	* Initialise a newly allocated inode and return the in-core inode to the
				780	* caller locked exclusively.
					*
					* "pip" may be NULL; when set it is the parent used for ownership and flag
					* inheritance. "nlink", "rdev" and "prid" are stored in the new inode as
					* given. If "init_xattrs" is true and xfs_has_attr(mp), an empty attribute
					* fork is set up as well.
					*
					* On success the inode has been joined to "tp" and logged, *ipp points to
					* it and 0 is returned. Returns -EFSCORRUPTED if "ino" fails the sanity
					* check below, or the error from xfs_iget(); *ipp is not written then.
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	781	*/
Dave Chinner	b652afd	2021-06-02 10:48:24 +1000	[diff] [blame]	782	int
Dave Chinner	1abcf26	2020-12-09 10:05:15 -0800	[diff] [blame]	783	xfs_init_new_inode(
Christoph Hellwig	f736d93	2021-01-21 14:19:58 +0100	[diff] [blame]	784	struct user_namespace *mnt_userns,
Dave Chinner	1abcf26	2020-12-09 10:05:15 -0800	[diff] [blame]	785	struct xfs_trans *tp,
				786	struct xfs_inode *pip,
				787	xfs_ino_t ino,
				788	umode_t mode,
				789	xfs_nlink_t nlink,
				790	dev_t rdev,
				791	prid_t prid,
Dave Chinner	e6a688c	2021-03-22 09:52:03 -0700	[diff] [blame]	792	bool init_xattrs,
Dave Chinner	1abcf26	2020-12-09 10:05:15 -0800	[diff] [blame]	793	struct xfs_inode **ipp)
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	794	{
Christoph Hellwig	01ea173	2021-01-22 16:48:18 -0800	[diff] [blame]	795	struct inode *dir = pip ? VFS_I(pip) : NULL;
Dave Chinner	1abcf26	2020-12-09 10:05:15 -0800	[diff] [blame]	796	struct xfs_mount *mp = tp->t_mountp;
				797	struct xfs_inode *ip;
				798	unsigned int flags;
				799	int error;
				800	struct timespec64 tv;
				801	struct inode *inode;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	802
				803	/*
Dave Chinner	8b26984	2018-04-17 17:17:35 -0700	[diff] [blame]	804	* Protect against obviously corrupt allocation btree records. Later
				805	* xfs_iget checks will catch re-allocation of other active in-memory
				806	* and on-disk inodes. If we don't catch reallocating the parent inode
				807	* here we will deadlock in xfs_iget() so we have to do these checks
				808	* first.
				809	*/
				810	if ((pip && ino == pip->i_ino) \|\| !xfs_verify_dir_ino(mp, ino)) {
				811	xfs_alert(mp, "Allocated a known in-use inode 0x%llx!", ino);
				812	return -EFSCORRUPTED;
				813	}
				814
				815	/*
Dave Chinner	1abcf26	2020-12-09 10:05:15 -0800	[diff] [blame]	816	* Get the in-core inode with the lock held exclusively to prevent
				817	* others from looking at it until we're done.
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	818	*/
Dave Chinner	1abcf26	2020-12-09 10:05:15 -0800	[diff] [blame]	819	error = xfs_iget(mp, tp, ino, XFS_IGET_CREATE, XFS_ILOCK_EXCL, &ip);
David Chinner	bf90424	2008-10-30 17:36:14 +1100	[diff] [blame]	820	if (error)
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	821	return error;
Dave Chinner	1abcf26	2020-12-09 10:05:15 -0800	[diff] [blame]	822
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	823	ASSERT(ip != NULL);
Dave Chinner	3987848	2016-02-09 16:54:58 +1100	[diff] [blame]	824	inode = VFS_I(ip);
Dave Chinner	54d7b5c	2016-02-09 16:54:58 +1100	[diff] [blame]	825	set_nlink(inode, nlink);
Christoph Hellwig	66f3646	2017-10-19 11:07:09 -0700	[diff] [blame]	826	inode->i_rdev = rdev;
Christoph Hellwig	ceaf603	2021-03-29 11:11:39 -0700	[diff] [blame]	827	ip->i_projid = prid;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	828
					/*
					* When xfs_has_grpid(mp) and the parent directory is not setgid, copy the
					* group from the parent here; otherwise inode_init_owner() sets ownership
					* and mode.
					*/
Dave Chinner	0560f31	2021-08-18 18:46:52 -0700	[diff] [blame^]	829	if (dir && !(dir->i_mode & S_ISGID) && xfs_has_grpid(mp)) {
Christian Brauner	db99855	2021-03-20 13:26:24 +0100	[diff] [blame]	830	inode_fsuid_set(inode, mnt_userns);
Christoph Hellwig	01ea173	2021-01-22 16:48:18 -0800	[diff] [blame]	831	inode->i_gid = dir->i_gid;
				832	inode->i_mode = mode;
Christoph Hellwig	3d8f282	2020-02-21 08:31:26 -0800	[diff] [blame]	833	} else {
Linus Torvalds	7d6beb7	2021-02-23 13:39:45 -0800	[diff] [blame]	834	inode_init_owner(mnt_userns, inode, dir, mode);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	835	}
				836
				837	/*
				838	* If the group ID of the new file does not match the effective group
				839	* ID or one of the supplementary group IDs, the S_ISGID bit is cleared
				840	* (and only if the irix_sgid_inherit compatibility variable is set).
				841	*/
Christoph Hellwig	5429515	2020-02-21 08:31:27 -0800	[diff] [blame]	842	if (irix_sgid_inherit &&
Christoph Hellwig	f736d93	2021-01-21 14:19:58 +0100	[diff] [blame]	843	(inode->i_mode & S_ISGID) &&
				844	!in_group_p(i_gid_into_mnt(mnt_userns, inode)))
Dave Chinner	c19b3b05	2016-02-09 16:54:58 +1100	[diff] [blame]	845	inode->i_mode &= ~S_ISGID;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	846
Christoph Hellwig	13d2c10	2021-03-29 11:11:40 -0700	[diff] [blame]	847	ip->i_disk_size = 0;
Christoph Hellwig	daf8396	2020-05-18 10:27:22 -0700	[diff] [blame]	848	ip->i_df.if_nextents = 0;
Christoph Hellwig	6e73a54	2021-03-29 11:11:40 -0700	[diff] [blame]	849	ASSERT(ip->i_nblocks == 0);
Christoph Hellwig	dff35fd	2008-08-13 16:44:15 +1000	[diff] [blame]	850
					/* All timestamps of the new inode start out at the same current time. */
Deepa Dinamani	c2050a4	2016-09-14 07:48:06 -0700	[diff] [blame]	851	tv = current_time(inode);
Dave Chinner	3987848	2016-02-09 16:54:58 +1100	[diff] [blame]	852	inode->i_mtime = tv;
				853	inode->i_atime = tv;
				854	inode->i_ctime = tv;
Christoph Hellwig	dff35fd	2008-08-13 16:44:15 +1000	[diff] [blame]	855
Christoph Hellwig	031474c	2021-03-29 11:11:41 -0700	[diff] [blame]	856	ip->i_extsize = 0;
Christoph Hellwig	db07349	2021-03-29 11:11:44 -0700	[diff] [blame]	857	ip->i_diflags = 0;
Christoph Hellwig	93848a9	2013-04-03 16:11:17 +1100	[diff] [blame]	858
Dave Chinner	38c26bf	2021-08-18 18:46:37 -0700	[diff] [blame]	859	if (xfs_has_v3inodes(mp)) {
Jeff Layton	f0e2828	2017-12-11 06:35:19 -0500	[diff] [blame]	860	inode_set_iversion(inode, 1);
Christoph Hellwig	b33ce57	2021-03-29 11:11:42 -0700	[diff] [blame]	861	ip->i_cowextsize = 0;
Christoph Hellwig	e98d5e8	2021-03-29 11:11:45 -0700	[diff] [blame]	862	ip->i_crtime = tv;
Christoph Hellwig	93848a9	2013-04-03 16:11:17 +1100	[diff] [blame]	863	}
				864
					/*
					* Pick the data fork format by file type. Regular files and directories
					* first inherit flags from the parent (if any), then share the extents
					* setup with symlinks.
					*/
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	865	flags = XFS_ILOG_CORE;
				866	switch (mode & S_IFMT) {
				867	case S_IFIFO:
				868	case S_IFCHR:
				869	case S_IFBLK:
				870	case S_IFSOCK:
Christoph Hellwig	f7e67b2	2020-05-18 10:28:05 -0700	[diff] [blame]	871	ip->i_df.if_format = XFS_DINODE_FMT_DEV;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	872	flags \|= XFS_ILOG_DEV;
				873	break;
				874	case S_IFREG:
				875	case S_IFDIR:
Christoph Hellwig	db07349	2021-03-29 11:11:44 -0700	[diff] [blame]	876	if (pip && (pip->i_diflags & XFS_DIFLAG_ANY))
Darrick J. Wong	8a569d7	2020-09-13 10:16:40 -0700	[diff] [blame]	877	xfs_inode_inherit_flags(ip, pip);
Christoph Hellwig	3e09ab8	2021-03-29 11:11:45 -0700	[diff] [blame]	878	if (pip && (pip->i_diflags2 & XFS_DIFLAG2_ANY))
Darrick J. Wong	8a569d7	2020-09-13 10:16:40 -0700	[diff] [blame]	879	xfs_inode_inherit_flags2(ip, pip);
Gustavo A. R. Silva	53004ee	2021-04-20 17:54:36 -0500	[diff] [blame]	880	fallthrough;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	881	case S_IFLNK:
Christoph Hellwig	f7e67b2	2020-05-18 10:28:05 -0700	[diff] [blame]	882	ip->i_df.if_format = XFS_DINODE_FMT_EXTENTS;
Christoph Hellwig	fcacbc3	2018-07-17 16:51:50 -0700	[diff] [blame]	883	ip->i_df.if_bytes = 0;
Christoph Hellwig	6bdcf26	2017-11-03 10:34:46 -0700	[diff] [blame]	884	ip->i_df.if_u1.if_root = NULL;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	885	break;
				886	default:
				887	ASSERT(0);
				888	}
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	889
				890	/*
Dave Chinner	e6a688c	2021-03-22 09:52:03 -0700	[diff] [blame]	891	* If we need to create attributes immediately after allocating the
				892	* inode, initialise an empty attribute fork right now. We use the
				893	* default fork offset for attributes here as we don't know exactly what
				894	* size or how many attributes we might be adding. We can do this
				895	* safely here because we know the data fork is completely empty and
				896	* this saves us from needing to run a separate transaction to set the
				897	* fork offset in the immediate future.
				898	*/
Dave Chinner	38c26bf	2021-08-18 18:46:37 -0700	[diff] [blame]	899	if (init_xattrs && xfs_has_attr(mp)) {
Christoph Hellwig	7821ea3	2021-03-29 11:11:44 -0700	[diff] [blame]	900	ip->i_forkoff = xfs_default_attroffset(ip) >> 3;
Dave Chinner	e6a688c	2021-03-22 09:52:03 -0700	[diff] [blame]	901	ip->i_afp = xfs_ifork_alloc(XFS_DINODE_FMT_EXTENTS, 0);
				902	}
				903
				904	/*
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	905	* Log the new values stuffed into the inode.
				906	*/
Christoph Hellwig	ddc3415	2011-09-19 15:00:54 +0000	[diff] [blame]	907	xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	908	xfs_trans_log_inode(tp, ip, flags);
				909
Dave Chinner	58c9047	2015-02-23 22:38:08 +1100	[diff] [blame]	910	/* now that we have an i_mode we can setup the inode structure */
Christoph Hellwig	41be8be	2008-08-13 16:23:13 +1000	[diff] [blame]	911	xfs_setup_inode(ip);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	912
				913	*ipp = ip;
				914	return 0;
				915	}
				916
Dave Chinner	e546cb7	2013-08-12 20:49:47 +1000	[diff] [blame]	917	/*
Dave Chinner	54d7b5c	2016-02-09 16:54:58 +1100	[diff] [blame]	918	* Decrement the link count on an inode & log the change. If this causes the
				919	* link count to go to zero, move the inode to AGI unlinked list so that it can
				920	* be freed when the last active reference goes away via xfs_inactive().
					*
					* Returns 0 while links remain, otherwise the result of xfs_iunlink().
Dave Chinner	e546cb7	2013-08-12 20:49:47 +1000	[diff] [blame]	921	*/
Eric Sandeen	0d5a75e	2016-06-01 17:38:15 +1000	[diff] [blame]	922	static int /* error */
Dave Chinner	e546cb7	2013-08-12 20:49:47 +1000	[diff] [blame]	923	xfs_droplink(
				924	xfs_trans_t *tp,
				925	xfs_inode_t *ip)
				926	{
Dave Chinner	e546cb7	2013-08-12 20:49:47 +1000	[diff] [blame]	927	xfs_trans_ichgtime(tp, ip, XFS_ICHGTIME_CHG);
				928
Dave Chinner	e546cb7	2013-08-12 20:49:47 +1000	[diff] [blame]	929	drop_nlink(VFS_I(ip));
				930	xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
				931
					/* Links remain: nothing more to do. */
Dave Chinner	54d7b5c	2016-02-09 16:54:58 +1100	[diff] [blame]	932	if (VFS_I(ip)->i_nlink)
				933	return 0;
				934
				935	return xfs_iunlink(tp, ip);
Dave Chinner	e546cb7	2013-08-12 20:49:47 +1000	[diff] [blame]	936	}
				937
				938	/*
Dave Chinner	e546cb7	2013-08-12 20:49:47 +1000	[diff] [blame]	939	* Increment the link count on an inode & log the change. Also calls
					* xfs_trans_ichgtime() with XFS_ICHGTIME_CHG before bumping the count.
				940	*/
Eric Sandeen	9108326	2019-05-01 20:26:30 -0700	[diff] [blame]	941	static void
Dave Chinner	e546cb7	2013-08-12 20:49:47 +1000	[diff] [blame]	942	xfs_bumplink(
				943	xfs_trans_t *tp,
				944	xfs_inode_t *ip)
				945	{
				946	xfs_trans_ichgtime(tp, ip, XFS_ICHGTIME_CHG);
				947
Dave Chinner	e546cb7	2013-08-12 20:49:47 +1000	[diff] [blame]	948	inc_nlink(VFS_I(ip));
Dave Chinner	e546cb7	2013-08-12 20:49:47 +1000	[diff] [blame]	949	xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
Dave Chinner	e546cb7	2013-08-12 20:49:47 +1000	[diff] [blame]	950	}
				951
					/*
					* Allocate and initialise a new inode with mode "mode" and add the directory
					* entry "name" for it to directory "dp".
					*
					* On success the new inode is stored in *ipp and 0 is returned. Returns -EIO
					* if XFS_FORCED_SHUTDOWN(mp); any other failure returns the error from the
					* step that failed. *ipp is not written on failure.
					*/
Dave Chinner	c24b5df	2013-08-12 20:49:45 +1000	[diff] [blame]	952	int
				953	xfs_create(
Christoph Hellwig	f736d93	2021-01-21 14:19:58 +0100	[diff] [blame]	954	struct user_namespace *mnt_userns,
Dave Chinner	c24b5df	2013-08-12 20:49:45 +1000	[diff] [blame]	955	xfs_inode_t *dp,
				956	struct xfs_name *name,
				957	umode_t mode,
Christoph Hellwig	66f3646	2017-10-19 11:07:09 -0700	[diff] [blame]	958	dev_t rdev,
Dave Chinner	e6a688c	2021-03-22 09:52:03 -0700	[diff] [blame]	959	bool init_xattrs,
Dave Chinner	c24b5df	2013-08-12 20:49:45 +1000	[diff] [blame]	960	xfs_inode_t **ipp)
				961	{
				962	int is_dir = S_ISDIR(mode);
				963	struct xfs_mount *mp = dp->i_mount;
				964	struct xfs_inode *ip = NULL;
				965	struct xfs_trans *tp = NULL;
				966	int error;
Dave Chinner	c24b5df	2013-08-12 20:49:45 +1000	[diff] [blame]	967	bool unlock_dp_on_error = false;
Dave Chinner	c24b5df	2013-08-12 20:49:45 +1000	[diff] [blame]	968	prid_t prid;
				969	struct xfs_dquot *udqp = NULL;
				970	struct xfs_dquot *gdqp = NULL;
				971	struct xfs_dquot *pdqp = NULL;
Brian Foster	062647a	2014-11-28 14:00:16 +1100	[diff] [blame]	972	struct xfs_trans_res *tres;
Dave Chinner	c24b5df	2013-08-12 20:49:45 +1000	[diff] [blame]	973	uint resblks;
Dave Chinner	b652afd	2021-06-02 10:48:24 +1000	[diff] [blame]	974	xfs_ino_t ino;
Dave Chinner	c24b5df	2013-08-12 20:49:45 +1000	[diff] [blame]	975
				976	trace_xfs_create(dp, name);
				977
				978	if (XFS_FORCED_SHUTDOWN(mp))
Dave Chinner	2451337	2014-06-25 14:58:08 +1000	[diff] [blame]	979	return -EIO;
Dave Chinner	c24b5df	2013-08-12 20:49:45 +1000	[diff] [blame]	980
Zhi Yong Wu	163467d	2013-12-18 08:22:39 +0800	[diff] [blame]	981	prid = xfs_get_initial_prid(dp);
Dave Chinner	c24b5df	2013-08-12 20:49:45 +1000	[diff] [blame]	982
				983	/*
				984	* Make sure that we have allocated dquot(s) on disk.
				985	*/
Christian Brauner	a65e58e	2021-03-20 13:26:22 +0100	[diff] [blame]	986	error = xfs_qm_vop_dqalloc(dp, mapped_fsuid(mnt_userns),
				987	mapped_fsgid(mnt_userns), prid,
Darrick J. Wong	b5a0842	2021-03-02 09:32:52 -0800	[diff] [blame]	988	XFS_QMOPT_QUOTALL \| XFS_QMOPT_INHERIT,
				989	&udqp, &gdqp, &pdqp);
Dave Chinner	c24b5df	2013-08-12 20:49:45 +1000	[diff] [blame]	990	if (error)
				991	return error;
				992
					/* Directories use a different space and transaction reservation. */
				993	if (is_dir) {
Dave Chinner	c24b5df	2013-08-12 20:49:45 +1000	[diff] [blame]	994	resblks = XFS_MKDIR_SPACE_RES(mp, name->len);
Brian Foster	062647a	2014-11-28 14:00:16 +1100	[diff] [blame]	995	tres = &M_RES(mp)->tr_mkdir;
Dave Chinner	c24b5df	2013-08-12 20:49:45 +1000	[diff] [blame]	996	} else {
				997	resblks = XFS_CREATE_SPACE_RES(mp, name->len);
Brian Foster	062647a	2014-11-28 14:00:16 +1100	[diff] [blame]	998	tres = &M_RES(mp)->tr_create;
Dave Chinner	c24b5df	2013-08-12 20:49:45 +1000	[diff] [blame]	999	}
				1000
Dave Chinner	c24b5df	2013-08-12 20:49:45 +1000	[diff] [blame]	1001	/*
				1002	* Initially assume that the file does not exist and
				1003	* reserve the resources for that case. If that is not
				1004	* the case we'll drop the one we have and get a more
				1005	* appropriate transaction later.
				1006	*/
Darrick J. Wong	f2f7b9f	2021-01-27 12:07:57 -0800	[diff] [blame]	1007	error = xfs_trans_alloc_icreate(mp, tres, udqp, gdqp, pdqp, resblks,
				1008	&tp);
Dave Chinner	2451337	2014-06-25 14:58:08 +1000	[diff] [blame]	1009	if (error == -ENOSPC) {
Dave Chinner	c24b5df	2013-08-12 20:49:45 +1000	[diff] [blame]	1010	/* flush outstanding delalloc blocks and retry */
				1011	xfs_flush_inodes(mp);
Darrick J. Wong	f2f7b9f	2021-01-27 12:07:57 -0800	[diff] [blame]	1012	error = xfs_trans_alloc_icreate(mp, tres, udqp, gdqp, pdqp,
				1013	resblks, &tp);
Dave Chinner	c24b5df	2013-08-12 20:49:45 +1000	[diff] [blame]	1014	}
Christoph Hellwig	4906e21	2015-06-04 13:47:56 +1000	[diff] [blame]	1015	if (error)
Darrick J. Wong	f2f7b9f	2021-01-27 12:07:57 -0800	[diff] [blame]	1016	goto out_release_dquots;
Dave Chinner	c24b5df	2013-08-12 20:49:45 +1000	[diff] [blame]	1017
Christoph Hellwig	6552321	2016-11-30 14:33:25 +1100	[diff] [blame]	1018	xfs_ilock(dp, XFS_ILOCK_EXCL \| XFS_ILOCK_PARENT);
Dave Chinner	c24b5df	2013-08-12 20:49:45 +1000	[diff] [blame]	1019	unlock_dp_on_error = true;
				1020
Chandan Babu R	f5d9274	2021-01-22 16:48:12 -0800	[diff] [blame]	1021	error = xfs_iext_count_may_overflow(dp, XFS_DATA_FORK,
				1022	XFS_IEXT_DIR_MANIP_CNT(mp));
				1023	if (error)
				1024	goto out_trans_cancel;
				1025
Dave Chinner	c24b5df	2013-08-12 20:49:45 +1000	[diff] [blame]	1026	/*
				1027	* A newly created regular or special file just has one directory
				1028	* entry pointing to them, but a directory also has the "." entry
				1029	* pointing to itself.
				1030	*/
Dave Chinner	b652afd	2021-06-02 10:48:24 +1000	[diff] [blame]	1031	error = xfs_dialloc(&tp, dp->i_ino, mode, &ino);
				1032	if (!error)
				1033	error = xfs_init_new_inode(mnt_userns, tp, dp, ino, mode,
				1034	is_dir ? 2 : 1, rdev, prid, init_xattrs, &ip);
Jan Kara	d6077aa	2015-07-29 11:52:08 +1000	[diff] [blame]	1035	if (error)
Christoph Hellwig	4906e21	2015-06-04 13:47:56 +1000	[diff] [blame]	1036	goto out_trans_cancel;
Dave Chinner	c24b5df	2013-08-12 20:49:45 +1000	[diff] [blame]	1037
				1038	/*
				1039	* Now we join the directory inode to the transaction. We do not do it
Dave Chinner	b652afd	2021-06-02 10:48:24 +1000	[diff] [blame]	1040	* earlier because xfs_dialloc might commit the previous transaction
Dave Chinner	c24b5df	2013-08-12 20:49:45 +1000	[diff] [blame]	1041	* (and release all the locks). An error from here on will result in
				1042	* the transaction cancel unlocking dp so don't do it explicitly in the
				1043	* error path.
				1044	*/
Christoph Hellwig	6552321	2016-11-30 14:33:25 +1100	[diff] [blame]	1045	xfs_trans_ijoin(tp, dp, XFS_ILOCK_EXCL);
Dave Chinner	c24b5df	2013-08-12 20:49:45 +1000	[diff] [blame]	1046	unlock_dp_on_error = false;
				1047
Brian Foster	381eee6	2018-07-11 22:26:21 -0700	[diff] [blame]	1048	error = xfs_dir_createname(tp, dp, name, ip->i_ino,
Kaixu Xia	63337b6	2020-03-27 08:28:39 -0700	[diff] [blame]	1049	resblks - XFS_IALLOC_SPACE_RES(mp));
Dave Chinner	c24b5df	2013-08-12 20:49:45 +1000	[diff] [blame]	1050	if (error) {
Dave Chinner	2451337	2014-06-25 14:58:08 +1000	[diff] [blame]	1051	ASSERT(error != -ENOSPC);
Christoph Hellwig	4906e21	2015-06-04 13:47:56 +1000	[diff] [blame]	1052	goto out_trans_cancel;
Dave Chinner	c24b5df	2013-08-12 20:49:45 +1000	[diff] [blame]	1053	}
				1054	xfs_trans_ichgtime(tp, dp, XFS_ICHGTIME_MOD \| XFS_ICHGTIME_CHG);
				1055	xfs_trans_log_inode(tp, dp, XFS_ILOG_CORE);
				1056
					/* Initialise the new directory and bump the parent's link count. */
				1057	if (is_dir) {
				1058	error = xfs_dir_init(tp, ip, dp);
				1059	if (error)
Brian Foster	c8eac49	2018-07-24 13:43:13 -0700	[diff] [blame]	1060	goto out_trans_cancel;
Dave Chinner	c24b5df	2013-08-12 20:49:45 +1000	[diff] [blame]	1061
Eric Sandeen	9108326	2019-05-01 20:26:30 -0700	[diff] [blame]	1062	xfs_bumplink(tp, dp);
Dave Chinner	c24b5df	2013-08-12 20:49:45 +1000	[diff] [blame]	1063	}
				1064
				1065	/*
				1066	* If this is a synchronous mount, make sure that the
				1067	* create transaction goes to disk before returning to
				1068	* the user.
				1069	*/
Dave Chinner	0560f31	2021-08-18 18:46:52 -0700	[diff] [blame^]	1070	if (xfs_has_wsync(mp) \|\| xfs_has_dirsync(mp))
Dave Chinner	c24b5df	2013-08-12 20:49:45 +1000	[diff] [blame]	1071	xfs_trans_set_sync(tp);
				1072
				1073	/*
				1074	* Attach the dquot(s) to the inodes and modify them incore.
				1075	* These ids of the inode couldn't have changed since the new
				1076	* inode has been locked ever since it was created.
				1077	*/
				1078	xfs_qm_vop_create_dqattach(tp, ip, udqp, gdqp, pdqp);
				1079
Christoph Hellwig	7039331	2015-06-04 13:48:08 +1000	[diff] [blame]	1080	error = xfs_trans_commit(tp);
Dave Chinner	c24b5df	2013-08-12 20:49:45 +1000	[diff] [blame]	1081	if (error)
				1082	goto out_release_inode;
				1083
				1084	xfs_qm_dqrele(udqp);
				1085	xfs_qm_dqrele(gdqp);
				1086	xfs_qm_dqrele(pdqp);
				1087
				1088	*ipp = ip;
				1089	return 0;
				1090
Dave Chinner	c24b5df	2013-08-12 20:49:45 +1000	[diff] [blame]	1091	out_trans_cancel:
Christoph Hellwig	4906e21	2015-06-04 13:47:56 +1000	[diff] [blame]	1092	xfs_trans_cancel(tp);
Dave Chinner	c24b5df	2013-08-12 20:49:45 +1000	[diff] [blame]	1093	out_release_inode:
				1094	/*
Dave Chinner	58c9047	2015-02-23 22:38:08 +1100	[diff] [blame]	1095	* Wait until after the current transaction is aborted to finish the
				1096	* setup of the inode and release the inode. This prevents recursive
				1097	* transactions and deadlocks from xfs_inactive.
Dave Chinner	c24b5df	2013-08-12 20:49:45 +1000	[diff] [blame]	1098	*/
Dave Chinner	58c9047	2015-02-23 22:38:08 +1100	[diff] [blame]	1099	if (ip) {
				1100	xfs_finish_inode_setup(ip);
Darrick J. Wong	44a8736	2018-07-25 12:52:32 -0700	[diff] [blame]	1101	xfs_irele(ip);
Dave Chinner	58c9047	2015-02-23 22:38:08 +1100	[diff] [blame]	1102	}
Darrick J. Wong	f2f7b9f	2021-01-27 12:07:57 -0800	[diff] [blame]	1103	out_release_dquots:
Dave Chinner	c24b5df	2013-08-12 20:49:45 +1000	[diff] [blame]	1104	xfs_qm_dqrele(udqp);
				1105	xfs_qm_dqrele(gdqp);
				1106	xfs_qm_dqrele(pdqp);
				1107
					/* Only true if we failed after locking dp but before joining it to tp. */
				1108	if (unlock_dp_on_error)
Christoph Hellwig	6552321	2016-11-30 14:33:25 +1100	[diff] [blame]	1109	xfs_iunlock(dp, XFS_ILOCK_EXCL);
Dave Chinner	c24b5df	2013-08-12 20:49:45 +1000	[diff] [blame]	1110	return error;
				1111	}
				1112
				1113	int
Zhi Yong Wu	99b6436	2013-12-18 08:22:40 +0800	[diff] [blame]	1114	xfs_create_tmpfile(
Christoph Hellwig	f736d93	2021-01-21 14:19:58 +0100	[diff] [blame]	1115	struct user_namespace *mnt_userns,
Zhi Yong Wu	99b6436	2013-12-18 08:22:40 +0800	[diff] [blame]	1116	struct xfs_inode *dp,
Brian Foster	330033d	2014-04-17 08:15:30 +1000	[diff] [blame]	1117	umode_t mode,
				1118	struct xfs_inode **ipp)
Zhi Yong Wu	99b6436	2013-12-18 08:22:40 +0800	[diff] [blame]	1119	{
				1120	struct xfs_mount *mp = dp->i_mount;
				1121	struct xfs_inode *ip = NULL;
				1122	struct xfs_trans *tp = NULL;
				1123	int error;
Zhi Yong Wu	99b6436	2013-12-18 08:22:40 +0800	[diff] [blame]	1124	prid_t prid;
				1125	struct xfs_dquot *udqp = NULL;
				1126	struct xfs_dquot *gdqp = NULL;
				1127	struct xfs_dquot *pdqp = NULL;
				1128	struct xfs_trans_res *tres;
				1129	uint resblks;
Dave Chinner	b652afd	2021-06-02 10:48:24 +1000	[diff] [blame]	1130	xfs_ino_t ino;
Zhi Yong Wu	99b6436	2013-12-18 08:22:40 +0800	[diff] [blame]	1131
				1132	if (XFS_FORCED_SHUTDOWN(mp))
Dave Chinner	2451337	2014-06-25 14:58:08 +1000	[diff] [blame]	1133	return -EIO;
Zhi Yong Wu	99b6436	2013-12-18 08:22:40 +0800	[diff] [blame]	1134
				1135	prid = xfs_get_initial_prid(dp);
				1136
				1137	/*
				1138	* Make sure that we have allocated dquot(s) on disk.
				1139	*/
Christian Brauner	a65e58e	2021-03-20 13:26:22 +0100	[diff] [blame]	1140	error = xfs_qm_vop_dqalloc(dp, mapped_fsuid(mnt_userns),
				1141	mapped_fsgid(mnt_userns), prid,
Darrick J. Wong	b5a0842	2021-03-02 09:32:52 -0800	[diff] [blame]	1142	XFS_QMOPT_QUOTALL \| XFS_QMOPT_INHERIT,
				1143	&udqp, &gdqp, &pdqp);
Zhi Yong Wu	99b6436	2013-12-18 08:22:40 +0800	[diff] [blame]	1144	if (error)
				1145	return error;
				1146
				1147	resblks = XFS_IALLOC_SPACE_RES(mp);
Zhi Yong Wu	99b6436	2013-12-18 08:22:40 +0800	[diff] [blame]	1148	tres = &M_RES(mp)->tr_create_tmpfile;
Christoph Hellwig	253f491	2016-04-06 09:19:55 +1000	[diff] [blame]	1149
Darrick J. Wong	f2f7b9f	2021-01-27 12:07:57 -0800	[diff] [blame]	1150	error = xfs_trans_alloc_icreate(mp, tres, udqp, gdqp, pdqp, resblks,
				1151	&tp);
Christoph Hellwig	4906e21	2015-06-04 13:47:56 +1000	[diff] [blame]	1152	if (error)
Darrick J. Wong	f2f7b9f	2021-01-27 12:07:57 -0800	[diff] [blame]	1153	goto out_release_dquots;
Zhi Yong Wu	99b6436	2013-12-18 08:22:40 +0800	[diff] [blame]	1154
Dave Chinner	b652afd	2021-06-02 10:48:24 +1000	[diff] [blame]	1155	error = xfs_dialloc(&tp, dp->i_ino, mode, &ino);
				1156	if (!error)
				1157	error = xfs_init_new_inode(mnt_userns, tp, dp, ino, mode,
				1158	0, 0, prid, false, &ip);
Jan Kara	d6077aa	2015-07-29 11:52:08 +1000	[diff] [blame]	1159	if (error)
Christoph Hellwig	4906e21	2015-06-04 13:47:56 +1000	[diff] [blame]	1160	goto out_trans_cancel;
Zhi Yong Wu	99b6436	2013-12-18 08:22:40 +0800	[diff] [blame]	1161
Dave Chinner	0560f31	2021-08-18 18:46:52 -0700	[diff] [blame^]	1162	if (xfs_has_wsync(mp))
Zhi Yong Wu	99b6436	2013-12-18 08:22:40 +0800	[diff] [blame]	1163	xfs_trans_set_sync(tp);
				1164
				1165	/*
				1166	* Attach the dquot(s) to the inodes and modify them incore.
				1167	* These ids of the inode couldn't have changed since the new
				1168	* inode has been locked ever since it was created.
				1169	*/
				1170	xfs_qm_vop_create_dqattach(tp, ip, udqp, gdqp, pdqp);
				1171
Zhi Yong Wu	99b6436	2013-12-18 08:22:40 +0800	[diff] [blame]	1172	error = xfs_iunlink(tp, ip);
				1173	if (error)
Christoph Hellwig	4906e21	2015-06-04 13:47:56 +1000	[diff] [blame]	1174	goto out_trans_cancel;
Zhi Yong Wu	99b6436	2013-12-18 08:22:40 +0800	[diff] [blame]	1175
Christoph Hellwig	7039331	2015-06-04 13:48:08 +1000	[diff] [blame]	1176	error = xfs_trans_commit(tp);
Zhi Yong Wu	99b6436	2013-12-18 08:22:40 +0800	[diff] [blame]	1177	if (error)
				1178	goto out_release_inode;
				1179
				1180	xfs_qm_dqrele(udqp);
				1181	xfs_qm_dqrele(gdqp);
				1182	xfs_qm_dqrele(pdqp);
				1183
Brian Foster	330033d	2014-04-17 08:15:30 +1000	[diff] [blame]	1184	*ipp = ip;
Zhi Yong Wu	99b6436	2013-12-18 08:22:40 +0800	[diff] [blame]	1185	return 0;
				1186
Zhi Yong Wu	99b6436	2013-12-18 08:22:40 +0800	[diff] [blame]	1187	out_trans_cancel:
Christoph Hellwig	4906e21	2015-06-04 13:47:56 +1000	[diff] [blame]	1188	xfs_trans_cancel(tp);
Zhi Yong Wu	99b6436	2013-12-18 08:22:40 +0800	[diff] [blame]	1189	out_release_inode:
				1190	/*
Dave Chinner	58c9047	2015-02-23 22:38:08 +1100	[diff] [blame]	1191	* Wait until after the current transaction is aborted to finish the
				1192	* setup of the inode and release the inode. This prevents recursive
				1193	* transactions and deadlocks from xfs_inactive.
Zhi Yong Wu	99b6436	2013-12-18 08:22:40 +0800	[diff] [blame]	1194	*/
Dave Chinner	58c9047	2015-02-23 22:38:08 +1100	[diff] [blame]	1195	if (ip) {
				1196	xfs_finish_inode_setup(ip);
Darrick J. Wong	44a8736	2018-07-25 12:52:32 -0700	[diff] [blame]	1197	xfs_irele(ip);
Dave Chinner	58c9047	2015-02-23 22:38:08 +1100	[diff] [blame]	1198	}
Darrick J. Wong	f2f7b9f	2021-01-27 12:07:57 -0800	[diff] [blame]	1199	out_release_dquots:
Zhi Yong Wu	99b6436	2013-12-18 08:22:40 +0800	[diff] [blame]	1200	xfs_qm_dqrele(udqp);
				1201	xfs_qm_dqrele(gdqp);
				1202	xfs_qm_dqrele(pdqp);
				1203
				1204	return error;
				1205	}
				1206
				1207	int
Dave Chinner	c24b5df	2013-08-12 20:49:45 +1000	[diff] [blame]	1208	xfs_link(
				1209	xfs_inode_t *tdp,
				1210	xfs_inode_t *sip,
				1211	struct xfs_name *target_name)
				1212	{
				1213	xfs_mount_t *mp = tdp->i_mount;
				1214	xfs_trans_t *tp;
				1215	int error;
Dave Chinner	c24b5df	2013-08-12 20:49:45 +1000	[diff] [blame]	1216	int resblks;
				1217
				1218	trace_xfs_link(tdp, target_name);
				1219
Dave Chinner	c19b3b05	2016-02-09 16:54:58 +1100	[diff] [blame]	1220	ASSERT(!S_ISDIR(VFS_I(sip)->i_mode));
Dave Chinner	c24b5df	2013-08-12 20:49:45 +1000	[diff] [blame]	1221
				1222	if (XFS_FORCED_SHUTDOWN(mp))
Dave Chinner	2451337	2014-06-25 14:58:08 +1000	[diff] [blame]	1223	return -EIO;
Dave Chinner	c24b5df	2013-08-12 20:49:45 +1000	[diff] [blame]	1224
Darrick J. Wong	c14cfcc	2018-05-04 15:30:21 -0700	[diff] [blame]	1225	error = xfs_qm_dqattach(sip);
Dave Chinner	c24b5df	2013-08-12 20:49:45 +1000	[diff] [blame]	1226	if (error)
				1227	goto std_return;
				1228
Darrick J. Wong	c14cfcc	2018-05-04 15:30:21 -0700	[diff] [blame]	1229	error = xfs_qm_dqattach(tdp);
Dave Chinner	c24b5df	2013-08-12 20:49:45 +1000	[diff] [blame]	1230	if (error)
				1231	goto std_return;
				1232
Dave Chinner	c24b5df	2013-08-12 20:49:45 +1000	[diff] [blame]	1233	resblks = XFS_LINK_SPACE_RES(mp, target_name->len);
Christoph Hellwig	253f491	2016-04-06 09:19:55 +1000	[diff] [blame]	1234	error = xfs_trans_alloc(mp, &M_RES(mp)->tr_link, resblks, 0, 0, &tp);
Dave Chinner	2451337	2014-06-25 14:58:08 +1000	[diff] [blame]	1235	if (error == -ENOSPC) {
Dave Chinner	c24b5df	2013-08-12 20:49:45 +1000	[diff] [blame]	1236	resblks = 0;
Christoph Hellwig	253f491	2016-04-06 09:19:55 +1000	[diff] [blame]	1237	error = xfs_trans_alloc(mp, &M_RES(mp)->tr_link, 0, 0, 0, &tp);
Dave Chinner	c24b5df	2013-08-12 20:49:45 +1000	[diff] [blame]	1238	}
Christoph Hellwig	4906e21	2015-06-04 13:47:56 +1000	[diff] [blame]	1239	if (error)
Christoph Hellwig	253f491	2016-04-06 09:19:55 +1000	[diff] [blame]	1240	goto std_return;
Dave Chinner	c24b5df	2013-08-12 20:49:45 +1000	[diff] [blame]	1241
Darrick J. Wong	7c2d238	2018-01-26 15:27:33 -0800	[diff] [blame]	1242	xfs_lock_two_inodes(sip, XFS_ILOCK_EXCL, tdp, XFS_ILOCK_EXCL);
Dave Chinner	c24b5df	2013-08-12 20:49:45 +1000	[diff] [blame]	1243
				1244	xfs_trans_ijoin(tp, sip, XFS_ILOCK_EXCL);
Christoph Hellwig	6552321	2016-11-30 14:33:25 +1100	[diff] [blame]	1245	xfs_trans_ijoin(tp, tdp, XFS_ILOCK_EXCL);
Dave Chinner	c24b5df	2013-08-12 20:49:45 +1000	[diff] [blame]	1246
Chandan Babu R	f5d9274	2021-01-22 16:48:12 -0800	[diff] [blame]	1247	error = xfs_iext_count_may_overflow(tdp, XFS_DATA_FORK,
				1248	XFS_IEXT_DIR_MANIP_CNT(mp));
				1249	if (error)
				1250	goto error_return;
				1251
Dave Chinner	c24b5df	2013-08-12 20:49:45 +1000	[diff] [blame]	1252	/*
				1253	* If we are using project inheritance, we only allow hard link
				1254	* creation in our tree when the project IDs are the same; else
				1255	* the tree quota mechanism could be circumvented.
				1256	*/
Christoph Hellwig	db07349	2021-03-29 11:11:44 -0700	[diff] [blame]	1257	if (unlikely((tdp->i_diflags & XFS_DIFLAG_PROJINHERIT) &&
Christoph Hellwig	ceaf603	2021-03-29 11:11:39 -0700	[diff] [blame]	1258	tdp->i_projid != sip->i_projid)) {
Dave Chinner	2451337	2014-06-25 14:58:08 +1000	[diff] [blame]	1259	error = -EXDEV;
Dave Chinner	c24b5df	2013-08-12 20:49:45 +1000	[diff] [blame]	1260	goto error_return;
				1261	}
				1262
Eric Sandeen	94f3cad	2014-09-09 11:57:52 +1000	[diff] [blame]	1263	if (!resblks) {
				1264	error = xfs_dir_canenter(tp, tdp, target_name);
				1265	if (error)
				1266	goto error_return;
				1267	}
Dave Chinner	c24b5df	2013-08-12 20:49:45 +1000	[diff] [blame]	1268
Dave Chinner	54d7b5c	2016-02-09 16:54:58 +1100	[diff] [blame]	1269	/*
				1270	* Handle initial link state of O_TMPFILE inode
				1271	*/
				1272	if (VFS_I(sip)->i_nlink == 0) {
Dave Chinner	f40aadb	2021-06-02 10:48:51 +1000	[diff] [blame]	1273	struct xfs_perag *pag;
				1274
				1275	pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, sip->i_ino));
				1276	error = xfs_iunlink_remove(tp, pag, sip);
				1277	xfs_perag_put(pag);
Zhi Yong Wu	ab29743	2013-12-18 08:22:41 +0800	[diff] [blame]	1278	if (error)
Christoph Hellwig	4906e21	2015-06-04 13:47:56 +1000	[diff] [blame]	1279	goto error_return;
Zhi Yong Wu	ab29743	2013-12-18 08:22:41 +0800	[diff] [blame]	1280	}
				1281
Dave Chinner	c24b5df	2013-08-12 20:49:45 +1000	[diff] [blame]	1282	error = xfs_dir_createname(tp, tdp, target_name, sip->i_ino,
Brian Foster	381eee6	2018-07-11 22:26:21 -0700	[diff] [blame]	1283	resblks);
Dave Chinner	c24b5df	2013-08-12 20:49:45 +1000	[diff] [blame]	1284	if (error)
Christoph Hellwig	4906e21	2015-06-04 13:47:56 +1000	[diff] [blame]	1285	goto error_return;
Dave Chinner	c24b5df	2013-08-12 20:49:45 +1000	[diff] [blame]	1286	xfs_trans_ichgtime(tp, tdp, XFS_ICHGTIME_MOD \| XFS_ICHGTIME_CHG);
				1287	xfs_trans_log_inode(tp, tdp, XFS_ILOG_CORE);
				1288
Eric Sandeen	9108326	2019-05-01 20:26:30 -0700	[diff] [blame]	1289	xfs_bumplink(tp, sip);
Dave Chinner	c24b5df	2013-08-12 20:49:45 +1000	[diff] [blame]	1290
				1291	/*
				1292	* If this is a synchronous mount, make sure that the
				1293	* link transaction goes to disk before returning to
				1294	* the user.
				1295	*/
Dave Chinner	0560f31	2021-08-18 18:46:52 -0700	[diff] [blame^]	1296	if (xfs_has_wsync(mp) \|\| xfs_has_dirsync(mp))
Dave Chinner	c24b5df	2013-08-12 20:49:45 +1000	[diff] [blame]	1297	xfs_trans_set_sync(tp);
Dave Chinner	c24b5df	2013-08-12 20:49:45 +1000	[diff] [blame]	1298
Christoph Hellwig	7039331	2015-06-04 13:48:08 +1000	[diff] [blame]	1299	return xfs_trans_commit(tp);
Dave Chinner	c24b5df	2013-08-12 20:49:45 +1000	[diff] [blame]	1300
Dave Chinner	c24b5df	2013-08-12 20:49:45 +1000	[diff] [blame]	1301	error_return:
Christoph Hellwig	4906e21	2015-06-04 13:47:56 +1000	[diff] [blame]	1302	xfs_trans_cancel(tp);
Dave Chinner	c24b5df	2013-08-12 20:49:45 +1000	[diff] [blame]	1303	std_return:
				1304	return error;
				1305	}
				1306
Darrick J. Wong	363e59b	2017-12-14 15:42:59 -0800	[diff] [blame]	1307	/* Clear the reflink flag and the cowblocks tag if possible. */
				1308	static void
				1309	xfs_itruncate_clear_reflink_flags(
				1310	struct xfs_inode *ip)
				1311	{
				1312	struct xfs_ifork *dfork;
				1313	struct xfs_ifork *cfork;
				1314
				1315	if (!xfs_is_reflink_inode(ip))
				1316	return;
				1317	dfork = XFS_IFORK_PTR(ip, XFS_DATA_FORK);
				1318	cfork = XFS_IFORK_PTR(ip, XFS_COW_FORK);
				1319	if (dfork->if_bytes == 0 && cfork->if_bytes == 0)
Christoph Hellwig	3e09ab8	2021-03-29 11:11:45 -0700	[diff] [blame]	1320	ip->i_diflags2 &= ~XFS_DIFLAG2_REFLINK;
Darrick J. Wong	363e59b	2017-12-14 15:42:59 -0800	[diff] [blame]	1321	if (cfork->if_bytes == 0)
				1322	xfs_inode_clear_cowblocks_tag(ip);
				1323	}
				1324
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1325	/*
Christoph Hellwig	8f04c47	2011-07-08 14:34:34 +0200	[diff] [blame]	1326	* Free up the underlying blocks past new_size. The new size must be smaller
				1327	* than the current size. This routine can be used both for the attribute and
				1328	* data fork, and does not modify the inode size, which is left to the caller.
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1329	*
David Chinner	f648505	2008-04-17 16:50:04 +1000	[diff] [blame]	1330	* The transaction passed to this routine must have made a permanent log
				1331	* reservation of at least XFS_ITRUNCATE_LOG_RES. This routine may commit the
				1332	* given transaction and start new ones, so make sure everything involved in
				1333	* the transaction is tidy before calling here. Some transaction will be
				1334	* returned to the caller to be committed. The incoming transaction must
				1335	* already include the inode, and both inode locks must be held exclusively.
				1336	* The inode must also be "held" within the transaction. On return the inode
				1337	* will be "held" within the returned transaction. This routine does NOT
				1338	* require any disk space to be reserved for it within the transaction.
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1339	*
David Chinner	f648505	2008-04-17 16:50:04 +1000	[diff] [blame]	1340	* If we get an error, we must return with the inode locked and linked into the
				1341	* current transaction. This keeps things simple for the higher level code,
				1342	* because it always knows that the inode is locked and held in the transaction
				1343	* that returns to it whether errors occur or not. We don't mark the inode
				1344	* dirty on error so that transactions can be easily aborted if possible.
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1345	*/
				1346	int
Brian Foster	4e52933	2018-05-10 09:35:42 -0700	[diff] [blame]	1347	xfs_itruncate_extents_flags(
Christoph Hellwig	8f04c47	2011-07-08 14:34:34 +0200	[diff] [blame]	1348	struct xfs_trans **tpp,
				1349	struct xfs_inode *ip,
				1350	int whichfork,
Brian Foster	13b86fc	2018-05-09 08:45:04 -0700	[diff] [blame]	1351	xfs_fsize_t new_size,
Brian Foster	4e52933	2018-05-10 09:35:42 -0700	[diff] [blame]	1352	int flags)
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1353	{
Christoph Hellwig	8f04c47	2011-07-08 14:34:34 +0200	[diff] [blame]	1354	struct xfs_mount *mp = ip->i_mount;
				1355	struct xfs_trans tp = tpp;
Christoph Hellwig	8f04c47	2011-07-08 14:34:34 +0200	[diff] [blame]	1356	xfs_fileoff_t first_unmap_block;
Christoph Hellwig	8f04c47	2011-07-08 14:34:34 +0200	[diff] [blame]	1357	xfs_filblks_t unmap_len;
Christoph Hellwig	8f04c47	2011-07-08 14:34:34 +0200	[diff] [blame]	1358	int error = 0;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1359
Christoph Hellwig	0b56185	2012-07-04 11:13:31 -0400	[diff] [blame]	1360	ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
				1361	ASSERT(!atomic_read(&VFS_I(ip)->i_count) \|\|
				1362	xfs_isilocked(ip, XFS_IOLOCK_EXCL));
Christoph Hellwig	ce7ae151	2011-12-18 20:00:11 +0000	[diff] [blame]	1363	ASSERT(new_size <= XFS_ISIZE(ip));
Christoph Hellwig	8f04c47	2011-07-08 14:34:34 +0200	[diff] [blame]	1364	ASSERT(tp->t_flags & XFS_TRANS_PERM_LOG_RES);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1365	ASSERT(ip->i_itemp != NULL);
Christoph Hellwig	898621d	2010-06-24 11:36:58 +1000	[diff] [blame]	1366	ASSERT(ip->i_itemp->ili_lock_flags == 0);
Christoph Hellwig	8f04c47	2011-07-08 14:34:34 +0200	[diff] [blame]	1367	ASSERT(!XFS_NOT_DQATTACHED(mp, ip));
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1368
Christoph Hellwig	673e8e5	2011-12-18 20:00:04 +0000	[diff] [blame]	1369	trace_xfs_itruncate_extents_start(ip, new_size);
				1370
Brian Foster	4e52933	2018-05-10 09:35:42 -0700	[diff] [blame]	1371	flags \|= xfs_bmapi_aflag(whichfork);
Brian Foster	13b86fc	2018-05-09 08:45:04 -0700	[diff] [blame]	1372
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1373	/*
				1374	* Since it is possible for space to become allocated beyond
				1375	* the end of the file (in a crash where the space is allocated
				1376	* but the inode size is not yet updated), simply remove any
				1377	* blocks which show up between the new EOF and the maximum
Darrick J. Wong	4bbb04a	2020-01-02 13:20:13 -0800	[diff] [blame]	1378	* possible file size.
				1379	*
				1380	* We have to free all the blocks to the bmbt maximum offset, even if
				1381	* the page cache can't scale that far.
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1382	*/
Christoph Hellwig	8f04c47	2011-07-08 14:34:34 +0200	[diff] [blame]	1383	first_unmap_block = XFS_B_TO_FSB(mp, (xfs_ufsize_t)new_size);
Darrick J. Wong	33005fd	2020-12-04 13:28:35 -0800	[diff] [blame]	1384	if (!xfs_verify_fileoff(mp, first_unmap_block)) {
Darrick J. Wong	4bbb04a	2020-01-02 13:20:13 -0800	[diff] [blame]	1385	WARN_ON_ONCE(first_unmap_block > XFS_MAX_FILEOFF);
Christoph Hellwig	8f04c47	2011-07-08 14:34:34 +0200	[diff] [blame]	1386	return 0;
Darrick J. Wong	4bbb04a	2020-01-02 13:20:13 -0800	[diff] [blame]	1387	}
Christoph Hellwig	8f04c47	2011-07-08 14:34:34 +0200	[diff] [blame]	1388
Darrick J. Wong	4bbb04a	2020-01-02 13:20:13 -0800	[diff] [blame]	1389	unmap_len = XFS_MAX_FILEOFF - first_unmap_block + 1;
				1390	while (unmap_len > 0) {
Brian Foster	02dff7b	2018-07-24 13:43:07 -0700	[diff] [blame]	1391	ASSERT(tp->t_firstblock == NULLFSBLOCK);
Darrick J. Wong	4bbb04a	2020-01-02 13:20:13 -0800	[diff] [blame]	1392	error = __xfs_bunmapi(tp, ip, first_unmap_block, &unmap_len,
				1393	flags, XFS_ITRUNC_MAX_EXTENTS);
Christoph Hellwig	8f04c47	2011-07-08 14:34:34 +0200	[diff] [blame]	1394	if (error)
Brian Foster	d5a2e28	2018-09-29 13:41:58 +1000	[diff] [blame]	1395	goto out;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1396
Brian Foster	6dd379c	2020-09-15 20:44:46 -0700	[diff] [blame]	1397	/* free the just unmapped extents */
Brian Foster	9e28a24	2018-07-24 13:43:15 -0700	[diff] [blame]	1398	error = xfs_defer_finish(&tp);
Christoph Hellwig	8f04c47	2011-07-08 14:34:34 +0200	[diff] [blame]	1399	if (error)
Brian Foster	9b1f4e9	2018-08-01 07:20:33 -0700	[diff] [blame]	1400	goto out;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1401	}
Christoph Hellwig	8f04c47	2011-07-08 14:34:34 +0200	[diff] [blame]	1402
Darrick J. Wong	4919d42	2018-04-10 08:28:33 -0700	[diff] [blame]	1403	if (whichfork == XFS_DATA_FORK) {
				1404	/* Remove all pending CoW reservations. */
				1405	error = xfs_reflink_cancel_cow_blocks(ip, &tp,
Darrick J. Wong	4bbb04a	2020-01-02 13:20:13 -0800	[diff] [blame]	1406	first_unmap_block, XFS_MAX_FILEOFF, true);
Darrick J. Wong	4919d42	2018-04-10 08:28:33 -0700	[diff] [blame]	1407	if (error)
				1408	goto out;
Darrick J. Wong	aa8968f	2016-10-03 09:11:38 -0700	[diff] [blame]	1409
Darrick J. Wong	4919d42	2018-04-10 08:28:33 -0700	[diff] [blame]	1410	xfs_itruncate_clear_reflink_flags(ip);
				1411	}
Darrick J. Wong	aa8968f	2016-10-03 09:11:38 -0700	[diff] [blame]	1412
Christoph Hellwig	673e8e5	2011-12-18 20:00:04 +0000	[diff] [blame]	1413	/*
				1414	* Always re-log the inode so that our permanent transaction can keep
				1415	* on rolling it forward in the log.
				1416	*/
				1417	xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
				1418
				1419	trace_xfs_itruncate_extents_end(ip, new_size);
				1420
Christoph Hellwig	8f04c47	2011-07-08 14:34:34 +0200	[diff] [blame]	1421	out:
				1422	*tpp = tp;
				1423	return error;
Christoph Hellwig	8f04c47	2011-07-08 14:34:34 +0200	[diff] [blame]	1424	}
				1425
Dave Chinner	c24b5df	2013-08-12 20:49:45 +1000	[diff] [blame]	1426	int
				1427	xfs_release(
				1428	xfs_inode_t *ip)
				1429	{
				1430	xfs_mount_t *mp = ip->i_mount;
Darrick J. Wong	7d88329	2021-03-23 16:59:31 -0700	[diff] [blame]	1431	int error = 0;
Dave Chinner	c24b5df	2013-08-12 20:49:45 +1000	[diff] [blame]	1432
Dave Chinner	c19b3b05	2016-02-09 16:54:58 +1100	[diff] [blame]	1433	if (!S_ISREG(VFS_I(ip)->i_mode) \|\| (VFS_I(ip)->i_mode == 0))
Dave Chinner	c24b5df	2013-08-12 20:49:45 +1000	[diff] [blame]	1434	return 0;
				1435
				1436	/* If this is a read-only mount, don't do this (would generate I/O) */
				1437	if (mp->m_flags & XFS_MOUNT_RDONLY)
				1438	return 0;
				1439
				1440	if (!XFS_FORCED_SHUTDOWN(mp)) {
				1441	int truncated;
				1442
				1443	/*
Dave Chinner	c24b5df	2013-08-12 20:49:45 +1000	[diff] [blame]	1444	* If we previously truncated this file and removed old data
				1445	* in the process, we want to initiate "early" writeout on
				1446	* the last close. This is an attempt to combat the notorious
				1447	* NULL files problem which is particularly noticeable from a
				1448	* truncate down, buffered (re-)write (delalloc), followed by
				1449	* a crash. What we are effectively doing here is
				1450	* significantly reducing the time window where we'd otherwise
				1451	* be exposed to that problem.
				1452	*/
				1453	truncated = xfs_iflags_test_and_clear(ip, XFS_ITRUNCATED);
				1454	if (truncated) {
				1455	xfs_iflags_clear(ip, XFS_IDIRTY_RELEASE);
Dave Chinner	eac152b	2014-08-04 13:22:49 +1000	[diff] [blame]	1456	if (ip->i_delayed_blks > 0) {
Dave Chinner	2451337	2014-06-25 14:58:08 +1000	[diff] [blame]	1457	error = filemap_flush(VFS_I(ip)->i_mapping);
Dave Chinner	c24b5df	2013-08-12 20:49:45 +1000	[diff] [blame]	1458	if (error)
				1459	return error;
				1460	}
				1461	}
				1462	}
				1463
Dave Chinner	54d7b5c	2016-02-09 16:54:58 +1100	[diff] [blame]	1464	if (VFS_I(ip)->i_nlink == 0)
Dave Chinner	c24b5df	2013-08-12 20:49:45 +1000	[diff] [blame]	1465	return 0;
				1466
Darrick J. Wong	7d88329	2021-03-23 16:59:31 -0700	[diff] [blame]	1467	/*
				1468	* If we can't get the iolock just skip truncating the blocks past EOF
				1469	* because we could deadlock with the mmap_lock otherwise. We'll get
				1470	* another chance to drop them once the last reference to the inode is
				1471	* dropped, so we'll never leak blocks permanently.
				1472	*/
				1473	if (!xfs_ilock_nowait(ip, XFS_IOLOCK_EXCL))
				1474	return 0;
Dave Chinner	c24b5df	2013-08-12 20:49:45 +1000	[diff] [blame]	1475
Darrick J. Wong	7d88329	2021-03-23 16:59:31 -0700	[diff] [blame]	1476	if (xfs_can_free_eofblocks(ip, false)) {
Dave Chinner	c24b5df	2013-08-12 20:49:45 +1000	[diff] [blame]	1477	/*
Brian Foster	a36b926	2017-01-27 23:22:55 -0800	[diff] [blame]	1478	* Check if the inode is being opened, written and closed
				1479	* frequently and we have delayed allocation blocks outstanding
				1480	* (e.g. streaming writes from the NFS server), truncating the
				1481	* blocks past EOF will cause fragmentation to occur.
Dave Chinner	c24b5df	2013-08-12 20:49:45 +1000	[diff] [blame]	1482	*
Brian Foster	a36b926	2017-01-27 23:22:55 -0800	[diff] [blame]	1483	* In this case don't do the truncation, but we have to be
				1484	* careful how we detect this case. Blocks beyond EOF show up as
				1485	* i_delayed_blks even when the inode is clean, so we need to
				1486	* truncate them away first before checking for a dirty release.
				1487	* Hence on the first dirty close we will still remove the
				1488	* speculative allocation, but after that we will leave it in
				1489	* place.
Dave Chinner	c24b5df	2013-08-12 20:49:45 +1000	[diff] [blame]	1490	*/
				1491	if (xfs_iflags_test(ip, XFS_IDIRTY_RELEASE))
Darrick J. Wong	7d88329	2021-03-23 16:59:31 -0700	[diff] [blame]	1492	goto out_unlock;
				1493
				1494	error = xfs_free_eofblocks(ip);
				1495	if (error)
				1496	goto out_unlock;
Dave Chinner	c24b5df	2013-08-12 20:49:45 +1000	[diff] [blame]	1497
				1498	/* delalloc blocks after truncation means it really is dirty */
				1499	if (ip->i_delayed_blks)
				1500	xfs_iflags_set(ip, XFS_IDIRTY_RELEASE);
				1501	}
Darrick J. Wong	7d88329	2021-03-23 16:59:31 -0700	[diff] [blame]	1502
				1503	out_unlock:
				1504	xfs_iunlock(ip, XFS_IOLOCK_EXCL);
				1505	return error;
Dave Chinner	c24b5df	2013-08-12 20:49:45 +1000	[diff] [blame]	1506	}
				1507
				1508	/*
Brian Foster	f7be2d7	2013-09-20 11:06:10 -0400	[diff] [blame]	1509	* xfs_inactive_truncate
				1510	*
				1511	* Called to perform a truncate when an inode becomes unlinked.
				1512	*/
				1513	STATIC int
				1514	xfs_inactive_truncate(
				1515	struct xfs_inode *ip)
				1516	{
				1517	struct xfs_mount *mp = ip->i_mount;
				1518	struct xfs_trans *tp;
				1519	int error;
				1520
Christoph Hellwig	253f491	2016-04-06 09:19:55 +1000	[diff] [blame]	1521	error = xfs_trans_alloc(mp, &M_RES(mp)->tr_itruncate, 0, 0, 0, &tp);
Brian Foster	f7be2d7	2013-09-20 11:06:10 -0400	[diff] [blame]	1522	if (error) {
				1523	ASSERT(XFS_FORCED_SHUTDOWN(mp));
Brian Foster	f7be2d7	2013-09-20 11:06:10 -0400	[diff] [blame]	1524	return error;
				1525	}
Brian Foster	f7be2d7	2013-09-20 11:06:10 -0400	[diff] [blame]	1526	xfs_ilock(ip, XFS_ILOCK_EXCL);
				1527	xfs_trans_ijoin(tp, ip, 0);
				1528
				1529	/*
				1530	* Log the inode size first to prevent stale data exposure in the event
				1531	* of a system crash before the truncate completes. See the related
Jan Kara	69bca80	2016-05-26 14:46:43 +0200	[diff] [blame]	1532	* comment in xfs_vn_setattr_size() for details.
Brian Foster	f7be2d7	2013-09-20 11:06:10 -0400	[diff] [blame]	1533	*/
Christoph Hellwig	13d2c10	2021-03-29 11:11:40 -0700	[diff] [blame]	1534	ip->i_disk_size = 0;
Brian Foster	f7be2d7	2013-09-20 11:06:10 -0400	[diff] [blame]	1535	xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
				1536
				1537	error = xfs_itruncate_extents(&tp, ip, XFS_DATA_FORK, 0);
				1538	if (error)
				1539	goto error_trans_cancel;
				1540
Christoph Hellwig	daf8396	2020-05-18 10:27:22 -0700	[diff] [blame]	1541	ASSERT(ip->i_df.if_nextents == 0);
Brian Foster	f7be2d7	2013-09-20 11:06:10 -0400	[diff] [blame]	1542
Christoph Hellwig	7039331	2015-06-04 13:48:08 +1000	[diff] [blame]	1543	error = xfs_trans_commit(tp);
Brian Foster	f7be2d7	2013-09-20 11:06:10 -0400	[diff] [blame]	1544	if (error)
				1545	goto error_unlock;
				1546
				1547	xfs_iunlock(ip, XFS_ILOCK_EXCL);
				1548	return 0;
				1549
				1550	error_trans_cancel:
Christoph Hellwig	4906e21	2015-06-04 13:47:56 +1000	[diff] [blame]	1551	xfs_trans_cancel(tp);
Brian Foster	f7be2d7	2013-09-20 11:06:10 -0400	[diff] [blame]	1552	error_unlock:
				1553	xfs_iunlock(ip, XFS_ILOCK_EXCL);
				1554	return error;
				1555	}
				1556
				1557	/*
Brian Foster	88877d2	2013-09-20 11:06:11 -0400	[diff] [blame]	1558	* xfs_inactive_ifree()
				1559	*
				1560	* Perform the inode free when an inode is unlinked.
				1561	*/
				1562	STATIC int
				1563	xfs_inactive_ifree(
				1564	struct xfs_inode *ip)
				1565	{
Brian Foster	88877d2	2013-09-20 11:06:11 -0400	[diff] [blame]	1566	struct xfs_mount *mp = ip->i_mount;
				1567	struct xfs_trans *tp;
				1568	int error;
				1569
Brian Foster	9d43b18	2014-04-24 16:00:52 +1000	[diff] [blame]	1570	/*
Christoph Hellwig	76d771b	2017-01-25 07:49:35 -0800	[diff] [blame]	1571	* We try to use a per-AG reservation for any block needed by the finobt
				1572	* tree, but as the finobt feature predates the per-AG reservation
				1573	* support a degraded file system might not have enough space for the
				1574	* reservation at mount time. In that case try to dip into the reserved
				1575	* pool and pray.
Brian Foster	9d43b18	2014-04-24 16:00:52 +1000	[diff] [blame]	1576	*
				1577	* Send a warning if the reservation does happen to fail, as the inode
				1578	* now remains allocated and sits on the unlinked list until the fs is
				1579	* repaired.
				1580	*/
Darrick J. Wong	e1f6ca1	2019-02-14 09:33:15 -0800	[diff] [blame]	1581	if (unlikely(mp->m_finobt_nores)) {
Christoph Hellwig	76d771b	2017-01-25 07:49:35 -0800	[diff] [blame]	1582	error = xfs_trans_alloc(mp, &M_RES(mp)->tr_ifree,
				1583	XFS_IFREE_SPACE_RES(mp), 0, XFS_TRANS_RESERVE,
				1584	&tp);
				1585	} else {
				1586	error = xfs_trans_alloc(mp, &M_RES(mp)->tr_ifree, 0, 0, 0, &tp);
				1587	}
Brian Foster	88877d2	2013-09-20 11:06:11 -0400	[diff] [blame]	1588	if (error) {
Dave Chinner	2451337	2014-06-25 14:58:08 +1000	[diff] [blame]	1589	if (error == -ENOSPC) {
Brian Foster	9d43b18	2014-04-24 16:00:52 +1000	[diff] [blame]	1590	xfs_warn_ratelimited(mp,
				1591	"Failed to remove inode(s) from unlinked list. "
				1592	"Please free space, unmount and run xfs_repair.");
				1593	} else {
				1594	ASSERT(XFS_FORCED_SHUTDOWN(mp));
				1595	}
Brian Foster	88877d2	2013-09-20 11:06:11 -0400	[diff] [blame]	1596	return error;
				1597	}
				1598
Dave Chinner	96355d5a	2020-06-29 14:48:45 -0700	[diff] [blame]	1599	/*
				1600	* We do not hold the inode locked across the entire rolling transaction
				1601	* here. We only need to hold it for the first transaction that
				1602	* xfs_ifree() builds, which may mark the inode XFS_ISTALE if the
				1603	* underlying cluster buffer is freed. Relogging an XFS_ISTALE inode
				1604	* here breaks the relationship between cluster buffer invalidation and
				1605	* stale inode invalidation on cluster buffer item journal commit
				1606	* completion, and can result in leaving dirty stale inodes hanging
				1607	* around in memory.
				1608	*
				1609	* We have no need for serialising this inode operation against other
				1610	* operations - we freed the inode and hence reallocation is required
				1611	* and that will serialise on reallocating the space the deferops need
				1612	* to free. Hence we can unlock the inode on the first commit of
				1613	* the transaction rather than roll it right through the deferops. This
				1614	* avoids relogging the XFS_ISTALE inode.
				1615	*
				1616	* We check that xfs_ifree() hasn't grown an internal transaction roll
				1617	* by asserting that the inode is still locked when it returns.
				1618	*/
Brian Foster	88877d2	2013-09-20 11:06:11 -0400	[diff] [blame]	1619	xfs_ilock(ip, XFS_ILOCK_EXCL);
Dave Chinner	96355d5a	2020-06-29 14:48:45 -0700	[diff] [blame]	1620	xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
Brian Foster	88877d2	2013-09-20 11:06:11 -0400	[diff] [blame]	1621
Brian Foster	0e0417f	2018-07-11 22:26:07 -0700	[diff] [blame]	1622	error = xfs_ifree(tp, ip);
Dave Chinner	96355d5a	2020-06-29 14:48:45 -0700	[diff] [blame]	1623	ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
Brian Foster	88877d2	2013-09-20 11:06:11 -0400	[diff] [blame]	1624	if (error) {
				1625	/*
				1626	* If we fail to free the inode, shut down. The cancel
				1627	* might do that, we need to make sure. Otherwise the
				1628	* inode might be lost for a long time or forever.
				1629	*/
				1630	if (!XFS_FORCED_SHUTDOWN(mp)) {
				1631	xfs_notice(mp, "%s: xfs_ifree returned error %d",
				1632	__func__, error);
				1633	xfs_force_shutdown(mp, SHUTDOWN_META_IO_ERROR);
				1634	}
Christoph Hellwig	4906e21	2015-06-04 13:47:56 +1000	[diff] [blame]	1635	xfs_trans_cancel(tp);
Brian Foster	88877d2	2013-09-20 11:06:11 -0400	[diff] [blame]	1636	return error;
				1637	}
				1638
				1639	/*
				1640	* Credit the quota account(s). The inode is gone.
				1641	*/
				1642	xfs_trans_mod_dquot_byino(tp, ip, XFS_TRANS_DQ_ICOUNT, -1);
				1643
				1644	/*
Brian Foster	d4a97a0	2015-08-19 10:01:40 +1000	[diff] [blame]	1645	* Just ignore errors at this point. There is nothing we can do except
				1646	* to try to keep going. Make sure it's not a silent error.
Brian Foster	88877d2	2013-09-20 11:06:11 -0400	[diff] [blame]	1647	*/
Christoph Hellwig	7039331	2015-06-04 13:48:08 +1000	[diff] [blame]	1648	error = xfs_trans_commit(tp);
Brian Foster	88877d2	2013-09-20 11:06:11 -0400	[diff] [blame]	1649	if (error)
				1650	xfs_notice(mp, "%s: xfs_trans_commit returned error %d",
				1651	__func__, error);
				1652
Brian Foster	88877d2	2013-09-20 11:06:11 -0400	[diff] [blame]	1653	return 0;
				1654	}
				1655
				1656	/*
Darrick J. Wong	62af7d5	2021-08-06 11:05:39 -0700	[diff] [blame]	1657	* Returns true if we need to update the on-disk metadata before we can free
				1658	* the memory used by this inode. Updates include freeing post-eof
				1659	* preallocations; freeing COW staging extents; and marking the inode free in
				1660	* the inobt if it is on the unlinked list.
				1661	*/
				1662	bool
				1663	xfs_inode_needs_inactive(
				1664	struct xfs_inode *ip)
				1665	{
				1666	struct xfs_mount *mp = ip->i_mount;
				1667	struct xfs_ifork *cow_ifp = XFS_IFORK_PTR(ip, XFS_COW_FORK);
				1668
				1669	/*
				1670	* If the inode is already free, then there can be nothing
				1671	* to clean up here.
				1672	*/
				1673	if (VFS_I(ip)->i_mode == 0)
				1674	return false;
				1675
				1676	/* If this is a read-only mount, don't do this (would generate I/O) */
				1677	if (mp->m_flags & XFS_MOUNT_RDONLY)
				1678	return false;
				1679
				1680	/* If the log isn't running, push inodes straight to reclaim. */
Dave Chinner	0560f31	2021-08-18 18:46:52 -0700	[diff] [blame^]	1681	if (XFS_FORCED_SHUTDOWN(mp) \|\| xfs_has_norecovery(mp))
Darrick J. Wong	62af7d5	2021-08-06 11:05:39 -0700	[diff] [blame]	1682	return false;
				1683
				1684	/* Metadata inodes require explicit resource cleanup. */
				1685	if (xfs_is_metadata_inode(ip))
				1686	return false;
				1687
				1688	/* Want to clean out the cow blocks if there are any. */
				1689	if (cow_ifp && cow_ifp->if_bytes > 0)
				1690	return true;
				1691
				1692	/* Unlinked files must be freed. */
				1693	if (VFS_I(ip)->i_nlink == 0)
				1694	return true;
				1695
				1696	/*
				1697	* This file isn't being freed, so check if there are post-eof blocks
				1698	* to free. @force is true because we are evicting an inode from the
				1699	* cache. Post-eof blocks must be freed, lest we end up with broken
				1700	* free space accounting.
				1701	*
				1702	* Note: don't bother with iolock here since lockdep complains about
				1703	* acquiring it in reclaim context. We have the only reference to the
				1704	* inode at this point anyways.
				1705	*/
				1706	return xfs_can_free_eofblocks(ip, true);
				1707	}
				1708
				1709	/*
Dave Chinner	c24b5df	2013-08-12 20:49:45 +1000	[diff] [blame]	1710	* xfs_inactive
				1711	*
				1712	* This is called when the vnode reference count for the vnode
				1713	* goes to zero. If the file has been unlinked, then it must
				1714	* now be truncated. Also, we clear all of the read-ahead state
				1715	* kept for the inode here since the file is now closed.
				1716	*/
Brian Foster	74564fb	2013-09-20 11:06:12 -0400	[diff] [blame]	1717	void
Dave Chinner	c24b5df	2013-08-12 20:49:45 +1000	[diff] [blame]	1718	xfs_inactive(
				1719	xfs_inode_t *ip)
				1720	{
Jie Liu	3d3c8b5	2013-08-12 20:49:59 +1000	[diff] [blame]	1721	struct xfs_mount *mp;
Jie Liu	3d3c8b5	2013-08-12 20:49:59 +1000	[diff] [blame]	1722	int error;
				1723	int truncate = 0;
Dave Chinner	c24b5df	2013-08-12 20:49:45 +1000	[diff] [blame]	1724
				1725	/*
				1726	* If the inode is already free, then there can be nothing
				1727	* to clean up here.
				1728	*/
Dave Chinner	c19b3b05	2016-02-09 16:54:58 +1100	[diff] [blame]	1729	if (VFS_I(ip)->i_mode == 0) {
Dave Chinner	c24b5df	2013-08-12 20:49:45 +1000	[diff] [blame]	1730	ASSERT(ip->i_df.if_broot_bytes == 0);
Darrick J. Wong	3ea06d7	2021-05-31 11:31:57 -0700	[diff] [blame]	1731	goto out;
Dave Chinner	c24b5df	2013-08-12 20:49:45 +1000	[diff] [blame]	1732	}
				1733
				1734	mp = ip->i_mount;
Darrick J. Wong	17c12bc	2016-10-03 09:11:29 -0700	[diff] [blame]	1735	ASSERT(!xfs_iflags_test(ip, XFS_IRECOVERY));
Dave Chinner	c24b5df	2013-08-12 20:49:45 +1000	[diff] [blame]	1736
Dave Chinner	c24b5df	2013-08-12 20:49:45 +1000	[diff] [blame]	1737	/* If this is a read-only mount, don't do this (would generate I/O) */
				1738	if (mp->m_flags & XFS_MOUNT_RDONLY)
Darrick J. Wong	3ea06d7	2021-05-31 11:31:57 -0700	[diff] [blame]	1739	goto out;
Dave Chinner	c24b5df	2013-08-12 20:49:45 +1000	[diff] [blame]	1740
Darrick J. Wong	383e32b	2021-03-22 09:51:54 -0700	[diff] [blame]	1741	/* Metadata inodes require explicit resource cleanup. */
				1742	if (xfs_is_metadata_inode(ip))
Darrick J. Wong	3ea06d7	2021-05-31 11:31:57 -0700	[diff] [blame]	1743	goto out;
Darrick J. Wong	383e32b	2021-03-22 09:51:54 -0700	[diff] [blame]	1744
Darrick J. Wong	6231848	2018-03-06 17:08:31 -0800	[diff] [blame]	1745	/* Try to clean out the cow blocks if there are any. */
Christoph Hellwig	51d6269	2018-07-17 16:51:51 -0700	[diff] [blame]	1746	if (xfs_inode_has_cow_data(ip))
Darrick J. Wong	6231848	2018-03-06 17:08:31 -0800	[diff] [blame]	1747	xfs_reflink_cancel_cow_range(ip, 0, NULLFILEOFF, true);
				1748
Dave Chinner	54d7b5c	2016-02-09 16:54:58 +1100	[diff] [blame]	1749	if (VFS_I(ip)->i_nlink != 0) {
Dave Chinner	c24b5df	2013-08-12 20:49:45 +1000	[diff] [blame]	1750	/*
				1751	* force is true because we are evicting an inode from the
				1752	* cache. Post-eof blocks must be freed, lest we end up with
				1753	* broken free space accounting.
Brian Foster	3b4683c	2017-04-11 10:50:05 -0700	[diff] [blame]	1754	*
				1755	* Note: don't bother with iolock here since lockdep complains
				1756	* about acquiring it in reclaim context. We have the only
				1757	* reference to the inode at this point anyways.
Dave Chinner	c24b5df	2013-08-12 20:49:45 +1000	[diff] [blame]	1758	*/
Brian Foster	3b4683c	2017-04-11 10:50:05 -0700	[diff] [blame]	1759	if (xfs_can_free_eofblocks(ip, true))
Brian Foster	a36b926	2017-01-27 23:22:55 -0800	[diff] [blame]	1760	xfs_free_eofblocks(ip);
Brian Foster	74564fb	2013-09-20 11:06:12 -0400	[diff] [blame]	1761
Darrick J. Wong	3ea06d7	2021-05-31 11:31:57 -0700	[diff] [blame]	1762	goto out;
Dave Chinner	c24b5df	2013-08-12 20:49:45 +1000	[diff] [blame]	1763	}
				1764
Dave Chinner	c19b3b05	2016-02-09 16:54:58 +1100	[diff] [blame]	1765	if (S_ISREG(VFS_I(ip)->i_mode) &&
Christoph Hellwig	13d2c10	2021-03-29 11:11:40 -0700	[diff] [blame]	1766	(ip->i_disk_size != 0 \|\| XFS_ISIZE(ip) != 0 \|\|
Christoph Hellwig	daf8396	2020-05-18 10:27:22 -0700	[diff] [blame]	1767	ip->i_df.if_nextents > 0 \|\| ip->i_delayed_blks > 0))
Dave Chinner	c24b5df	2013-08-12 20:49:45 +1000	[diff] [blame]	1768	truncate = 1;
				1769
Darrick J. Wong	c14cfcc	2018-05-04 15:30:21 -0700	[diff] [blame]	1770	error = xfs_qm_dqattach(ip);
Dave Chinner	c24b5df	2013-08-12 20:49:45 +1000	[diff] [blame]	1771	if (error)
Darrick J. Wong	3ea06d7	2021-05-31 11:31:57 -0700	[diff] [blame]	1772	goto out;
Dave Chinner	c24b5df	2013-08-12 20:49:45 +1000	[diff] [blame]	1773
Dave Chinner	c19b3b05	2016-02-09 16:54:58 +1100	[diff] [blame]	1774	if (S_ISLNK(VFS_I(ip)->i_mode))
Brian Foster	36b21dd	2013-09-20 11:06:09 -0400	[diff] [blame]	1775	error = xfs_inactive_symlink(ip);
Brian Foster	f7be2d7	2013-09-20 11:06:10 -0400	[diff] [blame]	1776	else if (truncate)
				1777	error = xfs_inactive_truncate(ip);
				1778	if (error)
Darrick J. Wong	3ea06d7	2021-05-31 11:31:57 -0700	[diff] [blame]	1779	goto out;
Dave Chinner	c24b5df	2013-08-12 20:49:45 +1000	[diff] [blame]	1780
				1781	/*
				1782	* If there are attributes associated with the file then blow them away
				1783	* now. The code calls a routine that recursively deconstructs the
Dave Chinner	6dfe5a0	2015-05-29 07:40:08 +1000	[diff] [blame]	1784	* attribute fork. If also blows away the in-core attribute fork.
Dave Chinner	c24b5df	2013-08-12 20:49:45 +1000	[diff] [blame]	1785	*/
Dave Chinner	6dfe5a0	2015-05-29 07:40:08 +1000	[diff] [blame]	1786	if (XFS_IFORK_Q(ip)) {
Dave Chinner	c24b5df	2013-08-12 20:49:45 +1000	[diff] [blame]	1787	error = xfs_attr_inactive(ip);
				1788	if (error)
Darrick J. Wong	3ea06d7	2021-05-31 11:31:57 -0700	[diff] [blame]	1789	goto out;
Dave Chinner	c24b5df	2013-08-12 20:49:45 +1000	[diff] [blame]	1790	}
				1791
Dave Chinner	6dfe5a0	2015-05-29 07:40:08 +1000	[diff] [blame]	1792	ASSERT(!ip->i_afp);
Christoph Hellwig	7821ea3	2021-03-29 11:11:44 -0700	[diff] [blame]	1793	ASSERT(ip->i_forkoff == 0);
Dave Chinner	c24b5df	2013-08-12 20:49:45 +1000	[diff] [blame]	1794
				1795	/*
				1796	* Free the inode.
				1797	*/
Darrick J. Wong	3ea06d7	2021-05-31 11:31:57 -0700	[diff] [blame]	1798	xfs_inactive_ifree(ip);
Dave Chinner	c24b5df	2013-08-12 20:49:45 +1000	[diff] [blame]	1799
Darrick J. Wong	3ea06d7	2021-05-31 11:31:57 -0700	[diff] [blame]	1800	out:
Dave Chinner	c24b5df	2013-08-12 20:49:45 +1000	[diff] [blame]	1801	/*
Darrick J. Wong	3ea06d7	2021-05-31 11:31:57 -0700	[diff] [blame]	1802	* We're done making metadata updates for this inode, so we can release
				1803	* the attached dquots.
Dave Chinner	c24b5df	2013-08-12 20:49:45 +1000	[diff] [blame]	1804	*/
				1805	xfs_qm_dqdetach(ip);
Dave Chinner	c24b5df	2013-08-12 20:49:45 +1000	[diff] [blame]	1806	}
				1807
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1808	/*
Darrick J. Wong	9b24717	2019-02-07 10:37:16 -0800	[diff] [blame]	1809	* In-Core Unlinked List Lookups
				1810	* =============================
				1811	*
				1812	* Every inode is supposed to be reachable from some other piece of metadata
				1813	* with the exception of the root directory. Inodes with a connection to a
				1814	* file descriptor but not linked from anywhere in the on-disk directory tree
				1815	* are collectively known as unlinked inodes, though the filesystem itself
				1816	* maintains links to these inodes so that on-disk metadata are consistent.
				1817	*
				1818	* XFS implements a per-AG on-disk hash table of unlinked inodes. The AGI
				1819	* header contains a number of buckets that point to an inode, and each inode
				1820	* record has a pointer to the next inode in the hash chain. This
				1821	* singly-linked list causes scaling problems in the iunlink remove function
				1822	* because we must walk that list to find the inode that points to the inode
				1823	* being removed from the unlinked hash bucket list.
				1824	*
				1825	* What if we modelled the unlinked list as a collection of records capturing
				1826	* "X.next_unlinked = Y" relations? If we indexed those records on Y, we'd
				1827	* have a fast way to look up unlinked list predecessors, which avoids the
				1828	* slow list walk. That's exactly what we do here (in-core) with a per-AG
				1829	* rhashtable.
				1830	*
				1831	* Because this is a backref cache, we ignore operational failures since the
				1832	* iunlink code can fall back to the slow bucket walk. The only errors that
				1833	* should bubble out are for obviously incorrect situations.
				1834	*
				1835	* All users of the backref cache MUST hold the AGI buffer lock to serialize
				1836	* access or have otherwise provided for concurrency control.
				1837	*/
				1838
				1839	/* Capture a "X.next_unlinked = Y" relationship. */
				1840	struct xfs_iunlink {
				1841	struct rhash_head iu_rhash_head;
				1842	xfs_agino_t iu_agino; /* X */
				1843	xfs_agino_t iu_next_unlinked; /* Y */
				1844	};
				1845
				1846	/* Unlinked list predecessor lookup hashtable construction */
				1847	static int
				1848	xfs_iunlink_obj_cmpfn(
				1849	struct rhashtable_compare_arg *arg,
				1850	const void *obj)
				1851	{
				1852	const xfs_agino_t *key = arg->key;
				1853	const struct xfs_iunlink *iu = obj;
				1854
				1855	if (iu->iu_next_unlinked != *key)
				1856	return 1;
				1857	return 0;
				1858	}
				1859
				1860	static const struct rhashtable_params xfs_iunlink_hash_params = {
				1861	.min_size = XFS_AGI_UNLINKED_BUCKETS,
				1862	.key_len = sizeof(xfs_agino_t),
				1863	.key_offset = offsetof(struct xfs_iunlink,
				1864	iu_next_unlinked),
				1865	.head_offset = offsetof(struct xfs_iunlink, iu_rhash_head),
				1866	.automatic_shrinking = true,
				1867	.obj_cmpfn = xfs_iunlink_obj_cmpfn,
				1868	};
				1869
				1870	/*
				1871	* Return X, where X.next_unlinked == @agino. Returns NULLAGINO if no such
				1872	* relation is found.
				1873	*/
				1874	static xfs_agino_t
				1875	xfs_iunlink_lookup_backref(
				1876	struct xfs_perag *pag,
				1877	xfs_agino_t agino)
				1878	{
				1879	struct xfs_iunlink *iu;
				1880
				1881	iu = rhashtable_lookup_fast(&pag->pagi_unlinked_hash, &agino,
				1882	xfs_iunlink_hash_params);
				1883	return iu ? iu->iu_agino : NULLAGINO;
				1884	}
				1885
				1886	/*
				1887	* Take ownership of an iunlink cache entry and insert it into the hash table.
				1888	* If successful, the entry will be owned by the cache; if not, it is freed.
				1889	* Either way, the caller does not own @iu after this call.
				1890	*/
				1891	static int
				1892	xfs_iunlink_insert_backref(
				1893	struct xfs_perag *pag,
				1894	struct xfs_iunlink *iu)
				1895	{
				1896	int error;
				1897
				1898	error = rhashtable_insert_fast(&pag->pagi_unlinked_hash,
				1899	&iu->iu_rhash_head, xfs_iunlink_hash_params);
				1900	/*
				1901	* Fail loudly if there already was an entry because that's a sign of
				1902	* corruption of in-memory data. Also fail loudly if we see an error
				1903	* code we didn't anticipate from the rhashtable code. Currently we
				1904	* only anticipate ENOMEM.
				1905	*/
				1906	if (error) {
				1907	WARN(error != -ENOMEM, "iunlink cache insert error %d", error);
				1908	kmem_free(iu);
				1909	}
				1910	/*
				1911	* Absorb any runtime errors that aren't a result of corruption because
				1912	* this is a cache and we can always fall back to bucket list scanning.
				1913	*/
				1914	if (error != 0 && error != -EEXIST)
				1915	error = 0;
				1916	return error;
				1917	}
				1918
				1919	/* Remember that @prev_agino.next_unlinked = @this_agino. */
				1920	static int
				1921	xfs_iunlink_add_backref(
				1922	struct xfs_perag *pag,
				1923	xfs_agino_t prev_agino,
				1924	xfs_agino_t this_agino)
				1925	{
				1926	struct xfs_iunlink *iu;
				1927
				1928	if (XFS_TEST_ERROR(false, pag->pag_mount, XFS_ERRTAG_IUNLINK_FALLBACK))
				1929	return 0;
				1930
Tetsuo Handa	707e0dd	2019-08-26 12:06:22 -0700	[diff] [blame]	1931	iu = kmem_zalloc(sizeof(*iu), KM_NOFS);
Darrick J. Wong	9b24717	2019-02-07 10:37:16 -0800	[diff] [blame]	1932	iu->iu_agino = prev_agino;
				1933	iu->iu_next_unlinked = this_agino;
				1934
				1935	return xfs_iunlink_insert_backref(pag, iu);
				1936	}
				1937
				1938	/*
				1939	* Replace X.next_unlinked = @agino with X.next_unlinked = @next_unlinked.
				1940	* If @next_unlinked is NULLAGINO, we drop the backref and exit. If there
				1941	* wasn't any such entry then we don't bother.
				1942	*/
				1943	static int
				1944	xfs_iunlink_change_backref(
				1945	struct xfs_perag *pag,
				1946	xfs_agino_t agino,
				1947	xfs_agino_t next_unlinked)
				1948	{
				1949	struct xfs_iunlink *iu;
				1950	int error;
				1951
				1952	/* Look up the old entry; if there wasn't one then exit. */
				1953	iu = rhashtable_lookup_fast(&pag->pagi_unlinked_hash, &agino,
				1954	xfs_iunlink_hash_params);
				1955	if (!iu)
				1956	return 0;
				1957
				1958	/*
				1959	* Remove the entry. This shouldn't ever return an error, but if we
				1960	* couldn't remove the old entry we don't want to add it again to the
				1961	* hash table, and if the entry disappeared on us then someone's
				1962	* violated the locking rules and we need to fail loudly. Either way
				1963	* we cannot remove the inode because internal state is or would have
				1964	* been corrupt.
				1965	*/
				1966	error = rhashtable_remove_fast(&pag->pagi_unlinked_hash,
				1967	&iu->iu_rhash_head, xfs_iunlink_hash_params);
				1968	if (error)
				1969	return error;
				1970
				1971	/* If there is no new next entry just free our item and return. */
				1972	if (next_unlinked == NULLAGINO) {
				1973	kmem_free(iu);
				1974	return 0;
				1975	}
				1976
				1977	/* Update the entry and re-add it to the hash table. */
				1978	iu->iu_next_unlinked = next_unlinked;
				1979	return xfs_iunlink_insert_backref(pag, iu);
				1980	}
				1981
				1982	/* Set up the in-core predecessor structures. */
				1983	int
				1984	xfs_iunlink_init(
				1985	struct xfs_perag *pag)
				1986	{
				1987	return rhashtable_init(&pag->pagi_unlinked_hash,
				1988	&xfs_iunlink_hash_params);
				1989	}
				1990
				1991	/* Free the in-core predecessor structures. */
				1992	static void
				1993	xfs_iunlink_free_item(
				1994	void *ptr,
				1995	void *arg)
				1996	{
				1997	struct xfs_iunlink *iu = ptr;
				1998	bool *freed_anything = arg;
				1999
				2000	*freed_anything = true;
				2001	kmem_free(iu);
				2002	}
				2003
				2004	void
				2005	xfs_iunlink_destroy(
				2006	struct xfs_perag *pag)
				2007	{
				2008	bool freed_anything = false;
				2009
				2010	rhashtable_free_and_destroy(&pag->pagi_unlinked_hash,
				2011	xfs_iunlink_free_item, &freed_anything);
				2012
				2013	ASSERT(freed_anything == false \|\| XFS_FORCED_SHUTDOWN(pag->pag_mount));
				2014	}
				2015
				2016	/*
Darrick J. Wong	9a4a511	2019-02-07 10:37:14 -0800	[diff] [blame]	2017	* Point the AGI unlinked bucket at an inode and log the results. The caller
				2018	* is responsible for validating the old value.
				2019	*/
				2020	STATIC int
				2021	xfs_iunlink_update_bucket(
				2022	struct xfs_trans *tp,
Dave Chinner	f40aadb	2021-06-02 10:48:51 +1000	[diff] [blame]	2023	struct xfs_perag *pag,
Darrick J. Wong	9a4a511	2019-02-07 10:37:14 -0800	[diff] [blame]	2024	struct xfs_buf *agibp,
				2025	unsigned int bucket_index,
				2026	xfs_agino_t new_agino)
				2027	{
Christoph Hellwig	370c782	2020-03-10 08:57:29 -0700	[diff] [blame]	2028	struct xfs_agi *agi = agibp->b_addr;
Darrick J. Wong	9a4a511	2019-02-07 10:37:14 -0800	[diff] [blame]	2029	xfs_agino_t old_value;
				2030	int offset;
				2031
Dave Chinner	f40aadb	2021-06-02 10:48:51 +1000	[diff] [blame]	2032	ASSERT(xfs_verify_agino_or_null(tp->t_mountp, pag->pag_agno, new_agino));
Darrick J. Wong	9a4a511	2019-02-07 10:37:14 -0800	[diff] [blame]	2033
				2034	old_value = be32_to_cpu(agi->agi_unlinked[bucket_index]);
Dave Chinner	f40aadb	2021-06-02 10:48:51 +1000	[diff] [blame]	2035	trace_xfs_iunlink_update_bucket(tp->t_mountp, pag->pag_agno, bucket_index,
Darrick J. Wong	9a4a511	2019-02-07 10:37:14 -0800	[diff] [blame]	2036	old_value, new_agino);
				2037
				2038	/*
				2039	* We should never find the head of the list already set to the value
				2040	* passed in because either we're adding or removing ourselves from the
				2041	* head of the list.
				2042	*/
Darrick J. Wong	a5155b8	2019-11-02 09:40:53 -0700	[diff] [blame]	2043	if (old_value == new_agino) {
Darrick J. Wong	8d57c21	2020-03-11 10:37:54 -0700	[diff] [blame]	2044	xfs_buf_mark_corrupt(agibp);
Darrick J. Wong	9a4a511	2019-02-07 10:37:14 -0800	[diff] [blame]	2045	return -EFSCORRUPTED;
Darrick J. Wong	a5155b8	2019-11-02 09:40:53 -0700	[diff] [blame]	2046	}
Darrick J. Wong	9a4a511	2019-02-07 10:37:14 -0800	[diff] [blame]	2047
				2048	agi->agi_unlinked[bucket_index] = cpu_to_be32(new_agino);
				2049	offset = offsetof(struct xfs_agi, agi_unlinked) +
				2050	(sizeof(xfs_agino_t) * bucket_index);
				2051	xfs_trans_log_buf(tp, agibp, offset, offset + sizeof(xfs_agino_t) - 1);
				2052	return 0;
				2053	}
				2054
Darrick J. Wong	f2fc16a	2019-02-07 10:37:15 -0800	[diff] [blame]	2055	/* Set an on-disk inode's next_unlinked pointer. */
				2056	STATIC void
				2057	xfs_iunlink_update_dinode(
				2058	struct xfs_trans *tp,
Dave Chinner	f40aadb	2021-06-02 10:48:51 +1000	[diff] [blame]	2059	struct xfs_perag *pag,
Darrick J. Wong	f2fc16a	2019-02-07 10:37:15 -0800	[diff] [blame]	2060	xfs_agino_t agino,
				2061	struct xfs_buf *ibp,
				2062	struct xfs_dinode *dip,
				2063	struct xfs_imap *imap,
				2064	xfs_agino_t next_agino)
				2065	{
				2066	struct xfs_mount *mp = tp->t_mountp;
				2067	int offset;
				2068
Dave Chinner	f40aadb	2021-06-02 10:48:51 +1000	[diff] [blame]	2069	ASSERT(xfs_verify_agino_or_null(mp, pag->pag_agno, next_agino));
Darrick J. Wong	f2fc16a	2019-02-07 10:37:15 -0800	[diff] [blame]	2070
Dave Chinner	f40aadb	2021-06-02 10:48:51 +1000	[diff] [blame]	2071	trace_xfs_iunlink_update_dinode(mp, pag->pag_agno, agino,
Darrick J. Wong	f2fc16a	2019-02-07 10:37:15 -0800	[diff] [blame]	2072	be32_to_cpu(dip->di_next_unlinked), next_agino);
				2073
				2074	dip->di_next_unlinked = cpu_to_be32(next_agino);
				2075	offset = imap->im_boffset +
				2076	offsetof(struct xfs_dinode, di_next_unlinked);
				2077
				2078	/* need to recalc the inode CRC if appropriate */
				2079	xfs_dinode_calc_crc(mp, dip);
				2080	xfs_trans_inode_buf(tp, ibp);
				2081	xfs_trans_log_buf(tp, ibp, offset, offset + sizeof(xfs_agino_t) - 1);
Darrick J. Wong	f2fc16a	2019-02-07 10:37:15 -0800	[diff] [blame]	2082	}
				2083
				2084	/* Set an in-core inode's unlinked pointer and return the old value. */
				2085	STATIC int
				2086	xfs_iunlink_update_inode(
				2087	struct xfs_trans *tp,
				2088	struct xfs_inode *ip,
Dave Chinner	f40aadb	2021-06-02 10:48:51 +1000	[diff] [blame]	2089	struct xfs_perag *pag,
Darrick J. Wong	f2fc16a	2019-02-07 10:37:15 -0800	[diff] [blame]	2090	xfs_agino_t next_agino,
				2091	xfs_agino_t *old_next_agino)
				2092	{
				2093	struct xfs_mount *mp = tp->t_mountp;
				2094	struct xfs_dinode *dip;
				2095	struct xfs_buf *ibp;
				2096	xfs_agino_t old_value;
				2097	int error;
				2098
Dave Chinner	f40aadb	2021-06-02 10:48:51 +1000	[diff] [blame]	2099	ASSERT(xfs_verify_agino_or_null(mp, pag->pag_agno, next_agino));
Darrick J. Wong	f2fc16a	2019-02-07 10:37:15 -0800	[diff] [blame]	2100
Christoph Hellwig	af9dcdd	2021-03-29 11:11:37 -0700	[diff] [blame]	2101	error = xfs_imap_to_bp(mp, tp, &ip->i_imap, &ibp);
Darrick J. Wong	f2fc16a	2019-02-07 10:37:15 -0800	[diff] [blame]	2102	if (error)
				2103	return error;
Christoph Hellwig	af9dcdd	2021-03-29 11:11:37 -0700	[diff] [blame]	2104	dip = xfs_buf_offset(ibp, ip->i_imap.im_boffset);
Darrick J. Wong	f2fc16a	2019-02-07 10:37:15 -0800	[diff] [blame]	2105
				2106	/* Make sure the old pointer isn't garbage. */
				2107	old_value = be32_to_cpu(dip->di_next_unlinked);
Dave Chinner	f40aadb	2021-06-02 10:48:51 +1000	[diff] [blame]	2108	if (!xfs_verify_agino_or_null(mp, pag->pag_agno, old_value)) {
Darrick J. Wong	a5155b8	2019-11-02 09:40:53 -0700	[diff] [blame]	2109	xfs_inode_verifier_error(ip, -EFSCORRUPTED, __func__, dip,
				2110	sizeof(*dip), __this_address);
Darrick J. Wong	f2fc16a	2019-02-07 10:37:15 -0800	[diff] [blame]	2111	error = -EFSCORRUPTED;
				2112	goto out;
				2113	}
				2114
				2115	/*
				2116	* Since we're updating a linked list, we should never find that the
				2117	* current pointer is the same as the new value, unless we're
				2118	* terminating the list.
				2119	*/
				2120	*old_next_agino = old_value;
				2121	if (old_value == next_agino) {
Darrick J. Wong	a5155b8	2019-11-02 09:40:53 -0700	[diff] [blame]	2122	if (next_agino != NULLAGINO) {
				2123	xfs_inode_verifier_error(ip, -EFSCORRUPTED, __func__,
				2124	dip, sizeof(*dip), __this_address);
Darrick J. Wong	f2fc16a	2019-02-07 10:37:15 -0800	[diff] [blame]	2125	error = -EFSCORRUPTED;
Darrick J. Wong	a5155b8	2019-11-02 09:40:53 -0700	[diff] [blame]	2126	}
Darrick J. Wong	f2fc16a	2019-02-07 10:37:15 -0800	[diff] [blame]	2127	goto out;
				2128	}
				2129
				2130	/* Ok, update the new pointer. */
Dave Chinner	f40aadb	2021-06-02 10:48:51 +1000	[diff] [blame]	2131	xfs_iunlink_update_dinode(tp, pag, XFS_INO_TO_AGINO(mp, ip->i_ino),
Darrick J. Wong	f2fc16a	2019-02-07 10:37:15 -0800	[diff] [blame]	2132	ibp, dip, &ip->i_imap, next_agino);
				2133	return 0;
				2134	out:
				2135	xfs_trans_brelse(tp, ibp);
				2136	return error;
				2137	}
				2138
Darrick J. Wong	9a4a511	2019-02-07 10:37:14 -0800	[diff] [blame]	2139	/*
Darrick J. Wong	c4a6bf7	2019-02-13 11:15:17 -0800	[diff] [blame]	2140	* This is called when the inode's link count has gone to 0 or we are creating
				2141	* a tmpfile via O_TMPFILE. The inode @ip must have nlink == 0.
Dave Chinner	54d7b5c	2016-02-09 16:54:58 +1100	[diff] [blame]	2142	*
				2143	* We place the on-disk inode on a list in the AGI. It will be pulled from this
				2144	* list when the inode is freed.
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	2145	*/
Dave Chinner	54d7b5c	2016-02-09 16:54:58 +1100	[diff] [blame]	2146	STATIC int
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	2147	xfs_iunlink(
Darrick J. Wong	5837f62	2019-02-07 10:37:13 -0800	[diff] [blame]	2148	struct xfs_trans *tp,
				2149	struct xfs_inode *ip)
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	2150	{
Darrick J. Wong	5837f62	2019-02-07 10:37:13 -0800	[diff] [blame]	2151	struct xfs_mount *mp = tp->t_mountp;
Dave Chinner	f40aadb	2021-06-02 10:48:51 +1000	[diff] [blame]	2152	struct xfs_perag *pag;
Darrick J. Wong	5837f62	2019-02-07 10:37:13 -0800	[diff] [blame]	2153	struct xfs_agi *agi;
Darrick J. Wong	5837f62	2019-02-07 10:37:13 -0800	[diff] [blame]	2154	struct xfs_buf *agibp;
Darrick J. Wong	86bfd37	2019-02-07 10:37:14 -0800	[diff] [blame]	2155	xfs_agino_t next_agino;
Darrick J. Wong	5837f62	2019-02-07 10:37:13 -0800	[diff] [blame]	2156	xfs_agino_t agino = XFS_INO_TO_AGINO(mp, ip->i_ino);
				2157	short bucket_index = agino % XFS_AGI_UNLINKED_BUCKETS;
Darrick J. Wong	5837f62	2019-02-07 10:37:13 -0800	[diff] [blame]	2158	int error;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	2159
Darrick J. Wong	c4a6bf7	2019-02-13 11:15:17 -0800	[diff] [blame]	2160	ASSERT(VFS_I(ip)->i_nlink == 0);
Dave Chinner	c19b3b05	2016-02-09 16:54:58 +1100	[diff] [blame]	2161	ASSERT(VFS_I(ip)->i_mode != 0);
Darrick J. Wong	4664c66	2019-02-07 10:37:16 -0800	[diff] [blame]	2162	trace_xfs_iunlink(ip);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	2163
Dave Chinner	f40aadb	2021-06-02 10:48:51 +1000	[diff] [blame]	2164	pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ip->i_ino));
				2165
Darrick J. Wong	5837f62	2019-02-07 10:37:13 -0800	[diff] [blame]	2166	/* Get the agi buffer first. It ensures lock ordering on the list. */
Dave Chinner	f40aadb	2021-06-02 10:48:51 +1000	[diff] [blame]	2167	error = xfs_read_agi(mp, tp, pag->pag_agno, &agibp);
Vlad Apostolov	859d718	2007-10-11 17:44:18 +1000	[diff] [blame]	2168	if (error)
Dave Chinner	f40aadb	2021-06-02 10:48:51 +1000	[diff] [blame]	2169	goto out;
Christoph Hellwig	370c782	2020-03-10 08:57:29 -0700	[diff] [blame]	2170	agi = agibp->b_addr;
Christoph Hellwig	5e1be0f	2008-11-28 14:23:37 +1100	[diff] [blame]	2171
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	2172	/*
Darrick J. Wong	86bfd37	2019-02-07 10:37:14 -0800	[diff] [blame]	2173	* Get the index into the agi hash table for the list this inode will
				2174	* go on. Make sure the pointer isn't garbage and that this inode
				2175	* isn't already on the list.
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	2176	*/
Darrick J. Wong	86bfd37	2019-02-07 10:37:14 -0800	[diff] [blame]	2177	next_agino = be32_to_cpu(agi->agi_unlinked[bucket_index]);
				2178	if (next_agino == agino \|\|
Dave Chinner	f40aadb	2021-06-02 10:48:51 +1000	[diff] [blame]	2179	!xfs_verify_agino_or_null(mp, pag->pag_agno, next_agino)) {
Darrick J. Wong	8d57c21	2020-03-11 10:37:54 -0700	[diff] [blame]	2180	xfs_buf_mark_corrupt(agibp);
Dave Chinner	f40aadb	2021-06-02 10:48:51 +1000	[diff] [blame]	2181	error = -EFSCORRUPTED;
				2182	goto out;
Darrick J. Wong	a5155b8	2019-11-02 09:40:53 -0700	[diff] [blame]	2183	}
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	2184
Darrick J. Wong	86bfd37	2019-02-07 10:37:14 -0800	[diff] [blame]	2185	if (next_agino != NULLAGINO) {
Darrick J. Wong	9b24717	2019-02-07 10:37:16 -0800	[diff] [blame]	2186	xfs_agino_t old_agino;
Darrick J. Wong	f2fc16a	2019-02-07 10:37:15 -0800	[diff] [blame]	2187
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	2188	/*
Darrick J. Wong	f2fc16a	2019-02-07 10:37:15 -0800	[diff] [blame]	2189	* There is already another inode in the bucket, so point this
				2190	* inode to the current head of the list.
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	2191	*/
Dave Chinner	f40aadb	2021-06-02 10:48:51 +1000	[diff] [blame]	2192	error = xfs_iunlink_update_inode(tp, ip, pag, next_agino,
Darrick J. Wong	f2fc16a	2019-02-07 10:37:15 -0800	[diff] [blame]	2193	&old_agino);
Vlad Apostolov	c319b58	2007-11-23 16:27:51 +1100	[diff] [blame]	2194	if (error)
Dave Chinner	f40aadb	2021-06-02 10:48:51 +1000	[diff] [blame]	2195	goto out;
Darrick J. Wong	f2fc16a	2019-02-07 10:37:15 -0800	[diff] [blame]	2196	ASSERT(old_agino == NULLAGINO);
Darrick J. Wong	9b24717	2019-02-07 10:37:16 -0800	[diff] [blame]	2197
				2198	/*
				2199	* agino has been unlinked, add a backref from the next inode
				2200	* back to agino.
				2201	*/
Dave Chinner	f40aadb	2021-06-02 10:48:51 +1000	[diff] [blame]	2202	error = xfs_iunlink_add_backref(pag, agino, next_agino);
Darrick J. Wong	9b24717	2019-02-07 10:37:16 -0800	[diff] [blame]	2203	if (error)
Dave Chinner	f40aadb	2021-06-02 10:48:51 +1000	[diff] [blame]	2204	goto out;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	2205	}
				2206
Darrick J. Wong	9a4a511	2019-02-07 10:37:14 -0800	[diff] [blame]	2207	/* Point the head of the list to point to this inode. */
Dave Chinner	f40aadb	2021-06-02 10:48:51 +1000	[diff] [blame]	2208	error = xfs_iunlink_update_bucket(tp, pag, agibp, bucket_index, agino);
				2209	out:
				2210	xfs_perag_put(pag);
				2211	return error;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	2212	}
				2213
Darrick J. Wong	23ffa52	2019-02-07 10:37:15 -0800	[diff] [blame]	2214	/* Return the imap, dinode pointer, and buffer for an inode. */
				2215	STATIC int
				2216	xfs_iunlink_map_ino(
				2217	struct xfs_trans *tp,
				2218	xfs_agnumber_t agno,
				2219	xfs_agino_t agino,
				2220	struct xfs_imap *imap,
				2221	struct xfs_dinode **dipp,
				2222	struct xfs_buf **bpp)
				2223	{
				2224	struct xfs_mount *mp = tp->t_mountp;
				2225	int error;
				2226
				2227	imap->im_blkno = 0;
				2228	error = xfs_imap(mp, tp, XFS_AGINO_TO_INO(mp, agno, agino), imap, 0);
				2229	if (error) {
				2230	xfs_warn(mp, "%s: xfs_imap returned error %d.",
				2231	__func__, error);
				2232	return error;
				2233	}
				2234
Christoph Hellwig	af9dcdd	2021-03-29 11:11:37 -0700	[diff] [blame]	2235	error = xfs_imap_to_bp(mp, tp, imap, bpp);
Darrick J. Wong	23ffa52	2019-02-07 10:37:15 -0800	[diff] [blame]	2236	if (error) {
				2237	xfs_warn(mp, "%s: xfs_imap_to_bp returned error %d.",
				2238	__func__, error);
				2239	return error;
				2240	}
				2241
Christoph Hellwig	af9dcdd	2021-03-29 11:11:37 -0700	[diff] [blame]	2242	dipp = xfs_buf_offset(bpp, imap->im_boffset);
Darrick J. Wong	23ffa52	2019-02-07 10:37:15 -0800	[diff] [blame]	2243	return 0;
				2244	}
				2245
				2246	/*
				2247	* Walk the unlinked chain from @head_agino until we find the inode that
				2248	* points to @target_agino. Return the inode number, map, dinode pointer,
				2249	* and inode cluster buffer of that inode as @agino, @imap, @dipp, and @bpp.
				2250	*
				2251	* @tp, @pag, @head_agino, and @target_agino are input parameters.
				2252	* @agino, @imap, @dipp, and @bpp are all output parameters.
				2253	*
				2254	* Do not call this function if @target_agino is the head of the list.
				2255	*/
				2256	STATIC int
				2257	xfs_iunlink_map_prev(
				2258	struct xfs_trans *tp,
Dave Chinner	f40aadb	2021-06-02 10:48:51 +1000	[diff] [blame]	2259	struct xfs_perag *pag,
Darrick J. Wong	23ffa52	2019-02-07 10:37:15 -0800	[diff] [blame]	2260	xfs_agino_t head_agino,
				2261	xfs_agino_t target_agino,
				2262	xfs_agino_t *agino,
				2263	struct xfs_imap *imap,
				2264	struct xfs_dinode **dipp,
Dave Chinner	f40aadb	2021-06-02 10:48:51 +1000	[diff] [blame]	2265	struct xfs_buf **bpp)
Darrick J. Wong	23ffa52	2019-02-07 10:37:15 -0800	[diff] [blame]	2266	{
				2267	struct xfs_mount *mp = tp->t_mountp;
				2268	xfs_agino_t next_agino;
				2269	int error;
				2270
				2271	ASSERT(head_agino != target_agino);
				2272	*bpp = NULL;
				2273
Darrick J. Wong	9b24717	2019-02-07 10:37:16 -0800	[diff] [blame]	2274	/* See if our backref cache can find it faster. */
				2275	*agino = xfs_iunlink_lookup_backref(pag, target_agino);
				2276	if (*agino != NULLAGINO) {
Dave Chinner	f40aadb	2021-06-02 10:48:51 +1000	[diff] [blame]	2277	error = xfs_iunlink_map_ino(tp, pag->pag_agno, *agino, imap,
				2278	dipp, bpp);
Darrick J. Wong	9b24717	2019-02-07 10:37:16 -0800	[diff] [blame]	2279	if (error)
				2280	return error;
				2281
				2282	if (be32_to_cpu((*dipp)->di_next_unlinked) == target_agino)
				2283	return 0;
				2284
				2285	/*
				2286	* If we get here the cache contents were corrupt, so drop the
				2287	* buffer and fall back to walking the bucket list.
				2288	*/
				2289	xfs_trans_brelse(tp, *bpp);
				2290	*bpp = NULL;
				2291	WARN_ON_ONCE(1);
				2292	}
				2293
Dave Chinner	f40aadb	2021-06-02 10:48:51 +1000	[diff] [blame]	2294	trace_xfs_iunlink_map_prev_fallback(mp, pag->pag_agno);
Darrick J. Wong	9b24717	2019-02-07 10:37:16 -0800	[diff] [blame]	2295
				2296	/* Otherwise, walk the entire bucket until we find it. */
Darrick J. Wong	23ffa52	2019-02-07 10:37:15 -0800	[diff] [blame]	2297	next_agino = head_agino;
				2298	while (next_agino != target_agino) {
				2299	xfs_agino_t unlinked_agino;
				2300
				2301	if (*bpp)
				2302	xfs_trans_brelse(tp, *bpp);
				2303
				2304	*agino = next_agino;
Dave Chinner	f40aadb	2021-06-02 10:48:51 +1000	[diff] [blame]	2305	error = xfs_iunlink_map_ino(tp, pag->pag_agno, next_agino, imap,
				2306	dipp, bpp);
Darrick J. Wong	23ffa52	2019-02-07 10:37:15 -0800	[diff] [blame]	2307	if (error)
				2308	return error;
				2309
				2310	unlinked_agino = be32_to_cpu((*dipp)->di_next_unlinked);
				2311	/*
				2312	* Make sure this pointer is valid and isn't an obvious
				2313	* infinite loop.
				2314	*/
Dave Chinner	f40aadb	2021-06-02 10:48:51 +1000	[diff] [blame]	2315	if (!xfs_verify_agino(mp, pag->pag_agno, unlinked_agino) \|\|
Darrick J. Wong	23ffa52	2019-02-07 10:37:15 -0800	[diff] [blame]	2316	next_agino == unlinked_agino) {
				2317	XFS_CORRUPTION_ERROR(__func__,
				2318	XFS_ERRLEVEL_LOW, mp,
				2319	dipp, sizeof(*dipp));
				2320	error = -EFSCORRUPTED;
				2321	return error;
				2322	}
				2323	next_agino = unlinked_agino;
				2324	}
				2325
				2326	return 0;
				2327	}
				2328
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	2329	/*
				2330	* Pull the on-disk inode from the AGI unlinked list.
				2331	*/
				2332	STATIC int
				2333	xfs_iunlink_remove(
Darrick J. Wong	5837f62	2019-02-07 10:37:13 -0800	[diff] [blame]	2334	struct xfs_trans *tp,
Dave Chinner	f40aadb	2021-06-02 10:48:51 +1000	[diff] [blame]	2335	struct xfs_perag *pag,
Darrick J. Wong	5837f62	2019-02-07 10:37:13 -0800	[diff] [blame]	2336	struct xfs_inode *ip)
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	2337	{
Darrick J. Wong	5837f62	2019-02-07 10:37:13 -0800	[diff] [blame]	2338	struct xfs_mount *mp = tp->t_mountp;
				2339	struct xfs_agi *agi;
Darrick J. Wong	5837f62	2019-02-07 10:37:13 -0800	[diff] [blame]	2340	struct xfs_buf *agibp;
Darrick J. Wong	5837f62	2019-02-07 10:37:13 -0800	[diff] [blame]	2341	struct xfs_buf *last_ibp;
				2342	struct xfs_dinode *last_dip = NULL;
Darrick J. Wong	5837f62	2019-02-07 10:37:13 -0800	[diff] [blame]	2343	xfs_agino_t agino = XFS_INO_TO_AGINO(mp, ip->i_ino);
				2344	xfs_agino_t next_agino;
Darrick J. Wong	b1d2a06	2019-02-07 10:37:15 -0800	[diff] [blame]	2345	xfs_agino_t head_agino;
Darrick J. Wong	5837f62	2019-02-07 10:37:13 -0800	[diff] [blame]	2346	short bucket_index = agino % XFS_AGI_UNLINKED_BUCKETS;
Darrick J. Wong	5837f62	2019-02-07 10:37:13 -0800	[diff] [blame]	2347	int error;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	2348
Darrick J. Wong	4664c66	2019-02-07 10:37:16 -0800	[diff] [blame]	2349	trace_xfs_iunlink_remove(ip);
				2350
Darrick J. Wong	5837f62	2019-02-07 10:37:13 -0800	[diff] [blame]	2351	/* Get the agi buffer first. It ensures lock ordering on the list. */
Dave Chinner	f40aadb	2021-06-02 10:48:51 +1000	[diff] [blame]	2352	error = xfs_read_agi(mp, tp, pag->pag_agno, &agibp);
Christoph Hellwig	5e1be0f	2008-11-28 14:23:37 +1100	[diff] [blame]	2353	if (error)
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	2354	return error;
Christoph Hellwig	370c782	2020-03-10 08:57:29 -0700	[diff] [blame]	2355	agi = agibp->b_addr;
Christoph Hellwig	5e1be0f	2008-11-28 14:23:37 +1100	[diff] [blame]	2356
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	2357	/*
Darrick J. Wong	86bfd37	2019-02-07 10:37:14 -0800	[diff] [blame]	2358	* Get the index into the agi hash table for the list this inode will
				2359	* go on. Make sure the head pointer isn't garbage.
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	2360	*/
Darrick J. Wong	b1d2a06	2019-02-07 10:37:15 -0800	[diff] [blame]	2361	head_agino = be32_to_cpu(agi->agi_unlinked[bucket_index]);
Dave Chinner	f40aadb	2021-06-02 10:48:51 +1000	[diff] [blame]	2362	if (!xfs_verify_agino(mp, pag->pag_agno, head_agino)) {
Darrick J. Wong	d2e7366	2018-06-04 11:27:51 -0700	[diff] [blame]	2363	XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp,
				2364	agi, sizeof(*agi));
				2365	return -EFSCORRUPTED;
				2366	}
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	2367
Darrick J. Wong	b1d2a06	2019-02-07 10:37:15 -0800	[diff] [blame]	2368	/*
				2369	* Set our inode's next_unlinked pointer to NULL and then return
				2370	* the old pointer value so that we can update whatever was previous
				2371	* to us in the list to point to whatever was next in the list.
				2372	*/
Dave Chinner	f40aadb	2021-06-02 10:48:51 +1000	[diff] [blame]	2373	error = xfs_iunlink_update_inode(tp, ip, pag, NULLAGINO, &next_agino);
Darrick J. Wong	b1d2a06	2019-02-07 10:37:15 -0800	[diff] [blame]	2374	if (error)
				2375	return error;
Darrick J. Wong	9a4a511	2019-02-07 10:37:14 -0800	[diff] [blame]	2376
Darrick J. Wong	9b24717	2019-02-07 10:37:16 -0800	[diff] [blame]	2377	/*
				2378	* If there was a backref pointing from the next inode back to this
				2379	* one, remove it because we've removed this inode from the list.
				2380	*
				2381	* Later, if this inode was in the middle of the list we'll update
				2382	* this inode's backref to point from the next inode.
				2383	*/
				2384	if (next_agino != NULLAGINO) {
Dave Chinner	f40aadb	2021-06-02 10:48:51 +1000	[diff] [blame]	2385	error = xfs_iunlink_change_backref(pag, next_agino, NULLAGINO);
Darrick J. Wong	9b24717	2019-02-07 10:37:16 -0800	[diff] [blame]	2386	if (error)
Gao Xiang	92a0054	2020-07-13 09:13:00 -0700	[diff] [blame]	2387	return error;
Darrick J. Wong	9b24717	2019-02-07 10:37:16 -0800	[diff] [blame]	2388	}
				2389
Gao Xiang	92a0054	2020-07-13 09:13:00 -0700	[diff] [blame]	2390	if (head_agino != agino) {
Darrick J. Wong	f2fc16a	2019-02-07 10:37:15 -0800	[diff] [blame]	2391	struct xfs_imap imap;
				2392	xfs_agino_t prev_agino;
				2393
Darrick J. Wong	23ffa52	2019-02-07 10:37:15 -0800	[diff] [blame]	2394	/* We need to search the list for the inode being freed. */
Dave Chinner	f40aadb	2021-06-02 10:48:51 +1000	[diff] [blame]	2395	error = xfs_iunlink_map_prev(tp, pag, head_agino, agino,
				2396	&prev_agino, &imap, &last_dip, &last_ibp);
Darrick J. Wong	23ffa52	2019-02-07 10:37:15 -0800	[diff] [blame]	2397	if (error)
Gao Xiang	92a0054	2020-07-13 09:13:00 -0700	[diff] [blame]	2398	return error;
Christoph Hellwig	475ee41	2012-07-03 12:21:22 -0400	[diff] [blame]	2399
Darrick J. Wong	f2fc16a	2019-02-07 10:37:15 -0800	[diff] [blame]	2400	/* Point the previous inode on the list to the next inode. */
Dave Chinner	f40aadb	2021-06-02 10:48:51 +1000	[diff] [blame]	2401	xfs_iunlink_update_dinode(tp, pag, prev_agino, last_ibp,
Darrick J. Wong	f2fc16a	2019-02-07 10:37:15 -0800	[diff] [blame]	2402	last_dip, &imap, next_agino);
Darrick J. Wong	9b24717	2019-02-07 10:37:16 -0800	[diff] [blame]	2403
				2404	/*
				2405	* Now we deal with the backref for this inode. If this inode
				2406	* pointed at a real inode, change the backref that pointed to
				2407	* us to point to our old next. If this inode was the end of
				2408	* the list, delete the backref that pointed to us. Note that
				2409	* change_backref takes care of deleting the backref if
				2410	* next_agino is NULLAGINO.
				2411	*/
Gao Xiang	92a0054	2020-07-13 09:13:00 -0700	[diff] [blame]	2412	return xfs_iunlink_change_backref(agibp->b_pag, agino,
				2413	next_agino);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	2414	}
Darrick J. Wong	9b24717	2019-02-07 10:37:16 -0800	[diff] [blame]	2415
Gao Xiang	92a0054	2020-07-13 09:13:00 -0700	[diff] [blame]	2416	/* Point the head of the list to the next unlinked inode. */
Dave Chinner	f40aadb	2021-06-02 10:48:51 +1000	[diff] [blame]	2417	return xfs_iunlink_update_bucket(tp, pag, agibp, bucket_index,
Gao Xiang	92a0054	2020-07-13 09:13:00 -0700	[diff] [blame]	2418	next_agino);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	2419	}
				2420
Dave Chinner	5b3eed7	2010-08-24 11:42:41 +1000	[diff] [blame]	2421	/*
Dave Chinner	71e3e35	2020-06-29 14:49:18 -0700	[diff] [blame]	2422	* Look up the inode number specified and if it is not already marked XFS_ISTALE
				2423	* mark it stale. We should only find clean inodes in this lookup that aren't
				2424	* already stale.
Dave Chinner	5806165	2020-03-24 20:10:30 -0700	[diff] [blame]	2425	*/
Dave Chinner	71e3e35	2020-06-29 14:49:18 -0700	[diff] [blame]	2426	static void
				2427	xfs_ifree_mark_inode_stale(
Dave Chinner	f40aadb	2021-06-02 10:48:51 +1000	[diff] [blame]	2428	struct xfs_perag *pag,
Dave Chinner	5806165	2020-03-24 20:10:30 -0700	[diff] [blame]	2429	struct xfs_inode *free_ip,
Brian Foster	d9fdd0a	2020-04-02 08:18:57 -0700	[diff] [blame]	2430	xfs_ino_t inum)
Dave Chinner	5806165	2020-03-24 20:10:30 -0700	[diff] [blame]	2431	{
Dave Chinner	f40aadb	2021-06-02 10:48:51 +1000	[diff] [blame]	2432	struct xfs_mount *mp = pag->pag_mount;
Dave Chinner	71e3e35	2020-06-29 14:49:18 -0700	[diff] [blame]	2433	struct xfs_inode_log_item *iip;
Dave Chinner	5806165	2020-03-24 20:10:30 -0700	[diff] [blame]	2434	struct xfs_inode *ip;
				2435
				2436	retry:
				2437	rcu_read_lock();
				2438	ip = radix_tree_lookup(&pag->pag_ici_root, XFS_INO_TO_AGINO(mp, inum));
				2439
				2440	/* Inode not in memory, nothing to do */
Dave Chinner	71e3e35	2020-06-29 14:49:18 -0700	[diff] [blame]	2441	if (!ip) {
				2442	rcu_read_unlock();
				2443	return;
				2444	}
Dave Chinner	5806165	2020-03-24 20:10:30 -0700	[diff] [blame]	2445
				2446	/*
				2447	* because this is an RCU protected lookup, we could find a recently
				2448	* freed or even reallocated inode during the lookup. We need to check
				2449	* under the i_flags_lock for a valid inode here. Skip it if it is not
				2450	* valid, the wrong inode or stale.
				2451	*/
				2452	spin_lock(&ip->i_flags_lock);
Dave Chinner	718ecc5	2020-08-17 16:41:01 -0700	[diff] [blame]	2453	if (ip->i_ino != inum \|\| __xfs_iflags_test(ip, XFS_ISTALE))
				2454	goto out_iflags_unlock;
Dave Chinner	5806165	2020-03-24 20:10:30 -0700	[diff] [blame]	2455
				2456	/*
				2457	* Don't try to lock/unlock the current inode, but we _cannot_ skip the
				2458	* other inodes that we did not find in the list attached to the buffer
				2459	* and are not already marked stale. If we can't lock it, back off and
				2460	* retry.
				2461	*/
				2462	if (ip != free_ip) {
				2463	if (!xfs_ilock_nowait(ip, XFS_ILOCK_EXCL)) {
Dave Chinner	71e3e35	2020-06-29 14:49:18 -0700	[diff] [blame]	2464	spin_unlock(&ip->i_flags_lock);
Dave Chinner	5806165	2020-03-24 20:10:30 -0700	[diff] [blame]	2465	rcu_read_unlock();
				2466	delay(1);
				2467	goto retry;
				2468	}
Dave Chinner	5806165	2020-03-24 20:10:30 -0700	[diff] [blame]	2469	}
Dave Chinner	71e3e35	2020-06-29 14:49:18 -0700	[diff] [blame]	2470	ip->i_flags \|= XFS_ISTALE;
Dave Chinner	5806165	2020-03-24 20:10:30 -0700	[diff] [blame]	2471
Dave Chinner	5806165	2020-03-24 20:10:30 -0700	[diff] [blame]	2472	/*
Dave Chinner	718ecc5	2020-08-17 16:41:01 -0700	[diff] [blame]	2473	* If the inode is flushing, it is already attached to the buffer. All
Dave Chinner	71e3e35	2020-06-29 14:49:18 -0700	[diff] [blame]	2474	* we needed to do here is mark the inode stale so buffer IO completion
				2475	* will remove it from the AIL.
Dave Chinner	5806165	2020-03-24 20:10:30 -0700	[diff] [blame]	2476	*/
Dave Chinner	71e3e35	2020-06-29 14:49:18 -0700	[diff] [blame]	2477	iip = ip->i_itemp;
Dave Chinner	718ecc5	2020-08-17 16:41:01 -0700	[diff] [blame]	2478	if (__xfs_iflags_test(ip, XFS_IFLUSHING)) {
Dave Chinner	71e3e35	2020-06-29 14:49:18 -0700	[diff] [blame]	2479	ASSERT(!list_empty(&iip->ili_item.li_bio_list));
				2480	ASSERT(iip->ili_last_fields);
				2481	goto out_iunlock;
Dave Chinner	5806165	2020-03-24 20:10:30 -0700	[diff] [blame]	2482	}
Dave Chinner	5806165	2020-03-24 20:10:30 -0700	[diff] [blame]	2483
Dave Chinner	71e3e35	2020-06-29 14:49:18 -0700	[diff] [blame]	2484	/*
Dave Chinner	48d55e2	2020-06-29 14:49:18 -0700	[diff] [blame]	2485	* Inodes not attached to the buffer can be released immediately.
				2486	* Everything else has to go through xfs_iflush_abort() on journal
				2487	* commit as the flock synchronises removal of the inode from the
				2488	* cluster buffer against inode reclaim.
Dave Chinner	71e3e35	2020-06-29 14:49:18 -0700	[diff] [blame]	2489	*/
Dave Chinner	718ecc5	2020-08-17 16:41:01 -0700	[diff] [blame]	2490	if (!iip \|\| list_empty(&iip->ili_item.li_bio_list))
Dave Chinner	71e3e35	2020-06-29 14:49:18 -0700	[diff] [blame]	2491	goto out_iunlock;
Dave Chinner	718ecc5	2020-08-17 16:41:01 -0700	[diff] [blame]	2492
				2493	__xfs_iflags_set(ip, XFS_IFLUSHING);
				2494	spin_unlock(&ip->i_flags_lock);
				2495	rcu_read_unlock();
Dave Chinner	71e3e35	2020-06-29 14:49:18 -0700	[diff] [blame]	2496
				2497	/* we have a dirty inode in memory that has not yet been flushed. */
Dave Chinner	71e3e35	2020-06-29 14:49:18 -0700	[diff] [blame]	2498	spin_lock(&iip->ili_lock);
				2499	iip->ili_last_fields = iip->ili_fields;
				2500	iip->ili_fields = 0;
				2501	iip->ili_fsync_fields = 0;
				2502	spin_unlock(&iip->ili_lock);
Dave Chinner	71e3e35	2020-06-29 14:49:18 -0700	[diff] [blame]	2503	ASSERT(iip->ili_last_fields);
				2504
Dave Chinner	718ecc5	2020-08-17 16:41:01 -0700	[diff] [blame]	2505	if (ip != free_ip)
				2506	xfs_iunlock(ip, XFS_ILOCK_EXCL);
				2507	return;
				2508
Dave Chinner	71e3e35	2020-06-29 14:49:18 -0700	[diff] [blame]	2509	out_iunlock:
				2510	if (ip != free_ip)
				2511	xfs_iunlock(ip, XFS_ILOCK_EXCL);
Dave Chinner	718ecc5	2020-08-17 16:41:01 -0700	[diff] [blame]	2512	out_iflags_unlock:
				2513	spin_unlock(&ip->i_flags_lock);
				2514	rcu_read_unlock();
Dave Chinner	5806165	2020-03-24 20:10:30 -0700	[diff] [blame]	2515	}
				2516
				2517	/*
Zhi Yong Wu	0b8182d	2013-08-12 03:14:59 +0000	[diff] [blame]	2518	* A big issue when freeing the inode cluster is that we _cannot_ skip any
Dave Chinner	5b3eed7	2010-08-24 11:42:41 +1000	[diff] [blame]	2519	* inodes that are in memory - they all must be marked stale and attached to
				2520	* the cluster buffer.
				2521	*/
Dave Chinner	f40aadb	2021-06-02 10:48:51 +1000	[diff] [blame]	2522	static int
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	2523	xfs_ifree_cluster(
Dave Chinner	71e3e35	2020-06-29 14:49:18 -0700	[diff] [blame]	2524	struct xfs_trans *tp,
Dave Chinner	f40aadb	2021-06-02 10:48:51 +1000	[diff] [blame]	2525	struct xfs_perag *pag,
				2526	struct xfs_inode *free_ip,
Brian Foster	09b5660	2015-05-29 09:26:03 +1000	[diff] [blame]	2527	struct xfs_icluster *xic)
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	2528	{
Dave Chinner	71e3e35	2020-06-29 14:49:18 -0700	[diff] [blame]	2529	struct xfs_mount *mp = free_ip->i_mount;
				2530	struct xfs_ino_geometry *igeo = M_IGEO(mp);
				2531	struct xfs_buf *bp;
				2532	xfs_daddr_t blkno;
				2533	xfs_ino_t inum = xic->first_ino;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	2534	int nbufs;
Dave Chinner	5b257b4	2010-06-03 16:22:29 +1000	[diff] [blame]	2535	int i, j;
Brian Foster	3cdaa18	2015-06-04 13:03:34 +1000	[diff] [blame]	2536	int ioffset;
Darrick J. Wong	ce92464	2020-01-23 17:01:18 -0800	[diff] [blame]	2537	int error;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	2538
Darrick J. Wong	ef32595	2019-06-05 11:19:34 -0700	[diff] [blame]	2539	nbufs = igeo->ialloc_blks / igeo->blocks_per_cluster;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	2540
Darrick J. Wong	ef32595	2019-06-05 11:19:34 -0700	[diff] [blame]	2541	for (j = 0; j < nbufs; j++, inum += igeo->inodes_per_cluster) {
Brian Foster	09b5660	2015-05-29 09:26:03 +1000	[diff] [blame]	2542	/*
				2543	* The allocation bitmap tells us which inodes of the chunk were
				2544	* physically allocated. Skip the cluster if an inode falls into
				2545	* a sparse region.
				2546	*/
Brian Foster	3cdaa18	2015-06-04 13:03:34 +1000	[diff] [blame]	2547	ioffset = inum - xic->first_ino;
				2548	if ((xic->alloc & XFS_INOBT_MASK(ioffset)) == 0) {
Darrick J. Wong	ef32595	2019-06-05 11:19:34 -0700	[diff] [blame]	2549	ASSERT(ioffset % igeo->inodes_per_cluster == 0);
Brian Foster	09b5660	2015-05-29 09:26:03 +1000	[diff] [blame]	2550	continue;
				2551	}
				2552
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	2553	blkno = XFS_AGB_TO_DADDR(mp, XFS_INO_TO_AGNO(mp, inum),
				2554	XFS_INO_TO_AGBNO(mp, inum));
				2555
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	2556	/*
Dave Chinner	5b257b4	2010-06-03 16:22:29 +1000	[diff] [blame]	2557	* We obtain and lock the backing buffer first in the process
Dave Chinner	718ecc5	2020-08-17 16:41:01 -0700	[diff] [blame]	2558	* here to ensure dirty inodes attached to the buffer remain in
				2559	* the flushing state while we mark them stale.
				2560	*
Dave Chinner	5b257b4	2010-06-03 16:22:29 +1000	[diff] [blame]	2561	* If we scan the in-memory inodes first, then buffer IO can
				2562	* complete before we get a lock on it, and hence we may fail
				2563	* to mark all the active inodes on the buffer stale.
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	2564	*/
Darrick J. Wong	ce92464	2020-01-23 17:01:18 -0800	[diff] [blame]	2565	error = xfs_trans_get_buf(tp, mp->m_ddev_targp, blkno,
				2566	mp->m_bsize * igeo->blocks_per_cluster,
				2567	XBF_UNMAPPED, &bp);
Dave Chinner	71e3e35	2020-06-29 14:49:18 -0700	[diff] [blame]	2568	if (error)
Darrick J. Wong	ce92464	2020-01-23 17:01:18 -0800	[diff] [blame]	2569	return error;
Dave Chinner	b0f539d	2012-11-14 17:53:49 +1100	[diff] [blame]	2570
				2571	/*
				2572	* This buffer may not have been correctly initialised as we
				2573	* didn't read it from disk. That's not important because we are
				2574	* only using to mark the buffer as stale in the log, and to
				2575	* attach stale cached inodes on it. That means it will never be
				2576	* dispatched for IO. If it is, we want to know about it, and we
				2577	* want it to fail. We can acheive this by adding a write
				2578	* verifier to the buffer.
				2579	*/
Colin Ian King	8c4ce79	2018-12-12 08:46:20 -0800	[diff] [blame]	2580	bp->b_ops = &xfs_inode_buf_ops;
Dave Chinner	b0f539d	2012-11-14 17:53:49 +1100	[diff] [blame]	2581
Dave Chinner	5b257b4	2010-06-03 16:22:29 +1000	[diff] [blame]	2582	/*
Dave Chinner	71e3e35	2020-06-29 14:49:18 -0700	[diff] [blame]	2583	* Now we need to set all the cached clean inodes as XFS_ISTALE,
				2584	* too. This requires lookups, and will skip inodes that we've
				2585	* already marked XFS_ISTALE.
Dave Chinner	5b257b4	2010-06-03 16:22:29 +1000	[diff] [blame]	2586	*/
Dave Chinner	71e3e35	2020-06-29 14:49:18 -0700	[diff] [blame]	2587	for (i = 0; i < igeo->inodes_per_cluster; i++)
Dave Chinner	f40aadb	2021-06-02 10:48:51 +1000	[diff] [blame]	2588	xfs_ifree_mark_inode_stale(pag, free_ip, inum + i);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	2589
Dave Chinner	5b3eed7	2010-08-24 11:42:41 +1000	[diff] [blame]	2590	xfs_trans_stale_inode_buf(tp, bp);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	2591	xfs_trans_binval(tp, bp);
				2592	}
Chandra Seetharaman	2a30f36d	2011-09-20 13:56:55 +0000	[diff] [blame]	2593	return 0;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	2594	}
				2595
				2596	/*
				2597	* This is called to return an inode to the inode free list.
				2598	* The inode should already be truncated to 0 length and have
				2599	* no pages associated with it. This routine also assumes that
				2600	* the inode is already a part of the transaction.
				2601	*
				2602	* The on-disk copy of the inode will have been added to the list
				2603	* of unlinked inodes in the AGI. We need to remove the inode from
				2604	* that list atomically with respect to freeing it here.
				2605	*/
				2606	int
				2607	xfs_ifree(
Brian Foster	0e0417f	2018-07-11 22:26:07 -0700	[diff] [blame]	2608	struct xfs_trans *tp,
				2609	struct xfs_inode *ip)
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	2610	{
Dave Chinner	f40aadb	2021-06-02 10:48:51 +1000	[diff] [blame]	2611	struct xfs_mount *mp = ip->i_mount;
				2612	struct xfs_perag *pag;
Brian Foster	09b5660	2015-05-29 09:26:03 +1000	[diff] [blame]	2613	struct xfs_icluster xic = { 0 };
Dave Chinner	1319ebe	2020-06-29 14:48:46 -0700	[diff] [blame]	2614	struct xfs_inode_log_item *iip = ip->i_itemp;
Dave Chinner	f40aadb	2021-06-02 10:48:51 +1000	[diff] [blame]	2615	int error;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	2616
Christoph Hellwig	579aa9c	2008-04-22 17:34:00 +1000	[diff] [blame]	2617	ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
Dave Chinner	54d7b5c	2016-02-09 16:54:58 +1100	[diff] [blame]	2618	ASSERT(VFS_I(ip)->i_nlink == 0);
Christoph Hellwig	daf8396	2020-05-18 10:27:22 -0700	[diff] [blame]	2619	ASSERT(ip->i_df.if_nextents == 0);
Christoph Hellwig	13d2c10	2021-03-29 11:11:40 -0700	[diff] [blame]	2620	ASSERT(ip->i_disk_size == 0 \|\| !S_ISREG(VFS_I(ip)->i_mode));
Christoph Hellwig	6e73a54	2021-03-29 11:11:40 -0700	[diff] [blame]	2621	ASSERT(ip->i_nblocks == 0);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	2622
Dave Chinner	f40aadb	2021-06-02 10:48:51 +1000	[diff] [blame]	2623	pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ip->i_ino));
				2624
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	2625	/*
				2626	* Pull the on-disk inode from the AGI unlinked list.
				2627	*/
Dave Chinner	f40aadb	2021-06-02 10:48:51 +1000	[diff] [blame]	2628	error = xfs_iunlink_remove(tp, pag, ip);
Dave Chinner	1baaed8	2013-06-27 16:04:50 +1000	[diff] [blame]	2629	if (error)
Dave Chinner	f40aadb	2021-06-02 10:48:51 +1000	[diff] [blame]	2630	goto out;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	2631
Dave Chinner	f40aadb	2021-06-02 10:48:51 +1000	[diff] [blame]	2632	error = xfs_difree(tp, pag, ip->i_ino, &xic);
Dave Chinner	1baaed8	2013-06-27 16:04:50 +1000	[diff] [blame]	2633	if (error)
Dave Chinner	f40aadb	2021-06-02 10:48:51 +1000	[diff] [blame]	2634	goto out;
Dave Chinner	1baaed8	2013-06-27 16:04:50 +1000	[diff] [blame]	2635
Christoph Hellwig	b2c2004	2020-05-18 10:27:21 -0700	[diff] [blame]	2636	/*
				2637	* Free any local-format data sitting around before we reset the
				2638	* data fork to extents format. Note that the attr fork data has
				2639	* already been freed by xfs_attr_inactive.
				2640	*/
Christoph Hellwig	f7e67b2	2020-05-18 10:28:05 -0700	[diff] [blame]	2641	if (ip->i_df.if_format == XFS_DINODE_FMT_LOCAL) {
Christoph Hellwig	b2c2004	2020-05-18 10:27:21 -0700	[diff] [blame]	2642	kmem_free(ip->i_df.if_u1.if_data);
				2643	ip->i_df.if_u1.if_data = NULL;
				2644	ip->i_df.if_bytes = 0;
				2645	}
Darrick J. Wong	98c4f78	2017-11-22 12:21:07 -0800	[diff] [blame]	2646
Dave Chinner	c19b3b05	2016-02-09 16:54:58 +1100	[diff] [blame]	2647	VFS_I(ip)->i_mode = 0; /* mark incore inode as free */
Christoph Hellwig	db07349	2021-03-29 11:11:44 -0700	[diff] [blame]	2648	ip->i_diflags = 0;
Dave Chinner	f40aadb	2021-06-02 10:48:51 +1000	[diff] [blame]	2649	ip->i_diflags2 = mp->m_ino_geo.new_diflags2;
Christoph Hellwig	7821ea3	2021-03-29 11:11:44 -0700	[diff] [blame]	2650	ip->i_forkoff = 0; /* mark the attr fork not in use */
Christoph Hellwig	f7e67b2	2020-05-18 10:28:05 -0700	[diff] [blame]	2651	ip->i_df.if_format = XFS_DINODE_FMT_EXTENTS;
Christoph Hellwig	9b3beb0	2021-03-29 11:11:38 -0700	[diff] [blame]	2652	if (xfs_iflags_test(ip, XFS_IPRESERVE_DM_FIELDS))
				2653	xfs_iflags_clear(ip, XFS_IPRESERVE_DM_FIELDS);
Eric Sandeen	dc1baa7	2018-03-28 17:48:08 -0700	[diff] [blame]	2654
				2655	/* Don't attempt to replay owner changes for a deleted inode */
Dave Chinner	1319ebe	2020-06-29 14:48:46 -0700	[diff] [blame]	2656	spin_lock(&iip->ili_lock);
				2657	iip->ili_fields &= ~(XFS_ILOG_AOWNER \| XFS_ILOG_DOWNER);
				2658	spin_unlock(&iip->ili_lock);
Eric Sandeen	dc1baa7	2018-03-28 17:48:08 -0700	[diff] [blame]	2659
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	2660	/*
				2661	* Bump the generation count so no one will be confused
				2662	* by reincarnations of this inode.
				2663	*/
Dave Chinner	9e9a267	2016-02-09 16:54:58 +1100	[diff] [blame]	2664	VFS_I(ip)->i_generation++;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	2665	xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
				2666
Brian Foster	09b5660	2015-05-29 09:26:03 +1000	[diff] [blame]	2667	if (xic.deleted)
Dave Chinner	f40aadb	2021-06-02 10:48:51 +1000	[diff] [blame]	2668	error = xfs_ifree_cluster(tp, pag, ip, &xic);
				2669	out:
				2670	xfs_perag_put(pag);
Chandra Seetharaman	2a30f36d	2011-09-20 13:56:55 +0000	[diff] [blame]	2671	return error;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	2672	}
				2673
				2674	/*
Christoph Hellwig	60ec678	2010-02-17 19:43:56 +0000	[diff] [blame]	2675	* This is called to unpin an inode. The caller must have the inode locked
				2676	* in at least shared mode so that the buffer cannot be subsequently pinned
				2677	* once someone is waiting for it to be unpinned.
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	2678	*/
Christoph Hellwig	60ec678	2010-02-17 19:43:56 +0000	[diff] [blame]	2679	static void
Christoph Hellwig	f392e63	2011-12-18 20:00:10 +0000	[diff] [blame]	2680	xfs_iunpin(
Christoph Hellwig	60ec678	2010-02-17 19:43:56 +0000	[diff] [blame]	2681	struct xfs_inode *ip)
David Chinner	a3f74ff	2008-03-06 13:43:42 +1100	[diff] [blame]	2682	{
Christoph Hellwig	579aa9c	2008-04-22 17:34:00 +1000	[diff] [blame]	2683	ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL\|XFS_ILOCK_SHARED));
David Chinner	a3f74ff	2008-03-06 13:43:42 +1100	[diff] [blame]	2684
Dave Chinner	4aaf15d	2010-03-08 11:24:07 +1100	[diff] [blame]	2685	trace_xfs_inode_unpin_nowait(ip, _RET_IP_);
				2686
David Chinner	a3f74ff	2008-03-06 13:43:42 +1100	[diff] [blame]	2687	/* Give the log a push to start the unpinning I/O */
Dave Chinner	5f9b4b0	2021-06-18 08:21:52 -0700	[diff] [blame]	2688	xfs_log_force_seq(ip->i_mount, ip->i_itemp->ili_commit_seq, 0, NULL);
Christoph Hellwig	a14a348	2010-01-19 09:56:46 +0000	[diff] [blame]	2689
David Chinner	a3f74ff	2008-03-06 13:43:42 +1100	[diff] [blame]	2690	}
				2691
Christoph Hellwig	f392e63	2011-12-18 20:00:10 +0000	[diff] [blame]	2692	static void
				2693	__xfs_iunpin_wait(
				2694	struct xfs_inode *ip)
				2695	{
				2696	wait_queue_head_t *wq = bit_waitqueue(&ip->i_flags, __XFS_IPINNED_BIT);
				2697	DEFINE_WAIT_BIT(wait, &ip->i_flags, __XFS_IPINNED_BIT);
				2698
				2699	xfs_iunpin(ip);
				2700
				2701	do {
Ingo Molnar	2141713	2017-03-05 11:25:39 +0100	[diff] [blame]	2702	prepare_to_wait(wq, &wait.wq_entry, TASK_UNINTERRUPTIBLE);
Christoph Hellwig	f392e63	2011-12-18 20:00:10 +0000	[diff] [blame]	2703	if (xfs_ipincount(ip))
				2704	io_schedule();
				2705	} while (xfs_ipincount(ip));
Ingo Molnar	2141713	2017-03-05 11:25:39 +0100	[diff] [blame]	2706	finish_wait(wq, &wait.wq_entry);
Christoph Hellwig	f392e63	2011-12-18 20:00:10 +0000	[diff] [blame]	2707	}
				2708
/*
 * Wait for @ip to become unpinned.  Returns immediately when the inode is
 * not pinned at the time of the call.
 */
void
xfs_iunpin_wait(
	struct xfs_inode	*ip)
{
	if (!xfs_ipincount(ip))
		return;

	__xfs_iunpin_wait(ip);
}
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	2716
Dave Chinner	2732036	2013-10-29 22:11:44 +1100	[diff] [blame]	2717	/*
				2718	* Removing an inode from the namespace involves removing the directory entry
				2719	* and dropping the link count on the inode. Removing the directory entry can
				2720	* result in locking an AGF (directory blocks were freed) and removing a link
				2721	* count can result in placing the inode on an unlinked list which results in
				2722	* locking an AGI.
				2723	*
				2724	* The big problem here is that we have an ordering constraint on AGF and AGI
				2725	* locking - inode allocation locks the AGI, then can allocate a new extent for
				2726	* new inodes, locking the AGF after the AGI. Similarly, freeing the inode
				2727	* removes the inode from the unlinked list, requiring that we lock the AGI
				2728	* first, and then freeing the inode can result in an inode chunk being freed
				2729	* and hence freeing disk space requiring that we lock an AGF.
				2730	*
				2731	* Hence the ordering that is imposed by other parts of the code is AGI before
				2732	* AGF. This means we cannot remove the directory entry before we drop the inode
				2733	* reference count and put it on the unlinked list as this results in a lock
				2734	* order of AGF then AGI, and this can deadlock against inode allocation and
				2735	* freeing. Therefore we must drop the link counts before we remove the
				2736	* directory entry.
				2737	*
				2738	* This is still safe from a transactional point of view - it is not until we
Darrick J. Wong	310a75a	2016-08-03 11:18:10 +1000	[diff] [blame]	2739	* get to xfs_defer_finish() that we have the possibility of multiple
Dave Chinner	2732036	2013-10-29 22:11:44 +1100	[diff] [blame]	2740	* transactions in this operation. Hence as long as we remove the directory
				2741	* entry and drop the link count in the first transaction of the remove
				2742	* operation, there are no transactional constraints on the ordering here.
				2743	*/
Dave Chinner	c24b5df	2013-08-12 20:49:45 +1000	[diff] [blame]	2744	int
				2745	xfs_remove(
				2746	xfs_inode_t *dp,
				2747	struct xfs_name *name,
				2748	xfs_inode_t *ip)
				2749	{
				2750	xfs_mount_t *mp = dp->i_mount;
				2751	xfs_trans_t *tp = NULL;
Dave Chinner	c19b3b05	2016-02-09 16:54:58 +1100	[diff] [blame]	2752	int is_dir = S_ISDIR(VFS_I(ip)->i_mode);
Dave Chinner	c24b5df	2013-08-12 20:49:45 +1000	[diff] [blame]	2753	int error = 0;
Dave Chinner	c24b5df	2013-08-12 20:49:45 +1000	[diff] [blame]	2754	uint resblks;
Dave Chinner	c24b5df	2013-08-12 20:49:45 +1000	[diff] [blame]	2755
				2756	trace_xfs_remove(dp, name);
				2757
				2758	if (XFS_FORCED_SHUTDOWN(mp))
Dave Chinner	2451337	2014-06-25 14:58:08 +1000	[diff] [blame]	2759	return -EIO;
Dave Chinner	c24b5df	2013-08-12 20:49:45 +1000	[diff] [blame]	2760
Darrick J. Wong	c14cfcc	2018-05-04 15:30:21 -0700	[diff] [blame]	2761	error = xfs_qm_dqattach(dp);
Dave Chinner	c24b5df	2013-08-12 20:49:45 +1000	[diff] [blame]	2762	if (error)
				2763	goto std_return;
				2764
Darrick J. Wong	c14cfcc	2018-05-04 15:30:21 -0700	[diff] [blame]	2765	error = xfs_qm_dqattach(ip);
Dave Chinner	c24b5df	2013-08-12 20:49:45 +1000	[diff] [blame]	2766	if (error)
				2767	goto std_return;
				2768
Dave Chinner	c24b5df	2013-08-12 20:49:45 +1000	[diff] [blame]	2769	/*
				2770	* We try to get the real space reservation first,
				2771	* allowing for directory btree deletion(s) implying
				2772	* possible bmap insert(s). If we can't get the space
				2773	* reservation then we use 0 instead, and avoid the bmap
				2774	* btree insert(s) in the directory code by, if the bmap
				2775	* insert tries to happen, instead trimming the LAST
				2776	* block from the directory.
				2777	*/
				2778	resblks = XFS_REMOVE_SPACE_RES(mp);
Christoph Hellwig	253f491	2016-04-06 09:19:55 +1000	[diff] [blame]	2779	error = xfs_trans_alloc(mp, &M_RES(mp)->tr_remove, resblks, 0, 0, &tp);
Dave Chinner	2451337	2014-06-25 14:58:08 +1000	[diff] [blame]	2780	if (error == -ENOSPC) {
Dave Chinner	c24b5df	2013-08-12 20:49:45 +1000	[diff] [blame]	2781	resblks = 0;
Christoph Hellwig	253f491	2016-04-06 09:19:55 +1000	[diff] [blame]	2782	error = xfs_trans_alloc(mp, &M_RES(mp)->tr_remove, 0, 0, 0,
				2783	&tp);
Dave Chinner	c24b5df	2013-08-12 20:49:45 +1000	[diff] [blame]	2784	}
				2785	if (error) {
Dave Chinner	2451337	2014-06-25 14:58:08 +1000	[diff] [blame]	2786	ASSERT(error != -ENOSPC);
Christoph Hellwig	253f491	2016-04-06 09:19:55 +1000	[diff] [blame]	2787	goto std_return;
Dave Chinner	c24b5df	2013-08-12 20:49:45 +1000	[diff] [blame]	2788	}
				2789
Darrick J. Wong	7c2d238	2018-01-26 15:27:33 -0800	[diff] [blame]	2790	xfs_lock_two_inodes(dp, XFS_ILOCK_EXCL, ip, XFS_ILOCK_EXCL);
Dave Chinner	c24b5df	2013-08-12 20:49:45 +1000	[diff] [blame]	2791
Christoph Hellwig	6552321	2016-11-30 14:33:25 +1100	[diff] [blame]	2792	xfs_trans_ijoin(tp, dp, XFS_ILOCK_EXCL);
Dave Chinner	c24b5df	2013-08-12 20:49:45 +1000	[diff] [blame]	2793	xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
				2794
				2795	/*
				2796	* If we're removing a directory perform some additional validation.
				2797	*/
				2798	if (is_dir) {
Dave Chinner	54d7b5c	2016-02-09 16:54:58 +1100	[diff] [blame]	2799	ASSERT(VFS_I(ip)->i_nlink >= 2);
				2800	if (VFS_I(ip)->i_nlink != 2) {
Dave Chinner	2451337	2014-06-25 14:58:08 +1000	[diff] [blame]	2801	error = -ENOTEMPTY;
Dave Chinner	c24b5df	2013-08-12 20:49:45 +1000	[diff] [blame]	2802	goto out_trans_cancel;
				2803	}
				2804	if (!xfs_dir_isempty(ip)) {
Dave Chinner	2451337	2014-06-25 14:58:08 +1000	[diff] [blame]	2805	error = -ENOTEMPTY;
Dave Chinner	c24b5df	2013-08-12 20:49:45 +1000	[diff] [blame]	2806	goto out_trans_cancel;
				2807	}
Dave Chinner	c24b5df	2013-08-12 20:49:45 +1000	[diff] [blame]	2808
Dave Chinner	2732036	2013-10-29 22:11:44 +1100	[diff] [blame]	2809	/* Drop the link from ip's "..". */
Dave Chinner	c24b5df	2013-08-12 20:49:45 +1000	[diff] [blame]	2810	error = xfs_droplink(tp, dp);
				2811	if (error)
Dave Chinner	2732036	2013-10-29 22:11:44 +1100	[diff] [blame]	2812	goto out_trans_cancel;
Dave Chinner	c24b5df	2013-08-12 20:49:45 +1000	[diff] [blame]	2813
Dave Chinner	2732036	2013-10-29 22:11:44 +1100	[diff] [blame]	2814	/* Drop the "." link from ip to self. */
Dave Chinner	c24b5df	2013-08-12 20:49:45 +1000	[diff] [blame]	2815	error = xfs_droplink(tp, ip);
				2816	if (error)
Dave Chinner	2732036	2013-10-29 22:11:44 +1100	[diff] [blame]	2817	goto out_trans_cancel;
Darrick J. Wong	5838d03	2021-07-12 12:58:48 -0700	[diff] [blame]	2818
				2819	/*
				2820	* Point the unlinked child directory's ".." entry to the root
				2821	* directory to eliminate back-references to inodes that may
				2822	* get freed before the child directory is closed. If the fs
				2823	* gets shrunk, this can lead to dirent inode validation errors.
				2824	*/
				2825	if (dp->i_ino != tp->t_mountp->m_sb.sb_rootino) {
				2826	error = xfs_dir_replace(tp, ip, &xfs_name_dotdot,
				2827	tp->t_mountp->m_sb.sb_rootino, 0);
				2828	if (error)
				2829	return error;
				2830	}
Dave Chinner	c24b5df	2013-08-12 20:49:45 +1000	[diff] [blame]	2831	} else {
				2832	/*
				2833	* When removing a non-directory we need to log the parent
				2834	* inode here. For a directory this is done implicitly
				2835	* by the xfs_droplink call for the ".." entry.
				2836	*/
				2837	xfs_trans_log_inode(tp, dp, XFS_ILOG_CORE);
				2838	}
Dave Chinner	2732036	2013-10-29 22:11:44 +1100	[diff] [blame]	2839	xfs_trans_ichgtime(tp, dp, XFS_ICHGTIME_MOD \| XFS_ICHGTIME_CHG);
Dave Chinner	c24b5df	2013-08-12 20:49:45 +1000	[diff] [blame]	2840
Dave Chinner	2732036	2013-10-29 22:11:44 +1100	[diff] [blame]	2841	/* Drop the link from dp to ip. */
Dave Chinner	c24b5df	2013-08-12 20:49:45 +1000	[diff] [blame]	2842	error = xfs_droplink(tp, ip);
				2843	if (error)
Dave Chinner	2732036	2013-10-29 22:11:44 +1100	[diff] [blame]	2844	goto out_trans_cancel;
Dave Chinner	c24b5df	2013-08-12 20:49:45 +1000	[diff] [blame]	2845
Brian Foster	381eee6	2018-07-11 22:26:21 -0700	[diff] [blame]	2846	error = xfs_dir_removename(tp, dp, name, ip->i_ino, resblks);
Dave Chinner	2732036	2013-10-29 22:11:44 +1100	[diff] [blame]	2847	if (error) {
Dave Chinner	2451337	2014-06-25 14:58:08 +1000	[diff] [blame]	2848	ASSERT(error != -ENOENT);
Brian Foster	c8eac49	2018-07-24 13:43:13 -0700	[diff] [blame]	2849	goto out_trans_cancel;
Dave Chinner	2732036	2013-10-29 22:11:44 +1100	[diff] [blame]	2850	}
				2851
Dave Chinner	c24b5df	2013-08-12 20:49:45 +1000	[diff] [blame]	2852	/*
				2853	* If this is a synchronous mount, make sure that the
				2854	* remove transaction goes to disk before returning to
				2855	* the user.
				2856	*/
Dave Chinner	0560f31	2021-08-18 18:46:52 -0700	[diff] [blame^]	2857	if (xfs_has_wsync(mp) \|\| xfs_has_dirsync(mp))
Dave Chinner	c24b5df	2013-08-12 20:49:45 +1000	[diff] [blame]	2858	xfs_trans_set_sync(tp);
				2859
Christoph Hellwig	7039331	2015-06-04 13:48:08 +1000	[diff] [blame]	2860	error = xfs_trans_commit(tp);
Dave Chinner	c24b5df	2013-08-12 20:49:45 +1000	[diff] [blame]	2861	if (error)
				2862	goto std_return;
				2863
Christoph Hellwig	2cd2ef6	2014-04-23 07:11:51 +1000	[diff] [blame]	2864	if (is_dir && xfs_inode_is_filestream(ip))
Dave Chinner	c24b5df	2013-08-12 20:49:45 +1000	[diff] [blame]	2865	xfs_filestream_deassociate(ip);
				2866
				2867	return 0;
				2868
Dave Chinner	c24b5df	2013-08-12 20:49:45 +1000	[diff] [blame]	2869	out_trans_cancel:
Christoph Hellwig	4906e21	2015-06-04 13:47:56 +1000	[diff] [blame]	2870	xfs_trans_cancel(tp);
Dave Chinner	c24b5df	2013-08-12 20:49:45 +1000	[diff] [blame]	2871	std_return:
				2872	return error;
				2873	}
				2874
Dave Chinner	f6bba20	2013-08-12 20:49:46 +1000	[diff] [blame]	2875	/*
				2876	* Enter all inodes for a rename transaction into a sorted array.
				2877	*/
Dave Chinner	95afcf5	2015-03-25 14:03:32 +1100	[diff] [blame]	2878	#define __XFS_SORT_INODES 5
Dave Chinner	f6bba20	2013-08-12 20:49:46 +1000	[diff] [blame]	2879	STATIC void
				2880	xfs_sort_for_rename(
Dave Chinner	95afcf5	2015-03-25 14:03:32 +1100	[diff] [blame]	2881	struct xfs_inode dp1, / in: old (source) directory inode */
				2882	struct xfs_inode dp2, / in: new (target) directory inode */
				2883	struct xfs_inode ip1, / in: inode of old entry */
				2884	struct xfs_inode ip2, / in: inode of new entry */
				2885	struct xfs_inode wip, / in: whiteout inode */
				2886	struct xfs_inode *i_tab,/ out: sorted array of inodes */
				2887	int num_inodes) / in/out: inodes in array */
Dave Chinner	f6bba20	2013-08-12 20:49:46 +1000	[diff] [blame]	2888	{
Dave Chinner	f6bba20	2013-08-12 20:49:46 +1000	[diff] [blame]	2889	int i, j;
				2890
Dave Chinner	95afcf5	2015-03-25 14:03:32 +1100	[diff] [blame]	2891	ASSERT(*num_inodes == __XFS_SORT_INODES);
				2892	memset(i_tab, 0, num_inodes sizeof(struct xfs_inode *));
				2893
Dave Chinner	f6bba20	2013-08-12 20:49:46 +1000	[diff] [blame]	2894	/*
				2895	* i_tab contains a list of pointers to inodes. We initialize
				2896	* the table here & we'll sort it. We will then use it to
				2897	* order the acquisition of the inode locks.
				2898	*
				2899	* Note that the table may contain duplicates. e.g., dp1 == dp2.
				2900	*/
Dave Chinner	95afcf5	2015-03-25 14:03:32 +1100	[diff] [blame]	2901	i = 0;
				2902	i_tab[i++] = dp1;
				2903	i_tab[i++] = dp2;
				2904	i_tab[i++] = ip1;
				2905	if (ip2)
				2906	i_tab[i++] = ip2;
				2907	if (wip)
				2908	i_tab[i++] = wip;
				2909	*num_inodes = i;
Dave Chinner	f6bba20	2013-08-12 20:49:46 +1000	[diff] [blame]	2910
				2911	/*
				2912	* Sort the elements via bubble sort. (Remember, there are at
Dave Chinner	95afcf5	2015-03-25 14:03:32 +1100	[diff] [blame]	2913	* most 5 elements to sort, so this is adequate.)
Dave Chinner	f6bba20	2013-08-12 20:49:46 +1000	[diff] [blame]	2914	*/
				2915	for (i = 0; i < *num_inodes; i++) {
				2916	for (j = 1; j < *num_inodes; j++) {
				2917	if (i_tab[j]->i_ino < i_tab[j-1]->i_ino) {
Dave Chinner	95afcf5	2015-03-25 14:03:32 +1100	[diff] [blame]	2918	struct xfs_inode *temp = i_tab[j];
Dave Chinner	f6bba20	2013-08-12 20:49:46 +1000	[diff] [blame]	2919	i_tab[j] = i_tab[j-1];
				2920	i_tab[j-1] = temp;
				2921	}
				2922	}
				2923	}
				2924	}
				2925
Dave Chinner	310606b	2015-03-25 14:06:07 +1100	[diff] [blame]	2926	static int
				2927	xfs_finish_rename(
Brian Foster	c9cfdb3	2018-07-11 22:26:08 -0700	[diff] [blame]	2928	struct xfs_trans *tp)
Dave Chinner	310606b	2015-03-25 14:06:07 +1100	[diff] [blame]	2929	{
Dave Chinner	310606b	2015-03-25 14:06:07 +1100	[diff] [blame]	2930	/*
				2931	* If this is a synchronous mount, make sure that the rename transaction
				2932	* goes to disk before returning to the user.
				2933	*/
Dave Chinner	0560f31	2021-08-18 18:46:52 -0700	[diff] [blame^]	2934	if (xfs_has_wsync(tp->t_mountp) \|\| xfs_has_dirsync(tp->t_mountp))
Dave Chinner	310606b	2015-03-25 14:06:07 +1100	[diff] [blame]	2935	xfs_trans_set_sync(tp);
				2936
Christoph Hellwig	7039331	2015-06-04 13:48:08 +1000	[diff] [blame]	2937	return xfs_trans_commit(tp);
Dave Chinner	310606b	2015-03-25 14:06:07 +1100	[diff] [blame]	2938	}
				2939
Dave Chinner	f6bba20	2013-08-12 20:49:46 +1000	[diff] [blame]	2940	/*
Carlos Maiolino	d31a182	2014-12-24 08:51:42 +1100	[diff] [blame]	2941	* xfs_cross_rename()
				2942	*
Bhaskar Chowdhury	0145225	2021-03-23 16:59:30 -0700	[diff] [blame]	2943	* responsible for handling RENAME_EXCHANGE flag in renameat2() syscall
Carlos Maiolino	d31a182	2014-12-24 08:51:42 +1100	[diff] [blame]	2944	*/
				2945	STATIC int
				2946	xfs_cross_rename(
				2947	struct xfs_trans *tp,
				2948	struct xfs_inode *dp1,
				2949	struct xfs_name *name1,
				2950	struct xfs_inode *ip1,
				2951	struct xfs_inode *dp2,
				2952	struct xfs_name *name2,
				2953	struct xfs_inode *ip2,
Carlos Maiolino	d31a182	2014-12-24 08:51:42 +1100	[diff] [blame]	2954	int spaceres)
				2955	{
				2956	int error = 0;
				2957	int ip1_flags = 0;
				2958	int ip2_flags = 0;
				2959	int dp2_flags = 0;
				2960
				2961	/* Swap inode number for dirent in first parent */
Brian Foster	381eee6	2018-07-11 22:26:21 -0700	[diff] [blame]	2962	error = xfs_dir_replace(tp, dp1, name1, ip2->i_ino, spaceres);
Carlos Maiolino	d31a182	2014-12-24 08:51:42 +1100	[diff] [blame]	2963	if (error)
Dave Chinner	eeacd32	2015-03-25 14:08:07 +1100	[diff] [blame]	2964	goto out_trans_abort;
Carlos Maiolino	d31a182	2014-12-24 08:51:42 +1100	[diff] [blame]	2965
				2966	/* Swap inode number for dirent in second parent */
Brian Foster	381eee6	2018-07-11 22:26:21 -0700	[diff] [blame]	2967	error = xfs_dir_replace(tp, dp2, name2, ip1->i_ino, spaceres);
Carlos Maiolino	d31a182	2014-12-24 08:51:42 +1100	[diff] [blame]	2968	if (error)
Dave Chinner	eeacd32	2015-03-25 14:08:07 +1100	[diff] [blame]	2969	goto out_trans_abort;
Carlos Maiolino	d31a182	2014-12-24 08:51:42 +1100	[diff] [blame]	2970
				2971	/*
				2972	* If we're renaming one or more directories across different parents,
				2973	* update the respective ".." entries (and link counts) to match the new
				2974	* parents.
				2975	*/
				2976	if (dp1 != dp2) {
				2977	dp2_flags = XFS_ICHGTIME_MOD \| XFS_ICHGTIME_CHG;
				2978
Dave Chinner	c19b3b05	2016-02-09 16:54:58 +1100	[diff] [blame]	2979	if (S_ISDIR(VFS_I(ip2)->i_mode)) {
Carlos Maiolino	d31a182	2014-12-24 08:51:42 +1100	[diff] [blame]	2980	error = xfs_dir_replace(tp, ip2, &xfs_name_dotdot,
Brian Foster	381eee6	2018-07-11 22:26:21 -0700	[diff] [blame]	2981	dp1->i_ino, spaceres);
Carlos Maiolino	d31a182	2014-12-24 08:51:42 +1100	[diff] [blame]	2982	if (error)
Dave Chinner	eeacd32	2015-03-25 14:08:07 +1100	[diff] [blame]	2983	goto out_trans_abort;
Carlos Maiolino	d31a182	2014-12-24 08:51:42 +1100	[diff] [blame]	2984
				2985	/* transfer ip2 ".." reference to dp1 */
Dave Chinner	c19b3b05	2016-02-09 16:54:58 +1100	[diff] [blame]	2986	if (!S_ISDIR(VFS_I(ip1)->i_mode)) {
Carlos Maiolino	d31a182	2014-12-24 08:51:42 +1100	[diff] [blame]	2987	error = xfs_droplink(tp, dp2);
				2988	if (error)
Dave Chinner	eeacd32	2015-03-25 14:08:07 +1100	[diff] [blame]	2989	goto out_trans_abort;
Eric Sandeen	9108326	2019-05-01 20:26:30 -0700	[diff] [blame]	2990	xfs_bumplink(tp, dp1);
Carlos Maiolino	d31a182	2014-12-24 08:51:42 +1100	[diff] [blame]	2991	}
				2992
				2993	/*
				2994	* Although ip1 isn't changed here, userspace needs
				2995	* to be warned about the change, so that applications
				2996	* relying on it (like backup ones), will properly
				2997	* notify the change
				2998	*/
				2999	ip1_flags \|= XFS_ICHGTIME_CHG;
				3000	ip2_flags \|= XFS_ICHGTIME_MOD \| XFS_ICHGTIME_CHG;
				3001	}
				3002
Dave Chinner	c19b3b05	2016-02-09 16:54:58 +1100	[diff] [blame]	3003	if (S_ISDIR(VFS_I(ip1)->i_mode)) {
Carlos Maiolino	d31a182	2014-12-24 08:51:42 +1100	[diff] [blame]	3004	error = xfs_dir_replace(tp, ip1, &xfs_name_dotdot,
Brian Foster	381eee6	2018-07-11 22:26:21 -0700	[diff] [blame]	3005	dp2->i_ino, spaceres);
Carlos Maiolino	d31a182	2014-12-24 08:51:42 +1100	[diff] [blame]	3006	if (error)
Dave Chinner	eeacd32	2015-03-25 14:08:07 +1100	[diff] [blame]	3007	goto out_trans_abort;
Carlos Maiolino	d31a182	2014-12-24 08:51:42 +1100	[diff] [blame]	3008
				3009	/* transfer ip1 ".." reference to dp2 */
Dave Chinner	c19b3b05	2016-02-09 16:54:58 +1100	[diff] [blame]	3010	if (!S_ISDIR(VFS_I(ip2)->i_mode)) {
Carlos Maiolino	d31a182	2014-12-24 08:51:42 +1100	[diff] [blame]	3011	error = xfs_droplink(tp, dp1);
				3012	if (error)
Dave Chinner	eeacd32	2015-03-25 14:08:07 +1100	[diff] [blame]	3013	goto out_trans_abort;
Eric Sandeen	9108326	2019-05-01 20:26:30 -0700	[diff] [blame]	3014	xfs_bumplink(tp, dp2);
Carlos Maiolino	d31a182	2014-12-24 08:51:42 +1100	[diff] [blame]	3015	}
				3016
				3017	/*
				3018	* Although ip2 isn't changed here, userspace needs
				3019	* to be warned about the change, so that applications
				3020	* relying on it (like backup ones), will properly
				3021	* notify the change
				3022	*/
				3023	ip1_flags \|= XFS_ICHGTIME_MOD \| XFS_ICHGTIME_CHG;
				3024	ip2_flags \|= XFS_ICHGTIME_CHG;
				3025	}
				3026	}
				3027
				3028	if (ip1_flags) {
				3029	xfs_trans_ichgtime(tp, ip1, ip1_flags);
				3030	xfs_trans_log_inode(tp, ip1, XFS_ILOG_CORE);
				3031	}
				3032	if (ip2_flags) {
				3033	xfs_trans_ichgtime(tp, ip2, ip2_flags);
				3034	xfs_trans_log_inode(tp, ip2, XFS_ILOG_CORE);
				3035	}
				3036	if (dp2_flags) {
				3037	xfs_trans_ichgtime(tp, dp2, dp2_flags);
				3038	xfs_trans_log_inode(tp, dp2, XFS_ILOG_CORE);
				3039	}
				3040	xfs_trans_ichgtime(tp, dp1, XFS_ICHGTIME_MOD \| XFS_ICHGTIME_CHG);
				3041	xfs_trans_log_inode(tp, dp1, XFS_ILOG_CORE);
Brian Foster	c9cfdb3	2018-07-11 22:26:08 -0700	[diff] [blame]	3042	return xfs_finish_rename(tp);
Dave Chinner	eeacd32	2015-03-25 14:08:07 +1100	[diff] [blame]	3043
				3044	out_trans_abort:
Christoph Hellwig	4906e21	2015-06-04 13:47:56 +1000	[diff] [blame]	3045	xfs_trans_cancel(tp);
Carlos Maiolino	d31a182	2014-12-24 08:51:42 +1100	[diff] [blame]	3046	return error;
				3047	}
				3048
				3049	/*
Dave Chinner	7dcf5c3	2015-03-25 14:08:08 +1100	[diff] [blame]	3050	* xfs_rename_alloc_whiteout()
				3051	*
Randy Dunlap	b63da6c	2020-08-05 08:49:58 -0700	[diff] [blame]	3052	* Return a referenced, unlinked, unlocked inode that can be used as a
Dave Chinner	7dcf5c3	2015-03-25 14:08:08 +1100	[diff] [blame]	3053	* whiteout in a rename transaction. We use a tmpfile inode here so that if we
				3054	* crash between allocating the inode and linking it into the rename transaction
				3055	* recovery will free the inode and we won't leak it.
				3056	*/
				3057	static int
				3058	xfs_rename_alloc_whiteout(
Christoph Hellwig	f736d93	2021-01-21 14:19:58 +0100	[diff] [blame]	3059	struct user_namespace *mnt_userns,
Dave Chinner	7dcf5c3	2015-03-25 14:08:08 +1100	[diff] [blame]	3060	struct xfs_inode *dp,
				3061	struct xfs_inode **wip)
				3062	{
				3063	struct xfs_inode *tmpfile;
				3064	int error;
				3065
Christoph Hellwig	f736d93	2021-01-21 14:19:58 +0100	[diff] [blame]	3066	error = xfs_create_tmpfile(mnt_userns, dp, S_IFCHR \| WHITEOUT_MODE,
				3067	&tmpfile);
Dave Chinner	7dcf5c3	2015-03-25 14:08:08 +1100	[diff] [blame]	3068	if (error)
				3069	return error;
				3070
Brian Foster	22419ac	2015-05-29 08:14:55 +1000	[diff] [blame]	3071	/*
				3072	* Prepare the tmpfile inode as if it were created through the VFS.
Darrick J. Wong	c4a6bf7	2019-02-13 11:15:17 -0800	[diff] [blame]	3073	* Complete the inode setup and flag it as linkable. nlink is already
				3074	* zero, so we can skip the drop_nlink.
Brian Foster	22419ac	2015-05-29 08:14:55 +1000	[diff] [blame]	3075	*/
Christoph Hellwig	2b3d1d4	2016-04-06 07:48:27 +1000	[diff] [blame]	3076	xfs_setup_iops(tmpfile);
Dave Chinner	7dcf5c3	2015-03-25 14:08:08 +1100	[diff] [blame]	3077	xfs_finish_inode_setup(tmpfile);
				3078	VFS_I(tmpfile)->i_state \|= I_LINKABLE;
				3079
				3080	*wip = tmpfile;
				3081	return 0;
				3082	}
				3083
				3084	/*
Dave Chinner	f6bba20	2013-08-12 20:49:46 +1000	[diff] [blame]	3085	* xfs_rename
				3086	*/
				3087	int
				3088	xfs_rename(
Christoph Hellwig	f736d93	2021-01-21 14:19:58 +0100	[diff] [blame]	3089	struct user_namespace *mnt_userns,
Dave Chinner	7dcf5c3	2015-03-25 14:08:08 +1100	[diff] [blame]	3090	struct xfs_inode *src_dp,
				3091	struct xfs_name *src_name,
				3092	struct xfs_inode *src_ip,
				3093	struct xfs_inode *target_dp,
				3094	struct xfs_name *target_name,
				3095	struct xfs_inode *target_ip,
				3096	unsigned int flags)
Dave Chinner	f6bba20	2013-08-12 20:49:46 +1000	[diff] [blame]	3097	{
Dave Chinner	7dcf5c3	2015-03-25 14:08:08 +1100	[diff] [blame]	3098	struct xfs_mount *mp = src_dp->i_mount;
				3099	struct xfs_trans *tp;
Dave Chinner	7dcf5c3	2015-03-25 14:08:08 +1100	[diff] [blame]	3100	struct xfs_inode wip = NULL; / whiteout inode */
				3101	struct xfs_inode *inodes[__XFS_SORT_INODES];
Darrick J. Wong	6da1b4b	2021-01-22 16:48:32 -0800	[diff] [blame]	3102	int i;
Dave Chinner	7dcf5c3	2015-03-25 14:08:08 +1100	[diff] [blame]	3103	int num_inodes = __XFS_SORT_INODES;
Dave Chinner	2b93681	2015-03-25 15:12:30 +1100	[diff] [blame]	3104	bool new_parent = (src_dp != target_dp);
Dave Chinner	c19b3b05	2016-02-09 16:54:58 +1100	[diff] [blame]	3105	bool src_is_directory = S_ISDIR(VFS_I(src_ip)->i_mode);
Dave Chinner	7dcf5c3	2015-03-25 14:08:08 +1100	[diff] [blame]	3106	int spaceres;
				3107	int error;
Dave Chinner	f6bba20	2013-08-12 20:49:46 +1000	[diff] [blame]	3108
				3109	trace_xfs_rename(src_dp, target_dp, src_name, target_name);
				3110
Dave Chinner	eeacd32	2015-03-25 14:08:07 +1100	[diff] [blame]	3111	if ((flags & RENAME_EXCHANGE) && !target_ip)
				3112	return -EINVAL;
Dave Chinner	f6bba20	2013-08-12 20:49:46 +1000	[diff] [blame]	3113
Dave Chinner	7dcf5c3	2015-03-25 14:08:08 +1100	[diff] [blame]	3114	/*
				3115	* If we are doing a whiteout operation, allocate the whiteout inode
				3116	* we will be placing at the target and ensure the type is set
				3117	* appropriately.
				3118	*/
				3119	if (flags & RENAME_WHITEOUT) {
				3120	ASSERT(!(flags & (RENAME_NOREPLACE \| RENAME_EXCHANGE)));
Christoph Hellwig	f736d93	2021-01-21 14:19:58 +0100	[diff] [blame]	3121	error = xfs_rename_alloc_whiteout(mnt_userns, target_dp, &wip);
Dave Chinner	7dcf5c3	2015-03-25 14:08:08 +1100	[diff] [blame]	3122	if (error)
				3123	return error;
Dave Chinner	f6bba20	2013-08-12 20:49:46 +1000	[diff] [blame]	3124
Dave Chinner	7dcf5c3	2015-03-25 14:08:08 +1100	[diff] [blame]	3125	/* setup target dirent info as whiteout */
				3126	src_name->type = XFS_DIR3_FT_CHRDEV;
				3127	}
				3128
				3129	xfs_sort_for_rename(src_dp, target_dp, src_ip, target_ip, wip,
Dave Chinner	f6bba20	2013-08-12 20:49:46 +1000	[diff] [blame]	3130	inodes, &num_inodes);
				3131
Dave Chinner	f6bba20	2013-08-12 20:49:46 +1000	[diff] [blame]	3132	spaceres = XFS_RENAME_SPACE_RES(mp, target_name->len);
Christoph Hellwig	253f491	2016-04-06 09:19:55 +1000	[diff] [blame]	3133	error = xfs_trans_alloc(mp, &M_RES(mp)->tr_rename, spaceres, 0, 0, &tp);
Dave Chinner	2451337	2014-06-25 14:58:08 +1000	[diff] [blame]	3134	if (error == -ENOSPC) {
Dave Chinner	f6bba20	2013-08-12 20:49:46 +1000	[diff] [blame]	3135	spaceres = 0;
Christoph Hellwig	253f491	2016-04-06 09:19:55 +1000	[diff] [blame]	3136	error = xfs_trans_alloc(mp, &M_RES(mp)->tr_rename, 0, 0, 0,
				3137	&tp);
Dave Chinner	f6bba20	2013-08-12 20:49:46 +1000	[diff] [blame]	3138	}
Dave Chinner	445883e	2015-03-25 14:05:43 +1100	[diff] [blame]	3139	if (error)
Christoph Hellwig	253f491	2016-04-06 09:19:55 +1000	[diff] [blame]	3140	goto out_release_wip;
Dave Chinner	f6bba20	2013-08-12 20:49:46 +1000	[diff] [blame]	3141
				3142	/*
				3143	* Attach the dquots to the inodes
				3144	*/
				3145	error = xfs_qm_vop_rename_dqattach(inodes);
Dave Chinner	445883e	2015-03-25 14:05:43 +1100	[diff] [blame]	3146	if (error)
				3147	goto out_trans_cancel;
Dave Chinner	f6bba20	2013-08-12 20:49:46 +1000	[diff] [blame]	3148
				3149	/*
				3150	* Lock all the participating inodes. Depending upon whether
				3151	* the target_name exists in the target directory, and
				3152	* whether the target directory is the same as the source
				3153	* directory, we can lock from 2 to 4 inodes.
				3154	*/
				3155	xfs_lock_inodes(inodes, num_inodes, XFS_ILOCK_EXCL);
				3156
				3157	/*
				3158	* Join all the inodes to the transaction. From this point on,
				3159	* we can rely on either trans_commit or trans_cancel to unlock
				3160	* them.
				3161	*/
Christoph Hellwig	6552321	2016-11-30 14:33:25 +1100	[diff] [blame]	3162	xfs_trans_ijoin(tp, src_dp, XFS_ILOCK_EXCL);
Dave Chinner	f6bba20	2013-08-12 20:49:46 +1000	[diff] [blame]	3163	if (new_parent)
Christoph Hellwig	6552321	2016-11-30 14:33:25 +1100	[diff] [blame]	3164	xfs_trans_ijoin(tp, target_dp, XFS_ILOCK_EXCL);
Dave Chinner	f6bba20	2013-08-12 20:49:46 +1000	[diff] [blame]	3165	xfs_trans_ijoin(tp, src_ip, XFS_ILOCK_EXCL);
				3166	if (target_ip)
				3167	xfs_trans_ijoin(tp, target_ip, XFS_ILOCK_EXCL);
Dave Chinner	7dcf5c3	2015-03-25 14:08:08 +1100	[diff] [blame]	3168	if (wip)
				3169	xfs_trans_ijoin(tp, wip, XFS_ILOCK_EXCL);
Dave Chinner	f6bba20	2013-08-12 20:49:46 +1000	[diff] [blame]	3170
				3171	/*
				3172	* If we are using project inheritance, we only allow renames
				3173	* into our tree when the project IDs are the same; else the
				3174	* tree quota mechanism would be circumvented.
				3175	*/
Christoph Hellwig	db07349	2021-03-29 11:11:44 -0700	[diff] [blame]	3176	if (unlikely((target_dp->i_diflags & XFS_DIFLAG_PROJINHERIT) &&
Christoph Hellwig	ceaf603	2021-03-29 11:11:39 -0700	[diff] [blame]	3177	target_dp->i_projid != src_ip->i_projid)) {
Dave Chinner	2451337	2014-06-25 14:58:08 +1000	[diff] [blame]	3178	error = -EXDEV;
Dave Chinner	445883e	2015-03-25 14:05:43 +1100	[diff] [blame]	3179	goto out_trans_cancel;
Dave Chinner	f6bba20	2013-08-12 20:49:46 +1000	[diff] [blame]	3180	}
				3181
Dave Chinner	eeacd32	2015-03-25 14:08:07 +1100	[diff] [blame]	3182	/* RENAME_EXCHANGE is unique from here on. */
				3183	if (flags & RENAME_EXCHANGE)
				3184	return xfs_cross_rename(tp, src_dp, src_name, src_ip,
				3185	target_dp, target_name, target_ip,
Brian Foster	f16dea5	2018-07-11 22:26:20 -0700	[diff] [blame]	3186	spaceres);
Carlos Maiolino	d31a182	2014-12-24 08:51:42 +1100	[diff] [blame]	3187
				3188	/*
kaixuxia	bc56ad8	2019-09-03 21:06:50 -0700	[diff] [blame]	3189	* Check for expected errors before we dirty the transaction
				3190	* so we can return an error without a transaction abort.
Chandan Babu R	02092a2	2021-01-22 16:48:13 -0800	[diff] [blame]	3191	*
				3192	* Extent count overflow check:
				3193	*
				3194	* From the perspective of src_dp, a rename operation is essentially a
				3195	* directory entry remove operation. Hence the only place where we check
				3196	* for extent count overflow for src_dp is in
				3197	* xfs_bmap_del_extent_real(). xfs_bmap_del_extent_real() returns
				3198	* -ENOSPC when it detects a possible extent count overflow and in
				3199	* response, the higher layers of directory handling code do the
				3200	* following:
				3201	* 1. Data/Free blocks: XFS lets these blocks linger until a
				3202	* future remove operation removes them.
				3203	* 2. Dabtree blocks: XFS swaps the blocks with the last block in the
				3204	* Leaf space and unmaps the last block.
				3205	*
				3206	* For target_dp, there are two cases depending on whether the
				3207	* destination directory entry exists or not.
				3208	*
				3209	* When destination directory entry does not exist (i.e. target_ip ==
				3210	* NULL), extent count overflow check is performed only when transaction
				3211	* has a non-zero sized space reservation associated with it. With a
				3212	* zero-sized space reservation, XFS allows a rename operation to
				3213	* continue only when the directory has sufficient free space in its
				3214	* data/leaf/free space blocks to hold the new entry.
				3215	*
				3216	* When destination directory entry exists (i.e. target_ip != NULL), all
				3217	* we need to do is change the inode number associated with the already
				3218	* existing entry. Hence there is no need to perform an extent count
				3219	* overflow check.
Dave Chinner	f6bba20	2013-08-12 20:49:46 +1000	[diff] [blame]	3220	*/
				3221	if (target_ip == NULL) {
				3222	/*
				3223	* If there's no space reservation, check the entry will
				3224	* fit before actually inserting it.
				3225	*/
Eric Sandeen	94f3cad	2014-09-09 11:57:52 +1000	[diff] [blame]	3226	if (!spaceres) {
				3227	error = xfs_dir_canenter(tp, target_dp, target_name);
				3228	if (error)
Dave Chinner	445883e	2015-03-25 14:05:43 +1100	[diff] [blame]	3229	goto out_trans_cancel;
Chandan Babu R	02092a2	2021-01-22 16:48:13 -0800	[diff] [blame]	3230	} else {
				3231	error = xfs_iext_count_may_overflow(target_dp,
				3232	XFS_DATA_FORK,
				3233	XFS_IEXT_DIR_MANIP_CNT(mp));
				3234	if (error)
				3235	goto out_trans_cancel;
Eric Sandeen	94f3cad	2014-09-09 11:57:52 +1000	[diff] [blame]	3236	}
kaixuxia	bc56ad8	2019-09-03 21:06:50 -0700	[diff] [blame]	3237	} else {
				3238	/*
				3239	* If target exists and it's a directory, check that whether
				3240	* it can be destroyed.
				3241	*/
				3242	if (S_ISDIR(VFS_I(target_ip)->i_mode) &&
				3243	(!xfs_dir_isempty(target_ip) \|\|
				3244	(VFS_I(target_ip)->i_nlink > 2))) {
				3245	error = -EEXIST;
				3246	goto out_trans_cancel;
				3247	}
				3248	}
				3249
				3250	/*
Darrick J. Wong	6da1b4b	2021-01-22 16:48:32 -0800	[diff] [blame]	3251	* Lock the AGI buffers we need to handle bumping the nlink of the
				3252	* whiteout inode off the unlinked list and to handle dropping the
				3253	* nlink of the target inode. Per locking order rules, do this in
				3254	* increasing AG order and before directory block allocation tries to
				3255	* grab AGFs because we grab AGIs before AGFs.
				3256	*
				3257	* The (vfs) caller must ensure that if src is a directory then
				3258	* target_ip is either null or an empty directory.
				3259	*/
				3260	for (i = 0; i < num_inodes && inodes[i] != NULL; i++) {
				3261	if (inodes[i] == wip \|\|
				3262	(inodes[i] == target_ip &&
				3263	(VFS_I(target_ip)->i_nlink == 1 \|\| src_is_directory))) {
				3264	struct xfs_buf *bp;
				3265	xfs_agnumber_t agno;
				3266
				3267	agno = XFS_INO_TO_AGNO(mp, inodes[i]->i_ino);
				3268	error = xfs_read_agi(mp, tp, agno, &bp);
				3269	if (error)
				3270	goto out_trans_cancel;
				3271	}
				3272	}
				3273
				3274	/*
kaixuxia	bc56ad8	2019-09-03 21:06:50 -0700	[diff] [blame]	3275	* Directory entry creation below may acquire the AGF. Remove
				3276	* the whiteout from the unlinked list first to preserve correct
				3277	* AGI/AGF locking order. This dirties the transaction so failures
				3278	* after this point will abort and log recovery will clean up the
				3279	* mess.
				3280	*
				3281	* For whiteouts, we need to bump the link count on the whiteout
				3282	* inode. After this point, we have a real link, clear the tmpfile
				3283	* state flag from the inode so it doesn't accidentally get misused
				3284	* in future.
				3285	*/
				3286	if (wip) {
Dave Chinner	f40aadb	2021-06-02 10:48:51 +1000	[diff] [blame]	3287	struct xfs_perag *pag;
				3288
kaixuxia	bc56ad8	2019-09-03 21:06:50 -0700	[diff] [blame]	3289	ASSERT(VFS_I(wip)->i_nlink == 0);
Dave Chinner	f40aadb	2021-06-02 10:48:51 +1000	[diff] [blame]	3290
				3291	pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, wip->i_ino));
				3292	error = xfs_iunlink_remove(tp, pag, wip);
				3293	xfs_perag_put(pag);
kaixuxia	bc56ad8	2019-09-03 21:06:50 -0700	[diff] [blame]	3294	if (error)
				3295	goto out_trans_cancel;
				3296
				3297	xfs_bumplink(tp, wip);
kaixuxia	bc56ad8	2019-09-03 21:06:50 -0700	[diff] [blame]	3298	VFS_I(wip)->i_state &= ~I_LINKABLE;
				3299	}
				3300
				3301	/*
				3302	* Set up the target.
				3303	*/
				3304	if (target_ip == NULL) {
Dave Chinner	f6bba20	2013-08-12 20:49:46 +1000	[diff] [blame]	3305	/*
				3306	* If target does not exist and the rename crosses
				3307	* directories, adjust the target directory link count
				3308	* to account for the ".." reference from the new entry.
				3309	*/
				3310	error = xfs_dir_createname(tp, target_dp, target_name,
Brian Foster	381eee6	2018-07-11 22:26:21 -0700	[diff] [blame]	3311	src_ip->i_ino, spaceres);
Dave Chinner	f6bba20	2013-08-12 20:49:46 +1000	[diff] [blame]	3312	if (error)
Brian Foster	c8eac49	2018-07-24 13:43:13 -0700	[diff] [blame]	3313	goto out_trans_cancel;
Dave Chinner	f6bba20	2013-08-12 20:49:46 +1000	[diff] [blame]	3314
				3315	xfs_trans_ichgtime(tp, target_dp,
				3316	XFS_ICHGTIME_MOD \| XFS_ICHGTIME_CHG);
				3317
				3318	if (new_parent && src_is_directory) {
Eric Sandeen	9108326	2019-05-01 20:26:30 -0700	[diff] [blame]	3319	xfs_bumplink(tp, target_dp);
Dave Chinner	f6bba20	2013-08-12 20:49:46 +1000	[diff] [blame]	3320	}
				3321	} else { /* target_ip != NULL */
				3322	/*
Dave Chinner	f6bba20	2013-08-12 20:49:46 +1000	[diff] [blame]	3323	* Link the source inode under the target name.
				3324	* If the source inode is a directory and we are moving
				3325	* it across directories, its ".." entry will be
				3326	* inconsistent until we replace that down below.
				3327	*
				3328	* In case there is already an entry with the same
				3329	* name at the destination directory, remove it first.
				3330	*/
				3331	error = xfs_dir_replace(tp, target_dp, target_name,
Brian Foster	381eee6	2018-07-11 22:26:21 -0700	[diff] [blame]	3332	src_ip->i_ino, spaceres);
Dave Chinner	f6bba20	2013-08-12 20:49:46 +1000	[diff] [blame]	3333	if (error)
Brian Foster	c8eac49	2018-07-24 13:43:13 -0700	[diff] [blame]	3334	goto out_trans_cancel;
Dave Chinner	f6bba20	2013-08-12 20:49:46 +1000	[diff] [blame]	3335
				3336	xfs_trans_ichgtime(tp, target_dp,
				3337	XFS_ICHGTIME_MOD \| XFS_ICHGTIME_CHG);
				3338
				3339	/*
				3340	* Decrement the link count on the target since the target
				3341	* dir no longer points to it.
				3342	*/
				3343	error = xfs_droplink(tp, target_ip);
				3344	if (error)
Brian Foster	c8eac49	2018-07-24 13:43:13 -0700	[diff] [blame]	3345	goto out_trans_cancel;
Dave Chinner	f6bba20	2013-08-12 20:49:46 +1000	[diff] [blame]	3346
				3347	if (src_is_directory) {
				3348	/*
				3349	* Drop the link from the old "." entry.
				3350	*/
				3351	error = xfs_droplink(tp, target_ip);
				3352	if (error)
Brian Foster	c8eac49	2018-07-24 13:43:13 -0700	[diff] [blame]	3353	goto out_trans_cancel;
Dave Chinner	f6bba20	2013-08-12 20:49:46 +1000	[diff] [blame]	3354	}
				3355	} /* target_ip != NULL */
				3356
				3357	/*
				3358	* Remove the source.
				3359	*/
				3360	if (new_parent && src_is_directory) {
				3361	/*
				3362	* Rewrite the ".." entry to point to the new
				3363	* directory.
				3364	*/
				3365	error = xfs_dir_replace(tp, src_ip, &xfs_name_dotdot,
Brian Foster	381eee6	2018-07-11 22:26:21 -0700	[diff] [blame]	3366	target_dp->i_ino, spaceres);
Dave Chinner	2451337	2014-06-25 14:58:08 +1000	[diff] [blame]	3367	ASSERT(error != -EEXIST);
Dave Chinner	f6bba20	2013-08-12 20:49:46 +1000	[diff] [blame]	3368	if (error)
Brian Foster	c8eac49	2018-07-24 13:43:13 -0700	[diff] [blame]	3369	goto out_trans_cancel;
Dave Chinner	f6bba20	2013-08-12 20:49:46 +1000	[diff] [blame]	3370	}
				3371
				3372	/*
				3373	* We always want to hit the ctime on the source inode.
				3374	*
				3375	* This isn't strictly required by the standards since the source
				3376	* inode isn't really being changed, but old unix file systems did
				3377	* it and some incremental backup programs won't work without it.
				3378	*/
				3379	xfs_trans_ichgtime(tp, src_ip, XFS_ICHGTIME_CHG);
				3380	xfs_trans_log_inode(tp, src_ip, XFS_ILOG_CORE);
				3381
				3382	/*
				3383	* Adjust the link count on src_dp. This is necessary when
				3384	* renaming a directory, either within one parent when
				3385	* the target existed, or across two parent directories.
				3386	*/
				3387	if (src_is_directory && (new_parent \|\| target_ip != NULL)) {
				3388
				3389	/*
				3390	* Decrement link count on src_directory since the
				3391	* entry that's moved no longer points to it.
				3392	*/
				3393	error = xfs_droplink(tp, src_dp);
				3394	if (error)
Brian Foster	c8eac49	2018-07-24 13:43:13 -0700	[diff] [blame]	3395	goto out_trans_cancel;
Dave Chinner	f6bba20	2013-08-12 20:49:46 +1000	[diff] [blame]	3396	}
				3397
Dave Chinner	7dcf5c3	2015-03-25 14:08:08 +1100	[diff] [blame]	3398	/*
				3399	* For whiteouts, we only need to update the source dirent with the
				3400	* inode number of the whiteout inode rather than removing it
				3401	* altogether.
				3402	*/
				3403	if (wip) {
				3404	error = xfs_dir_replace(tp, src_dp, src_name, wip->i_ino,
Brian Foster	381eee6	2018-07-11 22:26:21 -0700	[diff] [blame]	3405	spaceres);
Chandan Babu R	02092a2	2021-01-22 16:48:13 -0800	[diff] [blame]	3406	} else {
				3407	/*
				3408	* NOTE: We don't need to check for extent count overflow here
				3409	* because the dir remove name code will leave the dir block in
				3410	* place if the extent count would overflow.
				3411	*/
Dave Chinner	7dcf5c3	2015-03-25 14:08:08 +1100	[diff] [blame]	3412	error = xfs_dir_removename(tp, src_dp, src_name, src_ip->i_ino,
Brian Foster	381eee6	2018-07-11 22:26:21 -0700	[diff] [blame]	3413	spaceres);
Chandan Babu R	02092a2	2021-01-22 16:48:13 -0800	[diff] [blame]	3414	}
				3415
Dave Chinner	f6bba20	2013-08-12 20:49:46 +1000	[diff] [blame]	3416	if (error)
Brian Foster	c8eac49	2018-07-24 13:43:13 -0700	[diff] [blame]	3417	goto out_trans_cancel;
Dave Chinner	f6bba20	2013-08-12 20:49:46 +1000	[diff] [blame]	3418
				3419	xfs_trans_ichgtime(tp, src_dp, XFS_ICHGTIME_MOD \| XFS_ICHGTIME_CHG);
				3420	xfs_trans_log_inode(tp, src_dp, XFS_ILOG_CORE);
				3421	if (new_parent)
				3422	xfs_trans_log_inode(tp, target_dp, XFS_ILOG_CORE);
				3423
Brian Foster	c9cfdb3	2018-07-11 22:26:08 -0700	[diff] [blame]	3424	error = xfs_finish_rename(tp);
Dave Chinner	7dcf5c3	2015-03-25 14:08:08 +1100	[diff] [blame]	3425	if (wip)
Darrick J. Wong	44a8736	2018-07-25 12:52:32 -0700	[diff] [blame]	3426	xfs_irele(wip);
Dave Chinner	7dcf5c3	2015-03-25 14:08:08 +1100	[diff] [blame]	3427	return error;
Dave Chinner	f6bba20	2013-08-12 20:49:46 +1000	[diff] [blame]	3428
Dave Chinner	445883e	2015-03-25 14:05:43 +1100	[diff] [blame]	3429	out_trans_cancel:
Christoph Hellwig	4906e21	2015-06-04 13:47:56 +1000	[diff] [blame]	3430	xfs_trans_cancel(tp);
Christoph Hellwig	253f491	2016-04-06 09:19:55 +1000	[diff] [blame]	3431	out_release_wip:
Dave Chinner	7dcf5c3	2015-03-25 14:08:08 +1100	[diff] [blame]	3432	if (wip)
Darrick J. Wong	44a8736	2018-07-25 12:52:32 -0700	[diff] [blame]	3433	xfs_irele(wip);
Dave Chinner	f6bba20	2013-08-12 20:49:46 +1000	[diff] [blame]	3434	return error;
				3435	}
				3436
Dave Chinner	e6187b3	2020-06-29 14:49:19 -0700	[diff] [blame]	3437	static int
				3438	xfs_iflush(
Christoph Hellwig	93848a9	2013-04-03 16:11:17 +1100	[diff] [blame]	3439	struct xfs_inode *ip,
				3440	struct xfs_buf *bp)
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	3441	{
Christoph Hellwig	93848a9	2013-04-03 16:11:17 +1100	[diff] [blame]	3442	struct xfs_inode_log_item *iip = ip->i_itemp;
				3443	struct xfs_dinode *dip;
				3444	struct xfs_mount *mp = ip->i_mount;
Brian Foster	f201929	2020-05-06 13:25:20 -0700	[diff] [blame]	3445	int error;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	3446
Christoph Hellwig	579aa9c	2008-04-22 17:34:00 +1000	[diff] [blame]	3447	ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL\|XFS_ILOCK_SHARED));
Dave Chinner	718ecc5	2020-08-17 16:41:01 -0700	[diff] [blame]	3448	ASSERT(xfs_iflags_test(ip, XFS_IFLUSHING));
Christoph Hellwig	f7e67b2	2020-05-18 10:28:05 -0700	[diff] [blame]	3449	ASSERT(ip->i_df.if_format != XFS_DINODE_FMT_BTREE \|\|
Christoph Hellwig	daf8396	2020-05-18 10:27:22 -0700	[diff] [blame]	3450	ip->i_df.if_nextents > XFS_IFORK_MAXEXT(ip, XFS_DATA_FORK));
Dave Chinner	90c60e1	2020-06-29 14:49:19 -0700	[diff] [blame]	3451	ASSERT(iip->ili_item.li_buf == bp);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	3452
Christoph Hellwig	88ee2df	2015-06-22 09:44:29 +1000	[diff] [blame]	3453	dip = xfs_buf_offset(bp, ip->i_imap.im_boffset);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	3454
Brian Foster	f201929	2020-05-06 13:25:20 -0700	[diff] [blame]	3455	/*
				3456	* We don't flush the inode if any of the following checks fail, but we
				3457	* do still update the log item and attach to the backing buffer as if
				3458	* the flush happened. This is a formality to facilitate predictable
				3459	* error handling as the caller will shutdown and fail the buffer.
				3460	*/
				3461	error = -EFSCORRUPTED;
Christoph Hellwig	69ef921	2011-07-08 14:36:05 +0200	[diff] [blame]	3462	if (XFS_TEST_ERROR(dip->di_magic != cpu_to_be16(XFS_DINODE_MAGIC),
Darrick J. Wong	9e24cfd	2017-06-20 17:54:47 -0700	[diff] [blame]	3463	mp, XFS_ERRTAG_IFLUSH_1)) {
Dave Chinner	6a19d93	2011-03-07 10:02:35 +1100	[diff] [blame]	3464	xfs_alert_tag(mp, XFS_PTAG_IFLUSH,
Darrick J. Wong	c969004	2018-01-09 12:02:55 -0800	[diff] [blame]	3465	"%s: Bad inode %Lu magic number 0x%x, ptr "PTR_FMT,
Dave Chinner	6a19d93	2011-03-07 10:02:35 +1100	[diff] [blame]	3466	__func__, ip->i_ino, be16_to_cpu(dip->di_magic), dip);
Brian Foster	f201929	2020-05-06 13:25:20 -0700	[diff] [blame]	3467	goto flush_out;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	3468	}
Dave Chinner	c19b3b05	2016-02-09 16:54:58 +1100	[diff] [blame]	3469	if (S_ISREG(VFS_I(ip)->i_mode)) {
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	3470	if (XFS_TEST_ERROR(
Christoph Hellwig	f7e67b2	2020-05-18 10:28:05 -0700	[diff] [blame]	3471	ip->i_df.if_format != XFS_DINODE_FMT_EXTENTS &&
				3472	ip->i_df.if_format != XFS_DINODE_FMT_BTREE,
Darrick J. Wong	9e24cfd	2017-06-20 17:54:47 -0700	[diff] [blame]	3473	mp, XFS_ERRTAG_IFLUSH_3)) {
Dave Chinner	6a19d93	2011-03-07 10:02:35 +1100	[diff] [blame]	3474	xfs_alert_tag(mp, XFS_PTAG_IFLUSH,
Darrick J. Wong	c969004	2018-01-09 12:02:55 -0800	[diff] [blame]	3475	"%s: Bad regular inode %Lu, ptr "PTR_FMT,
Dave Chinner	6a19d93	2011-03-07 10:02:35 +1100	[diff] [blame]	3476	__func__, ip->i_ino, ip);
Brian Foster	f201929	2020-05-06 13:25:20 -0700	[diff] [blame]	3477	goto flush_out;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	3478	}
Dave Chinner	c19b3b05	2016-02-09 16:54:58 +1100	[diff] [blame]	3479	} else if (S_ISDIR(VFS_I(ip)->i_mode)) {
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	3480	if (XFS_TEST_ERROR(
Christoph Hellwig	f7e67b2	2020-05-18 10:28:05 -0700	[diff] [blame]	3481	ip->i_df.if_format != XFS_DINODE_FMT_EXTENTS &&
				3482	ip->i_df.if_format != XFS_DINODE_FMT_BTREE &&
				3483	ip->i_df.if_format != XFS_DINODE_FMT_LOCAL,
Darrick J. Wong	9e24cfd	2017-06-20 17:54:47 -0700	[diff] [blame]	3484	mp, XFS_ERRTAG_IFLUSH_4)) {
Dave Chinner	6a19d93	2011-03-07 10:02:35 +1100	[diff] [blame]	3485	xfs_alert_tag(mp, XFS_PTAG_IFLUSH,
Darrick J. Wong	c969004	2018-01-09 12:02:55 -0800	[diff] [blame]	3486	"%s: Bad directory inode %Lu, ptr "PTR_FMT,
Dave Chinner	6a19d93	2011-03-07 10:02:35 +1100	[diff] [blame]	3487	__func__, ip->i_ino, ip);
Brian Foster	f201929	2020-05-06 13:25:20 -0700	[diff] [blame]	3488	goto flush_out;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	3489	}
				3490	}
Christoph Hellwig	daf8396	2020-05-18 10:27:22 -0700	[diff] [blame]	3491	if (XFS_TEST_ERROR(ip->i_df.if_nextents + xfs_ifork_nextents(ip->i_afp) >
Christoph Hellwig	6e73a54	2021-03-29 11:11:40 -0700	[diff] [blame]	3492	ip->i_nblocks, mp, XFS_ERRTAG_IFLUSH_5)) {
Dave Chinner	6a19d93	2011-03-07 10:02:35 +1100	[diff] [blame]	3493	xfs_alert_tag(mp, XFS_PTAG_IFLUSH,
				3494	"%s: detected corrupt incore inode %Lu, "
Darrick J. Wong	c969004	2018-01-09 12:02:55 -0800	[diff] [blame]	3495	"total extents = %d, nblocks = %Ld, ptr "PTR_FMT,
Dave Chinner	6a19d93	2011-03-07 10:02:35 +1100	[diff] [blame]	3496	__func__, ip->i_ino,
Christoph Hellwig	daf8396	2020-05-18 10:27:22 -0700	[diff] [blame]	3497	ip->i_df.if_nextents + xfs_ifork_nextents(ip->i_afp),
Christoph Hellwig	6e73a54	2021-03-29 11:11:40 -0700	[diff] [blame]	3498	ip->i_nblocks, ip);
Brian Foster	f201929	2020-05-06 13:25:20 -0700	[diff] [blame]	3499	goto flush_out;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	3500	}
Christoph Hellwig	7821ea3	2021-03-29 11:11:44 -0700	[diff] [blame]	3501	if (XFS_TEST_ERROR(ip->i_forkoff > mp->m_sb.sb_inodesize,
Darrick J. Wong	9e24cfd	2017-06-20 17:54:47 -0700	[diff] [blame]	3502	mp, XFS_ERRTAG_IFLUSH_6)) {
Dave Chinner	6a19d93	2011-03-07 10:02:35 +1100	[diff] [blame]	3503	xfs_alert_tag(mp, XFS_PTAG_IFLUSH,
Darrick J. Wong	c969004	2018-01-09 12:02:55 -0800	[diff] [blame]	3504	"%s: bad inode %Lu, forkoff 0x%x, ptr "PTR_FMT,
Christoph Hellwig	7821ea3	2021-03-29 11:11:44 -0700	[diff] [blame]	3505	__func__, ip->i_ino, ip->i_forkoff, ip);
Brian Foster	f201929	2020-05-06 13:25:20 -0700	[diff] [blame]	3506	goto flush_out;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	3507	}
Dave Chinner	e60896d	2013-07-24 15:47:30 +1000	[diff] [blame]	3508
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	3509	/*
Christoph Hellwig	965e0a1	2021-03-29 11:11:42 -0700	[diff] [blame]	3510	* Inode item log recovery for v2 inodes are dependent on the flushiter
				3511	* count for correct sequencing. We bump the flush iteration count so
				3512	* we can detect flushes which postdate a log record during recovery.
				3513	* This is redundant as we now log every change and hence this can't
				3514	* happen but we need to still do it to ensure backwards compatibility
				3515	* with old kernels that predate logging all inode changes.
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	3516	*/
Dave Chinner	38c26bf	2021-08-18 18:46:37 -0700	[diff] [blame]	3517	if (!xfs_has_v3inodes(mp))
Christoph Hellwig	965e0a1	2021-03-29 11:11:42 -0700	[diff] [blame]	3518	ip->i_flushiter++;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	3519
Christoph Hellwig	0f45a1b	2020-05-14 14:01:31 -0700	[diff] [blame]	3520	/*
				3521	* If there are inline format data / attr forks attached to this inode,
				3522	* make sure they are not corrupt.
				3523	*/
Christoph Hellwig	f7e67b2	2020-05-18 10:28:05 -0700	[diff] [blame]	3524	if (ip->i_df.if_format == XFS_DINODE_FMT_LOCAL &&
Christoph Hellwig	0f45a1b	2020-05-14 14:01:31 -0700	[diff] [blame]	3525	xfs_ifork_verify_local_data(ip))
				3526	goto flush_out;
Christoph Hellwig	f7e67b2	2020-05-18 10:28:05 -0700	[diff] [blame]	3527	if (ip->i_afp && ip->i_afp->if_format == XFS_DINODE_FMT_LOCAL &&
Christoph Hellwig	0f45a1b	2020-05-14 14:01:31 -0700	[diff] [blame]	3528	xfs_ifork_verify_local_attr(ip))
Brian Foster	f201929	2020-05-06 13:25:20 -0700	[diff] [blame]	3529	goto flush_out;
Darrick J. Wong	005c5db	2017-03-28 14:51:10 -0700	[diff] [blame]	3530
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	3531	/*
Dave Chinner	3987848	2016-02-09 16:54:58 +1100	[diff] [blame]	3532	* Copy the dirty parts of the inode into the on-disk inode. We always
				3533	* copy out the core of the inode, because if the inode is dirty at all
				3534	* the core must be.
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	3535	*/
Dave Chinner	93f958f	2016-02-09 16:54:58 +1100	[diff] [blame]	3536	xfs_inode_to_disk(ip, dip, iip->ili_item.li_lsn);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	3537
				3538	/* Wrap, we never let the log put out DI_MAX_FLUSH */
Dave Chinner	38c26bf	2021-08-18 18:46:37 -0700	[diff] [blame]	3539	if (!xfs_has_v3inodes(mp)) {
Christoph Hellwig	ee7b83f	2021-03-29 11:11:43 -0700	[diff] [blame]	3540	if (ip->i_flushiter == DI_MAX_FLUSH)
				3541	ip->i_flushiter = 0;
				3542	}
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	3543
Darrick J. Wong	005c5db	2017-03-28 14:51:10 -0700	[diff] [blame]	3544	xfs_iflush_fork(ip, dip, iip, XFS_DATA_FORK);
				3545	if (XFS_IFORK_Q(ip))
				3546	xfs_iflush_fork(ip, dip, iip, XFS_ATTR_FORK);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	3547
				3548	/*
Christoph Hellwig	f5d8d5c	2012-02-29 09:53:54 +0000	[diff] [blame]	3549	* We've recorded everything logged in the inode, so we'd like to clear
				3550	* the ili_fields bits so we don't log and flush things unnecessarily.
				3551	* However, we can't stop logging all this information until the data
				3552	* we've copied into the disk buffer is written to disk. If we did we
				3553	* might overwrite the copy of the inode in the log with all the data
				3554	* after re-logging only part of it, and in the face of a crash we
				3555	* wouldn't have all the data we need to recover.
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	3556	*
Christoph Hellwig	f5d8d5c	2012-02-29 09:53:54 +0000	[diff] [blame]	3557	* What we do is move the bits to the ili_last_fields field. When
				3558	* logging the inode, these bits are moved back to the ili_fields field.
Christoph Hellwig	664ffb8	2020-09-01 10:55:29 -0700	[diff] [blame]	3559	* In the xfs_buf_inode_iodone() routine we clear ili_last_fields, since
				3560	* we know that the information those bits represent is permanently on
Christoph Hellwig	f5d8d5c	2012-02-29 09:53:54 +0000	[diff] [blame]	3561	* disk. As long as the flush completes before the inode is logged
				3562	* again, then both ili_fields and ili_last_fields will be cleared.
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	3563	*/
Brian Foster	f201929	2020-05-06 13:25:20 -0700	[diff] [blame]	3564	error = 0;
				3565	flush_out:
Dave Chinner	1319ebe	2020-06-29 14:48:46 -0700	[diff] [blame]	3566	spin_lock(&iip->ili_lock);
Christoph Hellwig	93848a9	2013-04-03 16:11:17 +1100	[diff] [blame]	3567	iip->ili_last_fields = iip->ili_fields;
				3568	iip->ili_fields = 0;
Dave Chinner	fc0561c	2015-11-03 13:14:59 +1100	[diff] [blame]	3569	iip->ili_fsync_fields = 0;
Dave Chinner	1319ebe	2020-06-29 14:48:46 -0700	[diff] [blame]	3570	spin_unlock(&iip->ili_lock);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	3571
Dave Chinner	1319ebe	2020-06-29 14:48:46 -0700	[diff] [blame]	3572	/*
				3573	* Store the current LSN of the inode so that we can tell whether the
Christoph Hellwig	664ffb8	2020-09-01 10:55:29 -0700	[diff] [blame]	3574	* item has moved in the AIL from xfs_buf_inode_iodone().
Dave Chinner	1319ebe	2020-06-29 14:48:46 -0700	[diff] [blame]	3575	*/
Christoph Hellwig	93848a9	2013-04-03 16:11:17 +1100	[diff] [blame]	3576	xfs_trans_ail_copy_lsn(mp->m_ail, &iip->ili_flush_lsn,
				3577	&iip->ili_item.li_lsn);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	3578
Christoph Hellwig	93848a9	2013-04-03 16:11:17 +1100	[diff] [blame]	3579	/* generate the checksum. */
				3580	xfs_dinode_calc_crc(mp, dip);
Brian Foster	f201929	2020-05-06 13:25:20 -0700	[diff] [blame]	3581	return error;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	3582	}
Darrick J. Wong	44a8736	2018-07-25 12:52:32 -0700	[diff] [blame]	3583
Dave Chinner	e6187b3	2020-06-29 14:49:19 -0700	[diff] [blame]	3584	/*
				3585	* Non-blocking flush of dirty inode metadata into the backing buffer.
				3586	*
				3587	* The caller must have a reference to the inode and hold the cluster buffer
				3588	* locked. The function will walk across all the inodes on the cluster buffer it
				3589	* can find and lock without blocking, and flush them to the cluster buffer.
				3590	*
Dave Chinner	5717ea4	2020-06-29 14:49:20 -0700	[diff] [blame]	3591	* On successful flushing of at least one inode, the caller must write out the
				3592	* buffer and release it. If no inodes are flushed, -EAGAIN will be returned and
				3593	* the caller needs to release the buffer. On failure, the filesystem will be
				3594	* shut down, the buffer will have been unlocked and released, and EFSCORRUPTED
				3595	* will be returned.
Dave Chinner	e6187b3	2020-06-29 14:49:19 -0700	[diff] [blame]	3596	*/
				3597	int
				3598	xfs_iflush_cluster(
Dave Chinner	e6187b3	2020-06-29 14:49:19 -0700	[diff] [blame]	3599	struct xfs_buf *bp)
				3600	{
Dave Chinner	5717ea4	2020-06-29 14:49:20 -0700	[diff] [blame]	3601	struct xfs_mount *mp = bp->b_mount;
				3602	struct xfs_log_item lip, n;
				3603	struct xfs_inode *ip;
				3604	struct xfs_inode_log_item *iip;
Dave Chinner	e6187b3	2020-06-29 14:49:19 -0700	[diff] [blame]	3605	int clcount = 0;
Dave Chinner	5717ea4	2020-06-29 14:49:20 -0700	[diff] [blame]	3606	int error = 0;
Dave Chinner	e6187b3	2020-06-29 14:49:19 -0700	[diff] [blame]	3607
Dave Chinner	5717ea4	2020-06-29 14:49:20 -0700	[diff] [blame]	3608	/*
				3609	* We must use the safe variant here as on shutdown xfs_iflush_abort()
				3610	* can remove itself from the list.
				3611	*/
				3612	list_for_each_entry_safe(lip, n, &bp->b_li_list, li_bio_list) {
				3613	iip = (struct xfs_inode_log_item *)lip;
				3614	ip = iip->ili_inode;
Dave Chinner	e6187b3	2020-06-29 14:49:19 -0700	[diff] [blame]	3615
				3616	/*
Dave Chinner	5717ea4	2020-06-29 14:49:20 -0700	[diff] [blame]	3617	* Quick and dirty check to avoid locks if possible.
Dave Chinner	e6187b3	2020-06-29 14:49:19 -0700	[diff] [blame]	3618	*/
Dave Chinner	718ecc5	2020-08-17 16:41:01 -0700	[diff] [blame]	3619	if (__xfs_iflags_test(ip, XFS_IRECLAIM \| XFS_IFLUSHING))
Dave Chinner	5717ea4	2020-06-29 14:49:20 -0700	[diff] [blame]	3620	continue;
				3621	if (xfs_ipincount(ip))
				3622	continue;
				3623
				3624	/*
				3625	* The inode is still attached to the buffer, which means it is
				3626	* dirty but reclaim might try to grab it. Check carefully for
				3627	* that, and grab the ilock while still holding the i_flags_lock
				3628	* to guarantee reclaim will not be able to reclaim this inode
				3629	* once we drop the i_flags_lock.
				3630	*/
				3631	spin_lock(&ip->i_flags_lock);
				3632	ASSERT(!__xfs_iflags_test(ip, XFS_ISTALE));
Dave Chinner	718ecc5	2020-08-17 16:41:01 -0700	[diff] [blame]	3633	if (__xfs_iflags_test(ip, XFS_IRECLAIM \| XFS_IFLUSHING)) {
Dave Chinner	5717ea4	2020-06-29 14:49:20 -0700	[diff] [blame]	3634	spin_unlock(&ip->i_flags_lock);
Dave Chinner	e6187b3	2020-06-29 14:49:19 -0700	[diff] [blame]	3635	continue;
				3636	}
				3637
				3638	/*
Dave Chinner	5717ea4	2020-06-29 14:49:20 -0700	[diff] [blame]	3639	* ILOCK will pin the inode against reclaim and prevent
				3640	* concurrent transactions modifying the inode while we are
Dave Chinner	718ecc5	2020-08-17 16:41:01 -0700	[diff] [blame]	3641	* flushing the inode. If we get the lock, set the flushing
				3642	* state before we drop the i_flags_lock.
Dave Chinner	e6187b3	2020-06-29 14:49:19 -0700	[diff] [blame]	3643	*/
Dave Chinner	5717ea4	2020-06-29 14:49:20 -0700	[diff] [blame]	3644	if (!xfs_ilock_nowait(ip, XFS_ILOCK_SHARED)) {
				3645	spin_unlock(&ip->i_flags_lock);
				3646	continue;
				3647	}
Dave Chinner	718ecc5	2020-08-17 16:41:01 -0700	[diff] [blame]	3648	__xfs_iflags_set(ip, XFS_IFLUSHING);
Dave Chinner	5717ea4	2020-06-29 14:49:20 -0700	[diff] [blame]	3649	spin_unlock(&ip->i_flags_lock);
				3650
				3651	/*
Dave Chinner	5717ea4	2020-06-29 14:49:20 -0700	[diff] [blame]	3652	* Abort flushing this inode if we are shut down because the
				3653	* inode may not currently be in the AIL. This can occur when
				3654	* log I/O failure unpins the inode without inserting into the
				3655	* AIL, leaving a dirty/unpinned inode attached to the buffer
				3656	* that otherwise looks like it should be flushed.
				3657	*/
				3658	if (XFS_FORCED_SHUTDOWN(mp)) {
				3659	xfs_iunpin_wait(ip);
Dave Chinner	5717ea4	2020-06-29 14:49:20 -0700	[diff] [blame]	3660	xfs_iflush_abort(ip);
				3661	xfs_iunlock(ip, XFS_ILOCK_SHARED);
				3662	error = -EIO;
				3663	continue;
				3664	}
				3665
				3666	/* don't block waiting on a log force to unpin dirty inodes */
				3667	if (xfs_ipincount(ip)) {
Dave Chinner	718ecc5	2020-08-17 16:41:01 -0700	[diff] [blame]	3668	xfs_iflags_clear(ip, XFS_IFLUSHING);
Dave Chinner	5717ea4	2020-06-29 14:49:20 -0700	[diff] [blame]	3669	xfs_iunlock(ip, XFS_ILOCK_SHARED);
				3670	continue;
				3671	}
				3672
				3673	if (!xfs_inode_clean(ip))
				3674	error = xfs_iflush(ip, bp);
				3675	else
Dave Chinner	718ecc5	2020-08-17 16:41:01 -0700	[diff] [blame]	3676	xfs_iflags_clear(ip, XFS_IFLUSHING);
Dave Chinner	5717ea4	2020-06-29 14:49:20 -0700	[diff] [blame]	3677	xfs_iunlock(ip, XFS_ILOCK_SHARED);
				3678	if (error)
Dave Chinner	e6187b3	2020-06-29 14:49:19 -0700	[diff] [blame]	3679	break;
Dave Chinner	5717ea4	2020-06-29 14:49:20 -0700	[diff] [blame]	3680	clcount++;
Dave Chinner	e6187b3	2020-06-29 14:49:19 -0700	[diff] [blame]	3681	}
				3682
Dave Chinner	e6187b3	2020-06-29 14:49:19 -0700	[diff] [blame]	3683	if (error) {
				3684	bp->b_flags \|= XBF_ASYNC;
				3685	xfs_buf_ioend_fail(bp);
				3686	xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
Dave Chinner	5717ea4	2020-06-29 14:49:20 -0700	[diff] [blame]	3687	return error;
Dave Chinner	e6187b3	2020-06-29 14:49:19 -0700	[diff] [blame]	3688	}
Dave Chinner	5717ea4	2020-06-29 14:49:20 -0700	[diff] [blame]	3689
				3690	if (!clcount)
				3691	return -EAGAIN;
				3692
				3693	XFS_STATS_INC(mp, xs_icluster_flushcnt);
				3694	XFS_STATS_ADD(mp, xs_icluster_flushinode, clcount);
				3695	return 0;
				3696
Dave Chinner	e6187b3	2020-06-29 14:49:19 -0700	[diff] [blame]	3697	}
				3698
Darrick J. Wong	44a8736	2018-07-25 12:52:32 -0700	[diff] [blame]	3699	/* Release an inode. */
				3700	void
				3701	xfs_irele(
				3702	struct xfs_inode *ip)
				3703	{
				3704	trace_xfs_irele(ip, _RET_IP_);
				3705	iput(VFS_I(ip));
				3706	}
Christoph Hellwig	54fbdd1	2020-04-03 11:45:37 -0700	[diff] [blame]	3707
				3708	/*
				3709	* Ensure all commited transactions touching the inode are written to the log.
				3710	*/
				3711	int
				3712	xfs_log_force_inode(
				3713	struct xfs_inode *ip)
				3714	{
Dave Chinner	5f9b4b0	2021-06-18 08:21:52 -0700	[diff] [blame]	3715	xfs_csn_t seq = 0;
Christoph Hellwig	54fbdd1	2020-04-03 11:45:37 -0700	[diff] [blame]	3716
				3717	xfs_ilock(ip, XFS_ILOCK_SHARED);
				3718	if (xfs_ipincount(ip))
Dave Chinner	5f9b4b0	2021-06-18 08:21:52 -0700	[diff] [blame]	3719	seq = ip->i_itemp->ili_commit_seq;
Christoph Hellwig	54fbdd1	2020-04-03 11:45:37 -0700	[diff] [blame]	3720	xfs_iunlock(ip, XFS_ILOCK_SHARED);
				3721
Dave Chinner	5f9b4b0	2021-06-18 08:21:52 -0700	[diff] [blame]	3722	if (!seq)
Christoph Hellwig	54fbdd1	2020-04-03 11:45:37 -0700	[diff] [blame]	3723	return 0;
Dave Chinner	5f9b4b0	2021-06-18 08:21:52 -0700	[diff] [blame]	3724	return xfs_log_force_seq(ip->i_mount, seq, XFS_LOG_SYNC, NULL);
Christoph Hellwig	54fbdd1	2020-04-03 11:45:37 -0700	[diff] [blame]	3725	}
Darrick J. Wong	e2aaee9	2020-06-29 14:47:20 -0700	[diff] [blame]	3726
				3727	/*
				3728	* Grab the exclusive iolock for a data copy from src to dest, making sure to
				3729	* abide vfs locking order (lowest pointer value goes first) and breaking the
				3730	* layout leases before proceeding. The loop is needed because we cannot call
				3731	* the blocking break_layout() with the iolocks held, and therefore have to
				3732	* back out both locks.
				3733	*/
				3734	static int
				3735	xfs_iolock_two_inodes_and_break_layout(
				3736	struct inode *src,
				3737	struct inode *dest)
				3738	{
				3739	int error;
				3740
				3741	if (src > dest)
				3742	swap(src, dest);
				3743
				3744	retry:
				3745	/* Wait to break both inodes' layouts before we start locking. */
				3746	error = break_layout(src, true);
				3747	if (error)
				3748	return error;
				3749	if (src != dest) {
				3750	error = break_layout(dest, true);
				3751	if (error)
				3752	return error;
				3753	}
				3754
				3755	/* Lock one inode and make sure nobody got in and leased it. */
				3756	inode_lock(src);
				3757	error = break_layout(src, false);
				3758	if (error) {
				3759	inode_unlock(src);
				3760	if (error == -EWOULDBLOCK)
				3761	goto retry;
				3762	return error;
				3763	}
				3764
				3765	if (src == dest)
				3766	return 0;
				3767
				3768	/* Lock the other inode and make sure nobody got in and leased it. */
				3769	inode_lock_nested(dest, I_MUTEX_NONDIR2);
				3770	error = break_layout(dest, false);
				3771	if (error) {
				3772	inode_unlock(src);
				3773	inode_unlock(dest);
				3774	if (error == -EWOULDBLOCK)
				3775	goto retry;
				3776	return error;
				3777	}
				3778
				3779	return 0;
				3780	}
				3781
				3782	/*
				3783	* Lock two inodes so that userspace cannot initiate I/O via file syscalls or
				3784	* mmap activity.
				3785	*/
				3786	int
				3787	xfs_ilock2_io_mmap(
				3788	struct xfs_inode *ip1,
				3789	struct xfs_inode *ip2)
				3790	{
				3791	int ret;
				3792
				3793	ret = xfs_iolock_two_inodes_and_break_layout(VFS_I(ip1), VFS_I(ip2));
				3794	if (ret)
				3795	return ret;
				3796	if (ip1 == ip2)
				3797	xfs_ilock(ip1, XFS_MMAPLOCK_EXCL);
				3798	else
				3799	xfs_lock_two_inodes(ip1, XFS_MMAPLOCK_EXCL,
				3800	ip2, XFS_MMAPLOCK_EXCL);
				3801	return 0;
				3802	}
				3803
				3804	/* Unlock both inodes to allow IO and mmap activity. */
				3805	void
				3806	xfs_iunlock2_io_mmap(
				3807	struct xfs_inode *ip1,
				3808	struct xfs_inode *ip2)
				3809	{
				3810	bool same_inode = (ip1 == ip2);
				3811
				3812	xfs_iunlock(ip2, XFS_MMAPLOCK_EXCL);
				3813	if (!same_inode)
				3814	xfs_iunlock(ip1, XFS_MMAPLOCK_EXCL);
				3815	inode_unlock(VFS_I(ip2));
				3816	if (!same_inode)
				3817	inode_unlock(VFS_I(ip1));
				3818	}