// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (c) 2000-2006 Silicon Graphics, Inc.
 * All Rights Reserved.
 */
#include <linux/iversion.h>

#include "xfs.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
#include "xfs_log_format.h"
#include "xfs_trans_resv.h"
#include "xfs_sb.h"
#include "xfs_mount.h"
#include "xfs_defer.h"
#include "xfs_inode.h"
#include "xfs_dir2.h"
#include "xfs_attr.h"
#include "xfs_trans_space.h"
#include "xfs_trans.h"
#include "xfs_buf_item.h"
#include "xfs_inode_item.h"
#include "xfs_ialloc.h"
#include "xfs_bmap.h"
#include "xfs_bmap_util.h"
#include "xfs_errortag.h"
#include "xfs_error.h"
#include "xfs_quota.h"
#include "xfs_filestream.h"
#include "xfs_trace.h"
#include "xfs_icache.h"
#include "xfs_symlink.h"
#include "xfs_trans_priv.h"
#include "xfs_log.h"
#include "xfs_bmap_btree.h"
#include "xfs_reflink.h"

kmem_zone_t *xfs_inode_zone;

/*
 * Used in xfs_itruncate_extents().  This is the maximum number of extents
 * freed from a file in a single transaction.
 */
#define	XFS_ITRUNC_MAX_EXTENTS	2

STATIC int xfs_iunlink(struct xfs_trans *, struct xfs_inode *);
STATIC int xfs_iunlink_remove(struct xfs_trans *, struct xfs_inode *);

/*
 * helper function to extract extent size hint from inode
 */
xfs_extlen_t
xfs_get_extsz_hint(
	struct xfs_inode	*ip)
{
	/*
	 * No point in aligning allocations if we need to COW to actually
	 * write to them.
	 */
	if (xfs_is_always_cow_inode(ip))
		return 0;
	if ((ip->i_diflags & XFS_DIFLAG_EXTSIZE) && ip->i_extsize)
		return ip->i_extsize;
	if (XFS_IS_REALTIME_INODE(ip))
		return ip->i_mount->m_sb.sb_rextsize;
	return 0;
}

/*
 * Helper function to extract CoW extent size hint from inode.
 * Between the extent size hint and the CoW extent size hint, we
 * return the greater of the two.  If the value is zero (automatic),
 * use the default size.
 */
xfs_extlen_t
xfs_get_cowextsz_hint(
	struct xfs_inode	*ip)
{
	xfs_extlen_t		a, b;

	a = 0;
	if (ip->i_diflags2 & XFS_DIFLAG2_COWEXTSIZE)
		a = ip->i_cowextsize;
	b = xfs_get_extsz_hint(ip);

	a = max(a, b);
	if (a == 0)
		return XFS_DEFAULT_COWEXTSZ_HINT;
	return a;
}
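
/*
 * Illustrative example (hypothetical values, not taken from a real caller):
 * with i_cowextsize set to 32 blocks and a regular extent size hint of 128
 * blocks, the helper above returns 128, the larger of the two; with neither
 * hint set it falls back to XFS_DEFAULT_COWEXTSZ_HINT.
 */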

/*
 * These two are wrapper routines around the xfs_ilock() routine used to
 * centralize some grungy code.  They are used in places that wish to lock the
 * inode solely for reading the extents.  The reason these places can't just
 * call xfs_ilock(ip, XFS_ILOCK_SHARED) is that the inode lock also guards the
 * reading in of the extents from disk for a file in b-tree format.  If the
 * inode is in b-tree format, then we need to lock the inode exclusively until
 * the extents are read in.  Locking it exclusively all the time would limit
 * our parallelism unnecessarily, though.  What we do instead is check to see
 * if the extents have been read in yet, and only lock the inode exclusively
 * if they have not.
 *
 * The functions return a value which should be given to the corresponding
 * xfs_iunlock() call.
 */
uint
xfs_ilock_data_map_shared(
	struct xfs_inode	*ip)
{
	uint			lock_mode = XFS_ILOCK_SHARED;

	if (xfs_need_iread_extents(&ip->i_df))
		lock_mode = XFS_ILOCK_EXCL;
	xfs_ilock(ip, lock_mode);
	return lock_mode;
}

uint
xfs_ilock_attr_map_shared(
	struct xfs_inode	*ip)
{
	uint			lock_mode = XFS_ILOCK_SHARED;

	if (ip->i_afp && xfs_need_iread_extents(ip->i_afp))
		lock_mode = XFS_ILOCK_EXCL;
	xfs_ilock(ip, lock_mode);
	return lock_mode;
}
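
/*
 * Usage sketch (illustrative only, not a caller from this file): the returned
 * lock mode must be handed back to xfs_iunlock() unchanged, since the helper
 * may have chosen either the shared or the exclusive ilock:
 *
 *	lock_mode = xfs_ilock_data_map_shared(ip);
 *	... read the data fork extent list ...
 *	xfs_iunlock(ip, lock_mode);
 */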

/*
 * In addition to i_rwsem in the VFS inode, the xfs inode contains 2
 * multi-reader locks: i_mmap_lock and the i_lock.  This routine allows
 * various combinations of the locks to be obtained.
 *
 * The 3 locks should always be ordered so that the IO lock is obtained first,
 * the mmap lock second and the ilock last in order to prevent deadlock.
 *
 * Basic locking order:
 *
 * i_rwsem -> i_mmap_lock -> page_lock -> i_lock
 *
 * mmap_lock locking order:
 *
 * i_rwsem -> page lock -> mmap_lock
 * mmap_lock -> i_mmap_lock -> page_lock
 *
 * The difference in mmap_lock locking order means that we cannot hold the
 * i_mmap_lock over syscall based read(2)/write(2) IO. These IO paths can
 * fault in pages during copy in/out (for buffered IO) or require the mmap_lock
 * in get_user_pages() to map the user pages into the kernel address space for
 * direct IO. Similarly the i_rwsem cannot be taken inside a page fault because
 * page faults already hold the mmap_lock.
 *
 * Hence to serialise fully against both syscall and mmap based IO, we need to
 * take both the i_rwsem and the i_mmap_lock. These locks should *only* be both
 * taken in places where we need to invalidate the page cache in a race
 * free manner (e.g. truncate, hole punch and other extent manipulation
 * functions).
 */
void
xfs_ilock(
	xfs_inode_t		*ip,
	uint			lock_flags)
{
	trace_xfs_ilock(ip, lock_flags, _RET_IP_);

	/*
	 * You can't set both SHARED and EXCL for the same lock,
	 * and only XFS_IOLOCK_SHARED, XFS_IOLOCK_EXCL, XFS_MMAPLOCK_SHARED,
	 * XFS_MMAPLOCK_EXCL, XFS_ILOCK_SHARED and XFS_ILOCK_EXCL are valid
	 * values to set in lock_flags.
	 */
	ASSERT((lock_flags & (XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL)) !=
	       (XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL));
	ASSERT((lock_flags & (XFS_MMAPLOCK_SHARED | XFS_MMAPLOCK_EXCL)) !=
	       (XFS_MMAPLOCK_SHARED | XFS_MMAPLOCK_EXCL));
	ASSERT((lock_flags & (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)) !=
	       (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL));
	ASSERT((lock_flags & ~(XFS_LOCK_MASK | XFS_LOCK_SUBCLASS_MASK)) == 0);

	if (lock_flags & XFS_IOLOCK_EXCL) {
		down_write_nested(&VFS_I(ip)->i_rwsem,
				  XFS_IOLOCK_DEP(lock_flags));
	} else if (lock_flags & XFS_IOLOCK_SHARED) {
		down_read_nested(&VFS_I(ip)->i_rwsem,
				 XFS_IOLOCK_DEP(lock_flags));
	}

	if (lock_flags & XFS_MMAPLOCK_EXCL)
		mrupdate_nested(&ip->i_mmaplock, XFS_MMAPLOCK_DEP(lock_flags));
	else if (lock_flags & XFS_MMAPLOCK_SHARED)
		mraccess_nested(&ip->i_mmaplock, XFS_MMAPLOCK_DEP(lock_flags));

	if (lock_flags & XFS_ILOCK_EXCL)
		mrupdate_nested(&ip->i_lock, XFS_ILOCK_DEP(lock_flags));
	else if (lock_flags & XFS_ILOCK_SHARED)
		mraccess_nested(&ip->i_lock, XFS_ILOCK_DEP(lock_flags));
}
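
/*
 * Illustrative example (hypothetical caller): to serialise against both
 * syscall and mmap based IO as described above, an extent-manipulating
 * caller takes both locks together and drops them with the same flags:
 *
 *	xfs_ilock(ip, XFS_IOLOCK_EXCL | XFS_MMAPLOCK_EXCL);
 *	... invalidate the page cache, punch the hole, etc. ...
 *	xfs_iunlock(ip, XFS_IOLOCK_EXCL | XFS_MMAPLOCK_EXCL);
 */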

/*
 * This is just like xfs_ilock(), except that the caller
 * is guaranteed not to sleep.  It returns 1 if it gets
 * the requested locks and 0 otherwise.  If the IO lock is
 * obtained but the inode lock cannot be, then the IO lock
 * is dropped before returning.
 *
 * ip -- the inode being locked
 * lock_flags -- this parameter indicates which of the inode's locks are to
 *	be locked.  See the comment for xfs_ilock() for a list of valid
 *	values.
 */
int
xfs_ilock_nowait(
	xfs_inode_t		*ip,
	uint			lock_flags)
{
	trace_xfs_ilock_nowait(ip, lock_flags, _RET_IP_);

	/*
	 * You can't set both SHARED and EXCL for the same lock,
	 * and only XFS_IOLOCK_SHARED, XFS_IOLOCK_EXCL, XFS_MMAPLOCK_SHARED,
	 * XFS_MMAPLOCK_EXCL, XFS_ILOCK_SHARED and XFS_ILOCK_EXCL are valid
	 * values to set in lock_flags.
	 */
	ASSERT((lock_flags & (XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL)) !=
	       (XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL));
	ASSERT((lock_flags & (XFS_MMAPLOCK_SHARED | XFS_MMAPLOCK_EXCL)) !=
	       (XFS_MMAPLOCK_SHARED | XFS_MMAPLOCK_EXCL));
	ASSERT((lock_flags & (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)) !=
	       (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL));
	ASSERT((lock_flags & ~(XFS_LOCK_MASK | XFS_LOCK_SUBCLASS_MASK)) == 0);

	if (lock_flags & XFS_IOLOCK_EXCL) {
		if (!down_write_trylock(&VFS_I(ip)->i_rwsem))
			goto out;
	} else if (lock_flags & XFS_IOLOCK_SHARED) {
		if (!down_read_trylock(&VFS_I(ip)->i_rwsem))
			goto out;
	}

	if (lock_flags & XFS_MMAPLOCK_EXCL) {
		if (!mrtryupdate(&ip->i_mmaplock))
			goto out_undo_iolock;
	} else if (lock_flags & XFS_MMAPLOCK_SHARED) {
		if (!mrtryaccess(&ip->i_mmaplock))
			goto out_undo_iolock;
	}

	if (lock_flags & XFS_ILOCK_EXCL) {
		if (!mrtryupdate(&ip->i_lock))
			goto out_undo_mmaplock;
	} else if (lock_flags & XFS_ILOCK_SHARED) {
		if (!mrtryaccess(&ip->i_lock))
			goto out_undo_mmaplock;
	}
	return 1;

out_undo_mmaplock:
	if (lock_flags & XFS_MMAPLOCK_EXCL)
		mrunlock_excl(&ip->i_mmaplock);
	else if (lock_flags & XFS_MMAPLOCK_SHARED)
		mrunlock_shared(&ip->i_mmaplock);
out_undo_iolock:
	if (lock_flags & XFS_IOLOCK_EXCL)
		up_write(&VFS_I(ip)->i_rwsem);
	else if (lock_flags & XFS_IOLOCK_SHARED)
		up_read(&VFS_I(ip)->i_rwsem);
out:
	return 0;
}
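
/*
 * Illustrative sketch (hypothetical caller, not from this file): the trylock
 * variant lets a caller back off rather than sleep, e.g.
 *
 *	if (!xfs_ilock_nowait(ip, XFS_ILOCK_EXCL))
 *		return -EAGAIN;		(caller-specific fallback)
 *	...
 *	xfs_iunlock(ip, XFS_ILOCK_EXCL);
 */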

/*
 * xfs_iunlock() is used to drop the inode locks acquired with
 * xfs_ilock() and xfs_ilock_nowait().  The caller must pass
 * in the flags given to xfs_ilock() or xfs_ilock_nowait() so
 * that we know which locks to drop.
 *
 * ip -- the inode being unlocked
 * lock_flags -- this parameter indicates which of the inode's locks are to
 *	be unlocked.  See the comment for xfs_ilock() for a list of valid
 *	values for this parameter.
 */
void
xfs_iunlock(
	xfs_inode_t		*ip,
	uint			lock_flags)
{
	/*
	 * You can't set both SHARED and EXCL for the same lock,
	 * and only XFS_IOLOCK_SHARED, XFS_IOLOCK_EXCL, XFS_MMAPLOCK_SHARED,
	 * XFS_MMAPLOCK_EXCL, XFS_ILOCK_SHARED and XFS_ILOCK_EXCL are valid
	 * values to set in lock_flags.
	 */
	ASSERT((lock_flags & (XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL)) !=
	       (XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL));
	ASSERT((lock_flags & (XFS_MMAPLOCK_SHARED | XFS_MMAPLOCK_EXCL)) !=
	       (XFS_MMAPLOCK_SHARED | XFS_MMAPLOCK_EXCL));
	ASSERT((lock_flags & (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)) !=
	       (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL));
	ASSERT((lock_flags & ~(XFS_LOCK_MASK | XFS_LOCK_SUBCLASS_MASK)) == 0);
	ASSERT(lock_flags != 0);

	if (lock_flags & XFS_IOLOCK_EXCL)
		up_write(&VFS_I(ip)->i_rwsem);
	else if (lock_flags & XFS_IOLOCK_SHARED)
		up_read(&VFS_I(ip)->i_rwsem);

	if (lock_flags & XFS_MMAPLOCK_EXCL)
		mrunlock_excl(&ip->i_mmaplock);
	else if (lock_flags & XFS_MMAPLOCK_SHARED)
		mrunlock_shared(&ip->i_mmaplock);

	if (lock_flags & XFS_ILOCK_EXCL)
		mrunlock_excl(&ip->i_lock);
	else if (lock_flags & XFS_ILOCK_SHARED)
		mrunlock_shared(&ip->i_lock);

	trace_xfs_iunlock(ip, lock_flags, _RET_IP_);
}

/*
 * Give up write locks.  The i/o lock cannot be held nested
 * if it is being demoted.
 */
void
xfs_ilock_demote(
	xfs_inode_t		*ip,
	uint			lock_flags)
{
	ASSERT(lock_flags & (XFS_IOLOCK_EXCL|XFS_MMAPLOCK_EXCL|XFS_ILOCK_EXCL));
	ASSERT((lock_flags &
		~(XFS_IOLOCK_EXCL|XFS_MMAPLOCK_EXCL|XFS_ILOCK_EXCL)) == 0);

	if (lock_flags & XFS_ILOCK_EXCL)
		mrdemote(&ip->i_lock);
	if (lock_flags & XFS_MMAPLOCK_EXCL)
		mrdemote(&ip->i_mmaplock);
	if (lock_flags & XFS_IOLOCK_EXCL)
		downgrade_write(&VFS_I(ip)->i_rwsem);

	trace_xfs_ilock_demote(ip, lock_flags, _RET_IP_);
}

#if defined(DEBUG) || defined(XFS_WARN)
int
xfs_isilocked(
	xfs_inode_t		*ip,
	uint			lock_flags)
{
	if (lock_flags & (XFS_ILOCK_EXCL|XFS_ILOCK_SHARED)) {
		if (!(lock_flags & XFS_ILOCK_SHARED))
			return !!ip->i_lock.mr_writer;
		return rwsem_is_locked(&ip->i_lock.mr_lock);
	}

	if (lock_flags & (XFS_MMAPLOCK_EXCL|XFS_MMAPLOCK_SHARED)) {
		if (!(lock_flags & XFS_MMAPLOCK_SHARED))
			return !!ip->i_mmaplock.mr_writer;
		return rwsem_is_locked(&ip->i_mmaplock.mr_lock);
	}

	if (lock_flags & (XFS_IOLOCK_EXCL|XFS_IOLOCK_SHARED)) {
		if (!(lock_flags & XFS_IOLOCK_SHARED))
			return !debug_locks ||
				lockdep_is_held_type(&VFS_I(ip)->i_rwsem, 0);
		return rwsem_is_locked(&VFS_I(ip)->i_rwsem);
	}

	ASSERT(0);
	return 0;
}
#endif

/*
 * xfs_lockdep_subclass_ok() is only used in an ASSERT, so is only called when
 * DEBUG or XFS_WARN is set. And MAX_LOCKDEP_SUBCLASSES is then only defined
 * when CONFIG_LOCKDEP is set. Hence the complex define below to avoid build
 * errors and warnings.
 */
#if (defined(DEBUG) || defined(XFS_WARN)) && defined(CONFIG_LOCKDEP)
static bool
xfs_lockdep_subclass_ok(
	int subclass)
{
	return subclass < MAX_LOCKDEP_SUBCLASSES;
}
#else
#define xfs_lockdep_subclass_ok(subclass)	(true)
#endif

/*
 * Bump the subclass so xfs_lock_inodes() acquires each lock with a different
 * value. This can be called for any type of inode lock combination, including
 * parent locking. Care must be taken to ensure we don't overrun the subclass
 * storage fields in the class mask we build.
 */
static inline int
xfs_lock_inumorder(int lock_mode, int subclass)
{
	int	class = 0;

	ASSERT(!(lock_mode & (XFS_ILOCK_PARENT | XFS_ILOCK_RTBITMAP |
			      XFS_ILOCK_RTSUM)));
	ASSERT(xfs_lockdep_subclass_ok(subclass));

	if (lock_mode & (XFS_IOLOCK_SHARED|XFS_IOLOCK_EXCL)) {
		ASSERT(subclass <= XFS_IOLOCK_MAX_SUBCLASS);
		class += subclass << XFS_IOLOCK_SHIFT;
	}

	if (lock_mode & (XFS_MMAPLOCK_SHARED|XFS_MMAPLOCK_EXCL)) {
		ASSERT(subclass <= XFS_MMAPLOCK_MAX_SUBCLASS);
		class += subclass << XFS_MMAPLOCK_SHIFT;
	}

	if (lock_mode & (XFS_ILOCK_SHARED|XFS_ILOCK_EXCL)) {
		ASSERT(subclass <= XFS_ILOCK_MAX_SUBCLASS);
		class += subclass << XFS_ILOCK_SHIFT;
	}

	return (lock_mode & ~XFS_LOCK_SUBCLASS_MASK) | class;
}
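
/*
 * Worked example (illustrative): xfs_lock_inumorder(XFS_ILOCK_EXCL, 2) keeps
 * the lock mode bits and ORs in subclass 2 shifted by XFS_ILOCK_SHIFT, so the
 * third inode locked by xfs_lock_inodes() gets its own lockdep subclass and
 * the nested locking is not reported as a locking order violation.
 */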

/*
 * The following routine will lock n inodes in exclusive mode.  We assume the
 * caller calls us with the inodes in i_ino order.
 *
 * We need to detect deadlock where an inode that we lock is in the AIL and we
 * start waiting for another inode that is locked by a thread in a long running
 * transaction (such as truncate). This can result in deadlock since the long
 * running trans might need to wait for the inode we just locked in order to
 * push the tail and free space in the log.
 *
 * xfs_lock_inodes() can only be used to lock one type of lock at a time -
 * the iolock, the mmaplock or the ilock, but not more than one at a time. If we
 * lock more than one at a time, lockdep will report false positives saying we
 * have violated locking orders.
 */
static void
xfs_lock_inodes(
	struct xfs_inode	**ips,
	int			inodes,
	uint			lock_mode)
{
	int			attempts = 0, i, j, try_lock;
	struct xfs_log_item	*lp;

	/*
	 * Currently supports between 2 and 5 inodes with exclusive locking. We
	 * support an arbitrary depth of locking here, but absolute limits on
	 * inodes depend on the type of locking and the limits placed by
	 * lockdep annotations in xfs_lock_inumorder.  These are all checked by
	 * the asserts.
	 */
	ASSERT(ips && inodes >= 2 && inodes <= 5);
	ASSERT(lock_mode & (XFS_IOLOCK_EXCL | XFS_MMAPLOCK_EXCL |
			    XFS_ILOCK_EXCL));
	ASSERT(!(lock_mode & (XFS_IOLOCK_SHARED | XFS_MMAPLOCK_SHARED |
			      XFS_ILOCK_SHARED)));
	ASSERT(!(lock_mode & XFS_MMAPLOCK_EXCL) ||
		inodes <= XFS_MMAPLOCK_MAX_SUBCLASS + 1);
	ASSERT(!(lock_mode & XFS_ILOCK_EXCL) ||
		inodes <= XFS_ILOCK_MAX_SUBCLASS + 1);

	if (lock_mode & XFS_IOLOCK_EXCL) {
		ASSERT(!(lock_mode & (XFS_MMAPLOCK_EXCL | XFS_ILOCK_EXCL)));
	} else if (lock_mode & XFS_MMAPLOCK_EXCL)
		ASSERT(!(lock_mode & XFS_ILOCK_EXCL));

	try_lock = 0;
	i = 0;
again:
	for (; i < inodes; i++) {
		ASSERT(ips[i]);

		if (i && (ips[i] == ips[i - 1]))	/* Already locked */
			continue;

		/*
		 * If try_lock is not set yet, make sure all locked inodes are
		 * not in the AIL.  If any are, set try_lock to be used later.
		 */
		if (!try_lock) {
			for (j = (i - 1); j >= 0 && !try_lock; j--) {
				lp = &ips[j]->i_itemp->ili_item;
				if (lp && test_bit(XFS_LI_IN_AIL, &lp->li_flags))
					try_lock++;
			}
		}

		/*
		 * If any of the previous locks we have locked is in the AIL,
		 * we must TRY to get the second and subsequent locks. If
		 * we can't get any, we must release all we have
		 * and try again.
		 */
		if (!try_lock) {
			xfs_ilock(ips[i], xfs_lock_inumorder(lock_mode, i));
			continue;
		}

		/* try_lock means we have an inode locked that is in the AIL. */
		ASSERT(i != 0);
		if (xfs_ilock_nowait(ips[i], xfs_lock_inumorder(lock_mode, i)))
			continue;

		/*
		 * Unlock all previous guys and try again.  xfs_iunlock will try
		 * to push the tail if the inode is in the AIL.
		 */
		attempts++;
		for (j = i - 1; j >= 0; j--) {
			/*
			 * Check to see if we've already unlocked this one.  Not
			 * the first one going back, and the inode ptr is the
			 * same.
			 */
			if (j != (i - 1) && ips[j] == ips[j + 1])
				continue;

			xfs_iunlock(ips[j], lock_mode);
		}

		if ((attempts % 5) == 0) {
			delay(1); /* Don't just spin the CPU */
		}
		i = 0;
		try_lock = 0;
		goto again;
	}
}

/*
 * xfs_lock_two_inodes() can only be used to lock one type of lock at a time -
 * the mmaplock or the ilock, but not more than one type at a time. If we lock
 * more than one at a time, lockdep will report false positives saying we have
 * violated locking orders.  The iolock must be double-locked separately since
 * we use i_rwsem for that.  We now support taking one lock EXCL and the other
 * SHARED.
 */
void
xfs_lock_two_inodes(
	struct xfs_inode	*ip0,
	uint			ip0_mode,
	struct xfs_inode	*ip1,
	uint			ip1_mode)
{
	struct xfs_inode	*temp;
	uint			mode_temp;
	int			attempts = 0;
	struct xfs_log_item	*lp;

	ASSERT(hweight32(ip0_mode) == 1);
	ASSERT(hweight32(ip1_mode) == 1);
	ASSERT(!(ip0_mode & (XFS_IOLOCK_SHARED|XFS_IOLOCK_EXCL)));
	ASSERT(!(ip1_mode & (XFS_IOLOCK_SHARED|XFS_IOLOCK_EXCL)));
	ASSERT(!(ip0_mode & (XFS_MMAPLOCK_SHARED|XFS_MMAPLOCK_EXCL)) ||
	       !(ip0_mode & (XFS_ILOCK_SHARED|XFS_ILOCK_EXCL)));
	ASSERT(!(ip1_mode & (XFS_MMAPLOCK_SHARED|XFS_MMAPLOCK_EXCL)) ||
	       !(ip1_mode & (XFS_ILOCK_SHARED|XFS_ILOCK_EXCL)));
	ASSERT(!(ip1_mode & (XFS_MMAPLOCK_SHARED|XFS_MMAPLOCK_EXCL)) ||
	       !(ip0_mode & (XFS_ILOCK_SHARED|XFS_ILOCK_EXCL)));
	ASSERT(!(ip0_mode & (XFS_MMAPLOCK_SHARED|XFS_MMAPLOCK_EXCL)) ||
	       !(ip1_mode & (XFS_ILOCK_SHARED|XFS_ILOCK_EXCL)));

	ASSERT(ip0->i_ino != ip1->i_ino);

	if (ip0->i_ino > ip1->i_ino) {
		temp = ip0;
		ip0 = ip1;
		ip1 = temp;
		mode_temp = ip0_mode;
		ip0_mode = ip1_mode;
		ip1_mode = mode_temp;
	}

 again:
	xfs_ilock(ip0, xfs_lock_inumorder(ip0_mode, 0));

	/*
	 * If the first lock we have locked is in the AIL, we must TRY to get
	 * the second lock. If we can't get it, we must release the first one
	 * and try again.
	 */
	lp = &ip0->i_itemp->ili_item;
	if (lp && test_bit(XFS_LI_IN_AIL, &lp->li_flags)) {
		if (!xfs_ilock_nowait(ip1, xfs_lock_inumorder(ip1_mode, 1))) {
			xfs_iunlock(ip0, ip0_mode);
			if ((++attempts % 5) == 0)
				delay(1); /* Don't just spin the CPU */
			goto again;
		}
	} else {
		xfs_ilock(ip1, xfs_lock_inumorder(ip1_mode, 1));
	}
}
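
/*
 * Illustrative call (hypothetical inodes, not a caller from this file): as
 * noted above, one inode may be taken exclusive while the other is shared:
 *
 *	xfs_lock_two_inodes(ip, XFS_ILOCK_EXCL, tip, XFS_ILOCK_SHARED);
 */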
597
Linus Torvalds1da177e2005-04-16 15:20:36 -0700598uint
599xfs_ip2xflags(
Dave Chinner58f88ca2016-01-04 16:44:15 +1100600 struct xfs_inode *ip)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700601{
Christoph Hellwig44225012021-03-29 11:11:46 -0700602 uint flags = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700603
Christoph Hellwig44225012021-03-29 11:11:46 -0700604 if (ip->i_diflags & XFS_DIFLAG_ANY) {
605 if (ip->i_diflags & XFS_DIFLAG_REALTIME)
606 flags |= FS_XFLAG_REALTIME;
607 if (ip->i_diflags & XFS_DIFLAG_PREALLOC)
608 flags |= FS_XFLAG_PREALLOC;
609 if (ip->i_diflags & XFS_DIFLAG_IMMUTABLE)
610 flags |= FS_XFLAG_IMMUTABLE;
611 if (ip->i_diflags & XFS_DIFLAG_APPEND)
612 flags |= FS_XFLAG_APPEND;
613 if (ip->i_diflags & XFS_DIFLAG_SYNC)
614 flags |= FS_XFLAG_SYNC;
615 if (ip->i_diflags & XFS_DIFLAG_NOATIME)
616 flags |= FS_XFLAG_NOATIME;
617 if (ip->i_diflags & XFS_DIFLAG_NODUMP)
618 flags |= FS_XFLAG_NODUMP;
619 if (ip->i_diflags & XFS_DIFLAG_RTINHERIT)
620 flags |= FS_XFLAG_RTINHERIT;
621 if (ip->i_diflags & XFS_DIFLAG_PROJINHERIT)
622 flags |= FS_XFLAG_PROJINHERIT;
623 if (ip->i_diflags & XFS_DIFLAG_NOSYMLINKS)
624 flags |= FS_XFLAG_NOSYMLINKS;
625 if (ip->i_diflags & XFS_DIFLAG_EXTSIZE)
626 flags |= FS_XFLAG_EXTSIZE;
627 if (ip->i_diflags & XFS_DIFLAG_EXTSZINHERIT)
628 flags |= FS_XFLAG_EXTSZINHERIT;
629 if (ip->i_diflags & XFS_DIFLAG_NODEFRAG)
630 flags |= FS_XFLAG_NODEFRAG;
631 if (ip->i_diflags & XFS_DIFLAG_FILESTREAM)
632 flags |= FS_XFLAG_FILESTREAM;
633 }
634
635 if (ip->i_diflags2 & XFS_DIFLAG2_ANY) {
636 if (ip->i_diflags2 & XFS_DIFLAG2_DAX)
637 flags |= FS_XFLAG_DAX;
638 if (ip->i_diflags2 & XFS_DIFLAG2_COWEXTSIZE)
639 flags |= FS_XFLAG_COWEXTSIZE;
640 }
641
642 if (XFS_IFORK_Q(ip))
643 flags |= FS_XFLAG_HASATTR;
644 return flags;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700645}
646
Linus Torvalds1da177e2005-04-16 15:20:36 -0700647/*
Dave Chinnerc24b5df2013-08-12 20:49:45 +1000648 * Lookups up an inode from "name". If ci_name is not NULL, then a CI match
649 * is allowed, otherwise it has to be an exact match. If a CI match is found,
650 * ci_name->name will point to a the actual name (caller must free) or
651 * will be set to NULL if an exact match is found.
652 */
653int
654xfs_lookup(
655 xfs_inode_t *dp,
656 struct xfs_name *name,
657 xfs_inode_t **ipp,
658 struct xfs_name *ci_name)
659{
660 xfs_ino_t inum;
661 int error;
Dave Chinnerc24b5df2013-08-12 20:49:45 +1000662
663 trace_xfs_lookup(dp, name);
664
665 if (XFS_FORCED_SHUTDOWN(dp->i_mount))
Dave Chinner24513372014-06-25 14:58:08 +1000666 return -EIO;
Dave Chinnerc24b5df2013-08-12 20:49:45 +1000667
Dave Chinnerc24b5df2013-08-12 20:49:45 +1000668 error = xfs_dir_lookup(NULL, dp, name, &inum, ci_name);
Dave Chinnerc24b5df2013-08-12 20:49:45 +1000669 if (error)
Dave Chinnerdbad7c92015-08-19 10:33:00 +1000670 goto out_unlock;
Dave Chinnerc24b5df2013-08-12 20:49:45 +1000671
672 error = xfs_iget(dp->i_mount, NULL, inum, 0, 0, ipp);
673 if (error)
674 goto out_free_name;
675
676 return 0;
677
678out_free_name:
679 if (ci_name)
680 kmem_free(ci_name->name);
Dave Chinnerdbad7c92015-08-19 10:33:00 +1000681out_unlock:
Dave Chinnerc24b5df2013-08-12 20:49:45 +1000682 *ipp = NULL;
683 return error;
684}
685
Darrick J. Wong8a569d72020-09-13 10:16:40 -0700686/* Propagate di_flags from a parent inode to a child inode. */
687static void
688xfs_inode_inherit_flags(
689 struct xfs_inode *ip,
690 const struct xfs_inode *pip)
691{
692 unsigned int di_flags = 0;
693 umode_t mode = VFS_I(ip)->i_mode;
694
695 if (S_ISDIR(mode)) {
Christoph Hellwigdb073492021-03-29 11:11:44 -0700696 if (pip->i_diflags & XFS_DIFLAG_RTINHERIT)
Darrick J. Wong8a569d72020-09-13 10:16:40 -0700697 di_flags |= XFS_DIFLAG_RTINHERIT;
Christoph Hellwigdb073492021-03-29 11:11:44 -0700698 if (pip->i_diflags & XFS_DIFLAG_EXTSZINHERIT) {
Darrick J. Wong8a569d72020-09-13 10:16:40 -0700699 di_flags |= XFS_DIFLAG_EXTSZINHERIT;
Christoph Hellwig031474c2021-03-29 11:11:41 -0700700 ip->i_extsize = pip->i_extsize;
Darrick J. Wong8a569d72020-09-13 10:16:40 -0700701 }
Christoph Hellwigdb073492021-03-29 11:11:44 -0700702 if (pip->i_diflags & XFS_DIFLAG_PROJINHERIT)
Darrick J. Wong8a569d72020-09-13 10:16:40 -0700703 di_flags |= XFS_DIFLAG_PROJINHERIT;
704 } else if (S_ISREG(mode)) {
Christoph Hellwigdb073492021-03-29 11:11:44 -0700705 if ((pip->i_diflags & XFS_DIFLAG_RTINHERIT) &&
Darrick J. Wongd4f2c142020-09-13 10:16:41 -0700706 xfs_sb_version_hasrealtime(&ip->i_mount->m_sb))
Darrick J. Wong8a569d72020-09-13 10:16:40 -0700707 di_flags |= XFS_DIFLAG_REALTIME;
Christoph Hellwigdb073492021-03-29 11:11:44 -0700708 if (pip->i_diflags & XFS_DIFLAG_EXTSZINHERIT) {
Darrick J. Wong8a569d72020-09-13 10:16:40 -0700709 di_flags |= XFS_DIFLAG_EXTSIZE;
Christoph Hellwig031474c2021-03-29 11:11:41 -0700710 ip->i_extsize = pip->i_extsize;
Darrick J. Wong8a569d72020-09-13 10:16:40 -0700711 }
712 }
Christoph Hellwigdb073492021-03-29 11:11:44 -0700713 if ((pip->i_diflags & XFS_DIFLAG_NOATIME) &&
Darrick J. Wong8a569d72020-09-13 10:16:40 -0700714 xfs_inherit_noatime)
715 di_flags |= XFS_DIFLAG_NOATIME;
Christoph Hellwigdb073492021-03-29 11:11:44 -0700716 if ((pip->i_diflags & XFS_DIFLAG_NODUMP) &&
Darrick J. Wong8a569d72020-09-13 10:16:40 -0700717 xfs_inherit_nodump)
718 di_flags |= XFS_DIFLAG_NODUMP;
Christoph Hellwigdb073492021-03-29 11:11:44 -0700719 if ((pip->i_diflags & XFS_DIFLAG_SYNC) &&
Darrick J. Wong8a569d72020-09-13 10:16:40 -0700720 xfs_inherit_sync)
721 di_flags |= XFS_DIFLAG_SYNC;
Christoph Hellwigdb073492021-03-29 11:11:44 -0700722 if ((pip->i_diflags & XFS_DIFLAG_NOSYMLINKS) &&
Darrick J. Wong8a569d72020-09-13 10:16:40 -0700723 xfs_inherit_nosymlinks)
724 di_flags |= XFS_DIFLAG_NOSYMLINKS;
Christoph Hellwigdb073492021-03-29 11:11:44 -0700725 if ((pip->i_diflags & XFS_DIFLAG_NODEFRAG) &&
Darrick J. Wong8a569d72020-09-13 10:16:40 -0700726 xfs_inherit_nodefrag)
727 di_flags |= XFS_DIFLAG_NODEFRAG;
Christoph Hellwigdb073492021-03-29 11:11:44 -0700728 if (pip->i_diflags & XFS_DIFLAG_FILESTREAM)
Darrick J. Wong8a569d72020-09-13 10:16:40 -0700729 di_flags |= XFS_DIFLAG_FILESTREAM;
730
Christoph Hellwigdb073492021-03-29 11:11:44 -0700731 ip->i_diflags |= di_flags;
Darrick J. Wong8a569d72020-09-13 10:16:40 -0700732}
733
734/* Propagate di_flags2 from a parent inode to a child inode. */
735static void
736xfs_inode_inherit_flags2(
737 struct xfs_inode *ip,
738 const struct xfs_inode *pip)
739{
Christoph Hellwig3e09ab82021-03-29 11:11:45 -0700740 if (pip->i_diflags2 & XFS_DIFLAG2_COWEXTSIZE) {
741 ip->i_diflags2 |= XFS_DIFLAG2_COWEXTSIZE;
Christoph Hellwigb33ce572021-03-29 11:11:42 -0700742 ip->i_cowextsize = pip->i_cowextsize;
Darrick J. Wong8a569d72020-09-13 10:16:40 -0700743 }
Christoph Hellwig3e09ab82021-03-29 11:11:45 -0700744 if (pip->i_diflags2 & XFS_DIFLAG2_DAX)
745 ip->i_diflags2 |= XFS_DIFLAG2_DAX;
Darrick J. Wong8a569d72020-09-13 10:16:40 -0700746}
747
Dave Chinnerc24b5df2013-08-12 20:49:45 +1000748/*
Dave Chinner1abcf262020-12-09 10:05:15 -0800749 * Initialise a newly allocated inode and return the in-core inode to the
750 * caller locked exclusively.
Linus Torvalds1da177e2005-04-16 15:20:36 -0700751 */
Eric Sandeen0d5a75e2016-06-01 17:38:15 +1000752static int
Dave Chinner1abcf262020-12-09 10:05:15 -0800753xfs_init_new_inode(
Christoph Hellwigf736d932021-01-21 14:19:58 +0100754 struct user_namespace *mnt_userns,
Dave Chinner1abcf262020-12-09 10:05:15 -0800755 struct xfs_trans *tp,
756 struct xfs_inode *pip,
757 xfs_ino_t ino,
758 umode_t mode,
759 xfs_nlink_t nlink,
760 dev_t rdev,
761 prid_t prid,
Dave Chinnere6a688c2021-03-22 09:52:03 -0700762 bool init_xattrs,
Dave Chinner1abcf262020-12-09 10:05:15 -0800763 struct xfs_inode **ipp)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700764{
Christoph Hellwig01ea1732021-01-22 16:48:18 -0800765 struct inode *dir = pip ? VFS_I(pip) : NULL;
Dave Chinner1abcf262020-12-09 10:05:15 -0800766 struct xfs_mount *mp = tp->t_mountp;
767 struct xfs_inode *ip;
768 unsigned int flags;
769 int error;
770 struct timespec64 tv;
771 struct inode *inode;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700772
773 /*
Dave Chinner8b269842018-04-17 17:17:35 -0700774 * Protect against obviously corrupt allocation btree records. Later
775 * xfs_iget checks will catch re-allocation of other active in-memory
776 * and on-disk inodes. If we don't catch reallocating the parent inode
777 * here we will deadlock in xfs_iget() so we have to do these checks
778 * first.
779 */
780 if ((pip && ino == pip->i_ino) || !xfs_verify_dir_ino(mp, ino)) {
781 xfs_alert(mp, "Allocated a known in-use inode 0x%llx!", ino);
782 return -EFSCORRUPTED;
783 }
784
785 /*
Dave Chinner1abcf262020-12-09 10:05:15 -0800786 * Get the in-core inode with the lock held exclusively to prevent
787 * others from looking at until we're done.
Linus Torvalds1da177e2005-04-16 15:20:36 -0700788 */
Dave Chinner1abcf262020-12-09 10:05:15 -0800789 error = xfs_iget(mp, tp, ino, XFS_IGET_CREATE, XFS_ILOCK_EXCL, &ip);
David Chinnerbf904242008-10-30 17:36:14 +1100790 if (error)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700791 return error;
Dave Chinner1abcf262020-12-09 10:05:15 -0800792
Linus Torvalds1da177e2005-04-16 15:20:36 -0700793 ASSERT(ip != NULL);
Dave Chinner39878482016-02-09 16:54:58 +1100794 inode = VFS_I(ip);
Dave Chinner54d7b5c2016-02-09 16:54:58 +1100795 set_nlink(inode, nlink);
Christoph Hellwig66f36462017-10-19 11:07:09 -0700796 inode->i_rdev = rdev;
Christoph Hellwigceaf6032021-03-29 11:11:39 -0700797 ip->i_projid = prid;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700798
Christoph Hellwig01ea1732021-01-22 16:48:18 -0800799 if (dir && !(dir->i_mode & S_ISGID) &&
800 (mp->m_flags & XFS_MOUNT_GRPID)) {
Christian Braunerdb998552021-03-20 13:26:24 +0100801 inode_fsuid_set(inode, mnt_userns);
Christoph Hellwig01ea1732021-01-22 16:48:18 -0800802 inode->i_gid = dir->i_gid;
803 inode->i_mode = mode;
Christoph Hellwig3d8f2822020-02-21 08:31:26 -0800804 } else {
Linus Torvalds7d6beb72021-02-23 13:39:45 -0800805 inode_init_owner(mnt_userns, inode, dir, mode);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700806 }
807
808 /*
809 * If the group ID of the new file does not match the effective group
810 * ID or one of the supplementary group IDs, the S_ISGID bit is cleared
811 * (and only if the irix_sgid_inherit compatibility variable is set).
812 */
Christoph Hellwig54295152020-02-21 08:31:27 -0800813 if (irix_sgid_inherit &&
Christoph Hellwigf736d932021-01-21 14:19:58 +0100814 (inode->i_mode & S_ISGID) &&
815 !in_group_p(i_gid_into_mnt(mnt_userns, inode)))
Dave Chinnerc19b3b052016-02-09 16:54:58 +1100816 inode->i_mode &= ~S_ISGID;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700817
Christoph Hellwig13d2c102021-03-29 11:11:40 -0700818 ip->i_disk_size = 0;
Christoph Hellwigdaf83962020-05-18 10:27:22 -0700819 ip->i_df.if_nextents = 0;
Christoph Hellwig6e73a542021-03-29 11:11:40 -0700820 ASSERT(ip->i_nblocks == 0);
Christoph Hellwigdff35fd2008-08-13 16:44:15 +1000821
Deepa Dinamanic2050a42016-09-14 07:48:06 -0700822 tv = current_time(inode);
Dave Chinner39878482016-02-09 16:54:58 +1100823 inode->i_mtime = tv;
824 inode->i_atime = tv;
825 inode->i_ctime = tv;
Christoph Hellwigdff35fd2008-08-13 16:44:15 +1000826
Christoph Hellwig031474c2021-03-29 11:11:41 -0700827 ip->i_extsize = 0;
Christoph Hellwigdb073492021-03-29 11:11:44 -0700828 ip->i_diflags = 0;
Christoph Hellwig93848a92013-04-03 16:11:17 +1100829
Christoph Hellwig6471e9c2020-03-18 08:15:11 -0700830 if (xfs_sb_version_has_v3inode(&mp->m_sb)) {
Jeff Laytonf0e28282017-12-11 06:35:19 -0500831 inode_set_iversion(inode, 1);
Christoph Hellwigb33ce572021-03-29 11:11:42 -0700832 ip->i_cowextsize = 0;
Christoph Hellwige98d5e82021-03-29 11:11:45 -0700833 ip->i_crtime = tv;
Christoph Hellwig93848a92013-04-03 16:11:17 +1100834 }
835
Linus Torvalds1da177e2005-04-16 15:20:36 -0700836 flags = XFS_ILOG_CORE;
837 switch (mode & S_IFMT) {
838 case S_IFIFO:
839 case S_IFCHR:
840 case S_IFBLK:
841 case S_IFSOCK:
Christoph Hellwigf7e67b22020-05-18 10:28:05 -0700842 ip->i_df.if_format = XFS_DINODE_FMT_DEV;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700843 flags |= XFS_ILOG_DEV;
844 break;
845 case S_IFREG:
846 case S_IFDIR:
Christoph Hellwigdb073492021-03-29 11:11:44 -0700847 if (pip && (pip->i_diflags & XFS_DIFLAG_ANY))
Darrick J. Wong8a569d72020-09-13 10:16:40 -0700848 xfs_inode_inherit_flags(ip, pip);
Christoph Hellwig3e09ab82021-03-29 11:11:45 -0700849 if (pip && (pip->i_diflags2 & XFS_DIFLAG2_ANY))
Darrick J. Wong8a569d72020-09-13 10:16:40 -0700850 xfs_inode_inherit_flags2(ip, pip);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700851 /* FALLTHROUGH */
852 case S_IFLNK:
Christoph Hellwigf7e67b22020-05-18 10:28:05 -0700853 ip->i_df.if_format = XFS_DINODE_FMT_EXTENTS;
Christoph Hellwigfcacbc32018-07-17 16:51:50 -0700854 ip->i_df.if_bytes = 0;
Christoph Hellwig6bdcf262017-11-03 10:34:46 -0700855 ip->i_df.if_u1.if_root = NULL;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700856 break;
857 default:
858 ASSERT(0);
859 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700860
861 /*
Dave Chinnere6a688c2021-03-22 09:52:03 -0700862 * If we need to create attributes immediately after allocating the
863 * inode, initialise an empty attribute fork right now. We use the
864 * default fork offset for attributes here as we don't know exactly what
865 * size or how many attributes we might be adding. We can do this
866 * safely here because we know the data fork is completely empty and
867 * this saves us from needing to run a separate transaction to set the
868 * fork offset in the immediate future.
869 */
Dave Chinner2442ee12021-04-06 07:01:00 -0700870 if (init_xattrs && xfs_sb_version_hasattr(&mp->m_sb)) {
Christoph Hellwig7821ea32021-03-29 11:11:44 -0700871 ip->i_forkoff = xfs_default_attroffset(ip) >> 3;
Dave Chinnere6a688c2021-03-22 09:52:03 -0700872 ip->i_afp = xfs_ifork_alloc(XFS_DINODE_FMT_EXTENTS, 0);
873 }
874
875 /*
Linus Torvalds1da177e2005-04-16 15:20:36 -0700876 * Log the new values stuffed into the inode.
877 */
Christoph Hellwigddc34152011-09-19 15:00:54 +0000878 xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700879 xfs_trans_log_inode(tp, ip, flags);
880
Dave Chinner58c90472015-02-23 22:38:08 +1100881 /* now that we have an i_mode we can setup the inode structure */
Christoph Hellwig41be8be2008-08-13 16:23:13 +1000882 xfs_setup_inode(ip);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700883
884 *ipp = ip;
885 return 0;
886}
887
Dave Chinnere546cb72013-08-12 20:49:47 +1000888/*
Dave Chinner1abcf262020-12-09 10:05:15 -0800889 * Allocates a new inode from disk and return a pointer to the incore copy. This
890 * routine will internally commit the current transaction and allocate a new one
891 * if we needed to allocate more on-disk free inodes to perform the requested
892 * operation.
Dave Chinnere546cb72013-08-12 20:49:47 +1000893 *
Dave Chinner1abcf262020-12-09 10:05:15 -0800894 * If we are allocating quota inodes, we do not have a parent inode to attach to
895 * or associate with (i.e. dp == NULL) because they are not linked into the
896 * directory structure - they are attached directly to the superblock - and so
897 * have no parent.
Dave Chinnere546cb72013-08-12 20:49:47 +1000898 */
899int
900xfs_dir_ialloc(
Christoph Hellwigf736d932021-01-21 14:19:58 +0100901 struct user_namespace *mnt_userns,
Dave Chinner1abcf262020-12-09 10:05:15 -0800902 struct xfs_trans **tpp,
903 struct xfs_inode *dp,
904 umode_t mode,
905 xfs_nlink_t nlink,
906 dev_t rdev,
907 prid_t prid,
Dave Chinnere6a688c2021-03-22 09:52:03 -0700908 bool init_xattrs,
Dave Chinner1abcf262020-12-09 10:05:15 -0800909 struct xfs_inode **ipp)
Dave Chinnere546cb72013-08-12 20:49:47 +1000910{
Dave Chinner8d822dc2020-12-09 10:05:16 -0800911 struct xfs_buf *agibp;
Dave Chinner1abcf262020-12-09 10:05:15 -0800912 xfs_ino_t parent_ino = dp ? dp->i_ino : 0;
913 xfs_ino_t ino;
914 int error;
Dave Chinnere546cb72013-08-12 20:49:47 +1000915
Dave Chinner1abcf262020-12-09 10:05:15 -0800916 ASSERT((*tpp)->t_flags & XFS_TRANS_PERM_LOG_RES);
Dave Chinnere546cb72013-08-12 20:49:47 +1000917
918 /*
Dave Chinner1abcf262020-12-09 10:05:15 -0800919 * Call the space management code to pick the on-disk inode to be
Dave Chinnerf3bf6e02020-12-09 10:05:16 -0800920 * allocated.
Dave Chinnere546cb72013-08-12 20:49:47 +1000921 */
Dave Chinner8d822dc2020-12-09 10:05:16 -0800922 error = xfs_dialloc_select_ag(tpp, parent_ino, mode, &agibp);
Dave Chinner1abcf262020-12-09 10:05:15 -0800923 if (error)
924 return error;
Dave Chinnere546cb72013-08-12 20:49:47 +1000925
Dave Chinner8d822dc2020-12-09 10:05:16 -0800926 if (!agibp)
Dave Chinner1abcf262020-12-09 10:05:15 -0800927 return -ENOSPC;
Dave Chinnere546cb72013-08-12 20:49:47 +1000928
Dave Chinner8d822dc2020-12-09 10:05:16 -0800929 /* Allocate an inode from the selected AG */
930 error = xfs_dialloc_ag(*tpp, agibp, parent_ino, &ino);
931 if (error)
932 return error;
933 ASSERT(ino != NULLFSINO);
934
Christoph Hellwigf736d932021-01-21 14:19:58 +0100935 return xfs_init_new_inode(mnt_userns, *tpp, dp, ino, mode, nlink, rdev,
Dave Chinnere6a688c2021-03-22 09:52:03 -0700936 prid, init_xattrs, ipp);
Dave Chinnere546cb72013-08-12 20:49:47 +1000937}
938
939/*
Dave Chinner54d7b5c2016-02-09 16:54:58 +1100940 * Decrement the link count on an inode & log the change. If this causes the
941 * link count to go to zero, move the inode to AGI unlinked list so that it can
942 * be freed when the last active reference goes away via xfs_inactive().
Dave Chinnere546cb72013-08-12 20:49:47 +1000943 */
Eric Sandeen0d5a75e2016-06-01 17:38:15 +1000944static int /* error */
Dave Chinnere546cb72013-08-12 20:49:47 +1000945xfs_droplink(
946 xfs_trans_t *tp,
947 xfs_inode_t *ip)
948{
Dave Chinnere546cb72013-08-12 20:49:47 +1000949 xfs_trans_ichgtime(tp, ip, XFS_ICHGTIME_CHG);
950
Dave Chinnere546cb72013-08-12 20:49:47 +1000951 drop_nlink(VFS_I(ip));
952 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
953
Dave Chinner54d7b5c2016-02-09 16:54:58 +1100954 if (VFS_I(ip)->i_nlink)
955 return 0;
956
957 return xfs_iunlink(tp, ip);
Dave Chinnere546cb72013-08-12 20:49:47 +1000958}
959
960/*
Dave Chinnere546cb72013-08-12 20:49:47 +1000961 * Increment the link count on an inode & log the change.
962 */
Eric Sandeen91083262019-05-01 20:26:30 -0700963static void
Dave Chinnere546cb72013-08-12 20:49:47 +1000964xfs_bumplink(
965 xfs_trans_t *tp,
966 xfs_inode_t *ip)
967{
968 xfs_trans_ichgtime(tp, ip, XFS_ICHGTIME_CHG);
969
Dave Chinnere546cb72013-08-12 20:49:47 +1000970 inc_nlink(VFS_I(ip));
Dave Chinnere546cb72013-08-12 20:49:47 +1000971 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
Dave Chinnere546cb72013-08-12 20:49:47 +1000972}
973
Dave Chinnerc24b5df2013-08-12 20:49:45 +1000974int
975xfs_create(
Christoph Hellwigf736d932021-01-21 14:19:58 +0100976 struct user_namespace *mnt_userns,
Dave Chinnerc24b5df2013-08-12 20:49:45 +1000977 xfs_inode_t *dp,
978 struct xfs_name *name,
979 umode_t mode,
Christoph Hellwig66f36462017-10-19 11:07:09 -0700980 dev_t rdev,
Dave Chinnere6a688c2021-03-22 09:52:03 -0700981 bool init_xattrs,
Dave Chinnerc24b5df2013-08-12 20:49:45 +1000982 xfs_inode_t **ipp)
983{
984 int is_dir = S_ISDIR(mode);
985 struct xfs_mount *mp = dp->i_mount;
986 struct xfs_inode *ip = NULL;
987 struct xfs_trans *tp = NULL;
988 int error;
Dave Chinnerc24b5df2013-08-12 20:49:45 +1000989 bool unlock_dp_on_error = false;
Dave Chinnerc24b5df2013-08-12 20:49:45 +1000990 prid_t prid;
991 struct xfs_dquot *udqp = NULL;
992 struct xfs_dquot *gdqp = NULL;
993 struct xfs_dquot *pdqp = NULL;
Brian Foster062647a2014-11-28 14:00:16 +1100994 struct xfs_trans_res *tres;
Dave Chinnerc24b5df2013-08-12 20:49:45 +1000995 uint resblks;
Dave Chinnerc24b5df2013-08-12 20:49:45 +1000996
997 trace_xfs_create(dp, name);
998
999 if (XFS_FORCED_SHUTDOWN(mp))
Dave Chinner24513372014-06-25 14:58:08 +10001000 return -EIO;
Dave Chinnerc24b5df2013-08-12 20:49:45 +10001001
Zhi Yong Wu163467d2013-12-18 08:22:39 +08001002 prid = xfs_get_initial_prid(dp);
Dave Chinnerc24b5df2013-08-12 20:49:45 +10001003
1004 /*
1005 * Make sure that we have allocated dquot(s) on disk.
1006 */
Christian Braunera65e58e2021-03-20 13:26:22 +01001007 error = xfs_qm_vop_dqalloc(dp, mapped_fsuid(mnt_userns),
1008 mapped_fsgid(mnt_userns), prid,
Darrick J. Wongb5a08422021-03-02 09:32:52 -08001009 XFS_QMOPT_QUOTALL | XFS_QMOPT_INHERIT,
1010 &udqp, &gdqp, &pdqp);
Dave Chinnerc24b5df2013-08-12 20:49:45 +10001011 if (error)
1012 return error;
1013
1014 if (is_dir) {
Dave Chinnerc24b5df2013-08-12 20:49:45 +10001015 resblks = XFS_MKDIR_SPACE_RES(mp, name->len);
Brian Foster062647a2014-11-28 14:00:16 +11001016 tres = &M_RES(mp)->tr_mkdir;
Dave Chinnerc24b5df2013-08-12 20:49:45 +10001017 } else {
1018 resblks = XFS_CREATE_SPACE_RES(mp, name->len);
Brian Foster062647a2014-11-28 14:00:16 +11001019 tres = &M_RES(mp)->tr_create;
Dave Chinnerc24b5df2013-08-12 20:49:45 +10001020 }
1021
Dave Chinnerc24b5df2013-08-12 20:49:45 +10001022 /*
1023 * Initially assume that the file does not exist and
1024 * reserve the resources for that case. If that is not
1025 * the case we'll drop the one we have and get a more
1026 * appropriate transaction later.
1027 */
Darrick J. Wongf2f7b9f2021-01-27 12:07:57 -08001028 error = xfs_trans_alloc_icreate(mp, tres, udqp, gdqp, pdqp, resblks,
1029 &tp);
Dave Chinner24513372014-06-25 14:58:08 +10001030 if (error == -ENOSPC) {
Dave Chinnerc24b5df2013-08-12 20:49:45 +10001031 /* flush outstanding delalloc blocks and retry */
1032 xfs_flush_inodes(mp);
Darrick J. Wongf2f7b9f2021-01-27 12:07:57 -08001033 error = xfs_trans_alloc_icreate(mp, tres, udqp, gdqp, pdqp,
1034 resblks, &tp);
Dave Chinnerc24b5df2013-08-12 20:49:45 +10001035 }
Christoph Hellwig4906e212015-06-04 13:47:56 +10001036 if (error)
Darrick J. Wongf2f7b9f2021-01-27 12:07:57 -08001037 goto out_release_dquots;
Dave Chinnerc24b5df2013-08-12 20:49:45 +10001038
Christoph Hellwig65523212016-11-30 14:33:25 +11001039 xfs_ilock(dp, XFS_ILOCK_EXCL | XFS_ILOCK_PARENT);
Dave Chinnerc24b5df2013-08-12 20:49:45 +10001040 unlock_dp_on_error = true;
1041
Chandan Babu Rf5d92742021-01-22 16:48:12 -08001042 error = xfs_iext_count_may_overflow(dp, XFS_DATA_FORK,
1043 XFS_IEXT_DIR_MANIP_CNT(mp));
1044 if (error)
1045 goto out_trans_cancel;
1046
Dave Chinnerc24b5df2013-08-12 20:49:45 +10001047 /*
1048 * A newly created regular or special file just has one directory
1049 * entry pointing to them, but a directory also the "." entry
1050 * pointing to itself.
1051 */
Christoph Hellwigf736d932021-01-21 14:19:58 +01001052 error = xfs_dir_ialloc(mnt_userns, &tp, dp, mode, is_dir ? 2 : 1, rdev,
Dave Chinnere6a688c2021-03-22 09:52:03 -07001053 prid, init_xattrs, &ip);
Jan Karad6077aa2015-07-29 11:52:08 +10001054 if (error)
Christoph Hellwig4906e212015-06-04 13:47:56 +10001055 goto out_trans_cancel;
Dave Chinnerc24b5df2013-08-12 20:49:45 +10001056
1057 /*
1058 * Now we join the directory inode to the transaction. We do not do it
1059 * earlier because xfs_dir_ialloc might commit the previous transaction
1060 * (and release all the locks). An error from here on will result in
1061 * the transaction cancel unlocking dp so don't do it explicitly in the
1062 * error path.
1063 */
Christoph Hellwig65523212016-11-30 14:33:25 +11001064 xfs_trans_ijoin(tp, dp, XFS_ILOCK_EXCL);
Dave Chinnerc24b5df2013-08-12 20:49:45 +10001065 unlock_dp_on_error = false;
1066
Brian Foster381eee62018-07-11 22:26:21 -07001067 error = xfs_dir_createname(tp, dp, name, ip->i_ino,
Kaixu Xia63337b62020-03-27 08:28:39 -07001068 resblks - XFS_IALLOC_SPACE_RES(mp));
Dave Chinnerc24b5df2013-08-12 20:49:45 +10001069 if (error) {
Dave Chinner24513372014-06-25 14:58:08 +10001070 ASSERT(error != -ENOSPC);
Christoph Hellwig4906e212015-06-04 13:47:56 +10001071 goto out_trans_cancel;
Dave Chinnerc24b5df2013-08-12 20:49:45 +10001072 }
1073 xfs_trans_ichgtime(tp, dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
1074 xfs_trans_log_inode(tp, dp, XFS_ILOG_CORE);
1075
1076 if (is_dir) {
1077 error = xfs_dir_init(tp, ip, dp);
1078 if (error)
Brian Fosterc8eac492018-07-24 13:43:13 -07001079 goto out_trans_cancel;
Dave Chinnerc24b5df2013-08-12 20:49:45 +10001080
Eric Sandeen91083262019-05-01 20:26:30 -07001081 xfs_bumplink(tp, dp);
Dave Chinnerc24b5df2013-08-12 20:49:45 +10001082 }
1083
1084 /*
1085 * If this is a synchronous mount, make sure that the
1086 * create transaction goes to disk before returning to
1087 * the user.
1088 */
1089 if (mp->m_flags & (XFS_MOUNT_WSYNC|XFS_MOUNT_DIRSYNC))
1090 xfs_trans_set_sync(tp);
1091
1092 /*
1093 * Attach the dquot(s) to the inodes and modify them incore.
1094 * These ids of the inode couldn't have changed since the new
1095 * inode has been locked ever since it was created.
1096 */
1097 xfs_qm_vop_create_dqattach(tp, ip, udqp, gdqp, pdqp);
1098
Christoph Hellwig70393312015-06-04 13:48:08 +10001099 error = xfs_trans_commit(tp);
Dave Chinnerc24b5df2013-08-12 20:49:45 +10001100 if (error)
1101 goto out_release_inode;
1102
1103 xfs_qm_dqrele(udqp);
1104 xfs_qm_dqrele(gdqp);
1105 xfs_qm_dqrele(pdqp);
1106
1107 *ipp = ip;
1108 return 0;
1109
Dave Chinnerc24b5df2013-08-12 20:49:45 +10001110 out_trans_cancel:
Christoph Hellwig4906e212015-06-04 13:47:56 +10001111 xfs_trans_cancel(tp);
Dave Chinnerc24b5df2013-08-12 20:49:45 +10001112 out_release_inode:
1113 /*
Dave Chinner58c90472015-02-23 22:38:08 +11001114 * Wait until after the current transaction is aborted to finish the
1115 * setup of the inode and release the inode. This prevents recursive
1116 * transactions and deadlocks from xfs_inactive.
Dave Chinnerc24b5df2013-08-12 20:49:45 +10001117 */
Dave Chinner58c90472015-02-23 22:38:08 +11001118 if (ip) {
1119 xfs_finish_inode_setup(ip);
Darrick J. Wong44a87362018-07-25 12:52:32 -07001120 xfs_irele(ip);
Dave Chinner58c90472015-02-23 22:38:08 +11001121 }
Darrick J. Wongf2f7b9f2021-01-27 12:07:57 -08001122 out_release_dquots:
Dave Chinnerc24b5df2013-08-12 20:49:45 +10001123 xfs_qm_dqrele(udqp);
1124 xfs_qm_dqrele(gdqp);
1125 xfs_qm_dqrele(pdqp);
1126
1127 if (unlock_dp_on_error)
Christoph Hellwig65523212016-11-30 14:33:25 +11001128 xfs_iunlock(dp, XFS_ILOCK_EXCL);
Dave Chinnerc24b5df2013-08-12 20:49:45 +10001129 return error;
1130}
1131
1132int
Zhi Yong Wu99b64362013-12-18 08:22:40 +08001133xfs_create_tmpfile(
Christoph Hellwigf736d932021-01-21 14:19:58 +01001134 struct user_namespace *mnt_userns,
Zhi Yong Wu99b64362013-12-18 08:22:40 +08001135 struct xfs_inode *dp,
Brian Foster330033d2014-04-17 08:15:30 +10001136 umode_t mode,
1137 struct xfs_inode **ipp)
Zhi Yong Wu99b64362013-12-18 08:22:40 +08001138{
1139 struct xfs_mount *mp = dp->i_mount;
1140 struct xfs_inode *ip = NULL;
1141 struct xfs_trans *tp = NULL;
1142 int error;
Zhi Yong Wu99b64362013-12-18 08:22:40 +08001143 prid_t prid;
1144 struct xfs_dquot *udqp = NULL;
1145 struct xfs_dquot *gdqp = NULL;
1146 struct xfs_dquot *pdqp = NULL;
1147 struct xfs_trans_res *tres;
1148 uint resblks;
1149
1150 if (XFS_FORCED_SHUTDOWN(mp))
Dave Chinner24513372014-06-25 14:58:08 +10001151 return -EIO;
Zhi Yong Wu99b64362013-12-18 08:22:40 +08001152
1153 prid = xfs_get_initial_prid(dp);
1154
1155 /*
1156 * Make sure that we have allocated dquot(s) on disk.
1157 */
Christian Braunera65e58e2021-03-20 13:26:22 +01001158 error = xfs_qm_vop_dqalloc(dp, mapped_fsuid(mnt_userns),
1159 mapped_fsgid(mnt_userns), prid,
Darrick J. Wongb5a08422021-03-02 09:32:52 -08001160 XFS_QMOPT_QUOTALL | XFS_QMOPT_INHERIT,
1161 &udqp, &gdqp, &pdqp);
Zhi Yong Wu99b64362013-12-18 08:22:40 +08001162 if (error)
1163 return error;
1164
1165 resblks = XFS_IALLOC_SPACE_RES(mp);
Zhi Yong Wu99b64362013-12-18 08:22:40 +08001166 tres = &M_RES(mp)->tr_create_tmpfile;
Christoph Hellwig253f4912016-04-06 09:19:55 +10001167
Darrick J. Wongf2f7b9f2021-01-27 12:07:57 -08001168 error = xfs_trans_alloc_icreate(mp, tres, udqp, gdqp, pdqp, resblks,
1169 &tp);
Christoph Hellwig4906e212015-06-04 13:47:56 +10001170 if (error)
Darrick J. Wongf2f7b9f2021-01-27 12:07:57 -08001171 goto out_release_dquots;
Zhi Yong Wu99b64362013-12-18 08:22:40 +08001172
Dave Chinnere6a688c2021-03-22 09:52:03 -07001173 error = xfs_dir_ialloc(mnt_userns, &tp, dp, mode, 0, 0, prid,
1174 false, &ip);
Jan Karad6077aa2015-07-29 11:52:08 +10001175 if (error)
Christoph Hellwig4906e212015-06-04 13:47:56 +10001176 goto out_trans_cancel;
Zhi Yong Wu99b64362013-12-18 08:22:40 +08001177
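	/*
	 * If this is a synchronous mount, make sure that the create
	 * transaction goes to disk before returning to the user.
	 */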
1178 if (mp->m_flags & XFS_MOUNT_WSYNC)
1179 xfs_trans_set_sync(tp);
1180
1181 /*
1182 * Attach the dquot(s) to the inodes and modify them incore.
1183	 * The inode's ids cannot have changed since the new
1184 * inode has been locked ever since it was created.
1185 */
1186 xfs_qm_vop_create_dqattach(tp, ip, udqp, gdqp, pdqp);
1187
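	/*
	 * Put the tmpfile on the AGI unlinked list. It has no directory
	 * entry, so it stays on that list until it is either linked into
	 * the namespace or freed.
	 */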
Zhi Yong Wu99b64362013-12-18 08:22:40 +08001188 error = xfs_iunlink(tp, ip);
1189 if (error)
Christoph Hellwig4906e212015-06-04 13:47:56 +10001190 goto out_trans_cancel;
Zhi Yong Wu99b64362013-12-18 08:22:40 +08001191
Christoph Hellwig70393312015-06-04 13:48:08 +10001192 error = xfs_trans_commit(tp);
Zhi Yong Wu99b64362013-12-18 08:22:40 +08001193 if (error)
1194 goto out_release_inode;
1195
1196 xfs_qm_dqrele(udqp);
1197 xfs_qm_dqrele(gdqp);
1198 xfs_qm_dqrele(pdqp);
1199
Brian Foster330033d2014-04-17 08:15:30 +10001200 *ipp = ip;
Zhi Yong Wu99b64362013-12-18 08:22:40 +08001201 return 0;
1202
Zhi Yong Wu99b64362013-12-18 08:22:40 +08001203 out_trans_cancel:
Christoph Hellwig4906e212015-06-04 13:47:56 +10001204 xfs_trans_cancel(tp);
Zhi Yong Wu99b64362013-12-18 08:22:40 +08001205 out_release_inode:
1206 /*
Dave Chinner58c90472015-02-23 22:38:08 +11001207 * Wait until after the current transaction is aborted to finish the
1208 * setup of the inode and release the inode. This prevents recursive
1209 * transactions and deadlocks from xfs_inactive.
Zhi Yong Wu99b64362013-12-18 08:22:40 +08001210 */
Dave Chinner58c90472015-02-23 22:38:08 +11001211 if (ip) {
1212 xfs_finish_inode_setup(ip);
Darrick J. Wong44a87362018-07-25 12:52:32 -07001213 xfs_irele(ip);
Dave Chinner58c90472015-02-23 22:38:08 +11001214 }
Darrick J. Wongf2f7b9f2021-01-27 12:07:57 -08001215 out_release_dquots:
Zhi Yong Wu99b64362013-12-18 08:22:40 +08001216 xfs_qm_dqrele(udqp);
1217 xfs_qm_dqrele(gdqp);
1218 xfs_qm_dqrele(pdqp);
1219
1220 return error;
1221}
1222
1223int
Dave Chinnerc24b5df2013-08-12 20:49:45 +10001224xfs_link(
1225 xfs_inode_t *tdp,
1226 xfs_inode_t *sip,
1227 struct xfs_name *target_name)
1228{
1229 xfs_mount_t *mp = tdp->i_mount;
1230 xfs_trans_t *tp;
1231 int error;
Dave Chinnerc24b5df2013-08-12 20:49:45 +10001232 int resblks;
1233
1234 trace_xfs_link(tdp, target_name);
1235
Dave Chinnerc19b3b052016-02-09 16:54:58 +11001236 ASSERT(!S_ISDIR(VFS_I(sip)->i_mode));
Dave Chinnerc24b5df2013-08-12 20:49:45 +10001237
1238 if (XFS_FORCED_SHUTDOWN(mp))
Dave Chinner24513372014-06-25 14:58:08 +10001239 return -EIO;
Dave Chinnerc24b5df2013-08-12 20:49:45 +10001240
Darrick J. Wongc14cfcc2018-05-04 15:30:21 -07001241 error = xfs_qm_dqattach(sip);
Dave Chinnerc24b5df2013-08-12 20:49:45 +10001242 if (error)
1243 goto std_return;
1244
Darrick J. Wongc14cfcc2018-05-04 15:30:21 -07001245 error = xfs_qm_dqattach(tdp);
Dave Chinnerc24b5df2013-08-12 20:49:45 +10001246 if (error)
1247 goto std_return;
1248
Dave Chinnerc24b5df2013-08-12 20:49:45 +10001249 resblks = XFS_LINK_SPACE_RES(mp, target_name->len);
Christoph Hellwig253f4912016-04-06 09:19:55 +10001250 error = xfs_trans_alloc(mp, &M_RES(mp)->tr_link, resblks, 0, 0, &tp);
Dave Chinner24513372014-06-25 14:58:08 +10001251 if (error == -ENOSPC) {
Dave Chinnerc24b5df2013-08-12 20:49:45 +10001252 resblks = 0;
Christoph Hellwig253f4912016-04-06 09:19:55 +10001253 error = xfs_trans_alloc(mp, &M_RES(mp)->tr_link, 0, 0, 0, &tp);
Dave Chinnerc24b5df2013-08-12 20:49:45 +10001254 }
Christoph Hellwig4906e212015-06-04 13:47:56 +10001255 if (error)
Christoph Hellwig253f4912016-04-06 09:19:55 +10001256 goto std_return;
Dave Chinnerc24b5df2013-08-12 20:49:45 +10001257
Darrick J. Wong7c2d2382018-01-26 15:27:33 -08001258 xfs_lock_two_inodes(sip, XFS_ILOCK_EXCL, tdp, XFS_ILOCK_EXCL);
Dave Chinnerc24b5df2013-08-12 20:49:45 +10001259
1260 xfs_trans_ijoin(tp, sip, XFS_ILOCK_EXCL);
Christoph Hellwig65523212016-11-30 14:33:25 +11001261 xfs_trans_ijoin(tp, tdp, XFS_ILOCK_EXCL);
Dave Chinnerc24b5df2013-08-12 20:49:45 +10001262
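	/*
	 * Make sure the directory's data fork extent count cannot overflow
	 * when the new entry is added.
	 */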
Chandan Babu Rf5d92742021-01-22 16:48:12 -08001263 error = xfs_iext_count_may_overflow(tdp, XFS_DATA_FORK,
1264 XFS_IEXT_DIR_MANIP_CNT(mp));
1265 if (error)
1266 goto error_return;
1267
Dave Chinnerc24b5df2013-08-12 20:49:45 +10001268 /*
1269 * If we are using project inheritance, we only allow hard link
1270 * creation in our tree when the project IDs are the same; else
1271 * the tree quota mechanism could be circumvented.
1272 */
Christoph Hellwigdb073492021-03-29 11:11:44 -07001273 if (unlikely((tdp->i_diflags & XFS_DIFLAG_PROJINHERIT) &&
Christoph Hellwigceaf6032021-03-29 11:11:39 -07001274 tdp->i_projid != sip->i_projid)) {
Dave Chinner24513372014-06-25 14:58:08 +10001275 error = -EXDEV;
Dave Chinnerc24b5df2013-08-12 20:49:45 +10001276 goto error_return;
1277 }
1278
Eric Sandeen94f3cad2014-09-09 11:57:52 +10001279 if (!resblks) {
1280 error = xfs_dir_canenter(tp, tdp, target_name);
1281 if (error)
1282 goto error_return;
1283 }
Dave Chinnerc24b5df2013-08-12 20:49:45 +10001284
Dave Chinner54d7b5c2016-02-09 16:54:58 +11001285 /*
1286 * Handle initial link state of O_TMPFILE inode
1287 */
1288 if (VFS_I(sip)->i_nlink == 0) {
Zhi Yong Wuab297432013-12-18 08:22:41 +08001289 error = xfs_iunlink_remove(tp, sip);
1290 if (error)
Christoph Hellwig4906e212015-06-04 13:47:56 +10001291 goto error_return;
Zhi Yong Wuab297432013-12-18 08:22:41 +08001292 }
1293
Dave Chinnerc24b5df2013-08-12 20:49:45 +10001294 error = xfs_dir_createname(tp, tdp, target_name, sip->i_ino,
Brian Foster381eee62018-07-11 22:26:21 -07001295 resblks);
Dave Chinnerc24b5df2013-08-12 20:49:45 +10001296 if (error)
Christoph Hellwig4906e212015-06-04 13:47:56 +10001297 goto error_return;
Dave Chinnerc24b5df2013-08-12 20:49:45 +10001298 xfs_trans_ichgtime(tp, tdp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
1299 xfs_trans_log_inode(tp, tdp, XFS_ILOG_CORE);
1300
Eric Sandeen91083262019-05-01 20:26:30 -07001301 xfs_bumplink(tp, sip);
Dave Chinnerc24b5df2013-08-12 20:49:45 +10001302
1303 /*
1304 * If this is a synchronous mount, make sure that the
1305 * link transaction goes to disk before returning to
1306 * the user.
1307 */
Eric Sandeenf6106ef2016-01-11 11:34:01 +11001308 if (mp->m_flags & (XFS_MOUNT_WSYNC|XFS_MOUNT_DIRSYNC))
Dave Chinnerc24b5df2013-08-12 20:49:45 +10001309 xfs_trans_set_sync(tp);
Dave Chinnerc24b5df2013-08-12 20:49:45 +10001310
Christoph Hellwig70393312015-06-04 13:48:08 +10001311 return xfs_trans_commit(tp);
Dave Chinnerc24b5df2013-08-12 20:49:45 +10001312
Dave Chinnerc24b5df2013-08-12 20:49:45 +10001313 error_return:
Christoph Hellwig4906e212015-06-04 13:47:56 +10001314 xfs_trans_cancel(tp);
Dave Chinnerc24b5df2013-08-12 20:49:45 +10001315 std_return:
1316 return error;
1317}
1318
Darrick J. Wong363e59b2017-12-14 15:42:59 -08001319/* Clear the reflink flag and the cowblocks tag if possible. */
1320static void
1321xfs_itruncate_clear_reflink_flags(
1322 struct xfs_inode *ip)
1323{
1324 struct xfs_ifork *dfork;
1325 struct xfs_ifork *cfork;
1326
1327 if (!xfs_is_reflink_inode(ip))
1328 return;
1329 dfork = XFS_IFORK_PTR(ip, XFS_DATA_FORK);
1330 cfork = XFS_IFORK_PTR(ip, XFS_COW_FORK);
1331 if (dfork->if_bytes == 0 && cfork->if_bytes == 0)
Christoph Hellwig3e09ab82021-03-29 11:11:45 -07001332 ip->i_diflags2 &= ~XFS_DIFLAG2_REFLINK;
Darrick J. Wong363e59b2017-12-14 15:42:59 -08001333 if (cfork->if_bytes == 0)
1334 xfs_inode_clear_cowblocks_tag(ip);
1335}
1336
Linus Torvalds1da177e2005-04-16 15:20:36 -07001337/*
Christoph Hellwig8f04c472011-07-08 14:34:34 +02001338 * Free up the underlying blocks past new_size. The new size must be smaller
1339 * than the current size. This routine can be used both for the attribute and
1340 * data fork, and does not modify the inode size, which is left to the caller.
Linus Torvalds1da177e2005-04-16 15:20:36 -07001341 *
David Chinnerf6485052008-04-17 16:50:04 +10001342 * The transaction passed to this routine must have made a permanent log
1343 * reservation of at least XFS_ITRUNCATE_LOG_RES. This routine may commit the
1344 * given transaction and start new ones, so make sure everything involved in
1345	 * the transaction is tidy before calling here. A transaction will be
1346 * returned to the caller to be committed. The incoming transaction must
1347 * already include the inode, and both inode locks must be held exclusively.
1348 * The inode must also be "held" within the transaction. On return the inode
1349 * will be "held" within the returned transaction. This routine does NOT
1350 * require any disk space to be reserved for it within the transaction.
Linus Torvalds1da177e2005-04-16 15:20:36 -07001351 *
David Chinnerf6485052008-04-17 16:50:04 +10001352 * If we get an error, we must return with the inode locked and linked into the
1353 * current transaction. This keeps things simple for the higher level code,
1354 * because it always knows that the inode is locked and held in the transaction
1355 * that returns to it whether errors occur or not. We don't mark the inode
1356 * dirty on error so that transactions can be easily aborted if possible.
Linus Torvalds1da177e2005-04-16 15:20:36 -07001357 */
1358int
Brian Foster4e529332018-05-10 09:35:42 -07001359xfs_itruncate_extents_flags(
Christoph Hellwig8f04c472011-07-08 14:34:34 +02001360 struct xfs_trans **tpp,
1361 struct xfs_inode *ip,
1362 int whichfork,
Brian Foster13b86fc2018-05-09 08:45:04 -07001363 xfs_fsize_t new_size,
Brian Foster4e529332018-05-10 09:35:42 -07001364 int flags)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001365{
Christoph Hellwig8f04c472011-07-08 14:34:34 +02001366 struct xfs_mount *mp = ip->i_mount;
1367 struct xfs_trans *tp = *tpp;
Christoph Hellwig8f04c472011-07-08 14:34:34 +02001368 xfs_fileoff_t first_unmap_block;
Christoph Hellwig8f04c472011-07-08 14:34:34 +02001369 xfs_filblks_t unmap_len;
Christoph Hellwig8f04c472011-07-08 14:34:34 +02001370 int error = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001371
Christoph Hellwig0b561852012-07-04 11:13:31 -04001372 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
1373 ASSERT(!atomic_read(&VFS_I(ip)->i_count) ||
1374 xfs_isilocked(ip, XFS_IOLOCK_EXCL));
Christoph Hellwigce7ae1512011-12-18 20:00:11 +00001375 ASSERT(new_size <= XFS_ISIZE(ip));
Christoph Hellwig8f04c472011-07-08 14:34:34 +02001376 ASSERT(tp->t_flags & XFS_TRANS_PERM_LOG_RES);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001377 ASSERT(ip->i_itemp != NULL);
Christoph Hellwig898621d2010-06-24 11:36:58 +10001378 ASSERT(ip->i_itemp->ili_lock_flags == 0);
Christoph Hellwig8f04c472011-07-08 14:34:34 +02001379 ASSERT(!XFS_NOT_DQATTACHED(mp, ip));
Linus Torvalds1da177e2005-04-16 15:20:36 -07001380
Christoph Hellwig673e8e52011-12-18 20:00:04 +00001381 trace_xfs_itruncate_extents_start(ip, new_size);
1382
Brian Foster4e529332018-05-10 09:35:42 -07001383 flags |= xfs_bmapi_aflag(whichfork);
Brian Foster13b86fc2018-05-09 08:45:04 -07001384
Linus Torvalds1da177e2005-04-16 15:20:36 -07001385 /*
1386 * Since it is possible for space to become allocated beyond
1387 * the end of the file (in a crash where the space is allocated
1388 * but the inode size is not yet updated), simply remove any
1389 * blocks which show up between the new EOF and the maximum
Darrick J. Wong4bbb04a2020-01-02 13:20:13 -08001390 * possible file size.
1391 *
1392 * We have to free all the blocks to the bmbt maximum offset, even if
1393 * the page cache can't scale that far.
Linus Torvalds1da177e2005-04-16 15:20:36 -07001394 */
Christoph Hellwig8f04c472011-07-08 14:34:34 +02001395 first_unmap_block = XFS_B_TO_FSB(mp, (xfs_ufsize_t)new_size);
Darrick J. Wong33005fd2020-12-04 13:28:35 -08001396 if (!xfs_verify_fileoff(mp, first_unmap_block)) {
Darrick J. Wong4bbb04a2020-01-02 13:20:13 -08001397 WARN_ON_ONCE(first_unmap_block > XFS_MAX_FILEOFF);
Christoph Hellwig8f04c472011-07-08 14:34:34 +02001398 return 0;
Darrick J. Wong4bbb04a2020-01-02 13:20:13 -08001399 }
Christoph Hellwig8f04c472011-07-08 14:34:34 +02001400
Darrick J. Wong4bbb04a2020-01-02 13:20:13 -08001401 unmap_len = XFS_MAX_FILEOFF - first_unmap_block + 1;
1402 while (unmap_len > 0) {
Brian Foster02dff7b2018-07-24 13:43:07 -07001403 ASSERT(tp->t_firstblock == NULLFSBLOCK);
Darrick J. Wong4bbb04a2020-01-02 13:20:13 -08001404 error = __xfs_bunmapi(tp, ip, first_unmap_block, &unmap_len,
1405 flags, XFS_ITRUNC_MAX_EXTENTS);
Christoph Hellwig8f04c472011-07-08 14:34:34 +02001406 if (error)
Brian Fosterd5a2e282018-09-29 13:41:58 +10001407 goto out;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001408
Brian Foster6dd379c2020-09-15 20:44:46 -07001409 /* free the just unmapped extents */
Brian Foster9e28a242018-07-24 13:43:15 -07001410 error = xfs_defer_finish(&tp);
Christoph Hellwig8f04c472011-07-08 14:34:34 +02001411 if (error)
Brian Foster9b1f4e92018-08-01 07:20:33 -07001412 goto out;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001413 }
Christoph Hellwig8f04c472011-07-08 14:34:34 +02001414
Darrick J. Wong4919d422018-04-10 08:28:33 -07001415 if (whichfork == XFS_DATA_FORK) {
1416 /* Remove all pending CoW reservations. */
1417 error = xfs_reflink_cancel_cow_blocks(ip, &tp,
Darrick J. Wong4bbb04a2020-01-02 13:20:13 -08001418 first_unmap_block, XFS_MAX_FILEOFF, true);
Darrick J. Wong4919d422018-04-10 08:28:33 -07001419 if (error)
1420 goto out;
Darrick J. Wongaa8968f2016-10-03 09:11:38 -07001421
Darrick J. Wong4919d422018-04-10 08:28:33 -07001422 xfs_itruncate_clear_reflink_flags(ip);
1423 }
Darrick J. Wongaa8968f2016-10-03 09:11:38 -07001424
Christoph Hellwig673e8e52011-12-18 20:00:04 +00001425 /*
1426 * Always re-log the inode so that our permanent transaction can keep
1427 * on rolling it forward in the log.
1428 */
1429 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
1430
1431 trace_xfs_itruncate_extents_end(ip, new_size);
1432
Christoph Hellwig8f04c472011-07-08 14:34:34 +02001433out:
1434 *tpp = tp;
1435 return error;
Christoph Hellwig8f04c472011-07-08 14:34:34 +02001436}
1437
Dave Chinnerc24b5df2013-08-12 20:49:45 +10001438int
1439xfs_release(
1440 xfs_inode_t *ip)
1441{
1442 xfs_mount_t *mp = ip->i_mount;
Darrick J. Wong7d883292021-03-23 16:59:31 -07001443 int error = 0;
Dave Chinnerc24b5df2013-08-12 20:49:45 +10001444
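	/* Nothing to do for non-regular files or for inodes that are already freed (i_mode == 0). */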
Dave Chinnerc19b3b052016-02-09 16:54:58 +11001445 if (!S_ISREG(VFS_I(ip)->i_mode) || (VFS_I(ip)->i_mode == 0))
Dave Chinnerc24b5df2013-08-12 20:49:45 +10001446 return 0;
1447
1448 /* If this is a read-only mount, don't do this (would generate I/O) */
1449 if (mp->m_flags & XFS_MOUNT_RDONLY)
1450 return 0;
1451
1452 if (!XFS_FORCED_SHUTDOWN(mp)) {
1453 int truncated;
1454
1455 /*
Dave Chinnerc24b5df2013-08-12 20:49:45 +10001456 * If we previously truncated this file and removed old data
1457 * in the process, we want to initiate "early" writeout on
1458 * the last close. This is an attempt to combat the notorious
1459 * NULL files problem which is particularly noticeable from a
1460 * truncate down, buffered (re-)write (delalloc), followed by
1461 * a crash. What we are effectively doing here is
1462 * significantly reducing the time window where we'd otherwise
1463 * be exposed to that problem.
1464 */
1465 truncated = xfs_iflags_test_and_clear(ip, XFS_ITRUNCATED);
1466 if (truncated) {
1467 xfs_iflags_clear(ip, XFS_IDIRTY_RELEASE);
Dave Chinnereac152b2014-08-04 13:22:49 +10001468 if (ip->i_delayed_blks > 0) {
Dave Chinner24513372014-06-25 14:58:08 +10001469 error = filemap_flush(VFS_I(ip)->i_mapping);
Dave Chinnerc24b5df2013-08-12 20:49:45 +10001470 if (error)
1471 return error;
1472 }
1473 }
1474 }
1475
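	/*
	 * Unlinked inodes are cleaned up by xfs_inactive() when the last
	 * reference is dropped, so there is nothing more to do here.
	 */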
Dave Chinner54d7b5c2016-02-09 16:54:58 +11001476 if (VFS_I(ip)->i_nlink == 0)
Dave Chinnerc24b5df2013-08-12 20:49:45 +10001477 return 0;
1478
Darrick J. Wong7d883292021-03-23 16:59:31 -07001479 /*
1480 * If we can't get the iolock just skip truncating the blocks past EOF
1481 * because we could deadlock with the mmap_lock otherwise. We'll get
1482 * another chance to drop them once the last reference to the inode is
1483 * dropped, so we'll never leak blocks permanently.
1484 */
1485 if (!xfs_ilock_nowait(ip, XFS_IOLOCK_EXCL))
1486 return 0;
Dave Chinnerc24b5df2013-08-12 20:49:45 +10001487
Darrick J. Wong7d883292021-03-23 16:59:31 -07001488 if (xfs_can_free_eofblocks(ip, false)) {
Dave Chinnerc24b5df2013-08-12 20:49:45 +10001489 /*
Brian Fostera36b9262017-01-27 23:22:55 -08001490	 * If the inode is being opened, written and closed
1491 * frequently and we have delayed allocation blocks outstanding
1492 * (e.g. streaming writes from the NFS server), truncating the
1493 * blocks past EOF will cause fragmentation to occur.
Dave Chinnerc24b5df2013-08-12 20:49:45 +10001494 *
Brian Fostera36b9262017-01-27 23:22:55 -08001495 * In this case don't do the truncation, but we have to be
1496 * careful how we detect this case. Blocks beyond EOF show up as
1497 * i_delayed_blks even when the inode is clean, so we need to
1498 * truncate them away first before checking for a dirty release.
1499 * Hence on the first dirty close we will still remove the
1500 * speculative allocation, but after that we will leave it in
1501 * place.
Dave Chinnerc24b5df2013-08-12 20:49:45 +10001502 */
1503 if (xfs_iflags_test(ip, XFS_IDIRTY_RELEASE))
Darrick J. Wong7d883292021-03-23 16:59:31 -07001504 goto out_unlock;
1505
1506 error = xfs_free_eofblocks(ip);
1507 if (error)
1508 goto out_unlock;
Dave Chinnerc24b5df2013-08-12 20:49:45 +10001509
1510 /* delalloc blocks after truncation means it really is dirty */
1511 if (ip->i_delayed_blks)
1512 xfs_iflags_set(ip, XFS_IDIRTY_RELEASE);
1513 }
Darrick J. Wong7d883292021-03-23 16:59:31 -07001514
1515out_unlock:
1516 xfs_iunlock(ip, XFS_IOLOCK_EXCL);
1517 return error;
Dave Chinnerc24b5df2013-08-12 20:49:45 +10001518}
1519
1520/*
Brian Fosterf7be2d72013-09-20 11:06:10 -04001521 * xfs_inactive_truncate
1522 *
1523 * Called to perform a truncate when an inode becomes unlinked.
1524 */
1525STATIC int
1526xfs_inactive_truncate(
1527 struct xfs_inode *ip)
1528{
1529 struct xfs_mount *mp = ip->i_mount;
1530 struct xfs_trans *tp;
1531 int error;
1532
Christoph Hellwig253f4912016-04-06 09:19:55 +10001533 error = xfs_trans_alloc(mp, &M_RES(mp)->tr_itruncate, 0, 0, 0, &tp);
Brian Fosterf7be2d72013-09-20 11:06:10 -04001534 if (error) {
1535 ASSERT(XFS_FORCED_SHUTDOWN(mp));
Brian Fosterf7be2d72013-09-20 11:06:10 -04001536 return error;
1537 }
Brian Fosterf7be2d72013-09-20 11:06:10 -04001538 xfs_ilock(ip, XFS_ILOCK_EXCL);
1539 xfs_trans_ijoin(tp, ip, 0);
1540
1541 /*
1542 * Log the inode size first to prevent stale data exposure in the event
1543 * of a system crash before the truncate completes. See the related
Jan Kara69bca802016-05-26 14:46:43 +02001544 * comment in xfs_vn_setattr_size() for details.
Brian Fosterf7be2d72013-09-20 11:06:10 -04001545 */
Christoph Hellwig13d2c102021-03-29 11:11:40 -07001546 ip->i_disk_size = 0;
Brian Fosterf7be2d72013-09-20 11:06:10 -04001547 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
1548
1549 error = xfs_itruncate_extents(&tp, ip, XFS_DATA_FORK, 0);
1550 if (error)
1551 goto error_trans_cancel;
1552
Christoph Hellwigdaf83962020-05-18 10:27:22 -07001553 ASSERT(ip->i_df.if_nextents == 0);
Brian Fosterf7be2d72013-09-20 11:06:10 -04001554
Christoph Hellwig70393312015-06-04 13:48:08 +10001555 error = xfs_trans_commit(tp);
Brian Fosterf7be2d72013-09-20 11:06:10 -04001556 if (error)
1557 goto error_unlock;
1558
1559 xfs_iunlock(ip, XFS_ILOCK_EXCL);
1560 return 0;
1561
1562error_trans_cancel:
Christoph Hellwig4906e212015-06-04 13:47:56 +10001563 xfs_trans_cancel(tp);
Brian Fosterf7be2d72013-09-20 11:06:10 -04001564error_unlock:
1565 xfs_iunlock(ip, XFS_ILOCK_EXCL);
1566 return error;
1567}
1568
1569/*
Brian Foster88877d22013-09-20 11:06:11 -04001570 * xfs_inactive_ifree()
1571 *
1572 * Perform the inode free when an inode is unlinked.
1573 */
1574STATIC int
1575xfs_inactive_ifree(
1576 struct xfs_inode *ip)
1577{
Brian Foster88877d22013-09-20 11:06:11 -04001578 struct xfs_mount *mp = ip->i_mount;
1579 struct xfs_trans *tp;
1580 int error;
1581
Brian Foster9d43b182014-04-24 16:00:52 +10001582 /*
Christoph Hellwig76d771b2017-01-25 07:49:35 -08001583 * We try to use a per-AG reservation for any block needed by the finobt
1584 * tree, but as the finobt feature predates the per-AG reservation
1585	 * support, a degraded file system might not have enough space for the
1586 * reservation at mount time. In that case try to dip into the reserved
1587 * pool and pray.
Brian Foster9d43b182014-04-24 16:00:52 +10001588 *
1589 * Send a warning if the reservation does happen to fail, as the inode
1590 * now remains allocated and sits on the unlinked list until the fs is
1591 * repaired.
1592 */
Darrick J. Wonge1f6ca12019-02-14 09:33:15 -08001593 if (unlikely(mp->m_finobt_nores)) {
Christoph Hellwig76d771b2017-01-25 07:49:35 -08001594 error = xfs_trans_alloc(mp, &M_RES(mp)->tr_ifree,
1595 XFS_IFREE_SPACE_RES(mp), 0, XFS_TRANS_RESERVE,
1596 &tp);
1597 } else {
1598 error = xfs_trans_alloc(mp, &M_RES(mp)->tr_ifree, 0, 0, 0, &tp);
1599 }
Brian Foster88877d22013-09-20 11:06:11 -04001600 if (error) {
Dave Chinner24513372014-06-25 14:58:08 +10001601 if (error == -ENOSPC) {
Brian Foster9d43b182014-04-24 16:00:52 +10001602 xfs_warn_ratelimited(mp,
1603 "Failed to remove inode(s) from unlinked list. "
1604 "Please free space, unmount and run xfs_repair.");
1605 } else {
1606 ASSERT(XFS_FORCED_SHUTDOWN(mp));
1607 }
Brian Foster88877d22013-09-20 11:06:11 -04001608 return error;
1609 }
1610
Dave Chinner96355d5a2020-06-29 14:48:45 -07001611 /*
1612 * We do not hold the inode locked across the entire rolling transaction
1613 * here. We only need to hold it for the first transaction that
1614 * xfs_ifree() builds, which may mark the inode XFS_ISTALE if the
1615 * underlying cluster buffer is freed. Relogging an XFS_ISTALE inode
1616 * here breaks the relationship between cluster buffer invalidation and
1617 * stale inode invalidation on cluster buffer item journal commit
1618 * completion, and can result in leaving dirty stale inodes hanging
1619 * around in memory.
1620 *
1621 * We have no need for serialising this inode operation against other
1622 * operations - we freed the inode and hence reallocation is required
1623 * and that will serialise on reallocating the space the deferops need
1624 * to free. Hence we can unlock the inode on the first commit of
1625 * the transaction rather than roll it right through the deferops. This
1626 * avoids relogging the XFS_ISTALE inode.
1627 *
1628 * We check that xfs_ifree() hasn't grown an internal transaction roll
1629 * by asserting that the inode is still locked when it returns.
1630 */
Brian Foster88877d22013-09-20 11:06:11 -04001631 xfs_ilock(ip, XFS_ILOCK_EXCL);
Dave Chinner96355d5a2020-06-29 14:48:45 -07001632 xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
Brian Foster88877d22013-09-20 11:06:11 -04001633
Brian Foster0e0417f2018-07-11 22:26:07 -07001634 error = xfs_ifree(tp, ip);
Dave Chinner96355d5a2020-06-29 14:48:45 -07001635 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
Brian Foster88877d22013-09-20 11:06:11 -04001636 if (error) {
1637 /*
1638 * If we fail to free the inode, shut down. The cancel
1639 * might do that, we need to make sure. Otherwise the
1640 * inode might be lost for a long time or forever.
1641 */
1642 if (!XFS_FORCED_SHUTDOWN(mp)) {
1643 xfs_notice(mp, "%s: xfs_ifree returned error %d",
1644 __func__, error);
1645 xfs_force_shutdown(mp, SHUTDOWN_META_IO_ERROR);
1646 }
Christoph Hellwig4906e212015-06-04 13:47:56 +10001647 xfs_trans_cancel(tp);
Brian Foster88877d22013-09-20 11:06:11 -04001648 return error;
1649 }
1650
1651 /*
1652 * Credit the quota account(s). The inode is gone.
1653 */
1654 xfs_trans_mod_dquot_byino(tp, ip, XFS_TRANS_DQ_ICOUNT, -1);
1655
1656 /*
Brian Fosterd4a97a02015-08-19 10:01:40 +10001657 * Just ignore errors at this point. There is nothing we can do except
1658 * to try to keep going. Make sure it's not a silent error.
Brian Foster88877d22013-09-20 11:06:11 -04001659 */
Christoph Hellwig70393312015-06-04 13:48:08 +10001660 error = xfs_trans_commit(tp);
Brian Foster88877d22013-09-20 11:06:11 -04001661 if (error)
1662 xfs_notice(mp, "%s: xfs_trans_commit returned error %d",
1663 __func__, error);
1664
Brian Foster88877d22013-09-20 11:06:11 -04001665 return 0;
1666}
1667
1668/*
Dave Chinnerc24b5df2013-08-12 20:49:45 +10001669 * xfs_inactive
1670 *
1671	 * This is called when the reference count for the vnode
1672 * goes to zero. If the file has been unlinked, then it must
1673 * now be truncated. Also, we clear all of the read-ahead state
1674 * kept for the inode here since the file is now closed.
1675 */
Brian Foster74564fb2013-09-20 11:06:12 -04001676void
Dave Chinnerc24b5df2013-08-12 20:49:45 +10001677xfs_inactive(
1678 xfs_inode_t *ip)
1679{
Jie Liu3d3c8b52013-08-12 20:49:59 +10001680 struct xfs_mount *mp;
Jie Liu3d3c8b52013-08-12 20:49:59 +10001681 int error;
1682 int truncate = 0;
Dave Chinnerc24b5df2013-08-12 20:49:45 +10001683
1684 /*
1685 * If the inode is already free, then there can be nothing
1686 * to clean up here.
1687 */
Dave Chinnerc19b3b052016-02-09 16:54:58 +11001688 if (VFS_I(ip)->i_mode == 0) {
Dave Chinnerc24b5df2013-08-12 20:49:45 +10001689 ASSERT(ip->i_df.if_broot_bytes == 0);
Brian Foster74564fb2013-09-20 11:06:12 -04001690 return;
Dave Chinnerc24b5df2013-08-12 20:49:45 +10001691 }
1692
1693 mp = ip->i_mount;
Darrick J. Wong17c12bc2016-10-03 09:11:29 -07001694 ASSERT(!xfs_iflags_test(ip, XFS_IRECOVERY));
Dave Chinnerc24b5df2013-08-12 20:49:45 +10001695
Dave Chinnerc24b5df2013-08-12 20:49:45 +10001696 /* If this is a read-only mount, don't do this (would generate I/O) */
1697 if (mp->m_flags & XFS_MOUNT_RDONLY)
Brian Foster74564fb2013-09-20 11:06:12 -04001698 return;
Dave Chinnerc24b5df2013-08-12 20:49:45 +10001699
Darrick J. Wong383e32b2021-03-22 09:51:54 -07001700 /* Metadata inodes require explicit resource cleanup. */
1701 if (xfs_is_metadata_inode(ip))
1702 return;
1703
Darrick J. Wong62318482018-03-06 17:08:31 -08001704 /* Try to clean out the cow blocks if there are any. */
Christoph Hellwig51d62692018-07-17 16:51:51 -07001705 if (xfs_inode_has_cow_data(ip))
Darrick J. Wong62318482018-03-06 17:08:31 -08001706 xfs_reflink_cancel_cow_range(ip, 0, NULLFILEOFF, true);
1707
Dave Chinner54d7b5c2016-02-09 16:54:58 +11001708 if (VFS_I(ip)->i_nlink != 0) {
Dave Chinnerc24b5df2013-08-12 20:49:45 +10001709 /*
1710 * force is true because we are evicting an inode from the
1711 * cache. Post-eof blocks must be freed, lest we end up with
1712 * broken free space accounting.
Brian Foster3b4683c2017-04-11 10:50:05 -07001713 *
1714 * Note: don't bother with iolock here since lockdep complains
1715 * about acquiring it in reclaim context. We have the only
1716 * reference to the inode at this point anyways.
Dave Chinnerc24b5df2013-08-12 20:49:45 +10001717 */
Brian Foster3b4683c2017-04-11 10:50:05 -07001718 if (xfs_can_free_eofblocks(ip, true))
Brian Fostera36b9262017-01-27 23:22:55 -08001719 xfs_free_eofblocks(ip);
Brian Foster74564fb2013-09-20 11:06:12 -04001720
1721 return;
Dave Chinnerc24b5df2013-08-12 20:49:45 +10001722 }
1723
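	/*
	 * A regular file with any remaining on-disk size, in-core size,
	 * extents or delayed allocation blocks needs its data fork
	 * truncated before the inode can be freed.
	 */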
Dave Chinnerc19b3b052016-02-09 16:54:58 +11001724 if (S_ISREG(VFS_I(ip)->i_mode) &&
Christoph Hellwig13d2c102021-03-29 11:11:40 -07001725 (ip->i_disk_size != 0 || XFS_ISIZE(ip) != 0 ||
Christoph Hellwigdaf83962020-05-18 10:27:22 -07001726 ip->i_df.if_nextents > 0 || ip->i_delayed_blks > 0))
Dave Chinnerc24b5df2013-08-12 20:49:45 +10001727 truncate = 1;
1728
Darrick J. Wongc14cfcc2018-05-04 15:30:21 -07001729 error = xfs_qm_dqattach(ip);
Dave Chinnerc24b5df2013-08-12 20:49:45 +10001730 if (error)
Brian Foster74564fb2013-09-20 11:06:12 -04001731 return;
Dave Chinnerc24b5df2013-08-12 20:49:45 +10001732
Dave Chinnerc19b3b052016-02-09 16:54:58 +11001733 if (S_ISLNK(VFS_I(ip)->i_mode))
Brian Foster36b21dd2013-09-20 11:06:09 -04001734 error = xfs_inactive_symlink(ip);
Brian Fosterf7be2d72013-09-20 11:06:10 -04001735 else if (truncate)
1736 error = xfs_inactive_truncate(ip);
1737 if (error)
Brian Foster74564fb2013-09-20 11:06:12 -04001738 return;
Dave Chinnerc24b5df2013-08-12 20:49:45 +10001739
1740 /*
1741 * If there are attributes associated with the file then blow them away
1742 * now. The code calls a routine that recursively deconstructs the
Dave Chinner6dfe5a02015-05-29 07:40:08 +10001743	 * attribute fork. It also blows away the in-core attribute fork.
Dave Chinnerc24b5df2013-08-12 20:49:45 +10001744 */
Dave Chinner6dfe5a02015-05-29 07:40:08 +10001745 if (XFS_IFORK_Q(ip)) {
Dave Chinnerc24b5df2013-08-12 20:49:45 +10001746 error = xfs_attr_inactive(ip);
1747 if (error)
Brian Foster74564fb2013-09-20 11:06:12 -04001748 return;
Dave Chinnerc24b5df2013-08-12 20:49:45 +10001749 }
1750
Dave Chinner6dfe5a02015-05-29 07:40:08 +10001751 ASSERT(!ip->i_afp);
Christoph Hellwig7821ea32021-03-29 11:11:44 -07001752 ASSERT(ip->i_forkoff == 0);
Dave Chinnerc24b5df2013-08-12 20:49:45 +10001753
1754 /*
1755 * Free the inode.
1756 */
Brian Foster88877d22013-09-20 11:06:11 -04001757 error = xfs_inactive_ifree(ip);
1758 if (error)
Brian Foster74564fb2013-09-20 11:06:12 -04001759 return;
Dave Chinnerc24b5df2013-08-12 20:49:45 +10001760
1761 /*
1762 * Release the dquots held by inode, if any.
1763 */
1764 xfs_qm_dqdetach(ip);
Dave Chinnerc24b5df2013-08-12 20:49:45 +10001765}
1766
Linus Torvalds1da177e2005-04-16 15:20:36 -07001767/*
Darrick J. Wong9b247172019-02-07 10:37:16 -08001768 * In-Core Unlinked List Lookups
1769 * =============================
1770 *
1771 * Every inode is supposed to be reachable from some other piece of metadata
1772 * with the exception of the root directory. Inodes with a connection to a
1773 * file descriptor but not linked from anywhere in the on-disk directory tree
1774 * are collectively known as unlinked inodes, though the filesystem itself
1775 * maintains links to these inodes so that on-disk metadata are consistent.
1776 *
1777 * XFS implements a per-AG on-disk hash table of unlinked inodes. The AGI
1778 * header contains a number of buckets that point to an inode, and each inode
1779 * record has a pointer to the next inode in the hash chain. This
1780 * singly-linked list causes scaling problems in the iunlink remove function
1781 * because we must walk that list to find the inode that points to the inode
1782 * being removed from the unlinked hash bucket list.
1783 *
1784 * What if we modelled the unlinked list as a collection of records capturing
1785 * "X.next_unlinked = Y" relations? If we indexed those records on Y, we'd
1786 * have a fast way to look up unlinked list predecessors, which avoids the
1787 * slow list walk. That's exactly what we do here (in-core) with a per-AG
1788 * rhashtable.
1789 *
1790 * Because this is a backref cache, we ignore operational failures since the
1791 * iunlink code can fall back to the slow bucket walk. The only errors that
1792 * should bubble out are for obviously incorrect situations.
1793 *
1794 * All users of the backref cache MUST hold the AGI buffer lock to serialize
1795 * access or have otherwise provided for concurrency control.
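 *
 * As an example of the model, if an AGI bucket points at inode A and the
 * on-disk chain is A -> B -> C, the cache holds the record A.next_unlinked = B
 * keyed on B and the record B.next_unlinked = C keyed on C. Removing C from
 * the chain then takes a single lookup keyed on C to find its predecessor B,
 * instead of walking the bucket list from A.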
1796 */
1797
1798/* Capture a "X.next_unlinked = Y" relationship. */
1799struct xfs_iunlink {
1800 struct rhash_head iu_rhash_head;
1801 xfs_agino_t iu_agino; /* X */
1802 xfs_agino_t iu_next_unlinked; /* Y */
1803};
1804
1805/* Unlinked list predecessor lookup hashtable construction */
1806static int
1807xfs_iunlink_obj_cmpfn(
1808 struct rhashtable_compare_arg *arg,
1809 const void *obj)
1810{
1811 const xfs_agino_t *key = arg->key;
1812 const struct xfs_iunlink *iu = obj;
1813
1814 if (iu->iu_next_unlinked != *key)
1815 return 1;
1816 return 0;
1817}
1818
1819static const struct rhashtable_params xfs_iunlink_hash_params = {
1820 .min_size = XFS_AGI_UNLINKED_BUCKETS,
1821 .key_len = sizeof(xfs_agino_t),
1822 .key_offset = offsetof(struct xfs_iunlink,
1823 iu_next_unlinked),
1824 .head_offset = offsetof(struct xfs_iunlink, iu_rhash_head),
1825 .automatic_shrinking = true,
1826 .obj_cmpfn = xfs_iunlink_obj_cmpfn,
1827};
1828
1829/*
1830 * Return X, where X.next_unlinked == @agino. Returns NULLAGINO if no such
1831 * relation is found.
1832 */
1833static xfs_agino_t
1834xfs_iunlink_lookup_backref(
1835 struct xfs_perag *pag,
1836 xfs_agino_t agino)
1837{
1838 struct xfs_iunlink *iu;
1839
1840 iu = rhashtable_lookup_fast(&pag->pagi_unlinked_hash, &agino,
1841 xfs_iunlink_hash_params);
1842 return iu ? iu->iu_agino : NULLAGINO;
1843}
1844
1845/*
1846 * Take ownership of an iunlink cache entry and insert it into the hash table.
1847 * If successful, the entry will be owned by the cache; if not, it is freed.
1848 * Either way, the caller does not own @iu after this call.
1849 */
1850static int
1851xfs_iunlink_insert_backref(
1852 struct xfs_perag *pag,
1853 struct xfs_iunlink *iu)
1854{
1855 int error;
1856
1857 error = rhashtable_insert_fast(&pag->pagi_unlinked_hash,
1858 &iu->iu_rhash_head, xfs_iunlink_hash_params);
1859 /*
1860 * Fail loudly if there already was an entry because that's a sign of
1861 * corruption of in-memory data. Also fail loudly if we see an error
1862 * code we didn't anticipate from the rhashtable code. Currently we
1863 * only anticipate ENOMEM.
1864 */
1865 if (error) {
1866 WARN(error != -ENOMEM, "iunlink cache insert error %d", error);
1867 kmem_free(iu);
1868 }
1869 /*
1870 * Absorb any runtime errors that aren't a result of corruption because
1871 * this is a cache and we can always fall back to bucket list scanning.
1872 */
1873 if (error != 0 && error != -EEXIST)
1874 error = 0;
1875 return error;
1876}
1877
1878/* Remember that @prev_agino.next_unlinked = @this_agino. */
1879static int
1880xfs_iunlink_add_backref(
1881 struct xfs_perag *pag,
1882 xfs_agino_t prev_agino,
1883 xfs_agino_t this_agino)
1884{
1885 struct xfs_iunlink *iu;
1886
1887 if (XFS_TEST_ERROR(false, pag->pag_mount, XFS_ERRTAG_IUNLINK_FALLBACK))
1888 return 0;
1889
Tetsuo Handa707e0dd2019-08-26 12:06:22 -07001890 iu = kmem_zalloc(sizeof(*iu), KM_NOFS);
Darrick J. Wong9b247172019-02-07 10:37:16 -08001891 iu->iu_agino = prev_agino;
1892 iu->iu_next_unlinked = this_agino;
1893
1894 return xfs_iunlink_insert_backref(pag, iu);
1895}
1896
1897/*
1898 * Replace X.next_unlinked = @agino with X.next_unlinked = @next_unlinked.
1899 * If @next_unlinked is NULLAGINO, we drop the backref and exit. If there
1900 * wasn't any such entry then we don't bother.
1901 */
1902static int
1903xfs_iunlink_change_backref(
1904 struct xfs_perag *pag,
1905 xfs_agino_t agino,
1906 xfs_agino_t next_unlinked)
1907{
1908 struct xfs_iunlink *iu;
1909 int error;
1910
1911 /* Look up the old entry; if there wasn't one then exit. */
1912 iu = rhashtable_lookup_fast(&pag->pagi_unlinked_hash, &agino,
1913 xfs_iunlink_hash_params);
1914 if (!iu)
1915 return 0;
1916
1917 /*
1918 * Remove the entry. This shouldn't ever return an error, but if we
1919 * couldn't remove the old entry we don't want to add it again to the
1920 * hash table, and if the entry disappeared on us then someone's
1921 * violated the locking rules and we need to fail loudly. Either way
1922 * we cannot remove the inode because internal state is or would have
1923 * been corrupt.
1924 */
1925 error = rhashtable_remove_fast(&pag->pagi_unlinked_hash,
1926 &iu->iu_rhash_head, xfs_iunlink_hash_params);
1927 if (error)
1928 return error;
1929
1930 /* If there is no new next entry just free our item and return. */
1931 if (next_unlinked == NULLAGINO) {
1932 kmem_free(iu);
1933 return 0;
1934 }
1935
1936 /* Update the entry and re-add it to the hash table. */
1937 iu->iu_next_unlinked = next_unlinked;
1938 return xfs_iunlink_insert_backref(pag, iu);
1939}
1940
1941/* Set up the in-core predecessor structures. */
1942int
1943xfs_iunlink_init(
1944 struct xfs_perag *pag)
1945{
1946 return rhashtable_init(&pag->pagi_unlinked_hash,
1947 &xfs_iunlink_hash_params);
1948}
1949
1950/* Free the in-core predecessor structures. */
1951static void
1952xfs_iunlink_free_item(
1953 void *ptr,
1954 void *arg)
1955{
1956 struct xfs_iunlink *iu = ptr;
1957 bool *freed_anything = arg;
1958
1959 *freed_anything = true;
1960 kmem_free(iu);
1961}
1962
1963void
1964xfs_iunlink_destroy(
1965 struct xfs_perag *pag)
1966{
1967 bool freed_anything = false;
1968
1969 rhashtable_free_and_destroy(&pag->pagi_unlinked_hash,
1970 xfs_iunlink_free_item, &freed_anything);
1971
1972 ASSERT(freed_anything == false || XFS_FORCED_SHUTDOWN(pag->pag_mount));
1973}
1974
1975/*
Darrick J. Wong9a4a5112019-02-07 10:37:14 -08001976 * Point the AGI unlinked bucket at an inode and log the results. The caller
1977 * is responsible for validating the old value.
1978 */
1979STATIC int
1980xfs_iunlink_update_bucket(
1981 struct xfs_trans *tp,
1982 xfs_agnumber_t agno,
1983 struct xfs_buf *agibp,
1984 unsigned int bucket_index,
1985 xfs_agino_t new_agino)
1986{
Christoph Hellwig370c7822020-03-10 08:57:29 -07001987 struct xfs_agi *agi = agibp->b_addr;
Darrick J. Wong9a4a5112019-02-07 10:37:14 -08001988 xfs_agino_t old_value;
1989 int offset;
1990
1991 ASSERT(xfs_verify_agino_or_null(tp->t_mountp, agno, new_agino));
1992
1993 old_value = be32_to_cpu(agi->agi_unlinked[bucket_index]);
1994 trace_xfs_iunlink_update_bucket(tp->t_mountp, agno, bucket_index,
1995 old_value, new_agino);
1996
1997 /*
1998 * We should never find the head of the list already set to the value
1999 * passed in because either we're adding or removing ourselves from the
2000 * head of the list.
2001 */
Darrick J. Wonga5155b82019-11-02 09:40:53 -07002002 if (old_value == new_agino) {
Darrick J. Wong8d57c212020-03-11 10:37:54 -07002003 xfs_buf_mark_corrupt(agibp);
Darrick J. Wong9a4a5112019-02-07 10:37:14 -08002004 return -EFSCORRUPTED;
Darrick J. Wonga5155b82019-11-02 09:40:53 -07002005 }
Darrick J. Wong9a4a5112019-02-07 10:37:14 -08002006
2007 agi->agi_unlinked[bucket_index] = cpu_to_be32(new_agino);
2008 offset = offsetof(struct xfs_agi, agi_unlinked) +
2009 (sizeof(xfs_agino_t) * bucket_index);
2010 xfs_trans_log_buf(tp, agibp, offset, offset + sizeof(xfs_agino_t) - 1);
2011 return 0;
2012}
2013
Darrick J. Wongf2fc16a2019-02-07 10:37:15 -08002014/* Set an on-disk inode's next_unlinked pointer. */
2015STATIC void
2016xfs_iunlink_update_dinode(
2017 struct xfs_trans *tp,
2018 xfs_agnumber_t agno,
2019 xfs_agino_t agino,
2020 struct xfs_buf *ibp,
2021 struct xfs_dinode *dip,
2022 struct xfs_imap *imap,
2023 xfs_agino_t next_agino)
2024{
2025 struct xfs_mount *mp = tp->t_mountp;
2026 int offset;
2027
2028 ASSERT(xfs_verify_agino_or_null(mp, agno, next_agino));
2029
2030 trace_xfs_iunlink_update_dinode(mp, agno, agino,
2031 be32_to_cpu(dip->di_next_unlinked), next_agino);
2032
2033 dip->di_next_unlinked = cpu_to_be32(next_agino);
2034 offset = imap->im_boffset +
2035 offsetof(struct xfs_dinode, di_next_unlinked);
2036
2037 /* need to recalc the inode CRC if appropriate */
2038 xfs_dinode_calc_crc(mp, dip);
2039 xfs_trans_inode_buf(tp, ibp);
2040 xfs_trans_log_buf(tp, ibp, offset, offset + sizeof(xfs_agino_t) - 1);
Darrick J. Wongf2fc16a2019-02-07 10:37:15 -08002041}
2042
2043/* Set an in-core inode's unlinked pointer and return the old value. */
2044STATIC int
2045xfs_iunlink_update_inode(
2046 struct xfs_trans *tp,
2047 struct xfs_inode *ip,
2048 xfs_agnumber_t agno,
2049 xfs_agino_t next_agino,
2050 xfs_agino_t *old_next_agino)
2051{
2052 struct xfs_mount *mp = tp->t_mountp;
2053 struct xfs_dinode *dip;
2054 struct xfs_buf *ibp;
2055 xfs_agino_t old_value;
2056 int error;
2057
2058 ASSERT(xfs_verify_agino_or_null(mp, agno, next_agino));
2059
Christoph Hellwigaf9dcdd2021-03-29 11:11:37 -07002060 error = xfs_imap_to_bp(mp, tp, &ip->i_imap, &ibp);
Darrick J. Wongf2fc16a2019-02-07 10:37:15 -08002061 if (error)
2062 return error;
Christoph Hellwigaf9dcdd2021-03-29 11:11:37 -07002063 dip = xfs_buf_offset(ibp, ip->i_imap.im_boffset);
Darrick J. Wongf2fc16a2019-02-07 10:37:15 -08002064
2065 /* Make sure the old pointer isn't garbage. */
2066 old_value = be32_to_cpu(dip->di_next_unlinked);
2067 if (!xfs_verify_agino_or_null(mp, agno, old_value)) {
Darrick J. Wonga5155b82019-11-02 09:40:53 -07002068 xfs_inode_verifier_error(ip, -EFSCORRUPTED, __func__, dip,
2069 sizeof(*dip), __this_address);
Darrick J. Wongf2fc16a2019-02-07 10:37:15 -08002070 error = -EFSCORRUPTED;
2071 goto out;
2072 }
2073
2074 /*
2075 * Since we're updating a linked list, we should never find that the
2076 * current pointer is the same as the new value, unless we're
2077 * terminating the list.
2078 */
2079 *old_next_agino = old_value;
2080 if (old_value == next_agino) {
Darrick J. Wonga5155b82019-11-02 09:40:53 -07002081 if (next_agino != NULLAGINO) {
2082 xfs_inode_verifier_error(ip, -EFSCORRUPTED, __func__,
2083 dip, sizeof(*dip), __this_address);
Darrick J. Wongf2fc16a2019-02-07 10:37:15 -08002084 error = -EFSCORRUPTED;
Darrick J. Wonga5155b82019-11-02 09:40:53 -07002085 }
Darrick J. Wongf2fc16a2019-02-07 10:37:15 -08002086 goto out;
2087 }
2088
2089 /* Ok, update the new pointer. */
2090 xfs_iunlink_update_dinode(tp, agno, XFS_INO_TO_AGINO(mp, ip->i_ino),
2091 ibp, dip, &ip->i_imap, next_agino);
2092 return 0;
2093out:
2094 xfs_trans_brelse(tp, ibp);
2095 return error;
2096}
2097
Darrick J. Wong9a4a5112019-02-07 10:37:14 -08002098/*
Darrick J. Wongc4a6bf72019-02-13 11:15:17 -08002099 * This is called when the inode's link count has gone to 0 or we are creating
2100 * a tmpfile via O_TMPFILE. The inode @ip must have nlink == 0.
Dave Chinner54d7b5c2016-02-09 16:54:58 +11002101 *
2102 * We place the on-disk inode on a list in the AGI. It will be pulled from this
2103 * list when the inode is freed.
Linus Torvalds1da177e2005-04-16 15:20:36 -07002104 */
Dave Chinner54d7b5c2016-02-09 16:54:58 +11002105STATIC int
Linus Torvalds1da177e2005-04-16 15:20:36 -07002106xfs_iunlink(
Darrick J. Wong5837f622019-02-07 10:37:13 -08002107 struct xfs_trans *tp,
2108 struct xfs_inode *ip)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002109{
Darrick J. Wong5837f622019-02-07 10:37:13 -08002110 struct xfs_mount *mp = tp->t_mountp;
2111 struct xfs_agi *agi;
Darrick J. Wong5837f622019-02-07 10:37:13 -08002112 struct xfs_buf *agibp;
Darrick J. Wong86bfd372019-02-07 10:37:14 -08002113 xfs_agino_t next_agino;
Darrick J. Wong5837f622019-02-07 10:37:13 -08002114 xfs_agnumber_t agno = XFS_INO_TO_AGNO(mp, ip->i_ino);
2115 xfs_agino_t agino = XFS_INO_TO_AGINO(mp, ip->i_ino);
2116 short bucket_index = agino % XFS_AGI_UNLINKED_BUCKETS;
Darrick J. Wong5837f622019-02-07 10:37:13 -08002117 int error;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002118
Darrick J. Wongc4a6bf72019-02-13 11:15:17 -08002119 ASSERT(VFS_I(ip)->i_nlink == 0);
Dave Chinnerc19b3b052016-02-09 16:54:58 +11002120 ASSERT(VFS_I(ip)->i_mode != 0);
Darrick J. Wong4664c662019-02-07 10:37:16 -08002121 trace_xfs_iunlink(ip);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002122
Darrick J. Wong5837f622019-02-07 10:37:13 -08002123 /* Get the agi buffer first. It ensures lock ordering on the list. */
2124 error = xfs_read_agi(mp, tp, agno, &agibp);
Vlad Apostolov859d7182007-10-11 17:44:18 +10002125 if (error)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002126 return error;
Christoph Hellwig370c7822020-03-10 08:57:29 -07002127 agi = agibp->b_addr;
Christoph Hellwig5e1be0f2008-11-28 14:23:37 +11002128
Linus Torvalds1da177e2005-04-16 15:20:36 -07002129 /*
Darrick J. Wong86bfd372019-02-07 10:37:14 -08002130 * Get the index into the agi hash table for the list this inode will
2131 * go on. Make sure the pointer isn't garbage and that this inode
2132 * isn't already on the list.
Linus Torvalds1da177e2005-04-16 15:20:36 -07002133 */
Darrick J. Wong86bfd372019-02-07 10:37:14 -08002134 next_agino = be32_to_cpu(agi->agi_unlinked[bucket_index]);
2135 if (next_agino == agino ||
Darrick J. Wonga5155b82019-11-02 09:40:53 -07002136 !xfs_verify_agino_or_null(mp, agno, next_agino)) {
Darrick J. Wong8d57c212020-03-11 10:37:54 -07002137 xfs_buf_mark_corrupt(agibp);
Darrick J. Wong86bfd372019-02-07 10:37:14 -08002138 return -EFSCORRUPTED;
Darrick J. Wonga5155b82019-11-02 09:40:53 -07002139 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07002140
Darrick J. Wong86bfd372019-02-07 10:37:14 -08002141 if (next_agino != NULLAGINO) {
Darrick J. Wong9b247172019-02-07 10:37:16 -08002142 xfs_agino_t old_agino;
Darrick J. Wongf2fc16a2019-02-07 10:37:15 -08002143
Linus Torvalds1da177e2005-04-16 15:20:36 -07002144 /*
Darrick J. Wongf2fc16a2019-02-07 10:37:15 -08002145 * There is already another inode in the bucket, so point this
2146 * inode to the current head of the list.
Linus Torvalds1da177e2005-04-16 15:20:36 -07002147 */
Darrick J. Wongf2fc16a2019-02-07 10:37:15 -08002148 error = xfs_iunlink_update_inode(tp, ip, agno, next_agino,
2149 &old_agino);
Vlad Apostolovc319b582007-11-23 16:27:51 +11002150 if (error)
2151 return error;
Darrick J. Wongf2fc16a2019-02-07 10:37:15 -08002152 ASSERT(old_agino == NULLAGINO);
Darrick J. Wong9b247172019-02-07 10:37:16 -08002153
2154 /*
2155 * agino has been unlinked, add a backref from the next inode
2156 * back to agino.
2157 */
Gao Xiang92a00542020-07-13 09:13:00 -07002158 error = xfs_iunlink_add_backref(agibp->b_pag, agino, next_agino);
Darrick J. Wong9b247172019-02-07 10:37:16 -08002159 if (error)
2160 return error;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002161 }
2162
Darrick J. Wong9a4a5112019-02-07 10:37:14 -08002163 /* Point the head of the list to point to this inode. */
2164 return xfs_iunlink_update_bucket(tp, agno, agibp, bucket_index, agino);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002165}
2166
Darrick J. Wong23ffa522019-02-07 10:37:15 -08002167/* Return the imap, dinode pointer, and buffer for an inode. */
2168STATIC int
2169xfs_iunlink_map_ino(
2170 struct xfs_trans *tp,
2171 xfs_agnumber_t agno,
2172 xfs_agino_t agino,
2173 struct xfs_imap *imap,
2174 struct xfs_dinode **dipp,
2175 struct xfs_buf **bpp)
2176{
2177 struct xfs_mount *mp = tp->t_mountp;
2178 int error;
2179
2180 imap->im_blkno = 0;
2181 error = xfs_imap(mp, tp, XFS_AGINO_TO_INO(mp, agno, agino), imap, 0);
2182 if (error) {
2183 xfs_warn(mp, "%s: xfs_imap returned error %d.",
2184 __func__, error);
2185 return error;
2186 }
2187
Christoph Hellwigaf9dcdd2021-03-29 11:11:37 -07002188 error = xfs_imap_to_bp(mp, tp, imap, bpp);
Darrick J. Wong23ffa522019-02-07 10:37:15 -08002189 if (error) {
2190 xfs_warn(mp, "%s: xfs_imap_to_bp returned error %d.",
2191 __func__, error);
2192 return error;
2193 }
2194
Christoph Hellwigaf9dcdd2021-03-29 11:11:37 -07002195 *dipp = xfs_buf_offset(*bpp, imap->im_boffset);
Darrick J. Wong23ffa522019-02-07 10:37:15 -08002196 return 0;
2197}
2198
2199/*
2200 * Walk the unlinked chain from @head_agino until we find the inode that
2201 * points to @target_agino. Return the inode number, map, dinode pointer,
2202 * and inode cluster buffer of that inode as @agino, @imap, @dipp, and @bpp.
2203 *
2204 * @tp, @pag, @head_agino, and @target_agino are input parameters.
2205 * @agino, @imap, @dipp, and @bpp are all output parameters.
2206 *
2207 * Do not call this function if @target_agino is the head of the list.
2208 */
2209STATIC int
2210xfs_iunlink_map_prev(
2211 struct xfs_trans *tp,
2212 xfs_agnumber_t agno,
2213 xfs_agino_t head_agino,
2214 xfs_agino_t target_agino,
2215 xfs_agino_t *agino,
2216 struct xfs_imap *imap,
2217 struct xfs_dinode **dipp,
Darrick J. Wong9b247172019-02-07 10:37:16 -08002218 struct xfs_buf **bpp,
2219 struct xfs_perag *pag)
Darrick J. Wong23ffa522019-02-07 10:37:15 -08002220{
2221 struct xfs_mount *mp = tp->t_mountp;
2222 xfs_agino_t next_agino;
2223 int error;
2224
2225 ASSERT(head_agino != target_agino);
2226 *bpp = NULL;
2227
Darrick J. Wong9b247172019-02-07 10:37:16 -08002228 /* See if our backref cache can find it faster. */
2229 *agino = xfs_iunlink_lookup_backref(pag, target_agino);
2230 if (*agino != NULLAGINO) {
2231 error = xfs_iunlink_map_ino(tp, agno, *agino, imap, dipp, bpp);
2232 if (error)
2233 return error;
2234
2235 if (be32_to_cpu((*dipp)->di_next_unlinked) == target_agino)
2236 return 0;
2237
2238 /*
2239 * If we get here the cache contents were corrupt, so drop the
2240 * buffer and fall back to walking the bucket list.
2241 */
2242 xfs_trans_brelse(tp, *bpp);
2243 *bpp = NULL;
2244 WARN_ON_ONCE(1);
2245 }
2246
2247 trace_xfs_iunlink_map_prev_fallback(mp, agno);
2248
2249 /* Otherwise, walk the entire bucket until we find it. */
Darrick J. Wong23ffa522019-02-07 10:37:15 -08002250 next_agino = head_agino;
2251 while (next_agino != target_agino) {
2252 xfs_agino_t unlinked_agino;
2253
2254 if (*bpp)
2255 xfs_trans_brelse(tp, *bpp);
2256
2257 *agino = next_agino;
2258 error = xfs_iunlink_map_ino(tp, agno, next_agino, imap, dipp,
2259 bpp);
2260 if (error)
2261 return error;
2262
2263 unlinked_agino = be32_to_cpu((*dipp)->di_next_unlinked);
2264 /*
2265 * Make sure this pointer is valid and isn't an obvious
2266 * infinite loop.
2267 */
2268 if (!xfs_verify_agino(mp, agno, unlinked_agino) ||
2269 next_agino == unlinked_agino) {
2270 XFS_CORRUPTION_ERROR(__func__,
2271 XFS_ERRLEVEL_LOW, mp,
2272 *dipp, sizeof(**dipp));
2273 error = -EFSCORRUPTED;
2274 return error;
2275 }
2276 next_agino = unlinked_agino;
2277 }
2278
2279 return 0;
2280}
2281
Linus Torvalds1da177e2005-04-16 15:20:36 -07002282/*
2283 * Pull the on-disk inode from the AGI unlinked list.
2284 */
2285STATIC int
2286xfs_iunlink_remove(
Darrick J. Wong5837f622019-02-07 10:37:13 -08002287 struct xfs_trans *tp,
2288 struct xfs_inode *ip)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002289{
Darrick J. Wong5837f622019-02-07 10:37:13 -08002290 struct xfs_mount *mp = tp->t_mountp;
2291 struct xfs_agi *agi;
Darrick J. Wong5837f622019-02-07 10:37:13 -08002292 struct xfs_buf *agibp;
Darrick J. Wong5837f622019-02-07 10:37:13 -08002293 struct xfs_buf *last_ibp;
2294 struct xfs_dinode *last_dip = NULL;
Darrick J. Wong5837f622019-02-07 10:37:13 -08002295 xfs_agnumber_t agno = XFS_INO_TO_AGNO(mp, ip->i_ino);
2296 xfs_agino_t agino = XFS_INO_TO_AGINO(mp, ip->i_ino);
2297 xfs_agino_t next_agino;
Darrick J. Wongb1d2a062019-02-07 10:37:15 -08002298 xfs_agino_t head_agino;
Darrick J. Wong5837f622019-02-07 10:37:13 -08002299 short bucket_index = agino % XFS_AGI_UNLINKED_BUCKETS;
Darrick J. Wong5837f622019-02-07 10:37:13 -08002300 int error;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002301
Darrick J. Wong4664c662019-02-07 10:37:16 -08002302 trace_xfs_iunlink_remove(ip);
2303
Darrick J. Wong5837f622019-02-07 10:37:13 -08002304 /* Get the agi buffer first. It ensures lock ordering on the list. */
Christoph Hellwig5e1be0f2008-11-28 14:23:37 +11002305 error = xfs_read_agi(mp, tp, agno, &agibp);
2306 if (error)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002307 return error;
Christoph Hellwig370c7822020-03-10 08:57:29 -07002308 agi = agibp->b_addr;
Christoph Hellwig5e1be0f2008-11-28 14:23:37 +11002309
Linus Torvalds1da177e2005-04-16 15:20:36 -07002310 /*
Darrick J. Wong86bfd372019-02-07 10:37:14 -08002311 * Get the index into the agi hash table for the list this inode will
2312 * go on. Make sure the head pointer isn't garbage.
Linus Torvalds1da177e2005-04-16 15:20:36 -07002313 */
Darrick J. Wongb1d2a062019-02-07 10:37:15 -08002314 head_agino = be32_to_cpu(agi->agi_unlinked[bucket_index]);
2315 if (!xfs_verify_agino(mp, agno, head_agino)) {
Darrick J. Wongd2e73662018-06-04 11:27:51 -07002316 XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp,
2317 agi, sizeof(*agi));
2318 return -EFSCORRUPTED;
2319 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07002320
Darrick J. Wongb1d2a062019-02-07 10:37:15 -08002321 /*
2322 * Set our inode's next_unlinked pointer to NULL and then return
2323 * the old pointer value so that we can update whatever was previous
2324 * to us in the list to point to whatever was next in the list.
2325 */
2326 error = xfs_iunlink_update_inode(tp, ip, agno, NULLAGINO, &next_agino);
2327 if (error)
2328 return error;
Darrick J. Wong9a4a5112019-02-07 10:37:14 -08002329
Darrick J. Wong9b247172019-02-07 10:37:16 -08002330 /*
2331 * If there was a backref pointing from the next inode back to this
2332 * one, remove it because we've removed this inode from the list.
2333 *
2334 * Later, if this inode was in the middle of the list we'll update
2335 * this inode's backref to point from the next inode.
2336 */
2337 if (next_agino != NULLAGINO) {
Gao Xiang92a00542020-07-13 09:13:00 -07002338 error = xfs_iunlink_change_backref(agibp->b_pag, next_agino,
Darrick J. Wong9b247172019-02-07 10:37:16 -08002339 NULLAGINO);
2340 if (error)
Gao Xiang92a00542020-07-13 09:13:00 -07002341 return error;
Darrick J. Wong9b247172019-02-07 10:37:16 -08002342 }
2343
Gao Xiang92a00542020-07-13 09:13:00 -07002344 if (head_agino != agino) {
Darrick J. Wongf2fc16a2019-02-07 10:37:15 -08002345 struct xfs_imap imap;
2346 xfs_agino_t prev_agino;
2347
Darrick J. Wong23ffa522019-02-07 10:37:15 -08002348 /* We need to search the list for the inode being freed. */
Darrick J. Wongb1d2a062019-02-07 10:37:15 -08002349 error = xfs_iunlink_map_prev(tp, agno, head_agino, agino,
Darrick J. Wong9b247172019-02-07 10:37:16 -08002350 &prev_agino, &imap, &last_dip, &last_ibp,
Gao Xiang92a00542020-07-13 09:13:00 -07002351 agibp->b_pag);
Darrick J. Wong23ffa522019-02-07 10:37:15 -08002352 if (error)
Gao Xiang92a00542020-07-13 09:13:00 -07002353 return error;
Christoph Hellwig475ee412012-07-03 12:21:22 -04002354
Darrick J. Wongf2fc16a2019-02-07 10:37:15 -08002355 /* Point the previous inode on the list to the next inode. */
2356 xfs_iunlink_update_dinode(tp, agno, prev_agino, last_ibp,
2357 last_dip, &imap, next_agino);
Darrick J. Wong9b247172019-02-07 10:37:16 -08002358
2359 /*
2360 * Now we deal with the backref for this inode. If this inode
2361 * pointed at a real inode, change the backref that pointed to
2362 * us to point to our old next. If this inode was the end of
2363 * the list, delete the backref that pointed to us. Note that
2364 * change_backref takes care of deleting the backref if
2365 * next_agino is NULLAGINO.
2366 */
Gao Xiang92a00542020-07-13 09:13:00 -07002367 return xfs_iunlink_change_backref(agibp->b_pag, agino,
2368 next_agino);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002369 }
Darrick J. Wong9b247172019-02-07 10:37:16 -08002370
Gao Xiang92a00542020-07-13 09:13:00 -07002371 /* Point the head of the list to the next unlinked inode. */
2372 return xfs_iunlink_update_bucket(tp, agno, agibp, bucket_index,
2373 next_agino);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002374}
2375
Dave Chinner5b3eed72010-08-24 11:42:41 +10002376/*
Dave Chinner71e3e352020-06-29 14:49:18 -07002377 * Look up the inode number specified and if it is not already marked XFS_ISTALE
2378 * mark it stale. We should only find clean inodes in this lookup that aren't
2379 * already stale.
Dave Chinner58061652020-03-24 20:10:30 -07002380 */
Dave Chinner71e3e352020-06-29 14:49:18 -07002381static void
2382xfs_ifree_mark_inode_stale(
2383 struct xfs_buf *bp,
Dave Chinner58061652020-03-24 20:10:30 -07002384 struct xfs_inode *free_ip,
Brian Fosterd9fdd0a2020-04-02 08:18:57 -07002385 xfs_ino_t inum)
Dave Chinner58061652020-03-24 20:10:30 -07002386{
Dave Chinner71e3e352020-06-29 14:49:18 -07002387 struct xfs_mount *mp = bp->b_mount;
2388 struct xfs_perag *pag = bp->b_pag;
2389 struct xfs_inode_log_item *iip;
Dave Chinner58061652020-03-24 20:10:30 -07002390 struct xfs_inode *ip;
2391
2392retry:
2393 rcu_read_lock();
2394 ip = radix_tree_lookup(&pag->pag_ici_root, XFS_INO_TO_AGINO(mp, inum));
2395
2396 /* Inode not in memory, nothing to do */
Dave Chinner71e3e352020-06-29 14:49:18 -07002397 if (!ip) {
2398 rcu_read_unlock();
2399 return;
2400 }
Dave Chinner58061652020-03-24 20:10:30 -07002401
2402 /*
2403	 * Because this is an RCU-protected lookup, we could find a recently
2404 * freed or even reallocated inode during the lookup. We need to check
2405 * under the i_flags_lock for a valid inode here. Skip it if it is not
2406 * valid, the wrong inode or stale.
2407 */
2408 spin_lock(&ip->i_flags_lock);
Dave Chinner718ecc52020-08-17 16:41:01 -07002409 if (ip->i_ino != inum || __xfs_iflags_test(ip, XFS_ISTALE))
2410 goto out_iflags_unlock;
Dave Chinner58061652020-03-24 20:10:30 -07002411
2412 /*
 2413 * We don't take the lock for the inode being freed (the caller already
 2414 * holds it), but we _cannot_ skip any other inode that is not attached
 2415 * to the buffer and not already marked stale. If we can't lock it, back
 2416 * off and retry.
2417 */
2418 if (ip != free_ip) {
2419 if (!xfs_ilock_nowait(ip, XFS_ILOCK_EXCL)) {
Dave Chinner71e3e352020-06-29 14:49:18 -07002420 spin_unlock(&ip->i_flags_lock);
Dave Chinner58061652020-03-24 20:10:30 -07002421 rcu_read_unlock();
2422 delay(1);
2423 goto retry;
2424 }
Dave Chinner58061652020-03-24 20:10:30 -07002425 }
Dave Chinner71e3e352020-06-29 14:49:18 -07002426 ip->i_flags |= XFS_ISTALE;
Dave Chinner58061652020-03-24 20:10:30 -07002427
Dave Chinner58061652020-03-24 20:10:30 -07002428 /*
Dave Chinner718ecc52020-08-17 16:41:01 -07002429 * If the inode is flushing, it is already attached to the buffer. All
Dave Chinner71e3e352020-06-29 14:49:18 -07002430 * we need to do here is mark the inode stale so buffer IO completion
2431 * will remove it from the AIL.
Dave Chinner58061652020-03-24 20:10:30 -07002432 */
Dave Chinner71e3e352020-06-29 14:49:18 -07002433 iip = ip->i_itemp;
Dave Chinner718ecc52020-08-17 16:41:01 -07002434 if (__xfs_iflags_test(ip, XFS_IFLUSHING)) {
Dave Chinner71e3e352020-06-29 14:49:18 -07002435 ASSERT(!list_empty(&iip->ili_item.li_bio_list));
2436 ASSERT(iip->ili_last_fields);
2437 goto out_iunlock;
Dave Chinner58061652020-03-24 20:10:30 -07002438 }
Dave Chinner58061652020-03-24 20:10:30 -07002439
Dave Chinner71e3e352020-06-29 14:49:18 -07002440 /*
Dave Chinner48d55e22020-06-29 14:49:18 -07002441 * Inodes not attached to the buffer can be released immediately.
2442 * Everything else has to go through xfs_iflush_abort() on journal
2443 * commit as the flock synchronises removal of the inode from the
2444 * cluster buffer against inode reclaim.
Dave Chinner71e3e352020-06-29 14:49:18 -07002445 */
Dave Chinner718ecc52020-08-17 16:41:01 -07002446 if (!iip || list_empty(&iip->ili_item.li_bio_list))
Dave Chinner71e3e352020-06-29 14:49:18 -07002447 goto out_iunlock;
Dave Chinner718ecc52020-08-17 16:41:01 -07002448
2449 __xfs_iflags_set(ip, XFS_IFLUSHING);
2450 spin_unlock(&ip->i_flags_lock);
2451 rcu_read_unlock();
Dave Chinner71e3e352020-06-29 14:49:18 -07002452
 2453 /* We have a dirty inode in memory that has not yet been flushed. */
Dave Chinner71e3e352020-06-29 14:49:18 -07002454 spin_lock(&iip->ili_lock);
2455 iip->ili_last_fields = iip->ili_fields;
2456 iip->ili_fields = 0;
2457 iip->ili_fsync_fields = 0;
2458 spin_unlock(&iip->ili_lock);
Dave Chinner71e3e352020-06-29 14:49:18 -07002459 ASSERT(iip->ili_last_fields);
2460
Dave Chinner718ecc52020-08-17 16:41:01 -07002461 if (ip != free_ip)
2462 xfs_iunlock(ip, XFS_ILOCK_EXCL);
2463 return;
2464
Dave Chinner71e3e352020-06-29 14:49:18 -07002465out_iunlock:
2466 if (ip != free_ip)
2467 xfs_iunlock(ip, XFS_ILOCK_EXCL);
Dave Chinner718ecc52020-08-17 16:41:01 -07002468out_iflags_unlock:
2469 spin_unlock(&ip->i_flags_lock);
2470 rcu_read_unlock();
Dave Chinner58061652020-03-24 20:10:30 -07002471}
2472
2473/*
Zhi Yong Wu0b8182d2013-08-12 03:14:59 +00002474 * A big issue when freeing the inode cluster is that we _cannot_ skip any
Dave Chinner5b3eed72010-08-24 11:42:41 +10002475 * inodes that are in memory - they all must be marked stale and attached to
2476 * the cluster buffer.
2477 */
Chandra Seetharaman2a30f36d2011-09-20 13:56:55 +00002478STATIC int
Linus Torvalds1da177e2005-04-16 15:20:36 -07002479xfs_ifree_cluster(
Dave Chinner71e3e352020-06-29 14:49:18 -07002480 struct xfs_inode *free_ip,
2481 struct xfs_trans *tp,
Brian Foster09b56602015-05-29 09:26:03 +10002482 struct xfs_icluster *xic)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002483{
Dave Chinner71e3e352020-06-29 14:49:18 -07002484 struct xfs_mount *mp = free_ip->i_mount;
2485 struct xfs_ino_geometry *igeo = M_IGEO(mp);
2486 struct xfs_buf *bp;
2487 xfs_daddr_t blkno;
2488 xfs_ino_t inum = xic->first_ino;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002489 int nbufs;
Dave Chinner5b257b42010-06-03 16:22:29 +10002490 int i, j;
Brian Foster3cdaa182015-06-04 13:03:34 +10002491 int ioffset;
Darrick J. Wongce924642020-01-23 17:01:18 -08002492 int error;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002493
Darrick J. Wongef325952019-06-05 11:19:34 -07002494 nbufs = igeo->ialloc_blks / igeo->blocks_per_cluster;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002495
Darrick J. Wongef325952019-06-05 11:19:34 -07002496 for (j = 0; j < nbufs; j++, inum += igeo->inodes_per_cluster) {
Brian Foster09b56602015-05-29 09:26:03 +10002497 /*
2498 * The allocation bitmap tells us which inodes of the chunk were
2499 * physically allocated. Skip the cluster if an inode falls into
2500 * a sparse region.
2501 */
Brian Foster3cdaa182015-06-04 13:03:34 +10002502 ioffset = inum - xic->first_ino;
2503 if ((xic->alloc & XFS_INOBT_MASK(ioffset)) == 0) {
Darrick J. Wongef325952019-06-05 11:19:34 -07002504 ASSERT(ioffset % igeo->inodes_per_cluster == 0);
Brian Foster09b56602015-05-29 09:26:03 +10002505 continue;
2506 }
2507
Linus Torvalds1da177e2005-04-16 15:20:36 -07002508 blkno = XFS_AGB_TO_DADDR(mp, XFS_INO_TO_AGNO(mp, inum),
2509 XFS_INO_TO_AGBNO(mp, inum));
2510
Linus Torvalds1da177e2005-04-16 15:20:36 -07002511 /*
Dave Chinner5b257b42010-06-03 16:22:29 +10002512 * We obtain and lock the backing buffer first in the process
Dave Chinner718ecc52020-08-17 16:41:01 -07002513 * here to ensure dirty inodes attached to the buffer remain in
2514 * the flushing state while we mark them stale.
2515 *
Dave Chinner5b257b42010-06-03 16:22:29 +10002516 * If we scan the in-memory inodes first, then buffer IO can
2517 * complete before we get a lock on it, and hence we may fail
2518 * to mark all the active inodes on the buffer stale.
Linus Torvalds1da177e2005-04-16 15:20:36 -07002519 */
Darrick J. Wongce924642020-01-23 17:01:18 -08002520 error = xfs_trans_get_buf(tp, mp->m_ddev_targp, blkno,
2521 mp->m_bsize * igeo->blocks_per_cluster,
2522 XBF_UNMAPPED, &bp);
Dave Chinner71e3e352020-06-29 14:49:18 -07002523 if (error)
Darrick J. Wongce924642020-01-23 17:01:18 -08002524 return error;
Dave Chinnerb0f539d2012-11-14 17:53:49 +11002525
2526 /*
2527 * This buffer may not have been correctly initialised as we
2528 * didn't read it from disk. That's not important because we are
 2529 * only using it to mark the buffer as stale in the log, and to
2530 * attach stale cached inodes on it. That means it will never be
2531 * dispatched for IO. If it is, we want to know about it, and we
2532 * want it to fail. We can acheive this by adding a write
2533 * verifier to the buffer.
2534 */
Colin Ian King8c4ce792018-12-12 08:46:20 -08002535 bp->b_ops = &xfs_inode_buf_ops;
Dave Chinnerb0f539d2012-11-14 17:53:49 +11002536
Dave Chinner5b257b42010-06-03 16:22:29 +10002537 /*
Dave Chinner71e3e352020-06-29 14:49:18 -07002538 * Now we need to set all the cached clean inodes as XFS_ISTALE,
2539 * too. This requires lookups, and will skip inodes that we've
2540 * already marked XFS_ISTALE.
Dave Chinner5b257b42010-06-03 16:22:29 +10002541 */
Dave Chinner71e3e352020-06-29 14:49:18 -07002542 for (i = 0; i < igeo->inodes_per_cluster; i++)
2543 xfs_ifree_mark_inode_stale(bp, free_ip, inum + i);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002544
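		/*
		 * Mark the buffer as containing only stale inodes and
		 * invalidate it in this transaction so the stale inode
		 * cluster is never written back to disk.
		 */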
Dave Chinner5b3eed72010-08-24 11:42:41 +10002545 xfs_trans_stale_inode_buf(tp, bp);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002546 xfs_trans_binval(tp, bp);
2547 }
Chandra Seetharaman2a30f36d2011-09-20 13:56:55 +00002548 return 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002549}
2550
2551/*
2552 * This is called to return an inode to the inode free list.
2553 * The inode should already be truncated to 0 length and have
2554 * no pages associated with it. This routine also assumes that
2555 * the inode is already a part of the transaction.
2556 *
2557 * The on-disk copy of the inode will have been added to the list
2558 * of unlinked inodes in the AGI. We need to remove the inode from
2559 * that list atomically with respect to freeing it here.
2560 */
2561int
2562xfs_ifree(
Brian Foster0e0417f2018-07-11 22:26:07 -07002563 struct xfs_trans *tp,
2564 struct xfs_inode *ip)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002565{
2566 int error;
Brian Foster09b56602015-05-29 09:26:03 +10002567 struct xfs_icluster xic = { 0 };
Dave Chinner1319ebe2020-06-29 14:48:46 -07002568 struct xfs_inode_log_item *iip = ip->i_itemp;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002569
Christoph Hellwig579aa9c2008-04-22 17:34:00 +10002570 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
Dave Chinner54d7b5c2016-02-09 16:54:58 +11002571 ASSERT(VFS_I(ip)->i_nlink == 0);
Christoph Hellwigdaf83962020-05-18 10:27:22 -07002572 ASSERT(ip->i_df.if_nextents == 0);
Christoph Hellwig13d2c102021-03-29 11:11:40 -07002573 ASSERT(ip->i_disk_size == 0 || !S_ISREG(VFS_I(ip)->i_mode));
Christoph Hellwig6e73a542021-03-29 11:11:40 -07002574 ASSERT(ip->i_nblocks == 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002575
2576 /*
2577 * Pull the on-disk inode from the AGI unlinked list.
2578 */
2579 error = xfs_iunlink_remove(tp, ip);
Dave Chinner1baaed82013-06-27 16:04:50 +10002580 if (error)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002581 return error;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002582
Brian Foster0e0417f2018-07-11 22:26:07 -07002583 error = xfs_difree(tp, ip->i_ino, &xic);
Dave Chinner1baaed82013-06-27 16:04:50 +10002584 if (error)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002585 return error;
Dave Chinner1baaed82013-06-27 16:04:50 +10002586
Christoph Hellwigb2c20042020-05-18 10:27:21 -07002587 /*
2588 * Free any local-format data sitting around before we reset the
2589 * data fork to extents format. Note that the attr fork data has
2590 * already been freed by xfs_attr_inactive.
2591 */
Christoph Hellwigf7e67b22020-05-18 10:28:05 -07002592 if (ip->i_df.if_format == XFS_DINODE_FMT_LOCAL) {
Christoph Hellwigb2c20042020-05-18 10:27:21 -07002593 kmem_free(ip->i_df.if_u1.if_data);
2594 ip->i_df.if_u1.if_data = NULL;
2595 ip->i_df.if_bytes = 0;
2596 }
Darrick J. Wong98c4f782017-11-22 12:21:07 -08002597
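	/* Mark the incore inode free and reset its flag and fork state. */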
Dave Chinnerc19b3b052016-02-09 16:54:58 +11002598 VFS_I(ip)->i_mode = 0; /* mark incore inode as free */
Christoph Hellwigdb073492021-03-29 11:11:44 -07002599 ip->i_diflags = 0;
Christoph Hellwig3e09ab82021-03-29 11:11:45 -07002600 ip->i_diflags2 = ip->i_mount->m_ino_geo.new_diflags2;
Christoph Hellwig7821ea32021-03-29 11:11:44 -07002601 ip->i_forkoff = 0; /* mark the attr fork not in use */
Christoph Hellwigf7e67b22020-05-18 10:28:05 -07002602 ip->i_df.if_format = XFS_DINODE_FMT_EXTENTS;
Christoph Hellwig9b3beb02021-03-29 11:11:38 -07002603 if (xfs_iflags_test(ip, XFS_IPRESERVE_DM_FIELDS))
2604 xfs_iflags_clear(ip, XFS_IPRESERVE_DM_FIELDS);
Eric Sandeendc1baa72018-03-28 17:48:08 -07002605
2606 /* Don't attempt to replay owner changes for a deleted inode */
Dave Chinner1319ebe2020-06-29 14:48:46 -07002607 spin_lock(&iip->ili_lock);
2608 iip->ili_fields &= ~(XFS_ILOG_AOWNER | XFS_ILOG_DOWNER);
2609 spin_unlock(&iip->ili_lock);
Eric Sandeendc1baa72018-03-28 17:48:08 -07002610
Linus Torvalds1da177e2005-04-16 15:20:36 -07002611 /*
2612 * Bump the generation count so no one will be confused
2613 * by reincarnations of this inode.
2614 */
Dave Chinner9e9a2672016-02-09 16:54:58 +11002615 VFS_I(ip)->i_generation++;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002616 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
2617
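	/*
	 * If this was the last allocated inode in its chunk, xfs_difree()
	 * freed the chunk; mark any in-memory inodes in that cluster stale
	 * so they are never written back.
	 */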
Brian Foster09b56602015-05-29 09:26:03 +10002618 if (xic.deleted)
2619 error = xfs_ifree_cluster(ip, tp, &xic);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002620
Chandra Seetharaman2a30f36d2011-09-20 13:56:55 +00002621 return error;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002622}
2623
2624/*
Christoph Hellwig60ec6782010-02-17 19:43:56 +00002625 * This is called to unpin an inode. The caller must have the inode locked
 2626 * in at least shared mode so that the inode cannot be subsequently pinned
2627 * once someone is waiting for it to be unpinned.
Linus Torvalds1da177e2005-04-16 15:20:36 -07002628 */
Christoph Hellwig60ec6782010-02-17 19:43:56 +00002629static void
Christoph Hellwigf392e632011-12-18 20:00:10 +00002630xfs_iunpin(
Christoph Hellwig60ec6782010-02-17 19:43:56 +00002631 struct xfs_inode *ip)
David Chinnera3f74ff2008-03-06 13:43:42 +11002632{
Christoph Hellwig579aa9c2008-04-22 17:34:00 +10002633 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_ILOCK_SHARED));
David Chinnera3f74ff2008-03-06 13:43:42 +11002634
Dave Chinner4aaf15d2010-03-08 11:24:07 +11002635 trace_xfs_inode_unpin_nowait(ip, _RET_IP_);
2636
David Chinnera3f74ff2008-03-06 13:43:42 +11002637 /* Give the log a push to start the unpinning I/O */
Christoph Hellwig656de4f2018-03-13 23:15:28 -07002638 xfs_log_force_lsn(ip->i_mount, ip->i_itemp->ili_last_lsn, 0, NULL);
Christoph Hellwiga14a3482010-01-19 09:56:46 +00002639
David Chinnera3f74ff2008-03-06 13:43:42 +11002640}
2641
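/*
 * Wait for the pin count to drop to zero. Kick the log force once via
 * xfs_iunpin() and then sleep on the __XFS_IPINNED_BIT waitqueue until log
 * I/O completion unpins the inode.
 */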
Christoph Hellwigf392e632011-12-18 20:00:10 +00002642static void
2643__xfs_iunpin_wait(
2644 struct xfs_inode *ip)
2645{
2646 wait_queue_head_t *wq = bit_waitqueue(&ip->i_flags, __XFS_IPINNED_BIT);
2647 DEFINE_WAIT_BIT(wait, &ip->i_flags, __XFS_IPINNED_BIT);
2648
2649 xfs_iunpin(ip);
2650
2651 do {
Ingo Molnar21417132017-03-05 11:25:39 +01002652 prepare_to_wait(wq, &wait.wq_entry, TASK_UNINTERRUPTIBLE);
Christoph Hellwigf392e632011-12-18 20:00:10 +00002653 if (xfs_ipincount(ip))
2654 io_schedule();
2655 } while (xfs_ipincount(ip));
Ingo Molnar21417132017-03-05 11:25:39 +01002656 finish_wait(wq, &wait.wq_entry);
Christoph Hellwigf392e632011-12-18 20:00:10 +00002657}
2658
Dave Chinner777df5a2010-02-06 12:37:26 +11002659void
Linus Torvalds1da177e2005-04-16 15:20:36 -07002660xfs_iunpin_wait(
Christoph Hellwig60ec6782010-02-17 19:43:56 +00002661 struct xfs_inode *ip)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002662{
Christoph Hellwigf392e632011-12-18 20:00:10 +00002663 if (xfs_ipincount(ip))
2664 __xfs_iunpin_wait(ip);
David Chinnera3f74ff2008-03-06 13:43:42 +11002665}
Linus Torvalds1da177e2005-04-16 15:20:36 -07002666
Dave Chinner27320362013-10-29 22:11:44 +11002667/*
2668 * Removing an inode from the namespace involves removing the directory entry
2669 * and dropping the link count on the inode. Removing the directory entry can
2670 * result in locking an AGF (directory blocks were freed) and removing a link
2671 * count can result in placing the inode on an unlinked list which results in
2672 * locking an AGI.
2673 *
2674 * The big problem here is that we have an ordering constraint on AGF and AGI
2675 * locking - inode allocation locks the AGI, then can allocate a new extent for
2676 * new inodes, locking the AGF after the AGI. Similarly, freeing the inode
2677 * removes the inode from the unlinked list, requiring that we lock the AGI
2678 * first, and then freeing the inode can result in an inode chunk being freed
2679 * and hence freeing disk space requiring that we lock an AGF.
2680 *
2681 * Hence the ordering that is imposed by other parts of the code is AGI before
2682 * AGF. This means we cannot remove the directory entry before we drop the inode
2683 * reference count and put it on the unlinked list as this results in a lock
2684 * order of AGF then AGI, and this can deadlock against inode allocation and
2685 * freeing. Therefore we must drop the link counts before we remove the
2686 * directory entry.
2687 *
2688 * This is still safe from a transactional point of view - it is not until we
Darrick J. Wong310a75a2016-08-03 11:18:10 +10002689 * get to xfs_defer_finish() that we have the possibility of multiple
Dave Chinner27320362013-10-29 22:11:44 +11002690 * transactions in this operation. Hence as long as we remove the directory
2691 * entry and drop the link count in the first transaction of the remove
2692 * operation, there are no transactional constraints on the ordering here.
2693 */
Dave Chinnerc24b5df2013-08-12 20:49:45 +10002694int
2695xfs_remove(
2696 xfs_inode_t *dp,
2697 struct xfs_name *name,
2698 xfs_inode_t *ip)
2699{
2700 xfs_mount_t *mp = dp->i_mount;
2701 xfs_trans_t *tp = NULL;
Dave Chinnerc19b3b052016-02-09 16:54:58 +11002702 int is_dir = S_ISDIR(VFS_I(ip)->i_mode);
Dave Chinnerc24b5df2013-08-12 20:49:45 +10002703 int error = 0;
Dave Chinnerc24b5df2013-08-12 20:49:45 +10002704 uint resblks;
Dave Chinnerc24b5df2013-08-12 20:49:45 +10002705
2706 trace_xfs_remove(dp, name);
2707
2708 if (XFS_FORCED_SHUTDOWN(mp))
Dave Chinner24513372014-06-25 14:58:08 +10002709 return -EIO;
Dave Chinnerc24b5df2013-08-12 20:49:45 +10002710
Darrick J. Wongc14cfcc2018-05-04 15:30:21 -07002711 error = xfs_qm_dqattach(dp);
Dave Chinnerc24b5df2013-08-12 20:49:45 +10002712 if (error)
2713 goto std_return;
2714
Darrick J. Wongc14cfcc2018-05-04 15:30:21 -07002715 error = xfs_qm_dqattach(ip);
Dave Chinnerc24b5df2013-08-12 20:49:45 +10002716 if (error)
2717 goto std_return;
2718
Dave Chinnerc24b5df2013-08-12 20:49:45 +10002719 /*
2720 * We try to get the real space reservation first,
2721 * allowing for directory btree deletion(s) implying
2722 * possible bmap insert(s). If we can't get the space
2723 * reservation then we use 0 instead, and avoid the bmap
 2724 * btree insert(s) in the directory code: if a bmap insert
 2725 * would otherwise be needed, the directory code instead trims
 2726 * the LAST block from the directory.
2727 */
2728 resblks = XFS_REMOVE_SPACE_RES(mp);
Christoph Hellwig253f4912016-04-06 09:19:55 +10002729 error = xfs_trans_alloc(mp, &M_RES(mp)->tr_remove, resblks, 0, 0, &tp);
Dave Chinner24513372014-06-25 14:58:08 +10002730 if (error == -ENOSPC) {
Dave Chinnerc24b5df2013-08-12 20:49:45 +10002731 resblks = 0;
Christoph Hellwig253f4912016-04-06 09:19:55 +10002732 error = xfs_trans_alloc(mp, &M_RES(mp)->tr_remove, 0, 0, 0,
2733 &tp);
Dave Chinnerc24b5df2013-08-12 20:49:45 +10002734 }
2735 if (error) {
Dave Chinner24513372014-06-25 14:58:08 +10002736 ASSERT(error != -ENOSPC);
Christoph Hellwig253f4912016-04-06 09:19:55 +10002737 goto std_return;
Dave Chinnerc24b5df2013-08-12 20:49:45 +10002738 }
2739
Darrick J. Wong7c2d2382018-01-26 15:27:33 -08002740 xfs_lock_two_inodes(dp, XFS_ILOCK_EXCL, ip, XFS_ILOCK_EXCL);
Dave Chinnerc24b5df2013-08-12 20:49:45 +10002741
Christoph Hellwig65523212016-11-30 14:33:25 +11002742 xfs_trans_ijoin(tp, dp, XFS_ILOCK_EXCL);
Dave Chinnerc24b5df2013-08-12 20:49:45 +10002743 xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
2744
2745 /*
2746 * If we're removing a directory perform some additional validation.
2747 */
2748 if (is_dir) {
Dave Chinner54d7b5c2016-02-09 16:54:58 +11002749 ASSERT(VFS_I(ip)->i_nlink >= 2);
2750 if (VFS_I(ip)->i_nlink != 2) {
Dave Chinner24513372014-06-25 14:58:08 +10002751 error = -ENOTEMPTY;
Dave Chinnerc24b5df2013-08-12 20:49:45 +10002752 goto out_trans_cancel;
2753 }
2754 if (!xfs_dir_isempty(ip)) {
Dave Chinner24513372014-06-25 14:58:08 +10002755 error = -ENOTEMPTY;
Dave Chinnerc24b5df2013-08-12 20:49:45 +10002756 goto out_trans_cancel;
2757 }
Dave Chinnerc24b5df2013-08-12 20:49:45 +10002758
Dave Chinner27320362013-10-29 22:11:44 +11002759 /* Drop the link from ip's "..". */
Dave Chinnerc24b5df2013-08-12 20:49:45 +10002760 error = xfs_droplink(tp, dp);
2761 if (error)
Dave Chinner27320362013-10-29 22:11:44 +11002762 goto out_trans_cancel;
Dave Chinnerc24b5df2013-08-12 20:49:45 +10002763
Dave Chinner27320362013-10-29 22:11:44 +11002764 /* Drop the "." link from ip to self. */
Dave Chinnerc24b5df2013-08-12 20:49:45 +10002765 error = xfs_droplink(tp, ip);
2766 if (error)
Dave Chinner27320362013-10-29 22:11:44 +11002767 goto out_trans_cancel;
Dave Chinnerc24b5df2013-08-12 20:49:45 +10002768 } else {
2769 /*
2770 * When removing a non-directory we need to log the parent
2771 * inode here. For a directory this is done implicitly
2772 * by the xfs_droplink call for the ".." entry.
2773 */
2774 xfs_trans_log_inode(tp, dp, XFS_ILOG_CORE);
2775 }
Dave Chinner27320362013-10-29 22:11:44 +11002776 xfs_trans_ichgtime(tp, dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
Dave Chinnerc24b5df2013-08-12 20:49:45 +10002777
Dave Chinner27320362013-10-29 22:11:44 +11002778 /* Drop the link from dp to ip. */
Dave Chinnerc24b5df2013-08-12 20:49:45 +10002779 error = xfs_droplink(tp, ip);
2780 if (error)
Dave Chinner27320362013-10-29 22:11:44 +11002781 goto out_trans_cancel;
Dave Chinnerc24b5df2013-08-12 20:49:45 +10002782
Brian Foster381eee62018-07-11 22:26:21 -07002783 error = xfs_dir_removename(tp, dp, name, ip->i_ino, resblks);
Dave Chinner27320362013-10-29 22:11:44 +11002784 if (error) {
Dave Chinner24513372014-06-25 14:58:08 +10002785 ASSERT(error != -ENOENT);
Brian Fosterc8eac492018-07-24 13:43:13 -07002786 goto out_trans_cancel;
Dave Chinner27320362013-10-29 22:11:44 +11002787 }
2788
Dave Chinnerc24b5df2013-08-12 20:49:45 +10002789 /*
2790 * If this is a synchronous mount, make sure that the
2791 * remove transaction goes to disk before returning to
2792 * the user.
2793 */
2794 if (mp->m_flags & (XFS_MOUNT_WSYNC|XFS_MOUNT_DIRSYNC))
2795 xfs_trans_set_sync(tp);
2796
Christoph Hellwig70393312015-06-04 13:48:08 +10002797 error = xfs_trans_commit(tp);
Dave Chinnerc24b5df2013-08-12 20:49:45 +10002798 if (error)
2799 goto std_return;
2800
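	/* A removed directory no longer needs its filestream association. */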
Christoph Hellwig2cd2ef62014-04-23 07:11:51 +10002801 if (is_dir && xfs_inode_is_filestream(ip))
Dave Chinnerc24b5df2013-08-12 20:49:45 +10002802 xfs_filestream_deassociate(ip);
2803
2804 return 0;
2805
Dave Chinnerc24b5df2013-08-12 20:49:45 +10002806 out_trans_cancel:
Christoph Hellwig4906e212015-06-04 13:47:56 +10002807 xfs_trans_cancel(tp);
Dave Chinnerc24b5df2013-08-12 20:49:45 +10002808 std_return:
2809 return error;
2810}
2811
Dave Chinnerf6bba202013-08-12 20:49:46 +10002812/*
2813 * Enter all inodes for a rename transaction into a sorted array.
2814 */
Dave Chinner95afcf52015-03-25 14:03:32 +11002815#define __XFS_SORT_INODES 5
Dave Chinnerf6bba202013-08-12 20:49:46 +10002816STATIC void
2817xfs_sort_for_rename(
Dave Chinner95afcf52015-03-25 14:03:32 +11002818 struct xfs_inode *dp1, /* in: old (source) directory inode */
2819 struct xfs_inode *dp2, /* in: new (target) directory inode */
2820 struct xfs_inode *ip1, /* in: inode of old entry */
2821 struct xfs_inode *ip2, /* in: inode of new entry */
2822 struct xfs_inode *wip, /* in: whiteout inode */
2823 struct xfs_inode **i_tab,/* out: sorted array of inodes */
2824 int *num_inodes) /* in/out: inodes in array */
Dave Chinnerf6bba202013-08-12 20:49:46 +10002825{
Dave Chinnerf6bba202013-08-12 20:49:46 +10002826 int i, j;
2827
Dave Chinner95afcf52015-03-25 14:03:32 +11002828 ASSERT(*num_inodes == __XFS_SORT_INODES);
2829 memset(i_tab, 0, *num_inodes * sizeof(struct xfs_inode *));
2830
Dave Chinnerf6bba202013-08-12 20:49:46 +10002831 /*
2832 * i_tab contains a list of pointers to inodes. We initialize
2833 * the table here & we'll sort it. We will then use it to
2834 * order the acquisition of the inode locks.
2835 *
2836 * Note that the table may contain duplicates. e.g., dp1 == dp2.
2837 */
Dave Chinner95afcf52015-03-25 14:03:32 +11002838 i = 0;
2839 i_tab[i++] = dp1;
2840 i_tab[i++] = dp2;
2841 i_tab[i++] = ip1;
2842 if (ip2)
2843 i_tab[i++] = ip2;
2844 if (wip)
2845 i_tab[i++] = wip;
2846 *num_inodes = i;
Dave Chinnerf6bba202013-08-12 20:49:46 +10002847
2848 /*
2849 * Sort the elements via bubble sort. (Remember, there are at
Dave Chinner95afcf52015-03-25 14:03:32 +11002850 * most 5 elements to sort, so this is adequate.)
Dave Chinnerf6bba202013-08-12 20:49:46 +10002851 */
2852 for (i = 0; i < *num_inodes; i++) {
2853 for (j = 1; j < *num_inodes; j++) {
2854 if (i_tab[j]->i_ino < i_tab[j-1]->i_ino) {
Dave Chinner95afcf52015-03-25 14:03:32 +11002855 struct xfs_inode *temp = i_tab[j];
Dave Chinnerf6bba202013-08-12 20:49:46 +10002856 i_tab[j] = i_tab[j-1];
2857 i_tab[j-1] = temp;
2858 }
2859 }
2860 }
2861}
2862
Dave Chinner310606b2015-03-25 14:06:07 +11002863static int
2864xfs_finish_rename(
Brian Fosterc9cfdb32018-07-11 22:26:08 -07002865 struct xfs_trans *tp)
Dave Chinner310606b2015-03-25 14:06:07 +11002866{
Dave Chinner310606b2015-03-25 14:06:07 +11002867 /*
2868 * If this is a synchronous mount, make sure that the rename transaction
2869 * goes to disk before returning to the user.
2870 */
2871 if (tp->t_mountp->m_flags & (XFS_MOUNT_WSYNC|XFS_MOUNT_DIRSYNC))
2872 xfs_trans_set_sync(tp);
2873
Christoph Hellwig70393312015-06-04 13:48:08 +10002874 return xfs_trans_commit(tp);
Dave Chinner310606b2015-03-25 14:06:07 +11002875}
2876
Dave Chinnerf6bba202013-08-12 20:49:46 +10002877/*
Carlos Maiolinod31a1822014-12-24 08:51:42 +11002878 * xfs_cross_rename()
2879 *
Bhaskar Chowdhury01452252021-03-23 16:59:30 -07002880 * responsible for handling RENAME_EXCHANGE flag in renameat2() syscall
Carlos Maiolinod31a1822014-12-24 08:51:42 +11002881 */
2882STATIC int
2883xfs_cross_rename(
2884 struct xfs_trans *tp,
2885 struct xfs_inode *dp1,
2886 struct xfs_name *name1,
2887 struct xfs_inode *ip1,
2888 struct xfs_inode *dp2,
2889 struct xfs_name *name2,
2890 struct xfs_inode *ip2,
Carlos Maiolinod31a1822014-12-24 08:51:42 +11002891 int spaceres)
2892{
2893 int error = 0;
2894 int ip1_flags = 0;
2895 int ip2_flags = 0;
2896 int dp2_flags = 0;
2897
2898 /* Swap inode number for dirent in first parent */
Brian Foster381eee62018-07-11 22:26:21 -07002899 error = xfs_dir_replace(tp, dp1, name1, ip2->i_ino, spaceres);
Carlos Maiolinod31a1822014-12-24 08:51:42 +11002900 if (error)
Dave Chinnereeacd322015-03-25 14:08:07 +11002901 goto out_trans_abort;
Carlos Maiolinod31a1822014-12-24 08:51:42 +11002902
2903 /* Swap inode number for dirent in second parent */
Brian Foster381eee62018-07-11 22:26:21 -07002904 error = xfs_dir_replace(tp, dp2, name2, ip1->i_ino, spaceres);
Carlos Maiolinod31a1822014-12-24 08:51:42 +11002905 if (error)
Dave Chinnereeacd322015-03-25 14:08:07 +11002906 goto out_trans_abort;
Carlos Maiolinod31a1822014-12-24 08:51:42 +11002907
2908 /*
2909 * If we're renaming one or more directories across different parents,
2910 * update the respective ".." entries (and link counts) to match the new
2911 * parents.
2912 */
2913 if (dp1 != dp2) {
2914 dp2_flags = XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG;
2915
Dave Chinnerc19b3b052016-02-09 16:54:58 +11002916 if (S_ISDIR(VFS_I(ip2)->i_mode)) {
Carlos Maiolinod31a1822014-12-24 08:51:42 +11002917 error = xfs_dir_replace(tp, ip2, &xfs_name_dotdot,
Brian Foster381eee62018-07-11 22:26:21 -07002918 dp1->i_ino, spaceres);
Carlos Maiolinod31a1822014-12-24 08:51:42 +11002919 if (error)
Dave Chinnereeacd322015-03-25 14:08:07 +11002920 goto out_trans_abort;
Carlos Maiolinod31a1822014-12-24 08:51:42 +11002921
2922 /* transfer ip2 ".." reference to dp1 */
Dave Chinnerc19b3b052016-02-09 16:54:58 +11002923 if (!S_ISDIR(VFS_I(ip1)->i_mode)) {
Carlos Maiolinod31a1822014-12-24 08:51:42 +11002924 error = xfs_droplink(tp, dp2);
2925 if (error)
Dave Chinnereeacd322015-03-25 14:08:07 +11002926 goto out_trans_abort;
Eric Sandeen91083262019-05-01 20:26:30 -07002927 xfs_bumplink(tp, dp1);
Carlos Maiolinod31a1822014-12-24 08:51:42 +11002928 }
2929
2930 /*
2931 * Although ip1 isn't changed here, userspace needs
2932 * to be warned about the change, so that applications
 2933 * relying on it (like backup tools) will properly
 2934 * notice the change.
2935 */
2936 ip1_flags |= XFS_ICHGTIME_CHG;
2937 ip2_flags |= XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG;
2938 }
2939
Dave Chinnerc19b3b052016-02-09 16:54:58 +11002940 if (S_ISDIR(VFS_I(ip1)->i_mode)) {
Carlos Maiolinod31a1822014-12-24 08:51:42 +11002941 error = xfs_dir_replace(tp, ip1, &xfs_name_dotdot,
Brian Foster381eee62018-07-11 22:26:21 -07002942 dp2->i_ino, spaceres);
Carlos Maiolinod31a1822014-12-24 08:51:42 +11002943 if (error)
Dave Chinnereeacd322015-03-25 14:08:07 +11002944 goto out_trans_abort;
Carlos Maiolinod31a1822014-12-24 08:51:42 +11002945
2946 /* transfer ip1 ".." reference to dp2 */
Dave Chinnerc19b3b052016-02-09 16:54:58 +11002947 if (!S_ISDIR(VFS_I(ip2)->i_mode)) {
Carlos Maiolinod31a1822014-12-24 08:51:42 +11002948 error = xfs_droplink(tp, dp1);
2949 if (error)
Dave Chinnereeacd322015-03-25 14:08:07 +11002950 goto out_trans_abort;
Eric Sandeen91083262019-05-01 20:26:30 -07002951 xfs_bumplink(tp, dp2);
Carlos Maiolinod31a1822014-12-24 08:51:42 +11002952 }
2953
2954 /*
2955 * Although ip2 isn't changed here, userspace needs
2956 * to be warned about the change, so that applications
 2957 * relying on it (like backup tools) will properly
 2958 * notice the change.
2959 */
2960 ip1_flags |= XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG;
2961 ip2_flags |= XFS_ICHGTIME_CHG;
2962 }
2963 }
2964
2965 if (ip1_flags) {
2966 xfs_trans_ichgtime(tp, ip1, ip1_flags);
2967 xfs_trans_log_inode(tp, ip1, XFS_ILOG_CORE);
2968 }
2969 if (ip2_flags) {
2970 xfs_trans_ichgtime(tp, ip2, ip2_flags);
2971 xfs_trans_log_inode(tp, ip2, XFS_ILOG_CORE);
2972 }
2973 if (dp2_flags) {
2974 xfs_trans_ichgtime(tp, dp2, dp2_flags);
2975 xfs_trans_log_inode(tp, dp2, XFS_ILOG_CORE);
2976 }
2977 xfs_trans_ichgtime(tp, dp1, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
2978 xfs_trans_log_inode(tp, dp1, XFS_ILOG_CORE);
Brian Fosterc9cfdb32018-07-11 22:26:08 -07002979 return xfs_finish_rename(tp);
Dave Chinnereeacd322015-03-25 14:08:07 +11002980
2981out_trans_abort:
Christoph Hellwig4906e212015-06-04 13:47:56 +10002982 xfs_trans_cancel(tp);
Carlos Maiolinod31a1822014-12-24 08:51:42 +11002983 return error;
2984}
2985
2986/*
Dave Chinner7dcf5c32015-03-25 14:08:08 +11002987 * xfs_rename_alloc_whiteout()
2988 *
Randy Dunlapb63da6c2020-08-05 08:49:58 -07002989 * Return a referenced, unlinked, unlocked inode that can be used as a
Dave Chinner7dcf5c32015-03-25 14:08:08 +11002990 * whiteout in a rename transaction. We use a tmpfile inode here so that if we
2991 * crash between allocating the inode and linking it into the rename transaction
2992 * recovery will free the inode and we won't leak it.
2993 */
2994static int
2995xfs_rename_alloc_whiteout(
Christoph Hellwigf736d932021-01-21 14:19:58 +01002996 struct user_namespace *mnt_userns,
Dave Chinner7dcf5c32015-03-25 14:08:08 +11002997 struct xfs_inode *dp,
2998 struct xfs_inode **wip)
2999{
3000 struct xfs_inode *tmpfile;
3001 int error;
3002
Christoph Hellwigf736d932021-01-21 14:19:58 +01003003 error = xfs_create_tmpfile(mnt_userns, dp, S_IFCHR | WHITEOUT_MODE,
3004 &tmpfile);
Dave Chinner7dcf5c32015-03-25 14:08:08 +11003005 if (error)
3006 return error;
3007
Brian Foster22419ac2015-05-29 08:14:55 +10003008 /*
3009 * Prepare the tmpfile inode as if it were created through the VFS.
Darrick J. Wongc4a6bf72019-02-13 11:15:17 -08003010 * Complete the inode setup and flag it as linkable. nlink is already
3011 * zero, so we can skip the drop_nlink.
Brian Foster22419ac2015-05-29 08:14:55 +10003012 */
Christoph Hellwig2b3d1d42016-04-06 07:48:27 +10003013 xfs_setup_iops(tmpfile);
Dave Chinner7dcf5c32015-03-25 14:08:08 +11003014 xfs_finish_inode_setup(tmpfile);
3015 VFS_I(tmpfile)->i_state |= I_LINKABLE;
3016
3017 *wip = tmpfile;
3018 return 0;
3019}
3020
3021/*
Dave Chinnerf6bba202013-08-12 20:49:46 +10003022 * xfs_rename
3023 */
3024int
3025xfs_rename(
Christoph Hellwigf736d932021-01-21 14:19:58 +01003026 struct user_namespace *mnt_userns,
Dave Chinner7dcf5c32015-03-25 14:08:08 +11003027 struct xfs_inode *src_dp,
3028 struct xfs_name *src_name,
3029 struct xfs_inode *src_ip,
3030 struct xfs_inode *target_dp,
3031 struct xfs_name *target_name,
3032 struct xfs_inode *target_ip,
3033 unsigned int flags)
Dave Chinnerf6bba202013-08-12 20:49:46 +10003034{
Dave Chinner7dcf5c32015-03-25 14:08:08 +11003035 struct xfs_mount *mp = src_dp->i_mount;
3036 struct xfs_trans *tp;
Dave Chinner7dcf5c32015-03-25 14:08:08 +11003037 struct xfs_inode *wip = NULL; /* whiteout inode */
3038 struct xfs_inode *inodes[__XFS_SORT_INODES];
Darrick J. Wong6da1b4b2021-01-22 16:48:32 -08003039 int i;
Dave Chinner7dcf5c32015-03-25 14:08:08 +11003040 int num_inodes = __XFS_SORT_INODES;
Dave Chinner2b936812015-03-25 15:12:30 +11003041 bool new_parent = (src_dp != target_dp);
Dave Chinnerc19b3b052016-02-09 16:54:58 +11003042 bool src_is_directory = S_ISDIR(VFS_I(src_ip)->i_mode);
Dave Chinner7dcf5c32015-03-25 14:08:08 +11003043 int spaceres;
3044 int error;
Dave Chinnerf6bba202013-08-12 20:49:46 +10003045
3046 trace_xfs_rename(src_dp, target_dp, src_name, target_name);
3047
Dave Chinnereeacd322015-03-25 14:08:07 +11003048 if ((flags & RENAME_EXCHANGE) && !target_ip)
3049 return -EINVAL;
Dave Chinnerf6bba202013-08-12 20:49:46 +10003050
Dave Chinner7dcf5c32015-03-25 14:08:08 +11003051 /*
3052 * If we are doing a whiteout operation, allocate the whiteout inode
3053 * we will be placing at the target and ensure the type is set
3054 * appropriately.
3055 */
3056 if (flags & RENAME_WHITEOUT) {
3057 ASSERT(!(flags & (RENAME_NOREPLACE | RENAME_EXCHANGE)));
Christoph Hellwigf736d932021-01-21 14:19:58 +01003058 error = xfs_rename_alloc_whiteout(mnt_userns, target_dp, &wip);
Dave Chinner7dcf5c32015-03-25 14:08:08 +11003059 if (error)
3060 return error;
Dave Chinnerf6bba202013-08-12 20:49:46 +10003061
Dave Chinner7dcf5c32015-03-25 14:08:08 +11003062 /* setup target dirent info as whiteout */
3063 src_name->type = XFS_DIR3_FT_CHRDEV;
3064 }
3065
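	/*
	 * Sort the inodes by inode number so that they are locked in a
	 * consistent order below.
	 */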
3066 xfs_sort_for_rename(src_dp, target_dp, src_ip, target_ip, wip,
Dave Chinnerf6bba202013-08-12 20:49:46 +10003067 inodes, &num_inodes);
3068
Dave Chinnerf6bba202013-08-12 20:49:46 +10003069 spaceres = XFS_RENAME_SPACE_RES(mp, target_name->len);
Christoph Hellwig253f4912016-04-06 09:19:55 +10003070 error = xfs_trans_alloc(mp, &M_RES(mp)->tr_rename, spaceres, 0, 0, &tp);
Dave Chinner24513372014-06-25 14:58:08 +10003071 if (error == -ENOSPC) {
Dave Chinnerf6bba202013-08-12 20:49:46 +10003072 spaceres = 0;
Christoph Hellwig253f4912016-04-06 09:19:55 +10003073 error = xfs_trans_alloc(mp, &M_RES(mp)->tr_rename, 0, 0, 0,
3074 &tp);
Dave Chinnerf6bba202013-08-12 20:49:46 +10003075 }
Dave Chinner445883e2015-03-25 14:05:43 +11003076 if (error)
Christoph Hellwig253f4912016-04-06 09:19:55 +10003077 goto out_release_wip;
Dave Chinnerf6bba202013-08-12 20:49:46 +10003078
3079 /*
3080 * Attach the dquots to the inodes
3081 */
3082 error = xfs_qm_vop_rename_dqattach(inodes);
Dave Chinner445883e2015-03-25 14:05:43 +11003083 if (error)
3084 goto out_trans_cancel;
Dave Chinnerf6bba202013-08-12 20:49:46 +10003085
3086 /*
3087 * Lock all the participating inodes. Depending upon whether
3088 * the target_name exists in the target directory, and
3089 * whether the target directory is the same as the source
3090 * directory, we can lock from 2 to 4 inodes.
3091 */
3092 xfs_lock_inodes(inodes, num_inodes, XFS_ILOCK_EXCL);
3093
3094 /*
3095 * Join all the inodes to the transaction. From this point on,
3096 * we can rely on either trans_commit or trans_cancel to unlock
3097 * them.
3098 */
Christoph Hellwig65523212016-11-30 14:33:25 +11003099 xfs_trans_ijoin(tp, src_dp, XFS_ILOCK_EXCL);
Dave Chinnerf6bba202013-08-12 20:49:46 +10003100 if (new_parent)
Christoph Hellwig65523212016-11-30 14:33:25 +11003101 xfs_trans_ijoin(tp, target_dp, XFS_ILOCK_EXCL);
Dave Chinnerf6bba202013-08-12 20:49:46 +10003102 xfs_trans_ijoin(tp, src_ip, XFS_ILOCK_EXCL);
3103 if (target_ip)
3104 xfs_trans_ijoin(tp, target_ip, XFS_ILOCK_EXCL);
Dave Chinner7dcf5c32015-03-25 14:08:08 +11003105 if (wip)
3106 xfs_trans_ijoin(tp, wip, XFS_ILOCK_EXCL);
Dave Chinnerf6bba202013-08-12 20:49:46 +10003107
3108 /*
3109 * If we are using project inheritance, we only allow renames
3110 * into our tree when the project IDs are the same; else the
3111 * tree quota mechanism would be circumvented.
3112 */
Christoph Hellwigdb073492021-03-29 11:11:44 -07003113 if (unlikely((target_dp->i_diflags & XFS_DIFLAG_PROJINHERIT) &&
Christoph Hellwigceaf6032021-03-29 11:11:39 -07003114 target_dp->i_projid != src_ip->i_projid)) {
Dave Chinner24513372014-06-25 14:58:08 +10003115 error = -EXDEV;
Dave Chinner445883e2015-03-25 14:05:43 +11003116 goto out_trans_cancel;
Dave Chinnerf6bba202013-08-12 20:49:46 +10003117 }
3118
Dave Chinnereeacd322015-03-25 14:08:07 +11003119 /* RENAME_EXCHANGE is unique from here on. */
3120 if (flags & RENAME_EXCHANGE)
3121 return xfs_cross_rename(tp, src_dp, src_name, src_ip,
3122 target_dp, target_name, target_ip,
Brian Fosterf16dea52018-07-11 22:26:20 -07003123 spaceres);
Carlos Maiolinod31a1822014-12-24 08:51:42 +11003124
3125 /*
kaixuxiabc56ad82019-09-03 21:06:50 -07003126 * Check for expected errors before we dirty the transaction
3127 * so we can return an error without a transaction abort.
Chandan Babu R02092a22021-01-22 16:48:13 -08003128 *
3129 * Extent count overflow check:
3130 *
3131 * From the perspective of src_dp, a rename operation is essentially a
3132 * directory entry remove operation. Hence the only place where we check
3133 * for extent count overflow for src_dp is in
3134 * xfs_bmap_del_extent_real(). xfs_bmap_del_extent_real() returns
3135 * -ENOSPC when it detects a possible extent count overflow and in
3136 * response, the higher layers of directory handling code do the
3137 * following:
3138 * 1. Data/Free blocks: XFS lets these blocks linger until a
3139 * future remove operation removes them.
3140 * 2. Dabtree blocks: XFS swaps the blocks with the last block in the
3141 * Leaf space and unmaps the last block.
3142 *
3143 * For target_dp, there are two cases depending on whether the
3144 * destination directory entry exists or not.
3145 *
3146 * When destination directory entry does not exist (i.e. target_ip ==
3147 * NULL), extent count overflow check is performed only when transaction
3148 * has a non-zero sized space reservation associated with it. With a
3149 * zero-sized space reservation, XFS allows a rename operation to
3150 * continue only when the directory has sufficient free space in its
3151 * data/leaf/free space blocks to hold the new entry.
3152 *
3153 * When destination directory entry exists (i.e. target_ip != NULL), all
3154 * we need to do is change the inode number associated with the already
3155 * existing entry. Hence there is no need to perform an extent count
3156 * overflow check.
Dave Chinnerf6bba202013-08-12 20:49:46 +10003157 */
3158 if (target_ip == NULL) {
3159 /*
3160 * If there's no space reservation, check the entry will
3161 * fit before actually inserting it.
3162 */
Eric Sandeen94f3cad2014-09-09 11:57:52 +10003163 if (!spaceres) {
3164 error = xfs_dir_canenter(tp, target_dp, target_name);
3165 if (error)
Dave Chinner445883e2015-03-25 14:05:43 +11003166 goto out_trans_cancel;
Chandan Babu R02092a22021-01-22 16:48:13 -08003167 } else {
3168 error = xfs_iext_count_may_overflow(target_dp,
3169 XFS_DATA_FORK,
3170 XFS_IEXT_DIR_MANIP_CNT(mp));
3171 if (error)
3172 goto out_trans_cancel;
Eric Sandeen94f3cad2014-09-09 11:57:52 +10003173 }
kaixuxiabc56ad82019-09-03 21:06:50 -07003174 } else {
3175 /*
3176 * If target exists and it's a directory, check that whether
3177 * it can be destroyed.
3178 */
3179 if (S_ISDIR(VFS_I(target_ip)->i_mode) &&
3180 (!xfs_dir_isempty(target_ip) ||
3181 (VFS_I(target_ip)->i_nlink > 2))) {
3182 error = -EEXIST;
3183 goto out_trans_cancel;
3184 }
3185 }
3186
3187 /*
Darrick J. Wong6da1b4b2021-01-22 16:48:32 -08003188 * Lock the AGI buffers we need to handle bumping the nlink of the
3189 * whiteout inode off the unlinked list and to handle dropping the
3190 * nlink of the target inode. Per locking order rules, do this in
3191 * increasing AG order and before directory block allocation tries to
3192 * grab AGFs because we grab AGIs before AGFs.
3193 *
3194 * The (vfs) caller must ensure that if src is a directory then
3195 * target_ip is either null or an empty directory.
3196 */
3197 for (i = 0; i < num_inodes && inodes[i] != NULL; i++) {
3198 if (inodes[i] == wip ||
3199 (inodes[i] == target_ip &&
3200 (VFS_I(target_ip)->i_nlink == 1 || src_is_directory))) {
3201 struct xfs_buf *bp;
3202 xfs_agnumber_t agno;
3203
3204 agno = XFS_INO_TO_AGNO(mp, inodes[i]->i_ino);
3205 error = xfs_read_agi(mp, tp, agno, &bp);
3206 if (error)
3207 goto out_trans_cancel;
3208 }
3209 }
3210
3211 /*
kaixuxiabc56ad82019-09-03 21:06:50 -07003212 * Directory entry creation below may acquire the AGF. Remove
3213 * the whiteout from the unlinked list first to preserve correct
3214 * AGI/AGF locking order. This dirties the transaction so failures
3215 * after this point will abort and log recovery will clean up the
3216 * mess.
3217 *
3218 * For whiteouts, we need to bump the link count on the whiteout
 3219 * inode. After this point, we have a real link, so clear the tmpfile
3220 * state flag from the inode so it doesn't accidentally get misused
3221 * in future.
3222 */
3223 if (wip) {
3224 ASSERT(VFS_I(wip)->i_nlink == 0);
3225 error = xfs_iunlink_remove(tp, wip);
3226 if (error)
3227 goto out_trans_cancel;
3228
3229 xfs_bumplink(tp, wip);
kaixuxiabc56ad82019-09-03 21:06:50 -07003230 VFS_I(wip)->i_state &= ~I_LINKABLE;
3231 }
3232
3233 /*
3234 * Set up the target.
3235 */
3236 if (target_ip == NULL) {
Dave Chinnerf6bba202013-08-12 20:49:46 +10003237 /*
3238 * If target does not exist and the rename crosses
3239 * directories, adjust the target directory link count
3240 * to account for the ".." reference from the new entry.
3241 */
3242 error = xfs_dir_createname(tp, target_dp, target_name,
Brian Foster381eee62018-07-11 22:26:21 -07003243 src_ip->i_ino, spaceres);
Dave Chinnerf6bba202013-08-12 20:49:46 +10003244 if (error)
Brian Fosterc8eac492018-07-24 13:43:13 -07003245 goto out_trans_cancel;
Dave Chinnerf6bba202013-08-12 20:49:46 +10003246
3247 xfs_trans_ichgtime(tp, target_dp,
3248 XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
3249
3250 if (new_parent && src_is_directory) {
Eric Sandeen91083262019-05-01 20:26:30 -07003251 xfs_bumplink(tp, target_dp);
Dave Chinnerf6bba202013-08-12 20:49:46 +10003252 }
3253 } else { /* target_ip != NULL */
3254 /*
Dave Chinnerf6bba202013-08-12 20:49:46 +10003255 * Link the source inode under the target name.
3256 * If the source inode is a directory and we are moving
3257 * it across directories, its ".." entry will be
3258 * inconsistent until we replace that down below.
3259 *
3260 * In case there is already an entry with the same
3261 * name at the destination directory, remove it first.
3262 */
3263 error = xfs_dir_replace(tp, target_dp, target_name,
Brian Foster381eee62018-07-11 22:26:21 -07003264 src_ip->i_ino, spaceres);
Dave Chinnerf6bba202013-08-12 20:49:46 +10003265 if (error)
Brian Fosterc8eac492018-07-24 13:43:13 -07003266 goto out_trans_cancel;
Dave Chinnerf6bba202013-08-12 20:49:46 +10003267
3268 xfs_trans_ichgtime(tp, target_dp,
3269 XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
3270
3271 /*
3272 * Decrement the link count on the target since the target
3273 * dir no longer points to it.
3274 */
3275 error = xfs_droplink(tp, target_ip);
3276 if (error)
Brian Fosterc8eac492018-07-24 13:43:13 -07003277 goto out_trans_cancel;
Dave Chinnerf6bba202013-08-12 20:49:46 +10003278
3279 if (src_is_directory) {
3280 /*
3281 * Drop the link from the old "." entry.
3282 */
3283 error = xfs_droplink(tp, target_ip);
3284 if (error)
Brian Fosterc8eac492018-07-24 13:43:13 -07003285 goto out_trans_cancel;
Dave Chinnerf6bba202013-08-12 20:49:46 +10003286 }
3287 } /* target_ip != NULL */
3288
3289 /*
3290 * Remove the source.
3291 */
3292 if (new_parent && src_is_directory) {
3293 /*
3294 * Rewrite the ".." entry to point to the new
3295 * directory.
3296 */
3297 error = xfs_dir_replace(tp, src_ip, &xfs_name_dotdot,
Brian Foster381eee62018-07-11 22:26:21 -07003298 target_dp->i_ino, spaceres);
Dave Chinner24513372014-06-25 14:58:08 +10003299 ASSERT(error != -EEXIST);
Dave Chinnerf6bba202013-08-12 20:49:46 +10003300 if (error)
Brian Fosterc8eac492018-07-24 13:43:13 -07003301 goto out_trans_cancel;
Dave Chinnerf6bba202013-08-12 20:49:46 +10003302 }
3303
3304 /*
3305 * We always want to hit the ctime on the source inode.
3306 *
3307 * This isn't strictly required by the standards since the source
3308 * inode isn't really being changed, but old unix file systems did
3309 * it and some incremental backup programs won't work without it.
3310 */
3311 xfs_trans_ichgtime(tp, src_ip, XFS_ICHGTIME_CHG);
3312 xfs_trans_log_inode(tp, src_ip, XFS_ILOG_CORE);
3313
3314 /*
3315 * Adjust the link count on src_dp. This is necessary when
3316 * renaming a directory, either within one parent when
3317 * the target existed, or across two parent directories.
3318 */
3319 if (src_is_directory && (new_parent || target_ip != NULL)) {
3320
3321 /*
3322 * Decrement link count on src_directory since the
3323 * entry that's moved no longer points to it.
3324 */
3325 error = xfs_droplink(tp, src_dp);
3326 if (error)
Brian Fosterc8eac492018-07-24 13:43:13 -07003327 goto out_trans_cancel;
Dave Chinnerf6bba202013-08-12 20:49:46 +10003328 }
3329
Dave Chinner7dcf5c32015-03-25 14:08:08 +11003330 /*
3331 * For whiteouts, we only need to update the source dirent with the
3332 * inode number of the whiteout inode rather than removing it
3333 * altogether.
3334 */
3335 if (wip) {
3336 error = xfs_dir_replace(tp, src_dp, src_name, wip->i_ino,
Brian Foster381eee62018-07-11 22:26:21 -07003337 spaceres);
Chandan Babu R02092a22021-01-22 16:48:13 -08003338 } else {
3339 /*
3340 * NOTE: We don't need to check for extent count overflow here
3341 * because the dir remove name code will leave the dir block in
3342 * place if the extent count would overflow.
3343 */
Dave Chinner7dcf5c32015-03-25 14:08:08 +11003344 error = xfs_dir_removename(tp, src_dp, src_name, src_ip->i_ino,
Brian Foster381eee62018-07-11 22:26:21 -07003345 spaceres);
Chandan Babu R02092a22021-01-22 16:48:13 -08003346 }
3347
Dave Chinnerf6bba202013-08-12 20:49:46 +10003348 if (error)
Brian Fosterc8eac492018-07-24 13:43:13 -07003349 goto out_trans_cancel;
Dave Chinnerf6bba202013-08-12 20:49:46 +10003350
3351 xfs_trans_ichgtime(tp, src_dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
3352 xfs_trans_log_inode(tp, src_dp, XFS_ILOG_CORE);
3353 if (new_parent)
3354 xfs_trans_log_inode(tp, target_dp, XFS_ILOG_CORE);
3355
Brian Fosterc9cfdb32018-07-11 22:26:08 -07003356 error = xfs_finish_rename(tp);
Dave Chinner7dcf5c32015-03-25 14:08:08 +11003357 if (wip)
Darrick J. Wong44a87362018-07-25 12:52:32 -07003358 xfs_irele(wip);
Dave Chinner7dcf5c32015-03-25 14:08:08 +11003359 return error;
Dave Chinnerf6bba202013-08-12 20:49:46 +10003360
Dave Chinner445883e2015-03-25 14:05:43 +11003361out_trans_cancel:
Christoph Hellwig4906e212015-06-04 13:47:56 +10003362 xfs_trans_cancel(tp);
Christoph Hellwig253f4912016-04-06 09:19:55 +10003363out_release_wip:
Dave Chinner7dcf5c32015-03-25 14:08:08 +11003364 if (wip)
Darrick J. Wong44a87362018-07-25 12:52:32 -07003365 xfs_irele(wip);
Dave Chinnerf6bba202013-08-12 20:49:46 +10003366 return error;
3367}
3368
Dave Chinnere6187b32020-06-29 14:49:19 -07003369static int
3370xfs_iflush(
Christoph Hellwig93848a92013-04-03 16:11:17 +11003371 struct xfs_inode *ip,
3372 struct xfs_buf *bp)
Linus Torvalds1da177e2005-04-16 15:20:36 -07003373{
Christoph Hellwig93848a92013-04-03 16:11:17 +11003374 struct xfs_inode_log_item *iip = ip->i_itemp;
3375 struct xfs_dinode *dip;
3376 struct xfs_mount *mp = ip->i_mount;
Brian Fosterf2019292020-05-06 13:25:20 -07003377 int error;
Linus Torvalds1da177e2005-04-16 15:20:36 -07003378
Christoph Hellwig579aa9c2008-04-22 17:34:00 +10003379 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_ILOCK_SHARED));
Dave Chinner718ecc52020-08-17 16:41:01 -07003380 ASSERT(xfs_iflags_test(ip, XFS_IFLUSHING));
Christoph Hellwigf7e67b22020-05-18 10:28:05 -07003381 ASSERT(ip->i_df.if_format != XFS_DINODE_FMT_BTREE ||
Christoph Hellwigdaf83962020-05-18 10:27:22 -07003382 ip->i_df.if_nextents > XFS_IFORK_MAXEXT(ip, XFS_DATA_FORK));
Dave Chinner90c60e12020-06-29 14:49:19 -07003383 ASSERT(iip->ili_item.li_buf == bp);
Linus Torvalds1da177e2005-04-16 15:20:36 -07003384
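	/* Locate this inode's on-disk image within the cluster buffer. */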
Christoph Hellwig88ee2df2015-06-22 09:44:29 +10003385 dip = xfs_buf_offset(bp, ip->i_imap.im_boffset);
Linus Torvalds1da177e2005-04-16 15:20:36 -07003386
Brian Fosterf2019292020-05-06 13:25:20 -07003387 /*
3388 * We don't flush the inode if any of the following checks fail, but we
3389 * do still update the log item and attach to the backing buffer as if
3390 * the flush happened. This is a formality to facilitate predictable
 3391 * error handling as the caller will shut down and fail the buffer.
3392 */
3393 error = -EFSCORRUPTED;
Christoph Hellwig69ef9212011-07-08 14:36:05 +02003394 if (XFS_TEST_ERROR(dip->di_magic != cpu_to_be16(XFS_DINODE_MAGIC),
Darrick J. Wong9e24cfd2017-06-20 17:54:47 -07003395 mp, XFS_ERRTAG_IFLUSH_1)) {
Dave Chinner6a19d932011-03-07 10:02:35 +11003396 xfs_alert_tag(mp, XFS_PTAG_IFLUSH,
Darrick J. Wongc9690042018-01-09 12:02:55 -08003397 "%s: Bad inode %Lu magic number 0x%x, ptr "PTR_FMT,
Dave Chinner6a19d932011-03-07 10:02:35 +11003398 __func__, ip->i_ino, be16_to_cpu(dip->di_magic), dip);
Brian Fosterf2019292020-05-06 13:25:20 -07003399 goto flush_out;
Linus Torvalds1da177e2005-04-16 15:20:36 -07003400 }
Dave Chinnerc19b3b052016-02-09 16:54:58 +11003401 if (S_ISREG(VFS_I(ip)->i_mode)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07003402 if (XFS_TEST_ERROR(
Christoph Hellwigf7e67b22020-05-18 10:28:05 -07003403 ip->i_df.if_format != XFS_DINODE_FMT_EXTENTS &&
3404 ip->i_df.if_format != XFS_DINODE_FMT_BTREE,
Darrick J. Wong9e24cfd2017-06-20 17:54:47 -07003405 mp, XFS_ERRTAG_IFLUSH_3)) {
Dave Chinner6a19d932011-03-07 10:02:35 +11003406 xfs_alert_tag(mp, XFS_PTAG_IFLUSH,
Darrick J. Wongc9690042018-01-09 12:02:55 -08003407 "%s: Bad regular inode %Lu, ptr "PTR_FMT,
Dave Chinner6a19d932011-03-07 10:02:35 +11003408 __func__, ip->i_ino, ip);
Brian Fosterf2019292020-05-06 13:25:20 -07003409 goto flush_out;
Linus Torvalds1da177e2005-04-16 15:20:36 -07003410 }
Dave Chinnerc19b3b052016-02-09 16:54:58 +11003411 } else if (S_ISDIR(VFS_I(ip)->i_mode)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07003412 if (XFS_TEST_ERROR(
Christoph Hellwigf7e67b22020-05-18 10:28:05 -07003413 ip->i_df.if_format != XFS_DINODE_FMT_EXTENTS &&
3414 ip->i_df.if_format != XFS_DINODE_FMT_BTREE &&
3415 ip->i_df.if_format != XFS_DINODE_FMT_LOCAL,
Darrick J. Wong9e24cfd2017-06-20 17:54:47 -07003416 mp, XFS_ERRTAG_IFLUSH_4)) {
Dave Chinner6a19d932011-03-07 10:02:35 +11003417 xfs_alert_tag(mp, XFS_PTAG_IFLUSH,
Darrick J. Wongc9690042018-01-09 12:02:55 -08003418 "%s: Bad directory inode %Lu, ptr "PTR_FMT,
Dave Chinner6a19d932011-03-07 10:02:35 +11003419 __func__, ip->i_ino, ip);
Brian Fosterf2019292020-05-06 13:25:20 -07003420 goto flush_out;
Linus Torvalds1da177e2005-04-16 15:20:36 -07003421 }
3422 }
Christoph Hellwigdaf83962020-05-18 10:27:22 -07003423 if (XFS_TEST_ERROR(ip->i_df.if_nextents + xfs_ifork_nextents(ip->i_afp) >
Christoph Hellwig6e73a542021-03-29 11:11:40 -07003424 ip->i_nblocks, mp, XFS_ERRTAG_IFLUSH_5)) {
Dave Chinner6a19d932011-03-07 10:02:35 +11003425 xfs_alert_tag(mp, XFS_PTAG_IFLUSH,
3426 "%s: detected corrupt incore inode %Lu, "
Darrick J. Wongc9690042018-01-09 12:02:55 -08003427 "total extents = %d, nblocks = %Ld, ptr "PTR_FMT,
Dave Chinner6a19d932011-03-07 10:02:35 +11003428 __func__, ip->i_ino,
Christoph Hellwigdaf83962020-05-18 10:27:22 -07003429 ip->i_df.if_nextents + xfs_ifork_nextents(ip->i_afp),
Christoph Hellwig6e73a542021-03-29 11:11:40 -07003430 ip->i_nblocks, ip);
Brian Fosterf2019292020-05-06 13:25:20 -07003431 goto flush_out;
Linus Torvalds1da177e2005-04-16 15:20:36 -07003432 }
Christoph Hellwig7821ea32021-03-29 11:11:44 -07003433 if (XFS_TEST_ERROR(ip->i_forkoff > mp->m_sb.sb_inodesize,
Darrick J. Wong9e24cfd2017-06-20 17:54:47 -07003434 mp, XFS_ERRTAG_IFLUSH_6)) {
Dave Chinner6a19d932011-03-07 10:02:35 +11003435 xfs_alert_tag(mp, XFS_PTAG_IFLUSH,
Darrick J. Wongc9690042018-01-09 12:02:55 -08003436 "%s: bad inode %Lu, forkoff 0x%x, ptr "PTR_FMT,
Christoph Hellwig7821ea32021-03-29 11:11:44 -07003437 __func__, ip->i_ino, ip->i_forkoff, ip);
Brian Fosterf2019292020-05-06 13:25:20 -07003438 goto flush_out;
Linus Torvalds1da177e2005-04-16 15:20:36 -07003439 }
Dave Chinnere60896d2013-07-24 15:47:30 +10003440
Linus Torvalds1da177e2005-04-16 15:20:36 -07003441 /*
Christoph Hellwig965e0a12021-03-29 11:11:42 -07003442 * Inode item log recovery for v2 inodes is dependent on the flushiter
3443 * count for correct sequencing. We bump the flush iteration count so
3444 * we can detect flushes which postdate a log record during recovery.
3445 * This is redundant as we now log every change and hence this can't
 3446 * happen, but we still need to do it to ensure backwards compatibility
3447 * with old kernels that predate logging all inode changes.
Linus Torvalds1da177e2005-04-16 15:20:36 -07003448 */
Christoph Hellwig6471e9c2020-03-18 08:15:11 -07003449 if (!xfs_sb_version_has_v3inode(&mp->m_sb))
Christoph Hellwig965e0a12021-03-29 11:11:42 -07003450 ip->i_flushiter++;
Linus Torvalds1da177e2005-04-16 15:20:36 -07003451
Christoph Hellwig0f45a1b2020-05-14 14:01:31 -07003452 /*
3453 * If there are inline format data / attr forks attached to this inode,
3454 * make sure they are not corrupt.
3455 */
Christoph Hellwigf7e67b22020-05-18 10:28:05 -07003456 if (ip->i_df.if_format == XFS_DINODE_FMT_LOCAL &&
Christoph Hellwig0f45a1b2020-05-14 14:01:31 -07003457 xfs_ifork_verify_local_data(ip))
3458 goto flush_out;
Christoph Hellwigf7e67b22020-05-18 10:28:05 -07003459 if (ip->i_afp && ip->i_afp->if_format == XFS_DINODE_FMT_LOCAL &&
Christoph Hellwig0f45a1b2020-05-14 14:01:31 -07003460 xfs_ifork_verify_local_attr(ip))
Brian Fosterf2019292020-05-06 13:25:20 -07003461 goto flush_out;
Darrick J. Wong005c5db2017-03-28 14:51:10 -07003462
Linus Torvalds1da177e2005-04-16 15:20:36 -07003463 /*
Dave Chinner39878482016-02-09 16:54:58 +11003464 * Copy the dirty parts of the inode into the on-disk inode. We always
3465 * copy out the core of the inode, because if the inode is dirty at all
3466 * the core must be.
Linus Torvalds1da177e2005-04-16 15:20:36 -07003467 */
Dave Chinner93f958f2016-02-09 16:54:58 +11003468 xfs_inode_to_disk(ip, dip, iip->ili_item.li_lsn);
Linus Torvalds1da177e2005-04-16 15:20:36 -07003469
3470 /* Wrap, we never let the log put out DI_MAX_FLUSH */
Christoph Hellwigee7b83f2021-03-29 11:11:43 -07003471 if (!xfs_sb_version_has_v3inode(&mp->m_sb)) {
3472 if (ip->i_flushiter == DI_MAX_FLUSH)
3473 ip->i_flushiter = 0;
3474 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07003475
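	/* Flush the data fork, and the attr fork if this inode has one. */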
Darrick J. Wong005c5db2017-03-28 14:51:10 -07003476 xfs_iflush_fork(ip, dip, iip, XFS_DATA_FORK);
3477 if (XFS_IFORK_Q(ip))
3478 xfs_iflush_fork(ip, dip, iip, XFS_ATTR_FORK);
Linus Torvalds1da177e2005-04-16 15:20:36 -07003479
3480 /*
Christoph Hellwigf5d8d5c2012-02-29 09:53:54 +00003481 * We've recorded everything logged in the inode, so we'd like to clear
3482 * the ili_fields bits so we don't log and flush things unnecessarily.
3483 * However, we can't stop logging all this information until the data
3484 * we've copied into the disk buffer is written to disk. If we did we
3485 * might overwrite the copy of the inode in the log with all the data
3486 * after re-logging only part of it, and in the face of a crash we
3487 * wouldn't have all the data we need to recover.
Linus Torvalds1da177e2005-04-16 15:20:36 -07003488 *
Christoph Hellwigf5d8d5c2012-02-29 09:53:54 +00003489 * What we do is move the bits to the ili_last_fields field. When
3490 * logging the inode, these bits are moved back to the ili_fields field.
Christoph Hellwig664ffb82020-09-01 10:55:29 -07003491 * In the xfs_buf_inode_iodone() routine we clear ili_last_fields, since
3492 * we know that the information those bits represent is permanently on
Christoph Hellwigf5d8d5c2012-02-29 09:53:54 +00003493 * disk. As long as the flush completes before the inode is logged
3494 * again, then both ili_fields and ili_last_fields will be cleared.
Linus Torvalds1da177e2005-04-16 15:20:36 -07003495 */
Brian Fosterf2019292020-05-06 13:25:20 -07003496 error = 0;
3497flush_out:
Dave Chinner1319ebe2020-06-29 14:48:46 -07003498 spin_lock(&iip->ili_lock);
Christoph Hellwig93848a92013-04-03 16:11:17 +11003499 iip->ili_last_fields = iip->ili_fields;
3500 iip->ili_fields = 0;
Dave Chinnerfc0561c2015-11-03 13:14:59 +11003501 iip->ili_fsync_fields = 0;
Dave Chinner1319ebe2020-06-29 14:48:46 -07003502 spin_unlock(&iip->ili_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07003503
Dave Chinner1319ebe2020-06-29 14:48:46 -07003504 /*
3505 * Store the current LSN of the inode so that we can tell whether the
Christoph Hellwig664ffb82020-09-01 10:55:29 -07003506 * item has moved in the AIL from xfs_buf_inode_iodone().
Dave Chinner1319ebe2020-06-29 14:48:46 -07003507 */
Christoph Hellwig93848a92013-04-03 16:11:17 +11003508 xfs_trans_ail_copy_lsn(mp->m_ail, &iip->ili_flush_lsn,
3509 &iip->ili_item.li_lsn);
Linus Torvalds1da177e2005-04-16 15:20:36 -07003510
Christoph Hellwig93848a92013-04-03 16:11:17 +11003511 /* generate the checksum. */
3512 xfs_dinode_calc_crc(mp, dip);
Brian Fosterf2019292020-05-06 13:25:20 -07003513 return error;
Linus Torvalds1da177e2005-04-16 15:20:36 -07003514}
Darrick J. Wong44a87362018-07-25 12:52:32 -07003515
Dave Chinnere6187b32020-06-29 14:49:19 -07003516/*
3517 * Non-blocking flush of dirty inode metadata into the backing buffer.
3518 *
3519 * The caller must have a reference to the inode and hold the cluster buffer
3520 * locked. The function will walk across all the inodes on the cluster buffer it
3521 * can find and lock without blocking, and flush them to the cluster buffer.
3522 *
Dave Chinner5717ea42020-06-29 14:49:20 -07003523 * On successful flushing of at least one inode, the caller must write out the
3524 * buffer and release it. If no inodes are flushed, -EAGAIN will be returned and
3525 * the caller needs to release the buffer. On failure, the filesystem will be
3526 * shut down, the buffer will have been unlocked and released, and a negative
3527 * error (-EFSCORRUPTED or -EIO) will be returned.
Dave Chinnere6187b32020-06-29 14:49:19 -07003528 */
3529int
3530xfs_iflush_cluster(
Dave Chinnere6187b32020-06-29 14:49:19 -07003531 struct xfs_buf *bp)
3532{
Dave Chinner5717ea42020-06-29 14:49:20 -07003533 struct xfs_mount *mp = bp->b_mount;
3534 struct xfs_log_item *lip, *n;
3535 struct xfs_inode *ip;
3536 struct xfs_inode_log_item *iip;
Dave Chinnere6187b32020-06-29 14:49:19 -07003537 int clcount = 0;
Dave Chinner5717ea42020-06-29 14:49:20 -07003538 int error = 0;
Dave Chinnere6187b32020-06-29 14:49:19 -07003539
Dave Chinner5717ea42020-06-29 14:49:20 -07003540 /*
3541	 * We must use the safe list variant here because on shutdown
3542	 * xfs_iflush_abort() can remove the item being aborted from the list.
3543 */
3544 list_for_each_entry_safe(lip, n, &bp->b_li_list, li_bio_list) {
3545 iip = (struct xfs_inode_log_item *)lip;
3546 ip = iip->ili_inode;
Dave Chinnere6187b32020-06-29 14:49:19 -07003547
3548 /*
Dave Chinner5717ea42020-06-29 14:49:20 -07003549 * Quick and dirty check to avoid locks if possible.
Dave Chinnere6187b32020-06-29 14:49:19 -07003550 */
Dave Chinner718ecc52020-08-17 16:41:01 -07003551 if (__xfs_iflags_test(ip, XFS_IRECLAIM | XFS_IFLUSHING))
Dave Chinner5717ea42020-06-29 14:49:20 -07003552 continue;
3553 if (xfs_ipincount(ip))
3554 continue;
3555
3556 /*
3557 * The inode is still attached to the buffer, which means it is
3558 * dirty but reclaim might try to grab it. Check carefully for
3559 * that, and grab the ilock while still holding the i_flags_lock
3560 * to guarantee reclaim will not be able to reclaim this inode
3561 * once we drop the i_flags_lock.
3562 */
3563 spin_lock(&ip->i_flags_lock);
3564 ASSERT(!__xfs_iflags_test(ip, XFS_ISTALE));
Dave Chinner718ecc52020-08-17 16:41:01 -07003565 if (__xfs_iflags_test(ip, XFS_IRECLAIM | XFS_IFLUSHING)) {
Dave Chinner5717ea42020-06-29 14:49:20 -07003566 spin_unlock(&ip->i_flags_lock);
Dave Chinnere6187b32020-06-29 14:49:19 -07003567 continue;
3568 }
3569
3570 /*
Dave Chinner5717ea42020-06-29 14:49:20 -07003571		 * The ILOCK will pin the inode against reclaim and prevent
3572		 * concurrent transactions from modifying the inode while we
Dave Chinner718ecc52020-08-17 16:41:01 -07003573		 * are flushing it. If we get the lock, set the flushing
3574 * state before we drop the i_flags_lock.
Dave Chinnere6187b32020-06-29 14:49:19 -07003575 */
Dave Chinner5717ea42020-06-29 14:49:20 -07003576 if (!xfs_ilock_nowait(ip, XFS_ILOCK_SHARED)) {
3577 spin_unlock(&ip->i_flags_lock);
3578 continue;
3579 }
Dave Chinner718ecc52020-08-17 16:41:01 -07003580 __xfs_iflags_set(ip, XFS_IFLUSHING);
Dave Chinner5717ea42020-06-29 14:49:20 -07003581 spin_unlock(&ip->i_flags_lock);
3582
3583 /*
Dave Chinner5717ea42020-06-29 14:49:20 -07003584 * Abort flushing this inode if we are shut down because the
3585 * inode may not currently be in the AIL. This can occur when
3586		 * log I/O failure unpins the inode without inserting it into the
3587 * AIL, leaving a dirty/unpinned inode attached to the buffer
3588 * that otherwise looks like it should be flushed.
3589 */
3590 if (XFS_FORCED_SHUTDOWN(mp)) {
3591 xfs_iunpin_wait(ip);
Dave Chinner5717ea42020-06-29 14:49:20 -07003592 xfs_iflush_abort(ip);
3593 xfs_iunlock(ip, XFS_ILOCK_SHARED);
3594 error = -EIO;
3595 continue;
3596 }
3597
3598 /* don't block waiting on a log force to unpin dirty inodes */
3599 if (xfs_ipincount(ip)) {
Dave Chinner718ecc52020-08-17 16:41:01 -07003600 xfs_iflags_clear(ip, XFS_IFLUSHING);
Dave Chinner5717ea42020-06-29 14:49:20 -07003601 xfs_iunlock(ip, XFS_ILOCK_SHARED);
3602 continue;
3603 }
3604
3605 if (!xfs_inode_clean(ip))
3606 error = xfs_iflush(ip, bp);
3607 else
Dave Chinner718ecc52020-08-17 16:41:01 -07003608 xfs_iflags_clear(ip, XFS_IFLUSHING);
Dave Chinner5717ea42020-06-29 14:49:20 -07003609 xfs_iunlock(ip, XFS_ILOCK_SHARED);
3610 if (error)
Dave Chinnere6187b32020-06-29 14:49:19 -07003611 break;
Dave Chinner5717ea42020-06-29 14:49:20 -07003612 clcount++;
Dave Chinnere6187b32020-06-29 14:49:19 -07003613 }
3614
Dave Chinnere6187b32020-06-29 14:49:19 -07003615 if (error) {
3616 bp->b_flags |= XBF_ASYNC;
3617 xfs_buf_ioend_fail(bp);
3618 xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
Dave Chinner5717ea42020-06-29 14:49:20 -07003619 return error;
Dave Chinnere6187b32020-06-29 14:49:19 -07003620 }
Dave Chinner5717ea42020-06-29 14:49:20 -07003621
3622 if (!clcount)
3623 return -EAGAIN;
3624
3625 XFS_STATS_INC(mp, xs_icluster_flushcnt);
3626 XFS_STATS_ADD(mp, xs_icluster_flushinode, clcount);
3627 return 0;
3628
Dave Chinnere6187b32020-06-29 14:49:19 -07003629}
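
/*
 * Illustrative sketch, not part of this file: one way a writeback caller
 * could drive xfs_iflush_cluster() under the contract documented above.
 * The helper name is hypothetical and error handling is simplified; in the
 * kernel the inode log item push path plays this role.
 */
static int example_flush_cluster_buffer(struct xfs_buf *bp)
{
	int	error;

	error = xfs_iflush_cluster(bp);
	if (error == -EAGAIN) {
		/* Nothing was flushed; we still hold the buffer locked. */
		xfs_buf_relse(bp);
		return 0;
	}
	if (error) {
		/* The fs is shut down and the buffer was already released. */
		return error;
	}

	/* At least one inode was flushed: write the buffer out, then drop it. */
	error = xfs_bwrite(bp);
	xfs_buf_relse(bp);
	return error;
}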
3630
Darrick J. Wong44a87362018-07-25 12:52:32 -07003631/* Release an inode. */
3632void
3633xfs_irele(
3634 struct xfs_inode *ip)
3635{
3636 trace_xfs_irele(ip, _RET_IP_);
3637 iput(VFS_I(ip));
3638}
Christoph Hellwig54fbdd12020-04-03 11:45:37 -07003639
3640/*
3641 * Ensure all committed transactions touching the inode are written to the log.
3642 */
3643int
3644xfs_log_force_inode(
3645 struct xfs_inode *ip)
3646{
3647 xfs_lsn_t lsn = 0;
3648
3649 xfs_ilock(ip, XFS_ILOCK_SHARED);
3650 if (xfs_ipincount(ip))
3651 lsn = ip->i_itemp->ili_last_lsn;
3652 xfs_iunlock(ip, XFS_ILOCK_SHARED);
3653
3654 if (!lsn)
3655 return 0;
3656 return xfs_log_force_lsn(ip->i_mount, lsn, XFS_LOG_SYNC, NULL);
3657}
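
/*
 * Illustrative sketch, not part of this file: how an fdatasync-style path
 * might pair page cache writeback with xfs_log_force_inode(). The helper
 * name is hypothetical; filemap_write_and_wait_range() is the generic VFS
 * routine for flushing a range of the page cache.
 */
static int example_datasync_range(struct xfs_inode *ip, loff_t start, loff_t end)
{
	int	error;

	/* Push the dirty page cache data for the range to disk first. */
	error = filemap_write_and_wait_range(VFS_I(ip)->i_mapping, start, end);
	if (error)
		return error;

	/* Then make sure the committed metadata changes are on stable storage. */
	return xfs_log_force_inode(ip);
}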
Darrick J. Wonge2aaee92020-06-29 14:47:20 -07003658
3659/*
3660 * Grab the exclusive iolock for a data copy from src to dest, making sure
3661 * to abide by the VFS locking order (lowest pointer value goes first) and to
3662 * break the layout leases before proceeding. The loop is needed because we
3663 * cannot call the blocking break_layout() with the iolocks held, and
3664 * therefore have to back out both locks.
3665 */
3666static int
3667xfs_iolock_two_inodes_and_break_layout(
3668 struct inode *src,
3669 struct inode *dest)
3670{
3671 int error;
3672
3673 if (src > dest)
3674 swap(src, dest);
3675
3676retry:
3677 /* Wait to break both inodes' layouts before we start locking. */
3678 error = break_layout(src, true);
3679 if (error)
3680 return error;
3681 if (src != dest) {
3682 error = break_layout(dest, true);
3683 if (error)
3684 return error;
3685 }
3686
3687 /* Lock one inode and make sure nobody got in and leased it. */
3688 inode_lock(src);
3689 error = break_layout(src, false);
3690 if (error) {
3691 inode_unlock(src);
3692 if (error == -EWOULDBLOCK)
3693 goto retry;
3694 return error;
3695 }
3696
3697 if (src == dest)
3698 return 0;
3699
3700 /* Lock the other inode and make sure nobody got in and leased it. */
3701 inode_lock_nested(dest, I_MUTEX_NONDIR2);
3702 error = break_layout(dest, false);
3703 if (error) {
3704 inode_unlock(src);
3705 inode_unlock(dest);
3706 if (error == -EWOULDBLOCK)
3707 goto retry;
3708 return error;
3709 }
3710
3711 return 0;
3712}
3713
3714/*
3715 * Lock two inodes so that userspace cannot initiate I/O via file syscalls or
3716 * mmap activity.
3717 */
3718int
3719xfs_ilock2_io_mmap(
3720 struct xfs_inode *ip1,
3721 struct xfs_inode *ip2)
3722{
3723 int ret;
3724
3725 ret = xfs_iolock_two_inodes_and_break_layout(VFS_I(ip1), VFS_I(ip2));
3726 if (ret)
3727 return ret;
3728 if (ip1 == ip2)
3729 xfs_ilock(ip1, XFS_MMAPLOCK_EXCL);
3730 else
3731 xfs_lock_two_inodes(ip1, XFS_MMAPLOCK_EXCL,
3732 ip2, XFS_MMAPLOCK_EXCL);
3733 return 0;
3734}
3735
3736/* Unlock both inodes to allow IO and mmap activity. */
3737void
3738xfs_iunlock2_io_mmap(
3739 struct xfs_inode *ip1,
3740 struct xfs_inode *ip2)
3741{
3742 bool same_inode = (ip1 == ip2);
3743
3744 xfs_iunlock(ip2, XFS_MMAPLOCK_EXCL);
3745 if (!same_inode)
3746 xfs_iunlock(ip1, XFS_MMAPLOCK_EXCL);
3747 inode_unlock(VFS_I(ip2));
3748 if (!same_inode)
3749 inode_unlock(VFS_I(ip1));
3750}
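
/*
 * Illustrative sketch, not part of this file: the expected pairing of the
 * two helpers above around an operation that must exclude both file I/O
 * and page faults on two inodes, e.g. a reflink-style remap. The callback
 * and helper names are hypothetical.
 */
static int example_op_on_two_files(struct xfs_inode *ip1, struct xfs_inode *ip2,
		int (*op)(struct xfs_inode *, struct xfs_inode *))
{
	int	error;

	/* Takes the VFS inode locks and MMAPLOCKs, breaking layout leases. */
	error = xfs_ilock2_io_mmap(ip1, ip2);
	if (error)
		return error;

	/* No new I/O or page faults can start against either inode here. */
	error = op(ip1, ip2);

	/* Drops the MMAPLOCKs and then the VFS inode locks. */
	xfs_iunlock2_io_mmap(ip1, ip2);
	return error;
}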