Blame - fs/xfs/xfs_inode.c - SHIFTPHONES/mainline/linux

blob: 1aaef689794d6a7b5aa75725a74839d3f2aa97fd [file] [log] [blame]

Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1	/*
Olaf Weber	3e57ecf	2006-06-09 14:48:12 +1000	[diff] [blame]	2	* Copyright (c) 2000-2006 Silicon Graphics, Inc.
Nathan Scott	7b71876	2005-11-02 14:58:39 +1100	[diff] [blame]	3	* All Rights Reserved.
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	4	*
Nathan Scott	7b71876	2005-11-02 14:58:39 +1100	[diff] [blame]	5	* This program is free software; you can redistribute it and/or
				6	* modify it under the terms of the GNU General Public License as
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	7	* published by the Free Software Foundation.
				8	*
Nathan Scott	7b71876	2005-11-02 14:58:39 +1100	[diff] [blame]	9	* This program is distributed in the hope that it would be useful,
				10	* but WITHOUT ANY WARRANTY; without even the implied warranty of
				11	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
				12	* GNU General Public License for more details.
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	13	*
Nathan Scott	7b71876	2005-11-02 14:58:39 +1100	[diff] [blame]	14	* You should have received a copy of the GNU General Public License
				15	* along with this program; if not, write the Free Software Foundation,
				16	* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	17	*/
Robert P. J. Day	40ebd81	2007-11-23 16:30:51 +1100	[diff] [blame]	18	#include <linux/log2.h>
				19
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	20	#include "xfs.h"
Nathan Scott	a844f45	2005-11-02 14:38:42 +1100	[diff] [blame]	21	#include "xfs_fs.h"
Dave Chinner	6ca1c90	2013-08-12 20:49:26 +1000	[diff] [blame]	22	#include "xfs_format.h"
Dave Chinner	70a9883	2013-10-23 10:36:05 +1100	[diff] [blame^]	23	#include "xfs_shared.h"
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	24	#include "xfs_log.h"
Nathan Scott	a844f45	2005-11-02 14:38:42 +1100	[diff] [blame]	25	#include "xfs_inum.h"
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	26	#include "xfs_trans.h"
Dave Chinner	c24b5df	2013-08-12 20:49:45 +1000	[diff] [blame]	27	#include "xfs_trans_space.h"
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	28	#include "xfs_trans_priv.h"
				29	#include "xfs_sb.h"
				30	#include "xfs_ag.h"
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	31	#include "xfs_mount.h"
Dave Chinner	c24b5df	2013-08-12 20:49:45 +1000	[diff] [blame]	32	#include "xfs_da_btree.h"
				33	#include "xfs_dir2_format.h"
				34	#include "xfs_dir2.h"
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	35	#include "xfs_bmap_btree.h"
Nathan Scott	a844f45	2005-11-02 14:38:42 +1100	[diff] [blame]	36	#include "xfs_alloc_btree.h"
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	37	#include "xfs_ialloc_btree.h"
Nathan Scott	a844f45	2005-11-02 14:38:42 +1100	[diff] [blame]	38	#include "xfs_attr_sf.h"
Dave Chinner	c24b5df	2013-08-12 20:49:45 +1000	[diff] [blame]	39	#include "xfs_attr.h"
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	40	#include "xfs_dinode.h"
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	41	#include "xfs_inode.h"
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	42	#include "xfs_buf_item.h"
Nathan Scott	a844f45	2005-11-02 14:38:42 +1100	[diff] [blame]	43	#include "xfs_inode_item.h"
				44	#include "xfs_btree.h"
				45	#include "xfs_alloc.h"
				46	#include "xfs_ialloc.h"
				47	#include "xfs_bmap.h"
Dave Chinner	6898811	2013-08-12 20:49:42 +1000	[diff] [blame]	48	#include "xfs_bmap_util.h"
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	49	#include "xfs_error.h"
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	50	#include "xfs_quota.h"
David Chinner	2a82b8b	2007-07-11 11:09:12 +1000	[diff] [blame]	51	#include "xfs_filestream.h"
Christoph Hellwig	93848a9	2013-04-03 16:11:17 +1100	[diff] [blame]	52	#include "xfs_cksum.h"
Christoph Hellwig	0b1b213	2009-12-14 23:14:59 +0000	[diff] [blame]	53	#include "xfs_trace.h"
Dave Chinner	33479e0	2012-10-08 21:56:11 +1100	[diff] [blame]	54	#include "xfs_icache.h"
Dave Chinner	c24b5df	2013-08-12 20:49:45 +1000	[diff] [blame]	55	#include "xfs_symlink.h"
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	56
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	57	kmem_zone_t *xfs_inode_zone;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	58
				59	/*
Christoph Hellwig	8f04c47	2011-07-08 14:34:34 +0200	[diff] [blame]	60	* Used in xfs_itruncate_extents(). This is the maximum number of extents
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	61	* freed from a file in a single transaction.
				62	*/
				63	#define XFS_ITRUNC_MAX_EXTENTS 2
				64
				65	STATIC int xfs_iflush_int(xfs_inode_t , xfs_buf_t );
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	66
Dave Chinner	2a0ec1d	2012-04-23 15:59:02 +1000	[diff] [blame]	67	/*
				68	* helper function to extract extent size hint from inode
				69	*/
				70	xfs_extlen_t
				71	xfs_get_extsz_hint(
				72	struct xfs_inode *ip)
				73	{
				74	if ((ip->i_d.di_flags & XFS_DIFLAG_EXTSIZE) && ip->i_d.di_extsize)
				75	return ip->i_d.di_extsize;
				76	if (XFS_IS_REALTIME_INODE(ip))
				77	return ip->i_mount->m_sb.sb_rextsize;
				78	return 0;
				79	}
				80
Dave Chinner	fa96aca	2012-10-08 21:56:10 +1100	[diff] [blame]	81	/*
				82	* This is a wrapper routine around the xfs_ilock() routine used to centralize
				83	* some grungy code. It is used in places that wish to lock the inode solely
				84	* for reading the extents. The reason these places can't just call
				85	* xfs_ilock(SHARED) is that the inode lock also guards the bringing in of the
				86	* extents from disk for a file in b-tree format. If the inode is in b-tree
				87	* format, then we need to lock the inode exclusively until the extents are read
				88	* in. Locking it exclusively all the time would limit our parallelism
				89	* unnecessarily, though. What we do instead is check to see if the extents
				90	* have been read in yet, and only lock the inode exclusively if they have not.
				91	*
				92	* The function returns a value which should be given to the corresponding
				93	* xfs_iunlock_map_shared(). This value is the mode in which the lock was
				94	* actually taken.
				95	*/
				96	uint
				97	xfs_ilock_map_shared(
				98	xfs_inode_t *ip)
				99	{
				100	uint lock_mode;
				101
				102	if ((ip->i_d.di_format == XFS_DINODE_FMT_BTREE) &&
				103	((ip->i_df.if_flags & XFS_IFEXTENTS) == 0)) {
				104	lock_mode = XFS_ILOCK_EXCL;
				105	} else {
				106	lock_mode = XFS_ILOCK_SHARED;
				107	}
				108
				109	xfs_ilock(ip, lock_mode);
				110
				111	return lock_mode;
				112	}
				113
				114	/*
				115	* This is simply the unlock routine to go with xfs_ilock_map_shared().
				116	* All it does is call xfs_iunlock() with the given lock_mode.
				117	*/
				118	void
				119	xfs_iunlock_map_shared(
				120	xfs_inode_t *ip,
				121	unsigned int lock_mode)
				122	{
				123	xfs_iunlock(ip, lock_mode);
				124	}
				125
				126	/*
				127	* The xfs inode contains 2 locks: a multi-reader lock called the
				128	* i_iolock and a multi-reader lock called the i_lock. This routine
				129	* allows either or both of the locks to be obtained.
				130	*
				131	* The 2 locks should always be ordered so that the IO lock is
				132	* obtained first in order to prevent deadlock.
				133	*
				134	* ip -- the inode being locked
				135	* lock_flags -- this parameter indicates the inode's locks
				136	* to be locked. It can be:
				137	* XFS_IOLOCK_SHARED,
				138	* XFS_IOLOCK_EXCL,
				139	* XFS_ILOCK_SHARED,
				140	* XFS_ILOCK_EXCL,
				141	* XFS_IOLOCK_SHARED | XFS_ILOCK_SHARED,
				142	* XFS_IOLOCK_SHARED | XFS_ILOCK_EXCL,
				143	* XFS_IOLOCK_EXCL | XFS_ILOCK_SHARED,
				144	* XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL
				145	*/
				146	void
				147	xfs_ilock(
				148	xfs_inode_t *ip,
				149	uint lock_flags)
				150	{
				151	trace_xfs_ilock(ip, lock_flags, _RET_IP_);
				152
				153	/*
				154	* You can't set both SHARED and EXCL for the same lock,
				155	* and only XFS_IOLOCK_SHARED, XFS_IOLOCK_EXCL, XFS_ILOCK_SHARED,
				156	* and XFS_ILOCK_EXCL are valid values to set in lock_flags.
				157	*/
				158	ASSERT((lock_flags & (XFS_IOLOCK_SHARED \| XFS_IOLOCK_EXCL)) !=
				159	(XFS_IOLOCK_SHARED \| XFS_IOLOCK_EXCL));
				160	ASSERT((lock_flags & (XFS_ILOCK_SHARED \| XFS_ILOCK_EXCL)) !=
				161	(XFS_ILOCK_SHARED \| XFS_ILOCK_EXCL));
				162	ASSERT((lock_flags & ~(XFS_LOCK_MASK \| XFS_LOCK_DEP_MASK)) == 0);
				163
				164	if (lock_flags & XFS_IOLOCK_EXCL)
				165	mrupdate_nested(&ip->i_iolock, XFS_IOLOCK_DEP(lock_flags));
				166	else if (lock_flags & XFS_IOLOCK_SHARED)
				167	mraccess_nested(&ip->i_iolock, XFS_IOLOCK_DEP(lock_flags));
				168
				169	if (lock_flags & XFS_ILOCK_EXCL)
				170	mrupdate_nested(&ip->i_lock, XFS_ILOCK_DEP(lock_flags));
				171	else if (lock_flags & XFS_ILOCK_SHARED)
				172	mraccess_nested(&ip->i_lock, XFS_ILOCK_DEP(lock_flags));
				173	}
				174
				175	/*
				176	* This is just like xfs_ilock(), except that the caller
				177	* is guaranteed not to sleep. It returns 1 if it gets
				178	* the requested locks and 0 otherwise. If the IO lock is
				179	* obtained but the inode lock cannot be, then the IO lock
				180	* is dropped before returning.
				181	*
				182	* ip -- the inode being locked
				183	* lock_flags -- this parameter indicates the inode's locks to be
				184	* locked. See the comment for xfs_ilock() for a list
				185	* of valid values.
				186	*/
				187	int
				188	xfs_ilock_nowait(
				189	xfs_inode_t *ip,
				190	uint lock_flags)
				191	{
				192	trace_xfs_ilock_nowait(ip, lock_flags, _RET_IP_);
				193
				194	/*
				195	* You can't set both SHARED and EXCL for the same lock,
				196	* and only XFS_IOLOCK_SHARED, XFS_IOLOCK_EXCL, XFS_ILOCK_SHARED,
				197	* and XFS_ILOCK_EXCL are valid values to set in lock_flags.
				198	*/
				199	ASSERT((lock_flags & (XFS_IOLOCK_SHARED \| XFS_IOLOCK_EXCL)) !=
				200	(XFS_IOLOCK_SHARED \| XFS_IOLOCK_EXCL));
				201	ASSERT((lock_flags & (XFS_ILOCK_SHARED \| XFS_ILOCK_EXCL)) !=
				202	(XFS_ILOCK_SHARED \| XFS_ILOCK_EXCL));
				203	ASSERT((lock_flags & ~(XFS_LOCK_MASK \| XFS_LOCK_DEP_MASK)) == 0);
				204
				205	if (lock_flags & XFS_IOLOCK_EXCL) {
				206	if (!mrtryupdate(&ip->i_iolock))
				207	goto out;
				208	} else if (lock_flags & XFS_IOLOCK_SHARED) {
				209	if (!mrtryaccess(&ip->i_iolock))
				210	goto out;
				211	}
				212	if (lock_flags & XFS_ILOCK_EXCL) {
				213	if (!mrtryupdate(&ip->i_lock))
				214	goto out_undo_iolock;
				215	} else if (lock_flags & XFS_ILOCK_SHARED) {
				216	if (!mrtryaccess(&ip->i_lock))
				217	goto out_undo_iolock;
				218	}
				219	return 1;
				220
				221	out_undo_iolock:
				222	if (lock_flags & XFS_IOLOCK_EXCL)
				223	mrunlock_excl(&ip->i_iolock);
				224	else if (lock_flags & XFS_IOLOCK_SHARED)
				225	mrunlock_shared(&ip->i_iolock);
				226	out:
				227	return 0;
				228	}
				229
				230	/*
				231	* xfs_iunlock() is used to drop the inode locks acquired with
				232	* xfs_ilock() and xfs_ilock_nowait(). The caller must pass
				233	* in the flags given to xfs_ilock() or xfs_ilock_nowait() so
				234	* that we know which locks to drop.
				235	*
				236	* ip -- the inode being unlocked
				237	* lock_flags -- this parameter indicates the inode's locks to be
				238	* unlocked. See the comment for xfs_ilock() for a list
				239	* of valid values for this parameter.
				240	*
				241	*/
				242	void
				243	xfs_iunlock(
				244	xfs_inode_t *ip,
				245	uint lock_flags)
				246	{
				247	/*
				248	* You can't set both SHARED and EXCL for the same lock,
				249	* and only XFS_IOLOCK_SHARED, XFS_IOLOCK_EXCL, XFS_ILOCK_SHARED,
				250	* and XFS_ILOCK_EXCL are valid values to set in lock_flags.
				251	*/
				252	ASSERT((lock_flags & (XFS_IOLOCK_SHARED \| XFS_IOLOCK_EXCL)) !=
				253	(XFS_IOLOCK_SHARED \| XFS_IOLOCK_EXCL));
				254	ASSERT((lock_flags & (XFS_ILOCK_SHARED \| XFS_ILOCK_EXCL)) !=
				255	(XFS_ILOCK_SHARED \| XFS_ILOCK_EXCL));
				256	ASSERT((lock_flags & ~(XFS_LOCK_MASK \| XFS_LOCK_DEP_MASK)) == 0);
				257	ASSERT(lock_flags != 0);
				258
				259	if (lock_flags & XFS_IOLOCK_EXCL)
				260	mrunlock_excl(&ip->i_iolock);
				261	else if (lock_flags & XFS_IOLOCK_SHARED)
				262	mrunlock_shared(&ip->i_iolock);
				263
				264	if (lock_flags & XFS_ILOCK_EXCL)
				265	mrunlock_excl(&ip->i_lock);
				266	else if (lock_flags & XFS_ILOCK_SHARED)
				267	mrunlock_shared(&ip->i_lock);
				268
				269	trace_xfs_iunlock(ip, lock_flags, _RET_IP_);
				270	}
				271
				272	/*
				273	* give up write locks. the i/o lock cannot be held nested
				274	* if it is being demoted.
				275	*/
				276	void
				277	xfs_ilock_demote(
				278	xfs_inode_t *ip,
				279	uint lock_flags)
				280	{
				281	ASSERT(lock_flags & (XFS_IOLOCK_EXCL\|XFS_ILOCK_EXCL));
				282	ASSERT((lock_flags & ~(XFS_IOLOCK_EXCL\|XFS_ILOCK_EXCL)) == 0);
				283
				284	if (lock_flags & XFS_ILOCK_EXCL)
				285	mrdemote(&ip->i_lock);
				286	if (lock_flags & XFS_IOLOCK_EXCL)
				287	mrdemote(&ip->i_iolock);
				288
				289	trace_xfs_ilock_demote(ip, lock_flags, _RET_IP_);
				290	}
				291
#if defined(DEBUG) || defined(XFS_WARN)
/*
 * Debug-only check of whether an inode lock is held.
 *
 * If lock_flags names the inode lock it is checked and the IO lock bits are
 * ignored; otherwise the IO lock is checked.  When SHARED is among the
 * requested bits, any holder of the rwsem counts; when only EXCL is
 * requested, the lock's mr_writer field must be set.  Passing no lock bits
 * at all trips an ASSERT and returns 0.
 */
int
xfs_isilocked(
	xfs_inode_t	*ip,
	uint		lock_flags)
{
	const uint	i_bits = lock_flags &
				 (XFS_ILOCK_EXCL | XFS_ILOCK_SHARED);
	const uint	io_bits = lock_flags &
				  (XFS_IOLOCK_EXCL | XFS_IOLOCK_SHARED);

	if (i_bits) {
		if (i_bits & XFS_ILOCK_SHARED)
			return rwsem_is_locked(&ip->i_lock.mr_lock);
		return !!ip->i_lock.mr_writer;
	}

	if (io_bits) {
		if (io_bits & XFS_IOLOCK_SHARED)
			return rwsem_is_locked(&ip->i_iolock.mr_lock);
		return !!ip->i_iolock.mr_writer;
	}

	ASSERT(0);
	return 0;
}
#endif
				314
#ifdef DEBUG
/*
 * DEBUG-only statistics maintained by xfs_lock_inodes().
 */
int xfs_locked_n;		/* calls that needed no retry */
int xfs_small_retries;		/* calls that retried fewer than 5 times */
int xfs_middle_retries;		/* calls that retried 5..99 times */
int xfs_lots_retries;		/* calls that retried 100 times or more */
int xfs_lock_delays;		/* number of delay(1) back-offs taken */
#endif
				322
				323	/*
				324	* Bump the subclass so xfs_lock_inodes() acquires each lock with
				325	* a different value
				326	*/
				327	static inline int
				328	xfs_lock_inumorder(int lock_mode, int subclass)
				329	{
				330	if (lock_mode & (XFS_IOLOCK_SHARED\|XFS_IOLOCK_EXCL))
				331	lock_mode \|= (subclass + XFS_LOCK_INUMORDER) << XFS_IOLOCK_SHIFT;
				332	if (lock_mode & (XFS_ILOCK_SHARED\|XFS_ILOCK_EXCL))
				333	lock_mode \|= (subclass + XFS_LOCK_INUMORDER) << XFS_ILOCK_SHIFT;
				334
				335	return lock_mode;
				336	}
				337
				338	/*
				339	* The following routine will lock n inodes in exclusive mode.
				340	* We assume the caller calls us with the inodes in i_ino order.
				341	*
				342	* We need to detect deadlock where an inode that we lock
				343	* is in the AIL and we start waiting for another inode that is locked
				344	* by a thread in a long running transaction (such as truncate). This can
				345	* result in deadlock since the long running trans might need to wait
				346	* for the inode we just locked in order to push the tail and free space
				347	* in the log.
				348	*/
				349	void
				350	xfs_lock_inodes(
				351	xfs_inode_t **ips,
				352	int inodes,
				353	uint lock_mode)
				354	{
				355	int attempts = 0, i, j, try_lock;
				356	xfs_log_item_t *lp;
				357
				358	ASSERT(ips && (inodes >= 2)); /* we need at least two */
				359
				360	try_lock = 0;
				361	i = 0;
				362
				363	again:
				364	for (; i < inodes; i++) {
				365	ASSERT(ips[i]);
				366
				367	if (i && (ips[i] == ips[i-1])) /* Already locked */
				368	continue;
				369
				370	/*
				371	* If try_lock is not set yet, make sure all locked inodes
				372	* are not in the AIL.
				373	* If any are, set try_lock to be used later.
				374	*/
				375
				376	if (!try_lock) {
				377	for (j = (i - 1); j >= 0 && !try_lock; j--) {
				378	lp = (xfs_log_item_t *)ips[j]->i_itemp;
				379	if (lp && (lp->li_flags & XFS_LI_IN_AIL)) {
				380	try_lock++;
				381	}
				382	}
				383	}
				384
				385	/*
				386	* If any of the previous locks we have locked is in the AIL,
				387	* we must TRY to get the second and subsequent locks. If
				388	* we can't get any, we must release all we have
				389	* and try again.
				390	*/
				391
				392	if (try_lock) {
				393	/* try_lock must be 0 if i is 0. */
				394	/*
				395	* try_lock means we have an inode locked
				396	* that is in the AIL.
				397	*/
				398	ASSERT(i != 0);
				399	if (!xfs_ilock_nowait(ips[i], xfs_lock_inumorder(lock_mode, i))) {
				400	attempts++;
				401
				402	/*
				403	* Unlock all previous guys and try again.
				404	* xfs_iunlock will try to push the tail
				405	* if the inode is in the AIL.
				406	*/
				407
				408	for(j = i - 1; j >= 0; j--) {
				409
				410	/*
				411	* Check to see if we've already
				412	* unlocked this one.
				413	* Not the first one going back,
				414	* and the inode ptr is the same.
				415	*/
				416	if ((j != (i - 1)) && ips[j] ==
				417	ips[j+1])
				418	continue;
				419
				420	xfs_iunlock(ips[j], lock_mode);
				421	}
				422
				423	if ((attempts % 5) == 0) {
				424	delay(1); /* Don't just spin the CPU */
				425	#ifdef DEBUG
				426	xfs_lock_delays++;
				427	#endif
				428	}
				429	i = 0;
				430	try_lock = 0;
				431	goto again;
				432	}
				433	} else {
				434	xfs_ilock(ips[i], xfs_lock_inumorder(lock_mode, i));
				435	}
				436	}
				437
				438	#ifdef DEBUG
				439	if (attempts) {
				440	if (attempts < 5) xfs_small_retries++;
				441	else if (attempts < 100) xfs_middle_retries++;
				442	else xfs_lots_retries++;
				443	} else {
				444	xfs_locked_n++;
				445	}
				446	#endif
				447	}
				448
				449	/*
				450	* xfs_lock_two_inodes() can only be used to lock one type of lock
				451	* at a time - the iolock or the ilock, but not both at once. If
				452	* we lock both at once, lockdep will report false positives saying
				453	* we have violated locking orders.
				454	*/
				455	void
				456	xfs_lock_two_inodes(
				457	xfs_inode_t *ip0,
				458	xfs_inode_t *ip1,
				459	uint lock_mode)
				460	{
				461	xfs_inode_t *temp;
				462	int attempts = 0;
				463	xfs_log_item_t *lp;
				464
				465	if (lock_mode & (XFS_IOLOCK_SHARED\|XFS_IOLOCK_EXCL))
				466	ASSERT((lock_mode & (XFS_ILOCK_SHARED\|XFS_ILOCK_EXCL)) == 0);
				467	ASSERT(ip0->i_ino != ip1->i_ino);
				468
				469	if (ip0->i_ino > ip1->i_ino) {
				470	temp = ip0;
				471	ip0 = ip1;
				472	ip1 = temp;
				473	}
				474
				475	again:
				476	xfs_ilock(ip0, xfs_lock_inumorder(lock_mode, 0));
				477
				478	/*
				479	* If the first lock we have locked is in the AIL, we must TRY to get
				480	* the second lock. If we can't get it, we must release the first one
				481	* and try again.
				482	*/
				483	lp = (xfs_log_item_t *)ip0->i_itemp;
				484	if (lp && (lp->li_flags & XFS_LI_IN_AIL)) {
				485	if (!xfs_ilock_nowait(ip1, xfs_lock_inumorder(lock_mode, 1))) {
				486	xfs_iunlock(ip0, lock_mode);
				487	if ((++attempts % 5) == 0)
				488	delay(1); /* Don't just spin the CPU */
				489	goto again;
				490	}
				491	} else {
				492	xfs_ilock(ip1, xfs_lock_inumorder(lock_mode, 1));
				493	}
				494	}
				495
				496
Dave Chinner	fa96aca	2012-10-08 21:56:10 +1100	[diff] [blame]	497	void
				498	__xfs_iflock(
				499	struct xfs_inode *ip)
				500	{
				501	wait_queue_head_t *wq = bit_waitqueue(&ip->i_flags, __XFS_IFLOCK_BIT);
				502	DEFINE_WAIT_BIT(wait, &ip->i_flags, __XFS_IFLOCK_BIT);
				503
				504	do {
				505	prepare_to_wait_exclusive(wq, &wait.wait, TASK_UNINTERRUPTIBLE);
				506	if (xfs_isiflocked(ip))
				507	io_schedule();
				508	} while (!xfs_iflock_nowait(ip));
				509
				510	finish_wait(wq, &wait.wait);
				511	}
				512
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	513	STATIC uint
				514	_xfs_dic2xflags(
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	515	__uint16_t di_flags)
				516	{
				517	uint flags = 0;
				518
				519	if (di_flags & XFS_DIFLAG_ANY) {
				520	if (di_flags & XFS_DIFLAG_REALTIME)
				521	flags \|= XFS_XFLAG_REALTIME;
				522	if (di_flags & XFS_DIFLAG_PREALLOC)
				523	flags \|= XFS_XFLAG_PREALLOC;
				524	if (di_flags & XFS_DIFLAG_IMMUTABLE)
				525	flags \|= XFS_XFLAG_IMMUTABLE;
				526	if (di_flags & XFS_DIFLAG_APPEND)
				527	flags \|= XFS_XFLAG_APPEND;
				528	if (di_flags & XFS_DIFLAG_SYNC)
				529	flags \|= XFS_XFLAG_SYNC;
				530	if (di_flags & XFS_DIFLAG_NOATIME)
				531	flags \|= XFS_XFLAG_NOATIME;
				532	if (di_flags & XFS_DIFLAG_NODUMP)
				533	flags \|= XFS_XFLAG_NODUMP;
				534	if (di_flags & XFS_DIFLAG_RTINHERIT)
				535	flags \|= XFS_XFLAG_RTINHERIT;
				536	if (di_flags & XFS_DIFLAG_PROJINHERIT)
				537	flags \|= XFS_XFLAG_PROJINHERIT;
				538	if (di_flags & XFS_DIFLAG_NOSYMLINKS)
				539	flags \|= XFS_XFLAG_NOSYMLINKS;
Nathan Scott	dd9f438	2006-01-11 15:28:28 +1100	[diff] [blame]	540	if (di_flags & XFS_DIFLAG_EXTSIZE)
				541	flags \|= XFS_XFLAG_EXTSIZE;
				542	if (di_flags & XFS_DIFLAG_EXTSZINHERIT)
				543	flags \|= XFS_XFLAG_EXTSZINHERIT;
Barry Naujok	d3446ea	2006-06-09 14:54:19 +1000	[diff] [blame]	544	if (di_flags & XFS_DIFLAG_NODEFRAG)
				545	flags \|= XFS_XFLAG_NODEFRAG;
David Chinner	2a82b8b	2007-07-11 11:09:12 +1000	[diff] [blame]	546	if (di_flags & XFS_DIFLAG_FILESTREAM)
				547	flags \|= XFS_XFLAG_FILESTREAM;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	548	}
				549
				550	return flags;
				551	}
				552
				553	uint
				554	xfs_ip2xflags(
				555	xfs_inode_t *ip)
				556	{
Christoph Hellwig	347d1c0	2007-08-28 13:57:51 +1000	[diff] [blame]	557	xfs_icdinode_t *dic = &ip->i_d;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	558
Nathan Scott	a916e2b	2006-06-09 17:12:17 +1000	[diff] [blame]	559	return _xfs_dic2xflags(dic->di_flags) \|
Christoph Hellwig	45ba598	2007-12-07 14:07:20 +1100	[diff] [blame]	560	(XFS_IFORK_Q(ip) ? XFS_XFLAG_HASATTR : 0);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	561	}
				562
				563	uint
				564	xfs_dic2xflags(
Christoph Hellwig	45ba598	2007-12-07 14:07:20 +1100	[diff] [blame]	565	xfs_dinode_t *dip)
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	566	{
Christoph Hellwig	81591fe	2008-11-28 14:23:39 +1100	[diff] [blame]	567	return _xfs_dic2xflags(be16_to_cpu(dip->di_flags)) \|
Christoph Hellwig	45ba598	2007-12-07 14:07:20 +1100	[diff] [blame]	568	(XFS_DFORK_Q(dip) ? XFS_XFLAG_HASATTR : 0);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	569	}
				570
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	571	/*
Dave Chinner	c24b5df	2013-08-12 20:49:45 +1000	[diff] [blame]	572	* Looks up an inode from "name". If ci_name is not NULL, then a CI match
				573	* is allowed, otherwise it has to be an exact match. If a CI match is found,
				574	* ci_name->name will point to the actual name (caller must free) or
				575	* will be set to NULL if an exact match is found.
				576	*/
				577	int
				578	xfs_lookup(
				579	xfs_inode_t *dp,
				580	struct xfs_name *name,
				581	xfs_inode_t **ipp,
				582	struct xfs_name *ci_name)
				583	{
				584	xfs_ino_t inum;
				585	int error;
				586	uint lock_mode;
				587
				588	trace_xfs_lookup(dp, name);
				589
				590	if (XFS_FORCED_SHUTDOWN(dp->i_mount))
				591	return XFS_ERROR(EIO);
				592
				593	lock_mode = xfs_ilock_map_shared(dp);
				594	error = xfs_dir_lookup(NULL, dp, name, &inum, ci_name);
				595	xfs_iunlock_map_shared(dp, lock_mode);
				596
				597	if (error)
				598	goto out;
				599
				600	error = xfs_iget(dp->i_mount, NULL, inum, 0, 0, ipp);
				601	if (error)
				602	goto out_free_name;
				603
				604	return 0;
				605
				606	out_free_name:
				607	if (ci_name)
				608	kmem_free(ci_name->name);
				609	out:
				610	*ipp = NULL;
				611	return error;
				612	}
				613
				614	/*
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	615	* Allocate an inode on disk and return a copy of its in-core version.
				616	* The in-core inode is locked exclusively. Set mode, nlink, and rdev
				617	* appropriately within the inode. The uid and gid for the inode are
				618	* set according to the contents of the given cred structure.
				619	*
				620	* Use xfs_dialloc() to allocate the on-disk inode. If xfs_dialloc()
Carlos Maiolino	cd856db	2012-10-20 11:08:19 -0300	[diff] [blame]	621	* has a free inode available, call xfs_iget() to obtain the in-core
				622	* version of the allocated inode. Finally, fill in the inode and
				623	* log its initial contents. In this case, ialloc_context would be
				624	* set to NULL.
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	625	*
Carlos Maiolino	cd856db	2012-10-20 11:08:19 -0300	[diff] [blame]	626	* If xfs_dialloc() does not have an available inode, it will replenish
				627	* its supply by doing an allocation. Since we can only do one
				628	* allocation within a transaction without deadlocks, we must commit
				629	* the current transaction before returning the inode itself.
				630	* In this case, therefore, we will set ialloc_context and return.
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	631	* The caller should then commit the current transaction, start a new
				632	* transaction, and call xfs_ialloc() again to actually get the inode.
				633	*
				634	* To ensure that some other process does not grab the inode that
				635	* was allocated during the first call to xfs_ialloc(), this routine
				636	* also returns the [locked] bp pointing to the head of the freelist
				637	* as ialloc_context. The caller should hold this buffer across
				638	* the commit and pass it back into this routine on the second call.
David Chinner	b11f94d	2007-07-11 11:09:33 +1000	[diff] [blame]	639	*
				640	* If we are allocating quota inodes, we do not have a parent inode
				641	* to attach to or associate with (i.e. pip == NULL) because they
				642	* are not linked into the directory structure - they are attached
				643	* directly to the superblock - and so have no parent.
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	644	*/
				645	int
				646	xfs_ialloc(
				647	xfs_trans_t *tp,
				648	xfs_inode_t *pip,
Al Viro	576b1d6	2011-07-26 02:50:15 -0400	[diff] [blame]	649	umode_t mode,
Nathan Scott	31b084a	2005-05-05 13:25:00 -0700	[diff] [blame]	650	xfs_nlink_t nlink,
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	651	xfs_dev_t rdev,
Arkadiusz Mi?kiewicz	6743099	2010-09-26 06:10:18 +0000	[diff] [blame]	652	prid_t prid,
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	653	int okalloc,
				654	xfs_buf_t **ialloc_context,
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	655	xfs_inode_t **ipp)
				656	{
Christoph Hellwig	93848a9	2013-04-03 16:11:17 +1100	[diff] [blame]	657	struct xfs_mount *mp = tp->t_mountp;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	658	xfs_ino_t ino;
				659	xfs_inode_t *ip;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	660	uint flags;
				661	int error;
Christoph Hellwig	dff35fd	2008-08-13 16:44:15 +1000	[diff] [blame]	662	timespec_t tv;
David Chinner	bf90424	2008-10-30 17:36:14 +1100	[diff] [blame]	663	int filestreams = 0;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	664
				665	/*
				666	* Call the space management code to pick
				667	* the on-disk inode to be allocated.
				668	*/
David Chinner	b11f94d	2007-07-11 11:09:33 +1000	[diff] [blame]	669	error = xfs_dialloc(tp, pip ? pip->i_ino : 0, mode, okalloc,
Christoph Hellwig	0835890	2012-07-04 10:54:47 -0400	[diff] [blame]	670	ialloc_context, &ino);
David Chinner	bf90424	2008-10-30 17:36:14 +1100	[diff] [blame]	671	if (error)
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	672	return error;
Christoph Hellwig	0835890	2012-07-04 10:54:47 -0400	[diff] [blame]	673	if (*ialloc_context \|\| ino == NULLFSINO) {
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	674	*ipp = NULL;
				675	return 0;
				676	}
				677	ASSERT(*ialloc_context == NULL);
				678
				679	/*
				680	* Get the in-core inode with the lock held exclusively.
				681	* This is because we're setting fields here we need
				682	* to prevent others from looking at until we're done.
				683	*/
Christoph Hellwig	93848a9	2013-04-03 16:11:17 +1100	[diff] [blame]	684	error = xfs_iget(mp, tp, ino, XFS_IGET_CREATE,
Christoph Hellwig	ec3ba85	2011-02-13 13:26:42 +0000	[diff] [blame]	685	XFS_ILOCK_EXCL, &ip);
David Chinner	bf90424	2008-10-30 17:36:14 +1100	[diff] [blame]	686	if (error)
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	687	return error;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	688	ASSERT(ip != NULL);
				689
Al Viro	576b1d6	2011-07-26 02:50:15 -0400	[diff] [blame]	690	ip->i_d.di_mode = mode;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	691	ip->i_d.di_onlink = 0;
				692	ip->i_d.di_nlink = nlink;
				693	ASSERT(ip->i_d.di_nlink == nlink);
Dwight Engen	7aab1b2	2013-08-15 14:08:01 -0400	[diff] [blame]	694	ip->i_d.di_uid = xfs_kuid_to_uid(current_fsuid());
				695	ip->i_d.di_gid = xfs_kgid_to_gid(current_fsgid());
Arkadiusz Mi?kiewicz	6743099	2010-09-26 06:10:18 +0000	[diff] [blame]	696	xfs_set_projid(ip, prid);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	697	memset(&(ip->i_d.di_pad[0]), 0, sizeof(ip->i_d.di_pad));
				698
				699	/*
				700	* If the superblock version is up to where we support new format
				701	* inodes and this is currently an old format inode, then change
				702	* the inode version number now. This way we only do the conversion
				703	* here rather than here and in the flush/logging code.
				704	*/
Christoph Hellwig	93848a9	2013-04-03 16:11:17 +1100	[diff] [blame]	705	if (xfs_sb_version_hasnlink(&mp->m_sb) &&
Christoph Hellwig	51ce16d	2008-11-28 14:23:39 +1100	[diff] [blame]	706	ip->i_d.di_version == 1) {
				707	ip->i_d.di_version = 2;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	708	/*
				709	* We've already zeroed the old link count, the projid field,
				710	* and the pad field.
				711	*/
				712	}
				713
				714	/*
				715	* Project ids won't be stored on disk if we are using a version 1 inode.
				716	*/
Christoph Hellwig	51ce16d	2008-11-28 14:23:39 +1100	[diff] [blame]	717	if ((prid != 0) && (ip->i_d.di_version == 1))
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	718	xfs_bump_ino_vers2(tp, ip);
				719
Christoph Hellwig	bd186aa	2007-08-30 17:21:12 +1000	[diff] [blame]	720	if (pip && XFS_INHERIT_GID(pip)) {
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	721	ip->i_d.di_gid = pip->i_d.di_gid;
Al Viro	abbede1	2011-07-26 02:31:30 -0400	[diff] [blame]	722	if ((pip->i_d.di_mode & S_ISGID) && S_ISDIR(mode)) {
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	723	ip->i_d.di_mode \|= S_ISGID;
				724	}
				725	}
				726
				727	/*
				728	* If the group ID of the new file does not match the effective group
				729	* ID or one of the supplementary group IDs, the S_ISGID bit is cleared
				730	* (and only if the irix_sgid_inherit compatibility variable is set).
				731	*/
				732	if ((irix_sgid_inherit) &&
				733	(ip->i_d.di_mode & S_ISGID) &&
Dwight Engen	7aab1b2	2013-08-15 14:08:01 -0400	[diff] [blame]	734	(!in_group_p(xfs_gid_to_kgid(ip->i_d.di_gid)))) {
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	735	ip->i_d.di_mode &= ~S_ISGID;
				736	}
				737
				738	ip->i_d.di_size = 0;
				739	ip->i_d.di_nextents = 0;
				740	ASSERT(ip->i_d.di_nblocks == 0);
Christoph Hellwig	dff35fd	2008-08-13 16:44:15 +1000	[diff] [blame]	741
				742	nanotime(&tv);
				743	ip->i_d.di_mtime.t_sec = (__int32_t)tv.tv_sec;
				744	ip->i_d.di_mtime.t_nsec = (__int32_t)tv.tv_nsec;
				745	ip->i_d.di_atime = ip->i_d.di_mtime;
				746	ip->i_d.di_ctime = ip->i_d.di_mtime;
				747
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	748	/*
				749	* di_gen will have been taken care of in xfs_iread.
				750	*/
				751	ip->i_d.di_extsize = 0;
				752	ip->i_d.di_dmevmask = 0;
				753	ip->i_d.di_dmstate = 0;
				754	ip->i_d.di_flags = 0;
Christoph Hellwig	93848a9	2013-04-03 16:11:17 +1100	[diff] [blame]	755
				756	if (ip->i_d.di_version == 3) {
				757	ASSERT(ip->i_d.di_ino == ino);
				758	ASSERT(uuid_equal(&ip->i_d.di_uuid, &mp->m_sb.sb_uuid));
				759	ip->i_d.di_crc = 0;
				760	ip->i_d.di_changecount = 1;
				761	ip->i_d.di_lsn = 0;
				762	ip->i_d.di_flags2 = 0;
				763	memset(&(ip->i_d.di_pad2[0]), 0, sizeof(ip->i_d.di_pad2));
				764	ip->i_d.di_crtime = ip->i_d.di_mtime;
				765	}
				766
				767
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	768	flags = XFS_ILOG_CORE;
				769	switch (mode & S_IFMT) {
				770	case S_IFIFO:
				771	case S_IFCHR:
				772	case S_IFBLK:
				773	case S_IFSOCK:
				774	ip->i_d.di_format = XFS_DINODE_FMT_DEV;
				775	ip->i_df.if_u2.if_rdev = rdev;
				776	ip->i_df.if_flags = 0;
				777	flags \|= XFS_ILOG_DEV;
				778	break;
				779	case S_IFREG:
David Chinner	bf90424	2008-10-30 17:36:14 +1100	[diff] [blame]	780	/*
				781	* we can't set up filestreams until after the VFS inode
				782	* is set up properly.
				783	*/
				784	if (pip && xfs_inode_is_filestream(pip))
				785	filestreams = 1;
David Chinner	2a82b8b	2007-07-11 11:09:12 +1000	[diff] [blame]	786	/* fall through */
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	787	case S_IFDIR:
David Chinner	b11f94d	2007-07-11 11:09:33 +1000	[diff] [blame]	788	if (pip && (pip->i_d.di_flags & XFS_DIFLAG_ANY)) {
Nathan Scott	365ca83	2005-06-21 15:39:12 +1000	[diff] [blame]	789	uint di_flags = 0;
				790
Al Viro	abbede1	2011-07-26 02:31:30 -0400	[diff] [blame]	791	if (S_ISDIR(mode)) {
Nathan Scott	365ca83	2005-06-21 15:39:12 +1000	[diff] [blame]	792	if (pip->i_d.di_flags & XFS_DIFLAG_RTINHERIT)
				793	di_flags \|= XFS_DIFLAG_RTINHERIT;
Nathan Scott	dd9f438	2006-01-11 15:28:28 +1100	[diff] [blame]	794	if (pip->i_d.di_flags & XFS_DIFLAG_EXTSZINHERIT) {
				795	di_flags \|= XFS_DIFLAG_EXTSZINHERIT;
				796	ip->i_d.di_extsize = pip->i_d.di_extsize;
				797	}
Al Viro	abbede1	2011-07-26 02:31:30 -0400	[diff] [blame]	798	} else if (S_ISREG(mode)) {
Christoph Hellwig	613d704	2007-10-11 17:44:08 +1000	[diff] [blame]	799	if (pip->i_d.di_flags & XFS_DIFLAG_RTINHERIT)
Nathan Scott	365ca83	2005-06-21 15:39:12 +1000	[diff] [blame]	800	di_flags \|= XFS_DIFLAG_REALTIME;
Nathan Scott	dd9f438	2006-01-11 15:28:28 +1100	[diff] [blame]	801	if (pip->i_d.di_flags & XFS_DIFLAG_EXTSZINHERIT) {
				802	di_flags \|= XFS_DIFLAG_EXTSIZE;
				803	ip->i_d.di_extsize = pip->i_d.di_extsize;
				804	}
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	805	}
				806	if ((pip->i_d.di_flags & XFS_DIFLAG_NOATIME) &&
				807	xfs_inherit_noatime)
Nathan Scott	365ca83	2005-06-21 15:39:12 +1000	[diff] [blame]	808	di_flags \|= XFS_DIFLAG_NOATIME;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	809	if ((pip->i_d.di_flags & XFS_DIFLAG_NODUMP) &&
				810	xfs_inherit_nodump)
Nathan Scott	365ca83	2005-06-21 15:39:12 +1000	[diff] [blame]	811	di_flags \|= XFS_DIFLAG_NODUMP;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	812	if ((pip->i_d.di_flags & XFS_DIFLAG_SYNC) &&
				813	xfs_inherit_sync)
Nathan Scott	365ca83	2005-06-21 15:39:12 +1000	[diff] [blame]	814	di_flags \|= XFS_DIFLAG_SYNC;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	815	if ((pip->i_d.di_flags & XFS_DIFLAG_NOSYMLINKS) &&
				816	xfs_inherit_nosymlinks)
Nathan Scott	365ca83	2005-06-21 15:39:12 +1000	[diff] [blame]	817	di_flags \|= XFS_DIFLAG_NOSYMLINKS;
				818	if (pip->i_d.di_flags & XFS_DIFLAG_PROJINHERIT)
				819	di_flags \|= XFS_DIFLAG_PROJINHERIT;
Barry Naujok	d3446ea	2006-06-09 14:54:19 +1000	[diff] [blame]	820	if ((pip->i_d.di_flags & XFS_DIFLAG_NODEFRAG) &&
				821	xfs_inherit_nodefrag)
				822	di_flags \|= XFS_DIFLAG_NODEFRAG;
David Chinner	2a82b8b	2007-07-11 11:09:12 +1000	[diff] [blame]	823	if (pip->i_d.di_flags & XFS_DIFLAG_FILESTREAM)
				824	di_flags \|= XFS_DIFLAG_FILESTREAM;
Nathan Scott	365ca83	2005-06-21 15:39:12 +1000	[diff] [blame]	825	ip->i_d.di_flags \|= di_flags;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	826	}
				827	/* FALLTHROUGH */
				828	case S_IFLNK:
				829	ip->i_d.di_format = XFS_DINODE_FMT_EXTENTS;
				830	ip->i_df.if_flags = XFS_IFEXTENTS;
				831	ip->i_df.if_bytes = ip->i_df.if_real_bytes = 0;
				832	ip->i_df.if_u1.if_extents = NULL;
				833	break;
				834	default:
				835	ASSERT(0);
				836	}
				837	/*
				838	* Attribute fork settings for new inode.
				839	*/
				840	ip->i_d.di_aformat = XFS_DINODE_FMT_EXTENTS;
				841	ip->i_d.di_anextents = 0;
				842
				843	/*
				844	* Log the new values stuffed into the inode.
				845	*/
Christoph Hellwig	ddc3415	2011-09-19 15:00:54 +0000	[diff] [blame]	846	xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	847	xfs_trans_log_inode(tp, ip, flags);
				848
Nathan Scott	b83bd13	2006-06-09 16:48:30 +1000	[diff] [blame]	849	/* now that we have an i_mode we can setup inode ops and unlock */
Christoph Hellwig	41be8be	2008-08-13 16:23:13 +1000	[diff] [blame]	850	xfs_setup_inode(ip);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	851
David Chinner	bf90424	2008-10-30 17:36:14 +1100	[diff] [blame]	852	/* now we have set up the vfs inode we can associate the filestream */
				853	if (filestreams) {
				854	error = xfs_filestream_associate(pip, ip);
				855	if (error < 0)
				856	return -error;
				857	if (!error)
				858	xfs_iflags_set(ip, XFS_IFILESTREAM);
				859	}
				860
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	861	*ipp = ip;
				862	return 0;
				863	}
				864
Dave Chinner	e546cb7	2013-08-12 20:49:47 +1000	[diff] [blame]	865	/*
				866	* Allocates a new inode from disk and return a pointer to the
				867	* incore copy. This routine will internally commit the current
				868	* transaction and allocate a new one if the Space Manager needed
				869	* to do an allocation to replenish the inode free-list.
				870	*
				871	* This routine is designed to be called from xfs_create and
				872	* xfs_create_dir.
				873	*
				874	*/
				875	int
				876	xfs_dir_ialloc(
				877	xfs_trans_t *tpp, / input: current transaction;
				878	output: may be a new transaction. */
				879	xfs_inode_t dp, / directory within whose allocate
				880	the inode. */
				881	umode_t mode,
				882	xfs_nlink_t nlink,
				883	xfs_dev_t rdev,
				884	prid_t prid, /* project id */
				885	int okalloc, /* ok to allocate new space */
				886	xfs_inode_t *ipp, / pointer to inode; it will be
				887	locked. */
				888	int *committed)
				889
				890	{
				891	xfs_trans_t *tp;
				892	xfs_trans_t *ntp;
				893	xfs_inode_t *ip;
				894	xfs_buf_t *ialloc_context = NULL;
				895	int code;
Dave Chinner	e546cb7	2013-08-12 20:49:47 +1000	[diff] [blame]	896	void *dqinfo;
				897	uint tflags;
				898
				899	tp = *tpp;
				900	ASSERT(tp->t_flags & XFS_TRANS_PERM_LOG_RES);
				901
				902	/*
				903	* xfs_ialloc will return a pointer to an incore inode if
				904	* the Space Manager has an available inode on the free
				905	* list. Otherwise, it will do an allocation and replenish
				906	* the freelist. Since we can only do one allocation per
				907	* transaction without deadlocks, we will need to commit the
				908	* current transaction and start a new one. We will then
				909	* need to call xfs_ialloc again to get the inode.
				910	*
				911	* If xfs_ialloc did an allocation to replenish the freelist,
				912	* it returns the bp containing the head of the freelist as
				913	* ialloc_context. We will hold a lock on it across the
				914	* transaction commit so that no other process can steal
				915	* the inode(s) that we've just allocated.
				916	*/
				917	code = xfs_ialloc(tp, dp, mode, nlink, rdev, prid, okalloc,
				918	&ialloc_context, &ip);
				919
				920	/*
				921	* Return an error if we were unable to allocate a new inode.
				922	* This should only happen if we run out of space on disk or
				923	* encounter a disk error.
				924	*/
				925	if (code) {
				926	*ipp = NULL;
				927	return code;
				928	}
				929	if (!ialloc_context && !ip) {
				930	*ipp = NULL;
				931	return XFS_ERROR(ENOSPC);
				932	}
				933
				934	/*
				935	* If the AGI buffer is non-NULL, then we were unable to get an
				936	* inode in one operation. We need to commit the current
				937	* transaction and call xfs_ialloc() again. It is guaranteed
				938	* to succeed the second time.
				939	*/
				940	if (ialloc_context) {
Jie Liu	3d3c8b5	2013-08-12 20:49:59 +1000	[diff] [blame]	941	struct xfs_trans_res tres;
				942
Dave Chinner	e546cb7	2013-08-12 20:49:47 +1000	[diff] [blame]	943	/*
				944	* Normally, xfs_trans_commit releases all the locks.
				945	* We call bhold to hang on to the ialloc_context across
				946	* the commit. Holding this buffer prevents any other
				947	* processes from doing any allocations in this
				948	* allocation group.
				949	*/
				950	xfs_trans_bhold(tp, ialloc_context);
				951	/*
				952	* Save the log reservation so we can use
				953	* them in the next transaction.
				954	*/
Jie Liu	3d3c8b5	2013-08-12 20:49:59 +1000	[diff] [blame]	955	tres.tr_logres = xfs_trans_get_log_res(tp);
				956	tres.tr_logcount = xfs_trans_get_log_count(tp);
Dave Chinner	e546cb7	2013-08-12 20:49:47 +1000	[diff] [blame]	957
				958	/*
				959	* We want the quota changes to be associated with the next
				960	* transaction, NOT this one. So, detach the dqinfo from this
				961	* and attach it to the next transaction.
				962	*/
				963	dqinfo = NULL;
				964	tflags = 0;
				965	if (tp->t_dqinfo) {
				966	dqinfo = (void *)tp->t_dqinfo;
				967	tp->t_dqinfo = NULL;
				968	tflags = tp->t_flags & XFS_TRANS_DQ_DIRTY;
				969	tp->t_flags &= ~(XFS_TRANS_DQ_DIRTY);
				970	}
				971
				972	ntp = xfs_trans_dup(tp);
				973	code = xfs_trans_commit(tp, 0);
				974	tp = ntp;
				975	if (committed != NULL) {
				976	*committed = 1;
				977	}
				978	/*
				979	* If we get an error during the commit processing,
				980	* release the buffer that is still held and return
				981	* to the caller.
				982	*/
				983	if (code) {
				984	xfs_buf_relse(ialloc_context);
				985	if (dqinfo) {
				986	tp->t_dqinfo = dqinfo;
				987	xfs_trans_free_dqinfo(tp);
				988	}
				989	*tpp = ntp;
				990	*ipp = NULL;
				991	return code;
				992	}
				993
				994	/*
				995	* transaction commit worked ok so we can drop the extra ticket
				996	* reference that we gained in xfs_trans_dup()
				997	*/
				998	xfs_log_ticket_put(tp->t_ticket);
Jie Liu	3d3c8b5	2013-08-12 20:49:59 +1000	[diff] [blame]	999	tres.tr_logflags = XFS_TRANS_PERM_LOG_RES;
				1000	code = xfs_trans_reserve(tp, &tres, 0, 0);
				1001
Dave Chinner	e546cb7	2013-08-12 20:49:47 +1000	[diff] [blame]	1002	/*
				1003	* Re-attach the quota info that we detached from prev trx.
				1004	*/
				1005	if (dqinfo) {
				1006	tp->t_dqinfo = dqinfo;
				1007	tp->t_flags \|= tflags;
				1008	}
				1009
				1010	if (code) {
				1011	xfs_buf_relse(ialloc_context);
				1012	*tpp = ntp;
				1013	*ipp = NULL;
				1014	return code;
				1015	}
				1016	xfs_trans_bjoin(tp, ialloc_context);
				1017
				1018	/*
				1019	* Call ialloc again. Since we've locked out all
				1020	* other allocations in this allocation group,
				1021	* this call should always succeed.
				1022	*/
				1023	code = xfs_ialloc(tp, dp, mode, nlink, rdev, prid,
				1024	okalloc, &ialloc_context, &ip);
				1025
				1026	/*
				1027	* If we get an error at this point, return to the caller
				1028	* so that the current transaction can be aborted.
				1029	*/
				1030	if (code) {
				1031	*tpp = tp;
				1032	*ipp = NULL;
				1033	return code;
				1034	}
				1035	ASSERT(!ialloc_context && ip);
				1036
				1037	} else {
				1038	if (committed != NULL)
				1039	*committed = 0;
				1040	}
				1041
				1042	*ipp = ip;
				1043	*tpp = tp;
				1044
				1045	return 0;
				1046	}
				1047
				1048	/*
				1049	* Decrement the link count on an inode & log the change.
				1050	* If this causes the link count to go to zero, initiate the
				1051	* logging activity required to truncate a file.
				1052	*/
				1053	int /* error */
				1054	xfs_droplink(
				1055	xfs_trans_t *tp,
				1056	xfs_inode_t *ip)
				1057	{
				1058	int error;
				1059
				1060	xfs_trans_ichgtime(tp, ip, XFS_ICHGTIME_CHG);
				1061
				1062	ASSERT (ip->i_d.di_nlink > 0);
				1063	ip->i_d.di_nlink--;
				1064	drop_nlink(VFS_I(ip));
				1065	xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
				1066
				1067	error = 0;
				1068	if (ip->i_d.di_nlink == 0) {
				1069	/*
				1070	* We're dropping the last link to this file.
				1071	* Move the on-disk inode to the AGI unlinked list.
				1072	* From xfs_inactive() we will pull the inode from
				1073	* the list and free it.
				1074	*/
				1075	error = xfs_iunlink(tp, ip);
				1076	}
				1077	return error;
				1078	}
				1079
				1080	/*
				1081	* This gets called when the inode's version needs to be changed from 1 to 2.
				1082	* Currently this happens when the nlink field overflows the old 16-bit value
				1083	* or when chproj is called to change the project for the first time.
				1084	* As a side effect the superblock version will also get rev'd
				1085	* to contain the NLINK bit.
				1086	*/
				1087	void
				1088	xfs_bump_ino_vers2(
				1089	xfs_trans_t *tp,
				1090	xfs_inode_t *ip)
				1091	{
				1092	xfs_mount_t *mp;
				1093
				1094	ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
				1095	ASSERT(ip->i_d.di_version == 1);
				1096
				1097	ip->i_d.di_version = 2;
				1098	ip->i_d.di_onlink = 0;
				1099	memset(&(ip->i_d.di_pad[0]), 0, sizeof(ip->i_d.di_pad));
				1100	mp = tp->t_mountp;
				1101	if (!xfs_sb_version_hasnlink(&mp->m_sb)) {
				1102	spin_lock(&mp->m_sb_lock);
				1103	if (!xfs_sb_version_hasnlink(&mp->m_sb)) {
				1104	xfs_sb_version_addnlink(&mp->m_sb);
				1105	spin_unlock(&mp->m_sb_lock);
				1106	xfs_mod_sb(tp, XFS_SB_VERSIONNUM);
				1107	} else {
				1108	spin_unlock(&mp->m_sb_lock);
				1109	}
				1110	}
				1111	/* Caller must log the inode */
				1112	}
				1113
				1114	/*
				1115	* Increment the link count on an inode & log the change.
				1116	*/
				1117	int
				1118	xfs_bumplink(
				1119	xfs_trans_t *tp,
				1120	xfs_inode_t *ip)
				1121	{
				1122	xfs_trans_ichgtime(tp, ip, XFS_ICHGTIME_CHG);
				1123
				1124	ASSERT(ip->i_d.di_nlink > 0);
				1125	ip->i_d.di_nlink++;
				1126	inc_nlink(VFS_I(ip));
				1127	if ((ip->i_d.di_version == 1) &&
				1128	(ip->i_d.di_nlink > XFS_MAXLINK_1)) {
				1129	/*
				1130	* The inode has increased its number of links beyond
				1131	* what can fit in an old format inode. It now needs
				1132	* to be converted to a version 2 inode with a 32 bit
				1133	* link count. If this is the first inode in the file
				1134	* system to do this, then we need to bump the superblock
				1135	* version number as well.
				1136	*/
				1137	xfs_bump_ino_vers2(tp, ip);
				1138	}
				1139
				1140	xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
				1141	return 0;
				1142	}
				1143
Dave Chinner	c24b5df	2013-08-12 20:49:45 +1000	[diff] [blame]	1144	int
				1145	xfs_create(
				1146	xfs_inode_t *dp,
				1147	struct xfs_name *name,
				1148	umode_t mode,
				1149	xfs_dev_t rdev,
				1150	xfs_inode_t **ipp)
				1151	{
				1152	int is_dir = S_ISDIR(mode);
				1153	struct xfs_mount *mp = dp->i_mount;
				1154	struct xfs_inode *ip = NULL;
				1155	struct xfs_trans *tp = NULL;
				1156	int error;
				1157	xfs_bmap_free_t free_list;
				1158	xfs_fsblock_t first_block;
				1159	bool unlock_dp_on_error = false;
				1160	uint cancel_flags;
				1161	int committed;
				1162	prid_t prid;
				1163	struct xfs_dquot *udqp = NULL;
				1164	struct xfs_dquot *gdqp = NULL;
				1165	struct xfs_dquot *pdqp = NULL;
Jie Liu	3d3c8b5	2013-08-12 20:49:59 +1000	[diff] [blame]	1166	struct xfs_trans_res tres;
Dave Chinner	c24b5df	2013-08-12 20:49:45 +1000	[diff] [blame]	1167	uint resblks;
Dave Chinner	c24b5df	2013-08-12 20:49:45 +1000	[diff] [blame]	1168
				1169	trace_xfs_create(dp, name);
				1170
				1171	if (XFS_FORCED_SHUTDOWN(mp))
				1172	return XFS_ERROR(EIO);
				1173
				1174	if (dp->i_d.di_flags & XFS_DIFLAG_PROJINHERIT)
				1175	prid = xfs_get_projid(dp);
				1176	else
				1177	prid = XFS_PROJID_DEFAULT;
				1178
				1179	/*
				1180	* Make sure that we have allocated dquot(s) on disk.
				1181	*/
Dwight Engen	7aab1b2	2013-08-15 14:08:01 -0400	[diff] [blame]	1182	error = xfs_qm_vop_dqalloc(dp, xfs_kuid_to_uid(current_fsuid()),
				1183	xfs_kgid_to_gid(current_fsgid()), prid,
Dave Chinner	c24b5df	2013-08-12 20:49:45 +1000	[diff] [blame]	1184	XFS_QMOPT_QUOTALL \| XFS_QMOPT_INHERIT,
				1185	&udqp, &gdqp, &pdqp);
				1186	if (error)
				1187	return error;
				1188
				1189	if (is_dir) {
				1190	rdev = 0;
				1191	resblks = XFS_MKDIR_SPACE_RES(mp, name->len);
Jie Liu	3d3c8b5	2013-08-12 20:49:59 +1000	[diff] [blame]	1192	tres.tr_logres = M_RES(mp)->tr_mkdir.tr_logres;
				1193	tres.tr_logcount = XFS_MKDIR_LOG_COUNT;
Dave Chinner	c24b5df	2013-08-12 20:49:45 +1000	[diff] [blame]	1194	tp = xfs_trans_alloc(mp, XFS_TRANS_MKDIR);
				1195	} else {
				1196	resblks = XFS_CREATE_SPACE_RES(mp, name->len);
Jie Liu	3d3c8b5	2013-08-12 20:49:59 +1000	[diff] [blame]	1197	tres.tr_logres = M_RES(mp)->tr_create.tr_logres;
				1198	tres.tr_logcount = XFS_CREATE_LOG_COUNT;
Dave Chinner	c24b5df	2013-08-12 20:49:45 +1000	[diff] [blame]	1199	tp = xfs_trans_alloc(mp, XFS_TRANS_CREATE);
				1200	}
				1201
				1202	cancel_flags = XFS_TRANS_RELEASE_LOG_RES;
				1203
				1204	/*
				1205	* Initially assume that the file does not exist and
				1206	* reserve the resources for that case. If that is not
				1207	* the case we'll drop the one we have and get a more
				1208	* appropriate transaction later.
				1209	*/
Jie Liu	3d3c8b5	2013-08-12 20:49:59 +1000	[diff] [blame]	1210	tres.tr_logflags = XFS_TRANS_PERM_LOG_RES;
				1211	error = xfs_trans_reserve(tp, &tres, resblks, 0);
Dave Chinner	c24b5df	2013-08-12 20:49:45 +1000	[diff] [blame]	1212	if (error == ENOSPC) {
				1213	/* flush outstanding delalloc blocks and retry */
				1214	xfs_flush_inodes(mp);
Jie Liu	3d3c8b5	2013-08-12 20:49:59 +1000	[diff] [blame]	1215	error = xfs_trans_reserve(tp, &tres, resblks, 0);
Dave Chinner	c24b5df	2013-08-12 20:49:45 +1000	[diff] [blame]	1216	}
				1217	if (error == ENOSPC) {
				1218	/* No space at all so try a "no-allocation" reservation */
				1219	resblks = 0;
Jie Liu	3d3c8b5	2013-08-12 20:49:59 +1000	[diff] [blame]	1220	error = xfs_trans_reserve(tp, &tres, 0, 0);
Dave Chinner	c24b5df	2013-08-12 20:49:45 +1000	[diff] [blame]	1221	}
				1222	if (error) {
				1223	cancel_flags = 0;
				1224	goto out_trans_cancel;
				1225	}
				1226
				1227	xfs_ilock(dp, XFS_ILOCK_EXCL \| XFS_ILOCK_PARENT);
				1228	unlock_dp_on_error = true;
				1229
				1230	xfs_bmap_init(&free_list, &first_block);
				1231
				1232	/*
				1233	* Reserve disk quota and the inode.
				1234	*/
				1235	error = xfs_trans_reserve_quota(tp, mp, udqp, gdqp,
				1236	pdqp, resblks, 1, 0);
				1237	if (error)
				1238	goto out_trans_cancel;
				1239
				1240	error = xfs_dir_canenter(tp, dp, name, resblks);
				1241	if (error)
				1242	goto out_trans_cancel;
				1243
				1244	/*
				1245	* A newly created regular or special file just has one directory
				1246	* entry pointing to them, but a directory also the "." entry
				1247	* pointing to itself.
				1248	*/
				1249	error = xfs_dir_ialloc(&tp, dp, mode, is_dir ? 2 : 1, rdev,
				1250	prid, resblks > 0, &ip, &committed);
				1251	if (error) {
				1252	if (error == ENOSPC)
				1253	goto out_trans_cancel;
				1254	goto out_trans_abort;
				1255	}
				1256
				1257	/*
				1258	* Now we join the directory inode to the transaction. We do not do it
				1259	* earlier because xfs_dir_ialloc might commit the previous transaction
				1260	* (and release all the locks). An error from here on will result in
				1261	* the transaction cancel unlocking dp so don't do it explicitly in the
				1262	* error path.
				1263	*/
				1264	xfs_trans_ijoin(tp, dp, XFS_ILOCK_EXCL);
				1265	unlock_dp_on_error = false;
				1266
				1267	error = xfs_dir_createname(tp, dp, name, ip->i_ino,
				1268	&first_block, &free_list, resblks ?
				1269	resblks - XFS_IALLOC_SPACE_RES(mp) : 0);
				1270	if (error) {
				1271	ASSERT(error != ENOSPC);
				1272	goto out_trans_abort;
				1273	}
				1274	xfs_trans_ichgtime(tp, dp, XFS_ICHGTIME_MOD \| XFS_ICHGTIME_CHG);
				1275	xfs_trans_log_inode(tp, dp, XFS_ILOG_CORE);
				1276
				1277	if (is_dir) {
				1278	error = xfs_dir_init(tp, ip, dp);
				1279	if (error)
				1280	goto out_bmap_cancel;
				1281
				1282	error = xfs_bumplink(tp, dp);
				1283	if (error)
				1284	goto out_bmap_cancel;
				1285	}
				1286
				1287	/*
				1288	* If this is a synchronous mount, make sure that the
				1289	* create transaction goes to disk before returning to
				1290	* the user.
				1291	*/
				1292	if (mp->m_flags & (XFS_MOUNT_WSYNC\|XFS_MOUNT_DIRSYNC))
				1293	xfs_trans_set_sync(tp);
				1294
				1295	/*
				1296	* Attach the dquot(s) to the inodes and modify them incore.
				1297	* These ids of the inode couldn't have changed since the new
				1298	* inode has been locked ever since it was created.
				1299	*/
				1300	xfs_qm_vop_create_dqattach(tp, ip, udqp, gdqp, pdqp);
				1301
				1302	error = xfs_bmap_finish(&tp, &free_list, &committed);
				1303	if (error)
				1304	goto out_bmap_cancel;
				1305
				1306	error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
				1307	if (error)
				1308	goto out_release_inode;
				1309
				1310	xfs_qm_dqrele(udqp);
				1311	xfs_qm_dqrele(gdqp);
				1312	xfs_qm_dqrele(pdqp);
				1313
				1314	*ipp = ip;
				1315	return 0;
				1316
				1317	out_bmap_cancel:
				1318	xfs_bmap_cancel(&free_list);
				1319	out_trans_abort:
				1320	cancel_flags \|= XFS_TRANS_ABORT;
				1321	out_trans_cancel:
				1322	xfs_trans_cancel(tp, cancel_flags);
				1323	out_release_inode:
				1324	/*
				1325	* Wait until after the current transaction is aborted to
				1326	* release the inode. This prevents recursive transactions
				1327	* and deadlocks from xfs_inactive.
				1328	*/
				1329	if (ip)
				1330	IRELE(ip);
				1331
				1332	xfs_qm_dqrele(udqp);
				1333	xfs_qm_dqrele(gdqp);
				1334	xfs_qm_dqrele(pdqp);
				1335
				1336	if (unlock_dp_on_error)
				1337	xfs_iunlock(dp, XFS_ILOCK_EXCL);
				1338	return error;
				1339	}
				1340
				1341	int
				1342	xfs_link(
				1343	xfs_inode_t *tdp,
				1344	xfs_inode_t *sip,
				1345	struct xfs_name *target_name)
				1346	{
				1347	xfs_mount_t *mp = tdp->i_mount;
				1348	xfs_trans_t *tp;
				1349	int error;
				1350	xfs_bmap_free_t free_list;
				1351	xfs_fsblock_t first_block;
				1352	int cancel_flags;
				1353	int committed;
				1354	int resblks;
				1355
				1356	trace_xfs_link(tdp, target_name);
				1357
				1358	ASSERT(!S_ISDIR(sip->i_d.di_mode));
				1359
				1360	if (XFS_FORCED_SHUTDOWN(mp))
				1361	return XFS_ERROR(EIO);
				1362
				1363	error = xfs_qm_dqattach(sip, 0);
				1364	if (error)
				1365	goto std_return;
				1366
				1367	error = xfs_qm_dqattach(tdp, 0);
				1368	if (error)
				1369	goto std_return;
				1370
				1371	tp = xfs_trans_alloc(mp, XFS_TRANS_LINK);
				1372	cancel_flags = XFS_TRANS_RELEASE_LOG_RES;
				1373	resblks = XFS_LINK_SPACE_RES(mp, target_name->len);
Jie Liu	3d3c8b5	2013-08-12 20:49:59 +1000	[diff] [blame]	1374	error = xfs_trans_reserve(tp, &M_RES(mp)->tr_link, resblks, 0);
Dave Chinner	c24b5df	2013-08-12 20:49:45 +1000	[diff] [blame]	1375	if (error == ENOSPC) {
				1376	resblks = 0;
Jie Liu	3d3c8b5	2013-08-12 20:49:59 +1000	[diff] [blame]	1377	error = xfs_trans_reserve(tp, &M_RES(mp)->tr_link, 0, 0);
Dave Chinner	c24b5df	2013-08-12 20:49:45 +1000	[diff] [blame]	1378	}
				1379	if (error) {
				1380	cancel_flags = 0;
				1381	goto error_return;
				1382	}
				1383
				1384	xfs_lock_two_inodes(sip, tdp, XFS_ILOCK_EXCL);
				1385
				1386	xfs_trans_ijoin(tp, sip, XFS_ILOCK_EXCL);
				1387	xfs_trans_ijoin(tp, tdp, XFS_ILOCK_EXCL);
				1388
				1389	/*
				1390	* If we are using project inheritance, we only allow hard link
				1391	* creation in our tree when the project IDs are the same; else
				1392	* the tree quota mechanism could be circumvented.
				1393	*/
				1394	if (unlikely((tdp->i_d.di_flags & XFS_DIFLAG_PROJINHERIT) &&
				1395	(xfs_get_projid(tdp) != xfs_get_projid(sip)))) {
				1396	error = XFS_ERROR(EXDEV);
				1397	goto error_return;
				1398	}
				1399
				1400	error = xfs_dir_canenter(tp, tdp, target_name, resblks);
				1401	if (error)
				1402	goto error_return;
				1403
				1404	xfs_bmap_init(&free_list, &first_block);
				1405
				1406	error = xfs_dir_createname(tp, tdp, target_name, sip->i_ino,
				1407	&first_block, &free_list, resblks);
				1408	if (error)
				1409	goto abort_return;
				1410	xfs_trans_ichgtime(tp, tdp, XFS_ICHGTIME_MOD \| XFS_ICHGTIME_CHG);
				1411	xfs_trans_log_inode(tp, tdp, XFS_ILOG_CORE);
				1412
				1413	error = xfs_bumplink(tp, sip);
				1414	if (error)
				1415	goto abort_return;
				1416
				1417	/*
				1418	* If this is a synchronous mount, make sure that the
				1419	* link transaction goes to disk before returning to
				1420	* the user.
				1421	*/
				1422	if (mp->m_flags & (XFS_MOUNT_WSYNC\|XFS_MOUNT_DIRSYNC)) {
				1423	xfs_trans_set_sync(tp);
				1424	}
				1425
				1426	error = xfs_bmap_finish (&tp, &free_list, &committed);
				1427	if (error) {
				1428	xfs_bmap_cancel(&free_list);
				1429	goto abort_return;
				1430	}
				1431
				1432	return xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
				1433
				1434	abort_return:
				1435	cancel_flags \|= XFS_TRANS_ABORT;
				1436	error_return:
				1437	xfs_trans_cancel(tp, cancel_flags);
				1438	std_return:
				1439	return error;
				1440	}
				1441
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1442	/*
Christoph Hellwig	8f04c47	2011-07-08 14:34:34 +0200	[diff] [blame]	1443	* Free up the underlying blocks past new_size. The new size must be smaller
				1444	* than the current size. This routine can be used both for the attribute and
				1445	* data fork, and does not modify the inode size, which is left to the caller.
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1446	*
David Chinner	f648505	2008-04-17 16:50:04 +1000	[diff] [blame]	1447	* The transaction passed to this routine must have made a permanent log
				1448	* reservation of at least XFS_ITRUNCATE_LOG_RES. This routine may commit the
				1449	* given transaction and start new ones, so make sure everything involved in
				1450	* the transaction is tidy before calling here. Some transaction will be
				1451	* returned to the caller to be committed. The incoming transaction must
				1452	* already include the inode, and both inode locks must be held exclusively.
				1453	* The inode must also be "held" within the transaction. On return the inode
				1454	* will be "held" within the returned transaction. This routine does NOT
				1455	* require any disk space to be reserved for it within the transaction.
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1456	*
David Chinner	f648505	2008-04-17 16:50:04 +1000	[diff] [blame]	1457	* If we get an error, we must return with the inode locked and linked into the
				1458	* current transaction. This keeps things simple for the higher level code,
				1459	* because it always knows that the inode is locked and held in the transaction
				1460	* that returns to it whether errors occur or not. We don't mark the inode
				1461	* dirty on error so that transactions can be easily aborted if possible.
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1462	*/
				1463	int
Christoph Hellwig	8f04c47	2011-07-08 14:34:34 +0200	[diff] [blame]	1464	xfs_itruncate_extents(
				1465	struct xfs_trans **tpp,
				1466	struct xfs_inode *ip,
				1467	int whichfork,
				1468	xfs_fsize_t new_size)
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1469	{
Christoph Hellwig	8f04c47	2011-07-08 14:34:34 +0200	[diff] [blame]	1470	struct xfs_mount *mp = ip->i_mount;
				1471	struct xfs_trans tp = tpp;
				1472	struct xfs_trans *ntp;
				1473	xfs_bmap_free_t free_list;
				1474	xfs_fsblock_t first_block;
				1475	xfs_fileoff_t first_unmap_block;
				1476	xfs_fileoff_t last_block;
				1477	xfs_filblks_t unmap_len;
				1478	int committed;
				1479	int error = 0;
				1480	int done = 0;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1481
Christoph Hellwig	0b56185	2012-07-04 11:13:31 -0400	[diff] [blame]	1482	ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
				1483	ASSERT(!atomic_read(&VFS_I(ip)->i_count) \|\|
				1484	xfs_isilocked(ip, XFS_IOLOCK_EXCL));
Christoph Hellwig	ce7ae151	2011-12-18 20:00:11 +0000	[diff] [blame]	1485	ASSERT(new_size <= XFS_ISIZE(ip));
Christoph Hellwig	8f04c47	2011-07-08 14:34:34 +0200	[diff] [blame]	1486	ASSERT(tp->t_flags & XFS_TRANS_PERM_LOG_RES);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1487	ASSERT(ip->i_itemp != NULL);
Christoph Hellwig	898621d	2010-06-24 11:36:58 +1000	[diff] [blame]	1488	ASSERT(ip->i_itemp->ili_lock_flags == 0);
Christoph Hellwig	8f04c47	2011-07-08 14:34:34 +0200	[diff] [blame]	1489	ASSERT(!XFS_NOT_DQATTACHED(mp, ip));
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1490
Christoph Hellwig	673e8e5	2011-12-18 20:00:04 +0000	[diff] [blame]	1491	trace_xfs_itruncate_extents_start(ip, new_size);
				1492
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1493	/*
				1494	* Since it is possible for space to become allocated beyond
				1495	* the end of the file (in a crash where the space is allocated
				1496	* but the inode size is not yet updated), simply remove any
				1497	* blocks which show up between the new EOF and the maximum
				1498	* possible file size. If the first block to be removed is
				1499	* beyond the maximum file size (ie it is the same as last_block),
				1500	* then there is nothing to do.
				1501	*/
Christoph Hellwig	8f04c47	2011-07-08 14:34:34 +0200	[diff] [blame]	1502	first_unmap_block = XFS_B_TO_FSB(mp, (xfs_ufsize_t)new_size);
Dave Chinner	3297238	2012-06-08 15:44:54 +1000	[diff] [blame]	1503	last_block = XFS_B_TO_FSB(mp, mp->m_super->s_maxbytes);
Christoph Hellwig	8f04c47	2011-07-08 14:34:34 +0200	[diff] [blame]	1504	if (first_unmap_block == last_block)
				1505	return 0;
				1506
				1507	ASSERT(first_unmap_block < last_block);
				1508	unmap_len = last_block - first_unmap_block + 1;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1509	while (!done) {
Eric Sandeen	9d87c31	2009-01-14 23:22:07 -0600	[diff] [blame]	1510	xfs_bmap_init(&free_list, &first_block);
Christoph Hellwig	8f04c47	2011-07-08 14:34:34 +0200	[diff] [blame]	1511	error = xfs_bunmapi(tp, ip,
Olaf Weber	3e57ecf	2006-06-09 14:48:12 +1000	[diff] [blame]	1512	first_unmap_block, unmap_len,
Christoph Hellwig	8f04c47	2011-07-08 14:34:34 +0200	[diff] [blame]	1513	xfs_bmapi_aflag(whichfork),
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1514	XFS_ITRUNC_MAX_EXTENTS,
Olaf Weber	3e57ecf	2006-06-09 14:48:12 +1000	[diff] [blame]	1515	&first_block, &free_list,
Christoph Hellwig	b4e9181	2010-06-23 18:11:15 +1000	[diff] [blame]	1516	&done);
Christoph Hellwig	8f04c47	2011-07-08 14:34:34 +0200	[diff] [blame]	1517	if (error)
				1518	goto out_bmap_cancel;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1519
				1520	/*
				1521	* Duplicate the transaction that has the permanent
				1522	* reservation and commit the old transaction.
				1523	*/
Christoph Hellwig	8f04c47	2011-07-08 14:34:34 +0200	[diff] [blame]	1524	error = xfs_bmap_finish(&tp, &free_list, &committed);
Christoph Hellwig	898621d	2010-06-24 11:36:58 +1000	[diff] [blame]	1525	if (committed)
Christoph Hellwig	ddc3415	2011-09-19 15:00:54 +0000	[diff] [blame]	1526	xfs_trans_ijoin(tp, ip, 0);
Christoph Hellwig	8f04c47	2011-07-08 14:34:34 +0200	[diff] [blame]	1527	if (error)
				1528	goto out_bmap_cancel;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1529
				1530	if (committed) {
				1531	/*
David Chinner	f648505	2008-04-17 16:50:04 +1000	[diff] [blame]	1532	* Mark the inode dirty so it will be logged and
David Chinner	e5720ee	2008-04-10 12:21:18 +1000	[diff] [blame]	1533	* moved forward in the log as part of every commit.
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1534	*/
Christoph Hellwig	8f04c47	2011-07-08 14:34:34 +0200	[diff] [blame]	1535	xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1536	}
David Chinner	f648505	2008-04-17 16:50:04 +1000	[diff] [blame]	1537
Christoph Hellwig	8f04c47	2011-07-08 14:34:34 +0200	[diff] [blame]	1538	ntp = xfs_trans_dup(tp);
				1539	error = xfs_trans_commit(tp, 0);
				1540	tp = ntp;
David Chinner	e5720ee	2008-04-10 12:21:18 +1000	[diff] [blame]	1541
Christoph Hellwig	ddc3415	2011-09-19 15:00:54 +0000	[diff] [blame]	1542	xfs_trans_ijoin(tp, ip, 0);
David Chinner	f648505	2008-04-17 16:50:04 +1000	[diff] [blame]	1543
Dave Chinner	cc09c0d	2008-11-17 17:37:10 +1100	[diff] [blame]	1544	if (error)
Christoph Hellwig	8f04c47	2011-07-08 14:34:34 +0200	[diff] [blame]	1545	goto out;
				1546
Dave Chinner	cc09c0d	2008-11-17 17:37:10 +1100	[diff] [blame]	1547	/*
Christoph Hellwig	8f04c47	2011-07-08 14:34:34 +0200	[diff] [blame]	1548	* Transaction commit worked ok so we can drop the extra ticket
Dave Chinner	cc09c0d	2008-11-17 17:37:10 +1100	[diff] [blame]	1549	* reference that we gained in xfs_trans_dup()
				1550	*/
Christoph Hellwig	8f04c47	2011-07-08 14:34:34 +0200	[diff] [blame]	1551	xfs_log_ticket_put(tp->t_ticket);
Jie Liu	3d3c8b5	2013-08-12 20:49:59 +1000	[diff] [blame]	1552	error = xfs_trans_reserve(tp, &M_RES(mp)->tr_itruncate, 0, 0);
David Chinner	f648505	2008-04-17 16:50:04 +1000	[diff] [blame]	1553	if (error)
Christoph Hellwig	8f04c47	2011-07-08 14:34:34 +0200	[diff] [blame]	1554	goto out;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1555	}
Christoph Hellwig	8f04c47	2011-07-08 14:34:34 +0200	[diff] [blame]	1556
Christoph Hellwig	673e8e5	2011-12-18 20:00:04 +0000	[diff] [blame]	1557	/*
				1558	* Always re-log the inode so that our permanent transaction can keep
				1559	* on rolling it forward in the log.
				1560	*/
				1561	xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
				1562
				1563	trace_xfs_itruncate_extents_end(ip, new_size);
				1564
Christoph Hellwig	8f04c47	2011-07-08 14:34:34 +0200	[diff] [blame]	1565	out:
				1566	*tpp = tp;
				1567	return error;
				1568	out_bmap_cancel:
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1569	/*
Christoph Hellwig	8f04c47	2011-07-08 14:34:34 +0200	[diff] [blame]	1570	* If the bunmapi call encounters an error, return to the caller where
				1571	* the transaction can be properly aborted. We just need to make sure
				1572	* we're not holding any resources that we were not when we came in.
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1573	*/
Christoph Hellwig	8f04c47	2011-07-08 14:34:34 +0200	[diff] [blame]	1574	xfs_bmap_cancel(&free_list);
				1575	goto out;
				1576	}
				1577
Dave Chinner	c24b5df	2013-08-12 20:49:45 +1000	[diff] [blame]	1578	int
				1579	xfs_release(
				1580	xfs_inode_t *ip)
				1581	{
				1582	xfs_mount_t *mp = ip->i_mount;
				1583	int error;
				1584
				1585	if (!S_ISREG(ip->i_d.di_mode) \|\| (ip->i_d.di_mode == 0))
				1586	return 0;
				1587
				1588	/* If this is a read-only mount, don't do this (would generate I/O) */
				1589	if (mp->m_flags & XFS_MOUNT_RDONLY)
				1590	return 0;
				1591
				1592	if (!XFS_FORCED_SHUTDOWN(mp)) {
				1593	int truncated;
				1594
				1595	/*
				1596	* If we are using filestreams, and we have an unlinked
				1597	* file that we are processing the last close on, then nothing
				1598	* will be able to reopen and write to this file. Purge this
				1599	* inode from the filestreams cache so that it doesn't delay
				1600	* teardown of the inode.
				1601	*/
				1602	if ((ip->i_d.di_nlink == 0) && xfs_inode_is_filestream(ip))
				1603	xfs_filestream_deassociate(ip);
				1604
				1605	/*
				1606	* If we previously truncated this file and removed old data
				1607	* in the process, we want to initiate "early" writeout on
				1608	* the last close. This is an attempt to combat the notorious
				1609	* NULL files problem which is particularly noticeable from a
				1610	* truncate down, buffered (re-)write (delalloc), followed by
				1611	* a crash. What we are effectively doing here is
				1612	* significantly reducing the time window where we'd otherwise
				1613	* be exposed to that problem.
				1614	*/
				1615	truncated = xfs_iflags_test_and_clear(ip, XFS_ITRUNCATED);
				1616	if (truncated) {
				1617	xfs_iflags_clear(ip, XFS_IDIRTY_RELEASE);
				1618	if (VN_DIRTY(VFS_I(ip)) && ip->i_delayed_blks > 0) {
				1619	error = -filemap_flush(VFS_I(ip)->i_mapping);
				1620	if (error)
				1621	return error;
				1622	}
				1623	}
				1624	}
				1625
				1626	if (ip->i_d.di_nlink == 0)
				1627	return 0;
				1628
				1629	if (xfs_can_free_eofblocks(ip, false)) {
				1630
				1631	/*
				1632	* If we can't get the iolock just skip truncating the blocks
				1633	* past EOF because we could deadlock with the mmap_sem
				1634	* otherwise. We'll get another chance to drop them once the
				1635	* last reference to the inode is dropped, so we'll never leak
				1636	* blocks permanently.
				1637	*
				1638	* Further, check if the inode is being opened, written and
				1639	* closed frequently and we have delayed allocation blocks
				1640	* outstanding (e.g. streaming writes from the NFS server),
				1641	* truncating the blocks past EOF will cause fragmentation to
				1642	* occur.
				1643	*
				1644	* In this case don't do the truncation, either, but we have to
				1645	* be careful how we detect this case. Blocks beyond EOF show
				1646	* up as i_delayed_blks even when the inode is clean, so we
				1647	* need to truncate them away first before checking for a dirty
				1648	* release. Hence on the first dirty close we will still remove
				1649	* the speculative allocation, but after that we will leave it
				1650	* in place.
				1651	*/
				1652	if (xfs_iflags_test(ip, XFS_IDIRTY_RELEASE))
				1653	return 0;
				1654
				1655	error = xfs_free_eofblocks(mp, ip, true);
				1656	if (error && error != EAGAIN)
				1657	return error;
				1658
				1659	/* delalloc blocks after truncation means it really is dirty */
				1660	if (ip->i_delayed_blks)
				1661	xfs_iflags_set(ip, XFS_IDIRTY_RELEASE);
				1662	}
				1663	return 0;
				1664	}
				1665
				1666	/*
Brian Foster	f7be2d7	2013-09-20 11:06:10 -0400	[diff] [blame]	1667	* xfs_inactive_truncate
				1668	*
				1669	* Called to perform a truncate when an inode becomes unlinked.
				1670	*/
				1671	STATIC int
				1672	xfs_inactive_truncate(
				1673	struct xfs_inode *ip)
				1674	{
				1675	struct xfs_mount *mp = ip->i_mount;
				1676	struct xfs_trans *tp;
				1677	int error;
				1678
				1679	tp = xfs_trans_alloc(mp, XFS_TRANS_INACTIVE);
				1680	error = xfs_trans_reserve(tp, &M_RES(mp)->tr_itruncate, 0, 0);
				1681	if (error) {
				1682	ASSERT(XFS_FORCED_SHUTDOWN(mp));
				1683	xfs_trans_cancel(tp, 0);
				1684	return error;
				1685	}
				1686
				1687	xfs_ilock(ip, XFS_ILOCK_EXCL);
				1688	xfs_trans_ijoin(tp, ip, 0);
				1689
				1690	/*
				1691	* Log the inode size first to prevent stale data exposure in the event
				1692	* of a system crash before the truncate completes. See the related
				1693	* comment in xfs_setattr_size() for details.
				1694	*/
				1695	ip->i_d.di_size = 0;
				1696	xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
				1697
				1698	error = xfs_itruncate_extents(&tp, ip, XFS_DATA_FORK, 0);
				1699	if (error)
				1700	goto error_trans_cancel;
				1701
				1702	ASSERT(ip->i_d.di_nextents == 0);
				1703
				1704	error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
				1705	if (error)
				1706	goto error_unlock;
				1707
				1708	xfs_iunlock(ip, XFS_ILOCK_EXCL);
				1709	return 0;
				1710
				1711	error_trans_cancel:
				1712	xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES \| XFS_TRANS_ABORT);
				1713	error_unlock:
				1714	xfs_iunlock(ip, XFS_ILOCK_EXCL);
				1715	return error;
				1716	}
				1717
				1718	/*
Brian Foster	88877d2	2013-09-20 11:06:11 -0400	[diff] [blame]	1719	* xfs_inactive_ifree()
				1720	*
				1721	* Perform the inode free when an inode is unlinked.
				1722	*/
				1723	STATIC int
				1724	xfs_inactive_ifree(
				1725	struct xfs_inode *ip)
				1726	{
				1727	xfs_bmap_free_t free_list;
				1728	xfs_fsblock_t first_block;
				1729	int committed;
				1730	struct xfs_mount *mp = ip->i_mount;
				1731	struct xfs_trans *tp;
				1732	int error;
				1733
				1734	tp = xfs_trans_alloc(mp, XFS_TRANS_INACTIVE);
				1735	error = xfs_trans_reserve(tp, &M_RES(mp)->tr_ifree, 0, 0);
				1736	if (error) {
				1737	ASSERT(XFS_FORCED_SHUTDOWN(mp));
				1738	xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES);
				1739	return error;
				1740	}
				1741
				1742	xfs_ilock(ip, XFS_ILOCK_EXCL);
				1743	xfs_trans_ijoin(tp, ip, 0);
				1744
				1745	xfs_bmap_init(&free_list, &first_block);
				1746	error = xfs_ifree(tp, ip, &free_list);
				1747	if (error) {
				1748	/*
				1749	* If we fail to free the inode, shut down. The cancel
				1750	* might do that, we need to make sure. Otherwise the
				1751	* inode might be lost for a long time or forever.
				1752	*/
				1753	if (!XFS_FORCED_SHUTDOWN(mp)) {
				1754	xfs_notice(mp, "%s: xfs_ifree returned error %d",
				1755	__func__, error);
				1756	xfs_force_shutdown(mp, SHUTDOWN_META_IO_ERROR);
				1757	}
				1758	xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES\|XFS_TRANS_ABORT);
				1759	xfs_iunlock(ip, XFS_ILOCK_EXCL);
				1760	return error;
				1761	}
				1762
				1763	/*
				1764	* Credit the quota account(s). The inode is gone.
				1765	*/
				1766	xfs_trans_mod_dquot_byino(tp, ip, XFS_TRANS_DQ_ICOUNT, -1);
				1767
				1768	/*
				1769	* Just ignore errors at this point. There is nothing we can
				1770	* do except to try to keep going. Make sure it's not a silent
				1771	* error.
				1772	*/
				1773	error = xfs_bmap_finish(&tp, &free_list, &committed);
				1774	if (error)
				1775	xfs_notice(mp, "%s: xfs_bmap_finish returned error %d",
				1776	__func__, error);
				1777	error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
				1778	if (error)
				1779	xfs_notice(mp, "%s: xfs_trans_commit returned error %d",
				1780	__func__, error);
				1781
				1782	xfs_iunlock(ip, XFS_ILOCK_EXCL);
				1783	return 0;
				1784	}
				1785
				1786	/*
Dave Chinner	c24b5df	2013-08-12 20:49:45 +1000	[diff] [blame]	1787	* xfs_inactive
				1788	*
				1789	* This is called when the vnode reference count for the vnode
				1790	* goes to zero. If the file has been unlinked, then it must
				1791	* now be truncated. Also, we clear all of the read-ahead state
				1792	* kept for the inode here since the file is now closed.
				1793	*/
Brian Foster	74564fb	2013-09-20 11:06:12 -0400	[diff] [blame]	1794	void
Dave Chinner	c24b5df	2013-08-12 20:49:45 +1000	[diff] [blame]	1795	xfs_inactive(
				1796	xfs_inode_t *ip)
				1797	{
Jie Liu	3d3c8b5	2013-08-12 20:49:59 +1000	[diff] [blame]	1798	struct xfs_mount *mp;
Jie Liu	3d3c8b5	2013-08-12 20:49:59 +1000	[diff] [blame]	1799	int error;
				1800	int truncate = 0;
Dave Chinner	c24b5df	2013-08-12 20:49:45 +1000	[diff] [blame]	1801
				1802	/*
				1803	* If the inode is already free, then there can be nothing
				1804	* to clean up here.
				1805	*/
Ben Myers	d948709	2013-09-10 18:11:22 -0500	[diff] [blame]	1806	if (ip->i_d.di_mode == 0) {
Dave Chinner	c24b5df	2013-08-12 20:49:45 +1000	[diff] [blame]	1807	ASSERT(ip->i_df.if_real_bytes == 0);
				1808	ASSERT(ip->i_df.if_broot_bytes == 0);
Brian Foster	74564fb	2013-09-20 11:06:12 -0400	[diff] [blame]	1809	return;
Dave Chinner	c24b5df	2013-08-12 20:49:45 +1000	[diff] [blame]	1810	}
				1811
				1812	mp = ip->i_mount;
				1813
Dave Chinner	c24b5df	2013-08-12 20:49:45 +1000	[diff] [blame]	1814	/* If this is a read-only mount, don't do this (would generate I/O) */
				1815	if (mp->m_flags & XFS_MOUNT_RDONLY)
Brian Foster	74564fb	2013-09-20 11:06:12 -0400	[diff] [blame]	1816	return;
Dave Chinner	c24b5df	2013-08-12 20:49:45 +1000	[diff] [blame]	1817
				1818	if (ip->i_d.di_nlink != 0) {
				1819	/*
				1820	* force is true because we are evicting an inode from the
				1821	* cache. Post-eof blocks must be freed, lest we end up with
				1822	* broken free space accounting.
				1823	*/
Brian Foster	74564fb	2013-09-20 11:06:12 -0400	[diff] [blame]	1824	if (xfs_can_free_eofblocks(ip, true))
				1825	xfs_free_eofblocks(mp, ip, false);
				1826
				1827	return;
Dave Chinner	c24b5df	2013-08-12 20:49:45 +1000	[diff] [blame]	1828	}
				1829
				1830	if (S_ISREG(ip->i_d.di_mode) &&
				1831	(ip->i_d.di_size != 0 \|\| XFS_ISIZE(ip) != 0 \|\|
				1832	ip->i_d.di_nextents > 0 \|\| ip->i_delayed_blks > 0))
				1833	truncate = 1;
				1834
				1835	error = xfs_qm_dqattach(ip, 0);
				1836	if (error)
Brian Foster	74564fb	2013-09-20 11:06:12 -0400	[diff] [blame]	1837	return;
Dave Chinner	c24b5df	2013-08-12 20:49:45 +1000	[diff] [blame]	1838
Brian Foster	f7be2d7	2013-09-20 11:06:10 -0400	[diff] [blame]	1839	if (S_ISLNK(ip->i_d.di_mode))
Brian Foster	36b21dd	2013-09-20 11:06:09 -0400	[diff] [blame]	1840	error = xfs_inactive_symlink(ip);
Brian Foster	f7be2d7	2013-09-20 11:06:10 -0400	[diff] [blame]	1841	else if (truncate)
				1842	error = xfs_inactive_truncate(ip);
				1843	if (error)
Brian Foster	74564fb	2013-09-20 11:06:12 -0400	[diff] [blame]	1844	return;
Dave Chinner	c24b5df	2013-08-12 20:49:45 +1000	[diff] [blame]	1845
				1846	/*
				1847	* If there are attributes associated with the file then blow them away
				1848	* now. The code calls a routine that recursively deconstructs the
				1849	* attribute fork. We need to just commit the current transaction
				1850	* because we can't use it for xfs_attr_inactive().
				1851	*/
				1852	if (ip->i_d.di_anextents > 0) {
				1853	ASSERT(ip->i_d.di_forkoff != 0);
				1854
Dave Chinner	c24b5df	2013-08-12 20:49:45 +1000	[diff] [blame]	1855	error = xfs_attr_inactive(ip);
				1856	if (error)
Brian Foster	74564fb	2013-09-20 11:06:12 -0400	[diff] [blame]	1857	return;
Dave Chinner	c24b5df	2013-08-12 20:49:45 +1000	[diff] [blame]	1858	}
				1859
				1860	if (ip->i_afp)
				1861	xfs_idestroy_fork(ip, XFS_ATTR_FORK);
				1862
				1863	ASSERT(ip->i_d.di_anextents == 0);
				1864
				1865	/*
				1866	* Free the inode.
				1867	*/
Brian Foster	88877d2	2013-09-20 11:06:11 -0400	[diff] [blame]	1868	error = xfs_inactive_ifree(ip);
				1869	if (error)
Brian Foster	74564fb	2013-09-20 11:06:12 -0400	[diff] [blame]	1870	return;
Dave Chinner	c24b5df	2013-08-12 20:49:45 +1000	[diff] [blame]	1871
				1872	/*
				1873	* Release the dquots held by inode, if any.
				1874	*/
				1875	xfs_qm_dqdetach(ip);
Dave Chinner	c24b5df	2013-08-12 20:49:45 +1000	[diff] [blame]	1876	}
				1877
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1878	/*
				1879	* This is called when the inode's link count goes to 0.
				1880	* We place the on-disk inode on a list in the AGI. It
				1881	* will be pulled from this list when the inode is freed.
				1882	*/
				1883	int
				1884	xfs_iunlink(
				1885	xfs_trans_t *tp,
				1886	xfs_inode_t *ip)
				1887	{
				1888	xfs_mount_t *mp;
				1889	xfs_agi_t *agi;
				1890	xfs_dinode_t *dip;
				1891	xfs_buf_t *agibp;
				1892	xfs_buf_t *ibp;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1893	xfs_agino_t agino;
				1894	short bucket_index;
				1895	int offset;
				1896	int error;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1897
				1898	ASSERT(ip->i_d.di_nlink == 0);
				1899	ASSERT(ip->i_d.di_mode != 0);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1900
				1901	mp = tp->t_mountp;
				1902
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1903	/*
				1904	* Get the agi buffer first. It ensures lock ordering
				1905	* on the list.
				1906	*/
Christoph Hellwig	5e1be0f	2008-11-28 14:23:37 +1100	[diff] [blame]	1907	error = xfs_read_agi(mp, tp, XFS_INO_TO_AGNO(mp, ip->i_ino), &agibp);
Vlad Apostolov	859d718	2007-10-11 17:44:18 +1000	[diff] [blame]	1908	if (error)
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1909	return error;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1910	agi = XFS_BUF_TO_AGI(agibp);
Christoph Hellwig	5e1be0f	2008-11-28 14:23:37 +1100	[diff] [blame]	1911
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1912	/*
				1913	* Get the index into the agi hash table for the
				1914	* list this inode will go on.
				1915	*/
				1916	agino = XFS_INO_TO_AGINO(mp, ip->i_ino);
				1917	ASSERT(agino != 0);
				1918	bucket_index = agino % XFS_AGI_UNLINKED_BUCKETS;
				1919	ASSERT(agi->agi_unlinked[bucket_index]);
Christoph Hellwig	16259e7	2005-11-02 15:11:25 +1100	[diff] [blame]	1920	ASSERT(be32_to_cpu(agi->agi_unlinked[bucket_index]) != agino);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1921
Christoph Hellwig	69ef921	2011-07-08 14:36:05 +0200	[diff] [blame]	1922	if (agi->agi_unlinked[bucket_index] != cpu_to_be32(NULLAGINO)) {
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1923	/*
				1924	* There is already another inode in the bucket we need
				1925	* to add ourselves to. Add us at the front of the list.
				1926	* Here we put the head pointer into our next pointer,
				1927	* and then we fall through to point the head at us.
				1928	*/
Christoph Hellwig	475ee41	2012-07-03 12:21:22 -0400	[diff] [blame]	1929	error = xfs_imap_to_bp(mp, tp, &ip->i_imap, &dip, &ibp,
				1930	0, 0);
Vlad Apostolov	c319b58	2007-11-23 16:27:51 +1100	[diff] [blame]	1931	if (error)
				1932	return error;
				1933
Christoph Hellwig	69ef921	2011-07-08 14:36:05 +0200	[diff] [blame]	1934	ASSERT(dip->di_next_unlinked == cpu_to_be32(NULLAGINO));
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1935	dip->di_next_unlinked = agi->agi_unlinked[bucket_index];
Christoph Hellwig	92bfc6e	2008-11-28 14:23:41 +1100	[diff] [blame]	1936	offset = ip->i_imap.im_boffset +
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1937	offsetof(xfs_dinode_t, di_next_unlinked);
Dave Chinner	0a32c26	2013-06-05 12:09:08 +1000	[diff] [blame]	1938
				1939	/* need to recalc the inode CRC if appropriate */
				1940	xfs_dinode_calc_crc(mp, dip);
				1941
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1942	xfs_trans_inode_buf(tp, ibp);
				1943	xfs_trans_log_buf(tp, ibp, offset,
				1944	(offset + sizeof(xfs_agino_t) - 1));
				1945	xfs_inobp_check(mp, ibp);
				1946	}
				1947
				1948	/*
				1949	* Point the bucket head pointer at the inode being inserted.
				1950	*/
				1951	ASSERT(agino != 0);
Christoph Hellwig	16259e7	2005-11-02 15:11:25 +1100	[diff] [blame]	1952	agi->agi_unlinked[bucket_index] = cpu_to_be32(agino);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1953	offset = offsetof(xfs_agi_t, agi_unlinked) +
				1954	(sizeof(xfs_agino_t) * bucket_index);
				1955	xfs_trans_log_buf(tp, agibp, offset,
				1956	(offset + sizeof(xfs_agino_t) - 1));
				1957	return 0;
				1958	}
				1959
				1960	/*
				1961	* Pull the on-disk inode from the AGI unlinked list.
				1962	*/
				1963	STATIC int
				1964	xfs_iunlink_remove(
				1965	xfs_trans_t *tp,
				1966	xfs_inode_t *ip)
				1967	{
				1968	xfs_ino_t next_ino;
				1969	xfs_mount_t *mp;
				1970	xfs_agi_t *agi;
				1971	xfs_dinode_t *dip;
				1972	xfs_buf_t *agibp;
				1973	xfs_buf_t *ibp;
				1974	xfs_agnumber_t agno;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1975	xfs_agino_t agino;
				1976	xfs_agino_t next_agino;
				1977	xfs_buf_t *last_ibp;
Nathan Scott	6fdf8cc	2006-06-28 10:13:52 +1000	[diff] [blame]	1978	xfs_dinode_t *last_dip = NULL;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1979	short bucket_index;
Nathan Scott	6fdf8cc	2006-06-28 10:13:52 +1000	[diff] [blame]	1980	int offset, last_offset = 0;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1981	int error;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1982
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1983	mp = tp->t_mountp;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1984	agno = XFS_INO_TO_AGNO(mp, ip->i_ino);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1985
				1986	/*
				1987	* Get the agi buffer first. It ensures lock ordering
				1988	* on the list.
				1989	*/
Christoph Hellwig	5e1be0f	2008-11-28 14:23:37 +1100	[diff] [blame]	1990	error = xfs_read_agi(mp, tp, agno, &agibp);
				1991	if (error)
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1992	return error;
Christoph Hellwig	5e1be0f	2008-11-28 14:23:37 +1100	[diff] [blame]	1993
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1994	agi = XFS_BUF_TO_AGI(agibp);
Christoph Hellwig	5e1be0f	2008-11-28 14:23:37 +1100	[diff] [blame]	1995
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1996	/*
				1997	* Get the index into the agi hash table for the
				1998	* list this inode will go on.
				1999	*/
				2000	agino = XFS_INO_TO_AGINO(mp, ip->i_ino);
				2001	ASSERT(agino != 0);
				2002	bucket_index = agino % XFS_AGI_UNLINKED_BUCKETS;
Christoph Hellwig	69ef921	2011-07-08 14:36:05 +0200	[diff] [blame]	2003	ASSERT(agi->agi_unlinked[bucket_index] != cpu_to_be32(NULLAGINO));
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	2004	ASSERT(agi->agi_unlinked[bucket_index]);
				2005
Christoph Hellwig	16259e7	2005-11-02 15:11:25 +1100	[diff] [blame]	2006	if (be32_to_cpu(agi->agi_unlinked[bucket_index]) == agino) {
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	2007	/*
Christoph Hellwig	475ee41	2012-07-03 12:21:22 -0400	[diff] [blame]	2008	* We're at the head of the list. Get the inode's on-disk
				2009	* buffer to see if there is anyone after us on the list.
				2010	* Only modify our next pointer if it is not already NULLAGINO.
				2011	* This saves us the overhead of dealing with the buffer when
				2012	* there is no need to change it.
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	2013	*/
Christoph Hellwig	475ee41	2012-07-03 12:21:22 -0400	[diff] [blame]	2014	error = xfs_imap_to_bp(mp, tp, &ip->i_imap, &dip, &ibp,
				2015	0, 0);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	2016	if (error) {
Christoph Hellwig	475ee41	2012-07-03 12:21:22 -0400	[diff] [blame]	2017	xfs_warn(mp, "%s: xfs_imap_to_bp returned error %d.",
Dave Chinner	0b932cc	2011-03-07 10:08:35 +1100	[diff] [blame]	2018	__func__, error);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	2019	return error;
				2020	}
Christoph Hellwig	347d1c0	2007-08-28 13:57:51 +1000	[diff] [blame]	2021	next_agino = be32_to_cpu(dip->di_next_unlinked);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	2022	ASSERT(next_agino != 0);
				2023	if (next_agino != NULLAGINO) {
Christoph Hellwig	347d1c0	2007-08-28 13:57:51 +1000	[diff] [blame]	2024	dip->di_next_unlinked = cpu_to_be32(NULLAGINO);
Christoph Hellwig	92bfc6e	2008-11-28 14:23:41 +1100	[diff] [blame]	2025	offset = ip->i_imap.im_boffset +
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	2026	offsetof(xfs_dinode_t, di_next_unlinked);
Dave Chinner	0a32c26	2013-06-05 12:09:08 +1000	[diff] [blame]	2027
				2028	/* need to recalc the inode CRC if appropriate */
				2029	xfs_dinode_calc_crc(mp, dip);
				2030
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	2031	xfs_trans_inode_buf(tp, ibp);
				2032	xfs_trans_log_buf(tp, ibp, offset,
				2033	(offset + sizeof(xfs_agino_t) - 1));
				2034	xfs_inobp_check(mp, ibp);
				2035	} else {
				2036	xfs_trans_brelse(tp, ibp);
				2037	}
				2038	/*
				2039	* Point the bucket head pointer at the next inode.
				2040	*/
				2041	ASSERT(next_agino != 0);
				2042	ASSERT(next_agino != agino);
Christoph Hellwig	16259e7	2005-11-02 15:11:25 +1100	[diff] [blame]	2043	agi->agi_unlinked[bucket_index] = cpu_to_be32(next_agino);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	2044	offset = offsetof(xfs_agi_t, agi_unlinked) +
				2045	(sizeof(xfs_agino_t) * bucket_index);
				2046	xfs_trans_log_buf(tp, agibp, offset,
				2047	(offset + sizeof(xfs_agino_t) - 1));
				2048	} else {
				2049	/*
				2050	* We need to search the list for the inode being freed.
				2051	*/
Christoph Hellwig	16259e7	2005-11-02 15:11:25 +1100	[diff] [blame]	2052	next_agino = be32_to_cpu(agi->agi_unlinked[bucket_index]);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	2053	last_ibp = NULL;
				2054	while (next_agino != agino) {
Christoph Hellwig	129dbc9	2012-07-03 12:21:51 -0400	[diff] [blame]	2055	struct xfs_imap imap;
				2056
				2057	if (last_ibp)
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	2058	xfs_trans_brelse(tp, last_ibp);
Christoph Hellwig	129dbc9	2012-07-03 12:21:51 -0400	[diff] [blame]	2059
				2060	imap.im_blkno = 0;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	2061	next_ino = XFS_AGINO_TO_INO(mp, agno, next_agino);
Christoph Hellwig	129dbc9	2012-07-03 12:21:51 -0400	[diff] [blame]	2062
				2063	error = xfs_imap(mp, tp, next_ino, &imap, 0);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	2064	if (error) {
Dave Chinner	0b932cc	2011-03-07 10:08:35 +1100	[diff] [blame]	2065	xfs_warn(mp,
Christoph Hellwig	129dbc9	2012-07-03 12:21:51 -0400	[diff] [blame]	2066	"%s: xfs_imap returned error %d.",
				2067	__func__, error);
				2068	return error;
				2069	}
				2070
				2071	error = xfs_imap_to_bp(mp, tp, &imap, &last_dip,
				2072	&last_ibp, 0, 0);
				2073	if (error) {
				2074	xfs_warn(mp,
				2075	"%s: xfs_imap_to_bp returned error %d.",
Dave Chinner	0b932cc	2011-03-07 10:08:35 +1100	[diff] [blame]	2076	__func__, error);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	2077	return error;
				2078	}
Christoph Hellwig	129dbc9	2012-07-03 12:21:51 -0400	[diff] [blame]	2079
				2080	last_offset = imap.im_boffset;
Christoph Hellwig	347d1c0	2007-08-28 13:57:51 +1000	[diff] [blame]	2081	next_agino = be32_to_cpu(last_dip->di_next_unlinked);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	2082	ASSERT(next_agino != NULLAGINO);
				2083	ASSERT(next_agino != 0);
				2084	}
Christoph Hellwig	475ee41	2012-07-03 12:21:22 -0400	[diff] [blame]	2085
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	2086	/*
Christoph Hellwig	475ee41	2012-07-03 12:21:22 -0400	[diff] [blame]	2087	* Now last_ibp points to the buffer previous to us on the
				2088	* unlinked list. Pull us from the list.
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	2089	*/
Christoph Hellwig	475ee41	2012-07-03 12:21:22 -0400	[diff] [blame]	2090	error = xfs_imap_to_bp(mp, tp, &ip->i_imap, &dip, &ibp,
				2091	0, 0);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	2092	if (error) {
Christoph Hellwig	475ee41	2012-07-03 12:21:22 -0400	[diff] [blame]	2093	xfs_warn(mp, "%s: xfs_imap_to_bp(2) returned error %d.",
Dave Chinner	0b932cc	2011-03-07 10:08:35 +1100	[diff] [blame]	2094	__func__, error);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	2095	return error;
				2096	}
Christoph Hellwig	347d1c0	2007-08-28 13:57:51 +1000	[diff] [blame]	2097	next_agino = be32_to_cpu(dip->di_next_unlinked);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	2098	ASSERT(next_agino != 0);
				2099	ASSERT(next_agino != agino);
				2100	if (next_agino != NULLAGINO) {
Christoph Hellwig	347d1c0	2007-08-28 13:57:51 +1000	[diff] [blame]	2101	dip->di_next_unlinked = cpu_to_be32(NULLAGINO);
Christoph Hellwig	92bfc6e	2008-11-28 14:23:41 +1100	[diff] [blame]	2102	offset = ip->i_imap.im_boffset +
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	2103	offsetof(xfs_dinode_t, di_next_unlinked);
Dave Chinner	0a32c26	2013-06-05 12:09:08 +1000	[diff] [blame]	2104
				2105	/* need to recalc the inode CRC if appropriate */
				2106	xfs_dinode_calc_crc(mp, dip);
				2107
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	2108	xfs_trans_inode_buf(tp, ibp);
				2109	xfs_trans_log_buf(tp, ibp, offset,
				2110	(offset + sizeof(xfs_agino_t) - 1));
				2111	xfs_inobp_check(mp, ibp);
				2112	} else {
				2113	xfs_trans_brelse(tp, ibp);
				2114	}
				2115	/*
				2116	* Point the previous inode on the list to the next inode.
				2117	*/
Christoph Hellwig	347d1c0	2007-08-28 13:57:51 +1000	[diff] [blame]	2118	last_dip->di_next_unlinked = cpu_to_be32(next_agino);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	2119	ASSERT(next_agino != 0);
				2120	offset = last_offset + offsetof(xfs_dinode_t, di_next_unlinked);
Dave Chinner	0a32c26	2013-06-05 12:09:08 +1000	[diff] [blame]	2121
				2122	/* need to recalc the inode CRC if appropriate */
				2123	xfs_dinode_calc_crc(mp, last_dip);
				2124
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	2125	xfs_trans_inode_buf(tp, last_ibp);
				2126	xfs_trans_log_buf(tp, last_ibp, offset,
				2127	(offset + sizeof(xfs_agino_t) - 1));
				2128	xfs_inobp_check(mp, last_ibp);
				2129	}
				2130	return 0;
				2131	}
				2132
Dave Chinner	5b3eed7	2010-08-24 11:42:41 +1000	[diff] [blame]	2133	/*
Zhi Yong Wu	0b8182d	2013-08-12 03:14:59 +0000	[diff] [blame]	2134	* A big issue when freeing the inode cluster is that we _cannot_ skip any
Dave Chinner	5b3eed7	2010-08-24 11:42:41 +1000	[diff] [blame]	2135	* inodes that are in memory - they all must be marked stale and attached to
				2136	* the cluster buffer.
				2137	*/
Chandra Seetharaman	2a30f36d	2011-09-20 13:56:55 +0000	[diff] [blame]	2138	STATIC int
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	2139	xfs_ifree_cluster(
				2140	xfs_inode_t *free_ip,
				2141	xfs_trans_t *tp,
				2142	xfs_ino_t inum)
				2143	{
				2144	xfs_mount_t *mp = free_ip->i_mount;
				2145	int blks_per_cluster;
				2146	int nbufs;
				2147	int ninodes;
Dave Chinner	5b257b4	2010-06-03 16:22:29 +1000	[diff] [blame]	2148	int i, j;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	2149	xfs_daddr_t blkno;
				2150	xfs_buf_t *bp;
Dave Chinner	5b257b4	2010-06-03 16:22:29 +1000	[diff] [blame]	2151	xfs_inode_t *ip;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	2152	xfs_inode_log_item_t *iip;
				2153	xfs_log_item_t *lip;
Dave Chinner	5017e97	2010-01-11 11:47:40 +0000	[diff] [blame]	2154	struct xfs_perag *pag;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	2155
Dave Chinner	5017e97	2010-01-11 11:47:40 +0000	[diff] [blame]	2156	pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, inum));
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	2157	if (mp->m_sb.sb_blocksize >= XFS_INODE_CLUSTER_SIZE(mp)) {
				2158	blks_per_cluster = 1;
				2159	ninodes = mp->m_sb.sb_inopblock;
				2160	nbufs = XFS_IALLOC_BLOCKS(mp);
				2161	} else {
				2162	blks_per_cluster = XFS_INODE_CLUSTER_SIZE(mp) /
				2163	mp->m_sb.sb_blocksize;
				2164	ninodes = blks_per_cluster * mp->m_sb.sb_inopblock;
				2165	nbufs = XFS_IALLOC_BLOCKS(mp) / blks_per_cluster;
				2166	}
				2167
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	2168	for (j = 0; j < nbufs; j++, inum += ninodes) {
				2169	blkno = XFS_AGB_TO_DADDR(mp, XFS_INO_TO_AGNO(mp, inum),
				2170	XFS_INO_TO_AGBNO(mp, inum));
				2171
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	2172	/*
Dave Chinner	5b257b4	2010-06-03 16:22:29 +1000	[diff] [blame]	2173	* We obtain and lock the backing buffer first in the process
				2174	* here, as we have to ensure that any dirty inode that we
				2175	* can't get the flush lock on is attached to the buffer.
				2176	* If we scan the in-memory inodes first, then buffer IO can
				2177	* complete before we get a lock on it, and hence we may fail
				2178	* to mark all the active inodes on the buffer stale.
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	2179	*/
Dave Chinner	5b257b4	2010-06-03 16:22:29 +1000	[diff] [blame]	2180	bp = xfs_trans_get_buf(tp, mp->m_ddev_targp, blkno,
Dave Chinner	b6aff29	2012-11-02 11:38:42 +1100	[diff] [blame]	2181	mp->m_bsize * blks_per_cluster,
				2182	XBF_UNMAPPED);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	2183
Chandra Seetharaman	2a30f36d	2011-09-20 13:56:55 +0000	[diff] [blame]	2184	if (!bp)
				2185	return ENOMEM;
Dave Chinner	b0f539d	2012-11-14 17:53:49 +1100	[diff] [blame]	2186
				2187	/*
				2188	* This buffer may not have been correctly initialised as we
				2189	* didn't read it from disk. That's not important because we are
				2190	* only using to mark the buffer as stale in the log, and to
				2191	* attach stale cached inodes on it. That means it will never be
				2192	* dispatched for IO. If it is, we want to know about it, and we
				2193	* want it to fail. We can acheive this by adding a write
				2194	* verifier to the buffer.
				2195	*/
Dave Chinner	1813dd6	2012-11-14 17:54:40 +1100	[diff] [blame]	2196	bp->b_ops = &xfs_inode_buf_ops;
Dave Chinner	b0f539d	2012-11-14 17:53:49 +1100	[diff] [blame]	2197
Dave Chinner	5b257b4	2010-06-03 16:22:29 +1000	[diff] [blame]	2198	/*
				2199	* Walk the inodes already attached to the buffer and mark them
				2200	* stale. These will all have the flush locks held, so an
Dave Chinner	5b3eed7	2010-08-24 11:42:41 +1000	[diff] [blame]	2201	* in-memory inode walk can't lock them. By marking them all
				2202	* stale first, we will not attempt to lock them in the loop
				2203	* below as the XFS_ISTALE flag will be set.
Dave Chinner	5b257b4	2010-06-03 16:22:29 +1000	[diff] [blame]	2204	*/
Christoph Hellwig	adadbee	2011-07-13 13:43:49 +0200	[diff] [blame]	2205	lip = bp->b_fspriv;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	2206	while (lip) {
				2207	if (lip->li_type == XFS_LI_INODE) {
				2208	iip = (xfs_inode_log_item_t *)lip;
				2209	ASSERT(iip->ili_logged == 1);
Christoph Hellwig	ca30b2a	2010-06-23 18:11:15 +1000	[diff] [blame]	2210	lip->li_cb = xfs_istale_done;
David Chinner	7b2e2a3	2008-10-30 17:39:12 +1100	[diff] [blame]	2211	xfs_trans_ail_copy_lsn(mp->m_ail,
				2212	&iip->ili_flush_lsn,
				2213	&iip->ili_item.li_lsn);
David Chinner	e5ffd2b	2006-11-21 18:55:33 +1100	[diff] [blame]	2214	xfs_iflags_set(iip->ili_inode, XFS_ISTALE);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	2215	}
				2216	lip = lip->li_bio_list;
				2217	}
				2218
Dave Chinner	5b3eed7	2010-08-24 11:42:41 +1000	[diff] [blame]	2219
Dave Chinner	5b257b4	2010-06-03 16:22:29 +1000	[diff] [blame]	2220	/*
				2221	* For each inode in memory attempt to add it to the inode
				2222	* buffer and set it up for being staled on buffer IO
				2223	* completion. This is safe as we've locked out tail pushing
				2224	* and flushing by locking the buffer.
				2225	*
				2226	* We have already marked every inode that was part of a
				2227	* transaction stale above, which means there is no point in
				2228	* even trying to lock them.
				2229	*/
				2230	for (i = 0; i < ninodes; i++) {
Dave Chinner	5b3eed7	2010-08-24 11:42:41 +1000	[diff] [blame]	2231	retry:
Dave Chinner	1a3e8f3	2010-12-17 17:29:43 +1100	[diff] [blame]	2232	rcu_read_lock();
Dave Chinner	5b257b4	2010-06-03 16:22:29 +1000	[diff] [blame]	2233	ip = radix_tree_lookup(&pag->pag_ici_root,
				2234	XFS_INO_TO_AGINO(mp, (inum + i)));
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	2235
Dave Chinner	1a3e8f3	2010-12-17 17:29:43 +1100	[diff] [blame]	2236	/* Inode not in memory, nothing to do */
				2237	if (!ip) {
				2238	rcu_read_unlock();
Dave Chinner	5b257b4	2010-06-03 16:22:29 +1000	[diff] [blame]	2239	continue;
				2240	}
				2241
Dave Chinner	5b3eed7	2010-08-24 11:42:41 +1000	[diff] [blame]	2242	/*
Dave Chinner	1a3e8f3	2010-12-17 17:29:43 +1100	[diff] [blame]	2243	* because this is an RCU protected lookup, we could
				2244	* find a recently freed or even reallocated inode
				2245	* during the lookup. We need to check under the
				2246	* i_flags_lock for a valid inode here. Skip it if it
				2247	* is not valid, the wrong inode or stale.
				2248	*/
				2249	spin_lock(&ip->i_flags_lock);
				2250	if (ip->i_ino != inum + i \|\|
				2251	__xfs_iflags_test(ip, XFS_ISTALE)) {
				2252	spin_unlock(&ip->i_flags_lock);
				2253	rcu_read_unlock();
				2254	continue;
				2255	}
				2256	spin_unlock(&ip->i_flags_lock);
				2257
				2258	/*
Dave Chinner	5b3eed7	2010-08-24 11:42:41 +1000	[diff] [blame]	2259	* Don't try to lock/unlock the current inode, but we
				2260	* _cannot_ skip the other inodes that we did not find
				2261	* in the list attached to the buffer and are not
				2262	* already marked stale. If we can't lock it, back off
				2263	* and retry.
				2264	*/
Dave Chinner	5b257b4	2010-06-03 16:22:29 +1000	[diff] [blame]	2265	if (ip != free_ip &&
				2266	!xfs_ilock_nowait(ip, XFS_ILOCK_EXCL)) {
Dave Chinner	1a3e8f3	2010-12-17 17:29:43 +1100	[diff] [blame]	2267	rcu_read_unlock();
Dave Chinner	5b3eed7	2010-08-24 11:42:41 +1000	[diff] [blame]	2268	delay(1);
				2269	goto retry;
Dave Chinner	5b257b4	2010-06-03 16:22:29 +1000	[diff] [blame]	2270	}
Dave Chinner	1a3e8f3	2010-12-17 17:29:43 +1100	[diff] [blame]	2271	rcu_read_unlock();
Dave Chinner	5b257b4	2010-06-03 16:22:29 +1000	[diff] [blame]	2272
Dave Chinner	5b3eed7	2010-08-24 11:42:41 +1000	[diff] [blame]	2273	xfs_iflock(ip);
Dave Chinner	5b257b4	2010-06-03 16:22:29 +1000	[diff] [blame]	2274	xfs_iflags_set(ip, XFS_ISTALE);
Dave Chinner	5b257b4	2010-06-03 16:22:29 +1000	[diff] [blame]	2275
Dave Chinner	5b3eed7	2010-08-24 11:42:41 +1000	[diff] [blame]	2276	/*
				2277	* we don't need to attach clean inodes or those only
				2278	* with unlogged changes (which we throw away, anyway).
				2279	*/
Dave Chinner	5b257b4	2010-06-03 16:22:29 +1000	[diff] [blame]	2280	iip = ip->i_itemp;
Dave Chinner	5b3eed7	2010-08-24 11:42:41 +1000	[diff] [blame]	2281	if (!iip \|\| xfs_inode_clean(ip)) {
Dave Chinner	5b257b4	2010-06-03 16:22:29 +1000	[diff] [blame]	2282	ASSERT(ip != free_ip);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	2283	xfs_ifunlock(ip);
				2284	xfs_iunlock(ip, XFS_ILOCK_EXCL);
				2285	continue;
				2286	}
				2287
Christoph Hellwig	f5d8d5c	2012-02-29 09:53:54 +0000	[diff] [blame]	2288	iip->ili_last_fields = iip->ili_fields;
				2289	iip->ili_fields = 0;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	2290	iip->ili_logged = 1;
David Chinner	7b2e2a3	2008-10-30 17:39:12 +1100	[diff] [blame]	2291	xfs_trans_ail_copy_lsn(mp->m_ail, &iip->ili_flush_lsn,
				2292	&iip->ili_item.li_lsn);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	2293
Christoph Hellwig	ca30b2a	2010-06-23 18:11:15 +1000	[diff] [blame]	2294	xfs_buf_attach_iodone(bp, xfs_istale_done,
				2295	&iip->ili_item);
Dave Chinner	5b257b4	2010-06-03 16:22:29 +1000	[diff] [blame]	2296
				2297	if (ip != free_ip)
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	2298	xfs_iunlock(ip, XFS_ILOCK_EXCL);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	2299	}
				2300
Dave Chinner	5b3eed7	2010-08-24 11:42:41 +1000	[diff] [blame]	2301	xfs_trans_stale_inode_buf(tp, bp);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	2302	xfs_trans_binval(tp, bp);
				2303	}
				2304
Dave Chinner	5017e97	2010-01-11 11:47:40 +0000	[diff] [blame]	2305	xfs_perag_put(pag);
Chandra Seetharaman	2a30f36d	2011-09-20 13:56:55 +0000	[diff] [blame]	2306	return 0;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	2307	}
				2308
				2309	/*
				2310	* This is called to return an inode to the inode free list.
				2311	* The inode should already be truncated to 0 length and have
				2312	* no pages associated with it. This routine also assumes that
				2313	* the inode is already a part of the transaction.
				2314	*
				2315	* The on-disk copy of the inode will have been added to the list
				2316	* of unlinked inodes in the AGI. We need to remove the inode from
				2317	* that list atomically with respect to freeing it here.
				2318	*/
				2319	int
				2320	xfs_ifree(
				2321	xfs_trans_t *tp,
				2322	xfs_inode_t *ip,
				2323	xfs_bmap_free_t *flist)
				2324	{
				2325	int error;
				2326	int delete;
				2327	xfs_ino_t first_ino;
				2328
Christoph Hellwig	579aa9c	2008-04-22 17:34:00 +1000	[diff] [blame]	2329	ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	2330	ASSERT(ip->i_d.di_nlink == 0);
				2331	ASSERT(ip->i_d.di_nextents == 0);
				2332	ASSERT(ip->i_d.di_anextents == 0);
Christoph Hellwig	ce7ae151	2011-12-18 20:00:11 +0000	[diff] [blame]	2333	ASSERT(ip->i_d.di_size == 0 \|\| !S_ISREG(ip->i_d.di_mode));
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	2334	ASSERT(ip->i_d.di_nblocks == 0);
				2335
				2336	/*
				2337	* Pull the on-disk inode from the AGI unlinked list.
				2338	*/
				2339	error = xfs_iunlink_remove(tp, ip);
Dave Chinner	1baaed8	2013-06-27 16:04:50 +1000	[diff] [blame]	2340	if (error)
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	2341	return error;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	2342
				2343	error = xfs_difree(tp, ip->i_ino, flist, &delete, &first_ino);
Dave Chinner	1baaed8	2013-06-27 16:04:50 +1000	[diff] [blame]	2344	if (error)
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	2345	return error;
Dave Chinner	1baaed8	2013-06-27 16:04:50 +1000	[diff] [blame]	2346
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	2347	ip->i_d.di_mode = 0; /* mark incore inode as free */
				2348	ip->i_d.di_flags = 0;
				2349	ip->i_d.di_dmevmask = 0;
				2350	ip->i_d.di_forkoff = 0; /* mark the attr fork not in use */
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	2351	ip->i_d.di_format = XFS_DINODE_FMT_EXTENTS;
				2352	ip->i_d.di_aformat = XFS_DINODE_FMT_EXTENTS;
				2353	/*
				2354	* Bump the generation count so no one will be confused
				2355	* by reincarnations of this inode.
				2356	*/
				2357	ip->i_d.di_gen++;
				2358	xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
				2359
Dave Chinner	1baaed8	2013-06-27 16:04:50 +1000	[diff] [blame]	2360	if (delete)
Chandra Seetharaman	2a30f36d	2011-09-20 13:56:55 +0000	[diff] [blame]	2361	error = xfs_ifree_cluster(ip, tp, first_ino);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	2362
Chandra Seetharaman	2a30f36d	2011-09-20 13:56:55 +0000	[diff] [blame]	2363	return error;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	2364	}
				2365
				2366	/*
Christoph Hellwig	60ec678	2010-02-17 19:43:56 +0000	[diff] [blame]	2367	* This is called to unpin an inode. The caller must have the inode locked
				2368	* in at least shared mode so that the buffer cannot be subsequently pinned
				2369	* once someone is waiting for it to be unpinned.
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	2370	*/
Christoph Hellwig	60ec678	2010-02-17 19:43:56 +0000	[diff] [blame]	2371	static void
Christoph Hellwig	f392e63	2011-12-18 20:00:10 +0000	[diff] [blame]	2372	xfs_iunpin(
Christoph Hellwig	60ec678	2010-02-17 19:43:56 +0000	[diff] [blame]	2373	struct xfs_inode *ip)
David Chinner	a3f74ff	2008-03-06 13:43:42 +1100	[diff] [blame]	2374	{
Christoph Hellwig	579aa9c	2008-04-22 17:34:00 +1000	[diff] [blame]	2375	ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL\|XFS_ILOCK_SHARED));
David Chinner	a3f74ff	2008-03-06 13:43:42 +1100	[diff] [blame]	2376
Dave Chinner	4aaf15d	2010-03-08 11:24:07 +1100	[diff] [blame]	2377	trace_xfs_inode_unpin_nowait(ip, _RET_IP_);
				2378
David Chinner	a3f74ff	2008-03-06 13:43:42 +1100	[diff] [blame]	2379	/* Give the log a push to start the unpinning I/O */
Christoph Hellwig	60ec678	2010-02-17 19:43:56 +0000	[diff] [blame]	2380	xfs_log_force_lsn(ip->i_mount, ip->i_itemp->ili_last_lsn, 0);
Christoph Hellwig	a14a348	2010-01-19 09:56:46 +0000	[diff] [blame]	2381
David Chinner	a3f74ff	2008-03-06 13:43:42 +1100	[diff] [blame]	2382	}
				2383
Christoph Hellwig	f392e63	2011-12-18 20:00:10 +0000	[diff] [blame]	2384	static void
				2385	__xfs_iunpin_wait(
				2386	struct xfs_inode *ip)
				2387	{
				2388	wait_queue_head_t *wq = bit_waitqueue(&ip->i_flags, __XFS_IPINNED_BIT);
				2389	DEFINE_WAIT_BIT(wait, &ip->i_flags, __XFS_IPINNED_BIT);
				2390
				2391	xfs_iunpin(ip);
				2392
				2393	do {
				2394	prepare_to_wait(wq, &wait.wait, TASK_UNINTERRUPTIBLE);
				2395	if (xfs_ipincount(ip))
				2396	io_schedule();
				2397	} while (xfs_ipincount(ip));
				2398	finish_wait(wq, &wait.wait);
				2399	}
				2400
/*
 * Wait for an inode to become unpinned.  Returns immediately when the pin
 * count is already zero; otherwise hands off to __xfs_iunpin_wait().
 */
void
xfs_iunpin_wait(
	struct xfs_inode	*ip)
{
	if (!xfs_ipincount(ip))
		return;

	__xfs_iunpin_wait(ip);
}
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	2408
Dave Chinner	c24b5df	2013-08-12 20:49:45 +1000	[diff] [blame]	2409	int
				2410	xfs_remove(
				2411	xfs_inode_t *dp,
				2412	struct xfs_name *name,
				2413	xfs_inode_t *ip)
				2414	{
				2415	xfs_mount_t *mp = dp->i_mount;
				2416	xfs_trans_t *tp = NULL;
				2417	int is_dir = S_ISDIR(ip->i_d.di_mode);
				2418	int error = 0;
				2419	xfs_bmap_free_t free_list;
				2420	xfs_fsblock_t first_block;
				2421	int cancel_flags;
				2422	int committed;
				2423	int link_zero;
				2424	uint resblks;
				2425	uint log_count;
				2426
				2427	trace_xfs_remove(dp, name);
				2428
				2429	if (XFS_FORCED_SHUTDOWN(mp))
				2430	return XFS_ERROR(EIO);
				2431
				2432	error = xfs_qm_dqattach(dp, 0);
				2433	if (error)
				2434	goto std_return;
				2435
				2436	error = xfs_qm_dqattach(ip, 0);
				2437	if (error)
				2438	goto std_return;
				2439
				2440	if (is_dir) {
				2441	tp = xfs_trans_alloc(mp, XFS_TRANS_RMDIR);
				2442	log_count = XFS_DEFAULT_LOG_COUNT;
				2443	} else {
				2444	tp = xfs_trans_alloc(mp, XFS_TRANS_REMOVE);
				2445	log_count = XFS_REMOVE_LOG_COUNT;
				2446	}
				2447	cancel_flags = XFS_TRANS_RELEASE_LOG_RES;
				2448
				2449	/*
				2450	* We try to get the real space reservation first,
				2451	* allowing for directory btree deletion(s) implying
				2452	* possible bmap insert(s). If we can't get the space
				2453	* reservation then we use 0 instead, and avoid the bmap
				2454	* btree insert(s) in the directory code by, if the bmap
				2455	* insert tries to happen, instead trimming the LAST
				2456	* block from the directory.
				2457	*/
				2458	resblks = XFS_REMOVE_SPACE_RES(mp);
Jie Liu	3d3c8b5	2013-08-12 20:49:59 +1000	[diff] [blame]	2459	error = xfs_trans_reserve(tp, &M_RES(mp)->tr_remove, resblks, 0);
Dave Chinner	c24b5df	2013-08-12 20:49:45 +1000	[diff] [blame]	2460	if (error == ENOSPC) {
				2461	resblks = 0;
Jie Liu	3d3c8b5	2013-08-12 20:49:59 +1000	[diff] [blame]	2462	error = xfs_trans_reserve(tp, &M_RES(mp)->tr_remove, 0, 0);
Dave Chinner	c24b5df	2013-08-12 20:49:45 +1000	[diff] [blame]	2463	}
				2464	if (error) {
				2465	ASSERT(error != ENOSPC);
				2466	cancel_flags = 0;
				2467	goto out_trans_cancel;
				2468	}
				2469
				2470	xfs_lock_two_inodes(dp, ip, XFS_ILOCK_EXCL);
				2471
				2472	xfs_trans_ijoin(tp, dp, XFS_ILOCK_EXCL);
				2473	xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
				2474
				2475	/*
				2476	* If we're removing a directory perform some additional validation.
				2477	*/
				2478	if (is_dir) {
				2479	ASSERT(ip->i_d.di_nlink >= 2);
				2480	if (ip->i_d.di_nlink != 2) {
				2481	error = XFS_ERROR(ENOTEMPTY);
				2482	goto out_trans_cancel;
				2483	}
				2484	if (!xfs_dir_isempty(ip)) {
				2485	error = XFS_ERROR(ENOTEMPTY);
				2486	goto out_trans_cancel;
				2487	}
				2488	}
				2489
				2490	xfs_bmap_init(&free_list, &first_block);
				2491	error = xfs_dir_removename(tp, dp, name, ip->i_ino,
				2492	&first_block, &free_list, resblks);
				2493	if (error) {
				2494	ASSERT(error != ENOENT);
				2495	goto out_bmap_cancel;
				2496	}
				2497	xfs_trans_ichgtime(tp, dp, XFS_ICHGTIME_MOD \| XFS_ICHGTIME_CHG);
				2498
				2499	if (is_dir) {
				2500	/*
				2501	* Drop the link from ip's "..".
				2502	*/
				2503	error = xfs_droplink(tp, dp);
				2504	if (error)
				2505	goto out_bmap_cancel;
				2506
				2507	/*
				2508	* Drop the "." link from ip to self.
				2509	*/
				2510	error = xfs_droplink(tp, ip);
				2511	if (error)
				2512	goto out_bmap_cancel;
				2513	} else {
				2514	/*
				2515	* When removing a non-directory we need to log the parent
				2516	* inode here. For a directory this is done implicitly
				2517	* by the xfs_droplink call for the ".." entry.
				2518	*/
				2519	xfs_trans_log_inode(tp, dp, XFS_ILOG_CORE);
				2520	}
				2521
				2522	/*
				2523	* Drop the link from dp to ip.
				2524	*/
				2525	error = xfs_droplink(tp, ip);
				2526	if (error)
				2527	goto out_bmap_cancel;
				2528
				2529	/*
				2530	* Determine if this is the last link while
				2531	* we are in the transaction.
				2532	*/
				2533	link_zero = (ip->i_d.di_nlink == 0);
				2534
				2535	/*
				2536	* If this is a synchronous mount, make sure that the
				2537	* remove transaction goes to disk before returning to
				2538	* the user.
				2539	*/
				2540	if (mp->m_flags & (XFS_MOUNT_WSYNC\|XFS_MOUNT_DIRSYNC))
				2541	xfs_trans_set_sync(tp);
				2542
				2543	error = xfs_bmap_finish(&tp, &free_list, &committed);
				2544	if (error)
				2545	goto out_bmap_cancel;
				2546
				2547	error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
				2548	if (error)
				2549	goto std_return;
				2550
				2551	/*
				2552	* If we are using filestreams, kill the stream association.
				2553	* If the file is still open it may get a new one but that
				2554	* will get killed on last close in xfs_close() so we don't
				2555	* have to worry about that.
				2556	*/
				2557	if (!is_dir && link_zero && xfs_inode_is_filestream(ip))
				2558	xfs_filestream_deassociate(ip);
				2559
				2560	return 0;
				2561
				2562	out_bmap_cancel:
				2563	xfs_bmap_cancel(&free_list);
				2564	cancel_flags \|= XFS_TRANS_ABORT;
				2565	out_trans_cancel:
				2566	xfs_trans_cancel(tp, cancel_flags);
				2567	std_return:
				2568	return error;
				2569	}
				2570
Dave Chinner	f6bba20	2013-08-12 20:49:46 +1000	[diff] [blame]	2571	/*
				2572	* Enter all inodes for a rename transaction into a sorted array.
				2573	*/
				2574	STATIC void
				2575	xfs_sort_for_rename(
				2576	xfs_inode_t dp1, / in: old (source) directory inode */
				2577	xfs_inode_t dp2, / in: new (target) directory inode */
				2578	xfs_inode_t ip1, / in: inode of old entry */
				2579	xfs_inode_t ip2, / in: inode of new entry, if it
				2580	already exists, NULL otherwise. */
				2581	xfs_inode_t *i_tab,/ out: array of inode returned, sorted */
				2582	int num_inodes) / out: number of inodes in array */
				2583	{
				2584	xfs_inode_t *temp;
				2585	int i, j;
				2586
				2587	/*
				2588	* i_tab contains a list of pointers to inodes. We initialize
				2589	* the table here & we'll sort it. We will then use it to
				2590	* order the acquisition of the inode locks.
				2591	*
				2592	* Note that the table may contain duplicates. e.g., dp1 == dp2.
				2593	*/
				2594	i_tab[0] = dp1;
				2595	i_tab[1] = dp2;
				2596	i_tab[2] = ip1;
				2597	if (ip2) {
				2598	*num_inodes = 4;
				2599	i_tab[3] = ip2;
				2600	} else {
				2601	*num_inodes = 3;
				2602	i_tab[3] = NULL;
				2603	}
				2604
				2605	/*
				2606	* Sort the elements via bubble sort. (Remember, there are at
				2607	* most 4 elements to sort, so this is adequate.)
				2608	*/
				2609	for (i = 0; i < *num_inodes; i++) {
				2610	for (j = 1; j < *num_inodes; j++) {
				2611	if (i_tab[j]->i_ino < i_tab[j-1]->i_ino) {
				2612	temp = i_tab[j];
				2613	i_tab[j] = i_tab[j-1];
				2614	i_tab[j-1] = temp;
				2615	}
				2616	}
				2617	}
				2618	}
				2619
				2620	/*
				2621	* xfs_rename
				2622	*/
				2623	int
				2624	xfs_rename(
				2625	xfs_inode_t *src_dp,
				2626	struct xfs_name *src_name,
				2627	xfs_inode_t *src_ip,
				2628	xfs_inode_t *target_dp,
				2629	struct xfs_name *target_name,
				2630	xfs_inode_t *target_ip)
				2631	{
				2632	xfs_trans_t *tp = NULL;
				2633	xfs_mount_t *mp = src_dp->i_mount;
				2634	int new_parent; /* moving to a new dir */
				2635	int src_is_directory; /* src_name is a directory */
				2636	int error;
				2637	xfs_bmap_free_t free_list;
				2638	xfs_fsblock_t first_block;
				2639	int cancel_flags;
				2640	int committed;
				2641	xfs_inode_t *inodes[4];
				2642	int spaceres;
				2643	int num_inodes;
				2644
				2645	trace_xfs_rename(src_dp, target_dp, src_name, target_name);
				2646
				2647	new_parent = (src_dp != target_dp);
				2648	src_is_directory = S_ISDIR(src_ip->i_d.di_mode);
				2649
				2650	xfs_sort_for_rename(src_dp, target_dp, src_ip, target_ip,
				2651	inodes, &num_inodes);
				2652
				2653	xfs_bmap_init(&free_list, &first_block);
				2654	tp = xfs_trans_alloc(mp, XFS_TRANS_RENAME);
				2655	cancel_flags = XFS_TRANS_RELEASE_LOG_RES;
				2656	spaceres = XFS_RENAME_SPACE_RES(mp, target_name->len);
Jie Liu	3d3c8b5	2013-08-12 20:49:59 +1000	[diff] [blame]	2657	error = xfs_trans_reserve(tp, &M_RES(mp)->tr_rename, spaceres, 0);
Dave Chinner	f6bba20	2013-08-12 20:49:46 +1000	[diff] [blame]	2658	if (error == ENOSPC) {
				2659	spaceres = 0;
Jie Liu	3d3c8b5	2013-08-12 20:49:59 +1000	[diff] [blame]	2660	error = xfs_trans_reserve(tp, &M_RES(mp)->tr_rename, 0, 0);
Dave Chinner	f6bba20	2013-08-12 20:49:46 +1000	[diff] [blame]	2661	}
				2662	if (error) {
				2663	xfs_trans_cancel(tp, 0);
				2664	goto std_return;
				2665	}
				2666
				2667	/*
				2668	* Attach the dquots to the inodes
				2669	*/
				2670	error = xfs_qm_vop_rename_dqattach(inodes);
				2671	if (error) {
				2672	xfs_trans_cancel(tp, cancel_flags);
				2673	goto std_return;
				2674	}
				2675
				2676	/*
				2677	* Lock all the participating inodes. Depending upon whether
				2678	* the target_name exists in the target directory, and
				2679	* whether the target directory is the same as the source
				2680	* directory, we can lock from 2 to 4 inodes.
				2681	*/
				2682	xfs_lock_inodes(inodes, num_inodes, XFS_ILOCK_EXCL);
				2683
				2684	/*
				2685	* Join all the inodes to the transaction. From this point on,
				2686	* we can rely on either trans_commit or trans_cancel to unlock
				2687	* them.
				2688	*/
				2689	xfs_trans_ijoin(tp, src_dp, XFS_ILOCK_EXCL);
				2690	if (new_parent)
				2691	xfs_trans_ijoin(tp, target_dp, XFS_ILOCK_EXCL);
				2692	xfs_trans_ijoin(tp, src_ip, XFS_ILOCK_EXCL);
				2693	if (target_ip)
				2694	xfs_trans_ijoin(tp, target_ip, XFS_ILOCK_EXCL);
				2695
				2696	/*
				2697	* If we are using project inheritance, we only allow renames
				2698	* into our tree when the project IDs are the same; else the
				2699	* tree quota mechanism would be circumvented.
				2700	*/
				2701	if (unlikely((target_dp->i_d.di_flags & XFS_DIFLAG_PROJINHERIT) &&
				2702	(xfs_get_projid(target_dp) != xfs_get_projid(src_ip)))) {
				2703	error = XFS_ERROR(EXDEV);
				2704	goto error_return;
				2705	}
				2706
				2707	/*
				2708	* Set up the target.
				2709	*/
				2710	if (target_ip == NULL) {
				2711	/*
				2712	* If there's no space reservation, check the entry will
				2713	* fit before actually inserting it.
				2714	*/
				2715	error = xfs_dir_canenter(tp, target_dp, target_name, spaceres);
				2716	if (error)
				2717	goto error_return;
				2718	/*
				2719	* If target does not exist and the rename crosses
				2720	* directories, adjust the target directory link count
				2721	* to account for the ".." reference from the new entry.
				2722	*/
				2723	error = xfs_dir_createname(tp, target_dp, target_name,
				2724	src_ip->i_ino, &first_block,
				2725	&free_list, spaceres);
				2726	if (error == ENOSPC)
				2727	goto error_return;
				2728	if (error)
				2729	goto abort_return;
				2730
				2731	xfs_trans_ichgtime(tp, target_dp,
				2732	XFS_ICHGTIME_MOD \| XFS_ICHGTIME_CHG);
				2733
				2734	if (new_parent && src_is_directory) {
				2735	error = xfs_bumplink(tp, target_dp);
				2736	if (error)
				2737	goto abort_return;
				2738	}
				2739	} else { /* target_ip != NULL */
				2740	/*
				2741	* If target exists and it's a directory, check that both
				2742	* target and source are directories and that target can be
				2743	* destroyed, or that neither is a directory.
				2744	*/
				2745	if (S_ISDIR(target_ip->i_d.di_mode)) {
				2746	/*
				2747	* Make sure target dir is empty.
				2748	*/
				2749	if (!(xfs_dir_isempty(target_ip)) \|\|
				2750	(target_ip->i_d.di_nlink > 2)) {
				2751	error = XFS_ERROR(EEXIST);
				2752	goto error_return;
				2753	}
				2754	}
				2755
				2756	/*
				2757	* Link the source inode under the target name.
				2758	* If the source inode is a directory and we are moving
				2759	* it across directories, its ".." entry will be
				2760	* inconsistent until we replace that down below.
				2761	*
				2762	* In case there is already an entry with the same
				2763	* name at the destination directory, remove it first.
				2764	*/
				2765	error = xfs_dir_replace(tp, target_dp, target_name,
				2766	src_ip->i_ino,
				2767	&first_block, &free_list, spaceres);
				2768	if (error)
				2769	goto abort_return;
				2770
				2771	xfs_trans_ichgtime(tp, target_dp,
				2772	XFS_ICHGTIME_MOD \| XFS_ICHGTIME_CHG);
				2773
				2774	/*
				2775	* Decrement the link count on the target since the target
				2776	* dir no longer points to it.
				2777	*/
				2778	error = xfs_droplink(tp, target_ip);
				2779	if (error)
				2780	goto abort_return;
				2781
				2782	if (src_is_directory) {
				2783	/*
				2784	* Drop the link from the old "." entry.
				2785	*/
				2786	error = xfs_droplink(tp, target_ip);
				2787	if (error)
				2788	goto abort_return;
				2789	}
				2790	} /* target_ip != NULL */
				2791
				2792	/*
				2793	* Remove the source.
				2794	*/
				2795	if (new_parent && src_is_directory) {
				2796	/*
				2797	* Rewrite the ".." entry to point to the new
				2798	* directory.
				2799	*/
				2800	error = xfs_dir_replace(tp, src_ip, &xfs_name_dotdot,
				2801	target_dp->i_ino,
				2802	&first_block, &free_list, spaceres);
				2803	ASSERT(error != EEXIST);
				2804	if (error)
				2805	goto abort_return;
				2806	}
				2807
				2808	/*
				2809	* We always want to hit the ctime on the source inode.
				2810	*
				2811	* This isn't strictly required by the standards since the source
				2812	* inode isn't really being changed, but old unix file systems did
				2813	* it and some incremental backup programs won't work without it.
				2814	*/
				2815	xfs_trans_ichgtime(tp, src_ip, XFS_ICHGTIME_CHG);
				2816	xfs_trans_log_inode(tp, src_ip, XFS_ILOG_CORE);
				2817
				2818	/*
				2819	* Adjust the link count on src_dp. This is necessary when
				2820	* renaming a directory, either within one parent when
				2821	* the target existed, or across two parent directories.
				2822	*/
				2823	if (src_is_directory && (new_parent \|\| target_ip != NULL)) {
				2824
				2825	/*
				2826	* Decrement link count on src_directory since the
				2827	* entry that's moved no longer points to it.
				2828	*/
				2829	error = xfs_droplink(tp, src_dp);
				2830	if (error)
				2831	goto abort_return;
				2832	}
				2833
				2834	error = xfs_dir_removename(tp, src_dp, src_name, src_ip->i_ino,
				2835	&first_block, &free_list, spaceres);
				2836	if (error)
				2837	goto abort_return;
				2838
				2839	xfs_trans_ichgtime(tp, src_dp, XFS_ICHGTIME_MOD \| XFS_ICHGTIME_CHG);
				2840	xfs_trans_log_inode(tp, src_dp, XFS_ILOG_CORE);
				2841	if (new_parent)
				2842	xfs_trans_log_inode(tp, target_dp, XFS_ILOG_CORE);
				2843
				2844	/*
				2845	* If this is a synchronous mount, make sure that the
				2846	* rename transaction goes to disk before returning to
				2847	* the user.
				2848	*/
				2849	if (mp->m_flags & (XFS_MOUNT_WSYNC\|XFS_MOUNT_DIRSYNC)) {
				2850	xfs_trans_set_sync(tp);
				2851	}
				2852
				2853	error = xfs_bmap_finish(&tp, &free_list, &committed);
				2854	if (error) {
				2855	xfs_bmap_cancel(&free_list);
				2856	xfs_trans_cancel(tp, (XFS_TRANS_RELEASE_LOG_RES \|
				2857	XFS_TRANS_ABORT));
				2858	goto std_return;
				2859	}
				2860
				2861	/*
				2862	* trans_commit will unlock src_ip, target_ip & decrement
				2863	* the vnode references.
				2864	*/
				2865	return xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
				2866
				2867	abort_return:
				2868	cancel_flags \|= XFS_TRANS_ABORT;
				2869	error_return:
				2870	xfs_bmap_cancel(&free_list);
				2871	xfs_trans_cancel(tp, cancel_flags);
				2872	std_return:
				2873	return error;
				2874	}
				2875
David Chinner	bad5584	2008-03-06 13:43:49 +1100	[diff] [blame]	2876	STATIC int
				2877	xfs_iflush_cluster(
				2878	xfs_inode_t *ip,
				2879	xfs_buf_t *bp)
				2880	{
				2881	xfs_mount_t *mp = ip->i_mount;
Dave Chinner	5017e97	2010-01-11 11:47:40 +0000	[diff] [blame]	2882	struct xfs_perag *pag;
David Chinner	bad5584	2008-03-06 13:43:49 +1100	[diff] [blame]	2883	unsigned long first_index, mask;
David Chinner	c8f5f12	2008-05-20 11:30:15 +1000	[diff] [blame]	2884	unsigned long inodes_per_cluster;
David Chinner	bad5584	2008-03-06 13:43:49 +1100	[diff] [blame]	2885	int ilist_size;
				2886	xfs_inode_t **ilist;
				2887	xfs_inode_t *iq;
David Chinner	bad5584	2008-03-06 13:43:49 +1100	[diff] [blame]	2888	int nr_found;
				2889	int clcount = 0;
				2890	int bufwasdelwri;
				2891	int i;
				2892
Dave Chinner	5017e97	2010-01-11 11:47:40 +0000	[diff] [blame]	2893	pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ip->i_ino));
David Chinner	bad5584	2008-03-06 13:43:49 +1100	[diff] [blame]	2894
David Chinner	c8f5f12	2008-05-20 11:30:15 +1000	[diff] [blame]	2895	inodes_per_cluster = XFS_INODE_CLUSTER_SIZE(mp) >> mp->m_sb.sb_inodelog;
				2896	ilist_size = inodes_per_cluster * sizeof(xfs_inode_t *);
David Chinner	49383b0	2008-05-19 16:29:34 +1000	[diff] [blame]	2897	ilist = kmem_alloc(ilist_size, KM_MAYFAIL\|KM_NOFS);
David Chinner	bad5584	2008-03-06 13:43:49 +1100	[diff] [blame]	2898	if (!ilist)
Dave Chinner	44b56e0	2010-01-11 11:47:43 +0000	[diff] [blame]	2899	goto out_put;
David Chinner	bad5584	2008-03-06 13:43:49 +1100	[diff] [blame]	2900
				2901	mask = ~(((XFS_INODE_CLUSTER_SIZE(mp) >> mp->m_sb.sb_inodelog)) - 1);
				2902	first_index = XFS_INO_TO_AGINO(mp, ip->i_ino) & mask;
Dave Chinner	1a3e8f3	2010-12-17 17:29:43 +1100	[diff] [blame]	2903	rcu_read_lock();
David Chinner	bad5584	2008-03-06 13:43:49 +1100	[diff] [blame]	2904	/* really need a gang lookup range call here */
				2905	nr_found = radix_tree_gang_lookup(&pag->pag_ici_root, (void**)ilist,
David Chinner	c8f5f12	2008-05-20 11:30:15 +1000	[diff] [blame]	2906	first_index, inodes_per_cluster);
David Chinner	bad5584	2008-03-06 13:43:49 +1100	[diff] [blame]	2907	if (nr_found == 0)
				2908	goto out_free;
				2909
				2910	for (i = 0; i < nr_found; i++) {
				2911	iq = ilist[i];
				2912	if (iq == ip)
				2913	continue;
Dave Chinner	1a3e8f3	2010-12-17 17:29:43 +1100	[diff] [blame]	2914
				2915	/*
				2916	* because this is an RCU protected lookup, we could find a
				2917	* recently freed or even reallocated inode during the lookup.
				2918	* We need to check under the i_flags_lock for a valid inode
				2919	* here. Skip it if it is not valid or the wrong inode.
				2920	*/
				2921	spin_lock(&ip->i_flags_lock);
				2922	if (!ip->i_ino \|\|
				2923	(XFS_INO_TO_AGINO(mp, iq->i_ino) & mask) != first_index) {
				2924	spin_unlock(&ip->i_flags_lock);
				2925	continue;
				2926	}
				2927	spin_unlock(&ip->i_flags_lock);
				2928
David Chinner	bad5584	2008-03-06 13:43:49 +1100	[diff] [blame]	2929	/*
				2930	* Do an un-protected check to see if the inode is dirty and
				2931	* is a candidate for flushing. These checks will be repeated
				2932	* later after the appropriate locks are acquired.
				2933	*/
David Chinner	3354040	2008-03-06 13:43:59 +1100	[diff] [blame]	2934	if (xfs_inode_clean(iq) && xfs_ipincount(iq) == 0)
David Chinner	bad5584	2008-03-06 13:43:49 +1100	[diff] [blame]	2935	continue;
David Chinner	bad5584	2008-03-06 13:43:49 +1100	[diff] [blame]	2936
				2937	/*
				2938	* Try to get locks. If any are unavailable or it is pinned,
				2939	* then this inode cannot be flushed and is skipped.
				2940	*/
				2941
				2942	if (!xfs_ilock_nowait(iq, XFS_ILOCK_SHARED))
				2943	continue;
				2944	if (!xfs_iflock_nowait(iq)) {
				2945	xfs_iunlock(iq, XFS_ILOCK_SHARED);
				2946	continue;
				2947	}
				2948	if (xfs_ipincount(iq)) {
				2949	xfs_ifunlock(iq);
				2950	xfs_iunlock(iq, XFS_ILOCK_SHARED);
				2951	continue;
				2952	}
				2953
				2954	/*
				2955	* arriving here means that this inode can be flushed. First
				2956	* re-check that it's dirty before flushing.
				2957	*/
David Chinner	3354040	2008-03-06 13:43:59 +1100	[diff] [blame]	2958	if (!xfs_inode_clean(iq)) {
				2959	int error;
David Chinner	bad5584	2008-03-06 13:43:49 +1100	[diff] [blame]	2960	error = xfs_iflush_int(iq, bp);
				2961	if (error) {
				2962	xfs_iunlock(iq, XFS_ILOCK_SHARED);
				2963	goto cluster_corrupt_out;
				2964	}
				2965	clcount++;
				2966	} else {
				2967	xfs_ifunlock(iq);
				2968	}
				2969	xfs_iunlock(iq, XFS_ILOCK_SHARED);
				2970	}
				2971
				2972	if (clcount) {
				2973	XFS_STATS_INC(xs_icluster_flushcnt);
				2974	XFS_STATS_ADD(xs_icluster_flushinode, clcount);
				2975	}
				2976
				2977	out_free:
Dave Chinner	1a3e8f3	2010-12-17 17:29:43 +1100	[diff] [blame]	2978	rcu_read_unlock();
Denys Vlasenko	f0e2d93	2008-05-19 16:31:57 +1000	[diff] [blame]	2979	kmem_free(ilist);
Dave Chinner	44b56e0	2010-01-11 11:47:43 +0000	[diff] [blame]	2980	out_put:
				2981	xfs_perag_put(pag);
David Chinner	bad5584	2008-03-06 13:43:49 +1100	[diff] [blame]	2982	return 0;
				2983
				2984
				2985	cluster_corrupt_out:
				2986	/*
				2987	* Corruption detected in the clustering loop. Invalidate the
				2988	* inode buffer and shut down the filesystem.
				2989	*/
Dave Chinner	1a3e8f3	2010-12-17 17:29:43 +1100	[diff] [blame]	2990	rcu_read_unlock();
David Chinner	bad5584	2008-03-06 13:43:49 +1100	[diff] [blame]	2991	/*
Christoph Hellwig	43ff212	2012-04-23 15:58:39 +1000	[diff] [blame]	2992	* Clean up the buffer. If it was delwri, just release it --
David Chinner	bad5584	2008-03-06 13:43:49 +1100	[diff] [blame]	2993	* brelse can handle it with no problems. If not, shut down the
				2994	* filesystem before releasing the buffer.
				2995	*/
Christoph Hellwig	43ff212	2012-04-23 15:58:39 +1000	[diff] [blame]	2996	bufwasdelwri = (bp->b_flags & _XBF_DELWRI_Q);
David Chinner	bad5584	2008-03-06 13:43:49 +1100	[diff] [blame]	2997	if (bufwasdelwri)
				2998	xfs_buf_relse(bp);
				2999
				3000	xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
				3001
				3002	if (!bufwasdelwri) {
				3003	/*
				3004	* Just like incore_relse: if we have b_iodone functions,
				3005	* mark the buffer as an error and call them. Otherwise
				3006	* mark it as stale and brelse.
				3007	*/
Christoph Hellwig	cb669ca	2011-07-13 13:43:49 +0200	[diff] [blame]	3008	if (bp->b_iodone) {
David Chinner	bad5584	2008-03-06 13:43:49 +1100	[diff] [blame]	3009	XFS_BUF_UNDONE(bp);
Christoph Hellwig	c867cb6	2011-10-10 16:52:46 +0000	[diff] [blame]	3010	xfs_buf_stale(bp);
Chandra Seetharaman	5a52c2a58	2011-07-22 23:39:51 +0000	[diff] [blame]	3011	xfs_buf_ioerror(bp, EIO);
Christoph Hellwig	1a1a3e9	2010-10-06 18:41:18 +0000	[diff] [blame]	3012	xfs_buf_ioend(bp, 0);
David Chinner	bad5584	2008-03-06 13:43:49 +1100	[diff] [blame]	3013	} else {
Christoph Hellwig	c867cb6	2011-10-10 16:52:46 +0000	[diff] [blame]	3014	xfs_buf_stale(bp);
David Chinner	bad5584	2008-03-06 13:43:49 +1100	[diff] [blame]	3015	xfs_buf_relse(bp);
				3016	}
				3017	}
				3018
				3019	/*
				3020	* Unlocks the flush lock
				3021	*/
Dave Chinner	04913fd	2012-04-23 15:58:41 +1000	[diff] [blame]	3022	xfs_iflush_abort(iq, false);
Denys Vlasenko	f0e2d93	2008-05-19 16:31:57 +1000	[diff] [blame]	3023	kmem_free(ilist);
Dave Chinner	44b56e0	2010-01-11 11:47:43 +0000	[diff] [blame]	3024	xfs_perag_put(pag);
David Chinner	bad5584	2008-03-06 13:43:49 +1100	[diff] [blame]	3025	return XFS_ERROR(EFSCORRUPTED);
				3026	}
				3027
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	3028	/*
Christoph Hellwig	4c46819	2012-04-23 15:58:36 +1000	[diff] [blame]	3029	* Flush dirty inode metadata into the backing buffer.
				3030	*
				3031	* The caller must have the inode lock and the inode flush lock held. The
				3032	* inode lock will still be held upon return to the caller, and the inode
				3033	* flush lock will be released after the inode has reached the disk.
				3034	*
				3035	* The caller must write out the buffer returned in *bpp and release it.
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	3036	*/
				3037	int
				3038	xfs_iflush(
Christoph Hellwig	4c46819	2012-04-23 15:58:36 +1000	[diff] [blame]	3039	struct xfs_inode *ip,
				3040	struct xfs_buf **bpp)
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	3041	{
Christoph Hellwig	4c46819	2012-04-23 15:58:36 +1000	[diff] [blame]	3042	struct xfs_mount *mp = ip->i_mount;
				3043	struct xfs_buf *bp;
				3044	struct xfs_dinode *dip;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	3045	int error;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	3046
				3047	XFS_STATS_INC(xs_iflush_count);
				3048
Christoph Hellwig	579aa9c	2008-04-22 17:34:00 +1000	[diff] [blame]	3049	ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL\|XFS_ILOCK_SHARED));
Christoph Hellwig	474fce0	2011-12-18 20:00:09 +0000	[diff] [blame]	3050	ASSERT(xfs_isiflocked(ip));
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	3051	ASSERT(ip->i_d.di_format != XFS_DINODE_FMT_BTREE \|\|
Christoph Hellwig	8096b1e	2011-12-18 20:00:07 +0000	[diff] [blame]	3052	ip->i_d.di_nextents > XFS_IFORK_MAXEXT(ip, XFS_DATA_FORK));
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	3053
Christoph Hellwig	4c46819	2012-04-23 15:58:36 +1000	[diff] [blame]	3054	*bpp = NULL;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	3055
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	3056	xfs_iunpin_wait(ip);
				3057
				3058	/*
Dave Chinner	4b6a468	2010-01-11 11:45:21 +0000	[diff] [blame]	3059	* For stale inodes we cannot rely on the backing buffer remaining
				3060	* stale in cache for the remaining life of the stale inode and so
Christoph Hellwig	475ee41	2012-07-03 12:21:22 -0400	[diff] [blame]	3061	* xfs_imap_to_bp() below may give us a buffer that no longer contains
Dave Chinner	4b6a468	2010-01-11 11:45:21 +0000	[diff] [blame]	3062	* inodes below. We have to check this after ensuring the inode is
				3063	* unpinned so that it is safe to reclaim the stale inode after the
				3064	* flush call.
				3065	*/
				3066	if (xfs_iflags_test(ip, XFS_ISTALE)) {
				3067	xfs_ifunlock(ip);
				3068	return 0;
				3069	}
				3070
				3071	/*
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	3072	* This may have been unpinned because the filesystem is shutting
				3073	* down forcibly. If that's the case we must not write this inode
Christoph Hellwig	32ce90a	2012-04-23 15:58:32 +1000	[diff] [blame]	3074	* to disk, because the log record didn't make it to disk.
				3075	*
				3076	* We also have to remove the log item from the AIL in this case,
				3077	* as we wait for an empty AIL as part of the unmount process.
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	3078	*/
				3079	if (XFS_FORCED_SHUTDOWN(mp)) {
Christoph Hellwig	32ce90a	2012-04-23 15:58:32 +1000	[diff] [blame]	3080	error = XFS_ERROR(EIO);
				3081	goto abort_out;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	3082	}
				3083
				3084	/*
David Chinner	a3f74ff	2008-03-06 13:43:42 +1100	[diff] [blame]	3085	* Get the buffer containing the on-disk inode.
				3086	*/
Christoph Hellwig	475ee41	2012-07-03 12:21:22 -0400	[diff] [blame]	3087	error = xfs_imap_to_bp(mp, NULL, &ip->i_imap, &dip, &bp, XBF_TRYLOCK,
				3088	0);
David Chinner	a3f74ff	2008-03-06 13:43:42 +1100	[diff] [blame]	3089	if (error \|\| !bp) {
				3090	xfs_ifunlock(ip);
				3091	return error;
				3092	}
				3093
				3094	/*
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	3095	* First flush out the inode that xfs_iflush was called with.
				3096	*/
				3097	error = xfs_iflush_int(ip, bp);
David Chinner	bad5584	2008-03-06 13:43:49 +1100	[diff] [blame]	3098	if (error)
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	3099	goto corrupt_out;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	3100
				3101	/*
David Chinner	a3f74ff	2008-03-06 13:43:42 +1100	[diff] [blame]	3102	* If the buffer is pinned then push on the log now so we won't
				3103	* get stuck waiting in the write for too long.
				3104	*/
Chandra Seetharaman	811e64c	2011-07-22 23:40:27 +0000	[diff] [blame]	3105	if (xfs_buf_ispinned(bp))
Christoph Hellwig	a14a348	2010-01-19 09:56:46 +0000	[diff] [blame]	3106	xfs_log_force(mp, 0);
David Chinner	a3f74ff	2008-03-06 13:43:42 +1100	[diff] [blame]	3107
				3108	/*
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	3109	* inode clustering:
				3110	* see if other inodes can be gathered into this write
				3111	*/
David Chinner	bad5584	2008-03-06 13:43:49 +1100	[diff] [blame]	3112	error = xfs_iflush_cluster(ip, bp);
				3113	if (error)
				3114	goto cluster_corrupt_out;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	3115
Christoph Hellwig	4c46819	2012-04-23 15:58:36 +1000	[diff] [blame]	3116	*bpp = bp;
				3117	return 0;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	3118
				3119	corrupt_out:
				3120	xfs_buf_relse(bp);
Nathan Scott	7d04a33	2006-06-09 14:58:38 +1000	[diff] [blame]	3121	xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	3122	cluster_corrupt_out:
Christoph Hellwig	32ce90a	2012-04-23 15:58:32 +1000	[diff] [blame]	3123	error = XFS_ERROR(EFSCORRUPTED);
				3124	abort_out:
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	3125	/*
				3126	* Unlocks the flush lock
				3127	*/
Dave Chinner	04913fd	2012-04-23 15:58:41 +1000	[diff] [blame]	3128	xfs_iflush_abort(ip, false);
Christoph Hellwig	32ce90a	2012-04-23 15:58:32 +1000	[diff] [blame]	3129	return error;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	3130	}
				3131
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	3132	STATIC int
				3133	xfs_iflush_int(
Christoph Hellwig	93848a9	2013-04-03 16:11:17 +1100	[diff] [blame]	3134	struct xfs_inode *ip,
				3135	struct xfs_buf *bp)
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	3136	{
Christoph Hellwig	93848a9	2013-04-03 16:11:17 +1100	[diff] [blame]	3137	struct xfs_inode_log_item *iip = ip->i_itemp;
				3138	struct xfs_dinode *dip;
				3139	struct xfs_mount *mp = ip->i_mount;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	3140
Christoph Hellwig	579aa9c	2008-04-22 17:34:00 +1000	[diff] [blame]	3141	ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL\|XFS_ILOCK_SHARED));
Christoph Hellwig	474fce0	2011-12-18 20:00:09 +0000	[diff] [blame]	3142	ASSERT(xfs_isiflocked(ip));
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	3143	ASSERT(ip->i_d.di_format != XFS_DINODE_FMT_BTREE \|\|
Christoph Hellwig	8096b1e	2011-12-18 20:00:07 +0000	[diff] [blame]	3144	ip->i_d.di_nextents > XFS_IFORK_MAXEXT(ip, XFS_DATA_FORK));
Christoph Hellwig	93848a9	2013-04-03 16:11:17 +1100	[diff] [blame]	3145	ASSERT(iip != NULL && iip->ili_fields != 0);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	3146
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	3147	/* set dip = inode's place in the buffer /
Christoph Hellwig	92bfc6e	2008-11-28 14:23:41 +1100	[diff] [blame]	3148	dip = (xfs_dinode_t *)xfs_buf_offset(bp, ip->i_imap.im_boffset);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	3149
Christoph Hellwig	69ef921	2011-07-08 14:36:05 +0200	[diff] [blame]	3150	if (XFS_TEST_ERROR(dip->di_magic != cpu_to_be16(XFS_DINODE_MAGIC),
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	3151	mp, XFS_ERRTAG_IFLUSH_1, XFS_RANDOM_IFLUSH_1)) {
Dave Chinner	6a19d93	2011-03-07 10:02:35 +1100	[diff] [blame]	3152	xfs_alert_tag(mp, XFS_PTAG_IFLUSH,
				3153	"%s: Bad inode %Lu magic number 0x%x, ptr 0x%p",
				3154	__func__, ip->i_ino, be16_to_cpu(dip->di_magic), dip);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	3155	goto corrupt_out;
				3156	}
				3157	if (XFS_TEST_ERROR(ip->i_d.di_magic != XFS_DINODE_MAGIC,
				3158	mp, XFS_ERRTAG_IFLUSH_2, XFS_RANDOM_IFLUSH_2)) {
Dave Chinner	6a19d93	2011-03-07 10:02:35 +1100	[diff] [blame]	3159	xfs_alert_tag(mp, XFS_PTAG_IFLUSH,
				3160	"%s: Bad inode %Lu, ptr 0x%p, magic number 0x%x",
				3161	__func__, ip->i_ino, ip, ip->i_d.di_magic);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	3162	goto corrupt_out;
				3163	}
Al Viro	abbede1	2011-07-26 02:31:30 -0400	[diff] [blame]	3164	if (S_ISREG(ip->i_d.di_mode)) {
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	3165	if (XFS_TEST_ERROR(
				3166	(ip->i_d.di_format != XFS_DINODE_FMT_EXTENTS) &&
				3167	(ip->i_d.di_format != XFS_DINODE_FMT_BTREE),
				3168	mp, XFS_ERRTAG_IFLUSH_3, XFS_RANDOM_IFLUSH_3)) {
Dave Chinner	6a19d93	2011-03-07 10:02:35 +1100	[diff] [blame]	3169	xfs_alert_tag(mp, XFS_PTAG_IFLUSH,
				3170	"%s: Bad regular inode %Lu, ptr 0x%p",
				3171	__func__, ip->i_ino, ip);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	3172	goto corrupt_out;
				3173	}
Al Viro	abbede1	2011-07-26 02:31:30 -0400	[diff] [blame]	3174	} else if (S_ISDIR(ip->i_d.di_mode)) {
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	3175	if (XFS_TEST_ERROR(
				3176	(ip->i_d.di_format != XFS_DINODE_FMT_EXTENTS) &&
				3177	(ip->i_d.di_format != XFS_DINODE_FMT_BTREE) &&
				3178	(ip->i_d.di_format != XFS_DINODE_FMT_LOCAL),
				3179	mp, XFS_ERRTAG_IFLUSH_4, XFS_RANDOM_IFLUSH_4)) {
Dave Chinner	6a19d93	2011-03-07 10:02:35 +1100	[diff] [blame]	3180	xfs_alert_tag(mp, XFS_PTAG_IFLUSH,
				3181	"%s: Bad directory inode %Lu, ptr 0x%p",
				3182	__func__, ip->i_ino, ip);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	3183	goto corrupt_out;
				3184	}
				3185	}
				3186	if (XFS_TEST_ERROR(ip->i_d.di_nextents + ip->i_d.di_anextents >
				3187	ip->i_d.di_nblocks, mp, XFS_ERRTAG_IFLUSH_5,
				3188	XFS_RANDOM_IFLUSH_5)) {
Dave Chinner	6a19d93	2011-03-07 10:02:35 +1100	[diff] [blame]	3189	xfs_alert_tag(mp, XFS_PTAG_IFLUSH,
				3190	"%s: detected corrupt incore inode %Lu, "
				3191	"total extents = %d, nblocks = %Ld, ptr 0x%p",
				3192	__func__, ip->i_ino,
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	3193	ip->i_d.di_nextents + ip->i_d.di_anextents,
Dave Chinner	6a19d93	2011-03-07 10:02:35 +1100	[diff] [blame]	3194	ip->i_d.di_nblocks, ip);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	3195	goto corrupt_out;
				3196	}
				3197	if (XFS_TEST_ERROR(ip->i_d.di_forkoff > mp->m_sb.sb_inodesize,
				3198	mp, XFS_ERRTAG_IFLUSH_6, XFS_RANDOM_IFLUSH_6)) {
Dave Chinner	6a19d93	2011-03-07 10:02:35 +1100	[diff] [blame]	3199	xfs_alert_tag(mp, XFS_PTAG_IFLUSH,
				3200	"%s: bad inode %Lu, forkoff 0x%x, ptr 0x%p",
				3201	__func__, ip->i_ino, ip->i_d.di_forkoff, ip);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	3202	goto corrupt_out;
				3203	}
Dave Chinner	e60896d	2013-07-24 15:47:30 +1000	[diff] [blame]	3204
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	3205	/*
Dave Chinner	e60896d	2013-07-24 15:47:30 +1000	[diff] [blame]	3206	* Inode item log recovery for v1/v2 inodes are dependent on the
				3207	* di_flushiter count for correct sequencing. We bump the flush
				3208	* iteration count so we can detect flushes which postdate a log record
				3209	* during recovery. This is redundant as we now log every change and
				3210	* hence this can't happen but we need to still do it to ensure
				3211	* backwards compatibility with old kernels that predate logging all
				3212	* inode changes.
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	3213	*/
Dave Chinner	e60896d	2013-07-24 15:47:30 +1000	[diff] [blame]	3214	if (ip->i_d.di_version < 3)
				3215	ip->i_d.di_flushiter++;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	3216
				3217	/*
				3218	* Copy the dirty parts of the inode into the on-disk
				3219	* inode. We always copy out the core of the inode,
				3220	* because if the inode is dirty at all the core must
				3221	* be.
				3222	*/
Christoph Hellwig	81591fe	2008-11-28 14:23:39 +1100	[diff] [blame]	3223	xfs_dinode_to_disk(dip, &ip->i_d);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	3224
				3225	/* Wrap, we never let the log put out DI_MAX_FLUSH */
				3226	if (ip->i_d.di_flushiter == DI_MAX_FLUSH)
				3227	ip->i_d.di_flushiter = 0;
				3228
				3229	/*
				3230	* If this is really an old format inode and the superblock version
				3231	* has not been updated to support only new format inodes, then
				3232	* convert back to the old inode format. If the superblock version
				3233	* has been updated, then make the conversion permanent.
				3234	*/
Christoph Hellwig	51ce16d	2008-11-28 14:23:39 +1100	[diff] [blame]	3235	ASSERT(ip->i_d.di_version == 1 \|\| xfs_sb_version_hasnlink(&mp->m_sb));
				3236	if (ip->i_d.di_version == 1) {
Eric Sandeen	6211870	2008-03-06 13:44:28 +1100	[diff] [blame]	3237	if (!xfs_sb_version_hasnlink(&mp->m_sb)) {
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	3238	/*
				3239	* Convert it back.
				3240	*/
				3241	ASSERT(ip->i_d.di_nlink <= XFS_MAXLINK_1);
Christoph Hellwig	81591fe	2008-11-28 14:23:39 +1100	[diff] [blame]	3242	dip->di_onlink = cpu_to_be16(ip->i_d.di_nlink);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	3243	} else {
				3244	/*
				3245	* The superblock version has already been bumped,
				3246	* so just make the conversion to the new inode
				3247	* format permanent.
				3248	*/
Christoph Hellwig	51ce16d	2008-11-28 14:23:39 +1100	[diff] [blame]	3249	ip->i_d.di_version = 2;
				3250	dip->di_version = 2;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	3251	ip->i_d.di_onlink = 0;
Christoph Hellwig	81591fe	2008-11-28 14:23:39 +1100	[diff] [blame]	3252	dip->di_onlink = 0;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	3253	memset(&(ip->i_d.di_pad[0]), 0, sizeof(ip->i_d.di_pad));
Christoph Hellwig	81591fe	2008-11-28 14:23:39 +1100	[diff] [blame]	3254	memset(&(dip->di_pad[0]), 0,
				3255	sizeof(dip->di_pad));
Arkadiusz Mi?kiewicz	6743099	2010-09-26 06:10:18 +0000	[diff] [blame]	3256	ASSERT(xfs_get_projid(ip) == 0);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	3257	}
				3258	}
				3259
David Chinner	e4ac967	2008-04-10 12:23:58 +1000	[diff] [blame]	3260	xfs_iflush_fork(ip, dip, iip, XFS_DATA_FORK, bp);
				3261	if (XFS_IFORK_Q(ip))
				3262	xfs_iflush_fork(ip, dip, iip, XFS_ATTR_FORK, bp);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	3263	xfs_inobp_check(mp, bp);
				3264
				3265	/*
Christoph Hellwig	f5d8d5c	2012-02-29 09:53:54 +0000	[diff] [blame]	3266	* We've recorded everything logged in the inode, so we'd like to clear
				3267	* the ili_fields bits so we don't log and flush things unnecessarily.
				3268	* However, we can't stop logging all this information until the data
				3269	* we've copied into the disk buffer is written to disk. If we did we
				3270	* might overwrite the copy of the inode in the log with all the data
				3271	* after re-logging only part of it, and in the face of a crash we
				3272	* wouldn't have all the data we need to recover.
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	3273	*
Christoph Hellwig	f5d8d5c	2012-02-29 09:53:54 +0000	[diff] [blame]	3274	* What we do is move the bits to the ili_last_fields field. When
				3275	* logging the inode, these bits are moved back to the ili_fields field.
				3276	* In the xfs_iflush_done() routine we clear ili_last_fields, since we
				3277	* know that the information those bits represent is permanently on
				3278	* disk. As long as the flush completes before the inode is logged
				3279	* again, then both ili_fields and ili_last_fields will be cleared.
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	3280	*
Christoph Hellwig	f5d8d5c	2012-02-29 09:53:54 +0000	[diff] [blame]	3281	* We can play with the ili_fields bits here, because the inode lock
				3282	* must be held exclusively in order to set bits there and the flush
				3283	* lock protects the ili_last_fields bits. Set ili_logged so the flush
				3284	* done routine can tell whether or not to look in the AIL. Also, store
				3285	* the current LSN of the inode so that we can tell whether the item has
				3286	* moved in the AIL from xfs_iflush_done(). In order to read the lsn we
				3287	* need the AIL lock, because it is a 64 bit value that cannot be read
				3288	* atomically.
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	3289	*/
Christoph Hellwig	93848a9	2013-04-03 16:11:17 +1100	[diff] [blame]	3290	iip->ili_last_fields = iip->ili_fields;
				3291	iip->ili_fields = 0;
				3292	iip->ili_logged = 1;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	3293
Christoph Hellwig	93848a9	2013-04-03 16:11:17 +1100	[diff] [blame]	3294	xfs_trans_ail_copy_lsn(mp->m_ail, &iip->ili_flush_lsn,
				3295	&iip->ili_item.li_lsn);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	3296
Christoph Hellwig	93848a9	2013-04-03 16:11:17 +1100	[diff] [blame]	3297	/*
				3298	* Attach the function xfs_iflush_done to the inode's
				3299	* buffer. This will remove the inode from the AIL
				3300	* and unlock the inode's flush lock when the inode is
				3301	* completely written to disk.
				3302	*/
				3303	xfs_buf_attach_iodone(bp, xfs_iflush_done, &iip->ili_item);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	3304
Christoph Hellwig	93848a9	2013-04-03 16:11:17 +1100	[diff] [blame]	3305	/* update the lsn in the on disk inode if required */
				3306	if (ip->i_d.di_version == 3)
				3307	dip->di_lsn = cpu_to_be64(iip->ili_item.li_lsn);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	3308
Christoph Hellwig	93848a9	2013-04-03 16:11:17 +1100	[diff] [blame]	3309	/* generate the checksum. */
				3310	xfs_dinode_calc_crc(mp, dip);
				3311
				3312	ASSERT(bp->b_fspriv != NULL);
				3313	ASSERT(bp->b_iodone != NULL);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	3314	return 0;
				3315
				3316	corrupt_out:
				3317	return XFS_ERROR(EFSCORRUPTED);
				3318	}