Blame - fs/xfs/xfs_filestream.c - SHIFTPHONES/kernel/common

blob: d0e8890dac8674ad59f542ae0ca26b5315252492 [file] [log] [blame]

David Chinner	2a82b8b	2007-07-11 11:09:12 +1000	[diff] [blame]	1	/*
				2	* Copyright (c) 2006-2007 Silicon Graphics, Inc.
				3	* All Rights Reserved.
				4	*
				5	* This program is free software; you can redistribute it and/or
				6	* modify it under the terms of the GNU General Public License as
				7	* published by the Free Software Foundation.
				8	*
				9	* This program is distributed in the hope that it would be useful,
				10	* but WITHOUT ANY WARRANTY; without even the implied warranty of
				11	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
				12	* GNU General Public License for more details.
				13	*
				14	* You should have received a copy of the GNU General Public License
				15	* along with this program; if not, write the Free Software Foundation,
				16	* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
				17	*/
				18	#include "xfs.h"
Dave Chinner	6943283	2013-08-12 20:49:23 +1000	[diff] [blame]	19	#include "xfs_log.h"
David Chinner	2a82b8b	2007-07-11 11:09:12 +1000	[diff] [blame]	20	#include "xfs_bmap_btree.h"
				21	#include "xfs_inum.h"
David Chinner	2a82b8b	2007-07-11 11:09:12 +1000	[diff] [blame]	22	#include "xfs_dinode.h"
				23	#include "xfs_inode.h"
				24	#include "xfs_ag.h"
David Chinner	2a82b8b	2007-07-11 11:09:12 +1000	[diff] [blame]	25	#include "xfs_trans.h"
				26	#include "xfs_sb.h"
				27	#include "xfs_mount.h"
				28	#include "xfs_bmap.h"
Dave Chinner	6898811	2013-08-12 20:49:42 +1000	[diff] [blame^]	29	#include "xfs_bmap_util.h"
David Chinner	2a82b8b	2007-07-11 11:09:12 +1000	[diff] [blame]	30	#include "xfs_alloc.h"
				31	#include "xfs_utils.h"
				32	#include "xfs_mru_cache.h"
				33	#include "xfs_filestream.h"
Christoph Hellwig	0b1b213	2009-12-14 23:14:59 +0000	[diff] [blame]	34	#include "xfs_trace.h"
David Chinner	2a82b8b	2007-07-11 11:09:12 +1000	[diff] [blame]	35
				36	#ifdef XFS_FILESTREAMS_TRACE
				37
				38	ktrace_t *xfs_filestreams_trace_buf;
				39
				40	STATIC void
				41	xfs_filestreams_trace(
				42	xfs_mount_t mp, / mount point */
				43	int type, /* type of trace */
				44	const char func, / source function */
				45	int line, /* source line number */
				46	__psunsigned_t arg0,
				47	__psunsigned_t arg1,
				48	__psunsigned_t arg2,
				49	__psunsigned_t arg3,
				50	__psunsigned_t arg4,
				51	__psunsigned_t arg5)
				52	{
				53	ktrace_enter(xfs_filestreams_trace_buf,
				54	(void *)(__psint_t)(type \| (line << 16)),
				55	(void *)func,
				56	(void *)(__psunsigned_t)current_pid(),
				57	(void *)mp,
				58	(void *)(__psunsigned_t)arg0,
				59	(void *)(__psunsigned_t)arg1,
				60	(void *)(__psunsigned_t)arg2,
				61	(void *)(__psunsigned_t)arg3,
				62	(void *)(__psunsigned_t)arg4,
				63	(void *)(__psunsigned_t)arg5,
				64	NULL, NULL, NULL, NULL, NULL, NULL);
				65	}
				66
				67	#define TRACE0(mp,t) TRACE6(mp,t,0,0,0,0,0,0)
				68	#define TRACE1(mp,t,a0) TRACE6(mp,t,a0,0,0,0,0,0)
				69	#define TRACE2(mp,t,a0,a1) TRACE6(mp,t,a0,a1,0,0,0,0)
				70	#define TRACE3(mp,t,a0,a1,a2) TRACE6(mp,t,a0,a1,a2,0,0,0)
				71	#define TRACE4(mp,t,a0,a1,a2,a3) TRACE6(mp,t,a0,a1,a2,a3,0,0)
				72	#define TRACE5(mp,t,a0,a1,a2,a3,a4) TRACE6(mp,t,a0,a1,a2,a3,a4,0)
				73	#define TRACE6(mp,t,a0,a1,a2,a3,a4,a5) \
Harvey Harrison	34a622b	2008-04-10 12:19:21 +1000	[diff] [blame]	74	xfs_filestreams_trace(mp, t, __func__, __LINE__, \
David Chinner	2a82b8b	2007-07-11 11:09:12 +1000	[diff] [blame]	75	(__psunsigned_t)a0, (__psunsigned_t)a1, \
				76	(__psunsigned_t)a2, (__psunsigned_t)a3, \
				77	(__psunsigned_t)a4, (__psunsigned_t)a5)
				78
				79	#define TRACE_AG_SCAN(mp, ag, ag2) \
				80	TRACE2(mp, XFS_FSTRM_KTRACE_AGSCAN, ag, ag2);
				81	#define TRACE_AG_PICK1(mp, max_ag, maxfree) \
				82	TRACE2(mp, XFS_FSTRM_KTRACE_AGPICK1, max_ag, maxfree);
				83	#define TRACE_AG_PICK2(mp, ag, ag2, cnt, free, scan, flag) \
				84	TRACE6(mp, XFS_FSTRM_KTRACE_AGPICK2, ag, ag2, \
				85	cnt, free, scan, flag)
				86	#define TRACE_UPDATE(mp, ip, ag, cnt, ag2, cnt2) \
				87	TRACE5(mp, XFS_FSTRM_KTRACE_UPDATE, ip, ag, cnt, ag2, cnt2)
				88	#define TRACE_FREE(mp, ip, pip, ag, cnt) \
				89	TRACE4(mp, XFS_FSTRM_KTRACE_FREE, ip, pip, ag, cnt)
				90	#define TRACE_LOOKUP(mp, ip, pip, ag, cnt) \
				91	TRACE4(mp, XFS_FSTRM_KTRACE_ITEM_LOOKUP, ip, pip, ag, cnt)
				92	#define TRACE_ASSOCIATE(mp, ip, pip, ag, cnt) \
				93	TRACE4(mp, XFS_FSTRM_KTRACE_ASSOCIATE, ip, pip, ag, cnt)
				94	#define TRACE_MOVEAG(mp, ip, pip, oag, ocnt, nag, ncnt) \
				95	TRACE6(mp, XFS_FSTRM_KTRACE_MOVEAG, ip, pip, oag, ocnt, nag, ncnt)
				96	#define TRACE_ORPHAN(mp, ip, ag) \
				97	TRACE2(mp, XFS_FSTRM_KTRACE_ORPHAN, ip, ag);
				98
				99
				100	#else
				101	#define TRACE_AG_SCAN(mp, ag, ag2)
				102	#define TRACE_AG_PICK1(mp, max_ag, maxfree)
				103	#define TRACE_AG_PICK2(mp, ag, ag2, cnt, free, scan, flag)
				104	#define TRACE_UPDATE(mp, ip, ag, cnt, ag2, cnt2)
				105	#define TRACE_FREE(mp, ip, pip, ag, cnt)
				106	#define TRACE_LOOKUP(mp, ip, pip, ag, cnt)
				107	#define TRACE_ASSOCIATE(mp, ip, pip, ag, cnt)
				108	#define TRACE_MOVEAG(mp, ip, pip, oag, ocnt, nag, ncnt)
				109	#define TRACE_ORPHAN(mp, ip, ag)
				110	#endif
				111
				112	static kmem_zone_t *item_zone;
				113
				114	/*
				115	* Structure for associating a file or a directory with an allocation group.
				116	* The parent directory pointer is only needed for files, but since there will
				117	* generally be vastly more files than directories in the cache, using the same
				118	* data structure simplifies the code with very little memory overhead.
				119	*/
				120	typedef struct fstrm_item
				121	{
				122	xfs_agnumber_t ag; /* AG currently in use for the file/directory. */
				123	xfs_inode_t ip; / inode self-pointer. */
				124	xfs_inode_t pip; / Parent directory inode pointer. */
				125	} fstrm_item_t;
				126
Christoph Hellwig	0664ce8	2010-07-20 17:31:01 +1000	[diff] [blame]	127	/*
				128	* Allocation group filestream associations are tracked with per-ag atomic
				129	* counters. These counters allow _xfs_filestream_pick_ag() to tell whether a
				130	* particular AG already has active filestreams associated with it. The mount
				131	* point's m_peraglock is used to protect these counters from per-ag array
				132	* re-allocation during a growfs operation. When xfs_growfs_data_private() is
				133	* about to reallocate the array, it calls xfs_filestream_flush() with the
				134	* m_peraglock held in write mode.
				135	*
				136	* Since xfs_mru_cache_flush() guarantees that all the free functions for all
				137	* the cache elements have finished executing before it returns, it's safe for
				138	* the free functions to use the atomic counters without m_peraglock protection.
				139	* This allows the implementation of xfs_fstrm_free_func() to be agnostic about
				140	* whether it was called with the m_peraglock held in read mode, write mode or
				141	* not held at all. The race condition this addresses is the following:
				142	*
				143	* - The work queue scheduler fires and pulls a filestream directory cache
				144	* element off the LRU end of the cache for deletion, then gets pre-empted.
				145	* - A growfs operation grabs the m_peraglock in write mode, flushes all the
				146	* remaining items from the cache and reallocates the mount point's per-ag
				147	* array, resetting all the counters to zero.
				148	* - The work queue thread resumes and calls the free function for the element
				149	* it started cleaning up earlier. In the process it decrements the
				150	* filestreams counter for an AG that now has no references.
				151	*
				152	* With a shrinkfs feature, the above scenario could panic the system.
				153	*
				154	* All other uses of the following macros should be protected by either the
				155	* m_peraglock held in read mode, or the cache's internal locking exposed by the
				156	* interval between a call to xfs_mru_cache_lookup() and a call to
				157	* xfs_mru_cache_done(). In addition, the m_peraglock must be held in read mode
				158	* when new elements are added to the cache.
				159	*
				160	* Combined, these locking rules ensure that no associations will ever exist in
				161	* the cache that reference per-ag array elements that have since been
				162	* reallocated.
				163	*/
				164	static int
				165	xfs_filestream_peek_ag(
				166	xfs_mount_t *mp,
				167	xfs_agnumber_t agno)
				168	{
				169	struct xfs_perag *pag;
				170	int ret;
				171
				172	pag = xfs_perag_get(mp, agno);
				173	ret = atomic_read(&pag->pagf_fstrms);
				174	xfs_perag_put(pag);
				175	return ret;
				176	}
				177
				178	static int
				179	xfs_filestream_get_ag(
				180	xfs_mount_t *mp,
				181	xfs_agnumber_t agno)
				182	{
				183	struct xfs_perag *pag;
				184	int ret;
				185
				186	pag = xfs_perag_get(mp, agno);
				187	ret = atomic_inc_return(&pag->pagf_fstrms);
				188	xfs_perag_put(pag);
				189	return ret;
				190	}
				191
				192	static void
				193	xfs_filestream_put_ag(
				194	xfs_mount_t *mp,
				195	xfs_agnumber_t agno)
				196	{
				197	struct xfs_perag *pag;
				198
				199	pag = xfs_perag_get(mp, agno);
				200	atomic_dec(&pag->pagf_fstrms);
				201	xfs_perag_put(pag);
				202	}
David Chinner	2a82b8b	2007-07-11 11:09:12 +1000	[diff] [blame]	203
				204	/*
				205	* Scan the AGs starting at startag looking for an AG that isn't in use and has
				206	* at least minlen blocks free.
				207	*/
				208	static int
				209	_xfs_filestream_pick_ag(
				210	xfs_mount_t *mp,
				211	xfs_agnumber_t startag,
				212	xfs_agnumber_t *agp,
				213	int flags,
				214	xfs_extlen_t minlen)
				215	{
Dave Chinner	4196ac0	2010-01-11 11:47:42 +0000	[diff] [blame]	216	int streams, max_streams;
David Chinner	2a82b8b	2007-07-11 11:09:12 +1000	[diff] [blame]	217	int err, trylock, nscan;
Dave Chinner	6cc8764	2009-03-16 08:29:46 +0100	[diff] [blame]	218	xfs_extlen_t longest, free, minfree, maxfree = 0;
David Chinner	2a82b8b	2007-07-11 11:09:12 +1000	[diff] [blame]	219	xfs_agnumber_t ag, max_ag = NULLAGNUMBER;
				220	struct xfs_perag *pag;
				221
				222	/* 2% of an AG's blocks must be free for it to be chosen. */
				223	minfree = mp->m_sb.sb_agblocks / 50;
				224
				225	ag = startag;
				226	*agp = NULLAGNUMBER;
				227
				228	/* For the first pass, don't sleep trying to init the per-AG. */
				229	trylock = XFS_ALLOC_FLAG_TRYLOCK;
				230
				231	for (nscan = 0; 1; nscan++) {
Dave Chinner	4196ac0	2010-01-11 11:47:42 +0000	[diff] [blame]	232	pag = xfs_perag_get(mp, ag);
				233	TRACE_AG_SCAN(mp, ag, atomic_read(&pag->pagf_fstrms));
David Chinner	2a82b8b	2007-07-11 11:09:12 +1000	[diff] [blame]	234
				235	if (!pag->pagf_init) {
				236	err = xfs_alloc_pagf_init(mp, NULL, ag, trylock);
Dave Chinner	4196ac0	2010-01-11 11:47:42 +0000	[diff] [blame]	237	if (err && !trylock) {
				238	xfs_perag_put(pag);
David Chinner	2a82b8b	2007-07-11 11:09:12 +1000	[diff] [blame]	239	return err;
Dave Chinner	4196ac0	2010-01-11 11:47:42 +0000	[diff] [blame]	240	}
David Chinner	2a82b8b	2007-07-11 11:09:12 +1000	[diff] [blame]	241	}
				242
				243	/* Might fail sometimes during the 1st pass with trylock set. */
				244	if (!pag->pagf_init)
				245	goto next_ag;
				246
				247	/* Keep track of the AG with the most free blocks. */
				248	if (pag->pagf_freeblks > maxfree) {
				249	maxfree = pag->pagf_freeblks;
Dave Chinner	4196ac0	2010-01-11 11:47:42 +0000	[diff] [blame]	250	max_streams = atomic_read(&pag->pagf_fstrms);
David Chinner	2a82b8b	2007-07-11 11:09:12 +1000	[diff] [blame]	251	max_ag = ag;
				252	}
				253
				254	/*
				255	* The AG reference count does two things: it enforces mutual
				256	* exclusion when examining the suitability of an AG in this
				257	* loop, and it guards against two filestreams being established
				258	* in the same AG as each other.
				259	*/
				260	if (xfs_filestream_get_ag(mp, ag) > 1) {
				261	xfs_filestream_put_ag(mp, ag);
				262	goto next_ag;
				263	}
				264
Dave Chinner	6cc8764	2009-03-16 08:29:46 +0100	[diff] [blame]	265	longest = xfs_alloc_longest_free_extent(mp, pag);
David Chinner	2a82b8b	2007-07-11 11:09:12 +1000	[diff] [blame]	266	if (((minlen && longest >= minlen) \|\|
				267	(!minlen && pag->pagf_freeblks >= minfree)) &&
				268	(!pag->pagf_metadata \|\| !(flags & XFS_PICK_USERDATA) \|\|
				269	(flags & XFS_PICK_LOWSPACE))) {
				270
				271	/* Break out, retaining the reference on the AG. */
				272	free = pag->pagf_freeblks;
Dave Chinner	4196ac0	2010-01-11 11:47:42 +0000	[diff] [blame]	273	streams = atomic_read(&pag->pagf_fstrms);
				274	xfs_perag_put(pag);
David Chinner	2a82b8b	2007-07-11 11:09:12 +1000	[diff] [blame]	275	*agp = ag;
				276	break;
				277	}
				278
				279	/* Drop the reference on this AG, it's not usable. */
				280	xfs_filestream_put_ag(mp, ag);
				281	next_ag:
Dave Chinner	4196ac0	2010-01-11 11:47:42 +0000	[diff] [blame]	282	xfs_perag_put(pag);
David Chinner	2a82b8b	2007-07-11 11:09:12 +1000	[diff] [blame]	283	/* Move to the next AG, wrapping to AG 0 if necessary. */
				284	if (++ag >= mp->m_sb.sb_agcount)
				285	ag = 0;
				286
				287	/* If a full pass of the AGs hasn't been done yet, continue. */
				288	if (ag != startag)
				289	continue;
				290
				291	/* Allow sleeping in xfs_alloc_pagf_init() on the 2nd pass. */
				292	if (trylock != 0) {
				293	trylock = 0;
				294	continue;
				295	}
				296
				297	/* Finally, if lowspace wasn't set, set it for the 3rd pass. */
				298	if (!(flags & XFS_PICK_LOWSPACE)) {
				299	flags \|= XFS_PICK_LOWSPACE;
				300	continue;
				301	}
				302
				303	/*
				304	* Take the AG with the most free space, regardless of whether
				305	* it's already in use by another filestream.
				306	*/
				307	if (max_ag != NULLAGNUMBER) {
				308	xfs_filestream_get_ag(mp, max_ag);
				309	TRACE_AG_PICK1(mp, max_ag, maxfree);
Dave Chinner	4196ac0	2010-01-11 11:47:42 +0000	[diff] [blame]	310	streams = max_streams;
David Chinner	2a82b8b	2007-07-11 11:09:12 +1000	[diff] [blame]	311	free = maxfree;
				312	*agp = max_ag;
				313	break;
				314	}
				315
				316	/* take AG 0 if none matched */
				317	TRACE_AG_PICK1(mp, max_ag, maxfree);
				318	*agp = 0;
				319	return 0;
				320	}
				321
Dave Chinner	4196ac0	2010-01-11 11:47:42 +0000	[diff] [blame]	322	TRACE_AG_PICK2(mp, startag, *agp, streams, free, nscan, flags);
David Chinner	2a82b8b	2007-07-11 11:09:12 +1000	[diff] [blame]	323
				324	return 0;
				325	}
				326
				327	/*
				328	* Set the allocation group number for a file or a directory, updating inode
Dave Chinner	1c1c6eb	2010-01-11 11:47:44 +0000	[diff] [blame]	329	* references and per-AG references as appropriate.
David Chinner	2a82b8b	2007-07-11 11:09:12 +1000	[diff] [blame]	330	*/
				331	static int
				332	_xfs_filestream_update_ag(
				333	xfs_inode_t *ip,
				334	xfs_inode_t *pip,
				335	xfs_agnumber_t ag)
				336	{
				337	int err = 0;
				338	xfs_mount_t *mp;
				339	xfs_mru_cache_t *cache;
				340	fstrm_item_t *item;
				341	xfs_agnumber_t old_ag;
				342	xfs_inode_t *old_pip;
				343
				344	/*
				345	* Either ip is a regular file and pip is a directory, or ip is a
				346	* directory and pip is NULL.
				347	*/
Al Viro	abbede1	2011-07-26 02:31:30 -0400	[diff] [blame]	348	ASSERT(ip && ((S_ISREG(ip->i_d.di_mode) && pip &&
Al Viro	0320937	2011-07-25 20:54:24 -0400	[diff] [blame]	349	S_ISDIR(pip->i_d.di_mode)) \|\|
				350	(S_ISDIR(ip->i_d.di_mode) && !pip)));
David Chinner	2a82b8b	2007-07-11 11:09:12 +1000	[diff] [blame]	351
				352	mp = ip->i_mount;
				353	cache = mp->m_filestream;
				354
				355	item = xfs_mru_cache_lookup(cache, ip->i_ino);
				356	if (item) {
				357	ASSERT(item->ip == ip);
				358	old_ag = item->ag;
				359	item->ag = ag;
				360	old_pip = item->pip;
				361	item->pip = pip;
				362	xfs_mru_cache_done(cache);
				363
				364	/*
				365	* If the AG has changed, drop the old ref and take a new one,
				366	* effectively transferring the reference from old to new AG.
				367	*/
				368	if (ag != old_ag) {
				369	xfs_filestream_put_ag(mp, old_ag);
				370	xfs_filestream_get_ag(mp, ag);
				371	}
				372
				373	/*
				374	* If ip is a file and its pip has changed, drop the old ref and
				375	* take a new one.
				376	*/
				377	if (pip && pip != old_pip) {
				378	IRELE(old_pip);
				379	IHOLD(pip);
				380	}
				381
				382	TRACE_UPDATE(mp, ip, old_ag, xfs_filestream_peek_ag(mp, old_ag),
				383	ag, xfs_filestream_peek_ag(mp, ag));
				384	return 0;
				385	}
				386
				387	item = kmem_zone_zalloc(item_zone, KM_MAYFAIL);
				388	if (!item)
				389	return ENOMEM;
				390
				391	item->ag = ag;
				392	item->ip = ip;
				393	item->pip = pip;
				394
				395	err = xfs_mru_cache_insert(cache, ip->i_ino, item);
				396	if (err) {
				397	kmem_zone_free(item_zone, item);
				398	return err;
				399	}
				400
				401	/* Take a reference on the AG. */
				402	xfs_filestream_get_ag(mp, ag);
				403
				404	/*
				405	* Take a reference on the inode itself regardless of whether it's a
				406	* regular file or a directory.
				407	*/
				408	IHOLD(ip);
				409
				410	/*
				411	* In the case of a regular file, take a reference on the parent inode
				412	* as well to ensure it remains in-core.
				413	*/
				414	if (pip)
				415	IHOLD(pip);
				416
				417	TRACE_UPDATE(mp, ip, ag, xfs_filestream_peek_ag(mp, ag),
				418	ag, xfs_filestream_peek_ag(mp, ag));
				419
				420	return 0;
				421	}
				422
				423	/* xfs_fstrm_free_func(): callback for freeing cached stream items. */
David Chinner	a8272ce	2007-11-23 16:28:09 +1100	[diff] [blame]	424	STATIC void
David Chinner	2a82b8b	2007-07-11 11:09:12 +1000	[diff] [blame]	425	xfs_fstrm_free_func(
Eric Sandeen	bcc7b44	2007-08-30 17:21:38 +1000	[diff] [blame]	426	unsigned long ino,
				427	void *data)
David Chinner	2a82b8b	2007-07-11 11:09:12 +1000	[diff] [blame]	428	{
Eric Sandeen	bcc7b44	2007-08-30 17:21:38 +1000	[diff] [blame]	429	fstrm_item_t item = (fstrm_item_t )data;
David Chinner	2a82b8b	2007-07-11 11:09:12 +1000	[diff] [blame]	430	xfs_inode_t *ip = item->ip;
David Chinner	2a82b8b	2007-07-11 11:09:12 +1000	[diff] [blame]	431
				432	ASSERT(ip->i_ino == ino);
				433
				434	xfs_iflags_clear(ip, XFS_IFILESTREAM);
				435
				436	/* Drop the reference taken on the AG when the item was added. */
Christoph Hellwig	0664ce8	2010-07-20 17:31:01 +1000	[diff] [blame]	437	xfs_filestream_put_ag(ip->i_mount, item->ag);
David Chinner	2a82b8b	2007-07-11 11:09:12 +1000	[diff] [blame]	438
David Chinner	2a82b8b	2007-07-11 11:09:12 +1000	[diff] [blame]	439	TRACE_FREE(ip->i_mount, ip, item->pip, item->ag,
				440	xfs_filestream_peek_ag(ip->i_mount, item->ag));
				441
				442	/*
				443	* _xfs_filestream_update_ag() always takes a reference on the inode
				444	* itself, whether it's a file or a directory. Release it here.
				445	* This can result in the inode being freed and so we must
				446	* not hold any inode locks when freeing filesstreams objects
				447	* otherwise we can deadlock here.
				448	*/
				449	IRELE(ip);
				450
				451	/*
				452	* In the case of a regular file, _xfs_filestream_update_ag() also
				453	* takes a ref on the parent inode to keep it in-core. Release that
				454	* too.
				455	*/
				456	if (item->pip)
				457	IRELE(item->pip);
				458
				459	/* Finally, free the memory allocated for the item. */
				460	kmem_zone_free(item_zone, item);
				461	}
				462
				463	/*
				464	* xfs_filestream_init() is called at xfs initialisation time to set up the
				465	* memory zone that will be used for filestream data structure allocation.
				466	*/
				467	int
				468	xfs_filestream_init(void)
				469	{
				470	item_zone = kmem_zone_init(sizeof(fstrm_item_t), "fstrm_item");
Christoph Hellwig	9f8868f	2008-07-18 17:11:46 +1000	[diff] [blame]	471	if (!item_zone)
				472	return -ENOMEM;
Christoph Hellwig	0b1b213	2009-12-14 23:14:59 +0000	[diff] [blame]	473
Christoph Hellwig	9f8868f	2008-07-18 17:11:46 +1000	[diff] [blame]	474	return 0;
David Chinner	2a82b8b	2007-07-11 11:09:12 +1000	[diff] [blame]	475	}
				476
				477	/*
				478	* xfs_filestream_uninit() is called at xfs termination time to destroy the
				479	* memory zone that was used for filestream data structure allocation.
				480	*/
				481	void
				482	xfs_filestream_uninit(void)
				483	{
David Chinner	2a82b8b	2007-07-11 11:09:12 +1000	[diff] [blame]	484	kmem_zone_destroy(item_zone);
				485	}
				486
				487	/*
				488	* xfs_filestream_mount() is called when a file system is mounted with the
				489	* filestream option. It is responsible for allocating the data structures
				490	* needed to track the new file system's file streams.
				491	*/
				492	int
				493	xfs_filestream_mount(
				494	xfs_mount_t *mp)
				495	{
				496	int err;
				497	unsigned int lifetime, grp_count;
				498
				499	/*
				500	* The filestream timer tunable is currently fixed within the range of
				501	* one second to four minutes, with five seconds being the default. The
				502	* group count is somewhat arbitrary, but it'd be nice to adhere to the
				503	* timer tunable to within about 10 percent. This requires at least 10
				504	* groups.
				505	*/
				506	lifetime = xfs_fstrm_centisecs * 10;
				507	grp_count = 10;
				508
				509	err = xfs_mru_cache_create(&mp->m_filestream, lifetime, grp_count,
Eric Sandeen	bcc7b44	2007-08-30 17:21:38 +1000	[diff] [blame]	510	xfs_fstrm_free_func);
David Chinner	2a82b8b	2007-07-11 11:09:12 +1000	[diff] [blame]	511
				512	return err;
				513	}
				514
				515	/*
				516	* xfs_filestream_unmount() is called when a file system that was mounted with
				517	* the filestream option is unmounted. It drains the data structures created
				518	* to track the file system's file streams and frees all the memory that was
				519	* allocated.
				520	*/
				521	void
				522	xfs_filestream_unmount(
				523	xfs_mount_t *mp)
				524	{
				525	xfs_mru_cache_destroy(mp->m_filestream);
				526	}
				527
				528	/*
David Chinner	2a82b8b	2007-07-11 11:09:12 +1000	[diff] [blame]	529	* Return the AG of the filestream the file or directory belongs to, or
				530	* NULLAGNUMBER otherwise.
				531	*/
				532	xfs_agnumber_t
				533	xfs_filestream_lookup_ag(
				534	xfs_inode_t *ip)
				535	{
				536	xfs_mru_cache_t *cache;
				537	fstrm_item_t *item;
				538	xfs_agnumber_t ag;
				539	int ref;
				540
Al Viro	0320937	2011-07-25 20:54:24 -0400	[diff] [blame]	541	if (!S_ISREG(ip->i_d.di_mode) && !S_ISDIR(ip->i_d.di_mode)) {
David Chinner	2a82b8b	2007-07-11 11:09:12 +1000	[diff] [blame]	542	ASSERT(0);
				543	return NULLAGNUMBER;
				544	}
				545
				546	cache = ip->i_mount->m_filestream;
				547	item = xfs_mru_cache_lookup(cache, ip->i_ino);
				548	if (!item) {
				549	TRACE_LOOKUP(ip->i_mount, ip, NULL, NULLAGNUMBER, 0);
				550	return NULLAGNUMBER;
				551	}
				552
				553	ASSERT(ip == item->ip);
				554	ag = item->ag;
				555	ref = xfs_filestream_peek_ag(ip->i_mount, ag);
				556	xfs_mru_cache_done(cache);
				557
				558	TRACE_LOOKUP(ip->i_mount, ip, item->pip, ag, ref);
				559	return ag;
				560	}
				561
				562	/*
				563	* xfs_filestream_associate() should only be called to associate a regular file
				564	* with its parent directory. Calling it with a child directory isn't
				565	* appropriate because filestreams don't apply to entire directory hierarchies.
				566	* Creating a file in a child directory of an existing filestream directory
				567	* starts a new filestream with its own allocation group association.
				568	*
				569	* Returns < 0 on error, 0 if successful association occurred, > 0 if
				570	* we failed to get an association because of locking issues.
				571	*/
				572	int
				573	xfs_filestream_associate(
				574	xfs_inode_t *pip,
				575	xfs_inode_t *ip)
				576	{
				577	xfs_mount_t *mp;
				578	xfs_mru_cache_t *cache;
				579	fstrm_item_t *item;
				580	xfs_agnumber_t ag, rotorstep, startag;
				581	int err = 0;
				582
Al Viro	0320937	2011-07-25 20:54:24 -0400	[diff] [blame]	583	ASSERT(S_ISDIR(pip->i_d.di_mode));
				584	ASSERT(S_ISREG(ip->i_d.di_mode));
				585	if (!S_ISDIR(pip->i_d.di_mode) \|\| !S_ISREG(ip->i_d.di_mode))
David Chinner	2a82b8b	2007-07-11 11:09:12 +1000	[diff] [blame]	586	return -EINVAL;
				587
				588	mp = pip->i_mount;
				589	cache = mp->m_filestream;
David Chinner	2a82b8b	2007-07-11 11:09:12 +1000	[diff] [blame]	590
				591	/*
				592	* We have a problem, Houston.
				593	*
				594	* Taking the iolock here violates inode locking order - we already
				595	* hold the ilock. Hence if we block getting this lock we may never
				596	* wake. Unfortunately, that means if we can't get the lock, we're
				597	* screwed in terms of getting a stream association - we can't spin
				598	* waiting for the lock because someone else is waiting on the lock we
				599	* hold and we cannot drop that as we are in a transaction here.
				600	*
Christoph Hellwig	075fe10	2009-06-08 15:35:48 +0200	[diff] [blame]	601	* Lucky for us, this inversion is not a problem because it's a
				602	* directory inode that we are trying to lock here.
David Chinner	2a82b8b	2007-07-11 11:09:12 +1000	[diff] [blame]	603	*
				604	* So, if we can't get the iolock without sleeping then just give up
				605	*/
Dave Chinner	1c1c6eb	2010-01-11 11:47:44 +0000	[diff] [blame]	606	if (!xfs_ilock_nowait(pip, XFS_IOLOCK_EXCL))
David Chinner	2a82b8b	2007-07-11 11:09:12 +1000	[diff] [blame]	607	return 1;
David Chinner	2a82b8b	2007-07-11 11:09:12 +1000	[diff] [blame]	608
				609	/* If the parent directory is already in the cache, use its AG. */
				610	item = xfs_mru_cache_lookup(cache, pip->i_ino);
				611	if (item) {
				612	ASSERT(item->ip == pip);
				613	ag = item->ag;
				614	xfs_mru_cache_done(cache);
				615
				616	TRACE_LOOKUP(mp, pip, pip, ag, xfs_filestream_peek_ag(mp, ag));
				617	err = _xfs_filestream_update_ag(ip, pip, ag);
				618
				619	goto exit;
				620	}
				621
				622	/*
				623	* Set the starting AG using the rotor for inode32, otherwise
				624	* use the directory inode's AG.
				625	*/
				626	if (mp->m_flags & XFS_MOUNT_32BITINODES) {
				627	rotorstep = xfs_rotorstep;
				628	startag = (mp->m_agfrotor / rotorstep) % mp->m_sb.sb_agcount;
				629	mp->m_agfrotor = (mp->m_agfrotor + 1) %
				630	(mp->m_sb.sb_agcount * rotorstep);
				631	} else
				632	startag = XFS_INO_TO_AGNO(mp, pip->i_ino);
				633
				634	/* Pick a new AG for the parent inode starting at startag. */
				635	err = _xfs_filestream_pick_ag(mp, startag, &ag, 0, 0);
				636	if (err \|\| ag == NULLAGNUMBER)
				637	goto exit_did_pick;
				638
				639	/* Associate the parent inode with the AG. */
				640	err = _xfs_filestream_update_ag(pip, NULL, ag);
				641	if (err)
				642	goto exit_did_pick;
				643
				644	/* Associate the file inode with the AG. */
				645	err = _xfs_filestream_update_ag(ip, pip, ag);
				646	if (err)
				647	goto exit_did_pick;
				648
				649	TRACE_ASSOCIATE(mp, ip, pip, ag, xfs_filestream_peek_ag(mp, ag));
				650
				651	exit_did_pick:
				652	/*
				653	* If _xfs_filestream_pick_ag() returned a valid AG, remove the
				654	* reference it took on it, since the file and directory will have taken
				655	* their own now if they were successfully cached.
				656	*/
				657	if (ag != NULLAGNUMBER)
				658	xfs_filestream_put_ag(mp, ag);
				659
				660	exit:
				661	xfs_iunlock(pip, XFS_IOLOCK_EXCL);
David Chinner	2a82b8b	2007-07-11 11:09:12 +1000	[diff] [blame]	662	return -err;
				663	}
				664
				665	/*
				666	* Pick a new allocation group for the current file and its file stream. This
				667	* function is called by xfs_bmap_filestreams() with the mount point's per-ag
				668	* lock held.
				669	*/
				670	int
				671	xfs_filestream_new_ag(
Dave Chinner	6898811	2013-08-12 20:49:42 +1000	[diff] [blame^]	672	struct xfs_bmalloca *ap,
				673	xfs_agnumber_t *agp)
David Chinner	2a82b8b	2007-07-11 11:09:12 +1000	[diff] [blame]	674	{
				675	int flags, err;
				676	xfs_inode_t ip, pip = NULL;
				677	xfs_mount_t *mp;
				678	xfs_mru_cache_t *cache;
				679	xfs_extlen_t minlen;
				680	fstrm_item_t dir, file;
				681	xfs_agnumber_t ag = NULLAGNUMBER;
				682
				683	ip = ap->ip;
				684	mp = ip->i_mount;
				685	cache = mp->m_filestream;
Dave Chinner	3a75667	2011-09-18 20:40:58 +0000	[diff] [blame]	686	minlen = ap->length;
David Chinner	2a82b8b	2007-07-11 11:09:12 +1000	[diff] [blame]	687	*agp = NULLAGNUMBER;
				688
				689	/*
				690	* Look for the file in the cache, removing it if it's found. Doing
				691	* this allows it to be held across the dir lookup that follows.
				692	*/
				693	file = xfs_mru_cache_remove(cache, ip->i_ino);
				694	if (file) {
				695	ASSERT(ip == file->ip);
				696
				697	/* Save the file's parent inode and old AG number for later. */
				698	pip = file->pip;
				699	ag = file->ag;
				700
				701	/* Look for the file's directory in the cache. */
				702	dir = xfs_mru_cache_lookup(cache, pip->i_ino);
				703	if (dir) {
				704	ASSERT(pip == dir->ip);
				705
				706	/*
				707	* If the directory has already moved on to a new AG,
				708	* use that AG as the new AG for the file. Don't
				709	* forget to twiddle the AG refcounts to match the
				710	* movement.
				711	*/
				712	if (dir->ag != file->ag) {
				713	xfs_filestream_put_ag(mp, file->ag);
				714	xfs_filestream_get_ag(mp, dir->ag);
				715	*agp = file->ag = dir->ag;
				716	}
				717
				718	xfs_mru_cache_done(cache);
				719	}
				720
				721	/*
				722	* Put the file back in the cache. If this fails, the free
				723	* function needs to be called to tidy up in the same way as if
				724	* the item had simply expired from the cache.
				725	*/
				726	err = xfs_mru_cache_insert(cache, ip->i_ino, file);
				727	if (err) {
				728	xfs_fstrm_free_func(ip->i_ino, file);
				729	return err;
				730	}
				731
				732	/*
				733	* If the file's AG was moved to the directory's new AG, there's
				734	* nothing more to be done.
				735	*/
				736	if (*agp != NULLAGNUMBER) {
				737	TRACE_MOVEAG(mp, ip, pip,
				738	ag, xfs_filestream_peek_ag(mp, ag),
				739	agp, xfs_filestream_peek_ag(mp, agp));
				740	return 0;
				741	}
				742	}
				743
				744	/*
				745	* If the file's parent directory is known, take its iolock in exclusive
				746	* mode to prevent two sibling files from racing each other to migrate
				747	* themselves and their parent to different AGs.
Christoph Hellwig	785ce41	2010-11-06 11:42:44 +0000	[diff] [blame]	748	*
				749	* Note that we lock the parent directory iolock inside the child
				750	* iolock here. That's fine as we never hold both parent and child
				751	* iolock in any other place. This is different from the ilock,
				752	* which requires locking of the child after the parent for namespace
				753	* operations.
David Chinner	2a82b8b	2007-07-11 11:09:12 +1000	[diff] [blame]	754	*/
				755	if (pip)
Christoph Hellwig	785ce41	2010-11-06 11:42:44 +0000	[diff] [blame]	756	xfs_ilock(pip, XFS_IOLOCK_EXCL \| XFS_IOLOCK_PARENT);
David Chinner	2a82b8b	2007-07-11 11:09:12 +1000	[diff] [blame]	757
				758	/*
				759	* A new AG needs to be found for the file. If the file's parent
				760	* directory is also known, it will be moved to the new AG as well to
				761	* ensure that files created inside it in future use the new AG.
				762	*/
				763	ag = (ag == NULLAGNUMBER) ? 0 : (ag + 1) % mp->m_sb.sb_agcount;
				764	flags = (ap->userdata ? XFS_PICK_USERDATA : 0) \|
Dave Chinner	0937e0f	2011-09-18 20:40:57 +0000	[diff] [blame]	765	(ap->flist->xbf_low ? XFS_PICK_LOWSPACE : 0);
David Chinner	2a82b8b	2007-07-11 11:09:12 +1000	[diff] [blame]	766
				767	err = _xfs_filestream_pick_ag(mp, ag, agp, flags, minlen);
				768	if (err \|\| *agp == NULLAGNUMBER)
				769	goto exit;
				770
				771	/*
				772	* If the file wasn't found in the file cache, then its parent directory
				773	* inode isn't known. For this to have happened, the file must either
				774	* be pre-existing, or it was created long enough ago that its cache
				775	* entry has expired. This isn't the sort of usage that the filestreams
				776	* allocator is trying to optimise, so there's no point trying to track
				777	* its new AG somehow in the filestream data structures.
				778	*/
				779	if (!pip) {
				780	TRACE_ORPHAN(mp, ip, *agp);
				781	goto exit;
				782	}
				783
				784	/* Associate the parent inode with the AG. */
				785	err = _xfs_filestream_update_ag(pip, NULL, *agp);
				786	if (err)
				787	goto exit;
				788
				789	/* Associate the file inode with the AG. */
				790	err = _xfs_filestream_update_ag(ip, pip, *agp);
				791	if (err)
				792	goto exit;
				793
				794	TRACE_MOVEAG(mp, ip, pip, NULLAGNUMBER, 0,
				795	agp, xfs_filestream_peek_ag(mp, agp));
				796
				797	exit:
				798	/*
				799	* If _xfs_filestream_pick_ag() returned a valid AG, remove the
				800	* reference it took on it, since the file and directory will have taken
				801	* their own now if they were successfully cached.
				802	*/
				803	if (*agp != NULLAGNUMBER)
				804	xfs_filestream_put_ag(mp, *agp);
				805	else
				806	*agp = 0;
				807
				808	if (pip)
				809	xfs_iunlock(pip, XFS_IOLOCK_EXCL);
				810
				811	return err;
				812	}
				813
				814	/*
				815	* Remove an association between an inode and a filestream object.
				816	* Typically this is done on last close of an unlinked file.
				817	*/
				818	void
				819	xfs_filestream_deassociate(
				820	xfs_inode_t *ip)
				821	{
				822	xfs_mru_cache_t *cache = ip->i_mount->m_filestream;
				823
				824	xfs_mru_cache_delete(cache, ip->i_ino);
				825	}