Blame - fs/xfs/xfs_iwalk.c - SHIFTPHONES/mainline/linux

blob: 304c41e6ed1dd79d08a39617fc1e756f00a2d6d7 [file] [log] [blame]

Darrick J. Wong	a211432	2019-07-02 09:39:38 -0700	[diff] [blame]	1	// SPDX-License-Identifier: GPL-2.0-or-later
				2	/*
				3	* Copyright (C) 2019 Oracle. All Rights Reserved.
				4	* Author: Darrick J. Wong <darrick.wong@oracle.com>
				5	*/
				6	#include "xfs.h"
				7	#include "xfs_fs.h"
				8	#include "xfs_shared.h"
				9	#include "xfs_format.h"
				10	#include "xfs_log_format.h"
				11	#include "xfs_trans_resv.h"
				12	#include "xfs_mount.h"
				13	#include "xfs_inode.h"
				14	#include "xfs_btree.h"
				15	#include "xfs_ialloc.h"
				16	#include "xfs_ialloc_btree.h"
				17	#include "xfs_iwalk.h"
				18	#include "xfs_itable.h"
				19	#include "xfs_error.h"
				20	#include "xfs_trace.h"
				21	#include "xfs_icache.h"
				22	#include "xfs_health.h"
				23	#include "xfs_trans.h"
				24
				25	/*
				26	* Walking Inodes in the Filesystem
				27	* ================================
				28	*
				29	* This iterator function walks a subset of filesystem inodes in increasing
				30	* order from @startino until there are no more inodes. For each allocated
				31	* inode it finds, it calls a walk function with the relevant inode number and
				32	* a pointer to caller-provided data. The walk function can return the usual
				33	* negative error code to stop the iteration; 0 to continue the iteration; or
				34	* XFS_IWALK_ABORT to stop the iteration. This return value is returned to the
				35	* caller.
				36	*
				37	* Internally, we allow the walk function to do anything, which means that we
				38	* cannot maintain the inobt cursor or our lock on the AGI buffer. We
				39	* therefore cache the inobt records in kernel memory and only call the walk
				40	* function when our memory buffer is full. @nr_recs is the number of records
				41	* that we've cached, and @sz_recs is the size of our cache.
				42	*
				43	* It is the responsibility of the walk function to ensure it accesses
				44	* allocated inodes, as the inobt records may be stale by the time they are
				45	* acted upon.
				46	*/
				47
				48	struct xfs_iwalk_ag {
				49	struct xfs_mount *mp;
				50	struct xfs_trans *tp;
				51
				52	/* Where do we start the traversal? */
				53	xfs_ino_t startino;
				54
				55	/* Array of inobt records we cache. */
				56	struct xfs_inobt_rec_incore *recs;
				57
				58	/* Number of entries allocated for the @recs array. */
				59	unsigned int sz_recs;
				60
				61	/* Number of entries in the @recs array that are in use. */
				62	unsigned int nr_recs;
				63
				64	/* Inode walk function and data pointer. */
				65	xfs_iwalk_fn iwalk_fn;
				66	void *data;
				67	};
				68
				69	/* Allocate memory for a walk. */
				70	STATIC int
				71	xfs_iwalk_alloc(
				72	struct xfs_iwalk_ag *iwag)
				73	{
				74	size_t size;
				75
				76	ASSERT(iwag->recs == NULL);
				77	iwag->nr_recs = 0;
				78
				79	/* Allocate a prefetch buffer for inobt records. */
				80	size = iwag->sz_recs * sizeof(struct xfs_inobt_rec_incore);
				81	iwag->recs = kmem_alloc(size, KM_MAYFAIL);
				82	if (iwag->recs == NULL)
				83	return -ENOMEM;
				84
				85	return 0;
				86	}
				87
				88	/* Free memory we allocated for a walk. */
				89	STATIC void
				90	xfs_iwalk_free(
				91	struct xfs_iwalk_ag *iwag)
				92	{
				93	kmem_free(iwag->recs);
				94	iwag->recs = NULL;
				95	}
				96
				97	/* For each inuse inode in each cached inobt record, call our function. */
				98	STATIC int
				99	xfs_iwalk_ag_recs(
				100	struct xfs_iwalk_ag *iwag)
				101	{
				102	struct xfs_mount *mp = iwag->mp;
				103	struct xfs_trans *tp = iwag->tp;
				104	xfs_ino_t ino;
				105	unsigned int i, j;
				106	xfs_agnumber_t agno;
				107	int error;
				108
				109	agno = XFS_INO_TO_AGNO(mp, iwag->startino);
				110	for (i = 0; i < iwag->nr_recs; i++) {
				111	struct xfs_inobt_rec_incore *irec = &iwag->recs[i];
				112
				113	trace_xfs_iwalk_ag_rec(mp, agno, irec);
				114
				115	for (j = 0; j < XFS_INODES_PER_CHUNK; j++) {
				116	/* Skip if this inode is free */
				117	if (XFS_INOBT_MASK(j) & irec->ir_free)
				118	continue;
				119
				120	/* Otherwise call our function. */
				121	ino = XFS_AGINO_TO_INO(mp, agno, irec->ir_startino + j);
				122	error = iwag->iwalk_fn(mp, tp, ino, iwag->data);
				123	if (error)
				124	return error;
				125	}
				126	}
				127
				128	return 0;
				129	}
				130
				131	/* Delete cursor and let go of AGI. */
				132	static inline void
				133	xfs_iwalk_del_inobt(
				134	struct xfs_trans *tp,
				135	struct xfs_btree_cur **curpp,
				136	struct xfs_buf **agi_bpp,
				137	int error)
				138	{
				139	if (*curpp) {
				140	xfs_btree_del_cursor(*curpp, error);
				141	*curpp = NULL;
				142	}
				143	if (*agi_bpp) {
				144	xfs_trans_brelse(tp, *agi_bpp);
				145	*agi_bpp = NULL;
				146	}
				147	}
				148
				149	/*
				150	* Set ourselves up for walking inobt records starting from a given point in
				151	* the filesystem.
				152	*
				153	* If caller passed in a nonzero start inode number, load the record from the
				154	* inobt and make the record look like all the inodes before agino are free so
				155	* that we skip them, and then move the cursor to the next inobt record. This
				156	* is how we support starting an iwalk in the middle of an inode chunk.
				157	*
				158	* If the caller passed in a start number of zero, move the cursor to the first
				159	* inobt record.
				160	*
				161	* The caller is responsible for cleaning up the cursor and buffer pointer
				162	* regardless of the error status.
				163	*/
				164	STATIC int
				165	xfs_iwalk_ag_start(
				166	struct xfs_iwalk_ag *iwag,
				167	xfs_agnumber_t agno,
				168	xfs_agino_t agino,
				169	struct xfs_btree_cur **curpp,
				170	struct xfs_buf **agi_bpp,
				171	int *has_more)
				172	{
				173	struct xfs_mount *mp = iwag->mp;
				174	struct xfs_trans *tp = iwag->tp;
				175	int icount;
				176	int error;
				177
				178	/* Set up a fresh cursor and empty the inobt cache. */
				179	iwag->nr_recs = 0;
				180	error = xfs_inobt_cur(mp, tp, agno, XFS_BTNUM_INO, curpp, agi_bpp);
				181	if (error)
				182	return error;
				183
				184	/* Starting at the beginning of the AG? That's easy! */
				185	if (agino == 0)
				186	return xfs_inobt_lookup(*curpp, 0, XFS_LOOKUP_GE, has_more);
				187
				188	/*
				189	* Otherwise, we have to grab the inobt record where we left off, stuff
				190	* the record into our cache, and then see if there are more records.
				191	* We require a lookup cache of at least two elements so that we don't
				192	* have to deal with tearing down the cursor to walk the records.
				193	*/
				194	error = xfs_bulkstat_grab_ichunk(*curpp, agino - 1, &icount,
				195	&iwag->recs[iwag->nr_recs]);
				196	if (error)
				197	return error;
				198	if (icount)
				199	iwag->nr_recs++;
				200
				201	/*
				202	* The prefetch calculation is supposed to give us a large enough inobt
				203	* record cache that grab_ichunk can stage a partial first record and
				204	* the loop body can cache a record without having to check for cache
				205	* space until after it reads an inobt record.
				206	*/
				207	ASSERT(iwag->nr_recs < iwag->sz_recs);
				208
				209	return xfs_btree_increment(*curpp, 0, has_more);
				210	}
				211
				212	/*
				213	* The inobt record cache is full, so preserve the inobt cursor state and
				214	* run callbacks on the cached inobt records. When we're done, restore the
				215	* cursor state to wherever the cursor would have been had the cache not been
				216	* full (and therefore we could've just incremented the cursor) if *@has_more
				217	* is true. On exit, *@has_more will indicate whether or not the caller should
				218	* try for more inode records.
				219	*/
				220	STATIC int
				221	xfs_iwalk_run_callbacks(
				222	struct xfs_iwalk_ag *iwag,
				223	xfs_agnumber_t agno,
				224	struct xfs_btree_cur **curpp,
				225	struct xfs_buf **agi_bpp,
				226	int *has_more)
				227	{
				228	struct xfs_mount *mp = iwag->mp;
				229	struct xfs_trans *tp = iwag->tp;
				230	struct xfs_inobt_rec_incore *irec;
				231	xfs_agino_t restart;
				232	int error;
				233
				234	ASSERT(iwag->nr_recs > 0);
				235
				236	/* Delete cursor but remember the last record we cached... */
				237	xfs_iwalk_del_inobt(tp, curpp, agi_bpp, 0);
				238	irec = &iwag->recs[iwag->nr_recs - 1];
				239	restart = irec->ir_startino + XFS_INODES_PER_CHUNK - 1;
				240
				241	error = xfs_iwalk_ag_recs(iwag);
				242	if (error)
				243	return error;
				244
				245	/* ...empty the cache... */
				246	iwag->nr_recs = 0;
				247
				248	if (!has_more)
				249	return 0;
				250
				251	/* ...and recreate the cursor just past where we left off. */
				252	error = xfs_inobt_cur(mp, tp, agno, XFS_BTNUM_INO, curpp, agi_bpp);
				253	if (error)
				254	return error;
				255
				256	return xfs_inobt_lookup(*curpp, restart, XFS_LOOKUP_GE, has_more);
				257	}
				258
				259	/* Walk all inodes in a single AG, from @iwag->startino to the end of the AG. */
				260	STATIC int
				261	xfs_iwalk_ag(
				262	struct xfs_iwalk_ag *iwag)
				263	{
				264	struct xfs_mount *mp = iwag->mp;
				265	struct xfs_trans *tp = iwag->tp;
				266	struct xfs_buf *agi_bp = NULL;
				267	struct xfs_btree_cur *cur = NULL;
				268	xfs_agnumber_t agno;
				269	xfs_agino_t agino;
				270	int has_more;
				271	int error = 0;
				272
				273	/* Set up our cursor at the right place in the inode btree. */
				274	agno = XFS_INO_TO_AGNO(mp, iwag->startino);
				275	agino = XFS_INO_TO_AGINO(mp, iwag->startino);
				276	error = xfs_iwalk_ag_start(iwag, agno, agino, &cur, &agi_bp, &has_more);
				277
				278	while (!error && has_more) {
				279	struct xfs_inobt_rec_incore *irec;
				280
				281	cond_resched();
				282
				283	/* Fetch the inobt record. */
				284	irec = &iwag->recs[iwag->nr_recs];
				285	error = xfs_inobt_get_rec(cur, irec, &has_more);
				286	if (error \|\| !has_more)
				287	break;
				288
				289	/* No allocated inodes in this chunk; skip it. */
				290	if (irec->ir_freecount == irec->ir_count) {
				291	error = xfs_btree_increment(cur, 0, &has_more);
				292	if (error)
				293	break;
				294	continue;
				295	}
				296
				297	/*
				298	* Start readahead for this inode chunk in anticipation of
				299	* walking the inodes.
				300	*/
				301	xfs_bulkstat_ichunk_ra(mp, agno, irec);
				302
				303	/*
				304	* If there's space in the buffer for more records, increment
				305	* the btree cursor and grab more.
				306	*/
				307	if (++iwag->nr_recs < iwag->sz_recs) {
				308	error = xfs_btree_increment(cur, 0, &has_more);
				309	if (error \|\| !has_more)
				310	break;
				311	continue;
				312	}
				313
				314	/*
				315	* Otherwise, we need to save cursor state and run the callback
				316	* function on the cached records. The run_callbacks function
				317	* is supposed to return a cursor pointing to the record where
				318	* we would be if we had been able to increment like above.
				319	*/
				320	ASSERT(has_more);
				321	error = xfs_iwalk_run_callbacks(iwag, agno, &cur, &agi_bp,
				322	&has_more);
				323	}
				324
				325	if (iwag->nr_recs == 0 \|\| error)
				326	goto out;
				327
				328	/* Walk the unprocessed records in the cache. */
				329	error = xfs_iwalk_run_callbacks(iwag, agno, &cur, &agi_bp, &has_more);
				330
				331	out:
				332	xfs_iwalk_del_inobt(tp, &cur, &agi_bp, error);
				333	return error;
				334	}
				335
				336	/*
				337	* Given the number of inodes to prefetch, set the number of inobt records that
				338	* we cache in memory, which controls the number of inodes we try to read
				339	* ahead.
				340	*/
				341	static inline unsigned int
				342	xfs_iwalk_prefetch(
				343	unsigned int inode_records)
				344	{
				345	return PAGE_SIZE * 4 / sizeof(struct xfs_inobt_rec_incore);
				346	}
				347
				348	/*
				349	* Walk all inodes in the filesystem starting from @startino. The @iwalk_fn
				350	* will be called for each allocated inode, being passed the inode's number and
				351	* @data. @max_prefetch controls how many inobt records' worth of inodes we
				352	* try to readahead.
				353	*/
				354	int
				355	xfs_iwalk(
				356	struct xfs_mount *mp,
				357	struct xfs_trans *tp,
				358	xfs_ino_t startino,
				359	xfs_iwalk_fn iwalk_fn,
				360	unsigned int inode_records,
				361	void *data)
				362	{
				363	struct xfs_iwalk_ag iwag = {
				364	.mp = mp,
				365	.tp = tp,
				366	.iwalk_fn = iwalk_fn,
				367	.data = data,
				368	.startino = startino,
				369	.sz_recs = xfs_iwalk_prefetch(inode_records),
				370	};
				371	xfs_agnumber_t agno = XFS_INO_TO_AGNO(mp, startino);
				372	int error;
				373
				374	ASSERT(agno < mp->m_sb.sb_agcount);
				375
				376	error = xfs_iwalk_alloc(&iwag);
				377	if (error)
				378	return error;
				379
				380	for (; agno < mp->m_sb.sb_agcount; agno++) {
				381	error = xfs_iwalk_ag(&iwag);
				382	if (error)
				383	break;
				384	iwag.startino = XFS_AGINO_TO_INO(mp, agno + 1, 0);
				385	}
				386
				387	xfs_iwalk_free(&iwag);
				388	return error;
				389	}