// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (c) 2000-2005 Silicon Graphics, Inc.
 * All Rights Reserved.
 */
#include "xfs.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
#include "xfs_log_format.h"
#include "xfs_trans_resv.h"
#include "xfs_sb.h"
#include "xfs_mount.h"
#include "xfs_inode.h"
#include "xfs_trans.h"
#include "xfs_trans_priv.h"
#include "xfs_inode_item.h"
#include "xfs_quota.h"
#include "xfs_trace.h"
#include "xfs_icache.h"
#include "xfs_bmap_util.h"
#include "xfs_dquot_item.h"
#include "xfs_dquot.h"
#include "xfs_reflink.h"
#include "xfs_ialloc.h"

#include <linux/iversion.h>

/* Radix tree tags for incore inode tree. */

/* inode is to be reclaimed */
#define XFS_ICI_RECLAIM_TAG	0
/* Inode has speculative preallocations (posteof or cow) to clean. */
#define XFS_ICI_BLOCKGC_TAG	1

/*
 * The goal for walking incore inodes. These can correspond with incore inode
 * radix tree tags when convenient. Avoid existing XFS_IWALK namespace.
 */
enum xfs_icwalk_goal {
	/* Goals that are not related to tags; these must be < 0. */
	XFS_ICWALK_DQRELE	= -1,

	/* Goals directly associated with tagged inodes. */
	XFS_ICWALK_BLOCKGC	= XFS_ICI_BLOCKGC_TAG,
};

#define XFS_ICWALK_NULL_TAG	(-1U)

/* Compute the inode radix tree tag for this goal. */
static inline unsigned int
xfs_icwalk_tag(enum xfs_icwalk_goal goal)
{
	return goal < 0 ? XFS_ICWALK_NULL_TAG : goal;
}

static int xfs_icwalk(struct xfs_mount *mp,
		enum xfs_icwalk_goal goal, void *args);
static int xfs_icwalk_ag(struct xfs_perag *pag,
		enum xfs_icwalk_goal goal, void *args);

/*
 * Private inode cache walk flags for struct xfs_eofblocks. Must not coincide
 * with XFS_EOF_FLAGS_*.
 */
#define XFS_ICWALK_FLAG_DROP_UDQUOT	(1U << 31)
#define XFS_ICWALK_FLAG_DROP_GDQUOT	(1U << 30)
#define XFS_ICWALK_FLAG_DROP_PDQUOT	(1U << 29)

#define XFS_ICWALK_PRIVATE_FLAGS	(XFS_ICWALK_FLAG_DROP_UDQUOT | \
					 XFS_ICWALK_FLAG_DROP_GDQUOT | \
					 XFS_ICWALK_FLAG_DROP_PDQUOT)

/*
 * Allocate and initialise an xfs_inode.
 */
struct xfs_inode *
xfs_inode_alloc(
	struct xfs_mount	*mp,
	xfs_ino_t		ino)
{
	struct xfs_inode	*ip;

	/*
	 * XXX: If this didn't occur in transactions, we could drop GFP_NOFAIL
	 * and return NULL here on ENOMEM.
	 */
	ip = kmem_cache_alloc(xfs_inode_zone, GFP_KERNEL | __GFP_NOFAIL);

	if (inode_init_always(mp->m_super, VFS_I(ip))) {
		kmem_cache_free(xfs_inode_zone, ip);
		return NULL;
	}

	/* VFS doesn't initialise i_mode! */
	VFS_I(ip)->i_mode = 0;

	XFS_STATS_INC(mp, vn_active);
	ASSERT(atomic_read(&ip->i_pincount) == 0);
	ASSERT(ip->i_ino == 0);

	/* initialise the xfs inode */
	ip->i_ino = ino;
	ip->i_mount = mp;
	memset(&ip->i_imap, 0, sizeof(struct xfs_imap));
	ip->i_afp = NULL;
	ip->i_cowfp = NULL;
	memset(&ip->i_df, 0, sizeof(ip->i_df));
	ip->i_flags = 0;
	ip->i_delayed_blks = 0;
	ip->i_diflags2 = mp->m_ino_geo.new_diflags2;
	ip->i_nblocks = 0;
	ip->i_forkoff = 0;
	ip->i_sick = 0;
	ip->i_checked = 0;
	INIT_WORK(&ip->i_ioend_work, xfs_end_io);
	INIT_LIST_HEAD(&ip->i_ioend_list);
	spin_lock_init(&ip->i_ioend_lock);

	return ip;
}

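/*
 * Final RCU callback that tears down an inode: free the data, attr and CoW
 * forks, release any remaining inode log item, and return the xfs_inode to
 * the slab cache.
 */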
STATIC void
xfs_inode_free_callback(
	struct rcu_head		*head)
{
	struct inode		*inode = container_of(head, struct inode, i_rcu);
	struct xfs_inode	*ip = XFS_I(inode);

	switch (VFS_I(ip)->i_mode & S_IFMT) {
	case S_IFREG:
	case S_IFDIR:
	case S_IFLNK:
		xfs_idestroy_fork(&ip->i_df);
		break;
	}

	if (ip->i_afp) {
		xfs_idestroy_fork(ip->i_afp);
		kmem_cache_free(xfs_ifork_zone, ip->i_afp);
	}
	if (ip->i_cowfp) {
		xfs_idestroy_fork(ip->i_cowfp);
		kmem_cache_free(xfs_ifork_zone, ip->i_cowfp);
	}
	if (ip->i_itemp) {
		ASSERT(!test_bit(XFS_LI_IN_AIL,
				 &ip->i_itemp->ili_item.li_flags));
		xfs_inode_item_destroy(ip);
		ip->i_itemp = NULL;
	}

	kmem_cache_free(xfs_inode_zone, ip);
}

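/*
 * Defer the final freeing of an inode to an RCU callback so that lookups
 * still running under rcu_read_lock() never dereference freed memory.
 */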
static void
__xfs_inode_free(
	struct xfs_inode	*ip)
{
	/* asserts to verify all state is correct here */
	ASSERT(atomic_read(&ip->i_pincount) == 0);
	ASSERT(!ip->i_itemp || list_empty(&ip->i_itemp->ili_item.li_bio_list));
	XFS_STATS_DEC(ip->i_mount, vn_active);

	call_rcu(&VFS_I(ip)->i_rcu, xfs_inode_free_callback);
}

void
xfs_inode_free(
	struct xfs_inode	*ip)
{
	ASSERT(!xfs_iflags_test(ip, XFS_IFLUSHING));

	/*
	 * Because we use RCU freeing we need to ensure the inode always
	 * appears to be reclaimed with an invalid inode number when in the
	 * free state. The ip->i_flags_lock provides the barrier against lookup
	 * races.
	 */
	spin_lock(&ip->i_flags_lock);
	ip->i_flags = XFS_IRECLAIM;
	ip->i_ino = 0;
	spin_unlock(&ip->i_flags_lock);

	__xfs_inode_free(ip);
}

/*
 * Queue background inode reclaim work if there are reclaimable inodes and there
 * isn't reclaim work already scheduled or in progress.
 */
static void
xfs_reclaim_work_queue(
	struct xfs_mount	*mp)
{

	rcu_read_lock();
	if (radix_tree_tagged(&mp->m_perag_tree, XFS_ICI_RECLAIM_TAG)) {
		queue_delayed_work(mp->m_reclaim_workqueue, &mp->m_reclaim_work,
			msecs_to_jiffies(xfs_syncd_centisecs / 6 * 10));
	}
	rcu_read_unlock();
}

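/*
 * Account for a newly reclaimable inode in this AG. The first such inode
 * also tags the AG in the per-mount radix tree and schedules the background
 * reclaim worker.
 */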
static void
xfs_perag_set_reclaim_tag(
	struct xfs_perag	*pag)
{
	struct xfs_mount	*mp = pag->pag_mount;

	lockdep_assert_held(&pag->pag_ici_lock);
	if (pag->pag_ici_reclaimable++)
		return;

	/* propagate the reclaim tag up into the perag radix tree */
	spin_lock(&mp->m_perag_lock);
	radix_tree_tag_set(&mp->m_perag_tree, pag->pag_agno,
			   XFS_ICI_RECLAIM_TAG);
	spin_unlock(&mp->m_perag_lock);

	/* schedule periodic background inode reclaim */
	xfs_reclaim_work_queue(mp);

	trace_xfs_perag_set_reclaim(mp, pag->pag_agno, -1, _RET_IP_);
}

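/*
 * Drop the reclaimable inode count for this AG and clear the per-mount
 * reclaim tag once the last reclaimable inode in the AG is gone.
 */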
static void
xfs_perag_clear_reclaim_tag(
	struct xfs_perag	*pag)
{
	struct xfs_mount	*mp = pag->pag_mount;

	lockdep_assert_held(&pag->pag_ici_lock);
	if (--pag->pag_ici_reclaimable)
		return;

	/* clear the reclaim tag from the perag radix tree */
	spin_lock(&mp->m_perag_lock);
	radix_tree_tag_clear(&mp->m_perag_tree, pag->pag_agno,
			     XFS_ICI_RECLAIM_TAG);
	spin_unlock(&mp->m_perag_lock);
	trace_xfs_perag_clear_reclaim(mp, pag->pag_agno, -1, _RET_IP_);
}


/*
 * We set the inode flag atomically with the radix tree tag.
 * Once we get tag lookups on the radix tree, this inode flag
 * can go away.
 */
void
xfs_inode_set_reclaim_tag(
	struct xfs_inode	*ip)
{
	struct xfs_mount	*mp = ip->i_mount;
	struct xfs_perag	*pag;

	pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ip->i_ino));
	spin_lock(&pag->pag_ici_lock);
	spin_lock(&ip->i_flags_lock);

	radix_tree_tag_set(&pag->pag_ici_root, XFS_INO_TO_AGINO(mp, ip->i_ino),
			   XFS_ICI_RECLAIM_TAG);
	xfs_perag_set_reclaim_tag(pag);
	__xfs_iflags_set(ip, XFS_IRECLAIMABLE);

	spin_unlock(&ip->i_flags_lock);
	spin_unlock(&pag->pag_ici_lock);
	xfs_perag_put(pag);
}

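/*
 * Clear the reclaim tag for @ino in the per-AG inode tree and update the
 * AG-level reclaim accounting to match.
 */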
STATIC void
xfs_inode_clear_reclaim_tag(
	struct xfs_perag	*pag,
	xfs_ino_t		ino)
{
	radix_tree_tag_clear(&pag->pag_ici_root,
			     XFS_INO_TO_AGINO(pag->pag_mount, ino),
			     XFS_ICI_RECLAIM_TAG);
	xfs_perag_clear_reclaim_tag(pag);
}

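/* Wait for the XFS_INEW flag to be cleared on an inode being instantiated. */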
static inline void
xfs_inew_wait(
	struct xfs_inode	*ip)
{
	wait_queue_head_t *wq = bit_waitqueue(&ip->i_flags, __XFS_INEW_BIT);
	DEFINE_WAIT_BIT(wait, &ip->i_flags, __XFS_INEW_BIT);

	do {
		prepare_to_wait(wq, &wait.wq_entry, TASK_UNINTERRUPTIBLE);
		if (!xfs_iflags_test(ip, XFS_INEW))
			break;
		schedule();
	} while (true);
	finish_wait(wq, &wait.wq_entry);
}

/*
 * When we recycle a reclaimable inode, we need to re-initialise the VFS inode
 * part of the structure. This is made more complex by the fact we store
 * information about the on-disk values in the VFS inode and so we can't just
 * overwrite the values unconditionally. Hence we save the parameters we
 * need to retain across reinitialisation, and rewrite them into the VFS inode
 * after reinitialisation even if it fails.
 */
static int
xfs_reinit_inode(
	struct xfs_mount	*mp,
	struct inode		*inode)
{
	int			error;
	uint32_t		nlink = inode->i_nlink;
	uint32_t		generation = inode->i_generation;
	uint64_t		version = inode_peek_iversion(inode);
	umode_t			mode = inode->i_mode;
	dev_t			dev = inode->i_rdev;
	kuid_t			uid = inode->i_uid;
	kgid_t			gid = inode->i_gid;

	error = inode_init_always(mp->m_super, inode);

	set_nlink(inode, nlink);
	inode->i_generation = generation;
	inode_set_iversion_queried(inode, version);
	inode->i_mode = mode;
	inode->i_rdev = dev;
	inode->i_uid = uid;
	inode->i_gid = gid;
	return error;
}

/*
 * If we are allocating a new inode, then check what was returned is
 * actually a free, empty inode. If we are not allocating an inode,
 * then check we didn't find a free inode.
 *
 * Returns:
 *	0		if the inode free state matches the lookup context
 *	-ENOENT		if the inode is free and we are not allocating
 *	-EFSCORRUPTED	if there is any state mismatch at all
 */
static int
xfs_iget_check_free_state(
	struct xfs_inode	*ip,
	int			flags)
{
	if (flags & XFS_IGET_CREATE) {
		/* should be a free inode */
		if (VFS_I(ip)->i_mode != 0) {
			xfs_warn(ip->i_mount,
"Corruption detected! Free inode 0x%llx not marked free! (mode 0x%x)",
				ip->i_ino, VFS_I(ip)->i_mode);
			return -EFSCORRUPTED;
		}

		if (ip->i_nblocks != 0) {
			xfs_warn(ip->i_mount,
"Corruption detected! Free inode 0x%llx has blocks allocated!",
				ip->i_ino);
			return -EFSCORRUPTED;
		}
		return 0;
	}

	/* should be an allocated inode */
	if (VFS_I(ip)->i_mode == 0)
		return -ENOENT;

	return 0;
}

/*
 * Check the validity of the inode we just found in the cache.
 */
static int
xfs_iget_cache_hit(
	struct xfs_perag	*pag,
	struct xfs_inode	*ip,
	xfs_ino_t		ino,
	int			flags,
	int			lock_flags) __releases(RCU)
{
	struct inode		*inode = VFS_I(ip);
	struct xfs_mount	*mp = ip->i_mount;
	int			error;

	/*
	 * check for re-use of an inode within an RCU grace period due to the
	 * radix tree nodes not being updated yet. We monitor for this by
	 * setting the inode number to zero before freeing the inode structure.
	 * If the inode has been reallocated and set up, then the inode number
	 * will not match, so check for that, too.
	 */
	spin_lock(&ip->i_flags_lock);
	if (ip->i_ino != ino) {
		trace_xfs_iget_skip(ip);
		XFS_STATS_INC(mp, xs_ig_frecycle);
		error = -EAGAIN;
		goto out_error;
	}


	/*
	 * If we are racing with another cache hit that is currently
	 * instantiating this inode or currently recycling it out of
	 * reclaimable state, wait for the initialisation to complete
	 * before continuing.
	 *
	 * XXX(hch): eventually we should do something equivalent to
	 * wait_on_inode to wait for these flags to be cleared
	 * instead of polling for it.
	 */
	if (ip->i_flags & (XFS_INEW|XFS_IRECLAIM)) {
		trace_xfs_iget_skip(ip);
		XFS_STATS_INC(mp, xs_ig_frecycle);
		error = -EAGAIN;
		goto out_error;
	}

	/*
	 * Check the inode free state is valid. This also detects lookup
	 * racing with unlinks.
	 */
	error = xfs_iget_check_free_state(ip, flags);
	if (error)
		goto out_error;

	/*
	 * If IRECLAIMABLE is set, we've torn down the VFS inode already.
	 * Need to carefully get it back into useable state.
	 */
	if (ip->i_flags & XFS_IRECLAIMABLE) {
		trace_xfs_iget_reclaim(ip);

		if (flags & XFS_IGET_INCORE) {
			error = -EAGAIN;
			goto out_error;
		}

		/*
		 * We need to set XFS_IRECLAIM to prevent xfs_reclaim_inode
		 * from stomping over us while we recycle the inode. We can't
		 * clear the radix tree reclaimable tag yet as it requires
		 * pag_ici_lock to be held exclusive.
		 */
		ip->i_flags |= XFS_IRECLAIM;

		spin_unlock(&ip->i_flags_lock);
		rcu_read_unlock();

		ASSERT(!rwsem_is_locked(&inode->i_rwsem));
		error = xfs_reinit_inode(mp, inode);
		if (error) {
			bool	wake;
			/*
			 * Re-initializing the inode failed, and we are in deep
			 * trouble.  Try to re-add it to the reclaim list.
			 */
			rcu_read_lock();
			spin_lock(&ip->i_flags_lock);
			wake = !!__xfs_iflags_test(ip, XFS_INEW);
			ip->i_flags &= ~(XFS_INEW | XFS_IRECLAIM);
			if (wake)
				wake_up_bit(&ip->i_flags, __XFS_INEW_BIT);
			ASSERT(ip->i_flags & XFS_IRECLAIMABLE);
			trace_xfs_iget_reclaim_fail(ip);
			goto out_error;
		}

		spin_lock(&pag->pag_ici_lock);
		spin_lock(&ip->i_flags_lock);

		/*
		 * Clear the per-lifetime state in the inode as we are now
		 * effectively a new inode and need to return to the initial
		 * state before reuse occurs.
		 */
		ip->i_flags &= ~XFS_IRECLAIM_RESET_FLAGS;
		ip->i_flags |= XFS_INEW;
		xfs_inode_clear_reclaim_tag(pag, ip->i_ino);
		inode->i_state = I_NEW;
		ip->i_sick = 0;
		ip->i_checked = 0;

		spin_unlock(&ip->i_flags_lock);
		spin_unlock(&pag->pag_ici_lock);
	} else {
		/* If the VFS inode is being torn down, pause and try again. */
		if (!igrab(inode)) {
			trace_xfs_iget_skip(ip);
			error = -EAGAIN;
			goto out_error;
		}

		/* We've got a live one. */
		spin_unlock(&ip->i_flags_lock);
		rcu_read_unlock();
		trace_xfs_iget_hit(ip);
	}

	if (lock_flags != 0)
		xfs_ilock(ip, lock_flags);

	if (!(flags & XFS_IGET_INCORE))
		xfs_iflags_clear(ip, XFS_ISTALE);
	XFS_STATS_INC(mp, xs_ig_found);

	return 0;

out_error:
	spin_unlock(&ip->i_flags_lock);
	rcu_read_unlock();
	return error;
}


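/*
 * Allocate a new incore inode for @ino, read the on-disk inode in if needed,
 * and insert it into the per-AG radix tree with XFS_INEW set.
 */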
static int
xfs_iget_cache_miss(
	struct xfs_mount	*mp,
	struct xfs_perag	*pag,
	xfs_trans_t		*tp,
	xfs_ino_t		ino,
	struct xfs_inode	**ipp,
	int			flags,
	int			lock_flags)
{
	struct xfs_inode	*ip;
	int			error;
	xfs_agino_t		agino = XFS_INO_TO_AGINO(mp, ino);
	int			iflags;

	ip = xfs_inode_alloc(mp, ino);
	if (!ip)
		return -ENOMEM;

	error = xfs_imap(mp, tp, ip->i_ino, &ip->i_imap, flags);
	if (error)
		goto out_destroy;

	/*
	 * For version 5 superblocks, if we are initialising a new inode and we
	 * are not utilising the XFS_MOUNT_IKEEP inode cluster mode, we can
	 * simply build the new inode core with a random generation number.
	 *
	 * For version 4 (and older) superblocks, log recovery is dependent on
	 * the i_flushiter field being initialised from the current on-disk
	 * value and hence we must also read the inode off disk even when
	 * initializing new inodes.
	 */
	if (xfs_sb_version_has_v3inode(&mp->m_sb) &&
	    (flags & XFS_IGET_CREATE) && !(mp->m_flags & XFS_MOUNT_IKEEP)) {
		VFS_I(ip)->i_generation = prandom_u32();
	} else {
		struct xfs_buf		*bp;

		error = xfs_imap_to_bp(mp, tp, &ip->i_imap, &bp);
		if (error)
			goto out_destroy;

		error = xfs_inode_from_disk(ip,
				xfs_buf_offset(bp, ip->i_imap.im_boffset));
		if (!error)
			xfs_buf_set_ref(bp, XFS_INO_REF);
		xfs_trans_brelse(tp, bp);

		if (error)
			goto out_destroy;
	}

	trace_xfs_iget_miss(ip);

	/*
	 * Check the inode free state is valid. This also detects lookup
	 * racing with unlinks.
	 */
	error = xfs_iget_check_free_state(ip, flags);
	if (error)
		goto out_destroy;

	/*
	 * Preload the radix tree so we can insert safely under the
	 * write spinlock. Note that we cannot sleep inside the preload
	 * region. Since we can be called from transaction context, don't
	 * recurse into the file system.
	 */
	if (radix_tree_preload(GFP_NOFS)) {
		error = -EAGAIN;
		goto out_destroy;
	}

	/*
	 * Because the inode hasn't been added to the radix-tree yet it can't
	 * be found by another thread, so we can do the non-sleeping lock here.
	 */
	if (lock_flags) {
		if (!xfs_ilock_nowait(ip, lock_flags))
			BUG();
	}

	/*
	 * These values must be set before inserting the inode into the radix
	 * tree as the moment it is inserted a concurrent lookup (allowed by the
	 * RCU locking mechanism) can find it and that lookup must see that this
	 * is an inode currently under construction (i.e. that XFS_INEW is set).
	 * The ip->i_flags_lock that protects the XFS_INEW flag forms the
	 * memory barrier that ensures this detection works correctly at lookup
	 * time.
	 */
	iflags = XFS_INEW;
	if (flags & XFS_IGET_DONTCACHE)
		d_mark_dontcache(VFS_I(ip));
	ip->i_udquot = NULL;
	ip->i_gdquot = NULL;
	ip->i_pdquot = NULL;
	xfs_iflags_set(ip, iflags);

	/* insert the new inode */
	spin_lock(&pag->pag_ici_lock);
	error = radix_tree_insert(&pag->pag_ici_root, agino, ip);
	if (unlikely(error)) {
		WARN_ON(error != -EEXIST);
		XFS_STATS_INC(mp, xs_ig_dup);
		error = -EAGAIN;
		goto out_preload_end;
	}
	spin_unlock(&pag->pag_ici_lock);
	radix_tree_preload_end();

	*ipp = ip;
	return 0;

out_preload_end:
	spin_unlock(&pag->pag_ici_lock);
	radix_tree_preload_end();
	if (lock_flags)
		xfs_iunlock(ip, lock_flags);
out_destroy:
	__destroy_inode(VFS_I(ip));
	xfs_inode_free(ip);
	return error;
}

/*
 * Look up an inode by number in the given file system. The inode is looked up
 * in the cache held in each AG. If the inode is found in the cache, initialise
 * the vfs inode if necessary.
 *
 * If it is not in core, read it in from the file system's device, add it to the
 * cache and initialise the vfs inode.
 *
 * The inode is locked according to the value of the lock_flags parameter.
 * Inode lookup is only done during metadata operations and not as part of the
 * data IO path. Hence we only allow locking of the XFS_ILOCK during lookup.
 */
int
xfs_iget(
	struct xfs_mount	*mp,
	struct xfs_trans	*tp,
	xfs_ino_t		ino,
	uint			flags,
	uint			lock_flags,
	struct xfs_inode	**ipp)
{
	struct xfs_inode	*ip;
	struct xfs_perag	*pag;
	xfs_agino_t		agino;
	int			error;

	ASSERT((lock_flags & (XFS_IOLOCK_EXCL | XFS_IOLOCK_SHARED)) == 0);

	/* reject inode numbers outside existing AGs */
	if (!ino || XFS_INO_TO_AGNO(mp, ino) >= mp->m_sb.sb_agcount)
		return -EINVAL;

	XFS_STATS_INC(mp, xs_ig_attempts);

	/* get the perag structure and ensure that it's inode capable */
	pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ino));
	agino = XFS_INO_TO_AGINO(mp, ino);

again:
	error = 0;
	rcu_read_lock();
	ip = radix_tree_lookup(&pag->pag_ici_root, agino);

	if (ip) {
		error = xfs_iget_cache_hit(pag, ip, ino, flags, lock_flags);
		if (error)
			goto out_error_or_again;
	} else {
		rcu_read_unlock();
		if (flags & XFS_IGET_INCORE) {
			error = -ENODATA;
			goto out_error_or_again;
		}
		XFS_STATS_INC(mp, xs_ig_missed);

		error = xfs_iget_cache_miss(mp, pag, tp, ino, &ip,
							flags, lock_flags);
		if (error)
			goto out_error_or_again;
	}
	xfs_perag_put(pag);

	*ipp = ip;

	/*
	 * If we have a real type for an on-disk inode, we can setup the inode
	 * now. If it's a new inode being created, xfs_ialloc will handle it.
	 */
	if (xfs_iflags_test(ip, XFS_INEW) && VFS_I(ip)->i_mode != 0)
		xfs_setup_existing_inode(ip);
	return 0;

out_error_or_again:
	if (!(flags & XFS_IGET_INCORE) && error == -EAGAIN) {
		delay(1);
		goto again;
	}
	xfs_perag_put(pag);
	return error;
}

/*
 * "Is this a cached inode that's also allocated?"
 *
 * Look up an inode by number in the given file system. If the inode is
 * in cache and isn't in purgatory, return 1 if the inode is allocated
 * and 0 if it is not. For all other cases (not in cache, being torn
 * down, etc.), return a negative error code.
 *
 * The caller has to prevent inode allocation and freeing activity,
 * presumably by locking the AGI buffer. This is to ensure that an
 * inode cannot transition from allocated to freed until the caller is
 * ready to allow that. If the inode is in an intermediate state (new,
 * reclaimable, or being reclaimed), -EAGAIN will be returned; if the
 * inode is not in the cache, -ENOENT will be returned. The caller must
 * deal with these scenarios appropriately.
 *
 * This is a specialized use case for the online scrubber; if you're
 * reading this, you probably want xfs_iget.
 */
int
xfs_icache_inode_is_allocated(
	struct xfs_mount	*mp,
	struct xfs_trans	*tp,
	xfs_ino_t		ino,
	bool			*inuse)
{
	struct xfs_inode	*ip;
	int			error;

	error = xfs_iget(mp, tp, ino, XFS_IGET_INCORE, 0, &ip);
	if (error)
		return error;

	*inuse = !!(VFS_I(ip)->i_mode);
	xfs_irele(ip);
	return 0;
}

/*
 * The inode lookup is done in batches to keep the amount of lock traffic and
 * radix tree lookups to a minimum. The batch size is a trade off between
 * lookup reduction and stack usage. This is in the reclaim path, so we can't
 * be too greedy.
 *
 * XXX: This will be moved closer to xfs_icwalk* once we get rid of the
 * separate reclaim walk functions.
 */
#define XFS_LOOKUP_BATCH	32

#ifdef CONFIG_XFS_QUOTA
/* Decide if we want to grab this inode to drop its dquots. */
static bool
xfs_dqrele_igrab(
	struct xfs_inode	*ip)
{
	bool			ret = false;

	ASSERT(rcu_read_lock_held());

	/* Check for stale RCU freed inode */
	spin_lock(&ip->i_flags_lock);
	if (!ip->i_ino)
		goto out_unlock;

	/*
	 * Skip inodes that are anywhere in the reclaim machinery because we
	 * drop dquots before tagging an inode for reclamation.
	 */
	if (ip->i_flags & (XFS_IRECLAIM | XFS_IRECLAIMABLE))
		goto out_unlock;

	/*
	 * The inode looks alive; try to grab a VFS reference so that it won't
	 * get destroyed. If we got the reference, return true to say that
	 * we grabbed the inode.
	 *
	 * If we can't get the reference, then we know the inode had its VFS
	 * state torn down and hasn't yet entered the reclaim machinery. Since
	 * we also know that dquots are detached from an inode before it enters
	 * reclaim, we can skip the inode.
	 */
	ret = igrab(VFS_I(ip)) != NULL;

out_unlock:
	spin_unlock(&ip->i_flags_lock);
	return ret;
}

/* Drop this inode's dquots. */
static int
xfs_dqrele_inode(
	struct xfs_inode	*ip,
	void			*priv)
{
	struct xfs_eofblocks	*eofb = priv;

	if (xfs_iflags_test(ip, XFS_INEW))
		xfs_inew_wait(ip);

	xfs_ilock(ip, XFS_ILOCK_EXCL);
	if (eofb->eof_flags & XFS_ICWALK_FLAG_DROP_UDQUOT) {
		xfs_qm_dqrele(ip->i_udquot);
		ip->i_udquot = NULL;
	}
	if (eofb->eof_flags & XFS_ICWALK_FLAG_DROP_GDQUOT) {
		xfs_qm_dqrele(ip->i_gdquot);
		ip->i_gdquot = NULL;
	}
	if (eofb->eof_flags & XFS_ICWALK_FLAG_DROP_PDQUOT) {
		xfs_qm_dqrele(ip->i_pdquot);
		ip->i_pdquot = NULL;
	}
	xfs_iunlock(ip, XFS_ILOCK_EXCL);
	return 0;
}

/*
 * Detach all dquots from incore inodes if we can. The caller must already
 * have dropped the relevant XFS_[UGP]QUOTA_ACTIVE flags so that dquots will
 * not get reattached.
 */
int
xfs_dqrele_all_inodes(
	struct xfs_mount	*mp,
	unsigned int		qflags)
{
	struct xfs_eofblocks	eofb = { .eof_flags = 0 };

	if (qflags & XFS_UQUOTA_ACCT)
		eofb.eof_flags |= XFS_ICWALK_FLAG_DROP_UDQUOT;
	if (qflags & XFS_GQUOTA_ACCT)
		eofb.eof_flags |= XFS_ICWALK_FLAG_DROP_GDQUOT;
	if (qflags & XFS_PQUOTA_ACCT)
		eofb.eof_flags |= XFS_ICWALK_FLAG_DROP_PDQUOT;

	return xfs_icwalk(mp, XFS_ICWALK_DQRELE, &eofb);
}
#else
# define xfs_dqrele_igrab(ip)		(false)
# define xfs_dqrele_inode(ip, priv)	(0)
#endif /* CONFIG_XFS_QUOTA */

/*
 * Grab the inode for reclaim exclusively.
 *
 * We have found this inode via a lookup under RCU, so the inode may have
 * already been freed, or it may be in the process of being recycled by
 * xfs_iget(). In both cases, the inode will have XFS_IRECLAIM set. If the inode
 * has been fully recycled by the time we get the i_flags_lock, XFS_IRECLAIMABLE
 * will not be set. Hence we need to check for both these flag conditions to
 * avoid inodes that are no longer reclaim candidates.
 *
 * Note: checking for other state flags here, under the i_flags_lock or not, is
 * racy and should be avoided. Those races should be resolved only after we have
 * ensured that we are able to reclaim this inode and the world can see that we
 * are going to reclaim it.
 *
 * Return true if we grabbed it, false otherwise.
 */
static bool
xfs_reclaim_inode_grab(
	struct xfs_inode	*ip)
{
	ASSERT(rcu_read_lock_held());

	spin_lock(&ip->i_flags_lock);
	if (!__xfs_iflags_test(ip, XFS_IRECLAIMABLE) ||
	    __xfs_iflags_test(ip, XFS_IRECLAIM)) {
		/* not a reclaim candidate. */
		spin_unlock(&ip->i_flags_lock);
		return false;
	}
	__xfs_iflags_set(ip, XFS_IRECLAIM);
	spin_unlock(&ip->i_flags_lock);
	return true;
}

/*
 * Inode reclaim is non-blocking, so the default action if progress cannot be
 * made is to "requeue" the inode for reclaim by unlocking it and clearing the
 * XFS_IRECLAIM flag. If we are in a shutdown state, we don't care about
 * blocking anymore and hence we can wait for the inode to be able to reclaim
 * it.
 *
 * We do no IO here - if callers require inodes to be cleaned they must push the
 * AIL first to trigger writeback of dirty inodes. This enables writeback to be
 * done in the background in a non-blocking manner, and enables memory reclaim
 * to make progress without blocking.
 */
static void
xfs_reclaim_inode(
	struct xfs_inode	*ip,
	struct xfs_perag	*pag)
{
	xfs_ino_t		ino = ip->i_ino; /* for radix_tree_delete */

	if (!xfs_ilock_nowait(ip, XFS_ILOCK_EXCL))
		goto out;
	if (xfs_iflags_test_and_set(ip, XFS_IFLUSHING))
		goto out_iunlock;

	if (XFS_FORCED_SHUTDOWN(ip->i_mount)) {
		xfs_iunpin_wait(ip);
		xfs_iflush_abort(ip);
		goto reclaim;
	}
	if (xfs_ipincount(ip))
		goto out_clear_flush;
	if (!xfs_inode_clean(ip))
		goto out_clear_flush;

	xfs_iflags_clear(ip, XFS_IFLUSHING);
reclaim:

	/*
	 * Because we use RCU freeing we need to ensure the inode always appears
	 * to be reclaimed with an invalid inode number when in the free state.
	 * We do this as early as possible under the ILOCK so that
	 * xfs_iflush_cluster() and xfs_ifree_cluster() can be guaranteed to
	 * detect races with us here. By doing this, we guarantee that once
	 * xfs_iflush_cluster() or xfs_ifree_cluster() has locked XFS_ILOCK that
	 * it will see either a valid inode that will serialise correctly, or it
	 * will see an invalid inode that it can skip.
	 */
	spin_lock(&ip->i_flags_lock);
	ip->i_flags = XFS_IRECLAIM;
	ip->i_ino = 0;
	spin_unlock(&ip->i_flags_lock);

	xfs_iunlock(ip, XFS_ILOCK_EXCL);

	XFS_STATS_INC(ip->i_mount, xs_ig_reclaims);
	/*
	 * Remove the inode from the per-AG radix tree.
	 *
	 * Because radix_tree_delete won't complain even if the item was never
	 * added to the tree assert that it's been there before to catch
	 * problems with the inode life time early on.
	 */
	spin_lock(&pag->pag_ici_lock);
	if (!radix_tree_delete(&pag->pag_ici_root,
				XFS_INO_TO_AGINO(ip->i_mount, ino)))
		ASSERT(0);
	xfs_perag_clear_reclaim_tag(pag);
	spin_unlock(&pag->pag_ici_lock);

	/*
	 * Here we do an (almost) spurious inode lock in order to coordinate
	 * with inode cache radix tree lookups. This is because the lookup
	 * can reference the inodes in the cache without taking references.
	 *
	 * We make that OK here by ensuring that we wait until the inode is
	 * unlocked after the lookup before we go ahead and free it.
	 */
	xfs_ilock(ip, XFS_ILOCK_EXCL);
	ASSERT(!ip->i_udquot && !ip->i_gdquot && !ip->i_pdquot);
	xfs_iunlock(ip, XFS_ILOCK_EXCL);
	ASSERT(xfs_inode_clean(ip));

	__xfs_inode_free(ip);
	return;

out_clear_flush:
	xfs_iflags_clear(ip, XFS_IFLUSHING);
out_iunlock:
	xfs_iunlock(ip, XFS_ILOCK_EXCL);
out:
	xfs_iflags_clear(ip, XFS_IRECLAIM);
}

/*
 * Walk the AGs and reclaim the inodes in them. Even if the filesystem is
 * corrupted, we still want to try to reclaim all the inodes. If we don't,
 * then a shutdown during a filesystem unmount reclaim walk will leak all the
 * unreclaimed inodes.
 *
 * The walk decrements *nr_to_scan as it goes so that callers that want to
 * block until all dirty inodes are written back and reclaimed can sanely loop.
Dave Chinner65d0f202010-09-24 18:40:15 +10001004 */
Dave Chinner4d0bab32020-07-01 10:21:28 -07001005static void
Dave Chinner65d0f202010-09-24 18:40:15 +10001006xfs_reclaim_inodes_ag(
1007 struct xfs_mount *mp,
Dave Chinner65d0f202010-09-24 18:40:15 +10001008 int *nr_to_scan)
1009{
1010 struct xfs_perag *pag;
Dave Chinner0e8e2c62020-06-29 14:49:16 -07001011 xfs_agnumber_t ag = 0;
Dave Chinner65d0f202010-09-24 18:40:15 +10001012
Dave Chinner65d0f202010-09-24 18:40:15 +10001013 while ((pag = xfs_perag_get_tag(mp, ag, XFS_ICI_RECLAIM_TAG))) {
1014 unsigned long first_index = 0;
1015 int done = 0;
Dave Chinnere3a20c02010-09-24 19:51:50 +10001016 int nr_found = 0;
Dave Chinner65d0f202010-09-24 18:40:15 +10001017
1018 ag = pag->pag_agno + 1;
1019
Dave Chinner0e8e2c62020-06-29 14:49:16 -07001020 first_index = READ_ONCE(pag->pag_ici_reclaim_cursor);
Dave Chinner65d0f202010-09-24 18:40:15 +10001021 do {
Dave Chinnere3a20c02010-09-24 19:51:50 +10001022 struct xfs_inode *batch[XFS_LOOKUP_BATCH];
1023 int i;
Dave Chinner65d0f202010-09-24 18:40:15 +10001024
Dave Chinner1a3e8f32010-12-17 17:29:43 +11001025 rcu_read_lock();
Dave Chinnere3a20c02010-09-24 19:51:50 +10001026 nr_found = radix_tree_gang_lookup_tag(
1027 &pag->pag_ici_root,
1028 (void **)batch, first_index,
1029 XFS_LOOKUP_BATCH,
Dave Chinner65d0f202010-09-24 18:40:15 +10001030 XFS_ICI_RECLAIM_TAG);
1031 if (!nr_found) {
Dave Chinnerb2232212011-05-06 02:54:04 +00001032 done = 1;
Dave Chinner1a3e8f32010-12-17 17:29:43 +11001033 rcu_read_unlock();
Dave Chinner65d0f202010-09-24 18:40:15 +10001034 break;
1035 }
1036
1037 /*
			 * Grab the inodes before we drop the lock. If we found
			 * nothing, nr == 0 and the loop will be skipped.
			 */
			for (i = 0; i < nr_found; i++) {
				struct xfs_inode *ip = batch[i];

				if (done || !xfs_reclaim_inode_grab(ip))
					batch[i] = NULL;

				/*
				 * Update the index for the next lookup. Catch
				 * overflows into the next AG range which can
				 * occur if we have inodes in the last block of
				 * the AG and we are currently pointing to the
				 * last inode.
				 *
				 * Because we may see inodes that are from the
				 * wrong AG due to RCU freeing and
				 * reallocation, only update the index if it
				 * lies in this AG. It was a race that led us
				 * to see this inode, so another lookup from
				 * the same index will not find it again.
				 */
				if (XFS_INO_TO_AGNO(mp, ip->i_ino) !=
								pag->pag_agno)
					continue;
				first_index = XFS_INO_TO_AGINO(mp, ip->i_ino + 1);
				if (first_index < XFS_INO_TO_AGINO(mp, ip->i_ino))
					done = 1;
			}

			/* unlock now we've grabbed the inodes. */
			rcu_read_unlock();

			for (i = 0; i < nr_found; i++) {
				if (batch[i])
					xfs_reclaim_inode(batch[i], pag);
			}

			*nr_to_scan -= XFS_LOOKUP_BATCH;
			cond_resched();
		} while (nr_found && !done && *nr_to_scan > 0);

		if (done)
			first_index = 0;
		WRITE_ONCE(pag->pag_ici_reclaim_cursor, first_index);
		xfs_perag_put(pag);
	}
}

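/*
 * Reclaim every reclaimable inode in the filesystem, pushing the AIL between
 * passes so that dirty inodes are written back and become reclaimable.
 */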
void
xfs_reclaim_inodes(
	struct xfs_mount	*mp)
{
	int		nr_to_scan = INT_MAX;

	while (radix_tree_tagged(&mp->m_perag_tree, XFS_ICI_RECLAIM_TAG)) {
		xfs_ail_push_all_sync(mp->m_ail);
		xfs_reclaim_inodes_ag(mp, &nr_to_scan);
	}
}

/*
 * The shrinker infrastructure determines how many inodes we should scan for
 * reclaim. We want as many clean inodes ready to reclaim as possible, so we
 * push the AIL here. We also want to proactively free up memory if we can to
 * minimise the amount of work memory reclaim has to do so we kick the
 * background reclaim if it isn't already scheduled.
 */
long
xfs_reclaim_inodes_nr(
	struct xfs_mount	*mp,
	int			nr_to_scan)
{
	/* kick background reclaimer and push the AIL */
	xfs_reclaim_work_queue(mp);
	xfs_ail_push_all(mp->m_ail);

	xfs_reclaim_inodes_ag(mp, &nr_to_scan);
	return 0;
}

/*
 * Return the number of reclaimable inodes in the filesystem for
 * the shrinker to determine how much to reclaim.
 */
int
xfs_reclaim_inodes_count(
	struct xfs_mount	*mp)
{
	struct xfs_perag	*pag;
	xfs_agnumber_t		ag = 0;
	int			reclaimable = 0;

	while ((pag = xfs_perag_get_tag(mp, ag, XFS_ICI_RECLAIM_TAG))) {
		ag = pag->pag_agno + 1;
		reclaimable += pag->pag_ici_reclaimable;
		xfs_perag_put(pag);
	}
	return reclaimable;
}

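/*
 * An intersection-based inode filter: the inode matches only if every ID
 * criterion (uid, gid, project id) set in @eofb matches.
 */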
STATIC bool
xfs_inode_match_id(
	struct xfs_inode	*ip,
	struct xfs_eofblocks	*eofb)
{
	if ((eofb->eof_flags & XFS_EOF_FLAGS_UID) &&
	    !uid_eq(VFS_I(ip)->i_uid, eofb->eof_uid))
		return false;

	if ((eofb->eof_flags & XFS_EOF_FLAGS_GID) &&
	    !gid_eq(VFS_I(ip)->i_gid, eofb->eof_gid))
		return false;

	if ((eofb->eof_flags & XFS_EOF_FLAGS_PRID) &&
	    ip->i_projid != eofb->eof_prid)
		return false;

	return true;
}

/*
 * A union-based inode filtering algorithm. Process the inode if any of the
 * criteria match. This is for global/internal scans only.
 */
STATIC bool
xfs_inode_match_id_union(
	struct xfs_inode	*ip,
	struct xfs_eofblocks	*eofb)
{
	if ((eofb->eof_flags & XFS_EOF_FLAGS_UID) &&
	    uid_eq(VFS_I(ip)->i_uid, eofb->eof_uid))
		return true;

	if ((eofb->eof_flags & XFS_EOF_FLAGS_GID) &&
	    gid_eq(VFS_I(ip)->i_gid, eofb->eof_gid))
		return true;

	if ((eofb->eof_flags & XFS_EOF_FLAGS_PRID) &&
	    ip->i_projid == eofb->eof_prid)
		return true;

	return false;
}

/*
 * Is this inode @ip eligible for eof/cow block reclamation, given some
 * filtering parameters @eofb?  The inode is eligible if @eofb is null or
 * if the predicate functions match.
 */
static bool
xfs_inode_matches_eofb(
	struct xfs_inode	*ip,
	struct xfs_eofblocks	*eofb)
{
	bool			match;

	if (!eofb)
		return true;

	if (eofb->eof_flags & XFS_EOF_FLAGS_UNION)
		match = xfs_inode_match_id_union(ip, eofb);
	else
		match = xfs_inode_match_id(ip, eofb);
	if (!match)
		return false;

	/* skip the inode if the file size is too small */
	if ((eofb->eof_flags & XFS_EOF_FLAGS_MINFILESIZE) &&
	    XFS_ISIZE(ip) < eofb->eof_min_file_size)
		return false;

	return true;
}

/*
 * This is a fast pass over the inode cache to try to get reclaim moving on as
 * many inodes as possible in a short period of time. It kicks itself every few
 * seconds, as well as being kicked by the inode cache shrinker when memory
 * goes low.
 */
void
xfs_reclaim_worker(
	struct work_struct *work)
{
	struct xfs_mount *mp = container_of(to_delayed_work(work),
					struct xfs_mount, m_reclaim_work);
	int		nr_to_scan = INT_MAX;

	xfs_reclaim_inodes_ag(mp, &nr_to_scan);
	xfs_reclaim_work_queue(mp);
}

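/*
 * Free post-EOF speculative preallocations on this inode if it matches the
 * filter in @args. Takes the IOLOCK and reports the held lock state back to
 * the caller through @lockflags.
 */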
STATIC int
xfs_inode_free_eofblocks(
	struct xfs_inode	*ip,
	void			*args,
	unsigned int		*lockflags)
{
	struct xfs_eofblocks	*eofb = args;
	bool			wait;

	wait = eofb && (eofb->eof_flags & XFS_EOF_FLAGS_SYNC);

	if (!xfs_iflags_test(ip, XFS_IEOFBLOCKS))
		return 0;

	/*
	 * If the mapping is dirty the operation can block and wait for some
	 * time. Unless we are waiting, skip it.
	 */
	if (!wait && mapping_tagged(VFS_I(ip)->i_mapping, PAGECACHE_TAG_DIRTY))
		return 0;

	if (!xfs_inode_matches_eofb(ip, eofb))
		return 0;

	/*
	 * If the caller is waiting, return -EAGAIN to keep the background
	 * scanner moving and revisit the inode in a subsequent pass.
	 */
	if (!xfs_ilock_nowait(ip, XFS_IOLOCK_EXCL)) {
		if (wait)
			return -EAGAIN;
		return 0;
	}
	*lockflags |= XFS_IOLOCK_EXCL;

	if (xfs_can_free_eofblocks(ip, false))
		return xfs_free_eofblocks(ip);

	/* inode could be preallocated or append-only */
	trace_xfs_inode_free_eofblocks_invalid(ip);
	xfs_inode_clear_eofblocks_tag(ip);
	return 0;
}

/*
 * Background scanning to trim preallocated space. This is queued based on the
 * 'speculative_prealloc_lifetime' tunable (5m by default).
 */
static inline void
xfs_blockgc_queue(
	struct xfs_perag	*pag)
{
	rcu_read_lock();
	if (radix_tree_tagged(&pag->pag_ici_root, XFS_ICI_BLOCKGC_TAG))
		queue_delayed_work(pag->pag_mount->m_gc_workqueue,
				   &pag->pag_blockgc_work,
				   msecs_to_jiffies(xfs_blockgc_secs * 1000));
	rcu_read_unlock();
}

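/*
 * Set a speculative preallocation flag (XFS_IEOFBLOCKS or XFS_ICOWBLOCKS) on
 * this inode and tag it in the per-AG tree so background trimming finds it.
 */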
static void
xfs_blockgc_set_iflag(
	struct xfs_inode	*ip,
	unsigned long		iflag)
{
	struct xfs_mount	*mp = ip->i_mount;
	struct xfs_perag	*pag;
	int			tagged;

	ASSERT((iflag & ~(XFS_IEOFBLOCKS | XFS_ICOWBLOCKS)) == 0);

	/*
	 * Don't bother locking the AG and looking up in the radix trees
	 * if we already know that we have the tag set.
	 */
	if (ip->i_flags & iflag)
		return;
	spin_lock(&ip->i_flags_lock);
	ip->i_flags |= iflag;
	spin_unlock(&ip->i_flags_lock);

	pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ip->i_ino));
	spin_lock(&pag->pag_ici_lock);

	tagged = radix_tree_tagged(&pag->pag_ici_root, XFS_ICI_BLOCKGC_TAG);
	radix_tree_tag_set(&pag->pag_ici_root,
			   XFS_INO_TO_AGINO(ip->i_mount, ip->i_ino),
			   XFS_ICI_BLOCKGC_TAG);
	if (!tagged) {
		/* propagate the blockgc tag up into the perag radix tree */
		spin_lock(&ip->i_mount->m_perag_lock);
		radix_tree_tag_set(&ip->i_mount->m_perag_tree,
				   XFS_INO_TO_AGNO(ip->i_mount, ip->i_ino),
				   XFS_ICI_BLOCKGC_TAG);
		spin_unlock(&ip->i_mount->m_perag_lock);

		/* kick off background trimming */
		xfs_blockgc_queue(pag);

		trace_xfs_perag_set_blockgc(ip->i_mount, pag->pag_agno, -1,
				_RET_IP_);
	}

	spin_unlock(&pag->pag_ici_lock);
	xfs_perag_put(pag);
}

1339void
Darrick J. Wong83104d42016-10-03 09:11:46 -07001340xfs_inode_set_eofblocks_tag(
Brian Foster27b52862012-11-06 09:50:38 -05001341 xfs_inode_t *ip)
1342{
Darrick J. Wong83104d42016-10-03 09:11:46 -07001343 trace_xfs_inode_set_eofblocks_tag(ip);
Darrick J. Wong9669f512021-01-22 16:48:43 -08001344 return xfs_blockgc_set_iflag(ip, XFS_IEOFBLOCKS);
Darrick J. Wong83104d42016-10-03 09:11:46 -07001345}
1346
1347static void
Darrick J. Wongce2d3bb2021-01-22 16:48:43 -08001348xfs_blockgc_clear_iflag(
1349 struct xfs_inode *ip,
1350 unsigned long iflag)
Darrick J. Wong83104d42016-10-03 09:11:46 -07001351{
Darrick J. Wongce2d3bb2021-01-22 16:48:43 -08001352 struct xfs_mount *mp = ip->i_mount;
1353 struct xfs_perag *pag;
1354 bool clear_tag;
1355
1356 ASSERT((iflag & ~(XFS_IEOFBLOCKS | XFS_ICOWBLOCKS)) == 0);
Brian Foster27b52862012-11-06 09:50:38 -05001357
Christoph Hellwig85a6e762016-09-19 11:09:48 +10001358 spin_lock(&ip->i_flags_lock);
Darrick J. Wongce2d3bb2021-01-22 16:48:43 -08001359 ip->i_flags &= ~iflag;
1360 clear_tag = (ip->i_flags & (XFS_IEOFBLOCKS | XFS_ICOWBLOCKS)) == 0;
Christoph Hellwig85a6e762016-09-19 11:09:48 +10001361 spin_unlock(&ip->i_flags_lock);
1362
Darrick J. Wongce2d3bb2021-01-22 16:48:43 -08001363 if (!clear_tag)
1364 return;
1365
Brian Foster27b52862012-11-06 09:50:38 -05001366 pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ip->i_ino));
1367 spin_lock(&pag->pag_ici_lock);
Brian Foster27b52862012-11-06 09:50:38 -05001368
1369 radix_tree_tag_clear(&pag->pag_ici_root,
Darrick J. Wongce2d3bb2021-01-22 16:48:43 -08001370 XFS_INO_TO_AGINO(ip->i_mount, ip->i_ino),
1371 XFS_ICI_BLOCKGC_TAG);
1372 if (!radix_tree_tagged(&pag->pag_ici_root, XFS_ICI_BLOCKGC_TAG)) {
1373 /* clear the blockgc tag from the perag radix tree */
Brian Foster27b52862012-11-06 09:50:38 -05001374 spin_lock(&ip->i_mount->m_perag_lock);
1375 radix_tree_tag_clear(&ip->i_mount->m_perag_tree,
1376 XFS_INO_TO_AGNO(ip->i_mount, ip->i_ino),
Darrick J. Wongce2d3bb2021-01-22 16:48:43 -08001377 XFS_ICI_BLOCKGC_TAG);
Brian Foster27b52862012-11-06 09:50:38 -05001378 spin_unlock(&ip->i_mount->m_perag_lock);
Darrick J. Wongce2d3bb2021-01-22 16:48:43 -08001379 trace_xfs_perag_clear_blockgc(ip->i_mount, pag->pag_agno, -1,
1380 _RET_IP_);
Brian Foster27b52862012-11-06 09:50:38 -05001381 }
1382
1383 spin_unlock(&pag->pag_ici_lock);
1384 xfs_perag_put(pag);
1385}
1386
Darrick J. Wong83104d42016-10-03 09:11:46 -07001387void
1388xfs_inode_clear_eofblocks_tag(
1389 xfs_inode_t *ip)
1390{
1391 trace_xfs_inode_clear_eofblocks_tag(ip);
Darrick J. Wongce2d3bb2021-01-22 16:48:43 -08001392 return xfs_blockgc_clear_iflag(ip, XFS_IEOFBLOCKS);
Darrick J. Wong83104d42016-10-03 09:11:46 -07001393}
1394
1395/*
Darrick J. Wongbe78ff02018-01-16 19:03:59 -08001396 * Set ourselves up to free CoW blocks from this file. If it's already clean
1397 * then we can bail out quickly, but otherwise we must back off if the file
1398 * is undergoing some kind of write.
1399 */
1400static bool
1401xfs_prep_free_cowblocks(
Christoph Hellwig51d62692018-07-17 16:51:51 -07001402 struct xfs_inode *ip)
Darrick J. Wongbe78ff02018-01-16 19:03:59 -08001403{
1404 /*
1405 * Just clear the tag if we have an empty cow fork or none at all. It's
1406 * possible the inode was fully unshared since it was originally tagged.
1407 */
Christoph Hellwig51d62692018-07-17 16:51:51 -07001408 if (!xfs_inode_has_cow_data(ip)) {
Darrick J. Wongbe78ff02018-01-16 19:03:59 -08001409 trace_xfs_inode_free_cowblocks_invalid(ip);
1410 xfs_inode_clear_cowblocks_tag(ip);
1411 return false;
1412 }
1413
1414 /*
	 * If the mapping is dirty or under writeback, we cannot touch the
1416 * CoW fork. Leave it alone if we're in the midst of a directio.
1417 */
1418 if ((VFS_I(ip)->i_state & I_DIRTY_PAGES) ||
1419 mapping_tagged(VFS_I(ip)->i_mapping, PAGECACHE_TAG_DIRTY) ||
1420 mapping_tagged(VFS_I(ip)->i_mapping, PAGECACHE_TAG_WRITEBACK) ||
1421 atomic_read(&VFS_I(ip)->i_dio_count))
1422 return false;
1423
1424 return true;
1425}
1426
1427/*
Darrick J. Wong83104d42016-10-03 09:11:46 -07001428 * Automatic CoW Reservation Freeing
1429 *
1430 * These functions automatically garbage collect leftover CoW reservations
1431 * that were made on behalf of a cowextsize hint when we start to run out
1432 * of quota or when the reservations sit around for too long. If the file
1433 * has dirty pages or is undergoing writeback, its CoW reservations will
1434 * be retained.
1435 *
1436 * The actual garbage collection piggybacks off the same code that runs
1437 * the speculative EOF preallocation garbage collector.
1438 */
1439STATIC int
1440xfs_inode_free_cowblocks(
1441 struct xfs_inode *ip,
Darrick J. Wong0fa4a102021-01-25 21:09:49 -08001442 void *args,
1443 unsigned int *lockflags)
Darrick J. Wong83104d42016-10-03 09:11:46 -07001444{
Darrick J. Wongbe78ff02018-01-16 19:03:59 -08001445 struct xfs_eofblocks *eofb = args;
Darrick J. Wongf41a0712021-01-22 16:48:35 -08001446 bool wait;
Darrick J. Wongbe78ff02018-01-16 19:03:59 -08001447 int ret = 0;
Darrick J. Wong83104d42016-10-03 09:11:46 -07001448
Darrick J. Wongf41a0712021-01-22 16:48:35 -08001449 wait = eofb && (eofb->eof_flags & XFS_EOF_FLAGS_SYNC);
1450
Darrick J. Wongce2d3bb2021-01-22 16:48:43 -08001451 if (!xfs_iflags_test(ip, XFS_ICOWBLOCKS))
1452 return 0;
1453
Christoph Hellwig51d62692018-07-17 16:51:51 -07001454 if (!xfs_prep_free_cowblocks(ip))
Darrick J. Wong83104d42016-10-03 09:11:46 -07001455 return 0;
1456
Darrick J. Wonga91bf992020-05-21 13:08:48 -07001457 if (!xfs_inode_matches_eofb(ip, eofb))
1458 return 0;
Darrick J. Wong83104d42016-10-03 09:11:46 -07001459
Darrick J. Wongf41a0712021-01-22 16:48:35 -08001460 /*
1461 * If the caller is waiting, return -EAGAIN to keep the background
1462 * scanner moving and revisit the inode in a subsequent pass.
1463 */
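	/*
	 * The earlier eofblocks pass may already have taken the IOLOCK and
	 * recorded it in *lockflags, in which case we can skip straight to
	 * the MMAPLOCK below.
	 */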
Darrick J. Wong0fa4a102021-01-25 21:09:49 -08001464 if (!(*lockflags & XFS_IOLOCK_EXCL) &&
1465 !xfs_ilock_nowait(ip, XFS_IOLOCK_EXCL)) {
Darrick J. Wongf41a0712021-01-22 16:48:35 -08001466 if (wait)
1467 return -EAGAIN;
1468 return 0;
1469 }
Darrick J. Wong0fa4a102021-01-25 21:09:49 -08001470 *lockflags |= XFS_IOLOCK_EXCL;
1471
Darrick J. Wongf41a0712021-01-22 16:48:35 -08001472 if (!xfs_ilock_nowait(ip, XFS_MMAPLOCK_EXCL)) {
1473 if (wait)
Darrick J. Wong0fa4a102021-01-25 21:09:49 -08001474 return -EAGAIN;
1475 return 0;
Darrick J. Wongf41a0712021-01-22 16:48:35 -08001476 }
Darrick J. Wong0fa4a102021-01-25 21:09:49 -08001477 *lockflags |= XFS_MMAPLOCK_EXCL;
Darrick J. Wong83104d42016-10-03 09:11:46 -07001478
Darrick J. Wongbe78ff02018-01-16 19:03:59 -08001479 /*
1480 * Check again, nobody else should be able to dirty blocks or change
1481 * the reflink iflag now that we have the first two locks held.
1482 */
Christoph Hellwig51d62692018-07-17 16:51:51 -07001483 if (xfs_prep_free_cowblocks(ip))
Darrick J. Wongbe78ff02018-01-16 19:03:59 -08001484 ret = xfs_reflink_cancel_cow_range(ip, 0, NULLFILEOFF, false);
Darrick J. Wong83104d42016-10-03 09:11:46 -07001485 return ret;
1486}
1487
Darrick J. Wong83104d42016-10-03 09:11:46 -07001488void
1489xfs_inode_set_cowblocks_tag(
1490 xfs_inode_t *ip)
1491{
Brian Foster7b7381f2016-10-24 14:21:00 +11001492 trace_xfs_inode_set_cowblocks_tag(ip);
Darrick J. Wong9669f512021-01-22 16:48:43 -08001493 return xfs_blockgc_set_iflag(ip, XFS_ICOWBLOCKS);
Darrick J. Wong83104d42016-10-03 09:11:46 -07001494}
1495
1496void
1497xfs_inode_clear_cowblocks_tag(
1498 xfs_inode_t *ip)
1499{
Brian Foster7b7381f2016-10-24 14:21:00 +11001500 trace_xfs_inode_clear_cowblocks_tag(ip);
Darrick J. Wongce2d3bb2021-01-22 16:48:43 -08001501 return xfs_blockgc_clear_iflag(ip, XFS_ICOWBLOCKS);
Darrick J. Wong83104d42016-10-03 09:11:46 -07001502}
Darrick J. Wongd6b636e2018-05-09 10:03:56 -07001503
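/*
 * Walk all AGs whose perag radix tree entry carries @tag, taking and
 * releasing a perag reference for each one as we go.
 */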
Darrick J. Wong894ecac2021-01-22 16:48:44 -08001504#define for_each_perag_tag(mp, next_agno, pag, tag) \
1505 for ((next_agno) = 0, (pag) = xfs_perag_get_tag((mp), 0, (tag)); \
1506 (pag) != NULL; \
1507 (next_agno) = (pag)->pag_agno + 1, \
1508 xfs_perag_put(pag), \
1509 (pag) = xfs_perag_get_tag((mp), (next_agno), (tag)))
1510
Darrick J. Wongd6b636e2018-05-09 10:03:56 -07001512/* Disable post-EOF and CoW block auto-reclamation. */
1513void
Darrick J. Wongc9a65262021-01-22 16:48:44 -08001514xfs_blockgc_stop(
Darrick J. Wongd6b636e2018-05-09 10:03:56 -07001515 struct xfs_mount *mp)
1516{
Darrick J. Wong894ecac2021-01-22 16:48:44 -08001517 struct xfs_perag *pag;
1518 xfs_agnumber_t agno;
1519
1520 for_each_perag_tag(mp, agno, pag, XFS_ICI_BLOCKGC_TAG)
1521 cancel_delayed_work_sync(&pag->pag_blockgc_work);
Darrick J. Wongd6b636e2018-05-09 10:03:56 -07001522}
1523
1524/* Enable post-EOF and CoW block auto-reclamation. */
1525void
Darrick J. Wongc9a65262021-01-22 16:48:44 -08001526xfs_blockgc_start(
Darrick J. Wongd6b636e2018-05-09 10:03:56 -07001527 struct xfs_mount *mp)
1528{
Darrick J. Wong894ecac2021-01-22 16:48:44 -08001529 struct xfs_perag *pag;
1530 xfs_agnumber_t agno;
1531
1532 for_each_perag_tag(mp, agno, pag, XFS_ICI_BLOCKGC_TAG)
1533 xfs_blockgc_queue(pag);
Darrick J. Wongd6b636e2018-05-09 10:03:56 -07001534}
Darrick J. Wong3d4feec2021-01-22 16:48:36 -08001535
Darrick J. Wongd20d5ed2021-06-01 23:01:44 -07001536/* Don't try to run block gc on an inode that's in any of these states. */
1537#define XFS_BLOCKGC_NOGRAB_IFLAGS (XFS_INEW | \
1538 XFS_IRECLAIMABLE | \
1539 XFS_IRECLAIM)
Darrick J. Wongdf600192021-06-01 13:29:41 -07001540/*
Darrick J. Wongb9baaef2021-05-31 11:31:58 -07001541 * Decide if the given @ip is eligible for garbage collection of speculative
1542 * preallocations, and grab it if so. Returns true if it's ready to go or
1543 * false if we should just ignore it.
Darrick J. Wongdf600192021-06-01 13:29:41 -07001544 */
1545static bool
Darrick J. Wongb9baaef2021-05-31 11:31:58 -07001546xfs_blockgc_igrab(
Darrick J. Wong7fdff522021-05-31 11:31:59 -07001547 struct xfs_inode *ip)
Darrick J. Wongdf600192021-06-01 13:29:41 -07001548{
1549 struct inode *inode = VFS_I(ip);
Darrick J. Wongdf600192021-06-01 13:29:41 -07001550
1551 ASSERT(rcu_read_lock_held());
1552
1553 /* Check for stale RCU freed inode */
1554 spin_lock(&ip->i_flags_lock);
1555 if (!ip->i_ino)
1556 goto out_unlock_noent;
1557
Darrick J. Wongd20d5ed2021-06-01 23:01:44 -07001558 if (ip->i_flags & XFS_BLOCKGC_NOGRAB_IFLAGS)
Darrick J. Wongdf600192021-06-01 13:29:41 -07001559 goto out_unlock_noent;
1560 spin_unlock(&ip->i_flags_lock);
1561
	/* Don't try to garbage collect anything after a filesystem shutdown. */
1563 if (XFS_FORCED_SHUTDOWN(ip->i_mount))
1564 return false;
1565
	/* If we can't grab the inode, it must be on its way to reclaim. */
1567 if (!igrab(inode))
1568 return false;
1569
1570 /* inode is valid */
1571 return true;
1572
1573out_unlock_noent:
1574 spin_unlock(&ip->i_flags_lock);
1575 return false;
1576}
1577
Darrick J. Wong41956752021-01-22 16:48:43 -08001578/* Scan one incore inode for block preallocations that we can remove. */
1579static int
1580xfs_blockgc_scan_inode(
1581 struct xfs_inode *ip,
1582 void *args)
Darrick J. Wong85c5b272021-01-22 16:48:39 -08001583{
Darrick J. Wong0fa4a102021-01-25 21:09:49 -08001584 unsigned int lockflags = 0;
Darrick J. Wong85c5b272021-01-22 16:48:39 -08001585 int error;
1586
Darrick J. Wong0fa4a102021-01-25 21:09:49 -08001587 error = xfs_inode_free_eofblocks(ip, args, &lockflags);
Darrick J. Wong85c5b272021-01-22 16:48:39 -08001588 if (error)
Darrick J. Wong0fa4a102021-01-25 21:09:49 -08001589 goto unlock;
Darrick J. Wong85c5b272021-01-22 16:48:39 -08001590
Darrick J. Wong0fa4a102021-01-25 21:09:49 -08001591 error = xfs_inode_free_cowblocks(ip, args, &lockflags);
1592unlock:
1593 if (lockflags)
1594 xfs_iunlock(ip, lockflags);
1595 return error;
Darrick J. Wong85c5b272021-01-22 16:48:39 -08001596}
1597
Darrick J. Wong9669f512021-01-22 16:48:43 -08001598/* Background worker that trims preallocated space. */
1599void
1600xfs_blockgc_worker(
1601 struct work_struct *work)
1602{
Darrick J. Wong894ecac2021-01-22 16:48:44 -08001603 struct xfs_perag *pag = container_of(to_delayed_work(work),
1604 struct xfs_perag, pag_blockgc_work);
1605 struct xfs_mount *mp = pag->pag_mount;
Darrick J. Wong9669f512021-01-22 16:48:43 -08001606 int error;
1607
1608 if (!sb_start_write_trylock(mp->m_super))
1609 return;
Darrick J. Wongf427cf52021-05-31 11:32:00 -07001610 error = xfs_icwalk_ag(pag, XFS_ICWALK_BLOCKGC, NULL);
Darrick J. Wong9669f512021-01-22 16:48:43 -08001611 if (error)
Darrick J. Wong894ecac2021-01-22 16:48:44 -08001612 xfs_info(mp, "AG %u preallocation gc worker failed, err=%d",
1613 pag->pag_agno, error);
Darrick J. Wong9669f512021-01-22 16:48:43 -08001614 sb_end_write(mp->m_super);
Darrick J. Wong894ecac2021-01-22 16:48:44 -08001615 xfs_blockgc_queue(pag);
Darrick J. Wong9669f512021-01-22 16:48:43 -08001616}
1617
Darrick J. Wong85c5b272021-01-22 16:48:39 -08001618/*
1619 * Try to free space in the filesystem by purging eofblocks and cowblocks.
1620 */
1621int
1622xfs_blockgc_free_space(
1623 struct xfs_mount *mp,
1624 struct xfs_eofblocks *eofb)
1625{
1626 trace_xfs_blockgc_free_space(mp, eofb, _RET_IP_);
1627
Darrick J. Wongf427cf52021-05-31 11:32:00 -07001628 return xfs_icwalk(mp, XFS_ICWALK_BLOCKGC, eofb);
Darrick J. Wong85c5b272021-01-22 16:48:39 -08001629}
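/*
 * Illustration only (not a caller in this file): a synchronous scan limited
 * to a single user id (a kuid_t here called uid) could be requested with a
 * filter roughly like:
 *
 *	struct xfs_eofblocks	eofb = {
 *		.eof_flags	= XFS_EOF_FLAGS_UID | XFS_EOF_FLAGS_SYNC,
 *		.eof_uid	= uid,
 *	};
 *	error = xfs_blockgc_free_space(mp, &eofb);
 */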
1630
Darrick J. Wong3d4feec2021-01-22 16:48:36 -08001631/*
Darrick J. Wongc237dd72021-01-22 16:48:37 -08001632 * Run cow/eofblocks scans on the supplied dquots. We don't know exactly which
1633 * quota caused an allocation failure, so we make a best effort by including
1634 * each quota under low free space conditions (less than 1% free space) in the
1635 * scan.
Darrick J. Wong111068f2021-01-22 16:48:36 -08001636 *
1637 * Callers must not hold any inode's ILOCK. If requesting a synchronous scan
1638 * (XFS_EOF_FLAGS_SYNC), the caller also must not hold any inode's IOLOCK or
1639 * MMAPLOCK.
Darrick J. Wong3d4feec2021-01-22 16:48:36 -08001640 */
Darrick J. Wong111068f2021-01-22 16:48:36 -08001641int
Darrick J. Wongc237dd72021-01-22 16:48:37 -08001642xfs_blockgc_free_dquots(
1643 struct xfs_mount *mp,
1644 struct xfs_dquot *udqp,
1645 struct xfs_dquot *gdqp,
1646 struct xfs_dquot *pdqp,
Darrick J. Wong111068f2021-01-22 16:48:36 -08001647 unsigned int eof_flags)
Darrick J. Wong3d4feec2021-01-22 16:48:36 -08001648{
1649 struct xfs_eofblocks eofb = {0};
Darrick J. Wong3d4feec2021-01-22 16:48:36 -08001650 bool do_work = false;
1651
Darrick J. Wongc237dd72021-01-22 16:48:37 -08001652 if (!udqp && !gdqp && !pdqp)
1653 return 0;
1654
Darrick J. Wong3d4feec2021-01-22 16:48:36 -08001655 /*
Darrick J. Wong111068f2021-01-22 16:48:36 -08001656 * Run a scan to free blocks using the union filter to cover all
1657 * applicable quotas in a single scan.
Darrick J. Wong3d4feec2021-01-22 16:48:36 -08001658 */
Darrick J. Wong111068f2021-01-22 16:48:36 -08001659 eofb.eof_flags = XFS_EOF_FLAGS_UNION | eof_flags;
Darrick J. Wong3d4feec2021-01-22 16:48:36 -08001660
Darrick J. Wongc237dd72021-01-22 16:48:37 -08001661 if (XFS_IS_UQUOTA_ENFORCED(mp) && udqp && xfs_dquot_lowsp(udqp)) {
1662 eofb.eof_uid = make_kuid(mp->m_super->s_user_ns, udqp->q_id);
1663 eofb.eof_flags |= XFS_EOF_FLAGS_UID;
1664 do_work = true;
Darrick J. Wong3d4feec2021-01-22 16:48:36 -08001665 }
1666
	if (XFS_IS_GQUOTA_ENFORCED(mp) && gdqp && xfs_dquot_lowsp(gdqp)) {
1668 eofb.eof_gid = make_kgid(mp->m_super->s_user_ns, gdqp->q_id);
1669 eofb.eof_flags |= XFS_EOF_FLAGS_GID;
1670 do_work = true;
Darrick J. Wong3d4feec2021-01-22 16:48:36 -08001671 }
1672
Darrick J. Wongc237dd72021-01-22 16:48:37 -08001673 if (XFS_IS_PQUOTA_ENFORCED(mp) && pdqp && xfs_dquot_lowsp(pdqp)) {
1674 eofb.eof_prid = pdqp->q_id;
1675 eofb.eof_flags |= XFS_EOF_FLAGS_PRID;
1676 do_work = true;
Darrick J. Wong3d4feec2021-01-22 16:48:36 -08001677 }
1678
1679 if (!do_work)
Darrick J. Wong111068f2021-01-22 16:48:36 -08001680 return 0;
Darrick J. Wong3d4feec2021-01-22 16:48:36 -08001681
Darrick J. Wong85c5b272021-01-22 16:48:39 -08001682 return xfs_blockgc_free_space(mp, &eofb);
Darrick J. Wongc237dd72021-01-22 16:48:37 -08001683}
1684
1685/* Run cow/eofblocks scans on the quotas attached to the inode. */
1686int
1687xfs_blockgc_free_quota(
1688 struct xfs_inode *ip,
1689 unsigned int eof_flags)
1690{
1691 return xfs_blockgc_free_dquots(ip->i_mount,
1692 xfs_inode_dquot(ip, XFS_DQTYPE_USER),
1693 xfs_inode_dquot(ip, XFS_DQTYPE_GROUP),
1694 xfs_inode_dquot(ip, XFS_DQTYPE_PROJ), eof_flags);
Darrick J. Wong3d4feec2021-01-22 16:48:36 -08001695}
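/*
 * Sketch for illustration only; the real retry loops live in callers outside
 * this file (e.g. the transaction allocation paths). When a quota reservation
 * fails, they kick a blockgc scan for the quotas involved and retry once,
 * roughly:
 *
 *	error = xfs_trans_reserve_quota_nblks(...);
 *	if ((error == -EDQUOT || error == -ENOSPC) && !retried) {
 *		xfs_trans_cancel(tp);
 *		xfs_blockgc_free_quota(ip, 0);
 *		retried = true;
 *		goto retry;
 *	}
 */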
Darrick J. Wongdf600192021-06-01 13:29:41 -07001696
1697/* XFS Inode Cache Walking Code */
1698
1699/*
Darrick J. Wongb9baaef2021-05-31 11:31:58 -07001700 * Decide if we want to grab this inode in anticipation of doing work towards
1701 * the goal. If selected, the VFS must hold a reference to this inode, which
1702 * will be released after processing.
1703 */
1704static inline bool
1705xfs_icwalk_igrab(
1706 enum xfs_icwalk_goal goal,
Darrick J. Wong7fdff522021-05-31 11:31:59 -07001707 struct xfs_inode *ip)
Darrick J. Wongb9baaef2021-05-31 11:31:58 -07001708{
1709 switch (goal) {
1710 case XFS_ICWALK_DQRELE:
1711 return xfs_dqrele_igrab(ip);
1712 case XFS_ICWALK_BLOCKGC:
Darrick J. Wong7fdff522021-05-31 11:31:59 -07001713 return xfs_blockgc_igrab(ip);
Darrick J. Wongb9baaef2021-05-31 11:31:58 -07001714 default:
1715 return false;
1716 }
1717}
1718
Darrick J. Wongf427cf52021-05-31 11:32:00 -07001719/* Process an inode and release it. Return -EAGAIN to skip an inode. */
1720static inline int
1721xfs_icwalk_process_inode(
1722 enum xfs_icwalk_goal goal,
1723 struct xfs_inode *ip,
1724 void *args)
1725{
	int			error = 0;
1727
1728 switch (goal) {
1729 case XFS_ICWALK_DQRELE:
1730 error = xfs_dqrele_inode(ip, args);
1731 break;
1732 case XFS_ICWALK_BLOCKGC:
1733 error = xfs_blockgc_scan_inode(ip, args);
1734 break;
1735 }
1736 xfs_irele(ip);
1737 return error;
1738}
1739
Darrick J. Wongb9baaef2021-05-31 11:31:58 -07001740/*
Darrick J. Wongf427cf52021-05-31 11:32:00 -07001741 * For a given per-AG structure @pag and a goal, grab qualifying inodes and
 * process them according to that goal.
Darrick J. Wongdf600192021-06-01 13:29:41 -07001743 */
1744static int
Darrick J. Wongc1115c02021-06-01 22:41:25 -07001745xfs_icwalk_ag(
Darrick J. Wongdf600192021-06-01 13:29:41 -07001746 struct xfs_perag *pag,
Darrick J. Wongf427cf52021-05-31 11:32:00 -07001747 enum xfs_icwalk_goal goal,
1748 void *args)
Darrick J. Wongdf600192021-06-01 13:29:41 -07001749{
1750 struct xfs_mount *mp = pag->pag_mount;
1751 uint32_t first_index;
1752 int last_error = 0;
1753 int skipped;
1754 bool done;
1755 int nr_found;
1756
1757restart:
1758 done = false;
1759 skipped = 0;
1760 first_index = 0;
1761 nr_found = 0;
1762 do {
1763 struct xfs_inode *batch[XFS_LOOKUP_BATCH];
Darrick J. Wongc809d7e2021-06-01 13:49:52 -07001764 unsigned int tag = xfs_icwalk_tag(goal);
Darrick J. Wongdf600192021-06-01 13:29:41 -07001765 int error = 0;
1766 int i;
1767
1768 rcu_read_lock();
1769
Darrick J. Wongc809d7e2021-06-01 13:49:52 -07001770 if (tag == XFS_ICWALK_NULL_TAG)
Darrick J. Wongdf600192021-06-01 13:29:41 -07001771 nr_found = radix_tree_gang_lookup(&pag->pag_ici_root,
1772 (void **)batch, first_index,
1773 XFS_LOOKUP_BATCH);
1774 else
1775 nr_found = radix_tree_gang_lookup_tag(
1776 &pag->pag_ici_root,
1777 (void **) batch, first_index,
1778 XFS_LOOKUP_BATCH, tag);
1779
1780 if (!nr_found) {
1781 rcu_read_unlock();
1782 break;
1783 }
1784
1785 /*
		 * Grab the inodes before we drop the lock. If we found
		 * nothing, nr_found == 0 and the loop will be skipped.
1788 */
1789 for (i = 0; i < nr_found; i++) {
1790 struct xfs_inode *ip = batch[i];
1791
Darrick J. Wong7fdff522021-05-31 11:31:59 -07001792 if (done || !xfs_icwalk_igrab(goal, ip))
Darrick J. Wongdf600192021-06-01 13:29:41 -07001793 batch[i] = NULL;
1794
1795 /*
1796 * Update the index for the next lookup. Catch
1797 * overflows into the next AG range which can occur if
1798 * we have inodes in the last block of the AG and we
1799 * are currently pointing to the last inode.
1800 *
1801 * Because we may see inodes that are from the wrong AG
1802 * due to RCU freeing and reallocation, only update the
			 * index if it lies in this AG. It was a race that led
1804 * us to see this inode, so another lookup from the
1805 * same index will not find it again.
1806 */
1807 if (XFS_INO_TO_AGNO(mp, ip->i_ino) != pag->pag_agno)
1808 continue;
1809 first_index = XFS_INO_TO_AGINO(mp, ip->i_ino + 1);
1810 if (first_index < XFS_INO_TO_AGINO(mp, ip->i_ino))
1811 done = true;
1812 }
1813
		/* Unlock now that we've grabbed the inodes. */
1815 rcu_read_unlock();
1816
1817 for (i = 0; i < nr_found; i++) {
1818 if (!batch[i])
1819 continue;
Darrick J. Wongf427cf52021-05-31 11:32:00 -07001820 error = xfs_icwalk_process_inode(goal, batch[i], args);
Darrick J. Wongdf600192021-06-01 13:29:41 -07001821 if (error == -EAGAIN) {
1822 skipped++;
1823 continue;
1824 }
1825 if (error && last_error != -EFSCORRUPTED)
1826 last_error = error;
1827 }
1828
1829 /* bail out if the filesystem is corrupted. */
1830 if (error == -EFSCORRUPTED)
1831 break;
1832
1833 cond_resched();
1834
1835 } while (nr_found && !done);
1836
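	/*
	 * If any inodes were skipped (-EAGAIN), back off briefly and restart
	 * the walk from the top of the AG so that they are revisited.
	 */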
1837 if (skipped) {
1838 delay(1);
1839 goto restart;
1840 }
1841 return last_error;
1842}
1843
1844/* Fetch the next (possibly tagged) per-AG structure. */
1845static inline struct xfs_perag *
Darrick J. Wongc1115c02021-06-01 22:41:25 -07001846xfs_icwalk_get_perag(
Darrick J. Wongdf600192021-06-01 13:29:41 -07001847 struct xfs_mount *mp,
1848 xfs_agnumber_t agno,
Darrick J. Wongc809d7e2021-06-01 13:49:52 -07001849 enum xfs_icwalk_goal goal)
Darrick J. Wongdf600192021-06-01 13:29:41 -07001850{
Darrick J. Wongc809d7e2021-06-01 13:49:52 -07001851 unsigned int tag = xfs_icwalk_tag(goal);
1852
1853 if (tag == XFS_ICWALK_NULL_TAG)
Darrick J. Wongdf600192021-06-01 13:29:41 -07001854 return xfs_perag_get(mp, agno);
1855 return xfs_perag_get_tag(mp, agno, tag);
1856}
1857
Darrick J. Wongf427cf52021-05-31 11:32:00 -07001858/* Walk all incore inodes to achieve a given goal. */
Darrick J. Wongdf600192021-06-01 13:29:41 -07001859static int
Darrick J. Wongc1115c02021-06-01 22:41:25 -07001860xfs_icwalk(
Darrick J. Wongdf600192021-06-01 13:29:41 -07001861 struct xfs_mount *mp,
Darrick J. Wongf427cf52021-05-31 11:32:00 -07001862 enum xfs_icwalk_goal goal,
1863 void *args)
Darrick J. Wongdf600192021-06-01 13:29:41 -07001864{
1865 struct xfs_perag *pag;
1866 int error = 0;
1867 int last_error = 0;
1868 xfs_agnumber_t agno = 0;
1869
Darrick J. Wongc809d7e2021-06-01 13:49:52 -07001870 while ((pag = xfs_icwalk_get_perag(mp, agno, goal))) {
Darrick J. Wongdf600192021-06-01 13:29:41 -07001871 agno = pag->pag_agno + 1;
Darrick J. Wongf427cf52021-05-31 11:32:00 -07001872 error = xfs_icwalk_ag(pag, goal, args);
Darrick J. Wongdf600192021-06-01 13:29:41 -07001873 xfs_perag_put(pag);
1874 if (error) {
1875 last_error = error;
1876 if (error == -EFSCORRUPTED)
1877 break;
1878 }
1879 }
1880 return last_error;
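	/*
	 * Compile-time assertion only: the private icwalk flags must never
	 * collide with the XFS_EOF_FLAGS_* namespace. It emits no object
	 * code, so its placement after the return statement is harmless.
	 */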
1881 BUILD_BUG_ON(XFS_ICWALK_PRIVATE_FLAGS & XFS_EOF_FLAGS_VALID);
1882}