// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (c) 2000-2005 Silicon Graphics, Inc.
 * All Rights Reserved.
 */
#include "xfs.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
#include "xfs_log_format.h"
#include "xfs_trans_resv.h"
#include "xfs_bit.h"
#include "xfs_sb.h"
#include "xfs_mount.h"
#include "xfs_inode.h"
#include "xfs_dir2.h"
#include "xfs_ialloc.h"
#include "xfs_alloc.h"
#include "xfs_rtalloc.h"
#include "xfs_bmap.h"
#include "xfs_trans.h"
#include "xfs_trans_priv.h"
#include "xfs_log.h"
#include "xfs_error.h"
#include "xfs_quota.h"
#include "xfs_fsops.h"
#include "xfs_icache.h"
#include "xfs_sysfs.h"
#include "xfs_rmap_btree.h"
#include "xfs_refcount_btree.h"
#include "xfs_reflink.h"
#include "xfs_extent_busy.h"
#include "xfs_health.h"
#include "xfs_trace.h"
#include "xfs_ag.h"

static DEFINE_MUTEX(xfs_uuid_table_mutex);
static int xfs_uuid_table_size;
static uuid_t *xfs_uuid_table;

void
xfs_uuid_table_free(void)
{
	if (xfs_uuid_table_size == 0)
		return;
	kmem_free(xfs_uuid_table);
	xfs_uuid_table = NULL;
	xfs_uuid_table_size = 0;
}

/*
 * See if the UUID is unique among mounted XFS filesystems.
 * Mount fails if UUID is nil or a FS with the same UUID is already mounted.
 */
STATIC int
xfs_uuid_mount(
	struct xfs_mount	*mp)
{
	uuid_t			*uuid = &mp->m_sb.sb_uuid;
	int			hole, i;

	/* Publish UUID in struct super_block */
	uuid_copy(&mp->m_super->s_uuid, uuid);

	if (xfs_has_nouuid(mp))
		return 0;

	if (uuid_is_null(uuid)) {
		xfs_warn(mp, "Filesystem has null UUID - can't mount");
		return -EINVAL;
	}

	mutex_lock(&xfs_uuid_table_mutex);
	for (i = 0, hole = -1; i < xfs_uuid_table_size; i++) {
		if (uuid_is_null(&xfs_uuid_table[i])) {
			hole = i;
			continue;
		}
		if (uuid_equal(uuid, &xfs_uuid_table[i]))
			goto out_duplicate;
	}

	if (hole < 0) {
		xfs_uuid_table = krealloc(xfs_uuid_table,
			(xfs_uuid_table_size + 1) * sizeof(*xfs_uuid_table),
			GFP_KERNEL | __GFP_NOFAIL);
		hole = xfs_uuid_table_size++;
	}
	xfs_uuid_table[hole] = *uuid;
	mutex_unlock(&xfs_uuid_table_mutex);

	return 0;

 out_duplicate:
	mutex_unlock(&xfs_uuid_table_mutex);
	xfs_warn(mp, "Filesystem has duplicate UUID %pU - can't mount", uuid);
	return -EINVAL;
}

STATIC void
xfs_uuid_unmount(
	struct xfs_mount	*mp)
{
	uuid_t			*uuid = &mp->m_sb.sb_uuid;
	int			i;

	if (xfs_has_nouuid(mp))
		return;

	mutex_lock(&xfs_uuid_table_mutex);
	for (i = 0; i < xfs_uuid_table_size; i++) {
		if (uuid_is_null(&xfs_uuid_table[i]))
			continue;
		if (!uuid_equal(uuid, &xfs_uuid_table[i]))
			continue;
		memset(&xfs_uuid_table[i], 0, sizeof(uuid_t));
		break;
	}
	ASSERT(i < xfs_uuid_table_size);
	mutex_unlock(&xfs_uuid_table_mutex);
}

/*
 * Check size of device based on the (data/realtime) block count.
 * Note: this check is used by the growfs code as well as mount.
 */
int
xfs_sb_validate_fsb_count(
	xfs_sb_t	*sbp,
	uint64_t	nblocks)
{
	ASSERT(PAGE_SHIFT >= sbp->sb_blocklog);
	ASSERT(sbp->sb_blocklog >= BBSHIFT);

	/* Limited by ULONG_MAX of page cache index */
	if (nblocks >> (PAGE_SHIFT - sbp->sb_blocklog) > ULONG_MAX)
		return -EFBIG;
	return 0;
}

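/*
 * For illustration: with 4 KiB blocks and 4 KiB pages the shift above is
 * zero, so the check reduces to nblocks > ULONG_MAX.  On a 32-bit kernel
 * ULONG_MAX is 2^32 - 1, capping the device at 2^32 blocks * 4 KiB = 16 TiB,
 * which is the familiar limit on mounting large XFS filesystems on 32-bit
 * hosts.
 */
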
/*
 * xfs_readsb
 *
 * Does the initial read of the superblock.
 */
int
xfs_readsb(
	struct xfs_mount *mp,
	int		flags)
{
	unsigned int	sector_size;
	struct xfs_buf	*bp;
	struct xfs_sb	*sbp = &mp->m_sb;
	int		error;
	int		loud = !(flags & XFS_MFSI_QUIET);
	const struct xfs_buf_ops *buf_ops;

	ASSERT(mp->m_sb_bp == NULL);
	ASSERT(mp->m_ddev_targp != NULL);

	/*
	 * For the initial read, we must guess at the sector size based on the
	 * block device.  It's enough to get the sb_sectsize out of the
	 * superblock and then reread with the proper length.  We don't verify
	 * it yet, because it may not be complete.
	 */
	sector_size = xfs_getsize_buftarg(mp->m_ddev_targp);
	buf_ops = NULL;

	/*
	 * Allocate a (locked) buffer to hold the superblock. This will be kept
	 * around at all times to optimize access to the superblock. Therefore,
	 * set XBF_NO_IOACCT to make sure it doesn't hold the buftarg count
	 * elevated.
	 */
reread:
	error = xfs_buf_read_uncached(mp->m_ddev_targp, XFS_SB_DADDR,
				      BTOBB(sector_size), XBF_NO_IOACCT, &bp,
				      buf_ops);
	if (error) {
		if (loud)
			xfs_warn(mp, "SB validate failed with error %d.", error);
		/* bad CRC means corrupted metadata */
		if (error == -EFSBADCRC)
			error = -EFSCORRUPTED;
		return error;
	}

	/*
	 * Initialize the mount structure from the superblock.
	 */
	xfs_sb_from_disk(sbp, bp->b_addr);

	/*
	 * If we haven't validated the superblock, do so now before we try
	 * to check the sector size and reread the superblock appropriately.
	 */
	if (sbp->sb_magicnum != XFS_SB_MAGIC) {
		if (loud)
			xfs_warn(mp, "Invalid superblock magic number");
		error = -EINVAL;
		goto release_buf;
	}

	/*
	 * We must be able to do sector-sized and sector-aligned IO.
	 */
	if (sector_size > sbp->sb_sectsize) {
		if (loud)
			xfs_warn(mp, "device supports %u byte sectors (not %u)",
				sector_size, sbp->sb_sectsize);
		error = -ENOSYS;
		goto release_buf;
	}

	if (buf_ops == NULL) {
		/*
		 * Re-read the superblock so the buffer is correctly sized,
		 * and properly verified.
		 */
		xfs_buf_relse(bp);
		sector_size = sbp->sb_sectsize;
		buf_ops = loud ? &xfs_sb_buf_ops : &xfs_sb_quiet_buf_ops;
		goto reread;
	}

	mp->m_features |= xfs_sb_version_to_features(sbp);
	xfs_reinit_percpu_counters(mp);

	/* no need to be quiet anymore, so reset the buf ops */
	bp->b_ops = &xfs_sb_buf_ops;

	mp->m_sb_bp = bp;
	xfs_buf_unlock(bp);
	return 0;

release_buf:
	xfs_buf_relse(bp);
	return error;
}

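/*
 * A concrete walk through the two-pass read above, as an illustration: if the
 * block device reports 512 byte sectors but the filesystem was made with
 * sb_sectsize = 4096, the first (unverified) 512 byte read only serves to
 * discover sb_sectsize; the code then jumps back to "reread" and pulls in the
 * full 4096 byte superblock with the real verifier attached.
 */
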
/*
 * If the sunit/swidth change would move the precomputed root inode value, we
 * must reject the ondisk change because repair will stumble over that.
 * However, we allow the mount to proceed because we never rejected this
 * combination before.  On success, *update_sb is set to true if the
 * superblock should be updated, false otherwise.
 */
static inline int
xfs_check_new_dalign(
	struct xfs_mount	*mp,
	int			new_dalign,
	bool			*update_sb)
{
	struct xfs_sb		*sbp = &mp->m_sb;
	xfs_ino_t		calc_ino;

	calc_ino = xfs_ialloc_calc_rootino(mp, new_dalign);
	trace_xfs_check_new_dalign(mp, new_dalign, calc_ino);

	if (sbp->sb_rootino == calc_ino) {
		*update_sb = true;
		return 0;
	}

	xfs_warn(mp,
"Cannot change stripe alignment; would require moving root inode.");

	/*
	 * XXX: Next time we add a new incompat feature, this should start
	 * returning -EINVAL to fail the mount.  Until then, spit out a warning
	 * that we're ignoring the administrator's instructions.
	 */
	xfs_warn(mp, "Skipping superblock stripe alignment update.");
	*update_sb = false;
	return 0;
}

/*
 * If we were provided with new sunit/swidth values as mount options, make sure
 * that they pass basic alignment and superblock feature checks, and convert
 * them into the same units (FSB) that everything else expects.  This step
 * /must/ be done before computing the inode geometry.
 */
STATIC int
xfs_validate_new_dalign(
	struct xfs_mount	*mp)
{
	if (mp->m_dalign == 0)
		return 0;

	/*
	 * If stripe unit and stripe width are not multiples
	 * of the fs blocksize turn off alignment.
	 */
	if ((BBTOB(mp->m_dalign) & mp->m_blockmask) ||
	    (BBTOB(mp->m_swidth) & mp->m_blockmask)) {
		xfs_warn(mp,
	"alignment check failed: sunit/swidth vs. blocksize(%d)",
			mp->m_sb.sb_blocksize);
		return -EINVAL;
	} else {
		/*
		 * Convert the stripe unit and width to FSBs.
		 */
		mp->m_dalign = XFS_BB_TO_FSBT(mp, mp->m_dalign);
		if (mp->m_dalign && (mp->m_sb.sb_agblocks % mp->m_dalign)) {
			xfs_warn(mp,
		"alignment check failed: sunit/swidth vs. agsize(%d)",
				mp->m_sb.sb_agblocks);
			return -EINVAL;
		} else if (mp->m_dalign) {
			mp->m_swidth = XFS_BB_TO_FSBT(mp, mp->m_swidth);
		} else {
			xfs_warn(mp,
		"alignment check failed: sunit(%d) less than bsize(%d)",
				mp->m_dalign, mp->m_sb.sb_blocksize);
			return -EINVAL;
		}
	}

	if (!xfs_has_dalign(mp)) {
		xfs_warn(mp,
"cannot change alignment: superblock does not support data alignment");
		return -EINVAL;
	}

	return 0;
}

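/*
 * Example of the unit conversion above (illustrative numbers): the sunit and
 * swidth mount options arrive in 512 byte basic blocks, so
 * "sunit=128,swidth=512" on a 4 KiB block filesystem becomes
 * m_dalign = 16 FSB (64 KiB) and m_swidth = 64 FSB (256 KiB) once the checks
 * above pass.
 */
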
/* Update alignment values based on mount options and sb values. */
STATIC int
xfs_update_alignment(
	struct xfs_mount	*mp)
{
	struct xfs_sb		*sbp = &mp->m_sb;

	if (mp->m_dalign) {
		bool		update_sb;
		int		error;

		if (sbp->sb_unit == mp->m_dalign &&
		    sbp->sb_width == mp->m_swidth)
			return 0;

		error = xfs_check_new_dalign(mp, mp->m_dalign, &update_sb);
		if (error || !update_sb)
			return error;

		sbp->sb_unit = mp->m_dalign;
		sbp->sb_width = mp->m_swidth;
		mp->m_update_sb = true;
	} else if (!xfs_has_noalign(mp) && xfs_has_dalign(mp)) {
		mp->m_dalign = sbp->sb_unit;
		mp->m_swidth = sbp->sb_width;
	}

	return 0;
}

/*
 * Precalculate the low space thresholds for dynamic speculative preallocation.
 */
void
xfs_set_low_space_thresholds(
	struct xfs_mount	*mp)
{
	uint64_t		dblocks = mp->m_sb.sb_dblocks;
	uint64_t		rtexts = mp->m_sb.sb_rextents;
	int			i;

	do_div(dblocks, 100);
	do_div(rtexts, 100);

	for (i = 0; i < XFS_LOWSP_MAX; i++) {
		mp->m_low_space[i] = dblocks * (i + 1);
		mp->m_low_rtexts[i] = rtexts * (i + 1);
	}
}

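/*
 * Worked example for the loop above (illustrative sizes): XFS_LOWSP_MAX is 5,
 * so the thresholds land at 1%..5% of the data device.  For a 1 TiB data
 * device with 4 KiB blocks (268435456 blocks), m_low_space[0] is about 2.68
 * million blocks (~10 GiB) and m_low_space[4] about 13.4 million blocks
 * (~51 GiB); speculative preallocation is throttled harder as free space
 * falls through each threshold.
 */
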
/*
 * Check that the data (and log if separate) is an ok size.
 */
STATIC int
xfs_check_sizes(
	struct xfs_mount *mp)
{
	struct xfs_buf	*bp;
	xfs_daddr_t	d;
	int		error;

	d = (xfs_daddr_t)XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks);
	if (XFS_BB_TO_FSB(mp, d) != mp->m_sb.sb_dblocks) {
		xfs_warn(mp, "filesystem size mismatch detected");
		return -EFBIG;
	}
	error = xfs_buf_read_uncached(mp->m_ddev_targp,
					d - XFS_FSS_TO_BB(mp, 1),
					XFS_FSS_TO_BB(mp, 1), 0, &bp, NULL);
	if (error) {
		xfs_warn(mp, "last sector read failed");
		return error;
	}
	xfs_buf_relse(bp);

	if (mp->m_logdev_targp == mp->m_ddev_targp)
		return 0;

	d = (xfs_daddr_t)XFS_FSB_TO_BB(mp, mp->m_sb.sb_logblocks);
	if (XFS_BB_TO_FSB(mp, d) != mp->m_sb.sb_logblocks) {
		xfs_warn(mp, "log size mismatch detected");
		return -EFBIG;
	}
	error = xfs_buf_read_uncached(mp->m_logdev_targp,
					d - XFS_FSB_TO_BB(mp, 1),
					XFS_FSB_TO_BB(mp, 1), 0, &bp, NULL);
	if (error) {
		xfs_warn(mp, "log device read failed");
		return error;
	}
	xfs_buf_relse(bp);
	return 0;
}

/*
 * Clear the quotaflags in memory and in the superblock.
 */
int
xfs_mount_reset_sbqflags(
	struct xfs_mount	*mp)
{
	mp->m_qflags = 0;

	/* It is OK to look at sb_qflags in the mount path without m_sb_lock. */
	if (mp->m_sb.sb_qflags == 0)
		return 0;
	spin_lock(&mp->m_sb_lock);
	mp->m_sb.sb_qflags = 0;
	spin_unlock(&mp->m_sb_lock);

	if (!xfs_fs_writable(mp, SB_FREEZE_WRITE))
		return 0;

	return xfs_sync_sb(mp, false);
}

uint64_t
xfs_default_resblks(xfs_mount_t *mp)
{
	uint64_t resblks;

	/*
	 * We default to 5% or 8192 fsbs of space reserved, whichever is
	 * smaller.  This is intended to cover concurrent allocation
	 * transactions when we initially hit enospc.  These each require a 4
	 * block reservation.  Hence by default we cover roughly 2000 concurrent
	 * allocation reservations.
	 */
	resblks = mp->m_sb.sb_dblocks;
	do_div(resblks, 20);
	resblks = min_t(uint64_t, resblks, 8192);
	return resblks;
}

/* Ensure the summary counts are correct. */
STATIC int
xfs_check_summary_counts(
	struct xfs_mount	*mp)
{
	/*
	 * The AG0 superblock verifier rejects in-progress filesystems,
	 * so we should never see the flag set this far into mounting.
	 */
	if (mp->m_sb.sb_inprogress) {
		xfs_err(mp, "sb_inprogress set after log recovery??");
		WARN_ON(1);
		return -EFSCORRUPTED;
	}

	/*
	 * Now the log is mounted, we know if it was an unclean shutdown or
	 * not.  If it was, the first phase of recovery has already completed,
	 * so we have consistent AG blocks on disk.  We have not recovered EFIs
	 * yet, but they are recovered transactionally in the second recovery
	 * phase later.
	 *
	 * If the log was clean when we mounted, we can check the summary
	 * counters.  If any of them are obviously incorrect, we can recompute
	 * them from the AGF headers in the next step.
	 */
	if (xfs_is_clean(mp) &&
	    (mp->m_sb.sb_fdblocks > mp->m_sb.sb_dblocks ||
	     !xfs_verify_icount(mp, mp->m_sb.sb_icount) ||
	     mp->m_sb.sb_ifree > mp->m_sb.sb_icount))
		xfs_fs_mark_sick(mp, XFS_SICK_FS_COUNTERS);

	/*
	 * We can safely re-initialise incore superblock counters from the
	 * per-ag data.  These may not be correct if the filesystem was not
	 * cleanly unmounted, so we waited for recovery to finish before doing
	 * this.
	 *
	 * If the filesystem was cleanly unmounted or the previous check did
	 * not flag anything weird, then we can trust the values in the
	 * superblock to be correct and we don't need to do anything here.
	 * Otherwise, recalculate the summary counters.
	 */
	if ((!xfs_has_lazysbcount(mp) || xfs_is_clean(mp)) &&
	    !xfs_fs_has_sickness(mp, XFS_SICK_FS_COUNTERS))
		return 0;

	return xfs_initialize_perag_data(mp, mp->m_sb.sb_agcount);
}

/*
 * Flush and reclaim dirty inodes in preparation for unmount.  Inodes and
 * internal inode structures can be sitting in the CIL and AIL at this point,
 * so we need to unpin them, write them back and/or reclaim them before unmount
 * can proceed.  In other words, callers are required to have inactivated all
 * inodes.
 *
 * An inode cluster that has been freed can have its buffer still pinned in
 * memory because the transaction is still sitting in an iclog.  The stale
 * inodes on that buffer will be pinned to the buffer until the transaction
 * hits the disk and the callbacks run.  Pushing the AIL will skip the stale
 * inodes and may never see the pinned buffer, so nothing will push out the
 * iclog and unpin the buffer.
 *
 * Hence we need to force the log to unpin everything first.  However, log
 * forces don't wait for the discards they issue to complete, so we have to
 * explicitly wait for them to complete here as well.
 *
 * Then we can tell the world we are unmounting so that error handling knows
 * that the filesystem is going away and we should error out anything that we
 * have been retrying in the background.  This will prevent never-ending
 * retries in AIL pushing from hanging the unmount.
 *
 * Finally, we can push the AIL to clean all the remaining dirty objects, then
 * reclaim the remaining inodes that are still in memory at this point in time.
 */
static void
xfs_unmount_flush_inodes(
	struct xfs_mount	*mp)
{
	xfs_log_force(mp, XFS_LOG_SYNC);
	xfs_extent_busy_wait_all(mp);
	flush_workqueue(xfs_discard_wq);

	set_bit(XFS_OPSTATE_UNMOUNTING, &mp->m_opstate);

	xfs_ail_push_all_sync(mp->m_ail);
	xfs_inodegc_stop(mp);
	cancel_delayed_work_sync(&mp->m_reclaim_work);
	xfs_reclaim_inodes(mp);
	xfs_health_unmount(mp);
}

static void
xfs_mount_setup_inode_geom(
	struct xfs_mount	*mp)
{
	struct xfs_ino_geometry *igeo = M_IGEO(mp);

	igeo->attr_fork_offset = xfs_bmap_compute_attr_offset(mp);
	ASSERT(igeo->attr_fork_offset < XFS_LITINO(mp));

	xfs_ialloc_setup_geometry(mp);
}

/* Compute maximum possible height for per-AG btree types for this fs. */
static inline void
xfs_agbtree_compute_maxlevels(
	struct xfs_mount	*mp)
{
	unsigned int		levels;

	levels = max(mp->m_alloc_maxlevels, M_IGEO(mp)->inobt_maxlevels);
	levels = max(levels, mp->m_rmap_maxlevels);
	mp->m_agbtree_maxlevels = max(levels, mp->m_refc_maxlevels);
}

/*
 * This function does the following on an initial mount of a file system:
 *	- reads the superblock from disk and init the mount struct
 *	- if we're a 32-bit kernel, do a size check on the superblock
 *		so we don't mount terabyte filesystems
 *	- init mount struct realtime fields
 *	- allocate inode hash table for fs
 *	- init directory manager
 *	- perform recovery and init the log manager
 */
int
xfs_mountfs(
	struct xfs_mount	*mp)
{
	struct xfs_sb		*sbp = &(mp->m_sb);
	struct xfs_inode	*rip;
	struct xfs_ino_geometry *igeo = M_IGEO(mp);
	uint64_t		resblks;
	uint			quotamount = 0;
	uint			quotaflags = 0;
	int			error = 0;

	xfs_sb_mount_common(mp, sbp);

	/*
	 * Check for mismatched features2 values.  Older kernels read & wrote
	 * into the wrong sb offset for sb_features2 on some platforms due to
	 * xfs_sb_t not being 64bit size aligned when sb_features2 was added,
	 * which made older superblock reading/writing routines swap it as a
	 * 64-bit value.
	 *
	 * For backwards compatibility, we make both slots equal.
	 *
	 * If we detect a mismatched field, we OR the set bits into the existing
	 * features2 field in case it has already been modified; we don't want
	 * to lose any features.  We then update the bad location with the ORed
	 * value so that older kernels will see any features2 flags.  The
	 * superblock writeback code ensures the new sb_features2 is copied to
	 * sb_bad_features2 before it is logged or written to disk.
	 */
	if (xfs_sb_has_mismatched_features2(sbp)) {
		xfs_warn(mp, "correcting sb_features alignment problem");
		sbp->sb_features2 |= sbp->sb_bad_features2;
		mp->m_update_sb = true;
	}

	/* always use v2 inodes by default now */
	if (!(mp->m_sb.sb_versionnum & XFS_SB_VERSION_NLINKBIT)) {
		mp->m_sb.sb_versionnum |= XFS_SB_VERSION_NLINKBIT;
		mp->m_features |= XFS_FEAT_NLINK;
		mp->m_update_sb = true;
	}

	/*
	 * If we were given new sunit/swidth options, do some basic validation
	 * checks and convert the incore dalign and swidth values to the
	 * same units (FSB) that everything else uses.  This /must/ happen
	 * before computing the inode geometry.
	 */
	error = xfs_validate_new_dalign(mp);
	if (error)
		goto out;

	xfs_alloc_compute_maxlevels(mp);
	xfs_bmap_compute_maxlevels(mp, XFS_DATA_FORK);
	xfs_bmap_compute_maxlevels(mp, XFS_ATTR_FORK);
	xfs_mount_setup_inode_geom(mp);
	xfs_rmapbt_compute_maxlevels(mp);
	xfs_refcountbt_compute_maxlevels(mp);

	xfs_agbtree_compute_maxlevels(mp);

	/*
	 * Check if sb_agblocks is aligned at stripe boundary.  If sb_agblocks
	 * is NOT aligned turn off m_dalign since allocator alignment is within
	 * an ag, therefore ag has to be aligned at stripe boundary.  Note that
	 * we must compute the free space and rmap btree geometry before doing
	 * this.
	 */
	error = xfs_update_alignment(mp);
	if (error)
		goto out;

	/* enable fail_at_unmount as default */
	mp->m_fail_unmount = true;

	error = xfs_sysfs_init(&mp->m_kobj, &xfs_mp_ktype,
			       NULL, mp->m_super->s_id);
	if (error)
		goto out;

	error = xfs_sysfs_init(&mp->m_stats.xs_kobj, &xfs_stats_ktype,
			       &mp->m_kobj, "stats");
	if (error)
		goto out_remove_sysfs;

	error = xfs_error_sysfs_init(mp);
	if (error)
		goto out_del_stats;

	error = xfs_errortag_init(mp);
	if (error)
		goto out_remove_error_sysfs;

	error = xfs_uuid_mount(mp);
	if (error)
		goto out_remove_errortag;

	/*
	 * Update the preferred write size based on the information from the
	 * on-disk superblock.
	 */
	mp->m_allocsize_log =
		max_t(uint32_t, sbp->sb_blocklog, mp->m_allocsize_log);
	mp->m_allocsize_blocks = 1U << (mp->m_allocsize_log - sbp->sb_blocklog);

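	/*
	 * For example, assuming the usual 64 KiB default (m_allocsize_log of
	 * 16): on a 4 KiB block filesystem the clamp above leaves
	 * m_allocsize_log at 16 and m_allocsize_blocks at 1 << 4 = 16 blocks,
	 * while a hypothetical 64 KiB block filesystem would raise
	 * m_allocsize_log to sb_blocklog and use a single block.
	 */
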
	/* set the low space thresholds for dynamic preallocation */
	xfs_set_low_space_thresholds(mp);

	/*
	 * If enabled, sparse inode chunk alignment is expected to match the
	 * cluster size.  Full inode chunk alignment must match the chunk size,
	 * but that is checked on sb read verification...
	 */
	if (xfs_has_sparseinodes(mp) &&
	    mp->m_sb.sb_spino_align !=
			XFS_B_TO_FSBT(mp, igeo->inode_cluster_size_raw)) {
		xfs_warn(mp,
	"Sparse inode block alignment (%u) must match cluster size (%llu).",
			 mp->m_sb.sb_spino_align,
			 XFS_B_TO_FSBT(mp, igeo->inode_cluster_size_raw));
		error = -EINVAL;
		goto out_remove_uuid;
	}

	/*
	 * Check that the data (and log if separate) is an ok size.
	 */
	error = xfs_check_sizes(mp);
	if (error)
		goto out_remove_uuid;

	/*
	 * Initialize realtime fields in the mount structure
	 */
	error = xfs_rtmount_init(mp);
	if (error) {
		xfs_warn(mp, "RT mount failed");
		goto out_remove_uuid;
	}

	/*
	 * Copies the low order bits of the timestamp and the randomly
	 * set "sequence" number out of a UUID.
	 */
	mp->m_fixedfsid[0] =
		(get_unaligned_be16(&sbp->sb_uuid.b[8]) << 16) |
		 get_unaligned_be16(&sbp->sb_uuid.b[4]);
	mp->m_fixedfsid[1] = get_unaligned_be32(&sbp->sb_uuid.b[0]);

	error = xfs_da_mount(mp);
	if (error) {
		xfs_warn(mp, "Failed dir/attr init: %d", error);
		goto out_remove_uuid;
	}

	/*
	 * Initialize the precomputed transaction reservations values.
	 */
	xfs_trans_init(mp);

	/*
	 * Allocate and initialize the per-ag data.
	 */
	error = xfs_initialize_perag(mp, sbp->sb_agcount, &mp->m_maxagi);
	if (error) {
		xfs_warn(mp, "Failed per-ag init: %d", error);
		goto out_free_dir;
	}

	if (XFS_IS_CORRUPT(mp, !sbp->sb_logblocks)) {
		xfs_warn(mp, "no log defined");
		error = -EFSCORRUPTED;
		goto out_free_perag;
	}

	error = xfs_inodegc_register_shrinker(mp);
	if (error)
		goto out_fail_wait;

	/*
	 * Log's mount-time initialization.  The first part of recovery can
	 * place some items on the AIL, to be handled when recovery is
	 * finished or cancelled.
	 */
	error = xfs_log_mount(mp, mp->m_logdev_targp,
			      XFS_FSB_TO_DADDR(mp, sbp->sb_logstart),
			      XFS_FSB_TO_BB(mp, sbp->sb_logblocks));
	if (error) {
		xfs_warn(mp, "log mount failed");
		goto out_inodegc_shrinker;
	}

	/* Make sure the summary counts are ok. */
	error = xfs_check_summary_counts(mp);
	if (error)
		goto out_log_dealloc;

	/* Enable background inode inactivation workers. */
	xfs_inodegc_start(mp);
	xfs_blockgc_start(mp);

	/*
	 * Now that we've recovered any pending superblock feature bit
	 * additions, we can finish setting up the attr2 behaviour for the
	 * mount.  The noattr2 option overrides the superblock flag, so only
	 * check the superblock feature flag if the mount option is not set.
	 */
	if (xfs_has_noattr2(mp)) {
		mp->m_features &= ~XFS_FEAT_ATTR2;
	} else if (!xfs_has_attr2(mp) &&
		   (mp->m_sb.sb_features2 & XFS_SB_VERSION2_ATTR2BIT)) {
		mp->m_features |= XFS_FEAT_ATTR2;
	}

	/*
	 * Get and sanity-check the root inode.
	 * Save the pointer to it in the mount structure.
	 */
	error = xfs_iget(mp, NULL, sbp->sb_rootino, XFS_IGET_UNTRUSTED,
			 XFS_ILOCK_EXCL, &rip);
	if (error) {
		xfs_warn(mp,
			"Failed to read root inode 0x%llx, error %d",
			sbp->sb_rootino, -error);
		goto out_log_dealloc;
	}

	ASSERT(rip != NULL);

	if (XFS_IS_CORRUPT(mp, !S_ISDIR(VFS_I(rip)->i_mode))) {
		xfs_warn(mp, "corrupted root inode %llu: not a directory",
			(unsigned long long)rip->i_ino);
		xfs_iunlock(rip, XFS_ILOCK_EXCL);
		error = -EFSCORRUPTED;
		goto out_rele_rip;
	}
	mp->m_rootip = rip;	/* save it */

	xfs_iunlock(rip, XFS_ILOCK_EXCL);

	/*
	 * Initialize realtime inode pointers in the mount structure
	 */
	error = xfs_rtmount_inodes(mp);
	if (error) {
		/*
		 * Free up the root inode.
		 */
		xfs_warn(mp, "failed to read RT inodes");
		goto out_rele_rip;
	}

	/*
	 * If this is a read-only mount defer the superblock updates until
	 * the next remount into writeable mode.  Otherwise we would never
	 * perform the update e.g. for the root filesystem.
	 */
	if (mp->m_update_sb && !xfs_is_readonly(mp)) {
		error = xfs_sync_sb(mp, false);
		if (error) {
			xfs_warn(mp, "failed to write sb changes");
			goto out_rtunmount;
		}
	}

	/*
	 * Initialise the XFS quota management subsystem for this mount
	 */
	if (XFS_IS_QUOTA_ON(mp)) {
		error = xfs_qm_newmount(mp, &quotamount, &quotaflags);
		if (error)
			goto out_rtunmount;
	} else {
		/*
		 * If a file system had quotas running earlier, but decided to
		 * mount without -o uquota/pquota/gquota options, revoke the
		 * quotachecked license.
		 */
		if (mp->m_sb.sb_qflags & XFS_ALL_QUOTA_ACCT) {
			xfs_notice(mp, "resetting quota flags");
			error = xfs_mount_reset_sbqflags(mp);
			if (error)
				goto out_rtunmount;
		}
	}

	/*
	 * Finish recovering the file system.  This part needed to be delayed
	 * until after the root and real-time bitmap inodes were consistently
	 * read in.  Temporarily create per-AG space reservations for metadata
	 * btree shape changes because space freeing transactions (for inode
	 * inactivation) require the per-AG reservation in lieu of reserving
	 * blocks.
	 */
	error = xfs_fs_reserve_ag_blocks(mp);
	if (error == -ENOSPC)
		xfs_warn(mp,
	"ENOSPC reserving per-AG metadata pool, log recovery may fail.");
	error = xfs_log_mount_finish(mp);
	xfs_fs_unreserve_ag_blocks(mp);
	if (error) {
		xfs_warn(mp, "log mount finish failed");
		goto out_rtunmount;
	}

	/*
	 * Now the log is fully replayed, we can transition to full read-only
	 * mode for read-only mounts.  This will sync all the metadata and
	 * clean the log so that the recovery we just performed does not have
	 * to be replayed again on the next mount.
	 *
	 * We use the same quiesce mechanism as the rw->ro remount, as they are
	 * semantically identical operations.
	 */
	if (xfs_is_readonly(mp) && !xfs_has_norecovery(mp))
		xfs_log_clean(mp);

	/*
	 * Complete the quota initialisation, post-log-replay component.
	 */
	if (quotamount) {
		ASSERT(mp->m_qflags == 0);
		mp->m_qflags = quotaflags;

		xfs_qm_mount_quotas(mp);
	}

	/*
	 * Now we are mounted, reserve a small amount of unused space for
	 * privileged transactions.  This is needed so that transaction
	 * space required for critical operations can dip into this pool
	 * when at ENOSPC.  This is needed for operations like create with
	 * attr, unwritten extent conversion at ENOSPC, etc.  Data allocations
	 * are not allowed to use this reserved space.
	 *
	 * This may drive us straight to ENOSPC on mount, but that implies
	 * we were already there on the last unmount.  Warn if this occurs.
	 */
	if (!xfs_is_readonly(mp)) {
		resblks = xfs_default_resblks(mp);
		error = xfs_reserve_blocks(mp, &resblks, NULL);
		if (error)
			xfs_warn(mp,
	"Unable to allocate reserve blocks. Continuing without reserve pool.");

		/* Reserve AG blocks for future btree expansion. */
		error = xfs_fs_reserve_ag_blocks(mp);
		if (error && error != -ENOSPC)
			goto out_agresv;
	}

	return 0;

 out_agresv:
	xfs_fs_unreserve_ag_blocks(mp);
	xfs_qm_unmount_quotas(mp);
 out_rtunmount:
	xfs_rtunmount_inodes(mp);
 out_rele_rip:
	xfs_irele(rip);
	/* Clean out dquots that might be in memory after quotacheck. */
	xfs_qm_unmount(mp);

	/*
	 * Inactivate all inodes that might still be in memory after a log
	 * intent recovery failure so that reclaim can free them.  Metadata
	 * inodes and the root directory shouldn't need inactivation, but the
	 * mount failed for some reason, so pull down all the state and flee.
	 */
	xfs_inodegc_flush(mp);

	/*
	 * Flush all inode reclamation work and flush the log.
	 * We have to do this /after/ rtunmount and qm_unmount because those
	 * two will have scheduled delayed reclaim for the rt/quota inodes.
	 *
	 * This is slightly different from the unmountfs call sequence
	 * because we could be tearing down a partially set up mount.  In
	 * particular, if log_mount_finish fails we bail out without calling
	 * qm_unmount_quotas and therefore rely on qm_unmount to release the
	 * quota inodes.
	 */
	xfs_unmount_flush_inodes(mp);
 out_log_dealloc:
	xfs_log_mount_cancel(mp);
 out_inodegc_shrinker:
	unregister_shrinker(&mp->m_inodegc_shrinker);
 out_fail_wait:
	if (mp->m_logdev_targp && mp->m_logdev_targp != mp->m_ddev_targp)
		xfs_buftarg_drain(mp->m_logdev_targp);
	xfs_buftarg_drain(mp->m_ddev_targp);
 out_free_perag:
	xfs_free_perag(mp);
 out_free_dir:
	xfs_da_unmount(mp);
 out_remove_uuid:
	xfs_uuid_unmount(mp);
 out_remove_errortag:
	xfs_errortag_del(mp);
 out_remove_error_sysfs:
	xfs_error_sysfs_del(mp);
 out_del_stats:
	xfs_sysfs_del(&mp->m_stats.xs_kobj);
 out_remove_sysfs:
	xfs_sysfs_del(&mp->m_kobj);
 out:
	return error;
}

/*
 * This flushes out the inodes, dquots and the superblock, unmounts the
 * log and makes sure that incore structures are freed.
 */
void
xfs_unmountfs(
	struct xfs_mount	*mp)
{
	uint64_t		resblks;
	int			error;

	/*
	 * Perform all on-disk metadata updates required to inactivate inodes
	 * that the VFS evicted earlier in the unmount process.  Freeing inodes
	 * and discarding CoW fork preallocations can cause shape changes to
	 * the free inode and refcount btrees, respectively, so we must finish
	 * this before we discard the metadata space reservations.  Metadata
	 * inodes and the root directory do not require inactivation.
	 */
	xfs_inodegc_flush(mp);

	xfs_blockgc_stop(mp);
	xfs_fs_unreserve_ag_blocks(mp);
	xfs_qm_unmount_quotas(mp);
	xfs_rtunmount_inodes(mp);
	xfs_irele(mp->m_rootip);

	xfs_unmount_flush_inodes(mp);

	xfs_qm_unmount(mp);

	/*
	 * Unreserve any blocks we have so that when we unmount we don't
	 * account the reserved free space as used.  This is really only
	 * necessary for lazy superblock counting because it trusts the incore
	 * superblock counters to be absolutely correct on clean unmount.
	 *
	 * We don't bother correcting this elsewhere for lazy superblock
	 * counting because on mount of an unclean filesystem we reconstruct
	 * the correct counter value and this is irrelevant.
	 *
	 * For non-lazy counter filesystems, this doesn't matter at all because
	 * we only ever apply deltas to the superblock and hence the incore
	 * value does not matter....
	 */
	resblks = 0;
	error = xfs_reserve_blocks(mp, &resblks, NULL);
	if (error)
		xfs_warn(mp, "Unable to free reserved block pool. "
				"Freespace may not be correct on next mount.");

	xfs_log_unmount(mp);
	xfs_da_unmount(mp);
	xfs_uuid_unmount(mp);

#if defined(DEBUG)
	xfs_errortag_clearall(mp);
#endif
	unregister_shrinker(&mp->m_inodegc_shrinker);
	xfs_free_perag(mp);

	xfs_errortag_del(mp);
	xfs_error_sysfs_del(mp);
	xfs_sysfs_del(&mp->m_stats.xs_kobj);
	xfs_sysfs_del(&mp->m_kobj);
}

/*
 * Determine whether modifications can proceed.  The caller specifies the
 * minimum freeze level for which modifications should not be allowed.  This
 * allows certain operations to proceed while the freeze sequence is in
 * progress, if necessary.
 */
bool
xfs_fs_writable(
	struct xfs_mount	*mp,
	int			level)
{
	ASSERT(level > SB_UNFROZEN);
	if ((mp->m_super->s_writers.frozen >= level) ||
	    xfs_is_shutdown(mp) || xfs_is_readonly(mp))
		return false;

	return true;
}

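/*
 * Usage sketch: callers pass the first freeze level at which they must not
 * modify the filesystem.  xfs_mount_reset_sbqflags() above passes
 * SB_FREEZE_WRITE, so its superblock update is skipped as soon as normal
 * writes are frozen, whereas code that may keep running until the fs is
 * fully frozen would pass SB_FREEZE_COMPLETE instead.
 */
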
int
xfs_mod_fdblocks(
	struct xfs_mount	*mp,
	int64_t			delta,
	bool			rsvd)
{
	int64_t			lcounter;
	long long		res_used;
	s32			batch;
	uint64_t		set_aside;

	if (delta > 0) {
		/*
		 * If the reserve pool is depleted, put blocks back into it
		 * first.  Most of the time the pool is full.
		 */
		if (likely(mp->m_resblks == mp->m_resblks_avail)) {
			percpu_counter_add(&mp->m_fdblocks, delta);
			return 0;
		}

		spin_lock(&mp->m_sb_lock);
		res_used = (long long)(mp->m_resblks - mp->m_resblks_avail);

		if (res_used > delta) {
			mp->m_resblks_avail += delta;
		} else {
			delta -= res_used;
			mp->m_resblks_avail = mp->m_resblks;
			percpu_counter_add(&mp->m_fdblocks, delta);
		}
		spin_unlock(&mp->m_sb_lock);
		return 0;
	}

	/*
	 * Taking blocks away, need to be more accurate the closer we
	 * are to zero.
	 *
	 * If the counter has a value of less than 2 * max batch size,
	 * then make everything serialise as we are real close to
	 * ENOSPC.
	 */
	if (__percpu_counter_compare(&mp->m_fdblocks, 2 * XFS_FDBLOCKS_BATCH,
				     XFS_FDBLOCKS_BATCH) < 0)
		batch = 1;
	else
		batch = XFS_FDBLOCKS_BATCH;

	/*
	 * Set aside allocbt blocks because these blocks are tracked as free
	 * space but not available for allocation.  Technically this means
	 * that a single reservation cannot consume all remaining free space,
	 * but the ratio of allocbt blocks to usable free blocks should be
	 * rather small.  The tradeoff without this is that filesystems that
	 * maintain high perag block reservations can over reserve physical
	 * block availability and fail physical allocation, which leads to
	 * much more serious problems (i.e. transaction abort, pagecache
	 * discards, etc.) than slightly premature -ENOSPC.
	 */
	set_aside = mp->m_alloc_set_aside + atomic64_read(&mp->m_allocbt_blks);
	percpu_counter_add_batch(&mp->m_fdblocks, delta, batch);
	if (__percpu_counter_compare(&mp->m_fdblocks, set_aside,
				     XFS_FDBLOCKS_BATCH) >= 0) {
		/* we had space! */
		return 0;
	}

	/*
	 * lock up the sb for dipping into reserves before releasing the space
	 * that took us to ENOSPC.
	 */
	spin_lock(&mp->m_sb_lock);
	percpu_counter_add(&mp->m_fdblocks, -delta);
	if (!rsvd)
		goto fdblocks_enospc;

	lcounter = (long long)mp->m_resblks_avail + delta;
	if (lcounter >= 0) {
		mp->m_resblks_avail = lcounter;
		spin_unlock(&mp->m_sb_lock);
		return 0;
	}
	xfs_warn_once(mp,
"Reserve blocks depleted! Consider increasing reserve pool size.");

fdblocks_enospc:
	spin_unlock(&mp->m_sb_lock);
	return -ENOSPC;
}

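/*
 * Usage sketch (illustrative call, not taken from a real caller): a
 * transaction reserving 100 blocks that is allowed to dip into the reserve
 * pool would call xfs_mod_fdblocks(mp, -100, true) and treat -ENOSPC as a
 * hard failure; giving those blocks back later is xfs_mod_fdblocks(mp, 100,
 * false), which refills m_resblks_avail before crediting the percpu counter.
 */
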
int
xfs_mod_frextents(
	struct xfs_mount	*mp,
	int64_t			delta)
{
	int64_t			lcounter;
	int			ret = 0;

	spin_lock(&mp->m_sb_lock);
	lcounter = mp->m_sb.sb_frextents + delta;
	if (lcounter < 0)
		ret = -ENOSPC;
	else
		mp->m_sb.sb_frextents = lcounter;
	spin_unlock(&mp->m_sb_lock);
	return ret;
}

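/*
 * Note that, unlike xfs_mod_fdblocks() above, free realtime extents live in
 * the plain sb_frextents field under m_sb_lock rather than in a batched
 * percpu counter; presumably the realtime allocation path is cold enough
 * that the simpler serialised update is acceptable.
 */
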
/*
 * Used to free the superblock along various error paths.
 */
void
xfs_freesb(
	struct xfs_mount	*mp)
{
	struct xfs_buf		*bp = mp->m_sb_bp;

	xfs_buf_lock(bp);
	mp->m_sb_bp = NULL;
	xfs_buf_relse(bp);
}

/*
 * If the underlying (data/log/rt) device is readonly, there are some
 * operations that cannot proceed.
 */
int
xfs_dev_is_read_only(
	struct xfs_mount	*mp,
	char			*message)
{
	if (xfs_readonly_buftarg(mp->m_ddev_targp) ||
	    xfs_readonly_buftarg(mp->m_logdev_targp) ||
	    (mp->m_rtdev_targp && xfs_readonly_buftarg(mp->m_rtdev_targp))) {
		xfs_notice(mp, "%s required on read-only device.", message);
		xfs_notice(mp, "write access unavailable, cannot proceed.");
		return -EROFS;
	}
	return 0;
}

/* Force the summary counters to be recalculated at next mount. */
void
xfs_force_summary_recalc(
	struct xfs_mount	*mp)
{
	if (!xfs_has_lazysbcount(mp))
		return;

	xfs_fs_mark_sick(mp, XFS_SICK_FS_COUNTERS);
}

/*
 * Enable a log incompat feature flag in the primary superblock.  The caller
 * cannot have any other transactions in progress.
 */
int
xfs_add_incompat_log_feature(
	struct xfs_mount	*mp,
	uint32_t		feature)
{
	struct xfs_dsb		*dsb;
	int			error;

	ASSERT(hweight32(feature) == 1);
	ASSERT(!(feature & XFS_SB_FEAT_INCOMPAT_LOG_UNKNOWN));

	/*
	 * Force the log to disk and kick the background AIL thread to reduce
	 * the chances that the bwrite will stall waiting for the AIL to unpin
	 * the primary superblock buffer.  This isn't a data integrity
	 * operation, so we don't need a synchronous push.
	 */
	error = xfs_log_force(mp, XFS_LOG_SYNC);
	if (error)
		return error;
	xfs_ail_push_all(mp->m_ail);

	/*
	 * Lock the primary superblock buffer to serialize all callers that
	 * are trying to set feature bits.
	 */
	xfs_buf_lock(mp->m_sb_bp);
	xfs_buf_hold(mp->m_sb_bp);

	if (xfs_is_shutdown(mp)) {
		error = -EIO;
		goto rele;
	}

	if (xfs_sb_has_incompat_log_feature(&mp->m_sb, feature))
		goto rele;

	/*
	 * Write the primary superblock to disk immediately, because we need
	 * the log_incompat bit to be set in the primary super now to protect
	 * the log items that we're going to commit later.
	 */
	dsb = mp->m_sb_bp->b_addr;
	xfs_sb_to_disk(dsb, &mp->m_sb);
	dsb->sb_features_log_incompat |= cpu_to_be32(feature);
	error = xfs_bwrite(mp->m_sb_bp);
	if (error)
		goto shutdown;

	/*
	 * Add the feature bits to the incore superblock before we unlock the
	 * buffer.
	 */
	xfs_sb_add_incompat_log_features(&mp->m_sb, feature);
	xfs_buf_relse(mp->m_sb_bp);

	/* Log the superblock to disk. */
	return xfs_sync_sb(mp, false);
shutdown:
	xfs_force_shutdown(mp, SHUTDOWN_META_IO_ERROR);
rele:
	xfs_buf_relse(mp->m_sb_bp);
	return error;
}

/*
 * Clear all the log incompat flags from the superblock.
 *
 * The caller cannot be in a transaction, must ensure that the log does not
 * contain any log items protected by any log incompat bit, and must ensure
 * that there are no other threads that depend on the state of the log incompat
 * feature flags in the primary super.
 *
 * Returns true if the superblock is dirty.
 */
bool
xfs_clear_incompat_log_features(
	struct xfs_mount	*mp)
{
	bool			ret = false;

	if (!xfs_has_crc(mp) ||
	    !xfs_sb_has_incompat_log_feature(&mp->m_sb,
				XFS_SB_FEAT_INCOMPAT_LOG_ALL) ||
	    xfs_is_shutdown(mp))
		return false;

	/*
	 * Update the incore superblock.  We synchronize on the primary super
	 * buffer lock to be consistent with the add function, though at least
	 * in theory this shouldn't be necessary.
	 */
	xfs_buf_lock(mp->m_sb_bp);
	xfs_buf_hold(mp->m_sb_bp);

	if (xfs_sb_has_incompat_log_feature(&mp->m_sb,
				XFS_SB_FEAT_INCOMPAT_LOG_ALL)) {
		xfs_info(mp, "Clearing log incompat feature flags.");
		xfs_sb_remove_incompat_log_features(&mp->m_sb);
		ret = true;
	}

	xfs_buf_relse(mp->m_sb_bp);
	return ret;
}

/*
 * Update the in-core delayed block counter.
 *
 * We prefer to update the counter without having to take a spinlock for every
 * counter update (i.e. batching).  Each change to delayed allocation
 * reservations can easily exceed the default percpu counter batching, so we
 * use a larger batch factor here.
 *
 * Note that we don't currently have any callers requiring fast summation
 * (e.g. percpu_counter_read) so we can use a big batch value here.
 */
#define XFS_DELALLOC_BATCH	(4096)
void
xfs_mod_delalloc(
	struct xfs_mount	*mp,
	int64_t			delta)
{
	percpu_counter_add_batch(&mp->m_delalloc_blks, delta,
			XFS_DELALLOC_BATCH);
}
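
/*
 * Batching sketch for the call above, summarising the behaviour of the
 * generic percpu counter API as a reading aid: each CPU accumulates deltas
 * locally and only folds them into the shared count once the local value
 * crosses the 4096 block batch, so a stream of small delalloc reservations
 * rarely touches the shared counter.  The trade-off is that a fast
 * percpu_counter_read() can be off by roughly num_online_cpus() * 4096
 * blocks, which is fine because no caller needs a fast precise sum.
 */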