// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (c) 2000-2005 Silicon Graphics, Inc.
 * All Rights Reserved.
 */
#include "xfs.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
#include "xfs_log_format.h"
#include "xfs_trans_resv.h"
#include "xfs_bit.h"
#include "xfs_sb.h"
#include "xfs_mount.h"
#include "xfs_inode.h"
#include "xfs_dir2.h"
#include "xfs_ialloc.h"
#include "xfs_alloc.h"
#include "xfs_rtalloc.h"
#include "xfs_bmap.h"
#include "xfs_trans.h"
#include "xfs_trans_priv.h"
#include "xfs_log.h"
#include "xfs_error.h"
#include "xfs_quota.h"
#include "xfs_fsops.h"
#include "xfs_icache.h"
#include "xfs_sysfs.h"
#include "xfs_rmap_btree.h"
#include "xfs_refcount_btree.h"
#include "xfs_reflink.h"
#include "xfs_extent_busy.h"
#include "xfs_health.h"
#include "xfs_trace.h"
#include "xfs_ag.h"

static DEFINE_MUTEX(xfs_uuid_table_mutex);
static int xfs_uuid_table_size;
static uuid_t *xfs_uuid_table;

void
xfs_uuid_table_free(void)
{
	if (xfs_uuid_table_size == 0)
		return;
	kmem_free(xfs_uuid_table);
	xfs_uuid_table = NULL;
	xfs_uuid_table_size = 0;
}

/*
 * See if the UUID is unique among mounted XFS filesystems.
 * Mount fails if UUID is nil or a FS with the same UUID is already mounted.
 */
STATIC int
xfs_uuid_mount(
	struct xfs_mount	*mp)
{
	uuid_t			*uuid = &mp->m_sb.sb_uuid;
	int			hole, i;

	/* Publish UUID in struct super_block */
	uuid_copy(&mp->m_super->s_uuid, uuid);

	if (xfs_has_nouuid(mp))
		return 0;

	if (uuid_is_null(uuid)) {
		xfs_warn(mp, "Filesystem has null UUID - can't mount");
		return -EINVAL;
	}

	mutex_lock(&xfs_uuid_table_mutex);
	for (i = 0, hole = -1; i < xfs_uuid_table_size; i++) {
		if (uuid_is_null(&xfs_uuid_table[i])) {
			hole = i;
			continue;
		}
		if (uuid_equal(uuid, &xfs_uuid_table[i]))
			goto out_duplicate;
	}

	if (hole < 0) {
		xfs_uuid_table = krealloc(xfs_uuid_table,
			(xfs_uuid_table_size + 1) * sizeof(*xfs_uuid_table),
			GFP_KERNEL | __GFP_NOFAIL);
		hole = xfs_uuid_table_size++;
	}
	xfs_uuid_table[hole] = *uuid;
	mutex_unlock(&xfs_uuid_table_mutex);

	return 0;

 out_duplicate:
	mutex_unlock(&xfs_uuid_table_mutex);
	xfs_warn(mp, "Filesystem has duplicate UUID %pU - can't mount", uuid);
	return -EINVAL;
}

STATIC void
xfs_uuid_unmount(
	struct xfs_mount	*mp)
{
	uuid_t			*uuid = &mp->m_sb.sb_uuid;
	int			i;

	if (xfs_has_nouuid(mp))
		return;

	mutex_lock(&xfs_uuid_table_mutex);
	for (i = 0; i < xfs_uuid_table_size; i++) {
		if (uuid_is_null(&xfs_uuid_table[i]))
			continue;
		if (!uuid_equal(uuid, &xfs_uuid_table[i]))
			continue;
		memset(&xfs_uuid_table[i], 0, sizeof(uuid_t));
		break;
	}
	ASSERT(i < xfs_uuid_table_size);
	mutex_unlock(&xfs_uuid_table_mutex);
}

/*
 * Check size of device based on the (data/realtime) block count.
 * Note: this check is used by the growfs code as well as mount.
 */
int
xfs_sb_validate_fsb_count(
	xfs_sb_t	*sbp,
	uint64_t	nblocks)
{
	ASSERT(PAGE_SHIFT >= sbp->sb_blocklog);
	ASSERT(sbp->sb_blocklog >= BBSHIFT);

	/* Limited by ULONG_MAX of page cache index */
	if (nblocks >> (PAGE_SHIFT - sbp->sb_blocklog) > ULONG_MAX)
		return -EFBIG;
	return 0;
}
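/*
 * Worked example (illustrative, not from the original source): on a
 * 4k-page kernel with 4k filesystem blocks, PAGE_SHIFT == sb_blocklog
 * == 12, the shift above is zero, and nblocks is capped at ULONG_MAX
 * page cache indices.  With 1k blocks (sb_blocklog == 10), four blocks
 * share each page, the highest page index is nblocks >> 2, and four
 * times as many blocks fit under the same ULONG_MAX limit.
 */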

/*
 * xfs_readsb
 *
 * Does the initial read of the superblock.
 */
int
xfs_readsb(
	struct xfs_mount *mp,
	int		flags)
{
	unsigned int	sector_size;
	struct xfs_buf	*bp;
	struct xfs_sb	*sbp = &mp->m_sb;
	int		error;
	int		loud = !(flags & XFS_MFSI_QUIET);
	const struct xfs_buf_ops *buf_ops;

	ASSERT(mp->m_sb_bp == NULL);
	ASSERT(mp->m_ddev_targp != NULL);

	/*
	 * For the initial read, we must guess at the sector
	 * size based on the block device.  It's enough to
	 * get the sb_sectsize out of the superblock and
	 * then reread with the proper length.
	 * We don't verify it yet, because it may not be complete.
	 */
	sector_size = xfs_getsize_buftarg(mp->m_ddev_targp);
	buf_ops = NULL;

	/*
	 * Allocate a (locked) buffer to hold the superblock. This will be kept
	 * around at all times to optimize access to the superblock. Therefore,
	 * set XBF_NO_IOACCT to make sure it doesn't hold the buftarg count
	 * elevated.
	 */
reread:
	error = xfs_buf_read_uncached(mp->m_ddev_targp, XFS_SB_DADDR,
				      BTOBB(sector_size), XBF_NO_IOACCT, &bp,
				      buf_ops);
	if (error) {
		if (loud)
			xfs_warn(mp, "SB validate failed with error %d.", error);
		/* bad CRC means corrupted metadata */
		if (error == -EFSBADCRC)
			error = -EFSCORRUPTED;
		return error;
	}

	/*
	 * Initialize the mount structure from the superblock.
	 */
	xfs_sb_from_disk(sbp, bp->b_addr);

	/*
	 * If we haven't validated the superblock, do so now before we try
	 * to check the sector size and reread the superblock appropriately.
	 */
	if (sbp->sb_magicnum != XFS_SB_MAGIC) {
		if (loud)
			xfs_warn(mp, "Invalid superblock magic number");
		error = -EINVAL;
		goto release_buf;
	}

	/*
	 * We must be able to do sector-sized and sector-aligned IO.
	 */
	if (sector_size > sbp->sb_sectsize) {
		if (loud)
			xfs_warn(mp, "device supports %u byte sectors (not %u)",
				sector_size, sbp->sb_sectsize);
		error = -ENOSYS;
		goto release_buf;
	}

	if (buf_ops == NULL) {
		/*
		 * Re-read the superblock so the buffer is correctly sized,
		 * and properly verified.
		 */
		xfs_buf_relse(bp);
		sector_size = sbp->sb_sectsize;
		buf_ops = loud ? &xfs_sb_buf_ops : &xfs_sb_quiet_buf_ops;
		goto reread;
	}

	mp->m_features |= xfs_sb_version_to_features(sbp);
	xfs_reinit_percpu_counters(mp);

	/* no need to be quiet anymore, so reset the buf ops */
	bp->b_ops = &xfs_sb_buf_ops;

	mp->m_sb_bp = bp;
	xfs_buf_unlock(bp);
	return 0;

release_buf:
	xfs_buf_relse(bp);
	return error;
}

/*
 * If the sunit/swidth change would move the precomputed root inode value, we
 * must reject the ondisk change because repair will stumble over that.
 * However, we allow the mount to proceed because we never rejected this
 * combination before.  Sets *update_sb to true when the superblock should be
 * updated, false otherwise.
 */
static inline int
xfs_check_new_dalign(
	struct xfs_mount	*mp,
	int			new_dalign,
	bool			*update_sb)
{
	struct xfs_sb		*sbp = &mp->m_sb;
	xfs_ino_t		calc_ino;

	calc_ino = xfs_ialloc_calc_rootino(mp, new_dalign);
	trace_xfs_check_new_dalign(mp, new_dalign, calc_ino);

	if (sbp->sb_rootino == calc_ino) {
		*update_sb = true;
		return 0;
	}

	xfs_warn(mp,
"Cannot change stripe alignment; would require moving root inode.");

	/*
	 * XXX: Next time we add a new incompat feature, this should start
	 * returning -EINVAL to fail the mount.  Until then, spit out a warning
	 * that we're ignoring the administrator's instructions.
	 */
	xfs_warn(mp, "Skipping superblock stripe alignment update.");
	*update_sb = false;
	return 0;
}

/*
 * If we were provided with new sunit/swidth values as mount options, make sure
 * that they pass basic alignment and superblock feature checks, and convert
 * them into the same units (FSB) that everything else expects.  This step
 * /must/ be done before computing the inode geometry.
 */
STATIC int
xfs_validate_new_dalign(
	struct xfs_mount	*mp)
{
	if (mp->m_dalign == 0)
		return 0;

	/*
	 * If the stripe unit and stripe width are not multiples
	 * of the fs blocksize, reject the mount.
	 */
	if ((BBTOB(mp->m_dalign) & mp->m_blockmask) ||
	    (BBTOB(mp->m_swidth) & mp->m_blockmask)) {
		xfs_warn(mp,
	"alignment check failed: sunit/swidth vs. blocksize(%d)",
			mp->m_sb.sb_blocksize);
		return -EINVAL;
	} else {
		/*
		 * Convert the stripe unit and width to FSBs.
		 */
		mp->m_dalign = XFS_BB_TO_FSBT(mp, mp->m_dalign);
		if (mp->m_dalign && (mp->m_sb.sb_agblocks % mp->m_dalign)) {
			xfs_warn(mp,
		"alignment check failed: sunit/swidth vs. agsize(%d)",
				mp->m_sb.sb_agblocks);
			return -EINVAL;
		} else if (mp->m_dalign) {
			mp->m_swidth = XFS_BB_TO_FSBT(mp, mp->m_swidth);
		} else {
			xfs_warn(mp,
		"alignment check failed: sunit(%d) less than bsize(%d)",
				mp->m_dalign, mp->m_sb.sb_blocksize);
			return -EINVAL;
		}
	}

	if (!xfs_has_dalign(mp)) {
		xfs_warn(mp,
"cannot change alignment: superblock does not support data alignment");
		return -EINVAL;
	}

	return 0;
}

/* Update alignment values based on mount options and sb values. */
STATIC int
xfs_update_alignment(
	struct xfs_mount	*mp)
{
	struct xfs_sb		*sbp = &mp->m_sb;

	if (mp->m_dalign) {
		bool		update_sb;
		int		error;

		if (sbp->sb_unit == mp->m_dalign &&
		    sbp->sb_width == mp->m_swidth)
			return 0;

		error = xfs_check_new_dalign(mp, mp->m_dalign, &update_sb);
		if (error || !update_sb)
			return error;

		sbp->sb_unit = mp->m_dalign;
		sbp->sb_width = mp->m_swidth;
		mp->m_update_sb = true;
	} else if (!xfs_has_noalign(mp) && xfs_has_dalign(mp)) {
		mp->m_dalign = sbp->sb_unit;
		mp->m_swidth = sbp->sb_width;
	}

	return 0;
}

/*
 * Precalculate the low space thresholds for dynamic speculative preallocation.
 */
void
xfs_set_low_space_thresholds(
	struct xfs_mount	*mp)
{
	uint64_t		dblocks = mp->m_sb.sb_dblocks;
	uint64_t		rtexts = mp->m_sb.sb_rextents;
	int			i;

	do_div(dblocks, 100);
	do_div(rtexts, 100);

	for (i = 0; i < XFS_LOWSP_MAX; i++) {
		mp->m_low_space[i] = dblocks * (i + 1);
		mp->m_low_rtexts[i] = rtexts * (i + 1);
	}
}
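/*
 * Worked example (illustrative): XFS_LOWSP_MAX is 5 in this era, so a
 * data device of 1000000 blocks gets thresholds at 10000, 20000, 30000,
 * 40000 and 50000 blocks, i.e. 1%..5% of the device.  Speculative
 * preallocation is throttled progressively harder as free space falls
 * through each threshold.
 */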

/*
 * Check that the data (and log if separate) is an ok size.
 */
STATIC int
xfs_check_sizes(
	struct xfs_mount *mp)
{
	struct xfs_buf	*bp;
	xfs_daddr_t	d;
	int		error;

	d = (xfs_daddr_t)XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks);
	if (XFS_BB_TO_FSB(mp, d) != mp->m_sb.sb_dblocks) {
		xfs_warn(mp, "filesystem size mismatch detected");
		return -EFBIG;
	}
	error = xfs_buf_read_uncached(mp->m_ddev_targp,
					d - XFS_FSS_TO_BB(mp, 1),
					XFS_FSS_TO_BB(mp, 1), 0, &bp, NULL);
	if (error) {
		xfs_warn(mp, "last sector read failed");
		return error;
	}
	xfs_buf_relse(bp);

	if (mp->m_logdev_targp == mp->m_ddev_targp)
		return 0;

	d = (xfs_daddr_t)XFS_FSB_TO_BB(mp, mp->m_sb.sb_logblocks);
	if (XFS_BB_TO_FSB(mp, d) != mp->m_sb.sb_logblocks) {
		xfs_warn(mp, "log size mismatch detected");
		return -EFBIG;
	}
	error = xfs_buf_read_uncached(mp->m_logdev_targp,
					d - XFS_FSB_TO_BB(mp, 1),
					XFS_FSB_TO_BB(mp, 1), 0, &bp, NULL);
	if (error) {
		xfs_warn(mp, "log device read failed");
		return error;
	}
	xfs_buf_relse(bp);
	return 0;
}

/*
 * Clear the quotaflags in memory and in the superblock.
 */
int
xfs_mount_reset_sbqflags(
	struct xfs_mount	*mp)
{
	mp->m_qflags = 0;

	/* It is OK to look at sb_qflags in the mount path without m_sb_lock. */
	if (mp->m_sb.sb_qflags == 0)
		return 0;
	spin_lock(&mp->m_sb_lock);
	mp->m_sb.sb_qflags = 0;
	spin_unlock(&mp->m_sb_lock);

	if (!xfs_fs_writable(mp, SB_FREEZE_WRITE))
		return 0;

	return xfs_sync_sb(mp, false);
}

uint64_t
xfs_default_resblks(xfs_mount_t *mp)
{
	uint64_t resblks;

	/*
	 * We default to 5% or 8192 fsbs of space reserved, whichever is
	 * smaller.  This is intended to cover concurrent allocation
	 * transactions when we initially hit enospc. These each require a 4
	 * block reservation. Hence by default we cover roughly 2000 concurrent
	 * allocation reservations.
	 */
	resblks = mp->m_sb.sb_dblocks;
	do_div(resblks, 20);
	resblks = min_t(uint64_t, resblks, 8192);
	return resblks;
}
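/*
 * Worked arithmetic (illustrative): a 1TB filesystem with 4k blocks has
 * sb_dblocks == 268435456; 5% of that is ~13421772 blocks, so the cap
 * applies and the default reserve pool is 8192 blocks (32MB).  Only
 * filesystems smaller than ~640MB (163840 blocks) fall below the
 * 8192-block cap.
 */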

/* Ensure the summary counts are correct. */
STATIC int
xfs_check_summary_counts(
	struct xfs_mount	*mp)
{
	/*
	 * The AG0 superblock verifier rejects in-progress filesystems,
	 * so we should never see the flag set this far into mounting.
	 */
	if (mp->m_sb.sb_inprogress) {
		xfs_err(mp, "sb_inprogress set after log recovery??");
		WARN_ON(1);
		return -EFSCORRUPTED;
	}

	/*
	 * Now the log is mounted, we know if it was an unclean shutdown or
	 * not. If it was, the first phase of recovery has already completed,
	 * so we have consistent AG blocks on disk. We have not recovered EFIs
	 * yet, but they are recovered transactionally in the second recovery
	 * phase later.
	 *
	 * If the log was clean when we mounted, we can check the summary
	 * counters.  If any of them are obviously incorrect, we can recompute
	 * them from the AGF headers in the next step.
	 */
	if (XFS_LAST_UNMOUNT_WAS_CLEAN(mp) &&
	    (mp->m_sb.sb_fdblocks > mp->m_sb.sb_dblocks ||
	     !xfs_verify_icount(mp, mp->m_sb.sb_icount) ||
	     mp->m_sb.sb_ifree > mp->m_sb.sb_icount))
		xfs_fs_mark_sick(mp, XFS_SICK_FS_COUNTERS);

	/*
	 * We can safely re-initialise incore superblock counters from the
	 * per-ag data. These may not be correct if the filesystem was not
	 * cleanly unmounted, so we waited for recovery to finish before doing
	 * this.
	 *
	 * If the filesystem was cleanly unmounted or the previous check did
	 * not flag anything weird, then we can trust the values in the
	 * superblock to be correct and we don't need to do anything here.
	 * Otherwise, recalculate the summary counters.
	 */
	if ((!xfs_has_lazysbcount(mp) ||
	     XFS_LAST_UNMOUNT_WAS_CLEAN(mp)) &&
	    !xfs_fs_has_sickness(mp, XFS_SICK_FS_COUNTERS))
		return 0;

	return xfs_initialize_perag_data(mp, mp->m_sb.sb_agcount);
}

/*
 * Flush and reclaim dirty inodes in preparation for unmount. Inodes and
 * internal inode structures can be sitting in the CIL and AIL at this point,
 * so we need to unpin them, write them back and/or reclaim them before unmount
 * can proceed.  In other words, callers are required to have inactivated all
 * inodes.
 *
 * An inode cluster that has been freed can have its buffer still pinned in
 * memory because the transaction is still sitting in an iclog. The stale
 * inodes on that buffer will be pinned to the buffer until the transaction
 * hits the disk and the callbacks run. Pushing the AIL will skip the stale
 * inodes and may never see the pinned buffer, so nothing will push out the
 * iclog and unpin the buffer.
 *
 * Hence we need to force the log to unpin everything first. However, log
 * forces don't wait for the discards they issue to complete, so we have to
 * explicitly wait for them to complete here as well.
 *
 * Then we can tell the world we are unmounting so that error handling knows
 * that the filesystem is going away and we should error out anything that we
 * have been retrying in the background.  This will prevent never-ending
 * retries in AIL pushing from hanging the unmount.
 *
 * Finally, we can push the AIL to clean all the remaining dirty objects, then
 * reclaim the remaining inodes that are still in memory at this point in time.
 */
static void
xfs_unmount_flush_inodes(
	struct xfs_mount	*mp)
{
	xfs_log_force(mp, XFS_LOG_SYNC);
	xfs_extent_busy_wait_all(mp);
	flush_workqueue(xfs_discard_wq);

	mp->m_flags |= XFS_MOUNT_UNMOUNTING;

	xfs_ail_push_all_sync(mp->m_ail);
	xfs_inodegc_stop(mp);
	cancel_delayed_work_sync(&mp->m_reclaim_work);
	xfs_reclaim_inodes(mp);
	xfs_health_unmount(mp);
}

static void
xfs_mount_setup_inode_geom(
	struct xfs_mount	*mp)
{
	struct xfs_ino_geometry *igeo = M_IGEO(mp);

	igeo->attr_fork_offset = xfs_bmap_compute_attr_offset(mp);
	ASSERT(igeo->attr_fork_offset < XFS_LITINO(mp));

	xfs_ialloc_setup_geometry(mp);
}

/*
 * This function does the following on an initial mount of a file system:
 *	- reads the superblock from disk and initializes the mount struct
 *	- if we're a 32-bit kernel, does a size check on the superblock
 *	  so we don't mount terabyte filesystems
 *	- initializes mount struct realtime fields
 *	- allocates the inode hash table for the fs
 *	- initializes the directory manager
 *	- performs recovery and initializes the log manager
 */
int
xfs_mountfs(
	struct xfs_mount	*mp)
{
	struct xfs_sb		*sbp = &(mp->m_sb);
	struct xfs_inode	*rip;
	struct xfs_ino_geometry	*igeo = M_IGEO(mp);
	uint64_t		resblks;
	uint			quotamount = 0;
	uint			quotaflags = 0;
	int			error = 0;

	xfs_sb_mount_common(mp, sbp);

	/*
	 * Check for mismatched features2 values.  Older kernels read & wrote
	 * into the wrong sb offset for sb_features2 on some platforms due to
	 * xfs_sb_t not being 64bit size aligned when sb_features2 was added,
	 * which made older superblock reading/writing routines swap it as a
	 * 64-bit value.
	 *
	 * For backwards compatibility, we make both slots equal.
	 *
	 * If we detect a mismatched field, we OR the set bits into the existing
	 * features2 field in case it has already been modified; we don't want
	 * to lose any features.  We then update the bad location with the ORed
	 * value so that older kernels will see any features2 flags. The
	 * superblock writeback code ensures the new sb_features2 is copied to
	 * sb_bad_features2 before it is logged or written to disk.
	 */
	if (xfs_sb_has_mismatched_features2(sbp)) {
		xfs_warn(mp, "correcting sb_features alignment problem");
		sbp->sb_features2 |= sbp->sb_bad_features2;
		mp->m_update_sb = true;
	}
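	/*
	 * Illustrative example (values chosen for this sketch, using the
	 * LAZYSBCOUNT (0x2) and ATTR2 (0x8) feature bits): if sb_features2
	 * holds 0x2 while sb_bad_features2 still carries a stale 0x8, the
	 * OR above leaves 0xa in sb_features2, and superblock writeback
	 * then copies that into sb_bad_features2 so both slots agree for
	 * old and new kernels alike.
	 */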

	/* always use v2 inodes by default now */
	if (!(mp->m_sb.sb_versionnum & XFS_SB_VERSION_NLINKBIT)) {
		mp->m_sb.sb_versionnum |= XFS_SB_VERSION_NLINKBIT;
		mp->m_features |= XFS_FEAT_NLINK;
		mp->m_update_sb = true;
	}

	/*
	 * If we were given new sunit/swidth options, do some basic validation
	 * checks and convert the incore dalign and swidth values to the
	 * same units (FSB) that everything else uses.  This /must/ happen
	 * before computing the inode geometry.
	 */
	error = xfs_validate_new_dalign(mp);
	if (error)
		goto out;

	xfs_alloc_compute_maxlevels(mp);
	xfs_bmap_compute_maxlevels(mp, XFS_DATA_FORK);
	xfs_bmap_compute_maxlevels(mp, XFS_ATTR_FORK);
	xfs_mount_setup_inode_geom(mp);
	xfs_rmapbt_compute_maxlevels(mp);
	xfs_refcountbt_compute_maxlevels(mp);

	/*
	 * Check if sb_agblocks is aligned at the stripe boundary.  If it is
	 * NOT aligned, turn off m_dalign: allocator alignment is within an
	 * AG, so the AG itself has to be aligned at the stripe boundary.
	 * Note that we must compute the free space and rmap btree geometry
	 * before doing this.
	 */
	error = xfs_update_alignment(mp);
	if (error)
		goto out;

	/* enable fail_at_unmount as default */
	mp->m_fail_unmount = true;

	error = xfs_sysfs_init(&mp->m_kobj, &xfs_mp_ktype,
			       NULL, mp->m_super->s_id);
	if (error)
		goto out;

	error = xfs_sysfs_init(&mp->m_stats.xs_kobj, &xfs_stats_ktype,
			       &mp->m_kobj, "stats");
	if (error)
		goto out_remove_sysfs;

	error = xfs_error_sysfs_init(mp);
	if (error)
		goto out_del_stats;

	error = xfs_errortag_init(mp);
	if (error)
		goto out_remove_error_sysfs;

	error = xfs_uuid_mount(mp);
	if (error)
		goto out_remove_errortag;

	/*
	 * Update the preferred write size based on the information from the
	 * on-disk superblock.
	 */
	mp->m_allocsize_log =
		max_t(uint32_t, sbp->sb_blocklog, mp->m_allocsize_log);
	mp->m_allocsize_blocks = 1U << (mp->m_allocsize_log - sbp->sb_blocklog);

	/* set the low space thresholds for dynamic preallocation */
	xfs_set_low_space_thresholds(mp);

	/*
	 * If enabled, sparse inode chunk alignment is expected to match the
	 * cluster size. Full inode chunk alignment must match the chunk size,
	 * but that is checked on sb read verification...
	 */
	if (xfs_has_sparseinodes(mp) &&
	    mp->m_sb.sb_spino_align !=
			XFS_B_TO_FSBT(mp, igeo->inode_cluster_size_raw)) {
		xfs_warn(mp,
	"Sparse inode block alignment (%u) must match cluster size (%llu).",
			 mp->m_sb.sb_spino_align,
			 XFS_B_TO_FSBT(mp, igeo->inode_cluster_size_raw));
		error = -EINVAL;
		goto out_remove_uuid;
	}

	/*
	 * Check that the data (and log if separate) is an ok size.
	 */
	error = xfs_check_sizes(mp);
	if (error)
		goto out_remove_uuid;

	/*
	 * Initialize realtime fields in the mount structure
	 */
	error = xfs_rtmount_init(mp);
	if (error) {
		xfs_warn(mp, "RT mount failed");
		goto out_remove_uuid;
	}

	/*
	 * Copies the low order bits of the timestamp and the randomly
	 * set "sequence" number out of a UUID.
	 */
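	/*
	 * Illustrative layout (byte values invented for this sketch): for
	 * a UUID with b[4] b[5] == 0x12 0x34 and b[8] b[9] == 0xab 0xcd,
	 * m_fixedfsid[0] below becomes 0xabcd1234, and m_fixedfsid[1] is
	 * the big-endian 32-bit word built from b[0..3].
	 */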
	mp->m_fixedfsid[0] =
		(get_unaligned_be16(&sbp->sb_uuid.b[8]) << 16) |
		 get_unaligned_be16(&sbp->sb_uuid.b[4]);
	mp->m_fixedfsid[1] = get_unaligned_be32(&sbp->sb_uuid.b[0]);

	error = xfs_da_mount(mp);
	if (error) {
		xfs_warn(mp, "Failed dir/attr init: %d", error);
		goto out_remove_uuid;
	}

	/*
	 * Initialize the precomputed transaction reservation values.
	 */
	xfs_trans_init(mp);

	/*
	 * Allocate and initialize the per-ag data.
	 */
	error = xfs_initialize_perag(mp, sbp->sb_agcount, &mp->m_maxagi);
	if (error) {
		xfs_warn(mp, "Failed per-ag init: %d", error);
		goto out_free_dir;
	}

	if (XFS_IS_CORRUPT(mp, !sbp->sb_logblocks)) {
		xfs_warn(mp, "no log defined");
		error = -EFSCORRUPTED;
		goto out_free_perag;
	}

	error = xfs_inodegc_register_shrinker(mp);
	if (error)
		goto out_fail_wait;

	/*
	 * Log's mount-time initialization. The first part of recovery can place
	 * some items on the AIL, to be handled when recovery is finished or
	 * cancelled.
	 */
	error = xfs_log_mount(mp, mp->m_logdev_targp,
			      XFS_FSB_TO_DADDR(mp, sbp->sb_logstart),
			      XFS_FSB_TO_BB(mp, sbp->sb_logblocks));
	if (error) {
		xfs_warn(mp, "log mount failed");
		goto out_inodegc_shrinker;
	}

	/* Make sure the summary counts are ok. */
	error = xfs_check_summary_counts(mp);
	if (error)
		goto out_log_dealloc;

	/* Enable background inode inactivation workers. */
	xfs_inodegc_start(mp);
	xfs_blockgc_start(mp);

	/*
	 * Now that we've recovered any pending superblock feature bit
	 * additions, we can finish setting up the attr2 behaviour for the
	 * mount. The noattr2 option overrides the superblock flag, so only
	 * check the superblock feature flag if the mount option is not set.
	 */
	if (xfs_has_noattr2(mp)) {
		mp->m_features &= ~XFS_FEAT_ATTR2;
	} else if (!xfs_has_attr2(mp) &&
		   (mp->m_sb.sb_features2 & XFS_SB_VERSION2_ATTR2BIT)) {
		mp->m_features |= XFS_FEAT_ATTR2;
	}

	/*
	 * Get and sanity-check the root inode.
	 * Save the pointer to it in the mount structure.
	 */
	error = xfs_iget(mp, NULL, sbp->sb_rootino, XFS_IGET_UNTRUSTED,
			 XFS_ILOCK_EXCL, &rip);
	if (error) {
		xfs_warn(mp,
			"Failed to read root inode 0x%llx, error %d",
			sbp->sb_rootino, -error);
		goto out_log_dealloc;
	}

	ASSERT(rip != NULL);

	if (XFS_IS_CORRUPT(mp, !S_ISDIR(VFS_I(rip)->i_mode))) {
		xfs_warn(mp, "corrupted root inode %llu: not a directory",
			(unsigned long long)rip->i_ino);
		xfs_iunlock(rip, XFS_ILOCK_EXCL);
		error = -EFSCORRUPTED;
		goto out_rele_rip;
	}
	mp->m_rootip = rip;	/* save it */

	xfs_iunlock(rip, XFS_ILOCK_EXCL);

	/*
	 * Initialize realtime inode pointers in the mount structure
	 */
	error = xfs_rtmount_inodes(mp);
	if (error) {
		/*
		 * Free up the root inode.
		 */
		xfs_warn(mp, "failed to read RT inodes");
		goto out_rele_rip;
	}

	/*
	 * If this is a read-only mount defer the superblock updates until
	 * the next remount into writeable mode.  Otherwise we would never
	 * perform the update e.g. for the root filesystem.
	 */
	if (mp->m_update_sb && !(mp->m_flags & XFS_MOUNT_RDONLY)) {
		error = xfs_sync_sb(mp, false);
		if (error) {
			xfs_warn(mp, "failed to write sb changes");
			goto out_rtunmount;
		}
	}

	/*
	 * Initialise the XFS quota management subsystem for this mount
	 */
	if (XFS_IS_QUOTA_ON(mp)) {
		error = xfs_qm_newmount(mp, &quotamount, &quotaflags);
		if (error)
			goto out_rtunmount;
	} else {
		/*
		 * If a file system had quotas running earlier, but decided to
		 * mount without -o uquota/pquota/gquota options, revoke the
		 * quotachecked license.
		 */
		if (mp->m_sb.sb_qflags & XFS_ALL_QUOTA_ACCT) {
			xfs_notice(mp, "resetting quota flags");
			error = xfs_mount_reset_sbqflags(mp);
			if (error)
				goto out_rtunmount;
		}
	}

	/*
	 * Finish recovering the file system.  This part needed to be delayed
	 * until after the root and real-time bitmap inodes were consistently
	 * read in.  Temporarily create per-AG space reservations for metadata
	 * btree shape changes because space freeing transactions (for inode
	 * inactivation) require the per-AG reservation in lieu of reserving
	 * blocks.
	 */
	error = xfs_fs_reserve_ag_blocks(mp);
	if (error && error == -ENOSPC)
		xfs_warn(mp,
	"ENOSPC reserving per-AG metadata pool, log recovery may fail.");
	error = xfs_log_mount_finish(mp);
	xfs_fs_unreserve_ag_blocks(mp);
	if (error) {
		xfs_warn(mp, "log mount finish failed");
		goto out_rtunmount;
	}

	/*
	 * Now the log is fully replayed, we can transition to full read-only
	 * mode for read-only mounts. This will sync all the metadata and clean
	 * the log so that the recovery we just performed does not have to be
	 * replayed again on the next mount.
	 *
	 * We use the same quiesce mechanism as the rw->ro remount, as they are
	 * semantically identical operations.
	 */
	if ((mp->m_flags & XFS_MOUNT_RDONLY) && !xfs_has_norecovery(mp))
		xfs_log_clean(mp);

	/*
	 * Complete the quota initialisation, post-log-replay component.
	 */
	if (quotamount) {
		ASSERT(mp->m_qflags == 0);
		mp->m_qflags = quotaflags;

		xfs_qm_mount_quotas(mp);
	}

	/*
	 * Now we are mounted, reserve a small amount of unused space for
	 * privileged transactions. This is needed so that transaction
	 * space required for critical operations can dip into this pool
	 * when at ENOSPC. This is needed for operations like create with
	 * attr, unwritten extent conversion at ENOSPC, etc. Data allocations
	 * are not allowed to use this reserved space.
	 *
	 * This may drive us straight to ENOSPC on mount, but that implies
	 * we were already there on the last unmount. Warn if this occurs.
	 */
	if (!(mp->m_flags & XFS_MOUNT_RDONLY)) {
		resblks = xfs_default_resblks(mp);
		error = xfs_reserve_blocks(mp, &resblks, NULL);
		if (error)
			xfs_warn(mp,
	"Unable to allocate reserve blocks. Continuing without reserve pool.");

		/* Recover any CoW blocks that never got remapped. */
		error = xfs_reflink_recover_cow(mp);
		if (error) {
			xfs_err(mp,
	"Error %d recovering leftover CoW allocations.", error);
			xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
			goto out_quota;
		}

		/* Reserve AG blocks for future btree expansion. */
		error = xfs_fs_reserve_ag_blocks(mp);
		if (error && error != -ENOSPC)
			goto out_agresv;
	}

	return 0;

 out_agresv:
	xfs_fs_unreserve_ag_blocks(mp);
 out_quota:
	xfs_qm_unmount_quotas(mp);
 out_rtunmount:
	xfs_rtunmount_inodes(mp);
 out_rele_rip:
	xfs_irele(rip);
	/* Clean out dquots that might be in memory after quotacheck. */
	xfs_qm_unmount(mp);

	/*
	 * Inactivate all inodes that might still be in memory after a log
	 * intent recovery failure so that reclaim can free them.  Metadata
	 * inodes and the root directory shouldn't need inactivation, but the
	 * mount failed for some reason, so pull down all the state and flee.
	 */
	xfs_inodegc_flush(mp);

	/*
	 * Flush all inode reclamation work and flush the log.
	 * We have to do this /after/ rtunmount and qm_unmount because those
	 * two will have scheduled delayed reclaim for the rt/quota inodes.
	 *
	 * This is slightly different from the unmountfs call sequence
	 * because we could be tearing down a partially set up mount.  In
	 * particular, if log_mount_finish fails we bail out without calling
	 * qm_unmount_quotas and therefore rely on qm_unmount to release the
	 * quota inodes.
	 */
	xfs_unmount_flush_inodes(mp);
 out_log_dealloc:
	xfs_log_mount_cancel(mp);
 out_inodegc_shrinker:
	unregister_shrinker(&mp->m_inodegc_shrinker);
 out_fail_wait:
	if (mp->m_logdev_targp && mp->m_logdev_targp != mp->m_ddev_targp)
		xfs_buftarg_drain(mp->m_logdev_targp);
	xfs_buftarg_drain(mp->m_ddev_targp);
 out_free_perag:
	xfs_free_perag(mp);
 out_free_dir:
	xfs_da_unmount(mp);
 out_remove_uuid:
	xfs_uuid_unmount(mp);
 out_remove_errortag:
	xfs_errortag_del(mp);
 out_remove_error_sysfs:
	xfs_error_sysfs_del(mp);
 out_del_stats:
	xfs_sysfs_del(&mp->m_stats.xs_kobj);
 out_remove_sysfs:
	xfs_sysfs_del(&mp->m_kobj);
 out:
	return error;
}

/*
 * This flushes out the inodes, dquots and the superblock, unmounts the
 * log and makes sure that incore structures are freed.
 */
void
xfs_unmountfs(
	struct xfs_mount	*mp)
{
	uint64_t		resblks;
	int			error;

	/*
	 * Perform all on-disk metadata updates required to inactivate inodes
	 * that the VFS evicted earlier in the unmount process.  Freeing inodes
	 * and discarding CoW fork preallocations can cause shape changes to
	 * the free inode and refcount btrees, respectively, so we must finish
	 * this before we discard the metadata space reservations.  Metadata
	 * inodes and the root directory do not require inactivation.
	 */
	xfs_inodegc_flush(mp);

	xfs_blockgc_stop(mp);
	xfs_fs_unreserve_ag_blocks(mp);
	xfs_qm_unmount_quotas(mp);
	xfs_rtunmount_inodes(mp);
	xfs_irele(mp->m_rootip);

	xfs_unmount_flush_inodes(mp);

	xfs_qm_unmount(mp);

	/*
	 * Unreserve any blocks we have so that when we unmount we don't account
	 * the reserved free space as used. This is really only necessary for
	 * lazy superblock counting because it trusts the incore superblock
	 * counters to be absolutely correct on clean unmount.
	 *
	 * We don't bother correcting this elsewhere for lazy superblock
	 * counting because on mount of an unclean filesystem we reconstruct the
	 * correct counter value and this is irrelevant.
	 *
	 * For non-lazy counter filesystems, this doesn't matter at all because
	 * we only ever apply deltas to the superblock and hence the incore
	 * value does not matter.
	 */
	resblks = 0;
	error = xfs_reserve_blocks(mp, &resblks, NULL);
	if (error)
		xfs_warn(mp, "Unable to free reserved block pool. "
				"Freespace may not be correct on next mount.");

	xfs_log_unmount(mp);
	xfs_da_unmount(mp);
	xfs_uuid_unmount(mp);

#if defined(DEBUG)
	xfs_errortag_clearall(mp);
#endif
	unregister_shrinker(&mp->m_inodegc_shrinker);
	xfs_free_perag(mp);

	xfs_errortag_del(mp);
	xfs_error_sysfs_del(mp);
	xfs_sysfs_del(&mp->m_stats.xs_kobj);
	xfs_sysfs_del(&mp->m_kobj);
}

/*
 * Determine whether modifications can proceed. The caller specifies the minimum
 * freeze level for which modifications should not be allowed. This allows
 * certain operations to proceed while the freeze sequence is in progress, if
 * necessary.
 */
bool
xfs_fs_writable(
	struct xfs_mount	*mp,
	int			level)
{
	ASSERT(level > SB_UNFROZEN);
	if ((mp->m_super->s_writers.frozen >= level) ||
	    XFS_FORCED_SHUTDOWN(mp) || (mp->m_flags & XFS_MOUNT_RDONLY))
		return false;

	return true;
}
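/*
 * Usage sketch (grounded in this file): xfs_mount_reset_sbqflags() above
 * passes SB_FREEZE_WRITE, so quota flag resets are skipped once the
 * filesystem is frozen for writes, while background work that must keep
 * running deeper into a freeze can pass a higher level such as
 * SB_FREEZE_COMPLETE and only stop once the freeze has fully completed.
 */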

int
xfs_mod_fdblocks(
	struct xfs_mount	*mp,
	int64_t			delta,
	bool			rsvd)
{
	int64_t			lcounter;
	long long		res_used;
	s32			batch;
	uint64_t		set_aside;

	if (delta > 0) {
		/*
		 * If the reserve pool is depleted, put blocks back into it
		 * first. Most of the time the pool is full.
		 */
		if (likely(mp->m_resblks == mp->m_resblks_avail)) {
			percpu_counter_add(&mp->m_fdblocks, delta);
			return 0;
		}

		spin_lock(&mp->m_sb_lock);
		res_used = (long long)(mp->m_resblks - mp->m_resblks_avail);

		if (res_used > delta) {
			mp->m_resblks_avail += delta;
		} else {
			delta -= res_used;
			mp->m_resblks_avail = mp->m_resblks;
			percpu_counter_add(&mp->m_fdblocks, delta);
		}
		spin_unlock(&mp->m_sb_lock);
		return 0;
	}

	/*
	 * When taking blocks away, we need to be more accurate the closer we
	 * are to zero.
	 *
	 * If the counter has a value of less than 2 * max batch size,
	 * then make everything serialise as we are real close to
	 * ENOSPC.
	 */
	if (__percpu_counter_compare(&mp->m_fdblocks, 2 * XFS_FDBLOCKS_BATCH,
				     XFS_FDBLOCKS_BATCH) < 0)
		batch = 1;
	else
		batch = XFS_FDBLOCKS_BATCH;

	/*
	 * Set aside allocbt blocks because these blocks are tracked as free
	 * space but not available for allocation. Technically this means that a
	 * single reservation cannot consume all remaining free space, but the
	 * ratio of allocbt blocks to usable free blocks should be rather small.
	 * The tradeoff without this is that filesystems that maintain high
	 * perag block reservations can over reserve physical block availability
	 * and fail physical allocation, which leads to much more serious
	 * problems (i.e. transaction abort, pagecache discards, etc.) than
	 * slightly premature -ENOSPC.
	 */
	set_aside = mp->m_alloc_set_aside + atomic64_read(&mp->m_allocbt_blks);
	percpu_counter_add_batch(&mp->m_fdblocks, delta, batch);
	if (__percpu_counter_compare(&mp->m_fdblocks, set_aside,
				     XFS_FDBLOCKS_BATCH) >= 0) {
		/* we had space! */
		return 0;
	}

	/*
	 * lock up the sb for dipping into reserves before releasing the space
	 * that took us to ENOSPC.
	 */
	spin_lock(&mp->m_sb_lock);
	percpu_counter_add(&mp->m_fdblocks, -delta);
	if (!rsvd)
		goto fdblocks_enospc;

	lcounter = (long long)mp->m_resblks_avail + delta;
	if (lcounter >= 0) {
		mp->m_resblks_avail = lcounter;
		spin_unlock(&mp->m_sb_lock);
		return 0;
	}
	xfs_warn_once(mp,
"Reserve blocks depleted! Consider increasing reserve pool size.");

fdblocks_enospc:
	spin_unlock(&mp->m_sb_lock);
	return -ENOSPC;
}
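/*
 * Usage sketch (illustrative, simplified from real callers such as the
 * transaction reservation code): a caller takes blocks with a negative
 * delta and must give them back if a later step fails, e.g.
 *
 *	error = xfs_mod_fdblocks(mp, -((int64_t)nblocks), rsvd);
 *	if (error)
 *		return error;
 *	...
 *	if (later_failure)
 *		xfs_mod_fdblocks(mp, (int64_t)nblocks, rsvd);
 */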

int
xfs_mod_frextents(
	struct xfs_mount	*mp,
	int64_t			delta)
{
	int64_t			lcounter;
	int			ret = 0;

	spin_lock(&mp->m_sb_lock);
	lcounter = mp->m_sb.sb_frextents + delta;
	if (lcounter < 0)
		ret = -ENOSPC;
	else
		mp->m_sb.sb_frextents = lcounter;
	spin_unlock(&mp->m_sb_lock);
	return ret;
}

/*
 * Used to free the superblock along various error paths.
 */
void
xfs_freesb(
	struct xfs_mount	*mp)
{
	struct xfs_buf		*bp = mp->m_sb_bp;

	xfs_buf_lock(bp);
	mp->m_sb_bp = NULL;
	xfs_buf_relse(bp);
}

/*
 * If the underlying (data/log/rt) device is readonly, there are some
 * operations that cannot proceed.
 */
int
xfs_dev_is_read_only(
	struct xfs_mount	*mp,
	char			*message)
{
	if (xfs_readonly_buftarg(mp->m_ddev_targp) ||
	    xfs_readonly_buftarg(mp->m_logdev_targp) ||
	    (mp->m_rtdev_targp && xfs_readonly_buftarg(mp->m_rtdev_targp))) {
		xfs_notice(mp, "%s required on read-only device.", message);
		xfs_notice(mp, "write access unavailable, cannot proceed.");
		return -EROFS;
	}
	return 0;
}

/* Force the summary counters to be recalculated at next mount. */
void
xfs_force_summary_recalc(
	struct xfs_mount	*mp)
{
	if (!xfs_has_lazysbcount(mp))
		return;

	xfs_fs_mark_sick(mp, XFS_SICK_FS_COUNTERS);
}

/*
 * Enable a log incompat feature flag in the primary superblock.  The caller
 * cannot have any other transactions in progress.
 */
int
xfs_add_incompat_log_feature(
	struct xfs_mount	*mp,
	uint32_t		feature)
{
	struct xfs_dsb		*dsb;
	int			error;

	ASSERT(hweight32(feature) == 1);
	ASSERT(!(feature & XFS_SB_FEAT_INCOMPAT_LOG_UNKNOWN));

	/*
	 * Force the log to disk and kick the background AIL thread to reduce
	 * the chances that the bwrite will stall waiting for the AIL to unpin
	 * the primary superblock buffer.  This isn't a data integrity
	 * operation, so we don't need a synchronous push.
	 */
	error = xfs_log_force(mp, XFS_LOG_SYNC);
	if (error)
		return error;
	xfs_ail_push_all(mp->m_ail);

	/*
	 * Lock the primary superblock buffer to serialize all callers that
	 * are trying to set feature bits.
	 */
	xfs_buf_lock(mp->m_sb_bp);
	xfs_buf_hold(mp->m_sb_bp);

	if (XFS_FORCED_SHUTDOWN(mp)) {
		error = -EIO;
		goto rele;
	}

	if (xfs_sb_has_incompat_log_feature(&mp->m_sb, feature))
		goto rele;

	/*
	 * Write the primary superblock to disk immediately, because we need
	 * the log_incompat bit to be set in the primary super now to protect
	 * the log items that we're going to commit later.
	 */
	dsb = mp->m_sb_bp->b_addr;
	xfs_sb_to_disk(dsb, &mp->m_sb);
	dsb->sb_features_log_incompat |= cpu_to_be32(feature);
	error = xfs_bwrite(mp->m_sb_bp);
	if (error)
		goto shutdown;

	/*
	 * Add the feature bits to the incore superblock before we unlock the
	 * buffer.
	 */
	xfs_sb_add_incompat_log_features(&mp->m_sb, feature);
	xfs_buf_relse(mp->m_sb_bp);

	/* Log the superblock to disk. */
	return xfs_sync_sb(mp, false);
shutdown:
	xfs_force_shutdown(mp, SHUTDOWN_META_IO_ERROR);
rele:
	xfs_buf_relse(mp->m_sb_bp);
	return error;
}
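/*
 * Usage sketch (hypothetical caller; the feature bit named here is an
 * assumption for illustration only and may not exist in this tree): a
 * feature that commits new log item types would set its bit before the
 * first such transaction commits, e.g.
 *
 *	error = xfs_add_incompat_log_feature(mp,
 *			XFS_SB_FEAT_INCOMPAT_LOG_XATTRS);
 *	if (error)
 *		return error;
 *
 * Because the primary superblock hits the disk before this returns, an
 * old kernel that crashes mid-way cannot replay a log containing items
 * it does not understand.
 */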

/*
 * Clear all the log incompat flags from the superblock.
 *
 * The caller cannot be in a transaction, must ensure that the log does not
 * contain any log items protected by any log incompat bit, and must ensure
 * that there are no other threads that depend on the state of the log incompat
 * feature flags in the primary super.
 *
 * Returns true if the superblock is dirty.
 */
bool
xfs_clear_incompat_log_features(
	struct xfs_mount	*mp)
{
	bool			ret = false;

	if (!xfs_sb_version_hascrc(&mp->m_sb) ||
	    !xfs_sb_has_incompat_log_feature(&mp->m_sb,
				XFS_SB_FEAT_INCOMPAT_LOG_ALL) ||
	    XFS_FORCED_SHUTDOWN(mp))
		return false;

	/*
	 * Update the incore superblock.  We synchronize on the primary super
	 * buffer lock to be consistent with the add function, though at least
	 * in theory this shouldn't be necessary.
	 */
	xfs_buf_lock(mp->m_sb_bp);
	xfs_buf_hold(mp->m_sb_bp);

	if (xfs_sb_has_incompat_log_feature(&mp->m_sb,
				XFS_SB_FEAT_INCOMPAT_LOG_ALL)) {
		xfs_info(mp, "Clearing log incompat feature flags.");
		xfs_sb_remove_incompat_log_features(&mp->m_sb);
		ret = true;
	}

	xfs_buf_relse(mp->m_sb_bp);
	return ret;
}

/*
 * Update the in-core delayed block counter.
 *
 * We prefer to update the counter without having to take a spinlock for every
 * counter update (i.e. batching).  Each change to delayed allocation
 * reservations can easily exceed the default percpu counter batch size, so we
 * use a larger batch factor here.
 *
 * Note that we don't currently have any callers requiring fast summation
 * (e.g. percpu_counter_read) so we can use a big batch value here.
 */
#define XFS_DELALLOC_BATCH	(4096)
void
xfs_mod_delalloc(
	struct xfs_mount	*mp,
	int64_t			delta)
{
	percpu_counter_add_batch(&mp->m_delalloc_blks, delta,
			XFS_DELALLOC_BATCH);
}