blob: be21a298400156ebb81fdac488a81b910975a774 [file] [log] [blame]
Darrick J. Wong84d42ea2018-05-14 06:34:36 -07001/*
2 * Copyright (C) 2018 Oracle. All Rights Reserved.
3 *
4 * Author: Darrick J. Wong <darrick.wong@oracle.com>
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version 2
9 * of the License, or (at your option) any later version.
10 *
11 * This program is distributed in the hope that it would be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write the Free Software Foundation,
18 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
19 */
20#include "xfs.h"
21#include "xfs_fs.h"
22#include "xfs_shared.h"
23#include "xfs_format.h"
24#include "xfs_trans_resv.h"
25#include "xfs_mount.h"
26#include "xfs_defer.h"
27#include "xfs_btree.h"
28#include "xfs_bit.h"
29#include "xfs_log_format.h"
30#include "xfs_trans.h"
31#include "xfs_sb.h"
32#include "xfs_inode.h"
33#include "xfs_icache.h"
34#include "xfs_alloc.h"
35#include "xfs_alloc_btree.h"
36#include "xfs_ialloc.h"
37#include "xfs_ialloc_btree.h"
38#include "xfs_rmap.h"
39#include "xfs_rmap_btree.h"
40#include "xfs_refcount.h"
41#include "xfs_refcount_btree.h"
42#include "xfs_extent_busy.h"
43#include "xfs_ag_resv.h"
44#include "xfs_trans_space.h"
45#include "scrub/xfs_scrub.h"
46#include "scrub/scrub.h"
47#include "scrub/common.h"
48#include "scrub/trace.h"
49#include "scrub/repair.h"
50
51/*
52 * Attempt to repair some metadata, if the metadata is corrupt and userspace
53 * told us to fix it. This function returns -EAGAIN to mean "re-run scrub",
54 * and will set *fixed to true if it thinks it repaired anything.
55 */
56int
57xfs_repair_attempt(
58 struct xfs_inode *ip,
59 struct xfs_scrub_context *sc,
60 bool *fixed)
61{
62 int error = 0;
63
64 trace_xfs_repair_attempt(ip, sc->sm, error);
65
66 xfs_scrub_ag_btcur_free(&sc->sa);
67
68 /* Repair whatever's broken. */
69 ASSERT(sc->ops->repair);
70 error = sc->ops->repair(sc);
71 trace_xfs_repair_done(ip, sc->sm, error);
72 switch (error) {
73 case 0:
74 /*
75 * Repair succeeded. Commit the fixes and perform a second
76 * scrub so that we can tell userspace if we fixed the problem.
77 */
78 sc->sm->sm_flags &= ~XFS_SCRUB_FLAGS_OUT;
79 *fixed = true;
80 return -EAGAIN;
81 case -EDEADLOCK:
82 case -EAGAIN:
83 /* Tell the caller to try again having grabbed all the locks. */
84 if (!sc->try_harder) {
85 sc->try_harder = true;
86 return -EAGAIN;
87 }
88 /*
89 * We tried harder but still couldn't grab all the resources
90 * we needed to fix it. The corruption has not been fixed,
91 * so report back to userspace.
92 */
93 return -EFSCORRUPTED;
94 default:
95 return error;
96 }
97}
98
99/*
100 * Complain about unfixable problems in the filesystem. We don't log
101 * corruptions when IFLAG_REPAIR wasn't set on the assumption that the driver
102 * program is xfs_scrub, which will call back with IFLAG_REPAIR set if the
103 * administrator isn't running xfs_scrub in no-repairs mode.
104 *
105 * Use this helper function because _ratelimited silently declares a static
106 * structure to track rate limiting information.
107 */
108void
109xfs_repair_failure(
110 struct xfs_mount *mp)
111{
112 xfs_alert_ratelimited(mp,
113"Corruption not fixed during online repair. Unmount and run xfs_repair.");
114}
115
116/*
117 * Repair probe -- userspace uses this to probe if we're willing to repair a
118 * given mountpoint.
119 */
120int
121xfs_repair_probe(
122 struct xfs_scrub_context *sc)
123{
124 int error = 0;
125
126 if (xfs_scrub_should_terminate(sc, &error))
127 return error;
128
129 return 0;
130}
Darrick J. Wong0a9633f2018-05-29 22:18:08 -0700131
132/*
133 * Roll a transaction, keeping the AG headers locked and reinitializing
134 * the btree cursors.
135 */
136int
137xfs_repair_roll_ag_trans(
138 struct xfs_scrub_context *sc)
139{
140 int error;
141
142 /* Keep the AG header buffers locked so we can keep going. */
143 xfs_trans_bhold(sc->tp, sc->sa.agi_bp);
144 xfs_trans_bhold(sc->tp, sc->sa.agf_bp);
145 xfs_trans_bhold(sc->tp, sc->sa.agfl_bp);
146
147 /* Roll the transaction. */
148 error = xfs_trans_roll(&sc->tp);
149 if (error)
150 goto out_release;
151
152 /* Join AG headers to the new transaction. */
153 xfs_trans_bjoin(sc->tp, sc->sa.agi_bp);
154 xfs_trans_bjoin(sc->tp, sc->sa.agf_bp);
155 xfs_trans_bjoin(sc->tp, sc->sa.agfl_bp);
156
157 return 0;
158
159out_release:
160 /*
161 * Rolling failed, so release the hold on the buffers. The
162 * buffers will be released during teardown on our way out
163 * of the kernel.
164 */
165 xfs_trans_bhold_release(sc->tp, sc->sa.agi_bp);
166 xfs_trans_bhold_release(sc->tp, sc->sa.agf_bp);
167 xfs_trans_bhold_release(sc->tp, sc->sa.agfl_bp);
168
169 return error;
170}
171
172/*
173 * Does the given AG have enough space to rebuild a btree? Neither AG
174 * reservation can be critical, and we must have enough space (factoring
175 * in AG reservations) to construct a whole btree.
176 */
177bool
178xfs_repair_ag_has_space(
179 struct xfs_perag *pag,
180 xfs_extlen_t nr_blocks,
181 enum xfs_ag_resv_type type)
182{
183 return !xfs_ag_resv_critical(pag, XFS_AG_RESV_RMAPBT) &&
184 !xfs_ag_resv_critical(pag, XFS_AG_RESV_METADATA) &&
185 pag->pagf_freeblks > xfs_ag_resv_needed(pag, type) + nr_blocks;
186}
187
188/*
189 * Figure out how many blocks to reserve for an AG repair. We calculate the
190 * worst case estimate for the number of blocks we'd need to rebuild one of
191 * any type of per-AG btree.
192 */
193xfs_extlen_t
194xfs_repair_calc_ag_resblks(
195 struct xfs_scrub_context *sc)
196{
197 struct xfs_mount *mp = sc->mp;
198 struct xfs_scrub_metadata *sm = sc->sm;
199 struct xfs_perag *pag;
200 struct xfs_buf *bp;
201 xfs_agino_t icount = 0;
202 xfs_extlen_t aglen = 0;
203 xfs_extlen_t usedlen;
204 xfs_extlen_t freelen;
205 xfs_extlen_t bnobt_sz;
206 xfs_extlen_t inobt_sz;
207 xfs_extlen_t rmapbt_sz;
208 xfs_extlen_t refcbt_sz;
209 int error;
210
211 if (!(sm->sm_flags & XFS_SCRUB_IFLAG_REPAIR))
212 return 0;
213
214 /* Use in-core counters if possible. */
215 pag = xfs_perag_get(mp, sm->sm_agno);
216 if (pag->pagi_init)
217 icount = pag->pagi_count;
218
219 /*
220 * Otherwise try to get the actual counters from disk; if not, make
221 * some worst case assumptions.
222 */
223 if (icount == 0) {
224 error = xfs_ialloc_read_agi(mp, NULL, sm->sm_agno, &bp);
225 if (error) {
226 icount = mp->m_sb.sb_agblocks / mp->m_sb.sb_inopblock;
227 } else {
228 icount = pag->pagi_count;
229 xfs_buf_relse(bp);
230 }
231 }
232
233 /* Now grab the block counters from the AGF. */
234 error = xfs_alloc_read_agf(mp, NULL, sm->sm_agno, 0, &bp);
235 if (error) {
236 aglen = mp->m_sb.sb_agblocks;
237 freelen = aglen;
238 usedlen = aglen;
239 } else {
240 aglen = be32_to_cpu(XFS_BUF_TO_AGF(bp)->agf_length);
241 freelen = pag->pagf_freeblks;
242 usedlen = aglen - freelen;
243 xfs_buf_relse(bp);
244 }
245 xfs_perag_put(pag);
246
247 trace_xfs_repair_calc_ag_resblks(mp, sm->sm_agno, icount, aglen,
248 freelen, usedlen);
249
250 /*
251 * Figure out how many blocks we'd need worst case to rebuild
252 * each type of btree. Note that we can only rebuild the
253 * bnobt/cntbt or inobt/finobt as pairs.
254 */
255 bnobt_sz = 2 * xfs_allocbt_calc_size(mp, freelen);
256 if (xfs_sb_version_hassparseinodes(&mp->m_sb))
257 inobt_sz = xfs_iallocbt_calc_size(mp, icount /
258 XFS_INODES_PER_HOLEMASK_BIT);
259 else
260 inobt_sz = xfs_iallocbt_calc_size(mp, icount /
261 XFS_INODES_PER_CHUNK);
262 if (xfs_sb_version_hasfinobt(&mp->m_sb))
263 inobt_sz *= 2;
264 if (xfs_sb_version_hasreflink(&mp->m_sb))
265 refcbt_sz = xfs_refcountbt_calc_size(mp, usedlen);
266 else
267 refcbt_sz = 0;
268 if (xfs_sb_version_hasrmapbt(&mp->m_sb)) {
269 /*
270 * Guess how many blocks we need to rebuild the rmapbt.
271 * For non-reflink filesystems we can't have more records than
272 * used blocks. However, with reflink it's possible to have
273 * more than one rmap record per AG block. We don't know how
274 * many rmaps there could be in the AG, so we start off with
275 * what we hope is an generous over-estimation.
276 */
277 if (xfs_sb_version_hasreflink(&mp->m_sb))
278 rmapbt_sz = xfs_rmapbt_calc_size(mp,
279 (unsigned long long)aglen * 2);
280 else
281 rmapbt_sz = xfs_rmapbt_calc_size(mp, usedlen);
282 } else {
283 rmapbt_sz = 0;
284 }
285
286 trace_xfs_repair_calc_ag_resblks_btsize(mp, sm->sm_agno, bnobt_sz,
287 inobt_sz, rmapbt_sz, refcbt_sz);
288
289 return max(max(bnobt_sz, inobt_sz), max(rmapbt_sz, refcbt_sz));
290}
Darrick J. Wong73d6b422018-05-29 22:18:09 -0700291
292/* Allocate a block in an AG. */
293int
294xfs_repair_alloc_ag_block(
295 struct xfs_scrub_context *sc,
296 struct xfs_owner_info *oinfo,
297 xfs_fsblock_t *fsbno,
298 enum xfs_ag_resv_type resv)
299{
300 struct xfs_alloc_arg args = {0};
301 xfs_agblock_t bno;
302 int error;
303
304 switch (resv) {
305 case XFS_AG_RESV_AGFL:
306 case XFS_AG_RESV_RMAPBT:
307 error = xfs_alloc_get_freelist(sc->tp, sc->sa.agf_bp, &bno, 1);
308 if (error)
309 return error;
310 if (bno == NULLAGBLOCK)
311 return -ENOSPC;
312 xfs_extent_busy_reuse(sc->mp, sc->sa.agno, bno,
313 1, false);
314 *fsbno = XFS_AGB_TO_FSB(sc->mp, sc->sa.agno, bno);
315 if (resv == XFS_AG_RESV_RMAPBT)
316 xfs_ag_resv_rmapbt_alloc(sc->mp, sc->sa.agno);
317 return 0;
318 default:
319 break;
320 }
321
322 args.tp = sc->tp;
323 args.mp = sc->mp;
324 args.oinfo = *oinfo;
325 args.fsbno = XFS_AGB_TO_FSB(args.mp, sc->sa.agno, 0);
326 args.minlen = 1;
327 args.maxlen = 1;
328 args.prod = 1;
329 args.type = XFS_ALLOCTYPE_THIS_AG;
330 args.resv = resv;
331
332 error = xfs_alloc_vextent(&args);
333 if (error)
334 return error;
335 if (args.fsbno == NULLFSBLOCK)
336 return -ENOSPC;
337 ASSERT(args.len == 1);
338 *fsbno = args.fsbno;
339
340 return 0;
341}
342
343/* Initialize a new AG btree root block with zero entries. */
344int
345xfs_repair_init_btblock(
346 struct xfs_scrub_context *sc,
347 xfs_fsblock_t fsb,
348 struct xfs_buf **bpp,
349 xfs_btnum_t btnum,
350 const struct xfs_buf_ops *ops)
351{
352 struct xfs_trans *tp = sc->tp;
353 struct xfs_mount *mp = sc->mp;
354 struct xfs_buf *bp;
355
356 trace_xfs_repair_init_btblock(mp, XFS_FSB_TO_AGNO(mp, fsb),
357 XFS_FSB_TO_AGBNO(mp, fsb), btnum);
358
359 ASSERT(XFS_FSB_TO_AGNO(mp, fsb) == sc->sa.agno);
360 bp = xfs_trans_get_buf(tp, mp->m_ddev_targp, XFS_FSB_TO_DADDR(mp, fsb),
361 XFS_FSB_TO_BB(mp, 1), 0);
362 xfs_buf_zero(bp, 0, BBTOB(bp->b_length));
363 xfs_btree_init_block(mp, bp, btnum, 0, 0, sc->sa.agno, 0);
364 xfs_trans_buf_set_type(tp, bp, XFS_BLFT_BTREE_BUF);
365 xfs_trans_log_buf(tp, bp, 0, bp->b_length);
366 bp->b_ops = ops;
367 *bpp = bp;
368
369 return 0;
370}