/*
 * Copyright (C) 2017 Oracle.  All Rights Reserved.
 *
 * Author: Darrick J. Wong <darrick.wong@oracle.com>
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version 2
 * of the License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it would be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
 */
#include "xfs.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
#include "xfs_trans_resv.h"
#include "xfs_mount.h"
#include "xfs_defer.h"
#include "xfs_btree.h"
#include "xfs_bit.h"
#include "xfs_log_format.h"
#include "xfs_trans.h"
#include "xfs_sb.h"
#include "xfs_inode.h"
#include "xfs_icache.h"
#include "xfs_itable.h"
#include "xfs_alloc.h"
#include "xfs_alloc_btree.h"
#include "xfs_bmap.h"
#include "xfs_bmap_btree.h"
#include "xfs_ialloc.h"
#include "xfs_ialloc_btree.h"
#include "xfs_refcount.h"
#include "xfs_refcount_btree.h"
#include "xfs_rmap.h"
#include "xfs_rmap_btree.h"
#include "xfs_log.h"
#include "xfs_trans_priv.h"
#include "xfs_attr.h"
#include "xfs_reflink.h"
#include "scrub/xfs_scrub.h"
#include "scrub/scrub.h"
#include "scrub/common.h"
#include "scrub/trace.h"
#include "scrub/btree.h"
#include "scrub/repair.h"

/* Common code for the metadata scrubbers. */

/*
 * Handling operational errors.
 *
 * The *_process_error() family of functions are used to process error return
 * codes from functions called as part of a scrub operation.
 *
 * If there's no error, we return true to tell the caller that it's ok
 * to move on to the next check in its list.
 *
 * For non-verifier errors (e.g. ENOMEM) we return false to tell the
 * caller that something bad happened, and we preserve *error so that
 * the caller can return the *error up the stack to userspace.
 *
 * Verifier errors (EFSBADCRC/EFSCORRUPTED) are recorded by setting
 * OFLAG_CORRUPT in sm_flags and the *error is cleared.  In other words,
 * we track verifier errors (and failed scrub checks) via OFLAG_CORRUPT,
 * not via return codes.  We return false to tell the caller that
 * something bad happened.  Since the error has been cleared, the caller
 * will (presumably) return that zero and scrubbing will move on to
 * whatever's next.
 *
 * ftrace can be used to record the precise metadata location and the
 * approximate code location of the failed operation.
 */
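
/*
 * A sketch of a typical call site (hypothetical, for illustration only;
 * the btree call and variables here are stand-ins):
 *
 *	error = xfs_btree_query_range(cur, &low, &high, fn, priv);
 *	if (!xfs_scrub_process_error(sc, agno, bno, &error))
 *		return error;
 *
 * On a verifier error this sets OFLAG_CORRUPT, clears *error, and
 * returns false, so the caller returns zero and scrub moves on.
 */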

/* Check for operational errors. */
static bool
__xfs_scrub_process_error(
	struct xfs_scrub_context	*sc,
	xfs_agnumber_t			agno,
	xfs_agblock_t			bno,
	int				*error,
	__u32				errflag,
	void				*ret_ip)
{
	switch (*error) {
	case 0:
		return true;
	case -EDEADLOCK:
		/* Used to restart an op with deadlock avoidance. */
		trace_xfs_scrub_deadlock_retry(sc->ip, sc->sm, *error);
		break;
	case -EFSBADCRC:
	case -EFSCORRUPTED:
		/* Note the badness but don't abort. */
		sc->sm->sm_flags |= errflag;
		*error = 0;
		/* fall through */
	default:
		trace_xfs_scrub_op_error(sc, agno, bno, *error,
				ret_ip);
		break;
	}
	return false;
}

bool
xfs_scrub_process_error(
	struct xfs_scrub_context	*sc,
	xfs_agnumber_t			agno,
	xfs_agblock_t			bno,
	int				*error)
{
	return __xfs_scrub_process_error(sc, agno, bno, error,
			XFS_SCRUB_OFLAG_CORRUPT, __return_address);
}

bool
xfs_scrub_xref_process_error(
	struct xfs_scrub_context	*sc,
	xfs_agnumber_t			agno,
	xfs_agblock_t			bno,
	int				*error)
{
	return __xfs_scrub_process_error(sc, agno, bno, error,
			XFS_SCRUB_OFLAG_XFAIL, __return_address);
}

/* Check for operational errors for a file offset. */
static bool
__xfs_scrub_fblock_process_error(
	struct xfs_scrub_context	*sc,
	int				whichfork,
	xfs_fileoff_t			offset,
	int				*error,
	__u32				errflag,
	void				*ret_ip)
{
	switch (*error) {
	case 0:
		return true;
	case -EDEADLOCK:
		/* Used to restart an op with deadlock avoidance. */
		trace_xfs_scrub_deadlock_retry(sc->ip, sc->sm, *error);
		break;
	case -EFSBADCRC:
	case -EFSCORRUPTED:
		/* Note the badness but don't abort. */
		sc->sm->sm_flags |= errflag;
		*error = 0;
		/* fall through */
	default:
		trace_xfs_scrub_file_op_error(sc, whichfork, offset, *error,
				ret_ip);
		break;
	}
	return false;
}

bool
xfs_scrub_fblock_process_error(
	struct xfs_scrub_context	*sc,
	int				whichfork,
	xfs_fileoff_t			offset,
	int				*error)
{
	return __xfs_scrub_fblock_process_error(sc, whichfork, offset, error,
			XFS_SCRUB_OFLAG_CORRUPT, __return_address);
}

bool
xfs_scrub_fblock_xref_process_error(
	struct xfs_scrub_context	*sc,
	int				whichfork,
	xfs_fileoff_t			offset,
	int				*error)
{
	return __xfs_scrub_fblock_process_error(sc, whichfork, offset, error,
			XFS_SCRUB_OFLAG_XFAIL, __return_address);
}

/*
 * Handling scrub corruption/optimization/warning checks.
 *
 * The *_set_{corrupt,preen,warning}() family of functions are used to
 * record the presence of metadata that is incorrect (corrupt), could be
 * optimized somehow (preen), or should be flagged for administrative
 * review but is not incorrect (warn).
 *
 * ftrace can be used to record the precise metadata location and
 * approximate code location of the failed check.
 */
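
/*
 * For example (a sketch, not lifted from any one scrubber): a check
 * that finds valid-but-suboptimal metadata might record it for
 * preening:
 *
 *	if (!xfs_sb_version_hascrc(&mp->m_sb))
 *		xfs_scrub_block_set_preen(sc, bp);
 *
 * Userspace sees the OFLAG bits in sm_flags and decides what to do.
 */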

/* Record a block which could be optimized. */
void
xfs_scrub_block_set_preen(
	struct xfs_scrub_context	*sc,
	struct xfs_buf			*bp)
{
	sc->sm->sm_flags |= XFS_SCRUB_OFLAG_PREEN;
	trace_xfs_scrub_block_preen(sc, bp->b_bn, __return_address);
}

/* Record an inode which could be optimized. */
void
xfs_scrub_ino_set_preen(
	struct xfs_scrub_context	*sc,
	xfs_ino_t			ino)
{
	sc->sm->sm_flags |= XFS_SCRUB_OFLAG_PREEN;
	trace_xfs_scrub_ino_preen(sc, ino, __return_address);
}

/* Record a corrupt block. */
void
xfs_scrub_block_set_corrupt(
	struct xfs_scrub_context	*sc,
	struct xfs_buf			*bp)
{
	sc->sm->sm_flags |= XFS_SCRUB_OFLAG_CORRUPT;
	trace_xfs_scrub_block_error(sc, bp->b_bn, __return_address);
}

/* Record a corruption while cross-referencing. */
void
xfs_scrub_block_xref_set_corrupt(
	struct xfs_scrub_context	*sc,
	struct xfs_buf			*bp)
{
	sc->sm->sm_flags |= XFS_SCRUB_OFLAG_XCORRUPT;
	trace_xfs_scrub_block_error(sc, bp->b_bn, __return_address);
}

/* Record a corrupt inode. */
void
xfs_scrub_ino_set_corrupt(
	struct xfs_scrub_context	*sc,
	xfs_ino_t			ino)
{
	sc->sm->sm_flags |= XFS_SCRUB_OFLAG_CORRUPT;
	trace_xfs_scrub_ino_error(sc, ino, __return_address);
}

/* Record a corruption while cross-referencing with an inode. */
void
xfs_scrub_ino_xref_set_corrupt(
	struct xfs_scrub_context	*sc,
	xfs_ino_t			ino)
{
	sc->sm->sm_flags |= XFS_SCRUB_OFLAG_XCORRUPT;
	trace_xfs_scrub_ino_error(sc, ino, __return_address);
}

/* Record corruption in a block indexed by a file fork. */
void
xfs_scrub_fblock_set_corrupt(
	struct xfs_scrub_context	*sc,
	int				whichfork,
	xfs_fileoff_t			offset)
{
	sc->sm->sm_flags |= XFS_SCRUB_OFLAG_CORRUPT;
	trace_xfs_scrub_fblock_error(sc, whichfork, offset, __return_address);
}

/* Record a corruption while cross-referencing a fork block. */
void
xfs_scrub_fblock_xref_set_corrupt(
	struct xfs_scrub_context	*sc,
	int				whichfork,
	xfs_fileoff_t			offset)
{
	sc->sm->sm_flags |= XFS_SCRUB_OFLAG_XCORRUPT;
	trace_xfs_scrub_fblock_error(sc, whichfork, offset, __return_address);
}

/*
 * Warn about inodes that need administrative review but are not
 * incorrect.
 */
void
xfs_scrub_ino_set_warning(
	struct xfs_scrub_context	*sc,
	xfs_ino_t			ino)
{
	sc->sm->sm_flags |= XFS_SCRUB_OFLAG_WARNING;
	trace_xfs_scrub_ino_warning(sc, ino, __return_address);
}

/* Warn about a block indexed by a file fork that needs review. */
void
xfs_scrub_fblock_set_warning(
	struct xfs_scrub_context	*sc,
	int				whichfork,
	xfs_fileoff_t			offset)
{
	sc->sm->sm_flags |= XFS_SCRUB_OFLAG_WARNING;
	trace_xfs_scrub_fblock_warning(sc, whichfork, offset, __return_address);
}

/* Signal an incomplete scrub. */
void
xfs_scrub_set_incomplete(
	struct xfs_scrub_context	*sc)
{
	sc->sm->sm_flags |= XFS_SCRUB_OFLAG_INCOMPLETE;
	trace_xfs_scrub_incomplete(sc, __return_address);
}

/*
 * rmap scrubbing -- compute the number of blocks with a given owner,
 * at least according to the reverse mapping data.
 */

struct xfs_scrub_rmap_ownedby_info {
	struct xfs_owner_info	*oinfo;
	xfs_filblks_t		*blocks;
};

STATIC int
xfs_scrub_count_rmap_ownedby_irec(
	struct xfs_btree_cur		*cur,
	struct xfs_rmap_irec		*rec,
	void				*priv)
{
	struct xfs_scrub_rmap_ownedby_info	*sroi = priv;
	bool					irec_attr;
	bool					oinfo_attr;

	irec_attr = rec->rm_flags & XFS_RMAP_ATTR_FORK;
	oinfo_attr = sroi->oinfo->oi_flags & XFS_OWNER_INFO_ATTR_FORK;

	if (rec->rm_owner != sroi->oinfo->oi_owner)
		return 0;

	if (XFS_RMAP_NON_INODE_OWNER(rec->rm_owner) || irec_attr == oinfo_attr)
		(*sroi->blocks) += rec->rm_blockcount;

	return 0;
}

/*
 * Calculate the number of blocks the rmap thinks are owned by something.
 * The caller should pass us an rmapbt cursor.
 */
int
xfs_scrub_count_rmap_ownedby_ag(
	struct xfs_scrub_context	*sc,
	struct xfs_btree_cur		*cur,
	struct xfs_owner_info		*oinfo,
	xfs_filblks_t			*blocks)
{
	struct xfs_scrub_rmap_ownedby_info	sroi;

	sroi.oinfo = oinfo;
	*blocks = 0;
	sroi.blocks = blocks;

	return xfs_rmap_query_all(cur, xfs_scrub_count_rmap_ownedby_irec,
			&sroi);
}
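
/*
 * A sketch of a caller (illustrative; the owner constant and cursor
 * shown are assumptions, not a quote from a real scrubber):
 *
 *	struct xfs_owner_info	oinfo;
 *	xfs_filblks_t		blocks;
 *
 *	xfs_rmap_ag_owner(&oinfo, XFS_RMAP_OWN_FS);
 *	error = xfs_scrub_count_rmap_ownedby_ag(sc, sc->sa.rmap_cur,
 *			&oinfo, &blocks);
 */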

/*
 * AG scrubbing
 *
 * These helpers facilitate locking an allocation group's header
 * buffers, setting up cursors for all btrees that are present, and
 * cleaning everything up once we're through.
 */

/* Decide if we want to return an AG header read failure. */
static inline bool
want_ag_read_header_failure(
	struct xfs_scrub_context	*sc,
	unsigned int			type)
{
	/* Return all AG header read failures when scanning btrees. */
	if (sc->sm->sm_type != XFS_SCRUB_TYPE_AGF &&
	    sc->sm->sm_type != XFS_SCRUB_TYPE_AGFL &&
	    sc->sm->sm_type != XFS_SCRUB_TYPE_AGI)
		return true;
	/*
	 * If we're scanning a given type of AG header, we only want to
	 * see read failures from that specific header.  We'd like the
	 * other headers to cross-check them, but this isn't required.
	 */
	if (sc->sm->sm_type == type)
		return true;
	return false;
}

/*
 * Grab all the headers for an AG.
 *
 * The headers should be released by xfs_scrub_ag_free, but as a
 * fail-safe we attach all the buffers we grab to the scrub transaction
 * so they'll all be freed when we cancel it.
 */
int
xfs_scrub_ag_read_headers(
	struct xfs_scrub_context	*sc,
	xfs_agnumber_t			agno,
	struct xfs_buf			**agi,
	struct xfs_buf			**agf,
	struct xfs_buf			**agfl)
{
	struct xfs_mount		*mp = sc->mp;
	int				error;

	error = xfs_ialloc_read_agi(mp, sc->tp, agno, agi);
	if (error && want_ag_read_header_failure(sc, XFS_SCRUB_TYPE_AGI))
		goto out;

	error = xfs_alloc_read_agf(mp, sc->tp, agno, 0, agf);
	if (error && want_ag_read_header_failure(sc, XFS_SCRUB_TYPE_AGF))
		goto out;

	error = xfs_alloc_read_agfl(mp, sc->tp, agno, agfl);
	if (error && want_ag_read_header_failure(sc, XFS_SCRUB_TYPE_AGFL))
		goto out;
	error = 0;
out:
	return error;
}

/* Release all the AG btree cursors. */
void
xfs_scrub_ag_btcur_free(
	struct xfs_scrub_ag		*sa)
{
	if (sa->refc_cur)
		xfs_btree_del_cursor(sa->refc_cur, XFS_BTREE_ERROR);
	if (sa->rmap_cur)
		xfs_btree_del_cursor(sa->rmap_cur, XFS_BTREE_ERROR);
	if (sa->fino_cur)
		xfs_btree_del_cursor(sa->fino_cur, XFS_BTREE_ERROR);
	if (sa->ino_cur)
		xfs_btree_del_cursor(sa->ino_cur, XFS_BTREE_ERROR);
	if (sa->cnt_cur)
		xfs_btree_del_cursor(sa->cnt_cur, XFS_BTREE_ERROR);
	if (sa->bno_cur)
		xfs_btree_del_cursor(sa->bno_cur, XFS_BTREE_ERROR);

	sa->refc_cur = NULL;
	sa->rmap_cur = NULL;
	sa->fino_cur = NULL;
	sa->ino_cur = NULL;
	sa->bno_cur = NULL;
	sa->cnt_cur = NULL;
}

/* Initialize all the btree cursors for an AG. */
int
xfs_scrub_ag_btcur_init(
	struct xfs_scrub_context	*sc,
	struct xfs_scrub_ag		*sa)
{
	struct xfs_mount		*mp = sc->mp;
	xfs_agnumber_t			agno = sa->agno;

	if (sa->agf_bp) {
		/* Set up a bnobt cursor for cross-referencing. */
		sa->bno_cur = xfs_allocbt_init_cursor(mp, sc->tp, sa->agf_bp,
				agno, XFS_BTNUM_BNO);
		if (!sa->bno_cur)
			goto err;

		/* Set up a cntbt cursor for cross-referencing. */
		sa->cnt_cur = xfs_allocbt_init_cursor(mp, sc->tp, sa->agf_bp,
				agno, XFS_BTNUM_CNT);
		if (!sa->cnt_cur)
			goto err;
	}

	/* Set up an inobt cursor for cross-referencing. */
	if (sa->agi_bp) {
		sa->ino_cur = xfs_inobt_init_cursor(mp, sc->tp, sa->agi_bp,
				agno, XFS_BTNUM_INO);
		if (!sa->ino_cur)
			goto err;
	}

	/* Set up a finobt cursor for cross-referencing. */
	if (sa->agi_bp && xfs_sb_version_hasfinobt(&mp->m_sb)) {
		sa->fino_cur = xfs_inobt_init_cursor(mp, sc->tp, sa->agi_bp,
				agno, XFS_BTNUM_FINO);
		if (!sa->fino_cur)
			goto err;
	}

	/* Set up an rmapbt cursor for cross-referencing. */
	if (sa->agf_bp && xfs_sb_version_hasrmapbt(&mp->m_sb)) {
		sa->rmap_cur = xfs_rmapbt_init_cursor(mp, sc->tp, sa->agf_bp,
				agno);
		if (!sa->rmap_cur)
			goto err;
	}

	/* Set up a refcountbt cursor for cross-referencing. */
	if (sa->agf_bp && xfs_sb_version_hasreflink(&mp->m_sb)) {
		sa->refc_cur = xfs_refcountbt_init_cursor(mp, sc->tp,
				sa->agf_bp, agno, NULL);
		if (!sa->refc_cur)
			goto err;
	}

	return 0;
err:
	return -ENOMEM;
}

/* Release the AG header context and btree cursors. */
void
xfs_scrub_ag_free(
	struct xfs_scrub_context	*sc,
	struct xfs_scrub_ag		*sa)
{
	xfs_scrub_ag_btcur_free(sa);
	if (sa->agfl_bp) {
		xfs_trans_brelse(sc->tp, sa->agfl_bp);
		sa->agfl_bp = NULL;
	}
	if (sa->agf_bp) {
		xfs_trans_brelse(sc->tp, sa->agf_bp);
		sa->agf_bp = NULL;
	}
	if (sa->agi_bp) {
		xfs_trans_brelse(sc->tp, sa->agi_bp);
		sa->agi_bp = NULL;
	}
	if (sa->pag) {
		xfs_perag_put(sa->pag);
		sa->pag = NULL;
	}
	sa->agno = NULLAGNUMBER;
}

/*
 * For scrub, grab the AGI and the AGF headers, in that order.  Locking
 * order requires us to get the AGI before the AGF.  We use the
 * transaction to avoid deadlocking on crosslinked metadata buffers;
 * either the caller passes one in (bmap scrub) or we have to create a
 * transaction ourselves.
 */
int
xfs_scrub_ag_init(
	struct xfs_scrub_context	*sc,
	xfs_agnumber_t			agno,
	struct xfs_scrub_ag		*sa)
{
	int				error;

	sa->agno = agno;
	error = xfs_scrub_ag_read_headers(sc, agno, &sa->agi_bp,
			&sa->agf_bp, &sa->agfl_bp);
	if (error)
		return error;

	return xfs_scrub_ag_btcur_init(sc, sa);
}
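
/*
 * Typical usage, sketched (illustrative only):
 *
 *	error = xfs_scrub_ag_init(sc, agno, &sc->sa);
 *	if (!xfs_scrub_process_error(sc, agno, 0, &error))
 *		return error;
 *	... cross-reference against sc->sa.bno_cur, sc->sa.rmap_cur ...
 *	xfs_scrub_ag_free(sc, &sc->sa);
 *
 * Scrub teardown normally calls xfs_scrub_ag_free for us.
 */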

/*
 * Grab the per-ag structure if we haven't already gotten it.  Teardown of the
 * xfs_scrub_ag will release it for us.
 */
void
xfs_scrub_perag_get(
	struct xfs_mount	*mp,
	struct xfs_scrub_ag	*sa)
{
	if (!sa->pag)
		sa->pag = xfs_perag_get(mp, sa->agno);
}

/* Per-scrubber setup functions */

/*
 * Grab an empty transaction so that we can re-grab locked buffers if
 * one of our btrees turns out to be cyclic.
 *
 * If we're going to repair something, we need to ask for the largest possible
 * log reservation so that we can handle the worst case scenario for metadata
 * updates while rebuilding a metadata item.  We also need to reserve as many
 * blocks in the head transaction as we think we're going to need to rebuild
 * the metadata object.
 */
int
xfs_scrub_trans_alloc(
	struct xfs_scrub_context	*sc,
	uint				resblks)
{
	if (sc->sm->sm_flags & XFS_SCRUB_IFLAG_REPAIR)
		return xfs_trans_alloc(sc->mp, &M_RES(sc->mp)->tr_itruncate,
				resblks, 0, 0, &sc->tp);

	return xfs_trans_alloc_empty(sc->mp, &sc->tp);
}

/* Set us up with a transaction and an empty context. */
int
xfs_scrub_setup_fs(
	struct xfs_scrub_context	*sc,
	struct xfs_inode		*ip)
{
	uint				resblks;

	resblks = xfs_repair_calc_ag_resblks(sc);
	return xfs_scrub_trans_alloc(sc, resblks);
}

/* Set us up with AG headers and btree cursors. */
int
xfs_scrub_setup_ag_btree(
	struct xfs_scrub_context	*sc,
	struct xfs_inode		*ip,
	bool				force_log)
{
	struct xfs_mount		*mp = sc->mp;
	int				error;

	/*
	 * If the caller asks us to checkpoint the log, do so.  This
	 * expensive operation should be performed infrequently and only
	 * as a last resort.  Any caller that sets force_log should
	 * document why they need to do so.
	 */
	if (force_log) {
		error = xfs_scrub_checkpoint_log(mp);
		if (error)
			return error;
	}

	error = xfs_scrub_setup_fs(sc, ip);
	if (error)
		return error;

	return xfs_scrub_ag_init(sc, sc->sm->sm_agno, &sc->sa);
}

/* Push everything out of the log onto disk. */
int
xfs_scrub_checkpoint_log(
	struct xfs_mount	*mp)
{
	int			error;

	error = xfs_log_force(mp, XFS_LOG_SYNC);
	if (error)
		return error;
	xfs_ail_push_all_sync(mp->m_ail);
	return 0;
}

/*
 * Given an inode and the scrub control structure, grab either the
 * inode referenced in the control structure or the inode passed in.
 * The inode is not locked.
 */
int
xfs_scrub_get_inode(
	struct xfs_scrub_context	*sc,
	struct xfs_inode		*ip_in)
{
	struct xfs_imap			imap;
	struct xfs_mount		*mp = sc->mp;
	struct xfs_inode		*ip = NULL;
	int				error;

	/* We want to scan the inode we already had opened. */
	if (sc->sm->sm_ino == 0 || sc->sm->sm_ino == ip_in->i_ino) {
		sc->ip = ip_in;
		return 0;
	}

	/* Look up the inode, see if the generation number matches. */
	if (xfs_internal_inum(mp, sc->sm->sm_ino))
		return -ENOENT;
	error = xfs_iget(mp, NULL, sc->sm->sm_ino,
			XFS_IGET_UNTRUSTED | XFS_IGET_DONTCACHE, 0, &ip);
	switch (error) {
	case -ENOENT:
		/* Inode doesn't exist, just bail out. */
		return error;
	case 0:
		/* Got an inode, continue. */
		break;
	case -EINVAL:
		/*
		 * -EINVAL with IGET_UNTRUSTED could mean one of several
		 * things: userspace gave us an inode number that doesn't
		 * correspond to fs space, or doesn't have an inobt entry;
		 * or it could simply mean that the inode buffer failed the
		 * read verifiers.
		 *
		 * Try just the inode mapping lookup -- if it succeeds, then
		 * the inode buffer verifier failed and something needs fixing.
		 * Otherwise, we really couldn't find it so tell userspace
		 * that it no longer exists.
		 */
		error = xfs_imap(sc->mp, sc->tp, sc->sm->sm_ino, &imap,
				XFS_IGET_UNTRUSTED | XFS_IGET_DONTCACHE);
		if (error)
			return -ENOENT;
		error = -EFSCORRUPTED;
		/* fall through */
	default:
		trace_xfs_scrub_op_error(sc,
				XFS_INO_TO_AGNO(mp, sc->sm->sm_ino),
				XFS_INO_TO_AGBNO(mp, sc->sm->sm_ino),
				error, __return_address);
		return error;
	}
	if (VFS_I(ip)->i_generation != sc->sm->sm_gen) {
		iput(VFS_I(ip));
		return -ENOENT;
	}

	sc->ip = ip;
	return 0;
}

/* Set us up to scrub a file's contents. */
int
xfs_scrub_setup_inode_contents(
	struct xfs_scrub_context	*sc,
	struct xfs_inode		*ip,
	unsigned int			resblks)
{
	int				error;

	error = xfs_scrub_get_inode(sc, ip);
	if (error)
		return error;

	/* Got the inode, lock it and we're ready to go. */
	sc->ilock_flags = XFS_IOLOCK_EXCL | XFS_MMAPLOCK_EXCL;
	xfs_ilock(sc->ip, sc->ilock_flags);
	error = xfs_scrub_trans_alloc(sc, resblks);
	if (error)
		goto out;
	sc->ilock_flags |= XFS_ILOCK_EXCL;
	xfs_ilock(sc->ip, XFS_ILOCK_EXCL);

out:
	/* scrub teardown will unlock and release the inode for us */
	return error;
}

/*
 * Predicate that decides if we need to evaluate the cross-reference check.
 * If there was an error accessing the cross-reference btree, just delete
 * the cursor and skip the check.
 */
bool
xfs_scrub_should_check_xref(
	struct xfs_scrub_context	*sc,
	int				*error,
	struct xfs_btree_cur		**curpp)
{
	/* No point in xref if we already know we're corrupt. */
	if (xfs_scrub_skip_xref(sc->sm))
		return false;

	if (*error == 0)
		return true;

	if (curpp) {
		/* If we've already given up on xref, just bail out. */
		if (!*curpp)
			return false;

		/* xref error, delete cursor and bail out. */
		xfs_btree_del_cursor(*curpp, XFS_BTREE_ERROR);
		*curpp = NULL;
	}

	sc->sm->sm_flags |= XFS_SCRUB_OFLAG_XFAIL;
	trace_xfs_scrub_xref_error(sc, *error, __return_address);

	/*
	 * Errors encountered during cross-referencing with another
	 * data structure should not cause this scrubber to abort.
	 */
	*error = 0;
	return false;
}
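
/*
 * For example, a cross-reference check might use it like this (a
 * sketch; the free-space query shown is one plausible style of use):
 *
 *	error = xfs_alloc_has_record(sc->sa.bno_cur, agbno, len, &is_free);
 *	if (!xfs_scrub_should_check_xref(sc, &error, &sc->sa.bno_cur))
 *		return;
 *	if (is_free)
 *		xfs_scrub_block_xref_set_corrupt(sc, sc->sa.agf_bp);
 */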

/* Run the structure verifiers on in-memory buffers to detect bad memory. */
void
xfs_scrub_buffer_recheck(
	struct xfs_scrub_context	*sc,
	struct xfs_buf			*bp)
{
	xfs_failaddr_t			fa;

	if (bp->b_ops == NULL) {
		xfs_scrub_block_set_corrupt(sc, bp);
		return;
	}
	if (bp->b_ops->verify_struct == NULL) {
		xfs_scrub_set_incomplete(sc);
		return;
	}
	fa = bp->b_ops->verify_struct(bp);
	if (!fa)
		return;
	sc->sm->sm_flags |= XFS_SCRUB_OFLAG_CORRUPT;
	trace_xfs_scrub_block_error(sc, bp->b_bn, fa);
}
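
/*
 * Sketch of a call site (the buffer read shown is hypothetical, for
 * illustration):
 *
 *	error = xfs_trans_read_buf(mp, sc->tp, mp->m_ddev_targp, daddr,
 *			numblks, 0, &bp, ops);
 *	if (!xfs_scrub_process_error(sc, agno, bno, &error))
 *		return error;
 *	xfs_scrub_buffer_recheck(sc, bp);
 */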

/*
 * Scrub the attr/data forks of a metadata inode.  The metadata inode must be
 * pointed to by sc->ip and the ILOCK must be held.
 */
int
xfs_scrub_metadata_inode_forks(
	struct xfs_scrub_context	*sc)
{
	__u32				smtype;
	bool				shared;
	int				error;

	if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
		return 0;

	/* Metadata inodes don't live on the rt device. */
	if (sc->ip->i_d.di_flags & XFS_DIFLAG_REALTIME) {
		xfs_scrub_ino_set_corrupt(sc, sc->ip->i_ino);
		return 0;
	}

	/* They should never participate in reflink. */
	if (xfs_is_reflink_inode(sc->ip)) {
		xfs_scrub_ino_set_corrupt(sc, sc->ip->i_ino);
		return 0;
	}

	/* They also should never have extended attributes. */
	if (xfs_inode_hasattr(sc->ip)) {
		xfs_scrub_ino_set_corrupt(sc, sc->ip->i_ino);
		return 0;
	}

	/* Invoke the data fork scrubber. */
	smtype = sc->sm->sm_type;
	sc->sm->sm_type = XFS_SCRUB_TYPE_BMBTD;
	error = xfs_scrub_bmap_data(sc);
	sc->sm->sm_type = smtype;
	if (error || (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT))
		return error;

	/* Look for incorrect shared blocks. */
	if (xfs_sb_version_hasreflink(&sc->mp->m_sb)) {
		error = xfs_reflink_inode_has_shared_extents(sc->tp, sc->ip,
				&shared);
		if (!xfs_scrub_fblock_process_error(sc, XFS_DATA_FORK, 0,
				&error))
			return error;
		if (shared)
			xfs_scrub_ino_set_corrupt(sc, sc->ip->i_ino);
	}

	return error;
}

/*
 * Try to lock an inode in violation of the usual locking order rules.  For
 * example, trying to get the IOLOCK while in transaction context, or just
 * plain breaking AG-order or inode-order inode locking rules.  Either way,
 * the only way to avoid an ABBA deadlock is to use trylock and back off if
 * we can't.
 */
int
xfs_scrub_ilock_inverted(
	struct xfs_inode	*ip,
	uint			lock_mode)
{
	int			i;

	for (i = 0; i < 20; i++) {
		if (xfs_ilock_nowait(ip, lock_mode))
			return 0;
		delay(1);
	}
	return -EDEADLOCK;
}
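
/*
 * Intended use, sketched (illustrative): a scrubber taking the IOLOCK
 * out of order backs off rather than deadlocking:
 *
 *	error = xfs_scrub_ilock_inverted(ip, XFS_IOLOCK_SHARED);
 *	if (error == -EDEADLOCK)
 *		return error;
 *
 * and the caller restarts the whole scrub operation.
 */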