blob: df0766132acea690e0d883e87a02365539810168 [file] [log] [blame]
Darrick J. Wong537964bc2017-10-17 21:37:37 -07001/*
2 * Copyright (C) 2017 Oracle. All Rights Reserved.
3 *
4 * Author: Darrick J. Wong <darrick.wong@oracle.com>
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version 2
9 * of the License, or (at your option) any later version.
10 *
11 * This program is distributed in the hope that it would be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write the Free Software Foundation,
18 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
19 */
20#include "xfs.h"
21#include "xfs_fs.h"
22#include "xfs_shared.h"
23#include "xfs_format.h"
24#include "xfs_trans_resv.h"
25#include "xfs_mount.h"
26#include "xfs_defer.h"
27#include "xfs_btree.h"
28#include "xfs_bit.h"
29#include "xfs_log_format.h"
30#include "xfs_trans.h"
31#include "xfs_sb.h"
32#include "xfs_inode.h"
33#include "xfs_alloc.h"
34#include "scrub/scrub.h"
35#include "scrub/common.h"
36#include "scrub/btree.h"
37#include "scrub/trace.h"
38
39/* btree scrubbing */
40
41/*
42 * Check for btree operation errors. See the section about handling
43 * operational errors in common.c.
44 */
45bool
46xfs_scrub_btree_process_error(
47 struct xfs_scrub_context *sc,
48 struct xfs_btree_cur *cur,
49 int level,
50 int *error)
51{
52 if (*error == 0)
53 return true;
54
55 switch (*error) {
56 case -EDEADLOCK:
57 /* Used to restart an op with deadlock avoidance. */
58 trace_xfs_scrub_deadlock_retry(sc->ip, sc->sm, *error);
59 break;
60 case -EFSBADCRC:
61 case -EFSCORRUPTED:
62 /* Note the badness but don't abort. */
63 sc->sm->sm_flags |= XFS_SCRUB_OFLAG_CORRUPT;
64 *error = 0;
65 /* fall through */
66 default:
67 if (cur->bc_flags & XFS_BTREE_ROOT_IN_INODE)
68 trace_xfs_scrub_ifork_btree_op_error(sc, cur, level,
69 *error, __return_address);
70 else
71 trace_xfs_scrub_btree_op_error(sc, cur, level,
72 *error, __return_address);
73 break;
74 }
75 return false;
76}
77
78/* Record btree block corruption. */
79void
80xfs_scrub_btree_set_corrupt(
81 struct xfs_scrub_context *sc,
82 struct xfs_btree_cur *cur,
83 int level)
84{
85 sc->sm->sm_flags |= XFS_SCRUB_OFLAG_CORRUPT;
86
87 if (cur->bc_flags & XFS_BTREE_ROOT_IN_INODE)
88 trace_xfs_scrub_ifork_btree_error(sc, cur, level,
89 __return_address);
90 else
91 trace_xfs_scrub_btree_error(sc, cur, level,
92 __return_address);
93}
94
95/*
Darrick J. Wong37f3fa72017-10-17 21:37:37 -070096 * Make sure this record is in order and doesn't stray outside of the parent
97 * keys.
98 */
99STATIC void
100xfs_scrub_btree_rec(
101 struct xfs_scrub_btree *bs)
102{
103 struct xfs_btree_cur *cur = bs->cur;
104 union xfs_btree_rec *rec;
105 union xfs_btree_key key;
106 union xfs_btree_key hkey;
107 union xfs_btree_key *keyp;
108 struct xfs_btree_block *block;
109 struct xfs_btree_block *keyblock;
110 struct xfs_buf *bp;
111
112 block = xfs_btree_get_block(cur, 0, &bp);
113 rec = xfs_btree_rec_addr(cur, cur->bc_ptrs[0], block);
114
115 trace_xfs_scrub_btree_rec(bs->sc, cur, 0);
116
117 /* If this isn't the first record, are they in order? */
118 if (!bs->firstrec && !cur->bc_ops->recs_inorder(cur, &bs->lastrec, rec))
119 xfs_scrub_btree_set_corrupt(bs->sc, cur, 0);
120 bs->firstrec = false;
121 memcpy(&bs->lastrec, rec, cur->bc_ops->rec_len);
122
123 if (cur->bc_nlevels == 1)
124 return;
125
126 /* Is this at least as large as the parent low key? */
127 cur->bc_ops->init_key_from_rec(&key, rec);
128 keyblock = xfs_btree_get_block(cur, 1, &bp);
129 keyp = xfs_btree_key_addr(cur, cur->bc_ptrs[1], keyblock);
130 if (cur->bc_ops->diff_two_keys(cur, &key, keyp) < 0)
131 xfs_scrub_btree_set_corrupt(bs->sc, cur, 1);
132
133 if (!(cur->bc_flags & XFS_BTREE_OVERLAPPING))
134 return;
135
136 /* Is this no larger than the parent high key? */
137 cur->bc_ops->init_high_key_from_rec(&hkey, rec);
138 keyp = xfs_btree_high_key_addr(cur, cur->bc_ptrs[1], keyblock);
139 if (cur->bc_ops->diff_two_keys(cur, keyp, &hkey) < 0)
140 xfs_scrub_btree_set_corrupt(bs->sc, cur, 1);
141}
142
143/*
144 * Make sure this key is in order and doesn't stray outside of the parent
145 * keys.
146 */
147STATIC void
148xfs_scrub_btree_key(
149 struct xfs_scrub_btree *bs,
150 int level)
151{
152 struct xfs_btree_cur *cur = bs->cur;
153 union xfs_btree_key *key;
154 union xfs_btree_key *keyp;
155 struct xfs_btree_block *block;
156 struct xfs_btree_block *keyblock;
157 struct xfs_buf *bp;
158
159 block = xfs_btree_get_block(cur, level, &bp);
160 key = xfs_btree_key_addr(cur, cur->bc_ptrs[level], block);
161
162 trace_xfs_scrub_btree_key(bs->sc, cur, level);
163
164 /* If this isn't the first key, are they in order? */
165 if (!bs->firstkey[level] &&
166 !cur->bc_ops->keys_inorder(cur, &bs->lastkey[level], key))
167 xfs_scrub_btree_set_corrupt(bs->sc, cur, level);
168 bs->firstkey[level] = false;
169 memcpy(&bs->lastkey[level], key, cur->bc_ops->key_len);
170
171 if (level + 1 >= cur->bc_nlevels)
172 return;
173
174 /* Is this at least as large as the parent low key? */
175 keyblock = xfs_btree_get_block(cur, level + 1, &bp);
176 keyp = xfs_btree_key_addr(cur, cur->bc_ptrs[level + 1], keyblock);
177 if (cur->bc_ops->diff_two_keys(cur, key, keyp) < 0)
178 xfs_scrub_btree_set_corrupt(bs->sc, cur, level);
179
180 if (!(cur->bc_flags & XFS_BTREE_OVERLAPPING))
181 return;
182
183 /* Is this no larger than the parent high key? */
184 key = xfs_btree_high_key_addr(cur, cur->bc_ptrs[level], block);
185 keyp = xfs_btree_high_key_addr(cur, cur->bc_ptrs[level + 1], keyblock);
186 if (cur->bc_ops->diff_two_keys(cur, keyp, key) < 0)
187 xfs_scrub_btree_set_corrupt(bs->sc, cur, level);
188}
189
190/*
Darrick J. Wongcc3e0942017-10-17 21:37:37 -0700191 * Check a btree pointer. Returns true if it's ok to use this pointer.
192 * Callers do not need to set the corrupt flag.
193 */
194static bool
195xfs_scrub_btree_ptr_ok(
196 struct xfs_scrub_btree *bs,
197 int level,
198 union xfs_btree_ptr *ptr)
199{
200 bool res;
201
202 /* A btree rooted in an inode has no block pointer to the root. */
203 if ((bs->cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) &&
204 level == bs->cur->bc_nlevels)
205 return true;
206
207 /* Otherwise, check the pointers. */
208 if (bs->cur->bc_flags & XFS_BTREE_LONG_PTRS)
209 res = xfs_btree_check_lptr(bs->cur, be64_to_cpu(ptr->l), level);
210 else
211 res = xfs_btree_check_sptr(bs->cur, be32_to_cpu(ptr->s), level);
212 if (!res)
213 xfs_scrub_btree_set_corrupt(bs->sc, bs->cur, level);
214
215 return res;
216}
217
218/* Check that a btree block's sibling matches what we expect it. */
219STATIC int
220xfs_scrub_btree_block_check_sibling(
221 struct xfs_scrub_btree *bs,
222 int level,
223 int direction,
224 union xfs_btree_ptr *sibling)
225{
226 struct xfs_btree_cur *cur = bs->cur;
227 struct xfs_btree_block *pblock;
228 struct xfs_buf *pbp;
229 struct xfs_btree_cur *ncur = NULL;
230 union xfs_btree_ptr *pp;
231 int success;
232 int error;
233
234 error = xfs_btree_dup_cursor(cur, &ncur);
235 if (!xfs_scrub_btree_process_error(bs->sc, cur, level + 1, &error) ||
236 !ncur)
237 return error;
238
239 /*
240 * If the pointer is null, we shouldn't be able to move the upper
241 * level pointer anywhere.
242 */
243 if (xfs_btree_ptr_is_null(cur, sibling)) {
244 if (direction > 0)
245 error = xfs_btree_increment(ncur, level + 1, &success);
246 else
247 error = xfs_btree_decrement(ncur, level + 1, &success);
248 if (error == 0 && success)
249 xfs_scrub_btree_set_corrupt(bs->sc, cur, level);
250 error = 0;
251 goto out;
252 }
253
254 /* Increment upper level pointer. */
255 if (direction > 0)
256 error = xfs_btree_increment(ncur, level + 1, &success);
257 else
258 error = xfs_btree_decrement(ncur, level + 1, &success);
259 if (!xfs_scrub_btree_process_error(bs->sc, cur, level + 1, &error))
260 goto out;
261 if (!success) {
262 xfs_scrub_btree_set_corrupt(bs->sc, cur, level + 1);
263 goto out;
264 }
265
266 /* Compare upper level pointer to sibling pointer. */
267 pblock = xfs_btree_get_block(ncur, level + 1, &pbp);
268 pp = xfs_btree_ptr_addr(ncur, ncur->bc_ptrs[level + 1], pblock);
269 if (!xfs_scrub_btree_ptr_ok(bs, level + 1, pp))
270 goto out;
271
272 if (xfs_btree_diff_two_ptrs(cur, pp, sibling))
273 xfs_scrub_btree_set_corrupt(bs->sc, cur, level);
274out:
275 xfs_btree_del_cursor(ncur, XFS_BTREE_ERROR);
276 return error;
277}
278
279/* Check the siblings of a btree block. */
280STATIC int
281xfs_scrub_btree_block_check_siblings(
282 struct xfs_scrub_btree *bs,
283 struct xfs_btree_block *block)
284{
285 struct xfs_btree_cur *cur = bs->cur;
286 union xfs_btree_ptr leftsib;
287 union xfs_btree_ptr rightsib;
288 int level;
289 int error = 0;
290
291 xfs_btree_get_sibling(cur, block, &leftsib, XFS_BB_LEFTSIB);
292 xfs_btree_get_sibling(cur, block, &rightsib, XFS_BB_RIGHTSIB);
293 level = xfs_btree_get_level(block);
294
295 /* Root block should never have siblings. */
296 if (level == cur->bc_nlevels - 1) {
297 if (!xfs_btree_ptr_is_null(cur, &leftsib) ||
298 !xfs_btree_ptr_is_null(cur, &rightsib))
299 xfs_scrub_btree_set_corrupt(bs->sc, cur, level);
300 goto out;
301 }
302
303 /*
304 * Does the left & right sibling pointers match the adjacent
305 * parent level pointers?
306 * (These function absorbs error codes for us.)
307 */
308 error = xfs_scrub_btree_block_check_sibling(bs, level, -1, &leftsib);
309 if (error)
310 return error;
311 error = xfs_scrub_btree_block_check_sibling(bs, level, 1, &rightsib);
312 if (error)
313 return error;
314out:
315 return error;
316}
317
318/*
319 * Grab and scrub a btree block given a btree pointer. Returns block
320 * and buffer pointers (if applicable) if they're ok to use.
321 */
322STATIC int
323xfs_scrub_btree_get_block(
324 struct xfs_scrub_btree *bs,
325 int level,
326 union xfs_btree_ptr *pp,
327 struct xfs_btree_block **pblock,
328 struct xfs_buf **pbp)
329{
330 void *failed_at;
331 int error;
332
333 *pblock = NULL;
334 *pbp = NULL;
335
336 error = xfs_btree_lookup_get_block(bs->cur, level, pp, pblock);
337 if (!xfs_scrub_btree_process_error(bs->sc, bs->cur, level, &error) ||
Darrick J. Wonga605e862017-11-06 12:09:29 -0800338 !*pblock)
Darrick J. Wongcc3e0942017-10-17 21:37:37 -0700339 return error;
340
341 xfs_btree_get_block(bs->cur, level, pbp);
342 if (bs->cur->bc_flags & XFS_BTREE_LONG_PTRS)
343 failed_at = __xfs_btree_check_lblock(bs->cur, *pblock,
344 level, *pbp);
345 else
346 failed_at = __xfs_btree_check_sblock(bs->cur, *pblock,
347 level, *pbp);
348 if (failed_at) {
349 xfs_scrub_btree_set_corrupt(bs->sc, bs->cur, level);
350 return 0;
351 }
352
353 /*
354 * Check the block's siblings; this function absorbs error codes
355 * for us.
356 */
357 return xfs_scrub_btree_block_check_siblings(bs, *pblock);
358}
359
360/*
Darrick J. Wong2fdbec52017-10-25 15:03:46 -0700361 * Check that the low and high keys of this block match the keys stored
362 * in the parent block.
363 */
364STATIC void
365xfs_scrub_btree_block_keys(
366 struct xfs_scrub_btree *bs,
367 int level,
368 struct xfs_btree_block *block)
369{
370 union xfs_btree_key block_keys;
371 struct xfs_btree_cur *cur = bs->cur;
372 union xfs_btree_key *high_bk;
373 union xfs_btree_key *parent_keys;
374 union xfs_btree_key *high_pk;
375 struct xfs_btree_block *parent_block;
376 struct xfs_buf *bp;
377
378 if (level >= cur->bc_nlevels - 1)
379 return;
380
381 /* Calculate the keys for this block. */
382 xfs_btree_get_keys(cur, block, &block_keys);
383
384 /* Obtain the parent's copy of the keys for this block. */
385 parent_block = xfs_btree_get_block(cur, level + 1, &bp);
386 parent_keys = xfs_btree_key_addr(cur, cur->bc_ptrs[level + 1],
387 parent_block);
388
389 if (cur->bc_ops->diff_two_keys(cur, &block_keys, parent_keys) != 0)
390 xfs_scrub_btree_set_corrupt(bs->sc, cur, 1);
391
392 if (!(cur->bc_flags & XFS_BTREE_OVERLAPPING))
393 return;
394
395 /* Get high keys */
396 high_bk = xfs_btree_high_key_from_key(cur, &block_keys);
397 high_pk = xfs_btree_high_key_addr(cur, cur->bc_ptrs[level + 1],
398 parent_block);
399
400 if (cur->bc_ops->diff_two_keys(cur, high_bk, high_pk) != 0)
401 xfs_scrub_btree_set_corrupt(bs->sc, cur, 1);
402}
403
404/*
Darrick J. Wong537964bc2017-10-17 21:37:37 -0700405 * Visit all nodes and leaves of a btree. Check that all pointers and
406 * records are in order, that the keys reflect the records, and use a callback
Darrick J. Wongcc3e0942017-10-17 21:37:37 -0700407 * so that the caller can verify individual records.
Darrick J. Wong537964bc2017-10-17 21:37:37 -0700408 */
409int
410xfs_scrub_btree(
411 struct xfs_scrub_context *sc,
412 struct xfs_btree_cur *cur,
413 xfs_scrub_btree_rec_fn scrub_fn,
414 struct xfs_owner_info *oinfo,
415 void *private)
416{
Christoph Hellwig88aa5de2017-11-06 11:53:58 -0800417 struct xfs_scrub_btree bs = { NULL };
Darrick J. Wongcc3e0942017-10-17 21:37:37 -0700418 union xfs_btree_ptr ptr;
419 union xfs_btree_ptr *pp;
Darrick J. Wong37f3fa72017-10-17 21:37:37 -0700420 union xfs_btree_rec *recp;
Darrick J. Wongcc3e0942017-10-17 21:37:37 -0700421 struct xfs_btree_block *block;
422 int level;
423 struct xfs_buf *bp;
424 int i;
425 int error = 0;
Darrick J. Wong537964bc2017-10-17 21:37:37 -0700426
Darrick J. Wongcc3e0942017-10-17 21:37:37 -0700427 /* Initialize scrub state */
428 bs.cur = cur;
429 bs.scrub_rec = scrub_fn;
430 bs.oinfo = oinfo;
431 bs.firstrec = true;
432 bs.private = private;
433 bs.sc = sc;
434 for (i = 0; i < XFS_BTREE_MAXLEVELS; i++)
435 bs.firstkey[i] = true;
436 INIT_LIST_HEAD(&bs.to_check);
437
438 /* Don't try to check a tree with a height we can't handle. */
439 if (cur->bc_nlevels > XFS_BTREE_MAXLEVELS) {
440 xfs_scrub_btree_set_corrupt(sc, cur, 0);
441 goto out;
442 }
443
444 /*
445 * Load the root of the btree. The helper function absorbs
446 * error codes for us.
447 */
448 level = cur->bc_nlevels - 1;
449 cur->bc_ops->init_ptr_from_cur(cur, &ptr);
450 if (!xfs_scrub_btree_ptr_ok(&bs, cur->bc_nlevels, &ptr))
451 goto out;
452 error = xfs_scrub_btree_get_block(&bs, level, &ptr, &block, &bp);
453 if (error || !block)
454 goto out;
455
456 cur->bc_ptrs[level] = 1;
457
458 while (level < cur->bc_nlevels) {
459 block = xfs_btree_get_block(cur, level, &bp);
460
461 if (level == 0) {
462 /* End of leaf, pop back towards the root. */
463 if (cur->bc_ptrs[level] >
464 be16_to_cpu(block->bb_numrecs)) {
Darrick J. Wong2fdbec52017-10-25 15:03:46 -0700465 xfs_scrub_btree_block_keys(&bs, level, block);
Darrick J. Wongcc3e0942017-10-17 21:37:37 -0700466 if (level < cur->bc_nlevels - 1)
467 cur->bc_ptrs[level + 1]++;
468 level++;
469 continue;
470 }
471
Darrick J. Wong37f3fa72017-10-17 21:37:37 -0700472 /* Records in order for scrub? */
473 xfs_scrub_btree_rec(&bs);
474
475 /* Call out to the record checker. */
476 recp = xfs_btree_rec_addr(cur, cur->bc_ptrs[0], block);
477 error = bs.scrub_rec(&bs, recp);
478 if (error)
479 break;
480 if (xfs_scrub_should_terminate(sc, &error) ||
481 (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT))
Darrick J. Wongcc3e0942017-10-17 21:37:37 -0700482 break;
483
484 cur->bc_ptrs[level]++;
485 continue;
486 }
487
488 /* End of node, pop back towards the root. */
489 if (cur->bc_ptrs[level] > be16_to_cpu(block->bb_numrecs)) {
Darrick J. Wong2fdbec52017-10-25 15:03:46 -0700490 xfs_scrub_btree_block_keys(&bs, level, block);
Darrick J. Wongcc3e0942017-10-17 21:37:37 -0700491 if (level < cur->bc_nlevels - 1)
492 cur->bc_ptrs[level + 1]++;
493 level++;
494 continue;
495 }
496
Darrick J. Wong37f3fa72017-10-17 21:37:37 -0700497 /* Keys in order for scrub? */
498 xfs_scrub_btree_key(&bs, level);
499
Darrick J. Wongcc3e0942017-10-17 21:37:37 -0700500 /* Drill another level deeper. */
501 pp = xfs_btree_ptr_addr(cur, cur->bc_ptrs[level], block);
502 if (!xfs_scrub_btree_ptr_ok(&bs, level, pp)) {
503 cur->bc_ptrs[level]++;
504 continue;
505 }
506 level--;
507 error = xfs_scrub_btree_get_block(&bs, level, pp, &block, &bp);
508 if (error || !block)
509 goto out;
510
511 cur->bc_ptrs[level] = 1;
512 }
513
514out:
Darrick J. Wong537964bc2017-10-17 21:37:37 -0700515 return error;
516}