/*
 * Assorted bcache debug code
 *
 * Copyright 2010, 2011 Kent Overstreet <kent.overstreet@gmail.com>
 * Copyright 2012 Google, Inc.
 */

#include "bcache.h"
#include "btree.h"
#include "debug.h"
#include "request.h"

#include <linux/console.h>
#include <linux/debugfs.h>
#include <linux/module.h>
#include <linux/random.h>
#include <linux/seq_file.h>

static struct dentry *debug;

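/*
 * Sanity check a key's pointers against the cache superblock: flag extents
 * longer than a bucket, offsets before the first bucket or past the end of
 * the device, and stale pointers. Returns a short status string, or "" if
 * the key looks fine.
 */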
const char *bch_ptr_status(struct cache_set *c, const struct bkey *k)
{
	unsigned i;

	for (i = 0; i < KEY_PTRS(k); i++)
		if (ptr_available(c, k, i)) {
			struct cache *ca = PTR_CACHE(c, k, i);
			size_t bucket = PTR_BUCKET_NR(c, k, i);
			size_t r = bucket_remainder(c, PTR_OFFSET(k, i));

			if (KEY_SIZE(k) + r > c->sb.bucket_size)
				return "bad, length too big";
			if (bucket < ca->sb.first_bucket)
				return "bad, short offset";
			if (bucket >= ca->sb.nbuckets)
				return "bad, offset past end of device";
			if (ptr_stale(c, k, i))
				return "stale";
		}

	if (!bkey_cmp(k, &ZERO_KEY))
		return "bad, null key";
	if (!KEY_PTRS(k))
		return "bad, no pointers";
	if (!KEY_SIZE(k))
		return "zeroed key";
	return "";
}

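/*
 * Format a key as human-readable text: "inode:offset len <size> ->
 * [dev:offset gen <gen>, ...]" plus the dirty and checksum flags. The result
 * lives in the keyprint_hack buffer returned by value.
 */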
struct keyprint_hack bch_pkey(const struct bkey *k)
{
	unsigned i = 0;
	struct keyprint_hack r;
	char *out = r.s, *end = r.s + KEYHACK_SIZE;

#define p(...)	(out += scnprintf(out, end - out, __VA_ARGS__))

	p("%llu:%llu len %llu -> [", KEY_INODE(k), KEY_OFFSET(k), KEY_SIZE(k));

	if (KEY_PTRS(k))
		while (1) {
			p("%llu:%llu gen %llu",
			  PTR_DEV(k, i), PTR_OFFSET(k, i), PTR_GEN(k, i));

			if (++i == KEY_PTRS(k))
				break;

			p(", ");
		}

	p("]");

	if (KEY_DIRTY(k))
		p(" dirty");
	if (KEY_CSUM(k))
		p(" cs%llu %llx", KEY_CSUM(k), k->ptr[1]);
#undef p
	return r;
}

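/*
 * Describe a btree node's location: the bucket its first pointer lives in,
 * its level, and the current root's level (-1 if there is no root yet).
 */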
struct keyprint_hack bch_pbtree(const struct btree *b)
{
	struct keyprint_hack r;

	snprintf(r.s, 40, "%li level %i/%i", PTR_BUCKET_NR(b->c, &b->key, 0),
		 b->level, b->c->root ? b->c->root->level : -1);
	return r;
}

#if defined(CONFIG_BCACHE_DEBUG) || defined(CONFIG_BCACHE_EDEBUG)

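/*
 * True if the key after @k sorts before it, i.e. the bset is out of order.
 * On leaf nodes the comparison is against the start of the next extent, on
 * internal nodes against the next key itself.
 */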
static bool skipped_backwards(struct btree *b, struct bkey *k)
{
	return bkey_cmp(k, (!b->level)
			? &START_KEY(bkey_next(k))
			: bkey_next(k)) > 0;
}

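/*
 * Print every key in a bset to the console, along with each pointer's bucket
 * number and priority and the key's bch_ptr_status(), and complain about any
 * adjacent pair of keys that sorts backwards.
 */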
static void dump_bset(struct btree *b, struct bset *i)
{
	struct bkey *k;
	unsigned j;

	for (k = i->start; k < end(i); k = bkey_next(k)) {
		printk(KERN_ERR "block %zu key %zi/%u: %s", index(i, b),
		       (uint64_t *) k - i->d, i->keys, pkey(k));

		for (j = 0; j < KEY_PTRS(k); j++) {
			size_t n = PTR_BUCKET_NR(b->c, k, j);
			printk(" bucket %zu", n);

			if (n >= b->c->sb.first_bucket && n < b->c->sb.nbuckets)
				printk(" prio %i",
				       PTR_BUCKET(b->c, k, j)->prio);
		}

		printk(" %s\n", bch_ptr_status(b->c, k));

		if (bkey_next(k) < end(i) &&
		    skipped_backwards(b, k))
			printk(KERN_ERR "Key skipped backwards\n");
	}
}

#endif

#ifdef CONFIG_BCACHE_DEBUG

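/*
 * Re-read a btree node from disk into the cache set's spare verify_data node
 * and compare it against the freshly sorted in-memory version. On any
 * mismatch, dump the original bsets, the sorted node and the on-disk node to
 * the console and panic with the index of the first differing u64.
 */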
void bch_btree_verify(struct btree *b, struct bset *new)
{
	struct btree *v = b->c->verify_data;
	struct closure cl;
	closure_init_stack(&cl);

	if (!b->c->verify)
		return;

	closure_wait_event(&b->io.wait, &cl,
			   atomic_read(&b->io.cl.remaining) == -1);

	mutex_lock(&b->c->verify_lock);

	bkey_copy(&v->key, &b->key);
	v->written = 0;
	v->level = b->level;

	bch_btree_read(v);
	closure_wait_event(&v->io.wait, &cl,
			   atomic_read(&b->io.cl.remaining) == -1);

	if (new->keys != v->sets[0].data->keys ||
	    memcmp(new->start,
		   v->sets[0].data->start,
		   (void *) end(new) - (void *) new->start)) {
		unsigned i, j;

		console_lock();

		printk(KERN_ERR "*** original memory node:\n");
		for (i = 0; i <= b->nsets; i++)
			dump_bset(b, b->sets[i].data);

		printk(KERN_ERR "*** sorted memory node:\n");
		dump_bset(b, new);

		printk(KERN_ERR "*** on disk node:\n");
		dump_bset(v, v->sets[0].data);

		for (j = 0; j < new->keys; j++)
			if (new->d[j] != v->sets[0].data->d[j])
				break;

		console_unlock();
		panic("verify failed at %u\n", j);
	}

	mutex_unlock(&b->c->verify_lock);
}

static void data_verify_endio(struct bio *bio, int error)
{
	struct closure *cl = bio->bi_private;
	closure_put(cl);
}

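/*
 * Read-time data verification: clone the just-completed read bio with
 * freshly allocated pages, reissue it synchronously, and compare the result
 * page by page against what was returned to the caller, logging the sector
 * of any mismatch.
 */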
void bch_data_verify(struct search *s)
{
	char name[BDEVNAME_SIZE];
	struct cached_dev *dc = container_of(s->d, struct cached_dev, disk);
	struct closure *cl = &s->cl;
	struct bio *check;
	struct bio_vec *bv;
	int i;

	if (!s->unaligned_bvec)
		bio_for_each_segment(bv, s->orig_bio, i)
			bv->bv_offset = 0, bv->bv_len = PAGE_SIZE;

	check = bio_clone(s->orig_bio, GFP_NOIO);
	if (!check)
		return;

	if (bio_alloc_pages(check, GFP_NOIO))
		goto out_put;

	check->bi_rw = READ_SYNC;
	check->bi_private = cl;
	check->bi_end_io = data_verify_endio;

	closure_bio_submit(check, cl, &dc->disk);
	closure_sync(cl);

	bio_for_each_segment(bv, s->orig_bio, i) {
		void *p1 = kmap(bv->bv_page);
		void *p2 = kmap(check->bi_io_vec[i].bv_page);

		if (memcmp(p1 + bv->bv_offset,
			   p2 + bv->bv_offset,
			   bv->bv_len))
			printk(KERN_ERR "bcache (%s): verify failed"
			       " at sector %llu\n",
			       bdevname(dc->bdev, name),
			       (uint64_t) s->orig_bio->bi_sector);

		kunmap(bv->bv_page);
		kunmap(check->bi_io_vec[i].bv_page);
	}

	__bio_for_each_segment(bv, check, i, 0)
		__free_page(bv->bv_page);
out_put:
	bio_put(check);
}

#endif

#ifdef CONFIG_BCACHE_EDEBUG

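/* Total up the sizes of all the extents in a leaf node, for consistency checks. */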
unsigned bch_count_data(struct btree *b)
{
	unsigned ret = 0;
	struct btree_iter iter;
	struct bkey *k;

	if (!b->level)
		for_each_key(b, k, &iter)
			ret += KEY_SIZE(k);
	return ret;
}

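/*
 * Dump every bset in the node to the console, print the caller-supplied
 * message, then panic identifying the node.
 */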
static void vdump_bucket_and_panic(struct btree *b, const char *fmt,
				   va_list args)
{
	unsigned i;

	console_lock();

	for (i = 0; i <= b->nsets; i++)
		dump_bset(b, b->sets[i].data);

	vprintk(fmt, args);

	console_unlock();

	panic("at %s\n", pbtree(b));
}

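/*
 * Verify that the keys within a single bset are in sorted order; if any key
 * sorts after its successor, dump the node and panic with the caller's
 * message.
 */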
void bch_check_key_order_msg(struct btree *b, struct bset *i,
			     const char *fmt, ...)
{
	struct bkey *k;

	if (!i->keys)
		return;

	for (k = i->start; bkey_next(k) < end(i); k = bkey_next(k))
		if (skipped_backwards(b, k)) {
			va_list args;
			va_start(args, fmt);

			vdump_bucket_and_panic(b, fmt, args);
			va_end(args);
		}
}

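/*
 * Walk all keys in a leaf node in iterator order and check that valid keys
 * neither sort out of order nor overlap the previous extent; dump the node
 * and panic if they do.
 */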
void bch_check_keys(struct btree *b, const char *fmt, ...)
{
	va_list args;
	struct bkey *k, *p = NULL;
	struct btree_iter iter;

	if (b->level)
		return;

	for_each_key(b, k, &iter) {
		if (p && bkey_cmp(&START_KEY(p), &START_KEY(k)) > 0) {
			printk(KERN_ERR "Keys out of order:\n");
			goto bug;
		}

		if (bch_ptr_invalid(b, k))
			continue;

		if (p && bkey_cmp(p, &START_KEY(k)) > 0) {
			printk(KERN_ERR "Overlapping keys:\n");
			goto bug;
		}
		p = k;
	}
	return;
bug:
	va_start(args, fmt);
	vdump_bucket_and_panic(b, fmt, args);
	va_end(args);
}

#endif

#ifdef CONFIG_DEBUG_FS

/* XXX: cache set refcounting */

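/*
 * State for the per-cache-set debugfs file: a page-sized text buffer of
 * formatted keys plus a keybuf used to scan the btree from last_scanned
 * onwards.
 */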
struct dump_iterator {
	char buf[PAGE_SIZE];
	size_t bytes;
	struct cache_set *c;
	struct keybuf keys;
};

static bool dump_pred(struct keybuf *buf, struct bkey *k)
{
	return true;
}

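/*
 * debugfs read(): drain whatever is already buffered, then refill the buffer
 * one formatted key per line by rescanning the btree, until the user buffer
 * is full or there are no more keys.
 */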
static ssize_t bch_dump_read(struct file *file, char __user *buf,
			     size_t size, loff_t *ppos)
{
	struct dump_iterator *i = file->private_data;
	ssize_t ret = 0;

	while (size) {
		struct keybuf_key *w;
		unsigned bytes = min(i->bytes, size);

		/* copy_to_user() returns the number of uncopied bytes, not an errno */
		if (copy_to_user(buf, i->buf, bytes))
			return -EFAULT;

		ret += bytes;
		buf += bytes;
		size -= bytes;
		i->bytes -= bytes;
		memmove(i->buf, i->buf + bytes, i->bytes);

		if (i->bytes)
			break;

		w = bch_keybuf_next_rescan(i->c, &i->keys, &MAX_KEY);
		if (!w)
			break;

		i->bytes = snprintf(i->buf, PAGE_SIZE, "%s\n", pkey(&w->key));
		bch_keybuf_del(&i->keys, w);
	}

	return ret;
}

static int bch_dump_open(struct inode *inode, struct file *file)
{
	struct cache_set *c = inode->i_private;
	struct dump_iterator *i;

	i = kzalloc(sizeof(struct dump_iterator), GFP_KERNEL);
	if (!i)
		return -ENOMEM;

	file->private_data = i;
	i->c = c;
	bch_keybuf_init(&i->keys, dump_pred);
	i->keys.last_scanned = KEY(0, 0, 0);

	return 0;
}

static int bch_dump_release(struct inode *inode, struct file *file)
{
	kfree(file->private_data);
	return 0;
}

static const struct file_operations cache_set_debug_ops = {
	.owner		= THIS_MODULE,
	.open		= bch_dump_open,
	.read		= bch_dump_read,
	.release	= bch_dump_release
};

void bch_debug_init_cache_set(struct cache_set *c)
{
	if (!IS_ERR_OR_NULL(debug)) {
		char name[50];
		snprintf(name, 50, "bcache-%pU", c->sb.set_uuid);

		c->debug = debugfs_create_file(name, 0400, debug, c,
					       &cache_set_debug_ops);
	}
}

#endif

#ifdef CONFIG_BCACHE_DEBUG
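/*
 * Sysfs-triggered fuzz test: build a throwaway cache set and three btree
 * nodes, insert random keys into one node while mirroring each finished bset
 * into a second, then check that sorting the in-memory node produces exactly
 * the same keys as running the mirrored bsets back through the read path.
 * Dumps all three nodes and panics on any difference.
 */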
static ssize_t btree_fuzz(struct kobject *k, struct kobj_attribute *a,
			  const char *buffer, size_t size)
{
	void dump(struct btree *b)
	{
		struct bset *i;

		for (i = b->sets[0].data;
		     index(i, b) < btree_blocks(b) &&
		     i->seq == b->sets[0].data->seq;
		     i = ((void *) i) + set_blocks(i, b->c) * block_bytes(b->c))
			dump_bset(b, i);
	}

	struct cache_sb *sb;
	struct cache_set *c;
	struct btree *all[3], *b, *fill, *orig;
	int j;

	struct btree_op op;
	bch_btree_op_init_stack(&op);

	sb = kzalloc(sizeof(struct cache_sb), GFP_KERNEL);
	if (!sb)
		return -ENOMEM;

	sb->bucket_size = 128;
	sb->block_size = 4;

	c = bch_cache_set_alloc(sb);
	if (!c)
		return -ENOMEM;

	for (j = 0; j < 3; j++) {
		BUG_ON(list_empty(&c->btree_cache));
		all[j] = list_first_entry(&c->btree_cache, struct btree, list);
		list_del_init(&all[j]->list);

		all[j]->key = KEY(0, 0, c->sb.bucket_size);
		bkey_copy_key(&all[j]->key, &MAX_KEY);
	}

	b = all[0];
	fill = all[1];
	orig = all[2];

	while (1) {
		for (j = 0; j < 3; j++)
			all[j]->written = all[j]->nsets = 0;

		bch_bset_init_next(b);

		while (1) {
			struct bset *i = write_block(b);
			struct bkey *k = op.keys.top;
			unsigned rand;

			bkey_init(k);
			rand = get_random_int();

			op.type = rand & 1
				? BTREE_INSERT
				: BTREE_REPLACE;
			rand >>= 1;

			SET_KEY_SIZE(k, bucket_remainder(c, rand));
			rand >>= c->bucket_bits;
			rand &= 1024 * 512 - 1;
			rand += c->sb.bucket_size;
			SET_KEY_OFFSET(k, rand);
#if 0
			SET_KEY_PTRS(k, 1);
#endif
			bch_keylist_push(&op.keys);
			bch_btree_insert_keys(b, &op);

			if (should_split(b) ||
			    set_blocks(i, b->c) !=
			    __set_blocks(i, i->keys + 15, b->c)) {
				i->csum = csum_set(i);

				memcpy(write_block(fill),
				       i, set_bytes(i));

				b->written += set_blocks(i, b->c);
				fill->written = b->written;
				if (b->written == btree_blocks(b))
					break;

				bch_btree_sort_lazy(b);
				bch_bset_init_next(b);
			}
		}

		memcpy(orig->sets[0].data,
		       fill->sets[0].data,
		       btree_bytes(c));

		bch_btree_sort(b);
		fill->written = 0;
		bch_btree_read_done(&fill->io.cl);

		if (b->sets[0].data->keys != fill->sets[0].data->keys ||
		    memcmp(b->sets[0].data->start,
			   fill->sets[0].data->start,
			   b->sets[0].data->keys * sizeof(uint64_t))) {
			struct bset *i = b->sets[0].data;
			struct bkey *k, *l;

			for (k = i->start,
			     l = fill->sets[0].data->start;
			     k < end(i);
			     k = bkey_next(k), l = bkey_next(l))
				if (bkey_cmp(k, l) ||
				    KEY_SIZE(k) != KEY_SIZE(l))
					pr_err("key %zi differs: %s != %s",
					       (uint64_t *) k - i->d,
					       pkey(k), pkey(l));

			for (j = 0; j < 3; j++) {
				pr_err("**** Set %i ****", j);
				dump(all[j]);
			}
			panic("\n");
		}

		pr_info("fuzz complete: %i keys", b->sets[0].data->keys);
	}
}

kobj_attribute_write(fuzz, btree_fuzz);
#endif

void bch_debug_exit(void)
{
	if (!IS_ERR_OR_NULL(debug))
		debugfs_remove_recursive(debug);
}

int __init bch_debug_init(struct kobject *kobj)
{
	int ret = 0;
#ifdef CONFIG_BCACHE_DEBUG
	ret = sysfs_create_file(kobj, &ksysfs_fuzz.attr);
	if (ret)
		return ret;
#endif

	debug = debugfs_create_dir("bcache", NULL);
	return ret;
}