Joern Engel | 5db53f3 | 2009-11-20 20:13:39 +0100 | [diff] [blame] | 1 | /* |
| 2 | * fs/logfs/segment.c - Handling the Object Store |
| 3 | * |
| 4 | * As should be obvious for Linux kernel code, license is GPLv2 |
| 5 | * |
| 6 | * Copyright (c) 2005-2008 Joern Engel <joern@logfs.org> |
| 7 | * |
| 8 | * Object store or ostore makes up the complete device with exception of |
| 9 | * the superblock and journal areas. Apart from its own metadata it stores |
| 10 | * three kinds of objects: inodes, dentries and blocks, both data and indirect. |
| 11 | */ |
| 12 | #include "logfs.h" |
Tejun Heo | 5a0e3ad | 2010-03-24 17:04:11 +0900 | [diff] [blame] | 13 | #include <linux/slab.h> |
Joern Engel | 5db53f3 | 2009-11-20 20:13:39 +0100 | [diff] [blame] | 14 | |
| 15 | static int logfs_mark_segment_bad(struct super_block *sb, u32 segno) |
| 16 | { |
| 17 | struct logfs_super *super = logfs_super(sb); |
| 18 | struct btree_head32 *head = &super->s_reserved_segments; |
| 19 | int err; |
| 20 | |
| 21 | err = btree_insert32(head, segno, (void *)1, GFP_NOFS); |
| 22 | if (err) |
| 23 | return err; |
| 24 | logfs_super(sb)->s_bad_segments++; |
| 25 | /* FIXME: write to journal */ |
| 26 | return 0; |
| 27 | } |
| 28 | |
Joern Engel | 9421502 | 2010-03-04 21:30:58 +0100 | [diff] [blame] | 29 | int logfs_erase_segment(struct super_block *sb, u32 segno, int ensure_erase) |
Joern Engel | 5db53f3 | 2009-11-20 20:13:39 +0100 | [diff] [blame] | 30 | { |
| 31 | struct logfs_super *super = logfs_super(sb); |
| 32 | |
| 33 | super->s_gec++; |
| 34 | |
| 35 | return super->s_devops->erase(sb, (u64)segno << super->s_segshift, |
Joern Engel | 9421502 | 2010-03-04 21:30:58 +0100 | [diff] [blame] | 36 | super->s_segsize, ensure_erase); |
Joern Engel | 5db53f3 | 2009-11-20 20:13:39 +0100 | [diff] [blame] | 37 | } |
| 38 | |
| 39 | static s64 logfs_get_free_bytes(struct logfs_area *area, size_t bytes) |
| 40 | { |
| 41 | s32 ofs; |
| 42 | |
| 43 | logfs_open_area(area, bytes); |
| 44 | |
| 45 | ofs = area->a_used_bytes; |
| 46 | area->a_used_bytes += bytes; |
| 47 | BUG_ON(area->a_used_bytes >= logfs_super(area->a_sb)->s_segsize); |
| 48 | |
| 49 | return dev_ofs(area->a_sb, area->a_segno, ofs); |
| 50 | } |
| 51 | |
| 52 | static struct page *get_mapping_page(struct super_block *sb, pgoff_t index, |
| 53 | int use_filler) |
| 54 | { |
| 55 | struct logfs_super *super = logfs_super(sb); |
| 56 | struct address_space *mapping = super->s_mapping_inode->i_mapping; |
| 57 | filler_t *filler = super->s_devops->readpage; |
| 58 | struct page *page; |
| 59 | |
| 60 | BUG_ON(mapping_gfp_mask(mapping) & __GFP_FS); |
| 61 | if (use_filler) |
| 62 | page = read_cache_page(mapping, index, filler, sb); |
| 63 | else { |
| 64 | page = find_or_create_page(mapping, index, GFP_NOFS); |
| 65 | unlock_page(page); |
| 66 | } |
| 67 | return page; |
| 68 | } |
| 69 | |
Joern Engel | 2050366 | 2010-05-03 20:54:34 +0200 | [diff] [blame] | 70 | int __logfs_buf_write(struct logfs_area *area, u64 ofs, void *buf, size_t len, |
Joern Engel | 5db53f3 | 2009-11-20 20:13:39 +0100 | [diff] [blame] | 71 | int use_filler) |
| 72 | { |
| 73 | pgoff_t index = ofs >> PAGE_SHIFT; |
| 74 | struct page *page; |
| 75 | long offset = ofs & (PAGE_SIZE-1); |
| 76 | long copylen; |
| 77 | |
| 78 | /* Only logfs_wbuf_recover may use len==0 */ |
| 79 | BUG_ON(!len && !use_filler); |
| 80 | do { |
| 81 | copylen = min((ulong)len, PAGE_SIZE - offset); |
| 82 | |
| 83 | page = get_mapping_page(area->a_sb, index, use_filler); |
Joern Engel | 2050366 | 2010-05-03 20:54:34 +0200 | [diff] [blame] | 84 | if (IS_ERR(page)) |
| 85 | return PTR_ERR(page); |
Joern Engel | 5db53f3 | 2009-11-20 20:13:39 +0100 | [diff] [blame] | 86 | BUG_ON(!page); /* FIXME: reserve a pool */ |
Joern Engel | 2050366 | 2010-05-03 20:54:34 +0200 | [diff] [blame] | 87 | SetPageUptodate(page); |
Joern Engel | 5db53f3 | 2009-11-20 20:13:39 +0100 | [diff] [blame] | 88 | memcpy(page_address(page) + offset, buf, copylen); |
| 89 | SetPagePrivate(page); |
| 90 | page_cache_release(page); |
| 91 | |
| 92 | buf += copylen; |
| 93 | len -= copylen; |
| 94 | offset = 0; |
| 95 | index++; |
| 96 | } while (len); |
Joern Engel | 2050366 | 2010-05-03 20:54:34 +0200 | [diff] [blame] | 97 | return 0; |
Joern Engel | 5db53f3 | 2009-11-20 20:13:39 +0100 | [diff] [blame] | 98 | } |
| 99 | |
Joern Engel | 81def6b | 2010-03-28 12:47:09 +0200 | [diff] [blame] | 100 | static void pad_partial_page(struct logfs_area *area) |
Joern Engel | 5db53f3 | 2009-11-20 20:13:39 +0100 | [diff] [blame] | 101 | { |
| 102 | struct super_block *sb = area->a_sb; |
Joern Engel | 5db53f3 | 2009-11-20 20:13:39 +0100 | [diff] [blame] | 103 | struct page *page; |
| 104 | u64 ofs = dev_ofs(sb, area->a_segno, area->a_used_bytes); |
| 105 | pgoff_t index = ofs >> PAGE_SHIFT; |
| 106 | long offset = ofs & (PAGE_SIZE-1); |
| 107 | u32 len = PAGE_SIZE - offset; |
| 108 | |
Joern Engel | 81def6b | 2010-03-28 12:47:09 +0200 | [diff] [blame] | 109 | if (len % PAGE_SIZE) { |
| 110 | page = get_mapping_page(sb, index, 0); |
Joern Engel | 5db53f3 | 2009-11-20 20:13:39 +0100 | [diff] [blame] | 111 | BUG_ON(!page); /* FIXME: reserve a pool */ |
| 112 | memset(page_address(page) + offset, 0xff, len); |
| 113 | SetPagePrivate(page); |
| 114 | page_cache_release(page); |
| 115 | } |
Joern Engel | 81def6b | 2010-03-28 12:47:09 +0200 | [diff] [blame] | 116 | } |
Joern Engel | 5db53f3 | 2009-11-20 20:13:39 +0100 | [diff] [blame] | 117 | |
Joern Engel | 81def6b | 2010-03-28 12:47:09 +0200 | [diff] [blame] | 118 | static void pad_full_pages(struct logfs_area *area) |
| 119 | { |
| 120 | struct super_block *sb = area->a_sb; |
| 121 | struct logfs_super *super = logfs_super(sb); |
| 122 | u64 ofs = dev_ofs(sb, area->a_segno, area->a_used_bytes); |
| 123 | u32 len = super->s_segsize - area->a_used_bytes; |
| 124 | pgoff_t index = PAGE_CACHE_ALIGN(ofs) >> PAGE_CACHE_SHIFT; |
| 125 | pgoff_t no_indizes = len >> PAGE_CACHE_SHIFT; |
| 126 | struct page *page; |
Joern Engel | 5db53f3 | 2009-11-20 20:13:39 +0100 | [diff] [blame] | 127 | |
Joern Engel | 81def6b | 2010-03-28 12:47:09 +0200 | [diff] [blame] | 128 | while (no_indizes) { |
| 129 | page = get_mapping_page(sb, index, 0); |
Joern Engel | 5db53f3 | 2009-11-20 20:13:39 +0100 | [diff] [blame] | 130 | BUG_ON(!page); /* FIXME: reserve a pool */ |
Joern Engel | 81def6b | 2010-03-28 12:47:09 +0200 | [diff] [blame] | 131 | SetPageUptodate(page); |
| 132 | memset(page_address(page), 0xff, PAGE_CACHE_SIZE); |
Joern Engel | 5db53f3 | 2009-11-20 20:13:39 +0100 | [diff] [blame] | 133 | SetPagePrivate(page); |
| 134 | page_cache_release(page); |
Joern Engel | 81def6b | 2010-03-28 12:47:09 +0200 | [diff] [blame] | 135 | index++; |
| 136 | no_indizes--; |
Joern Engel | 5db53f3 | 2009-11-20 20:13:39 +0100 | [diff] [blame] | 137 | } |
| 138 | } |
| 139 | |
/*
 * bdev_writeseg writes whole pages, so the unused tail of the current page
 * is memset to avoid leaking stale data.  For the final writeout of a
 * segment (@final != 0) all remaining pages of the segment are allocated
 * and memset as well.
 */
static void pad_wbuf(struct logfs_area *area, int final)
{
	pad_partial_page(area);
	if (!final)
		return;
	pad_full_pages(area);
}
| 150 | |
| 151 | /* |
Joern Engel | 5db53f3 | 2009-11-20 20:13:39 +0100 | [diff] [blame] | 152 | * We have to be careful with the alias tree. Since lookup is done by bix, |
| 153 | * it needs to be normalized, so 14, 15, 16, etc. all match when dealing with |
| 154 | * indirect blocks. So always use it through accessor functions. |
| 155 | */ |
| 156 | static void *alias_tree_lookup(struct super_block *sb, u64 ino, u64 bix, |
| 157 | level_t level) |
| 158 | { |
| 159 | struct btree_head128 *head = &logfs_super(sb)->s_object_alias_tree; |
| 160 | pgoff_t index = logfs_pack_index(bix, level); |
| 161 | |
| 162 | return btree_lookup128(head, ino, index); |
| 163 | } |
| 164 | |
| 165 | static int alias_tree_insert(struct super_block *sb, u64 ino, u64 bix, |
| 166 | level_t level, void *val) |
| 167 | { |
| 168 | struct btree_head128 *head = &logfs_super(sb)->s_object_alias_tree; |
| 169 | pgoff_t index = logfs_pack_index(bix, level); |
| 170 | |
| 171 | return btree_insert128(head, ino, index, val, GFP_NOFS); |
| 172 | } |
| 173 | |
| 174 | static int btree_write_alias(struct super_block *sb, struct logfs_block *block, |
| 175 | write_alias_t *write_one_alias) |
| 176 | { |
| 177 | struct object_alias_item *item; |
| 178 | int err; |
| 179 | |
| 180 | list_for_each_entry(item, &block->item_list, list) { |
| 181 | err = write_alias_journal(sb, block->ino, block->bix, |
| 182 | block->level, item->child_no, item->val); |
| 183 | if (err) |
| 184 | return err; |
| 185 | } |
| 186 | return 0; |
| 187 | } |
| 188 | |
Joern Engel | 5db53f3 | 2009-11-20 20:13:39 +0100 | [diff] [blame] | 189 | static struct logfs_block_ops btree_block_ops = { |
| 190 | .write_block = btree_write_block, |
Joern Engel | 5db53f3 | 2009-11-20 20:13:39 +0100 | [diff] [blame] | 191 | .free_block = __free_block, |
| 192 | .write_alias = btree_write_alias, |
| 193 | }; |
| 194 | |
| 195 | int logfs_load_object_aliases(struct super_block *sb, |
| 196 | struct logfs_obj_alias *oa, int count) |
| 197 | { |
| 198 | struct logfs_super *super = logfs_super(sb); |
| 199 | struct logfs_block *block; |
| 200 | struct object_alias_item *item; |
| 201 | u64 ino, bix; |
| 202 | level_t level; |
| 203 | int i, err; |
| 204 | |
| 205 | super->s_flags |= LOGFS_SB_FLAG_OBJ_ALIAS; |
| 206 | count /= sizeof(*oa); |
| 207 | for (i = 0; i < count; i++) { |
| 208 | item = mempool_alloc(super->s_alias_pool, GFP_NOFS); |
| 209 | if (!item) |
| 210 | return -ENOMEM; |
| 211 | memset(item, 0, sizeof(*item)); |
| 212 | |
| 213 | super->s_no_object_aliases++; |
| 214 | item->val = oa[i].val; |
| 215 | item->child_no = be16_to_cpu(oa[i].child_no); |
| 216 | |
| 217 | ino = be64_to_cpu(oa[i].ino); |
| 218 | bix = be64_to_cpu(oa[i].bix); |
| 219 | level = LEVEL(oa[i].level); |
| 220 | |
| 221 | log_aliases("logfs_load_object_aliases(%llx, %llx, %x, %x) %llx\n", |
| 222 | ino, bix, level, item->child_no, |
| 223 | be64_to_cpu(item->val)); |
| 224 | block = alias_tree_lookup(sb, ino, bix, level); |
| 225 | if (!block) { |
| 226 | block = __alloc_block(sb, ino, bix, level); |
| 227 | block->ops = &btree_block_ops; |
| 228 | err = alias_tree_insert(sb, ino, bix, level, block); |
| 229 | BUG_ON(err); /* mempool empty */ |
| 230 | } |
| 231 | if (test_and_set_bit(item->child_no, block->alias_map)) { |
| 232 | printk(KERN_ERR"LogFS: Alias collision detected\n"); |
| 233 | return -EIO; |
| 234 | } |
| 235 | list_move_tail(&block->alias_list, &super->s_object_alias); |
| 236 | list_add(&item->list, &block->item_list); |
| 237 | } |
| 238 | return 0; |
| 239 | } |
| 240 | |
| 241 | static void kill_alias(void *_block, unsigned long ignore0, |
| 242 | u64 ignore1, u64 ignore2, size_t ignore3) |
| 243 | { |
| 244 | struct logfs_block *block = _block; |
| 245 | struct super_block *sb = block->sb; |
| 246 | struct logfs_super *super = logfs_super(sb); |
| 247 | struct object_alias_item *item; |
| 248 | |
| 249 | while (!list_empty(&block->item_list)) { |
| 250 | item = list_entry(block->item_list.next, typeof(*item), list); |
| 251 | list_del(&item->list); |
| 252 | mempool_free(item, super->s_alias_pool); |
| 253 | } |
| 254 | block->ops->free_block(sb, block); |
| 255 | } |
| 256 | |
| 257 | static int obj_type(struct inode *inode, level_t level) |
| 258 | { |
| 259 | if (level == 0) { |
| 260 | if (S_ISDIR(inode->i_mode)) |
| 261 | return OBJ_DENTRY; |
| 262 | if (inode->i_ino == LOGFS_INO_MASTER) |
| 263 | return OBJ_INODE; |
| 264 | } |
| 265 | return OBJ_BLOCK; |
| 266 | } |
| 267 | |
| 268 | static int obj_len(struct super_block *sb, int obj_type) |
| 269 | { |
| 270 | switch (obj_type) { |
| 271 | case OBJ_DENTRY: |
| 272 | return sizeof(struct logfs_disk_dentry); |
| 273 | case OBJ_INODE: |
| 274 | return sizeof(struct logfs_disk_inode); |
| 275 | case OBJ_BLOCK: |
| 276 | return sb->s_blocksize; |
| 277 | default: |
| 278 | BUG(); |
| 279 | } |
| 280 | } |
| 281 | |
| 282 | static int __logfs_segment_write(struct inode *inode, void *buf, |
| 283 | struct logfs_shadow *shadow, int type, int len, int compr) |
| 284 | { |
| 285 | struct logfs_area *area; |
| 286 | struct super_block *sb = inode->i_sb; |
| 287 | s64 ofs; |
| 288 | struct logfs_object_header h; |
| 289 | int acc_len; |
| 290 | |
| 291 | if (shadow->gc_level == 0) |
| 292 | acc_len = len; |
| 293 | else |
| 294 | acc_len = obj_len(sb, type); |
| 295 | |
| 296 | area = get_area(sb, shadow->gc_level); |
| 297 | ofs = logfs_get_free_bytes(area, len + LOGFS_OBJECT_HEADERSIZE); |
| 298 | LOGFS_BUG_ON(ofs <= 0, sb); |
| 299 | /* |
| 300 | * Order is important. logfs_get_free_bytes(), by modifying the |
| 301 | * segment file, may modify the content of the very page we're about |
| 302 | * to write now. Which is fine, as long as the calculated crc and |
| 303 | * written data still match. So do the modifications _before_ |
| 304 | * calculating the crc. |
| 305 | */ |
| 306 | |
| 307 | h.len = cpu_to_be16(len); |
| 308 | h.type = type; |
| 309 | h.compr = compr; |
| 310 | h.ino = cpu_to_be64(inode->i_ino); |
| 311 | h.bix = cpu_to_be64(shadow->bix); |
| 312 | h.crc = logfs_crc32(&h, sizeof(h) - 4, 4); |
| 313 | h.data_crc = logfs_crc32(buf, len, 0); |
| 314 | |
| 315 | logfs_buf_write(area, ofs, &h, sizeof(h)); |
| 316 | logfs_buf_write(area, ofs + LOGFS_OBJECT_HEADERSIZE, buf, len); |
| 317 | |
| 318 | shadow->new_ofs = ofs; |
| 319 | shadow->new_len = acc_len + LOGFS_OBJECT_HEADERSIZE; |
| 320 | |
| 321 | return 0; |
| 322 | } |
| 323 | |
| 324 | static s64 logfs_segment_write_compress(struct inode *inode, void *buf, |
| 325 | struct logfs_shadow *shadow, int type, int len) |
| 326 | { |
| 327 | struct super_block *sb = inode->i_sb; |
| 328 | void *compressor_buf = logfs_super(sb)->s_compressed_je; |
| 329 | ssize_t compr_len; |
| 330 | int ret; |
| 331 | |
| 332 | mutex_lock(&logfs_super(sb)->s_journal_mutex); |
| 333 | compr_len = logfs_compress(buf, compressor_buf, len, len); |
| 334 | |
| 335 | if (compr_len >= 0) { |
| 336 | ret = __logfs_segment_write(inode, compressor_buf, shadow, |
| 337 | type, compr_len, COMPR_ZLIB); |
| 338 | } else { |
| 339 | ret = __logfs_segment_write(inode, buf, shadow, type, len, |
| 340 | COMPR_NONE); |
| 341 | } |
| 342 | mutex_unlock(&logfs_super(sb)->s_journal_mutex); |
| 343 | return ret; |
| 344 | } |
| 345 | |
| 346 | /** |
| 347 | * logfs_segment_write - write data block to object store |
| 348 | * @inode: inode containing data |
| 349 | * |
| 350 | * Returns an errno or zero. |
| 351 | */ |
| 352 | int logfs_segment_write(struct inode *inode, struct page *page, |
| 353 | struct logfs_shadow *shadow) |
| 354 | { |
| 355 | struct super_block *sb = inode->i_sb; |
| 356 | struct logfs_super *super = logfs_super(sb); |
| 357 | int do_compress, type, len; |
| 358 | int ret; |
| 359 | void *buf; |
| 360 | |
Joern Engel | c6d38301 | 2010-03-04 21:36:19 +0100 | [diff] [blame] | 361 | super->s_flags |= LOGFS_SB_FLAG_DIRTY; |
| 362 | BUG_ON(super->s_flags & LOGFS_SB_FLAG_SHUTDOWN); |
Joern Engel | 5db53f3 | 2009-11-20 20:13:39 +0100 | [diff] [blame] | 363 | do_compress = logfs_inode(inode)->li_flags & LOGFS_IF_COMPRESSED; |
| 364 | if (shadow->gc_level != 0) { |
| 365 | /* temporarily disable compression for indirect blocks */ |
| 366 | do_compress = 0; |
| 367 | } |
| 368 | |
| 369 | type = obj_type(inode, shrink_level(shadow->gc_level)); |
| 370 | len = obj_len(sb, type); |
| 371 | buf = kmap(page); |
| 372 | if (do_compress) |
| 373 | ret = logfs_segment_write_compress(inode, buf, shadow, type, |
| 374 | len); |
| 375 | else |
| 376 | ret = __logfs_segment_write(inode, buf, shadow, type, len, |
| 377 | COMPR_NONE); |
| 378 | kunmap(page); |
| 379 | |
| 380 | log_segment("logfs_segment_write(%llx, %llx, %x) %llx->%llx %x->%x\n", |
| 381 | shadow->ino, shadow->bix, shadow->gc_level, |
| 382 | shadow->old_ofs, shadow->new_ofs, |
| 383 | shadow->old_len, shadow->new_len); |
| 384 | /* this BUG_ON did catch a locking bug. useful */ |
| 385 | BUG_ON(!(shadow->new_ofs & (super->s_segsize - 1))); |
| 386 | return ret; |
| 387 | } |
| 388 | |
| 389 | int wbuf_read(struct super_block *sb, u64 ofs, size_t len, void *buf) |
| 390 | { |
| 391 | pgoff_t index = ofs >> PAGE_SHIFT; |
| 392 | struct page *page; |
| 393 | long offset = ofs & (PAGE_SIZE-1); |
| 394 | long copylen; |
| 395 | |
| 396 | while (len) { |
| 397 | copylen = min((ulong)len, PAGE_SIZE - offset); |
| 398 | |
| 399 | page = get_mapping_page(sb, index, 1); |
| 400 | if (IS_ERR(page)) |
| 401 | return PTR_ERR(page); |
| 402 | memcpy(buf, page_address(page) + offset, copylen); |
| 403 | page_cache_release(page); |
| 404 | |
| 405 | buf += copylen; |
| 406 | len -= copylen; |
| 407 | offset = 0; |
| 408 | index++; |
| 409 | } |
| 410 | return 0; |
| 411 | } |
| 412 | |
| 413 | /* |
| 414 | * The "position" of indirect blocks is ambiguous. It can be the position |
| 415 | * of any data block somewhere behind this indirect block. So we need to |
| 416 | * normalize the positions through logfs_block_mask() before comparing. |
| 417 | */ |
| 418 | static int check_pos(struct super_block *sb, u64 pos1, u64 pos2, level_t level) |
| 419 | { |
| 420 | return (pos1 & logfs_block_mask(sb, level)) != |
| 421 | (pos2 & logfs_block_mask(sb, level)); |
| 422 | } |
| 423 | |
#if 0
/*
 * Currently compiled out.  Reads a segment header from device offset @ofs
 * into @sh and verifies its crc.  Returns 0, a read error, or -EIO on crc
 * mismatch.
 */
static int read_seg_header(struct super_block *sb, u64 ofs,
		struct logfs_segment_header *sh)
{
	__be32 computed;
	int ret;

	ret = wbuf_read(sb, ofs, sizeof(*sh), sh);
	if (ret)
		return ret;

	computed = logfs_crc32(sh, sizeof(*sh), 4);
	if (computed == sh->crc)
		return 0;

	printk(KERN_ERR"LOGFS: header crc error at %llx: expected %x, "
			"got %x\n", ofs, be32_to_cpu(sh->crc),
			be32_to_cpu(computed));
	return -EIO;
}
#endif
| 444 | |
| 445 | static int read_obj_header(struct super_block *sb, u64 ofs, |
| 446 | struct logfs_object_header *oh) |
| 447 | { |
| 448 | __be32 crc; |
| 449 | int err; |
| 450 | |
| 451 | err = wbuf_read(sb, ofs, sizeof(*oh), oh); |
| 452 | if (err) |
| 453 | return err; |
| 454 | crc = logfs_crc32(oh, sizeof(*oh) - 4, 4); |
| 455 | if (crc != oh->crc) { |
| 456 | printk(KERN_ERR"LOGFS: header crc error at %llx: expected %x, " |
| 457 | "got %x\n", ofs, be32_to_cpu(oh->crc), |
| 458 | be32_to_cpu(crc)); |
| 459 | return -EIO; |
| 460 | } |
| 461 | return 0; |
| 462 | } |
| 463 | |
| 464 | static void move_btree_to_page(struct inode *inode, struct page *page, |
| 465 | __be64 *data) |
| 466 | { |
| 467 | struct super_block *sb = inode->i_sb; |
| 468 | struct logfs_super *super = logfs_super(sb); |
| 469 | struct btree_head128 *head = &super->s_object_alias_tree; |
| 470 | struct logfs_block *block; |
| 471 | struct object_alias_item *item, *next; |
| 472 | |
| 473 | if (!(super->s_flags & LOGFS_SB_FLAG_OBJ_ALIAS)) |
| 474 | return; |
| 475 | |
| 476 | block = btree_remove128(head, inode->i_ino, page->index); |
| 477 | if (!block) |
| 478 | return; |
| 479 | |
| 480 | log_blockmove("move_btree_to_page(%llx, %llx, %x)\n", |
| 481 | block->ino, block->bix, block->level); |
| 482 | list_for_each_entry_safe(item, next, &block->item_list, list) { |
| 483 | data[item->child_no] = item->val; |
| 484 | list_del(&item->list); |
| 485 | mempool_free(item, super->s_alias_pool); |
| 486 | } |
| 487 | block->page = page; |
| 488 | SetPagePrivate(page); |
| 489 | page->private = (unsigned long)block; |
| 490 | block->ops = &indirect_block_ops; |
| 491 | initialize_block_counters(page, block, data, 0); |
| 492 | } |
| 493 | |
/*
 * Thin wrapper around find_next_bit().  It exists only to silence a false,
 * yet annoying gcc warning that otherwise makes the editor jump into
 * bitops.h on every recompile of this file.
 * TODO: Complain to gcc folks about this and upgrade compiler.
 */
static unsigned long fnb(const unsigned long *addr,
		unsigned long size, unsigned long offset)
{
	unsigned long bit = find_next_bit(addr, size, offset);

	return bit;
}
| 504 | |
| 505 | void move_page_to_btree(struct page *page) |
| 506 | { |
| 507 | struct logfs_block *block = logfs_block(page); |
| 508 | struct super_block *sb = block->sb; |
| 509 | struct logfs_super *super = logfs_super(sb); |
| 510 | struct object_alias_item *item; |
| 511 | unsigned long pos; |
| 512 | __be64 *child; |
| 513 | int err; |
| 514 | |
| 515 | if (super->s_flags & LOGFS_SB_FLAG_SHUTDOWN) { |
| 516 | block->ops->free_block(sb, block); |
| 517 | return; |
| 518 | } |
| 519 | log_blockmove("move_page_to_btree(%llx, %llx, %x)\n", |
| 520 | block->ino, block->bix, block->level); |
| 521 | super->s_flags |= LOGFS_SB_FLAG_OBJ_ALIAS; |
| 522 | |
| 523 | for (pos = 0; ; pos++) { |
| 524 | pos = fnb(block->alias_map, LOGFS_BLOCK_FACTOR, pos); |
| 525 | if (pos >= LOGFS_BLOCK_FACTOR) |
| 526 | break; |
| 527 | |
| 528 | item = mempool_alloc(super->s_alias_pool, GFP_NOFS); |
| 529 | BUG_ON(!item); /* mempool empty */ |
| 530 | memset(item, 0, sizeof(*item)); |
| 531 | |
| 532 | child = kmap_atomic(page, KM_USER0); |
| 533 | item->val = child[pos]; |
| 534 | kunmap_atomic(child, KM_USER0); |
| 535 | item->child_no = pos; |
| 536 | list_add(&item->list, &block->item_list); |
| 537 | } |
| 538 | block->page = NULL; |
| 539 | ClearPagePrivate(page); |
| 540 | page->private = 0; |
| 541 | block->ops = &btree_block_ops; |
| 542 | err = alias_tree_insert(block->sb, block->ino, block->bix, block->level, |
| 543 | block); |
| 544 | BUG_ON(err); /* mempool empty */ |
| 545 | ClearPageUptodate(page); |
| 546 | } |
| 547 | |
| 548 | static int __logfs_segment_read(struct inode *inode, void *buf, |
| 549 | u64 ofs, u64 bix, level_t level) |
| 550 | { |
| 551 | struct super_block *sb = inode->i_sb; |
| 552 | void *compressor_buf = logfs_super(sb)->s_compressed_je; |
| 553 | struct logfs_object_header oh; |
| 554 | __be32 crc; |
| 555 | u16 len; |
| 556 | int err, block_len; |
| 557 | |
| 558 | block_len = obj_len(sb, obj_type(inode, level)); |
| 559 | err = read_obj_header(sb, ofs, &oh); |
| 560 | if (err) |
| 561 | goto out_err; |
| 562 | |
| 563 | err = -EIO; |
| 564 | if (be64_to_cpu(oh.ino) != inode->i_ino |
| 565 | || check_pos(sb, be64_to_cpu(oh.bix), bix, level)) { |
| 566 | printk(KERN_ERR"LOGFS: (ino, bix) don't match at %llx: " |
| 567 | "expected (%lx, %llx), got (%llx, %llx)\n", |
| 568 | ofs, inode->i_ino, bix, |
| 569 | be64_to_cpu(oh.ino), be64_to_cpu(oh.bix)); |
| 570 | goto out_err; |
| 571 | } |
| 572 | |
| 573 | len = be16_to_cpu(oh.len); |
| 574 | |
| 575 | switch (oh.compr) { |
| 576 | case COMPR_NONE: |
| 577 | err = wbuf_read(sb, ofs + LOGFS_OBJECT_HEADERSIZE, len, buf); |
| 578 | if (err) |
| 579 | goto out_err; |
| 580 | crc = logfs_crc32(buf, len, 0); |
| 581 | if (crc != oh.data_crc) { |
| 582 | printk(KERN_ERR"LOGFS: uncompressed data crc error at " |
| 583 | "%llx: expected %x, got %x\n", ofs, |
| 584 | be32_to_cpu(oh.data_crc), |
| 585 | be32_to_cpu(crc)); |
| 586 | goto out_err; |
| 587 | } |
| 588 | break; |
| 589 | case COMPR_ZLIB: |
| 590 | mutex_lock(&logfs_super(sb)->s_journal_mutex); |
| 591 | err = wbuf_read(sb, ofs + LOGFS_OBJECT_HEADERSIZE, len, |
| 592 | compressor_buf); |
| 593 | if (err) { |
| 594 | mutex_unlock(&logfs_super(sb)->s_journal_mutex); |
| 595 | goto out_err; |
| 596 | } |
| 597 | crc = logfs_crc32(compressor_buf, len, 0); |
| 598 | if (crc != oh.data_crc) { |
| 599 | printk(KERN_ERR"LOGFS: compressed data crc error at " |
| 600 | "%llx: expected %x, got %x\n", ofs, |
| 601 | be32_to_cpu(oh.data_crc), |
| 602 | be32_to_cpu(crc)); |
| 603 | mutex_unlock(&logfs_super(sb)->s_journal_mutex); |
| 604 | goto out_err; |
| 605 | } |
| 606 | err = logfs_uncompress(compressor_buf, buf, len, block_len); |
| 607 | mutex_unlock(&logfs_super(sb)->s_journal_mutex); |
| 608 | if (err) { |
| 609 | printk(KERN_ERR"LOGFS: uncompress error at %llx\n", ofs); |
| 610 | goto out_err; |
| 611 | } |
| 612 | break; |
| 613 | default: |
| 614 | LOGFS_BUG(sb); |
| 615 | err = -EIO; |
| 616 | goto out_err; |
| 617 | } |
| 618 | return 0; |
| 619 | |
| 620 | out_err: |
| 621 | logfs_set_ro(sb); |
| 622 | printk(KERN_ERR"LOGFS: device is read-only now\n"); |
| 623 | LOGFS_BUG(sb); |
| 624 | return err; |
| 625 | } |
| 626 | |
| 627 | /** |
| 628 | * logfs_segment_read - read data block from object store |
| 629 | * @inode: inode containing data |
| 630 | * @buf: data buffer |
| 631 | * @ofs: physical data offset |
| 632 | * @bix: block index |
| 633 | * @level: block level |
| 634 | * |
| 635 | * Returns 0 on success or a negative errno. |
| 636 | */ |
| 637 | int logfs_segment_read(struct inode *inode, struct page *page, |
| 638 | u64 ofs, u64 bix, level_t level) |
| 639 | { |
| 640 | int err; |
| 641 | void *buf; |
| 642 | |
| 643 | if (PageUptodate(page)) |
| 644 | return 0; |
| 645 | |
| 646 | ofs &= ~LOGFS_FULLY_POPULATED; |
| 647 | |
| 648 | buf = kmap(page); |
| 649 | err = __logfs_segment_read(inode, buf, ofs, bix, level); |
| 650 | if (!err) { |
| 651 | move_btree_to_page(inode, page, buf); |
| 652 | SetPageUptodate(page); |
| 653 | } |
| 654 | kunmap(page); |
| 655 | log_segment("logfs_segment_read(%lx, %llx, %x) %llx (%d)\n", |
| 656 | inode->i_ino, bix, level, ofs, err); |
| 657 | return err; |
| 658 | } |
| 659 | |
| 660 | int logfs_segment_delete(struct inode *inode, struct logfs_shadow *shadow) |
| 661 | { |
| 662 | struct super_block *sb = inode->i_sb; |
Joern Engel | c6d38301 | 2010-03-04 21:36:19 +0100 | [diff] [blame] | 663 | struct logfs_super *super = logfs_super(sb); |
Joern Engel | 5db53f3 | 2009-11-20 20:13:39 +0100 | [diff] [blame] | 664 | struct logfs_object_header h; |
| 665 | u16 len; |
| 666 | int err; |
| 667 | |
Joern Engel | c6d38301 | 2010-03-04 21:36:19 +0100 | [diff] [blame] | 668 | super->s_flags |= LOGFS_SB_FLAG_DIRTY; |
| 669 | BUG_ON(super->s_flags & LOGFS_SB_FLAG_SHUTDOWN); |
Joern Engel | 5db53f3 | 2009-11-20 20:13:39 +0100 | [diff] [blame] | 670 | BUG_ON(shadow->old_ofs & LOGFS_FULLY_POPULATED); |
| 671 | if (!shadow->old_ofs) |
| 672 | return 0; |
| 673 | |
| 674 | log_segment("logfs_segment_delete(%llx, %llx, %x) %llx->%llx %x->%x\n", |
| 675 | shadow->ino, shadow->bix, shadow->gc_level, |
| 676 | shadow->old_ofs, shadow->new_ofs, |
| 677 | shadow->old_len, shadow->new_len); |
| 678 | err = read_obj_header(sb, shadow->old_ofs, &h); |
| 679 | LOGFS_BUG_ON(err, sb); |
| 680 | LOGFS_BUG_ON(be64_to_cpu(h.ino) != inode->i_ino, sb); |
| 681 | LOGFS_BUG_ON(check_pos(sb, shadow->bix, be64_to_cpu(h.bix), |
| 682 | shrink_level(shadow->gc_level)), sb); |
| 683 | |
| 684 | if (shadow->gc_level == 0) |
| 685 | len = be16_to_cpu(h.len); |
| 686 | else |
| 687 | len = obj_len(sb, h.type); |
| 688 | shadow->old_len = len + sizeof(h); |
| 689 | return 0; |
| 690 | } |
| 691 | |
Joern Engel | 723b2ff | 2010-03-28 18:10:07 +0200 | [diff] [blame] | 692 | void freeseg(struct super_block *sb, u32 segno) |
Joern Engel | 5db53f3 | 2009-11-20 20:13:39 +0100 | [diff] [blame] | 693 | { |
| 694 | struct logfs_super *super = logfs_super(sb); |
| 695 | struct address_space *mapping = super->s_mapping_inode->i_mapping; |
| 696 | struct page *page; |
| 697 | u64 ofs, start, end; |
| 698 | |
| 699 | start = dev_ofs(sb, segno, 0); |
| 700 | end = dev_ofs(sb, segno + 1, 0); |
| 701 | for (ofs = start; ofs < end; ofs += PAGE_SIZE) { |
| 702 | page = find_get_page(mapping, ofs >> PAGE_SHIFT); |
| 703 | if (!page) |
| 704 | continue; |
| 705 | ClearPagePrivate(page); |
| 706 | page_cache_release(page); |
| 707 | } |
| 708 | } |
| 709 | |
| 710 | int logfs_open_area(struct logfs_area *area, size_t bytes) |
| 711 | { |
| 712 | struct super_block *sb = area->a_sb; |
| 713 | struct logfs_super *super = logfs_super(sb); |
| 714 | int err, closed = 0; |
| 715 | |
| 716 | if (area->a_is_open && area->a_used_bytes + bytes <= super->s_segsize) |
| 717 | return 0; |
| 718 | |
| 719 | if (area->a_is_open) { |
| 720 | u64 ofs = dev_ofs(sb, area->a_segno, area->a_written_bytes); |
| 721 | u32 len = super->s_segsize - area->a_written_bytes; |
| 722 | |
| 723 | log_gc("logfs_close_area(%x)\n", area->a_segno); |
| 724 | pad_wbuf(area, 1); |
| 725 | super->s_devops->writeseg(area->a_sb, ofs, len); |
| 726 | freeseg(sb, area->a_segno); |
| 727 | closed = 1; |
| 728 | } |
| 729 | |
| 730 | area->a_used_bytes = 0; |
| 731 | area->a_written_bytes = 0; |
| 732 | again: |
| 733 | area->a_ops->get_free_segment(area); |
| 734 | area->a_ops->get_erase_count(area); |
| 735 | |
| 736 | log_gc("logfs_open_area(%x, %x)\n", area->a_segno, area->a_level); |
| 737 | err = area->a_ops->erase_segment(area); |
| 738 | if (err) { |
| 739 | printk(KERN_WARNING "LogFS: Error erasing segment %x\n", |
| 740 | area->a_segno); |
| 741 | logfs_mark_segment_bad(sb, area->a_segno); |
| 742 | goto again; |
| 743 | } |
| 744 | area->a_is_open = 1; |
| 745 | return closed; |
| 746 | } |
| 747 | |
| 748 | void logfs_sync_area(struct logfs_area *area) |
| 749 | { |
| 750 | struct super_block *sb = area->a_sb; |
| 751 | struct logfs_super *super = logfs_super(sb); |
| 752 | u64 ofs = dev_ofs(sb, area->a_segno, area->a_written_bytes); |
| 753 | u32 len = (area->a_used_bytes - area->a_written_bytes); |
| 754 | |
| 755 | if (super->s_writesize) |
| 756 | len &= ~(super->s_writesize - 1); |
| 757 | if (len == 0) |
| 758 | return; |
| 759 | pad_wbuf(area, 0); |
| 760 | super->s_devops->writeseg(sb, ofs, len); |
| 761 | area->a_written_bytes += len; |
| 762 | } |
| 763 | |
| 764 | void logfs_sync_segments(struct super_block *sb) |
| 765 | { |
| 766 | struct logfs_super *super = logfs_super(sb); |
| 767 | int i; |
| 768 | |
| 769 | for_each_area(i) |
| 770 | logfs_sync_area(super->s_area[i]); |
| 771 | } |
| 772 | |
| 773 | /* |
| 774 | * Pick a free segment to be used for this area. Effectively takes a |
| 775 | * candidate from the free list (not really a candidate anymore). |
| 776 | */ |
| 777 | static void ostore_get_free_segment(struct logfs_area *area) |
| 778 | { |
| 779 | struct super_block *sb = area->a_sb; |
| 780 | struct logfs_super *super = logfs_super(sb); |
| 781 | |
| 782 | if (super->s_free_list.count == 0) { |
| 783 | printk(KERN_ERR"LOGFS: ran out of free segments\n"); |
| 784 | LOGFS_BUG(sb); |
| 785 | } |
| 786 | |
| 787 | area->a_segno = get_best_cand(sb, &super->s_free_list, NULL); |
| 788 | } |
| 789 | |
| 790 | static void ostore_get_erase_count(struct logfs_area *area) |
| 791 | { |
| 792 | struct logfs_segment_entry se; |
| 793 | u32 ec_level; |
| 794 | |
| 795 | logfs_get_segment_entry(area->a_sb, area->a_segno, &se); |
| 796 | BUG_ON(se.ec_level == cpu_to_be32(BADSEG) || |
| 797 | se.valid == cpu_to_be32(RESERVED)); |
| 798 | |
| 799 | ec_level = be32_to_cpu(se.ec_level); |
| 800 | area->a_erase_count = (ec_level >> 4) + 1; |
| 801 | } |
| 802 | |
| 803 | static int ostore_erase_segment(struct logfs_area *area) |
| 804 | { |
| 805 | struct super_block *sb = area->a_sb; |
| 806 | struct logfs_segment_header sh; |
| 807 | u64 ofs; |
| 808 | int err; |
| 809 | |
Joern Engel | 9421502 | 2010-03-04 21:30:58 +0100 | [diff] [blame] | 810 | err = logfs_erase_segment(sb, area->a_segno, 0); |
Joern Engel | 5db53f3 | 2009-11-20 20:13:39 +0100 | [diff] [blame] | 811 | if (err) |
| 812 | return err; |
| 813 | |
| 814 | sh.pad = 0; |
| 815 | sh.type = SEG_OSTORE; |
| 816 | sh.level = (__force u8)area->a_level; |
| 817 | sh.segno = cpu_to_be32(area->a_segno); |
| 818 | sh.ec = cpu_to_be32(area->a_erase_count); |
| 819 | sh.gec = cpu_to_be64(logfs_super(sb)->s_gec); |
| 820 | sh.crc = logfs_crc32(&sh, sizeof(sh), 4); |
| 821 | |
| 822 | logfs_set_segment_erased(sb, area->a_segno, area->a_erase_count, |
| 823 | area->a_level); |
| 824 | |
| 825 | ofs = dev_ofs(sb, area->a_segno, 0); |
| 826 | area->a_used_bytes = sizeof(sh); |
| 827 | logfs_buf_write(area, ofs, &sh, sizeof(sh)); |
| 828 | return 0; |
| 829 | } |
| 830 | |
| 831 | static const struct logfs_area_ops ostore_area_ops = { |
| 832 | .get_free_segment = ostore_get_free_segment, |
| 833 | .get_erase_count = ostore_get_erase_count, |
| 834 | .erase_segment = ostore_erase_segment, |
| 835 | }; |
| 836 | |
| 837 | static void free_area(struct logfs_area *area) |
| 838 | { |
| 839 | if (area) |
| 840 | freeseg(area->a_sb, area->a_segno); |
| 841 | kfree(area); |
| 842 | } |
| 843 | |
| 844 | static struct logfs_area *alloc_area(struct super_block *sb) |
| 845 | { |
| 846 | struct logfs_area *area; |
| 847 | |
| 848 | area = kzalloc(sizeof(*area), GFP_KERNEL); |
| 849 | if (!area) |
| 850 | return NULL; |
| 851 | |
| 852 | area->a_sb = sb; |
| 853 | return area; |
| 854 | } |
| 855 | |
/* Pages of the mapping inode must never be invalidated; doing so is a bug. */
static void map_invalidatepage(struct page *page, unsigned long l)
{
	BUG();
}
| 860 | |
| 861 | static int map_releasepage(struct page *page, gfp_t g) |
| 862 | { |
| 863 | /* Don't release these pages */ |
| 864 | return 0; |
| 865 | } |
| 866 | |
| 867 | static const struct address_space_operations mapping_aops = { |
| 868 | .invalidatepage = map_invalidatepage, |
| 869 | .releasepage = map_releasepage, |
| 870 | .set_page_dirty = __set_page_dirty_nobuffers, |
| 871 | }; |
| 872 | |
| 873 | int logfs_init_mapping(struct super_block *sb) |
| 874 | { |
| 875 | struct logfs_super *super = logfs_super(sb); |
| 876 | struct address_space *mapping; |
| 877 | struct inode *inode; |
| 878 | |
| 879 | inode = logfs_new_meta_inode(sb, LOGFS_INO_MAPPING); |
| 880 | if (IS_ERR(inode)) |
| 881 | return PTR_ERR(inode); |
| 882 | super->s_mapping_inode = inode; |
| 883 | mapping = inode->i_mapping; |
| 884 | mapping->a_ops = &mapping_aops; |
| 885 | /* Would it be possible to use __GFP_HIGHMEM as well? */ |
| 886 | mapping_set_gfp_mask(mapping, GFP_NOFS); |
| 887 | return 0; |
| 888 | } |
| 889 | |
| 890 | int logfs_init_areas(struct super_block *sb) |
| 891 | { |
| 892 | struct logfs_super *super = logfs_super(sb); |
| 893 | int i = -1; |
| 894 | |
| 895 | super->s_alias_pool = mempool_create_kmalloc_pool(600, |
| 896 | sizeof(struct object_alias_item)); |
| 897 | if (!super->s_alias_pool) |
| 898 | return -ENOMEM; |
| 899 | |
| 900 | super->s_journal_area = alloc_area(sb); |
| 901 | if (!super->s_journal_area) |
| 902 | goto err; |
| 903 | |
| 904 | for_each_area(i) { |
| 905 | super->s_area[i] = alloc_area(sb); |
| 906 | if (!super->s_area[i]) |
| 907 | goto err; |
| 908 | super->s_area[i]->a_level = GC_LEVEL(i); |
| 909 | super->s_area[i]->a_ops = &ostore_area_ops; |
| 910 | } |
| 911 | btree_init_mempool128(&super->s_object_alias_tree, |
| 912 | super->s_btree_pool); |
| 913 | return 0; |
| 914 | |
| 915 | err: |
| 916 | for (i--; i >= 0; i--) |
| 917 | free_area(super->s_area[i]); |
| 918 | free_area(super->s_journal_area); |
Joern Engel | 1f1b000 | 2010-04-15 08:03:57 +0200 | [diff] [blame] | 919 | logfs_mempool_destroy(super->s_alias_pool); |
Joern Engel | 5db53f3 | 2009-11-20 20:13:39 +0100 | [diff] [blame] | 920 | return -ENOMEM; |
| 921 | } |
| 922 | |
| 923 | void logfs_cleanup_areas(struct super_block *sb) |
| 924 | { |
| 925 | struct logfs_super *super = logfs_super(sb); |
| 926 | int i; |
| 927 | |
| 928 | btree_grim_visitor128(&super->s_object_alias_tree, 0, kill_alias); |
| 929 | for_each_area(i) |
| 930 | free_area(super->s_area[i]); |
| 931 | free_area(super->s_journal_area); |
| 932 | destroy_meta_inode(super->s_mapping_inode); |
| 933 | } |