Greg Kroah-Hartman | b244131 | 2017-11-01 15:07:57 +0100 | [diff] [blame] | 1 | // SPDX-License-Identifier: GPL-2.0 |
Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 2 | /* |
| 3 | * linux/fs/sysv/itree.c |
| 4 | * |
| 5 | * Handling of indirect blocks' trees. |
| 6 | * AV, Sep--Dec 2000 |
| 7 | */ |
| 8 | |
| 9 | #include <linux/buffer_head.h> |
| 10 | #include <linux/mount.h> |
| 11 | #include <linux/string.h> |
| 12 | #include "sysv.h" |
| 13 | |
/*
 * Layout of the block pointer array in the inode: DIRECT direct slots,
 * followed by single, double and triple indirect slots.  DEPTH is the
 * longest possible path (inode slot + three indirection levels).
 */
enum {
	DIRECT = 10,
	DEPTH = 4,	/* Have triple indirect */
};
| 15 | |
/*
 * Mark an indirect block's buffer dirty on behalf of @inode; for inodes
 * that are IS_SYNC() the buffer is additionally written out right away.
 */
static inline void dirty_indirect(struct buffer_head *bh, struct inode *inode)
{
	mark_buffer_dirty_inode(bh, inode);

	if (!IS_SYNC(inode))
		return;

	sync_dirty_buffer(bh);
}
| 22 | |
| 23 | static int block_to_path(struct inode *inode, long block, int offsets[DEPTH]) |
| 24 | { |
| 25 | struct super_block *sb = inode->i_sb; |
| 26 | struct sysv_sb_info *sbi = SYSV_SB(sb); |
| 27 | int ptrs_bits = sbi->s_ind_per_block_bits; |
| 28 | unsigned long indirect_blocks = sbi->s_ind_per_block, |
| 29 | double_blocks = sbi->s_ind_per_block_2; |
| 30 | int n = 0; |
| 31 | |
| 32 | if (block < 0) { |
| 33 | printk("sysv_block_map: block < 0\n"); |
| 34 | } else if (block < DIRECT) { |
| 35 | offsets[n++] = block; |
| 36 | } else if ( (block -= DIRECT) < indirect_blocks) { |
| 37 | offsets[n++] = DIRECT; |
| 38 | offsets[n++] = block; |
| 39 | } else if ((block -= indirect_blocks) < double_blocks) { |
| 40 | offsets[n++] = DIRECT+1; |
| 41 | offsets[n++] = block >> ptrs_bits; |
| 42 | offsets[n++] = block & (indirect_blocks - 1); |
| 43 | } else if (((block -= double_blocks) >> (ptrs_bits * 2)) < indirect_blocks) { |
| 44 | offsets[n++] = DIRECT+2; |
| 45 | offsets[n++] = block >> (ptrs_bits * 2); |
| 46 | offsets[n++] = (block >> ptrs_bits) & (indirect_blocks - 1); |
| 47 | offsets[n++] = block & (indirect_blocks - 1); |
| 48 | } else { |
| 49 | /* nothing */; |
| 50 | } |
| 51 | return n; |
| 52 | } |
| 53 | |
| 54 | static inline int block_to_cpu(struct sysv_sb_info *sbi, sysv_zone_t nr) |
| 55 | { |
| 56 | return sbi->s_block_base + fs32_to_cpu(sbi, nr); |
| 57 | } |
| 58 | |
| 59 | typedef struct { |
| 60 | sysv_zone_t *p; |
| 61 | sysv_zone_t key; |
| 62 | struct buffer_head *bh; |
| 63 | } Indirect; |
| 64 | |
| 65 | static DEFINE_RWLOCK(pointers_lock); |
| 66 | |
| 67 | static inline void add_chain(Indirect *p, struct buffer_head *bh, sysv_zone_t *v) |
| 68 | { |
| 69 | p->key = *(p->p = v); |
| 70 | p->bh = bh; |
| 71 | } |
| 72 | |
| 73 | static inline int verify_chain(Indirect *from, Indirect *to) |
| 74 | { |
| 75 | while (from <= to && from->key == *from->p) |
| 76 | from++; |
| 77 | return (from > to); |
| 78 | } |
| 79 | |
| 80 | static inline sysv_zone_t *block_end(struct buffer_head *bh) |
| 81 | { |
| 82 | return (sysv_zone_t*)((char*)bh->b_data + bh->b_size); |
| 83 | } |
| 84 | |
| 85 | /* |
| 86 | * Requires read_lock(&pointers_lock) or write_lock(&pointers_lock) |
| 87 | */ |
| 88 | static Indirect *get_branch(struct inode *inode, |
| 89 | int depth, |
| 90 | int offsets[], |
| 91 | Indirect chain[], |
| 92 | int *err) |
| 93 | { |
| 94 | struct super_block *sb = inode->i_sb; |
| 95 | Indirect *p = chain; |
| 96 | struct buffer_head *bh; |
| 97 | |
| 98 | *err = 0; |
| 99 | add_chain(chain, NULL, SYSV_I(inode)->i_data + *offsets); |
| 100 | if (!p->key) |
| 101 | goto no_block; |
| 102 | while (--depth) { |
| 103 | int block = block_to_cpu(SYSV_SB(sb), p->key); |
| 104 | bh = sb_bread(sb, block); |
| 105 | if (!bh) |
| 106 | goto failure; |
| 107 | if (!verify_chain(chain, p)) |
| 108 | goto changed; |
| 109 | add_chain(++p, bh, (sysv_zone_t*)bh->b_data + *++offsets); |
| 110 | if (!p->key) |
| 111 | goto no_block; |
| 112 | } |
| 113 | return NULL; |
| 114 | |
| 115 | changed: |
| 116 | brelse(bh); |
| 117 | *err = -EAGAIN; |
| 118 | goto no_block; |
| 119 | failure: |
| 120 | *err = -EIO; |
| 121 | no_block: |
| 122 | return p; |
| 123 | } |
| 124 | |
| 125 | static int alloc_branch(struct inode *inode, |
| 126 | int num, |
| 127 | int *offsets, |
| 128 | Indirect *branch) |
| 129 | { |
| 130 | int blocksize = inode->i_sb->s_blocksize; |
| 131 | int n = 0; |
| 132 | int i; |
| 133 | |
| 134 | branch[0].key = sysv_new_block(inode->i_sb); |
| 135 | if (branch[0].key) for (n = 1; n < num; n++) { |
| 136 | struct buffer_head *bh; |
| 137 | int parent; |
| 138 | /* Allocate the next block */ |
| 139 | branch[n].key = sysv_new_block(inode->i_sb); |
| 140 | if (!branch[n].key) |
| 141 | break; |
| 142 | /* |
| 143 | * Get buffer_head for parent block, zero it out and set |
| 144 | * the pointer to new one, then send parent to disk. |
| 145 | */ |
| 146 | parent = block_to_cpu(SYSV_SB(inode->i_sb), branch[n-1].key); |
| 147 | bh = sb_getblk(inode->i_sb, parent); |
| 148 | lock_buffer(bh); |
| 149 | memset(bh->b_data, 0, blocksize); |
| 150 | branch[n].bh = bh; |
| 151 | branch[n].p = (sysv_zone_t*) bh->b_data + offsets[n]; |
| 152 | *branch[n].p = branch[n].key; |
| 153 | set_buffer_uptodate(bh); |
| 154 | unlock_buffer(bh); |
| 155 | dirty_indirect(bh, inode); |
| 156 | } |
| 157 | if (n == num) |
| 158 | return 0; |
| 159 | |
| 160 | /* Allocation failed, free what we already allocated */ |
| 161 | for (i = 1; i < n; i++) |
| 162 | bforget(branch[i].bh); |
| 163 | for (i = 0; i < n; i++) |
| 164 | sysv_free_block(inode->i_sb, branch[i].key); |
| 165 | return -ENOSPC; |
| 166 | } |
| 167 | |
| 168 | static inline int splice_branch(struct inode *inode, |
| 169 | Indirect chain[], |
| 170 | Indirect *where, |
| 171 | int num) |
| 172 | { |
| 173 | int i; |
| 174 | |
| 175 | /* Verify that place we are splicing to is still there and vacant */ |
| 176 | write_lock(&pointers_lock); |
| 177 | if (!verify_chain(chain, where-1) || *where->p) |
| 178 | goto changed; |
| 179 | *where->p = where->key; |
| 180 | write_unlock(&pointers_lock); |
| 181 | |
Deepa Dinamani | 02027d4 | 2016-09-14 07:48:05 -0700 | [diff] [blame] | 182 | inode->i_ctime = current_time(inode); |
Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 183 | |
| 184 | /* had we spliced it onto indirect block? */ |
| 185 | if (where->bh) |
| 186 | dirty_indirect(where->bh, inode); |
| 187 | |
| 188 | if (IS_SYNC(inode)) |
| 189 | sysv_sync_inode(inode); |
| 190 | else |
| 191 | mark_inode_dirty(inode); |
| 192 | return 0; |
| 193 | |
| 194 | changed: |
| 195 | write_unlock(&pointers_lock); |
| 196 | for (i = 1; i < num; i++) |
| 197 | bforget(where[i].bh); |
| 198 | for (i = 0; i < num; i++) |
| 199 | sysv_free_block(inode->i_sb, where[i].key); |
| 200 | return -EAGAIN; |
| 201 | } |
| 202 | |
| 203 | static int get_block(struct inode *inode, sector_t iblock, struct buffer_head *bh_result, int create) |
| 204 | { |
| 205 | int err = -EIO; |
| 206 | int offsets[DEPTH]; |
| 207 | Indirect chain[DEPTH]; |
| 208 | struct super_block *sb = inode->i_sb; |
| 209 | Indirect *partial; |
| 210 | int left; |
| 211 | int depth = block_to_path(inode, iblock, offsets); |
| 212 | |
| 213 | if (depth == 0) |
| 214 | goto out; |
| 215 | |
| 216 | reread: |
| 217 | read_lock(&pointers_lock); |
| 218 | partial = get_branch(inode, depth, offsets, chain, &err); |
| 219 | read_unlock(&pointers_lock); |
| 220 | |
| 221 | /* Simplest case - block found, no allocation needed */ |
| 222 | if (!partial) { |
| 223 | got_it: |
| 224 | map_bh(bh_result, sb, block_to_cpu(SYSV_SB(sb), |
| 225 | chain[depth-1].key)); |
| 226 | /* Clean up and exit */ |
| 227 | partial = chain+depth-1; /* the whole chain */ |
| 228 | goto cleanup; |
| 229 | } |
| 230 | |
| 231 | /* Next simple case - plain lookup or failed read of indirect block */ |
| 232 | if (!create || err == -EIO) { |
| 233 | cleanup: |
| 234 | while (partial > chain) { |
| 235 | brelse(partial->bh); |
| 236 | partial--; |
| 237 | } |
| 238 | out: |
| 239 | return err; |
| 240 | } |
| 241 | |
| 242 | /* |
| 243 | * Indirect block might be removed by truncate while we were |
| 244 | * reading it. Handling of that case (forget what we've got and |
| 245 | * reread) is taken out of the main path. |
| 246 | */ |
| 247 | if (err == -EAGAIN) |
| 248 | goto changed; |
| 249 | |
| 250 | left = (chain + depth) - partial; |
| 251 | err = alloc_branch(inode, left, offsets+(partial-chain), partial); |
| 252 | if (err) |
| 253 | goto cleanup; |
| 254 | |
| 255 | if (splice_branch(inode, chain, partial, left) < 0) |
| 256 | goto changed; |
| 257 | |
| 258 | set_buffer_new(bh_result); |
| 259 | goto got_it; |
| 260 | |
| 261 | changed: |
| 262 | while (partial > chain) { |
| 263 | brelse(partial->bh); |
| 264 | partial--; |
| 265 | } |
| 266 | goto reread; |
| 267 | } |
| 268 | |
| 269 | static inline int all_zeroes(sysv_zone_t *p, sysv_zone_t *q) |
| 270 | { |
| 271 | while (p < q) |
| 272 | if (*p++) |
| 273 | return 0; |
| 274 | return 1; |
| 275 | } |
| 276 | |
| 277 | static Indirect *find_shared(struct inode *inode, |
| 278 | int depth, |
| 279 | int offsets[], |
| 280 | Indirect chain[], |
| 281 | sysv_zone_t *top) |
| 282 | { |
| 283 | Indirect *partial, *p; |
| 284 | int k, err; |
| 285 | |
| 286 | *top = 0; |
| 287 | for (k = depth; k > 1 && !offsets[k-1]; k--) |
| 288 | ; |
| 289 | |
| 290 | write_lock(&pointers_lock); |
| 291 | partial = get_branch(inode, k, offsets, chain, &err); |
| 292 | if (!partial) |
| 293 | partial = chain + k-1; |
| 294 | /* |
| 295 | * If the branch acquired continuation since we've looked at it - |
| 296 | * fine, it should all survive and (new) top doesn't belong to us. |
| 297 | */ |
| 298 | if (!partial->key && *partial->p) { |
| 299 | write_unlock(&pointers_lock); |
| 300 | goto no_top; |
| 301 | } |
| 302 | for (p=partial; p>chain && all_zeroes((sysv_zone_t*)p->bh->b_data,p->p); p--) |
| 303 | ; |
| 304 | /* |
| 305 | * OK, we've found the last block that must survive. The rest of our |
| 306 | * branch should be detached before unlocking. However, if that rest |
| 307 | * of branch is all ours and does not grow immediately from the inode |
| 308 | * it's easier to cheat and just decrement partial->p. |
| 309 | */ |
| 310 | if (p == chain + k - 1 && p > chain) { |
| 311 | p->p--; |
| 312 | } else { |
| 313 | *top = *p->p; |
| 314 | *p->p = 0; |
| 315 | } |
| 316 | write_unlock(&pointers_lock); |
| 317 | |
| 318 | while (partial > p) { |
| 319 | brelse(partial->bh); |
| 320 | partial--; |
| 321 | } |
| 322 | no_top: |
| 323 | return partial; |
| 324 | } |
| 325 | |
| 326 | static inline void free_data(struct inode *inode, sysv_zone_t *p, sysv_zone_t *q) |
| 327 | { |
| 328 | for ( ; p < q ; p++) { |
| 329 | sysv_zone_t nr = *p; |
| 330 | if (nr) { |
| 331 | *p = 0; |
| 332 | sysv_free_block(inode->i_sb, nr); |
| 333 | mark_inode_dirty(inode); |
| 334 | } |
| 335 | } |
| 336 | } |
| 337 | |
| 338 | static void free_branches(struct inode *inode, sysv_zone_t *p, sysv_zone_t *q, int depth) |
| 339 | { |
| 340 | struct buffer_head * bh; |
| 341 | struct super_block *sb = inode->i_sb; |
| 342 | |
| 343 | if (depth--) { |
| 344 | for ( ; p < q ; p++) { |
| 345 | int block; |
| 346 | sysv_zone_t nr = *p; |
| 347 | if (!nr) |
| 348 | continue; |
| 349 | *p = 0; |
| 350 | block = block_to_cpu(SYSV_SB(sb), nr); |
| 351 | bh = sb_bread(sb, block); |
| 352 | if (!bh) |
| 353 | continue; |
| 354 | free_branches(inode, (sysv_zone_t*)bh->b_data, |
| 355 | block_end(bh), depth); |
| 356 | bforget(bh); |
| 357 | sysv_free_block(sb, nr); |
| 358 | mark_inode_dirty(inode); |
| 359 | } |
| 360 | } else |
| 361 | free_data(inode, p, q); |
| 362 | } |
| 363 | |
| 364 | void sysv_truncate (struct inode * inode) |
| 365 | { |
| 366 | sysv_zone_t *i_data = SYSV_I(inode)->i_data; |
| 367 | int offsets[DEPTH]; |
| 368 | Indirect chain[DEPTH]; |
| 369 | Indirect *partial; |
| 370 | sysv_zone_t nr = 0; |
| 371 | int n; |
| 372 | long iblock; |
| 373 | unsigned blocksize; |
| 374 | |
| 375 | if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) || |
| 376 | S_ISLNK(inode->i_mode))) |
| 377 | return; |
| 378 | |
| 379 | blocksize = inode->i_sb->s_blocksize; |
| 380 | iblock = (inode->i_size + blocksize-1) |
| 381 | >> inode->i_sb->s_blocksize_bits; |
| 382 | |
| 383 | block_truncate_page(inode->i_mapping, inode->i_size, get_block); |
| 384 | |
| 385 | n = block_to_path(inode, iblock, offsets); |
| 386 | if (n == 0) |
| 387 | return; |
| 388 | |
| 389 | if (n == 1) { |
| 390 | free_data(inode, i_data+offsets[0], i_data + DIRECT); |
| 391 | goto do_indirects; |
| 392 | } |
| 393 | |
| 394 | partial = find_shared(inode, n, offsets, chain, &nr); |
| 395 | /* Kill the top of shared branch (already detached) */ |
| 396 | if (nr) { |
| 397 | if (partial == chain) |
| 398 | mark_inode_dirty(inode); |
| 399 | else |
| 400 | dirty_indirect(partial->bh, inode); |
| 401 | free_branches(inode, &nr, &nr+1, (chain+n-1) - partial); |
| 402 | } |
| 403 | /* Clear the ends of indirect blocks on the shared branch */ |
| 404 | while (partial > chain) { |
| 405 | free_branches(inode, partial->p + 1, block_end(partial->bh), |
| 406 | (chain+n-1) - partial); |
| 407 | dirty_indirect(partial->bh, inode); |
| 408 | brelse (partial->bh); |
| 409 | partial--; |
| 410 | } |
| 411 | do_indirects: |
| 412 | /* Kill the remaining (whole) subtrees (== subtrees deeper than...) */ |
| 413 | while (n < DEPTH) { |
| 414 | nr = i_data[DIRECT + n - 1]; |
| 415 | if (nr) { |
| 416 | i_data[DIRECT + n - 1] = 0; |
| 417 | mark_inode_dirty(inode); |
| 418 | free_branches(inode, &nr, &nr+1, n); |
| 419 | } |
| 420 | n++; |
| 421 | } |
Deepa Dinamani | 02027d4 | 2016-09-14 07:48:05 -0700 | [diff] [blame] | 422 | inode->i_mtime = inode->i_ctime = current_time(inode); |
Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 423 | if (IS_SYNC(inode)) |
| 424 | sysv_sync_inode (inode); |
| 425 | else |
| 426 | mark_inode_dirty(inode); |
| 427 | } |
| 428 | |
| 429 | static unsigned sysv_nblocks(struct super_block *s, loff_t size) |
| 430 | { |
| 431 | struct sysv_sb_info *sbi = SYSV_SB(s); |
| 432 | int ptrs_bits = sbi->s_ind_per_block_bits; |
| 433 | unsigned blocks, res, direct = DIRECT, i = DEPTH; |
| 434 | blocks = (size + s->s_blocksize - 1) >> s->s_blocksize_bits; |
| 435 | res = blocks; |
| 436 | while (--i && blocks > direct) { |
| 437 | blocks = ((blocks - direct - 1) >> ptrs_bits) + 1; |
| 438 | res += blocks; |
| 439 | direct = 1; |
| 440 | } |
| 441 | return blocks; |
| 442 | } |
| 443 | |
Christian Brauner | 549c729 | 2021-01-21 14:19:43 +0100 | [diff] [blame] | 444 | int sysv_getattr(struct user_namespace *mnt_userns, const struct path *path, |
| 445 | struct kstat *stat, u32 request_mask, unsigned int flags) |
Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 446 | { |
David Howells | a528d35 | 2017-01-31 16:46:22 +0000 | [diff] [blame] | 447 | struct super_block *s = path->dentry->d_sb; |
Christian Brauner | 0d56a45 | 2021-01-21 14:19:30 +0100 | [diff] [blame] | 448 | generic_fillattr(&init_user_ns, d_inode(path->dentry), stat); |
Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 449 | stat->blocks = (s->s_blocksize / 512) * sysv_nblocks(s, stat->size); |
| 450 | stat->blksize = s->s_blocksize; |
| 451 | return 0; |
| 452 | } |
| 453 | |
| 454 | static int sysv_writepage(struct page *page, struct writeback_control *wbc) |
| 455 | { |
| 456 | return block_write_full_page(page,get_block,wbc); |
| 457 | } |
Nick Piggin | 26a6441 | 2007-10-16 01:25:21 -0700 | [diff] [blame] | 458 | |
Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 459 | static int sysv_readpage(struct file *file, struct page *page) |
| 460 | { |
| 461 | return block_read_full_page(page,get_block); |
| 462 | } |
Nick Piggin | 26a6441 | 2007-10-16 01:25:21 -0700 | [diff] [blame] | 463 | |
Christoph Hellwig | f4e420d | 2010-06-04 11:29:56 +0200 | [diff] [blame] | 464 | int sysv_prepare_chunk(struct page *page, loff_t pos, unsigned len) |
Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 465 | { |
Christoph Hellwig | 6e1db88 | 2010-06-04 11:29:57 +0200 | [diff] [blame] | 466 | return __block_write_begin(page, pos, len, get_block); |
Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 467 | } |
Nick Piggin | 26a6441 | 2007-10-16 01:25:21 -0700 | [diff] [blame] | 468 | |
Marco Stornelli | fa4d62a | 2012-12-15 11:45:58 +0100 | [diff] [blame] | 469 | static void sysv_write_failed(struct address_space *mapping, loff_t to) |
| 470 | { |
| 471 | struct inode *inode = mapping->host; |
| 472 | |
| 473 | if (to > inode->i_size) { |
Kirill A. Shutemov | 7caef26 | 2013-09-12 15:13:56 -0700 | [diff] [blame] | 474 | truncate_pagecache(inode, inode->i_size); |
Marco Stornelli | fa4d62a | 2012-12-15 11:45:58 +0100 | [diff] [blame] | 475 | sysv_truncate(inode); |
| 476 | } |
| 477 | } |
| 478 | |
Nick Piggin | 26a6441 | 2007-10-16 01:25:21 -0700 | [diff] [blame] | 479 | static int sysv_write_begin(struct file *file, struct address_space *mapping, |
| 480 | loff_t pos, unsigned len, unsigned flags, |
| 481 | struct page **pagep, void **fsdata) |
| 482 | { |
Christoph Hellwig | 155130a | 2010-06-04 11:29:58 +0200 | [diff] [blame] | 483 | int ret; |
| 484 | |
| 485 | ret = block_write_begin(mapping, pos, len, flags, pagep, get_block); |
Marco Stornelli | fa4d62a | 2012-12-15 11:45:58 +0100 | [diff] [blame] | 486 | if (unlikely(ret)) |
| 487 | sysv_write_failed(mapping, pos + len); |
Christoph Hellwig | 155130a | 2010-06-04 11:29:58 +0200 | [diff] [blame] | 488 | |
| 489 | return ret; |
Nick Piggin | 26a6441 | 2007-10-16 01:25:21 -0700 | [diff] [blame] | 490 | } |
| 491 | |
Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 492 | static sector_t sysv_bmap(struct address_space *mapping, sector_t block) |
| 493 | { |
| 494 | return generic_block_bmap(mapping,block,get_block); |
| 495 | } |
Nick Piggin | 26a6441 | 2007-10-16 01:25:21 -0700 | [diff] [blame] | 496 | |
Christoph Hellwig | f5e54d6 | 2006-06-28 04:26:44 -0700 | [diff] [blame] | 497 | const struct address_space_operations sysv_aops = { |
Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 498 | .readpage = sysv_readpage, |
| 499 | .writepage = sysv_writepage, |
Nick Piggin | 26a6441 | 2007-10-16 01:25:21 -0700 | [diff] [blame] | 500 | .write_begin = sysv_write_begin, |
| 501 | .write_end = generic_write_end, |
Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 502 | .bmap = sysv_bmap |
| 503 | }; |