Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 1 | /* |
| 2 | * Copyright (C) International Business Machines Corp., 2000-2004 |
| 3 | * |
| 4 | * This program is free software; you can redistribute it and/or modify |
| 5 | * it under the terms of the GNU General Public License as published by |
| 6 | * the Free Software Foundation; either version 2 of the License, or |
| 7 | * (at your option) any later version. |
| 8 | * |
| 9 | * This program is distributed in the hope that it will be useful, |
| 10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See |
| 12 | * the GNU General Public License for more details. |
| 13 | * |
| 14 | * You should have received a copy of the GNU General Public License |
| 15 | * along with this program; if not, write to the Free Software |
| 16 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA |
| 17 | */ |
| 18 | |
| 19 | #include <linux/fs.h> |
| 20 | #include <linux/buffer_head.h> |
| 21 | #include <linux/quotaops.h> |
| 22 | #include "jfs_incore.h" |
| 23 | #include "jfs_filsys.h" |
| 24 | #include "jfs_metapage.h" |
| 25 | #include "jfs_dinode.h" |
| 26 | #include "jfs_imap.h" |
| 27 | #include "jfs_dmap.h" |
| 28 | #include "jfs_superblock.h" |
| 29 | #include "jfs_txnmgr.h" |
| 30 | #include "jfs_debug.h" |
| 31 | |
/* PSIZE scaled by 8 (bits per page, assuming PSIZE is a byte count) */
#define BITSPERPAGE	(PSIZE << 3)

/* one megabyte, as a log2 exponent and as a value */
#define L2MEGABYTE	20
#define MEGABYTE	(1 << L2MEGABYTE)

/* thirty-two times MEGABYTE; used below as the cap on the default log size */
#define MEGABYTE32	(32 * MEGABYTE)

/*
 * convert block number to bmap file page number
 *
 * note: the argument is shifted right by 33, so it must have a
 * type wider than 32 bits.
 */
#define BLKTODMAPN(b) \
	(4 + ((b) >> 13) + ((b) >> 23) + ((b) >> 33))
| 40 | |
| 41 | /* |
| 42 | * jfs_extendfs() |
| 43 | * |
| 44 | * function: extend file system; |
| 45 | * |
| 46 | * |-------------------------------|----------|----------| |
| 47 | * file system space fsck inline log |
| 48 | * workspace space |
| 49 | * |
| 50 | * input: |
| 51 | * new LVSize: in LV blocks (required) |
| 52 | * new LogSize: in LV blocks (optional) |
| 53 | * new FSSize: in LV blocks (optional) |
| 54 | * |
| 55 | * new configuration: |
| 56 | * 1. set new LogSize as specified or default from new LVSize; |
| 57 | * 2. compute new FSCKSize from new LVSize; |
| 58 | * 3. set new FSSize as MIN(FSSize, LVSize-(LogSize+FSCKSize)) where |
| 59 | * assert(new FSSize >= old FSSize), |
| 60 | * i.e., file system must not be shrinked; |
| 61 | */ |
| 62 | int jfs_extendfs(struct super_block *sb, s64 newLVSize, int newLogSize) |
| 63 | { |
| 64 | int rc = 0; |
| 65 | struct jfs_sb_info *sbi = JFS_SBI(sb); |
| 66 | struct inode *ipbmap = sbi->ipbmap; |
| 67 | struct inode *ipbmap2; |
| 68 | struct inode *ipimap = sbi->ipimap; |
| 69 | struct jfs_log *log = sbi->log; |
| 70 | struct bmap *bmp = sbi->bmap; |
| 71 | s64 newLogAddress, newFSCKAddress; |
| 72 | int newFSCKSize; |
| 73 | s64 newMapSize = 0, mapSize; |
| 74 | s64 XAddress, XSize, nblocks, xoff, xaddr, t64; |
| 75 | s64 oldLVSize; |
| 76 | s64 newFSSize; |
| 77 | s64 VolumeSize; |
| 78 | int newNpages = 0, nPages, newPage, xlen, t32; |
| 79 | int tid; |
| 80 | int log_formatted = 0; |
| 81 | struct inode *iplist[1]; |
| 82 | struct jfs_superblock *j_sb, *j_sb2; |
| 83 | uint old_agsize; |
| 84 | struct buffer_head *bh, *bh2; |
| 85 | |
| 86 | /* If the volume hasn't grown, get out now */ |
| 87 | |
| 88 | if (sbi->mntflag & JFS_INLINELOG) |
| 89 | oldLVSize = addressPXD(&sbi->logpxd) + lengthPXD(&sbi->logpxd); |
| 90 | else |
| 91 | oldLVSize = addressPXD(&sbi->fsckpxd) + |
| 92 | lengthPXD(&sbi->fsckpxd); |
| 93 | |
| 94 | if (oldLVSize >= newLVSize) { |
| 95 | printk(KERN_WARNING |
| 96 | "jfs_extendfs: volume hasn't grown, returning\n"); |
| 97 | goto out; |
| 98 | } |
| 99 | |
| 100 | VolumeSize = sb->s_bdev->bd_inode->i_size >> sb->s_blocksize_bits; |
| 101 | |
| 102 | if (VolumeSize) { |
| 103 | if (newLVSize > VolumeSize) { |
| 104 | printk(KERN_WARNING "jfs_extendfs: invalid size\n"); |
| 105 | rc = -EINVAL; |
| 106 | goto out; |
| 107 | } |
| 108 | } else { |
| 109 | /* check the device */ |
| 110 | bh = sb_bread(sb, newLVSize - 1); |
| 111 | if (!bh) { |
| 112 | printk(KERN_WARNING "jfs_extendfs: invalid size\n"); |
| 113 | rc = -EINVAL; |
| 114 | goto out; |
| 115 | } |
| 116 | bforget(bh); |
| 117 | } |
| 118 | |
| 119 | /* Can't extend write-protected drive */ |
| 120 | |
| 121 | if (isReadOnly(ipbmap)) { |
| 122 | printk(KERN_WARNING "jfs_extendfs: read-only file system\n"); |
| 123 | rc = -EROFS; |
| 124 | goto out; |
| 125 | } |
| 126 | |
| 127 | /* |
| 128 | * reconfigure LV spaces |
| 129 | * --------------------- |
| 130 | * |
| 131 | * validate new size, or, if not specified, determine new size |
| 132 | */ |
| 133 | |
| 134 | /* |
| 135 | * reconfigure inline log space: |
| 136 | */ |
| 137 | if ((sbi->mntflag & JFS_INLINELOG)) { |
| 138 | if (newLogSize == 0) { |
| 139 | /* |
| 140 | * no size specified: default to 1/256 of aggregate |
| 141 | * size; rounded up to a megabyte boundary; |
| 142 | */ |
| 143 | newLogSize = newLVSize >> 8; |
| 144 | t32 = (1 << (20 - sbi->l2bsize)) - 1; |
| 145 | newLogSize = (newLogSize + t32) & ~t32; |
| 146 | newLogSize = |
| 147 | min(newLogSize, MEGABYTE32 >> sbi->l2bsize); |
| 148 | } else { |
| 149 | /* |
| 150 | * convert the newLogSize to fs blocks. |
| 151 | * |
| 152 | * Since this is given in megabytes, it will always be |
| 153 | * an even number of pages. |
| 154 | */ |
| 155 | newLogSize = (newLogSize * MEGABYTE) >> sbi->l2bsize; |
| 156 | } |
| 157 | |
| 158 | } else |
| 159 | newLogSize = 0; |
| 160 | |
| 161 | newLogAddress = newLVSize - newLogSize; |
| 162 | |
| 163 | /* |
| 164 | * reconfigure fsck work space: |
| 165 | * |
| 166 | * configure it to the end of the logical volume regardless of |
| 167 | * whether file system extends to the end of the aggregate; |
| 168 | * Need enough 4k pages to cover: |
| 169 | * - 1 bit per block in aggregate rounded up to BPERDMAP boundary |
| 170 | * - 1 extra page to handle control page and intermediate level pages |
| 171 | * - 50 extra pages for the chkdsk service log |
| 172 | */ |
| 173 | t64 = ((newLVSize - newLogSize + BPERDMAP - 1) >> L2BPERDMAP) |
| 174 | << L2BPERDMAP; |
| 175 | t32 = ((t64 + (BITSPERPAGE - 1)) / BITSPERPAGE) + 1 + 50; |
| 176 | newFSCKSize = t32 << sbi->l2nbperpage; |
| 177 | newFSCKAddress = newLogAddress - newFSCKSize; |
| 178 | |
| 179 | /* |
| 180 | * compute new file system space; |
| 181 | */ |
| 182 | newFSSize = newLVSize - newLogSize - newFSCKSize; |
| 183 | |
| 184 | /* file system cannot be shrinked */ |
| 185 | if (newFSSize < bmp->db_mapsize) { |
| 186 | rc = -EINVAL; |
| 187 | goto out; |
| 188 | } |
| 189 | |
| 190 | /* |
| 191 | * If we're expanding enough that the inline log does not overlap |
| 192 | * the old one, we can format the new log before we quiesce the |
| 193 | * filesystem. |
| 194 | */ |
| 195 | if ((sbi->mntflag & JFS_INLINELOG) && (newLogAddress > oldLVSize)) { |
| 196 | if ((rc = lmLogFormat(log, newLogAddress, newLogSize))) |
| 197 | goto out; |
| 198 | log_formatted = 1; |
| 199 | } |
| 200 | /* |
| 201 | * quiesce file system |
| 202 | * |
| 203 | * (prepare to move the inline log and to prevent map update) |
| 204 | * |
| 205 | * block any new transactions and wait for completion of |
| 206 | * all wip transactions and flush modified pages s.t. |
| 207 | * on-disk file system is in consistent state and |
| 208 | * log is not required for recovery. |
| 209 | */ |
| 210 | txQuiesce(sb); |
| 211 | |
| 212 | if (sbi->mntflag & JFS_INLINELOG) { |
| 213 | /* |
| 214 | * deactivate old inline log |
| 215 | */ |
| 216 | lmLogShutdown(log); |
| 217 | |
| 218 | /* |
| 219 | * mark on-disk super block for fs in transition; |
| 220 | * |
| 221 | * update on-disk superblock for the new space configuration |
| 222 | * of inline log space and fsck work space descriptors: |
| 223 | * N.B. FS descriptor is NOT updated; |
| 224 | * |
| 225 | * crash recovery: |
| 226 | * logredo(): if FM_EXTENDFS, return to fsck() for cleanup; |
| 227 | * fsck(): if FM_EXTENDFS, reformat inline log and fsck |
| 228 | * workspace from superblock inline log descriptor and fsck |
| 229 | * workspace descriptor; |
| 230 | */ |
| 231 | |
| 232 | /* read in superblock */ |
| 233 | if ((rc = readSuper(sb, &bh))) |
| 234 | goto error_out; |
| 235 | j_sb = (struct jfs_superblock *)bh->b_data; |
| 236 | |
| 237 | /* mark extendfs() in progress */ |
| 238 | j_sb->s_state |= cpu_to_le32(FM_EXTENDFS); |
| 239 | j_sb->s_xsize = cpu_to_le64(newFSSize); |
| 240 | PXDaddress(&j_sb->s_xfsckpxd, newFSCKAddress); |
| 241 | PXDlength(&j_sb->s_xfsckpxd, newFSCKSize); |
| 242 | PXDaddress(&j_sb->s_xlogpxd, newLogAddress); |
| 243 | PXDlength(&j_sb->s_xlogpxd, newLogSize); |
| 244 | |
| 245 | /* synchronously update superblock */ |
| 246 | mark_buffer_dirty(bh); |
| 247 | sync_dirty_buffer(bh); |
| 248 | brelse(bh); |
| 249 | |
| 250 | /* |
| 251 | * format new inline log synchronously; |
| 252 | * |
| 253 | * crash recovery: if log move in progress, |
| 254 | * reformat log and exit success; |
| 255 | */ |
| 256 | if (!log_formatted) |
| 257 | if ((rc = lmLogFormat(log, newLogAddress, newLogSize))) |
| 258 | goto error_out; |
| 259 | |
| 260 | /* |
| 261 | * activate new log |
| 262 | */ |
| 263 | log->base = newLogAddress; |
| 264 | log->size = newLogSize >> (L2LOGPSIZE - sb->s_blocksize_bits); |
| 265 | if ((rc = lmLogInit(log))) |
| 266 | goto error_out; |
| 267 | } |
| 268 | |
| 269 | /* |
| 270 | * extend block allocation map |
| 271 | * --------------------------- |
| 272 | * |
| 273 | * extendfs() for new extension, retry after crash recovery; |
| 274 | * |
| 275 | * note: both logredo() and fsck() rebuild map from |
| 276 | * the bitmap and configuration parameter from superblock |
| 277 | * (disregarding all other control information in the map); |
| 278 | * |
| 279 | * superblock: |
| 280 | * s_size: aggregate size in physical blocks; |
| 281 | */ |
| 282 | /* |
| 283 | * compute the new block allocation map configuration |
| 284 | * |
| 285 | * map dinode: |
| 286 | * di_size: map file size in byte; |
| 287 | * di_nblocks: number of blocks allocated for map file; |
| 288 | * di_mapsize: number of blocks in aggregate (covered by map); |
| 289 | * map control page: |
| 290 | * db_mapsize: number of blocks in aggregate (covered by map); |
| 291 | */ |
| 292 | newMapSize = newFSSize; |
| 293 | /* number of data pages of new bmap file: |
| 294 | * roundup new size to full dmap page boundary and |
| 295 | * add 1 extra dmap page for next extendfs() |
| 296 | */ |
| 297 | t64 = (newMapSize - 1) + BPERDMAP; |
| 298 | newNpages = BLKTODMAPN(t64) + 1; |
| 299 | |
| 300 | /* |
| 301 | * extend map from current map (WITHOUT growing mapfile) |
| 302 | * |
| 303 | * map new extension with unmapped part of the last partial |
| 304 | * dmap page, if applicable, and extra page(s) allocated |
| 305 | * at end of bmap by mkfs() or previous extendfs(); |
| 306 | */ |
| 307 | extendBmap: |
| 308 | /* compute number of blocks requested to extend */ |
| 309 | mapSize = bmp->db_mapsize; |
| 310 | XAddress = mapSize; /* eXtension Address */ |
| 311 | XSize = newMapSize - mapSize; /* eXtension Size */ |
| 312 | old_agsize = bmp->db_agsize; /* We need to know if this changes */ |
| 313 | |
| 314 | /* compute number of blocks that can be extended by current mapfile */ |
| 315 | t64 = dbMapFileSizeToMapSize(ipbmap); |
| 316 | if (mapSize > t64) { |
| 317 | printk(KERN_ERR "jfs_extendfs: mapSize (0x%Lx) > t64 (0x%Lx)\n", |
| 318 | (long long) mapSize, (long long) t64); |
| 319 | rc = -EIO; |
| 320 | goto error_out; |
| 321 | } |
| 322 | nblocks = min(t64 - mapSize, XSize); |
| 323 | |
| 324 | /* |
| 325 | * update map pages for new extension: |
| 326 | * |
| 327 | * update/init dmap and bubble up the control hierarchy |
| 328 | * incrementally fold up dmaps into upper levels; |
| 329 | * update bmap control page; |
| 330 | */ |
| 331 | if ((rc = dbExtendFS(ipbmap, XAddress, nblocks))) |
| 332 | goto error_out; |
| 333 | /* |
| 334 | * the map now has extended to cover additional nblocks: |
| 335 | * dn_mapsize = oldMapsize + nblocks; |
| 336 | */ |
| 337 | /* ipbmap->i_mapsize += nblocks; */ |
| 338 | XSize -= nblocks; |
| 339 | |
| 340 | /* |
| 341 | * grow map file to cover remaining extension |
| 342 | * and/or one extra dmap page for next extendfs(); |
| 343 | * |
| 344 | * allocate new map pages and its backing blocks, and |
| 345 | * update map file xtree |
| 346 | */ |
| 347 | /* compute number of data pages of current bmap file */ |
| 348 | nPages = ipbmap->i_size >> L2PSIZE; |
| 349 | |
| 350 | /* need to grow map file ? */ |
| 351 | if (nPages == newNpages) |
| 352 | goto finalizeBmap; |
| 353 | |
| 354 | /* |
| 355 | * grow bmap file for the new map pages required: |
| 356 | * |
| 357 | * allocate growth at the start of newly extended region; |
| 358 | * bmap file only grows sequentially, i.e., both data pages |
| 359 | * and possibly xtree index pages may grow in append mode, |
| 360 | * s.t. logredo() can reconstruct pre-extension state |
| 361 | * by washing away bmap file of pages outside s_size boundary; |
| 362 | */ |
| 363 | /* |
| 364 | * journal map file growth as if a regular file growth: |
| 365 | * (note: bmap is created with di_mode = IFJOURNAL|IFREG); |
| 366 | * |
| 367 | * journaling of bmap file growth is not required since |
| 368 | * logredo() do/can not use log records of bmap file growth |
| 369 | * but it provides careful write semantics, pmap update, etc.; |
| 370 | */ |
| 371 | /* synchronous write of data pages: bmap data pages are |
| 372 | * cached in meta-data cache, and not written out |
| 373 | * by txCommit(); |
| 374 | */ |
| 375 | filemap_fdatawait(ipbmap->i_mapping); |
| 376 | filemap_fdatawrite(ipbmap->i_mapping); |
| 377 | filemap_fdatawait(ipbmap->i_mapping); |
| 378 | diWriteSpecial(ipbmap, 0); |
| 379 | |
| 380 | newPage = nPages; /* first new page number */ |
| 381 | xoff = newPage << sbi->l2nbperpage; |
| 382 | xlen = (newNpages - nPages) << sbi->l2nbperpage; |
| 383 | xlen = min(xlen, (int) nblocks) & ~(sbi->nbperpage - 1); |
| 384 | xaddr = XAddress; |
| 385 | |
| 386 | tid = txBegin(sb, COMMIT_FORCE); |
| 387 | |
| 388 | if ((rc = xtAppend(tid, ipbmap, 0, xoff, nblocks, &xlen, &xaddr, 0))) { |
| 389 | txEnd(tid); |
| 390 | goto error_out; |
| 391 | } |
| 392 | /* update bmap file size */ |
| 393 | ipbmap->i_size += xlen << sbi->l2bsize; |
| 394 | inode_add_bytes(ipbmap, xlen << sbi->l2bsize); |
| 395 | |
| 396 | iplist[0] = ipbmap; |
| 397 | rc = txCommit(tid, 1, &iplist[0], COMMIT_FORCE); |
| 398 | |
| 399 | txEnd(tid); |
| 400 | |
| 401 | if (rc) |
| 402 | goto error_out; |
| 403 | |
| 404 | /* |
| 405 | * map file has been grown now to cover extension to further out; |
| 406 | * di_size = new map file size; |
| 407 | * |
| 408 | * if huge extension, the previous extension based on previous |
| 409 | * map file size may not have been sufficient to cover whole extension |
| 410 | * (it could have been used up for new map pages), |
| 411 | * but the newly grown map file now covers lot bigger new free space |
| 412 | * available for further extension of map; |
| 413 | */ |
| 414 | /* any more blocks to extend ? */ |
| 415 | if (XSize) |
| 416 | goto extendBmap; |
| 417 | |
| 418 | finalizeBmap: |
| 419 | /* finalize bmap */ |
| 420 | dbFinalizeBmap(ipbmap); |
| 421 | |
| 422 | /* |
| 423 | * update inode allocation map |
| 424 | * --------------------------- |
| 425 | * |
| 426 | * move iag lists from old to new iag; |
| 427 | * agstart field is not updated for logredo() to reconstruct |
| 428 | * iag lists if system crash occurs. |
| 429 | * (computation of ag number from agstart based on agsize |
| 430 | * will correctly identify the new ag); |
| 431 | */ |
| 432 | /* if new AG size the same as old AG size, done! */ |
| 433 | if (bmp->db_agsize != old_agsize) { |
| 434 | if ((rc = diExtendFS(ipimap, ipbmap))) |
| 435 | goto error_out; |
| 436 | |
| 437 | /* finalize imap */ |
| 438 | if ((rc = diSync(ipimap))) |
| 439 | goto error_out; |
| 440 | } |
| 441 | |
| 442 | /* |
| 443 | * finalize |
| 444 | * -------- |
| 445 | * |
| 446 | * extension is committed when on-disk super block is |
| 447 | * updated with new descriptors: logredo will recover |
| 448 | * crash before it to pre-extension state; |
| 449 | */ |
| 450 | |
| 451 | /* sync log to skip log replay of bmap file growth transaction; */ |
| 452 | /* lmLogSync(log, 1); */ |
| 453 | |
| 454 | /* |
| 455 | * synchronous write bmap global control page; |
| 456 | * for crash before completion of write |
| 457 | * logredo() will recover to pre-extendfs state; |
| 458 | * for crash after completion of write, |
| 459 | * logredo() will recover post-extendfs state; |
| 460 | */ |
| 461 | if ((rc = dbSync(ipbmap))) |
| 462 | goto error_out; |
| 463 | |
| 464 | /* |
| 465 | * copy primary bmap inode to secondary bmap inode |
| 466 | */ |
| 467 | |
| 468 | ipbmap2 = diReadSpecial(sb, BMAP_I, 1); |
| 469 | if (ipbmap2 == NULL) { |
| 470 | printk(KERN_ERR "jfs_extendfs: diReadSpecial(bmap) failed\n"); |
| 471 | goto error_out; |
| 472 | } |
| 473 | memcpy(&JFS_IP(ipbmap2)->i_xtroot, &JFS_IP(ipbmap)->i_xtroot, 288); |
| 474 | ipbmap2->i_size = ipbmap->i_size; |
| 475 | ipbmap2->i_blocks = ipbmap->i_blocks; |
| 476 | |
| 477 | diWriteSpecial(ipbmap2, 1); |
| 478 | diFreeSpecial(ipbmap2); |
| 479 | |
| 480 | /* |
| 481 | * update superblock |
| 482 | */ |
| 483 | if ((rc = readSuper(sb, &bh))) |
| 484 | goto error_out; |
| 485 | j_sb = (struct jfs_superblock *)bh->b_data; |
| 486 | |
| 487 | /* mark extendfs() completion */ |
| 488 | j_sb->s_state &= cpu_to_le32(~FM_EXTENDFS); |
| 489 | j_sb->s_size = cpu_to_le64(bmp->db_mapsize << |
| 490 | le16_to_cpu(j_sb->s_l2bfactor)); |
| 491 | j_sb->s_agsize = cpu_to_le32(bmp->db_agsize); |
| 492 | |
| 493 | /* update inline log space descriptor */ |
| 494 | if (sbi->mntflag & JFS_INLINELOG) { |
| 495 | PXDaddress(&(j_sb->s_logpxd), newLogAddress); |
| 496 | PXDlength(&(j_sb->s_logpxd), newLogSize); |
| 497 | } |
| 498 | |
| 499 | /* record log's mount serial number */ |
| 500 | j_sb->s_logserial = cpu_to_le32(log->serial); |
| 501 | |
| 502 | /* update fsck work space descriptor */ |
| 503 | PXDaddress(&(j_sb->s_fsckpxd), newFSCKAddress); |
| 504 | PXDlength(&(j_sb->s_fsckpxd), newFSCKSize); |
| 505 | j_sb->s_fscklog = 1; |
| 506 | /* sb->s_fsckloglen remains the same */ |
| 507 | |
| 508 | /* Update secondary superblock */ |
| 509 | bh2 = sb_bread(sb, SUPER2_OFF >> sb->s_blocksize_bits); |
| 510 | if (bh2) { |
| 511 | j_sb2 = (struct jfs_superblock *)bh2->b_data; |
| 512 | memcpy(j_sb2, j_sb, sizeof (struct jfs_superblock)); |
| 513 | |
| 514 | mark_buffer_dirty(bh); |
| 515 | sync_dirty_buffer(bh2); |
| 516 | brelse(bh2); |
| 517 | } |
| 518 | |
| 519 | /* write primary superblock */ |
| 520 | mark_buffer_dirty(bh); |
| 521 | sync_dirty_buffer(bh); |
| 522 | brelse(bh); |
| 523 | |
| 524 | goto resume; |
| 525 | |
| 526 | error_out: |
| 527 | jfs_error(sb, "jfs_extendfs"); |
| 528 | |
| 529 | resume: |
| 530 | /* |
| 531 | * resume file system transactions |
| 532 | */ |
| 533 | txResume(sb); |
| 534 | |
| 535 | out: |
| 536 | return rc; |
| 537 | } |