Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 1 | /* |
| 2 | * Copyright 2000 by Hans Reiser, licensing governed by reiserfs/README |
| 3 | */ |
| 4 | |
Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 5 | #include <linux/time.h> |
| 6 | #include <linux/reiserfs_fs.h> |
| 7 | #include <linux/reiserfs_acl.h> |
| 8 | #include <linux/reiserfs_xattr.h> |
Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 9 | #include <asm/uaccess.h> |
| 10 | #include <linux/pagemap.h> |
| 11 | #include <linux/swap.h> |
| 12 | #include <linux/writeback.h> |
| 13 | #include <linux/blkdev.h> |
| 14 | #include <linux/buffer_head.h> |
| 15 | #include <linux/quotaops.h> |
| 16 | |
| 17 | /* |
| 18 | ** We pack the tails of files on file close, not at the time they are written. |
| 19 | ** This implies an unnecessary copy of the tail and an unnecessary indirect item |
| 20 | ** insertion/balancing, for files that are written in one write. |
| 21 | ** It avoids unnecessary tail packings (balances) for files that are written in |
| 22 | ** multiple writes and are small enough to have tails. |
| 23 | ** |
| 24 | ** file_release is called by the VFS layer when the file is closed. If |
| 25 | ** this is the last open file descriptor, and the file |
| 26 | ** small enough to have a tail, and the tail is currently in an |
| 27 | ** unformatted node, the tail is converted back into a direct item. |
| 28 | ** |
| 29 | ** We use reiserfs_truncate_file to pack the tail, since it already has |
| 30 | ** all the conditions coded. |
| 31 | */ |
Linus Torvalds | bd4c625 | 2005-07-12 20:21:28 -0700 | [diff] [blame] | 32 | static int reiserfs_file_release(struct inode *inode, struct file *filp) |
Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 33 | { |
| 34 | |
Linus Torvalds | bd4c625 | 2005-07-12 20:21:28 -0700 | [diff] [blame] | 35 | struct reiserfs_transaction_handle th; |
| 36 | int err; |
| 37 | int jbegin_failure = 0; |
Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 38 | |
Eric Sesterhenn | 14a6144 | 2006-10-03 23:36:38 +0200 | [diff] [blame] | 39 | BUG_ON(!S_ISREG(inode->i_mode)); |
Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 40 | |
Linus Torvalds | bd4c625 | 2005-07-12 20:21:28 -0700 | [diff] [blame] | 41 | /* fast out for when nothing needs to be done */ |
| 42 | if ((atomic_read(&inode->i_count) > 1 || |
| 43 | !(REISERFS_I(inode)->i_flags & i_pack_on_close_mask) || |
| 44 | !tail_has_to_be_packed(inode)) && |
| 45 | REISERFS_I(inode)->i_prealloc_count <= 0) { |
| 46 | return 0; |
Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 47 | } |
Linus Torvalds | bd4c625 | 2005-07-12 20:21:28 -0700 | [diff] [blame] | 48 | |
Jes Sorensen | 1b1dcc1 | 2006-01-09 15:59:24 -0800 | [diff] [blame] | 49 | mutex_lock(&inode->i_mutex); |
Vladimir Saveliev | de14569 | 2007-01-22 20:40:46 -0800 | [diff] [blame] | 50 | |
| 51 | mutex_lock(&(REISERFS_I(inode)->i_mmap)); |
| 52 | if (REISERFS_I(inode)->i_flags & i_ever_mapped) |
| 53 | REISERFS_I(inode)->i_flags &= ~i_pack_on_close_mask; |
| 54 | |
Chris Mason | b5f3953 | 2006-08-05 12:15:08 -0700 | [diff] [blame] | 55 | reiserfs_write_lock(inode->i_sb); |
Linus Torvalds | bd4c625 | 2005-07-12 20:21:28 -0700 | [diff] [blame] | 56 | /* freeing preallocation only involves relogging blocks that |
| 57 | * are already in the current transaction. preallocation gets |
| 58 | * freed at the end of each transaction, so it is impossible for |
| 59 | * us to log any additional blocks (including quota blocks) |
| 60 | */ |
| 61 | err = journal_begin(&th, inode->i_sb, 1); |
| 62 | if (err) { |
| 63 | /* uh oh, we can't allow the inode to go away while there |
| 64 | * is still preallocation blocks pending. Try to join the |
| 65 | * aborted transaction |
| 66 | */ |
| 67 | jbegin_failure = err; |
| 68 | err = journal_join_abort(&th, inode->i_sb, 1); |
| 69 | |
| 70 | if (err) { |
| 71 | /* hmpf, our choices here aren't good. We can pin the inode |
| 72 | * which will disallow unmount from every happening, we can |
| 73 | * do nothing, which will corrupt random memory on unmount, |
| 74 | * or we can forcibly remove the file from the preallocation |
| 75 | * list, which will leak blocks on disk. Lets pin the inode |
| 76 | * and let the admin know what is going on. |
| 77 | */ |
| 78 | igrab(inode); |
| 79 | reiserfs_warning(inode->i_sb, |
| 80 | "pinning inode %lu because the " |
Alexey Dobriyan | 533221f | 2006-11-25 11:09:30 -0800 | [diff] [blame] | 81 | "preallocation can't be freed", |
| 82 | inode->i_ino); |
Linus Torvalds | bd4c625 | 2005-07-12 20:21:28 -0700 | [diff] [blame] | 83 | goto out; |
| 84 | } |
| 85 | } |
| 86 | reiserfs_update_inode_transaction(inode); |
Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 87 | |
| 88 | #ifdef REISERFS_PREALLOCATE |
Linus Torvalds | bd4c625 | 2005-07-12 20:21:28 -0700 | [diff] [blame] | 89 | reiserfs_discard_prealloc(&th, inode); |
Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 90 | #endif |
Linus Torvalds | bd4c625 | 2005-07-12 20:21:28 -0700 | [diff] [blame] | 91 | err = journal_end(&th, inode->i_sb, 1); |
Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 92 | |
Linus Torvalds | bd4c625 | 2005-07-12 20:21:28 -0700 | [diff] [blame] | 93 | /* copy back the error code from journal_begin */ |
| 94 | if (!err) |
| 95 | err = jbegin_failure; |
Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 96 | |
Linus Torvalds | bd4c625 | 2005-07-12 20:21:28 -0700 | [diff] [blame] | 97 | if (!err && atomic_read(&inode->i_count) <= 1 && |
| 98 | (REISERFS_I(inode)->i_flags & i_pack_on_close_mask) && |
| 99 | tail_has_to_be_packed(inode)) { |
| 100 | /* if regular file is released by last holder and it has been |
| 101 | appended (we append by unformatted node only) or its direct |
| 102 | item(s) had to be converted, then it may have to be |
| 103 | indirect2direct converted */ |
| 104 | err = reiserfs_truncate_file(inode, 0); |
| 105 | } |
| 106 | out: |
Vladimir Saveliev | de14569 | 2007-01-22 20:40:46 -0800 | [diff] [blame] | 107 | mutex_unlock(&(REISERFS_I(inode)->i_mmap)); |
Jes Sorensen | 1b1dcc1 | 2006-01-09 15:59:24 -0800 | [diff] [blame] | 108 | mutex_unlock(&inode->i_mutex); |
Linus Torvalds | bd4c625 | 2005-07-12 20:21:28 -0700 | [diff] [blame] | 109 | reiserfs_write_unlock(inode->i_sb); |
| 110 | return err; |
Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 111 | } |
| 112 | |
Vladimir Saveliev | de14569 | 2007-01-22 20:40:46 -0800 | [diff] [blame] | 113 | static int reiserfs_file_mmap(struct file *file, struct vm_area_struct *vma) |
| 114 | { |
| 115 | struct inode *inode; |
| 116 | |
| 117 | inode = file->f_path.dentry->d_inode; |
| 118 | mutex_lock(&(REISERFS_I(inode)->i_mmap)); |
| 119 | REISERFS_I(inode)->i_flags |= i_ever_mapped; |
| 120 | mutex_unlock(&(REISERFS_I(inode)->i_mmap)); |
| 121 | |
| 122 | return generic_file_mmap(file, vma); |
| 123 | } |
| 124 | |
/*
 * ->truncate() adapter: the inode operation returns nothing, so this
 * forwards to reiserfs_truncate_file() with its second argument set to 1
 * and drops the result.
 */
static void reiserfs_vfs_truncate_file(struct inode *inode)
{
	(void)reiserfs_truncate_file(inode, 1);
}
| 129 | |
| 130 | /* Sync a reiserfs file. */ |
| 131 | |
| 132 | /* |
| 133 | * FIXME: sync_mapping_buffers() never has anything to sync. Can |
| 134 | * be removed... |
| 135 | */ |
| 136 | |
Linus Torvalds | bd4c625 | 2005-07-12 20:21:28 -0700 | [diff] [blame] | 137 | static int reiserfs_sync_file(struct file *p_s_filp, |
| 138 | struct dentry *p_s_dentry, int datasync) |
| 139 | { |
| 140 | struct inode *p_s_inode = p_s_dentry->d_inode; |
| 141 | int n_err; |
| 142 | int barrier_done; |
Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 143 | |
Eric Sesterhenn | 14a6144 | 2006-10-03 23:36:38 +0200 | [diff] [blame] | 144 | BUG_ON(!S_ISREG(p_s_inode->i_mode)); |
Linus Torvalds | bd4c625 | 2005-07-12 20:21:28 -0700 | [diff] [blame] | 145 | n_err = sync_mapping_buffers(p_s_inode->i_mapping); |
| 146 | reiserfs_write_lock(p_s_inode->i_sb); |
| 147 | barrier_done = reiserfs_commit_for_inode(p_s_inode); |
| 148 | reiserfs_write_unlock(p_s_inode->i_sb); |
Chris Mason | 25736b1 | 2006-09-29 01:59:54 -0700 | [diff] [blame] | 149 | if (barrier_done != 1 && reiserfs_barrier_flush(p_s_inode->i_sb)) |
Linus Torvalds | bd4c625 | 2005-07-12 20:21:28 -0700 | [diff] [blame] | 150 | blkdev_issue_flush(p_s_inode->i_sb->s_bdev, NULL); |
| 151 | if (barrier_done < 0) |
| 152 | return barrier_done; |
| 153 | return (n_err < 0) ? -EIO : 0; |
Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 154 | } |
| 155 | |
Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 156 | /* taken fs/buffer.c:__block_commit_write */ |
| 157 | int reiserfs_commit_page(struct inode *inode, struct page *page, |
Linus Torvalds | bd4c625 | 2005-07-12 20:21:28 -0700 | [diff] [blame] | 158 | unsigned from, unsigned to) |
Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 159 | { |
Linus Torvalds | bd4c625 | 2005-07-12 20:21:28 -0700 | [diff] [blame] | 160 | unsigned block_start, block_end; |
| 161 | int partial = 0; |
| 162 | unsigned blocksize; |
| 163 | struct buffer_head *bh, *head; |
| 164 | unsigned long i_size_index = inode->i_size >> PAGE_CACHE_SHIFT; |
| 165 | int new; |
| 166 | int logit = reiserfs_file_data_log(inode); |
| 167 | struct super_block *s = inode->i_sb; |
| 168 | int bh_per_page = PAGE_CACHE_SIZE / s->s_blocksize; |
| 169 | struct reiserfs_transaction_handle th; |
| 170 | int ret = 0; |
Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 171 | |
Linus Torvalds | bd4c625 | 2005-07-12 20:21:28 -0700 | [diff] [blame] | 172 | th.t_trans_id = 0; |
| 173 | blocksize = 1 << inode->i_blkbits; |
Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 174 | |
Linus Torvalds | bd4c625 | 2005-07-12 20:21:28 -0700 | [diff] [blame] | 175 | if (logit) { |
| 176 | reiserfs_write_lock(s); |
| 177 | ret = journal_begin(&th, s, bh_per_page + 1); |
| 178 | if (ret) |
| 179 | goto drop_write_lock; |
| 180 | reiserfs_update_inode_transaction(inode); |
Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 181 | } |
Linus Torvalds | bd4c625 | 2005-07-12 20:21:28 -0700 | [diff] [blame] | 182 | for (bh = head = page_buffers(page), block_start = 0; |
| 183 | bh != head || !block_start; |
| 184 | block_start = block_end, bh = bh->b_this_page) { |
Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 185 | |
Linus Torvalds | bd4c625 | 2005-07-12 20:21:28 -0700 | [diff] [blame] | 186 | new = buffer_new(bh); |
| 187 | clear_buffer_new(bh); |
| 188 | block_end = block_start + blocksize; |
| 189 | if (block_end <= from || block_start >= to) { |
| 190 | if (!buffer_uptodate(bh)) |
| 191 | partial = 1; |
| 192 | } else { |
| 193 | set_buffer_uptodate(bh); |
| 194 | if (logit) { |
| 195 | reiserfs_prepare_for_journal(s, bh, 1); |
| 196 | journal_mark_dirty(&th, s, bh); |
| 197 | } else if (!buffer_dirty(bh)) { |
| 198 | mark_buffer_dirty(bh); |
| 199 | /* do data=ordered on any page past the end |
| 200 | * of file and any buffer marked BH_New. |
| 201 | */ |
| 202 | if (reiserfs_data_ordered(inode->i_sb) && |
| 203 | (new || page->index >= i_size_index)) { |
| 204 | reiserfs_add_ordered_list(inode, bh); |
| 205 | } |
| 206 | } |
| 207 | } |
| 208 | } |
| 209 | if (logit) { |
| 210 | ret = journal_end(&th, s, bh_per_page + 1); |
| 211 | drop_write_lock: |
| 212 | reiserfs_write_unlock(s); |
| 213 | } |
| 214 | /* |
| 215 | * If this is a partial write which happened to make all buffers |
| 216 | * uptodate then we can optimize away a bogus readpage() for |
| 217 | * the next read(). Here we 'discover' whether the page went |
| 218 | * uptodate as a result of this (potentially partial) write. |
| 219 | */ |
| 220 | if (!partial) |
| 221 | SetPageUptodate(page); |
| 222 | return ret; |
| 223 | } |
Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 224 | |
Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 225 | /* Write @count bytes at position @ppos in a file indicated by @file |
| 226 | from the buffer @buf. |
| 227 | |
| 228 | generic_file_write() is only appropriate for filesystems that are not seeking to optimize performance and want |
| 229 | something simple that works. It is not for serious use by general purpose filesystems, excepting the one that it was |
| 230 | written for (ext2/3). This is for several reasons: |
| 231 | |
| 232 | * It has no understanding of any filesystem specific optimizations. |
| 233 | |
| 234 | * It enters the filesystem repeatedly for each page that is written. |
| 235 | |
| 236 | * It depends on reiserfs_get_block() function which if implemented by reiserfs performs costly search_by_key |
| 237 | * operation for each page it is supplied with. By contrast reiserfs_file_write() feeds as much as possible at a time |
| 238 | * to reiserfs which allows for fewer tree traversals. |
| 239 | |
| 240 | * Each indirect pointer insertion takes a lot of cpu, because it involves memory moves inside of blocks. |
| 241 | |
| 242 | * Asking the block allocation code for blocks one at a time is slightly less efficient. |
| 243 | |
| 244 | All of these reasons for not using only generic file write were understood back when reiserfs was first miscoded to |
| 245 | use it, but we were in a hurry to make code freeze, and so it couldn't be revised then. This new code should make |
| 246 | things right finally. |
| 247 | |
| 248 | Future Features: providing search_by_key with hints. |
| 249 | |
| 250 | */ |
Linus Torvalds | bd4c625 | 2005-07-12 20:21:28 -0700 | [diff] [blame] | 251 | static ssize_t reiserfs_file_write(struct file *file, /* the file we are going to write into */ |
| 252 | const char __user * buf, /* pointer to user supplied data |
| 253 | (in userspace) */ |
| 254 | size_t count, /* amount of bytes to write */ |
| 255 | loff_t * ppos /* pointer to position in file that we start writing at. Should be updated to |
| 256 | * new current position before returning. */ |
| 257 | ) |
Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 258 | { |
Josef Sipek | 1fc5adb | 2006-12-08 02:37:33 -0800 | [diff] [blame] | 259 | struct inode *inode = file->f_path.dentry->d_inode; // Inode of the file that we are writing to. |
Linus Torvalds | bd4c625 | 2005-07-12 20:21:28 -0700 | [diff] [blame] | 260 | /* To simplify coding at this time, we store |
| 261 | locked pages in array for now */ |
Linus Torvalds | bd4c625 | 2005-07-12 20:21:28 -0700 | [diff] [blame] | 262 | struct reiserfs_transaction_handle th; |
| 263 | th.t_trans_id = 0; |
Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 264 | |
Jeff Mahoney | fa385be | 2006-02-01 03:06:51 -0800 | [diff] [blame] | 265 | /* If a filesystem is converted from 3.5 to 3.6, we'll have v3.5 items |
| 266 | * lying around (most of the disk, in fact). Despite the filesystem |
| 267 | * now being a v3.6 format, the old items still can't support large |
| 268 | * file sizes. Catch this case here, as the rest of the VFS layer is |
| 269 | * oblivious to the different limitations between old and new items. |
| 270 | * reiserfs_setattr catches this for truncates. This chunk is lifted |
| 271 | * from generic_write_checks. */ |
| 272 | if (get_inode_item_key_version (inode) == KEY_FORMAT_3_5 && |
| 273 | *ppos + count > MAX_NON_LFS) { |
| 274 | if (*ppos >= MAX_NON_LFS) { |
Jeff Mahoney | fa385be | 2006-02-01 03:06:51 -0800 | [diff] [blame] | 275 | return -EFBIG; |
| 276 | } |
| 277 | if (count > MAX_NON_LFS - (unsigned long)*ppos) |
| 278 | count = MAX_NON_LFS - (unsigned long)*ppos; |
| 279 | } |
| 280 | |
Vladimir Saveliev | 797b4cf | 2007-10-16 01:25:12 -0700 | [diff] [blame] | 281 | return do_sync_write(file, buf, count, ppos); |
Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 282 | } |
| 283 | |
Arjan van de Ven | 4b6f5d2 | 2006-03-28 01:56:42 -0800 | [diff] [blame] | 284 | const struct file_operations reiserfs_file_operations = { |
Badari Pulavarty | 027445c | 2006-09-30 23:28:46 -0700 | [diff] [blame] | 285 | .read = do_sync_read, |
Linus Torvalds | bd4c625 | 2005-07-12 20:21:28 -0700 | [diff] [blame] | 286 | .write = reiserfs_file_write, |
| 287 | .ioctl = reiserfs_ioctl, |
David Howells | 52b499c | 2006-08-29 19:06:18 +0100 | [diff] [blame] | 288 | #ifdef CONFIG_COMPAT |
| 289 | .compat_ioctl = reiserfs_compat_ioctl, |
| 290 | #endif |
Vladimir Saveliev | de14569 | 2007-01-22 20:40:46 -0800 | [diff] [blame] | 291 | .mmap = reiserfs_file_mmap, |
Jeff Mahoney | 5a2618e | 2006-09-30 23:28:44 -0700 | [diff] [blame] | 292 | .open = generic_file_open, |
Linus Torvalds | bd4c625 | 2005-07-12 20:21:28 -0700 | [diff] [blame] | 293 | .release = reiserfs_file_release, |
| 294 | .fsync = reiserfs_sync_file, |
Linus Torvalds | bd4c625 | 2005-07-12 20:21:28 -0700 | [diff] [blame] | 295 | .aio_read = generic_file_aio_read, |
Alexey Dobriyan | 9637f28 | 2006-06-26 00:24:57 -0700 | [diff] [blame] | 296 | .aio_write = generic_file_aio_write, |
Jens Axboe | 5274f05 | 2006-03-30 15:15:30 +0200 | [diff] [blame] | 297 | .splice_read = generic_file_splice_read, |
| 298 | .splice_write = generic_file_splice_write, |
Christoph Hellwig | 91efc16 | 2008-09-08 19:42:50 +0200 | [diff] [blame^] | 299 | .llseek = generic_file_llseek, |
Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 300 | }; |
| 301 | |
Arjan van de Ven | c5ef1c4 | 2007-02-12 00:55:40 -0800 | [diff] [blame] | 302 | const struct inode_operations reiserfs_file_inode_operations = { |
Linus Torvalds | bd4c625 | 2005-07-12 20:21:28 -0700 | [diff] [blame] | 303 | .truncate = reiserfs_vfs_truncate_file, |
| 304 | .setattr = reiserfs_setattr, |
| 305 | .setxattr = reiserfs_setxattr, |
| 306 | .getxattr = reiserfs_getxattr, |
| 307 | .listxattr = reiserfs_listxattr, |
| 308 | .removexattr = reiserfs_removexattr, |
| 309 | .permission = reiserfs_permission, |
Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 310 | }; |