/*
 * Copyright (C) 2007 Oracle.  All rights reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public
 * License v2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public
 * License along with this program; if not, write to the
 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 * Boston, MA 02111-1307, USA.
 */

#include <linux/module.h>
#include <linux/buffer_head.h>
#include <linux/fs.h>
#include <linux/pagemap.h>
#include <linux/highmem.h>
#include <linux/time.h>
#include <linux/init.h>
#include <linux/string.h>
#include <linux/smp_lock.h>
#include <linux/backing-dev.h>
#include <linux/mpage.h>
#include <linux/swap.h>
#include <linux/writeback.h>
#include <linux/statfs.h>
#include <linux/compat.h>
#include "ctree.h"
#include "disk-io.h"
#include "transaction.h"
#include "btrfs_inode.h"
#include "ioctl.h"
#include "print-tree.h"

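/*
 * copy the user data in 'buf' into the locked pages set up by
 * prepare_pages().  'pos' is only used to compute the offset into the
 * first page; a fault while copying stops the loop and is reported to
 * the caller as -EFAULT.
 */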
static int btrfs_copy_from_user(loff_t pos, int num_pages, int write_bytes,
				struct page **prepared_pages,
				const char __user *buf)
{
	long page_fault = 0;
	int i;
	int offset = pos & (PAGE_CACHE_SIZE - 1);

	for (i = 0; i < num_pages && write_bytes > 0; i++, offset = 0) {
		size_t count = min_t(size_t,
				     PAGE_CACHE_SIZE - offset, write_bytes);
		struct page *page = prepared_pages[i];
		fault_in_pages_readable(buf, count);

		/* Copy data from userspace to the current page */
		kmap(page);
		page_fault = __copy_from_user(page_address(page) + offset,
					      buf, count);
		/* Flush processor's dcache for this page */
		flush_dcache_page(page);
		kunmap(page);
		buf += count;
		write_bytes -= count;

		if (page_fault)
			break;
	}
	return page_fault ? -EFAULT : 0;
}

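/*
 * unlock and release a set of pages grabbed by prepare_pages().  The
 * array may only be partially filled, so stop at the first NULL entry.
 */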
static void btrfs_drop_pages(struct page **pages, size_t num_pages)
{
	size_t i;
	for (i = 0; i < num_pages; i++) {
		if (!pages[i])
			break;
		unlock_page(pages[i]);
		mark_page_accessed(pages[i]);
		page_cache_release(pages[i]);
	}
}

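/*
 * after the data is copied into the pages, record it in the btree:
 * buffers mapped to block zero get an inline extent item holding the
 * bytes themselves, other mapped buffers get a csum item.  Each page
 * is then pushed through btrfs_commit_write() to dirty it, with one
 * transaction started and ended per page.
 */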
static int dirty_and_release_pages(struct btrfs_trans_handle *trans,
				   struct btrfs_root *root,
				   struct file *file,
				   struct page **pages,
				   size_t num_pages,
				   loff_t pos,
				   size_t write_bytes)
{
	int i;
	int offset;
	int err = 0;
	int ret;
	int this_write;
	struct inode *inode = file->f_path.dentry->d_inode;
	struct buffer_head *bh;
	struct btrfs_file_extent_item *ei;

	for (i = 0; i < num_pages; i++) {
		offset = pos & (PAGE_CACHE_SIZE - 1);
		this_write = min((size_t)PAGE_CACHE_SIZE - offset, write_bytes);
		/* FIXME, one block at a time */

		mutex_lock(&root->fs_info->fs_mutex);
		trans = btrfs_start_transaction(root, 1);
		btrfs_set_trans_block_group(trans, inode);

		bh = page_buffers(pages[i]);

		if (buffer_mapped(bh) && bh->b_blocknr == 0) {
			struct btrfs_key key;
			struct btrfs_path *path;
			char *ptr, *kaddr;
			u32 datasize;

			/* create an inline extent, and copy the data in */
			path = btrfs_alloc_path();
			BUG_ON(!path);
			key.objectid = inode->i_ino;
			key.offset = pages[i]->index << PAGE_CACHE_SHIFT;
			key.flags = 0;
			btrfs_set_key_type(&key, BTRFS_EXTENT_DATA_KEY);
			BUG_ON(write_bytes >= PAGE_CACHE_SIZE);
			datasize = offset +
				btrfs_file_extent_calc_inline_size(write_bytes);

			ret = btrfs_insert_empty_item(trans, root, path, &key,
						      datasize);
			BUG_ON(ret);
			ei = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]),
				path->slots[0], struct btrfs_file_extent_item);
			btrfs_set_file_extent_generation(ei, trans->transid);
			btrfs_set_file_extent_type(ei,
						   BTRFS_FILE_EXTENT_INLINE);
			ptr = btrfs_file_extent_inline_start(ei);
			kaddr = kmap_atomic(bh->b_page, KM_USER0);
			btrfs_memcpy(root, path->nodes[0]->b_data,
				     ptr, kaddr + bh_offset(bh),
				     offset + write_bytes);
			kunmap_atomic(kaddr, KM_USER0);
			mark_buffer_dirty(path->nodes[0]);
			btrfs_free_path(path);
		} else if (buffer_mapped(bh)) {
			/* csum the file data */
			btrfs_csum_file_block(trans, root, inode->i_ino,
					      pages[i]->index << PAGE_CACHE_SHIFT,
					      kmap(pages[i]), PAGE_CACHE_SIZE);
			kunmap(pages[i]);
		}
		SetPageChecked(pages[i]);
		ret = btrfs_end_transaction(trans, root);
		BUG_ON(ret);
		mutex_unlock(&root->fs_info->fs_mutex);

		ret = btrfs_commit_write(file, pages[i], offset,
					 offset + this_write);
		pos += this_write;
		if (ret) {
			err = ret;
			goto failed;
		}
		WARN_ON(this_write > write_bytes);
		write_bytes -= this_write;
	}
failed:
	return err;
}

/*
 * this is very complex, but the basic idea is to drop all extents
 * in the range start - end.  hint_block is filled in with a block number
 * that would be a good hint to the block allocator for this file.
 *
 * If an extent intersects the range but is not entirely inside the range
 * it is either truncated or split.  Anything entirely inside the range
 * is deleted from the tree.
 */
int btrfs_drop_extents(struct btrfs_trans_handle *trans,
		       struct btrfs_root *root, struct inode *inode,
		       u64 start, u64 end, u64 *hint_block)
{
	int ret;
	struct btrfs_key key;
	struct btrfs_leaf *leaf;
	int slot;
	struct btrfs_file_extent_item *extent;
	u64 extent_end = 0;
	int keep;
	struct btrfs_file_extent_item old;
	struct btrfs_path *path;
	u64 search_start = start;
	int bookend;
	int found_type;
	int found_extent;
	int found_inline;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;
	while (1) {
		btrfs_release_path(root, path);
		ret = btrfs_lookup_file_extent(trans, root, path, inode->i_ino,
					       search_start, -1);
		if (ret < 0)
			goto out;
		if (ret > 0) {
			if (path->slots[0] == 0) {
				ret = 0;
				goto out;
			}
			path->slots[0]--;
		}
		keep = 0;
		bookend = 0;
		found_extent = 0;
		found_inline = 0;
		extent = NULL;
		leaf = btrfs_buffer_leaf(path->nodes[0]);
		slot = path->slots[0];
		btrfs_disk_key_to_cpu(&key, &leaf->items[slot].key);
		if (key.offset >= end || key.objectid != inode->i_ino) {
			ret = 0;
			goto out;
		}
		if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY) {
			ret = 0;
			goto out;
		}
		extent = btrfs_item_ptr(leaf, slot,
					struct btrfs_file_extent_item);
		found_type = btrfs_file_extent_type(extent);
		if (found_type == BTRFS_FILE_EXTENT_REG) {
			extent_end = key.offset +
				(btrfs_file_extent_num_blocks(extent) <<
				 inode->i_blkbits);
			found_extent = 1;
		} else if (found_type == BTRFS_FILE_EXTENT_INLINE) {
			found_inline = 1;
			extent_end = key.offset +
				btrfs_file_extent_inline_len(leaf->items + slot);
		}

		/* we found nothing we can drop */
		if (!found_extent && !found_inline) {
			ret = 0;
			goto out;
		}

		/* we found nothing inside the range */
		if (search_start >= extent_end) {
			ret = 0;
			goto out;
		}

		/* FIXME, there's only one inline extent allowed right now */
		if (found_inline) {
			u64 mask = root->blocksize - 1;
			search_start = (extent_end + mask) & ~mask;
		} else
			search_start = extent_end;

		if (end < extent_end && end >= key.offset) {
			if (found_extent) {
				u64 disk_blocknr =
					btrfs_file_extent_disk_blocknr(extent);
				u64 disk_num_blocks =
					btrfs_file_extent_disk_num_blocks(extent);
				memcpy(&old, extent, sizeof(old));
				if (disk_blocknr != 0) {
					ret = btrfs_inc_extent_ref(trans, root,
						 disk_blocknr, disk_num_blocks);
					BUG_ON(ret);
				}
			}
			WARN_ON(found_inline);
			bookend = 1;
		}

		/* truncate existing extent */
		if (start > key.offset) {
			u64 new_num;
			u64 old_num;
			keep = 1;
			WARN_ON(start & (root->blocksize - 1));
			if (found_extent) {
				new_num = (start - key.offset) >>
					inode->i_blkbits;
				old_num = btrfs_file_extent_num_blocks(extent);
				*hint_block =
					btrfs_file_extent_disk_blocknr(extent);
				if (btrfs_file_extent_disk_blocknr(extent)) {
					inode->i_blocks -=
						(old_num - new_num) << 3;
				}
				btrfs_set_file_extent_num_blocks(extent,
								 new_num);
				mark_buffer_dirty(path->nodes[0]);
			} else {
				WARN_ON(1);
			}
		}
		/* delete the entire extent */
		if (!keep) {
			u64 disk_blocknr = 0;
			u64 disk_num_blocks = 0;
			u64 extent_num_blocks = 0;
			if (found_extent) {
				disk_blocknr =
					btrfs_file_extent_disk_blocknr(extent);
				disk_num_blocks =
					btrfs_file_extent_disk_num_blocks(extent);
				extent_num_blocks =
					btrfs_file_extent_num_blocks(extent);
				*hint_block =
					btrfs_file_extent_disk_blocknr(extent);
			}
			ret = btrfs_del_item(trans, root, path);
			BUG_ON(ret);
			btrfs_release_path(root, path);
			extent = NULL;
			if (found_extent && disk_blocknr != 0) {
				inode->i_blocks -= extent_num_blocks << 3;
				ret = btrfs_free_extent(trans, root,
							disk_blocknr,
							disk_num_blocks, 0);
			}

			BUG_ON(ret);
			if (!bookend && search_start >= end) {
				ret = 0;
				goto out;
			}
			if (!bookend)
				continue;
		}
		/* create bookend, splitting the extent in two */
		if (bookend && found_extent) {
			struct btrfs_key ins;
			ins.objectid = inode->i_ino;
			ins.offset = end;
			ins.flags = 0;
			btrfs_set_key_type(&ins, BTRFS_EXTENT_DATA_KEY);

			btrfs_release_path(root, path);
			ret = btrfs_insert_empty_item(trans, root, path, &ins,
						      sizeof(*extent));
			BUG_ON(ret);
			extent = btrfs_item_ptr(
				    btrfs_buffer_leaf(path->nodes[0]),
				    path->slots[0],
				    struct btrfs_file_extent_item);
			btrfs_set_file_extent_disk_blocknr(extent,
				    btrfs_file_extent_disk_blocknr(&old));
			btrfs_set_file_extent_disk_num_blocks(extent,
				    btrfs_file_extent_disk_num_blocks(&old));

			btrfs_set_file_extent_offset(extent,
				    btrfs_file_extent_offset(&old) +
				    ((end - key.offset) >> inode->i_blkbits));
			WARN_ON(btrfs_file_extent_num_blocks(&old) <
				(extent_end - end) >> inode->i_blkbits);
			btrfs_set_file_extent_num_blocks(extent,
				    (extent_end - end) >> inode->i_blkbits);

			btrfs_set_file_extent_type(extent,
						   BTRFS_FILE_EXTENT_REG);
			btrfs_set_file_extent_generation(extent,
				    btrfs_file_extent_generation(&old));
			btrfs_mark_buffer_dirty(path->nodes[0]);
			if (btrfs_file_extent_disk_blocknr(&old) != 0) {
				inode->i_blocks +=
				      btrfs_file_extent_num_blocks(extent) << 3;
			}
			ret = 0;
			goto out;
		}
	}
out:
	btrfs_free_path(path);
	return ret;
}

/*
 * this gets pages into the page cache and locks them down
 */
static int prepare_pages(struct btrfs_root *root,
			 struct file *file,
			 struct page **pages,
			 size_t num_pages,
			 loff_t pos,
			 unsigned long first_index,
			 unsigned long last_index,
			 size_t write_bytes,
			 u64 alloc_extent_start)
{
	int i;
	unsigned long index = pos >> PAGE_CACHE_SHIFT;
	struct inode *inode = file->f_path.dentry->d_inode;
	int offset;
	int err = 0;
	int this_write;
	struct buffer_head *bh;
	struct buffer_head *head;
	loff_t isize = i_size_read(inode);

	memset(pages, 0, num_pages * sizeof(struct page *));

	for (i = 0; i < num_pages; i++) {
		pages[i] = grab_cache_page(inode->i_mapping, index + i);
		if (!pages[i]) {
			err = -ENOMEM;
			goto failed_release;
		}
		cancel_dirty_page(pages[i], PAGE_CACHE_SIZE);
		wait_on_page_writeback(pages[i]);
		offset = pos & (PAGE_CACHE_SIZE - 1);
		this_write = min((size_t)PAGE_CACHE_SIZE - offset, write_bytes);
		if (!page_has_buffers(pages[i])) {
			create_empty_buffers(pages[i],
					     root->fs_info->sb->s_blocksize,
					     (1 << BH_Uptodate));
		}
		head = page_buffers(pages[i]);
		bh = head;
		do {
			err = btrfs_map_bh_to_logical(root, bh,
						      alloc_extent_start);
			BUG_ON(err);
			if (err)
				goto failed_truncate;
			bh = bh->b_this_page;
			if (alloc_extent_start)
				alloc_extent_start++;
		} while (bh != head);
		pos += this_write;
		WARN_ON(this_write > write_bytes);
		write_bytes -= this_write;
	}
	return 0;

failed_release:
	btrfs_drop_pages(pages, num_pages);
	return err;

failed_truncate:
	btrfs_drop_pages(pages, num_pages);
	if (pos > isize)
		vmtruncate(inode, isize);
	return err;
}

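/*
 * the write path: make sure the first and last pages in the range are
 * up to date and ready for COW, drop any extents that overlap the
 * range, insert a hole extent if the write starts beyond i_size,
 * allocate the new blocks (or decide on inline data), then copy the
 * user buffer in a few pages at a time.
 */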
static ssize_t btrfs_file_write(struct file *file, const char __user *buf,
				size_t count, loff_t *ppos)
{
	loff_t pos;
	size_t num_written = 0;
	int err = 0;
	int ret = 0;
	struct inode *inode = file->f_path.dentry->d_inode;
	struct btrfs_root *root = BTRFS_I(inode)->root;
	struct page *pages[8];
	struct page *pinned[2];
	unsigned long first_index;
	unsigned long last_index;
	u64 start_pos;
	u64 num_blocks;
	u64 alloc_extent_start;
	u64 hint_block;
	struct btrfs_trans_handle *trans;
	struct btrfs_key ins;
	pinned[0] = NULL;
	pinned[1] = NULL;
	if (file->f_flags & O_DIRECT)
		return -EINVAL;
	pos = *ppos;
	vfs_check_frozen(inode->i_sb, SB_FREEZE_WRITE);
	current->backing_dev_info = inode->i_mapping->backing_dev_info;
	err = generic_write_checks(file, &pos, &count, S_ISBLK(inode->i_mode));
	if (err)
		goto out;
	if (count == 0)
		goto out;
	err = remove_suid(file->f_path.dentry);
	if (err)
		goto out;
	file_update_time(file);

	start_pos = pos & ~((u64)PAGE_CACHE_SIZE - 1);
	num_blocks = (count + pos - start_pos + root->blocksize - 1) >>
			inode->i_blkbits;

	mutex_lock(&inode->i_mutex);
	first_index = pos >> PAGE_CACHE_SHIFT;
	last_index = (pos + count) >> PAGE_CACHE_SHIFT;

	/*
	 * there are lots of better ways to do this, but this code
	 * makes sure the first and last page in the file range are
	 * up to date and ready for cow
	 */
	if ((pos & (PAGE_CACHE_SIZE - 1))) {
		pinned[0] = grab_cache_page(inode->i_mapping, first_index);
		if (!PageUptodate(pinned[0])) {
			ret = mpage_readpage(pinned[0], btrfs_get_block);
			BUG_ON(ret);
			wait_on_page_locked(pinned[0]);
		} else {
			unlock_page(pinned[0]);
		}
	}
	if ((pos + count) & (PAGE_CACHE_SIZE - 1)) {
		pinned[1] = grab_cache_page(inode->i_mapping, last_index);
		if (!PageUptodate(pinned[1])) {
			ret = mpage_readpage(pinned[1], btrfs_get_block);
			BUG_ON(ret);
			wait_on_page_locked(pinned[1]);
		} else {
			unlock_page(pinned[1]);
		}
	}

	mutex_lock(&root->fs_info->fs_mutex);
	trans = btrfs_start_transaction(root, 1);
	if (!trans) {
		err = -ENOMEM;
		mutex_unlock(&root->fs_info->fs_mutex);
		goto out_unlock;
	}
	btrfs_set_trans_block_group(trans, inode);
	/* FIXME blocksize != 4096 */
	inode->i_blocks += num_blocks << 3;
	hint_block = 0;

	/* FIXME...EIEIO, ENOSPC and more */

	/* step one, delete the existing extents in this range */
	if (start_pos < inode->i_size) {
		/* FIXME blocksize != pagesize */
		ret = btrfs_drop_extents(trans, root, inode,
					 start_pos,
					 (pos + count + root->blocksize - 1) &
					 ~((u64)root->blocksize - 1),
					 &hint_block);
		BUG_ON(ret);
	}

	/* insert any holes we need to create */
	if (inode->i_size < start_pos) {
		u64 last_pos_in_file;
		u64 hole_size;
		u64 mask = root->blocksize - 1;
		last_pos_in_file = (inode->i_size + mask) & ~mask;
		hole_size = (start_pos - last_pos_in_file + mask) & ~mask;
		hole_size >>= inode->i_blkbits;
		if (last_pos_in_file < start_pos) {
			ret = btrfs_insert_file_extent(trans, root,
						       inode->i_ino,
						       last_pos_in_file,
						       0, 0, hole_size);
		}
		BUG_ON(ret);
	}

	/*
	 * either allocate an extent for the new bytes or setup the key
	 * to show we are doing inline data in the extent
	 */
	if (inode->i_size >= PAGE_CACHE_SIZE || pos + count < inode->i_size ||
	    pos + count - start_pos > BTRFS_MAX_INLINE_DATA_SIZE(root)) {
		ret = btrfs_alloc_extent(trans, root, inode->i_ino,
					 num_blocks, hint_block, (u64)-1,
					 &ins, 1);
		BUG_ON(ret);
		ret = btrfs_insert_file_extent(trans, root, inode->i_ino,
				       start_pos, ins.objectid, ins.offset,
				       ins.offset);
		BUG_ON(ret);
	} else {
		ins.offset = 0;
		ins.objectid = 0;
	}
	BUG_ON(ret);
	alloc_extent_start = ins.objectid;
	ret = btrfs_end_transaction(trans, root);
	mutex_unlock(&root->fs_info->fs_mutex);

	while (count > 0) {
		size_t offset = pos & (PAGE_CACHE_SIZE - 1);
		size_t write_bytes = min(count,
					 (size_t)PAGE_CACHE_SIZE - offset);
		size_t num_pages = (write_bytes + PAGE_CACHE_SIZE - 1) >>
					PAGE_CACHE_SHIFT;

		memset(pages, 0, sizeof(pages));
		ret = prepare_pages(root, file, pages, num_pages,
				    pos, first_index, last_index,
				    write_bytes, alloc_extent_start);
		BUG_ON(ret);

		/* FIXME blocks != pagesize */
		if (alloc_extent_start)
			alloc_extent_start += num_pages;
		ret = btrfs_copy_from_user(pos, num_pages,
					   write_bytes, pages, buf);
		BUG_ON(ret);

		ret = dirty_and_release_pages(NULL, root, file, pages,
					      num_pages, pos, write_bytes);
		BUG_ON(ret);
		btrfs_drop_pages(pages, num_pages);

		buf += write_bytes;
		count -= write_bytes;
		pos += write_bytes;
		num_written += write_bytes;

		balance_dirty_pages_ratelimited(inode->i_mapping);
		btrfs_btree_balance_dirty(root);
		cond_resched();
	}
out_unlock:
	mutex_unlock(&inode->i_mutex);
out:
	if (pinned[0])
		page_cache_release(pinned[0]);
	if (pinned[1])
		page_cache_release(pinned[1]);
	*ppos = pos;
	current->backing_dev_info = NULL;
	mark_inode_dirty(inode);
	return num_written ? num_written : err;
}

/*
 * FIXME, do this by stuffing the csum we want in the info hanging off
 * page->private.  For now, verify file csums on read
 */
static int btrfs_read_actor(read_descriptor_t *desc, struct page *page,
			    unsigned long offset, unsigned long size)
{
	char *kaddr;
	unsigned long left, count = desc->count;
	struct inode *inode = page->mapping->host;

	if (size > count)
		size = count;

	if (!PageChecked(page)) {
		/* FIXME, do it per block */
		struct btrfs_root *root = BTRFS_I(inode)->root;
		int ret;
		struct buffer_head *bh;

		if (page_has_buffers(page)) {
			bh = page_buffers(page);
			if (!buffer_mapped(bh)) {
				SetPageChecked(page);
				goto checked;
			}
		}

		ret = btrfs_csum_verify_file_block(root,
					   page->mapping->host->i_ino,
					   page->index << PAGE_CACHE_SHIFT,
					   kmap(page), PAGE_CACHE_SIZE);
		if (ret) {
			if (ret != -ENOENT) {
				printk("failed to verify ino %lu page %lu ret %d\n",
				       page->mapping->host->i_ino,
				       page->index, ret);
				memset(page_address(page), 1, PAGE_CACHE_SIZE);
				flush_dcache_page(page);
			}
		}
		SetPageChecked(page);
		kunmap(page);
	}
checked:
	/*
	 * Faults on the destination of a read are common, so do it before
	 * taking the kmap.
	 */
	if (!fault_in_pages_writeable(desc->arg.buf, size)) {
		kaddr = kmap_atomic(page, KM_USER0);
		left = __copy_to_user_inatomic(desc->arg.buf,
					       kaddr + offset, size);
		kunmap_atomic(kaddr, KM_USER0);
		if (left == 0)
			goto success;
	}

	/* Do it the slow way */
	kaddr = kmap(page);
	left = __copy_to_user(desc->arg.buf, kaddr + offset, size);
	kunmap(page);

	if (left) {
		size -= left;
		desc->error = -EFAULT;
	}
success:
	desc->count = count - size;
	desc->written += size;
	desc->arg.buf += size;
	return size;
}

/**
 * btrfs_file_aio_read - filesystem read routine, with a mod to csum verify
 * @iocb:	kernel I/O control block
 * @iov:	io vector request
 * @nr_segs:	number of segments in the iovec
 * @pos:	current file position
 */
static ssize_t btrfs_file_aio_read(struct kiocb *iocb, const struct iovec *iov,
				   unsigned long nr_segs, loff_t pos)
{
	struct file *filp = iocb->ki_filp;
	ssize_t retval;
	unsigned long seg;
	size_t count;
	loff_t *ppos = &iocb->ki_pos;

	count = 0;
	for (seg = 0; seg < nr_segs; seg++) {
		const struct iovec *iv = &iov[seg];

		/*
		 * If any segment has a negative length, or the cumulative
		 * length ever wraps negative then return -EINVAL.
		 */
		count += iv->iov_len;
		if (unlikely((ssize_t)(count|iv->iov_len) < 0))
			return -EINVAL;
		if (access_ok(VERIFY_WRITE, iv->iov_base, iv->iov_len))
			continue;
		if (seg == 0)
			return -EFAULT;
		nr_segs = seg;
		count -= iv->iov_len;	/* This segment is no good */
		break;
	}
	retval = 0;
	if (count) {
		for (seg = 0; seg < nr_segs; seg++) {
			read_descriptor_t desc;

			desc.written = 0;
			desc.arg.buf = iov[seg].iov_base;
			desc.count = iov[seg].iov_len;
			if (desc.count == 0)
				continue;
			desc.error = 0;
			do_generic_file_read(filp, ppos, &desc,
					     btrfs_read_actor);
			retval += desc.written;
			if (desc.error) {
				retval = retval ?: desc.error;
				break;
			}
		}
	}
	return retval;
}

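/*
 * fsync: commit the running transaction so everything done for this
 * inode is safely on disk.  No attempt is made yet to skip the commit
 * when the inode is unchanged (see the FIXME below).
 */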
static int btrfs_sync_file(struct file *file,
			   struct dentry *dentry, int datasync)
{
	struct inode *inode = dentry->d_inode;
	struct btrfs_root *root = BTRFS_I(inode)->root;
	int ret;
	struct btrfs_trans_handle *trans;

	/*
	 * FIXME, use inode generation number to check if we can skip the
	 * commit
	 */
	mutex_lock(&root->fs_info->fs_mutex);
	trans = btrfs_start_transaction(root, 1);
	if (!trans) {
		ret = -ENOMEM;
		goto out;
	}
	ret = btrfs_commit_transaction(trans, root);
out:
	mutex_unlock(&root->fs_info->fs_mutex);
	return ret > 0 ? -EIO : ret;
}

struct file_operations btrfs_file_operations = {
	.llseek		= generic_file_llseek,
	.read		= do_sync_read,
	.aio_read	= btrfs_file_aio_read,
	.write		= btrfs_file_write,
	.mmap		= generic_file_mmap,
	.open		= generic_file_open,
	.ioctl		= btrfs_ioctl,
	.fsync		= btrfs_sync_file,
#ifdef CONFIG_COMPAT
	.compat_ioctl	= btrfs_compat_ioctl,
#endif
};
