#include <linux/bitops.h>
#include <linux/slab.h>
#include <linux/bio.h>
#include <linux/mm.h>
#include <linux/gfp.h>
#include <linux/pagemap.h>
#include <linux/page-flags.h>
#include <linux/module.h>
#include <linux/spinlock.h>
#include <linux/blkdev.h>
#include <linux/swap.h>
#include <linux/version.h>
#include <linux/writeback.h>
#include <linux/pagevec.h>
#include "extent_io.h"
#include "extent_map.h"
#include "compat.h"
#include "ctree.h"
#include "btrfs_inode.h"

/* temporary define until extent_map moves out of btrfs */
struct kmem_cache *btrfs_cache_create(const char *name, size_t size,
				      unsigned long extra_flags,
				      void (*ctor)(void *, struct kmem_cache *,
						   unsigned long));

static struct kmem_cache *extent_state_cache;
static struct kmem_cache *extent_buffer_cache;

static LIST_HEAD(buffers);
static LIST_HEAD(states);

#define LEAK_DEBUG 0
#ifdef LEAK_DEBUG
static DEFINE_SPINLOCK(leak_lock);
#endif

#define BUFFER_LRU_MAX 64

struct tree_entry {
	u64 start;
	u64 end;
	struct rb_node rb_node;
};

struct extent_page_data {
	struct bio *bio;
	struct extent_io_tree *tree;
	get_extent_t *get_extent;

	/* tells writepage not to lock the state bits for this range
	 * it still does the unlocking
	 */
	int extent_locked;
};

int __init extent_io_init(void)
{
	extent_state_cache = btrfs_cache_create("extent_state",
					sizeof(struct extent_state), 0,
					NULL);
	if (!extent_state_cache)
		return -ENOMEM;

	extent_buffer_cache = btrfs_cache_create("extent_buffers",
					sizeof(struct extent_buffer), 0,
					NULL);
	if (!extent_buffer_cache)
		goto free_state_cache;
	return 0;

free_state_cache:
	kmem_cache_destroy(extent_state_cache);
	return -ENOMEM;
}

void extent_io_exit(void)
{
	struct extent_state *state;
	struct extent_buffer *eb;

	while (!list_empty(&states)) {
		state = list_entry(states.next, struct extent_state, leak_list);
		printk(KERN_ERR "btrfs state leak: start %llu end %llu "
		       "state %lu in tree %p refs %d\n",
		       (unsigned long long)state->start,
		       (unsigned long long)state->end,
		       state->state, state->tree, atomic_read(&state->refs));
		list_del(&state->leak_list);
		kmem_cache_free(extent_state_cache, state);

	}

	while (!list_empty(&buffers)) {
		eb = list_entry(buffers.next, struct extent_buffer, leak_list);
		printk(KERN_ERR "btrfs buffer leak start %llu len %lu "
		       "refs %d\n", (unsigned long long)eb->start,
		       eb->len, atomic_read(&eb->refs));
		list_del(&eb->leak_list);
		kmem_cache_free(extent_buffer_cache, eb);
	}
	if (extent_state_cache)
		kmem_cache_destroy(extent_state_cache);
	if (extent_buffer_cache)
		kmem_cache_destroy(extent_buffer_cache);
}

void extent_io_tree_init(struct extent_io_tree *tree,
			 struct address_space *mapping, gfp_t mask)
{
	tree->state.rb_node = NULL;
	tree->buffer.rb_node = NULL;
	tree->ops = NULL;
	tree->dirty_bytes = 0;
	spin_lock_init(&tree->lock);
	spin_lock_init(&tree->buffer_lock);
	tree->mapping = mapping;
}
EXPORT_SYMBOL(extent_io_tree_init);
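
/*
 * Illustrative sketch (not part of this file): a typical caller embeds an
 * extent_io_tree in its per-inode structure and initializes it once when
 * the in-memory inode is set up, roughly:
 *
 *	extent_io_tree_init(&BTRFS_I(inode)->io_tree,
 *			    inode->i_mapping, GFP_NOFS);
 *
 * The io_tree field name is the usual btrfs arrangement, but the exact
 * embedding is the caller's choice; the tree only needs a backing mapping
 * and a gfp mask to use for later state allocations.
 */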

static struct extent_state *alloc_extent_state(gfp_t mask)
{
	struct extent_state *state;
#ifdef LEAK_DEBUG
	unsigned long flags;
#endif

	state = kmem_cache_alloc(extent_state_cache, mask);
	if (!state)
		return state;
	state->state = 0;
	state->private = 0;
	state->tree = NULL;
#ifdef LEAK_DEBUG
	spin_lock_irqsave(&leak_lock, flags);
	list_add(&state->leak_list, &states);
	spin_unlock_irqrestore(&leak_lock, flags);
#endif
	atomic_set(&state->refs, 1);
	init_waitqueue_head(&state->wq);
	return state;
}
EXPORT_SYMBOL(alloc_extent_state);

static void free_extent_state(struct extent_state *state)
{
	if (!state)
		return;
	if (atomic_dec_and_test(&state->refs)) {
#ifdef LEAK_DEBUG
		unsigned long flags;
#endif
		WARN_ON(state->tree);
#ifdef LEAK_DEBUG
		spin_lock_irqsave(&leak_lock, flags);
		list_del(&state->leak_list);
		spin_unlock_irqrestore(&leak_lock, flags);
#endif
		kmem_cache_free(extent_state_cache, state);
	}
}
EXPORT_SYMBOL(free_extent_state);

static struct rb_node *tree_insert(struct rb_root *root, u64 offset,
				   struct rb_node *node)
{
	struct rb_node **p = &root->rb_node;
	struct rb_node *parent = NULL;
	struct tree_entry *entry;

	while (*p) {
		parent = *p;
		entry = rb_entry(parent, struct tree_entry, rb_node);

		if (offset < entry->start)
			p = &(*p)->rb_left;
		else if (offset > entry->end)
			p = &(*p)->rb_right;
		else
			return parent;
	}

	entry = rb_entry(node, struct tree_entry, rb_node);
	rb_link_node(node, parent, p);
	rb_insert_color(node, root);
	return NULL;
}

static struct rb_node *__etree_search(struct extent_io_tree *tree, u64 offset,
				      struct rb_node **prev_ret,
				      struct rb_node **next_ret)
{
	struct rb_root *root = &tree->state;
	struct rb_node *n = root->rb_node;
	struct rb_node *prev = NULL;
	struct rb_node *orig_prev = NULL;
	struct tree_entry *entry;
	struct tree_entry *prev_entry = NULL;

	while (n) {
		entry = rb_entry(n, struct tree_entry, rb_node);
		prev = n;
		prev_entry = entry;

		if (offset < entry->start)
			n = n->rb_left;
		else if (offset > entry->end)
			n = n->rb_right;
		else
			return n;
	}

	if (prev_ret) {
		orig_prev = prev;
		while (prev && offset > prev_entry->end) {
			prev = rb_next(prev);
			prev_entry = rb_entry(prev, struct tree_entry, rb_node);
		}
		*prev_ret = prev;
		prev = orig_prev;
	}

	if (next_ret) {
		prev_entry = rb_entry(prev, struct tree_entry, rb_node);
		while (prev && offset < prev_entry->start) {
			prev = rb_prev(prev);
			prev_entry = rb_entry(prev, struct tree_entry, rb_node);
		}
		*next_ret = prev;
	}
	return NULL;
}

static inline struct rb_node *tree_search(struct extent_io_tree *tree,
					  u64 offset)
{
	struct rb_node *prev = NULL;
	struct rb_node *ret;

	ret = __etree_search(tree, offset, &prev, NULL);
	if (!ret)
		return prev;
	return ret;
}

static struct extent_buffer *buffer_tree_insert(struct extent_io_tree *tree,
					u64 offset, struct rb_node *node)
{
	struct rb_root *root = &tree->buffer;
	struct rb_node **p = &root->rb_node;
	struct rb_node *parent = NULL;
	struct extent_buffer *eb;

	while (*p) {
		parent = *p;
		eb = rb_entry(parent, struct extent_buffer, rb_node);

		if (offset < eb->start)
			p = &(*p)->rb_left;
		else if (offset > eb->start)
			p = &(*p)->rb_right;
		else
			return eb;
	}

	rb_link_node(node, parent, p);
	rb_insert_color(node, root);
	return NULL;
}

static struct extent_buffer *buffer_search(struct extent_io_tree *tree,
					   u64 offset)
{
	struct rb_root *root = &tree->buffer;
	struct rb_node *n = root->rb_node;
	struct extent_buffer *eb;

	while (n) {
		eb = rb_entry(n, struct extent_buffer, rb_node);
		if (offset < eb->start)
			n = n->rb_left;
		else if (offset > eb->start)
			n = n->rb_right;
		else
			return eb;
	}
	return NULL;
}

/*
 * utility function to look for merge candidates inside a given range.
 * Any extents with matching state are merged together into a single
 * extent in the tree.  Extents with EXTENT_IOBITS in their state field
 * are not merged because the end_io handlers need to be able to do
 * operations on them without sleeping (or doing allocations/splits).
 *
 * This should be called with the tree lock held.
 */
static int merge_state(struct extent_io_tree *tree,
		       struct extent_state *state)
{
	struct extent_state *other;
	struct rb_node *other_node;

	if (state->state & (EXTENT_IOBITS | EXTENT_BOUNDARY))
		return 0;

	other_node = rb_prev(&state->rb_node);
	if (other_node) {
		other = rb_entry(other_node, struct extent_state, rb_node);
		if (other->end == state->start - 1 &&
		    other->state == state->state) {
			state->start = other->start;
			other->tree = NULL;
			rb_erase(&other->rb_node, &tree->state);
			free_extent_state(other);
		}
	}
	other_node = rb_next(&state->rb_node);
	if (other_node) {
		other = rb_entry(other_node, struct extent_state, rb_node);
		if (other->start == state->end + 1 &&
		    other->state == state->state) {
			other->start = state->start;
			state->tree = NULL;
			rb_erase(&state->rb_node, &tree->state);
			free_extent_state(state);
		}
	}
	return 0;
}

static void set_state_cb(struct extent_io_tree *tree,
			 struct extent_state *state,
			 unsigned long bits)
{
	if (tree->ops && tree->ops->set_bit_hook) {
		tree->ops->set_bit_hook(tree->mapping->host, state->start,
					state->end, state->state, bits);
	}
}

static void clear_state_cb(struct extent_io_tree *tree,
			   struct extent_state *state,
			   unsigned long bits)
{
	if (tree->ops && tree->ops->clear_bit_hook) {
		tree->ops->clear_bit_hook(tree->mapping->host, state->start,
					  state->end, state->state, bits);
	}
}

/*
 * insert an extent_state struct into the tree.  'bits' are set on the
 * struct before it is inserted.
 *
 * This may return -EEXIST if the extent is already there, in which case the
 * state struct is freed.
 *
 * The tree lock is not taken internally.  This is a utility function and
 * probably isn't what you want to call (see set/clear_extent_bit).
 */
static int insert_state(struct extent_io_tree *tree,
			struct extent_state *state, u64 start, u64 end,
			int bits)
{
	struct rb_node *node;

	if (end < start) {
		printk(KERN_ERR "btrfs end < start %llu %llu\n",
		       (unsigned long long)end,
		       (unsigned long long)start);
		WARN_ON(1);
	}
	if (bits & EXTENT_DIRTY)
		tree->dirty_bytes += end - start + 1;
	set_state_cb(tree, state, bits);
	state->state |= bits;
	state->start = start;
	state->end = end;
	node = tree_insert(&tree->state, end, &state->rb_node);
	if (node) {
		struct extent_state *found;
		found = rb_entry(node, struct extent_state, rb_node);
		printk(KERN_ERR "btrfs found node %llu %llu on insert of "
		       "%llu %llu\n", (unsigned long long)found->start,
		       (unsigned long long)found->end,
		       (unsigned long long)start, (unsigned long long)end);
		free_extent_state(state);
		return -EEXIST;
	}
	state->tree = tree;
	merge_state(tree, state);
	return 0;
}

/*
 * split a given extent state struct in two, inserting the preallocated
 * struct 'prealloc' as the newly created second half.  'split' indicates an
 * offset inside 'orig' where it should be split.
 *
 * Before calling,
 * the tree has 'orig' at [orig->start, orig->end].  After calling, there
 * are two extent state structs in the tree:
 * prealloc: [orig->start, split - 1]
 * orig: [ split, orig->end ]
 *
 * The tree locks are not taken by this function.  They need to be held
 * by the caller.
 */
static int split_state(struct extent_io_tree *tree, struct extent_state *orig,
		       struct extent_state *prealloc, u64 split)
{
	struct rb_node *node;
	prealloc->start = orig->start;
	prealloc->end = split - 1;
	prealloc->state = orig->state;
	orig->start = split;

	node = tree_insert(&tree->state, prealloc->end, &prealloc->rb_node);
	if (node) {
		struct extent_state *found;
		found = rb_entry(node, struct extent_state, rb_node);
		free_extent_state(prealloc);
		return -EEXIST;
	}
	prealloc->tree = tree;
	return 0;
}

/*
 * utility function to clear some bits in an extent state struct.
 * it will optionally wake up anyone waiting on this state (wake == 1), or
 * forcibly remove the state from the tree (delete == 1).
 *
 * If no bits are set on the state struct after clearing things, the
 * struct is freed and removed from the tree
 */
static int clear_state_bit(struct extent_io_tree *tree,
			   struct extent_state *state, int bits, int wake,
			   int delete)
{
	int ret = state->state & bits;

	if ((bits & EXTENT_DIRTY) && (state->state & EXTENT_DIRTY)) {
		u64 range = state->end - state->start + 1;
		WARN_ON(range > tree->dirty_bytes);
		tree->dirty_bytes -= range;
	}
	clear_state_cb(tree, state, bits);
	state->state &= ~bits;
	if (wake)
		wake_up(&state->wq);
	if (delete || state->state == 0) {
		if (state->tree) {
			clear_state_cb(tree, state, state->state);
			rb_erase(&state->rb_node, &tree->state);
			state->tree = NULL;
			free_extent_state(state);
		} else {
			WARN_ON(1);
		}
	} else {
		merge_state(tree, state);
	}
	return ret;
}

/*
 * clear some bits on a range in the tree.  This may require splitting
 * or inserting elements in the tree, so the gfp mask is used to
 * indicate which allocations or sleeping are allowed.
 *
 * pass 'wake' == 1 to kick any sleepers, and 'delete' == 1 to remove
 * the given range from the tree regardless of state (ie for truncate).
 *
 * the range [start, end] is inclusive.
 *
 * This takes the tree lock, and returns < 0 on error, > 0 if any of the
 * bits were already set, or zero if none of the bits were already set.
 */
int clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
		     int bits, int wake, int delete, gfp_t mask)
{
	struct extent_state *state;
	struct extent_state *prealloc = NULL;
	struct rb_node *node;
	int err;
	int set = 0;

again:
	if (!prealloc && (mask & __GFP_WAIT)) {
		prealloc = alloc_extent_state(mask);
		if (!prealloc)
			return -ENOMEM;
	}

	spin_lock(&tree->lock);
	/*
	 * this search will find the extents that end after
	 * our range starts
	 */
	node = tree_search(tree, start);
	if (!node)
		goto out;
	state = rb_entry(node, struct extent_state, rb_node);
	if (state->start > end)
		goto out;
	WARN_ON(state->end < start);

	/*
	 *     | ---- desired range ---- |
	 *  | state | or
	 *  | ------------- state -------------- |
	 *
	 * We need to split the extent we found, and may flip
	 * bits on second half.
	 *
	 * If the extent we found extends past our range, we
	 * just split and search again.  It'll get split again
	 * the next time though.
	 *
	 * If the extent we found is inside our range, we clear
	 * the desired bit on it.
	 */

	if (state->start < start) {
		if (!prealloc)
			prealloc = alloc_extent_state(GFP_ATOMIC);
		err = split_state(tree, state, prealloc, start);
		BUG_ON(err == -EEXIST);
		prealloc = NULL;
		if (err)
			goto out;
		if (state->end <= end) {
			start = state->end + 1;
			set |= clear_state_bit(tree, state, bits,
					wake, delete);
		} else {
			start = state->start;
		}
		goto search_again;
	}
	/*
	 * | ---- desired range ---- |
	 *                        | state |
	 * We need to split the extent, and clear the bit
	 * on the first half
	 */
	if (state->start <= end && state->end > end) {
		if (!prealloc)
			prealloc = alloc_extent_state(GFP_ATOMIC);
		err = split_state(tree, state, prealloc, end + 1);
		BUG_ON(err == -EEXIST);

		if (wake)
			wake_up(&state->wq);
		set |= clear_state_bit(tree, prealloc, bits,
				       wake, delete);
		prealloc = NULL;
		goto out;
	}

	start = state->end + 1;
	set |= clear_state_bit(tree, state, bits, wake, delete);
	goto search_again;

out:
	spin_unlock(&tree->lock);
	if (prealloc)
		free_extent_state(prealloc);

	return set;

search_again:
	if (start > end)
		goto out;
	spin_unlock(&tree->lock);
	if (mask & __GFP_WAIT)
		cond_resched();
	goto again;
}
EXPORT_SYMBOL(clear_extent_bit);

static int wait_on_state(struct extent_io_tree *tree,
			 struct extent_state *state)
		__releases(tree->lock)
		__acquires(tree->lock)
{
	DEFINE_WAIT(wait);
	prepare_to_wait(&state->wq, &wait, TASK_UNINTERRUPTIBLE);
	spin_unlock(&tree->lock);
	schedule();
	spin_lock(&tree->lock);
	finish_wait(&state->wq, &wait);
	return 0;
}

/*
 * waits for one or more bits to clear on a range in the state tree.
 * The range [start, end] is inclusive.
 * The tree lock is taken by this function
 */
int wait_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, int bits)
{
	struct extent_state *state;
	struct rb_node *node;

	spin_lock(&tree->lock);
again:
	while (1) {
		/*
		 * this search will find all the extents that end after
		 * our range starts
		 */
		node = tree_search(tree, start);
		if (!node)
			break;

		state = rb_entry(node, struct extent_state, rb_node);

		if (state->start > end)
			goto out;

		if (state->state & bits) {
			start = state->start;
			atomic_inc(&state->refs);
			wait_on_state(tree, state);
			free_extent_state(state);
			goto again;
		}
		start = state->end + 1;

		if (start > end)
			break;

		if (need_resched()) {
			spin_unlock(&tree->lock);
			cond_resched();
			spin_lock(&tree->lock);
		}
	}
out:
	spin_unlock(&tree->lock);
	return 0;
}
EXPORT_SYMBOL(wait_extent_bit);

static void set_state_bits(struct extent_io_tree *tree,
			   struct extent_state *state,
			   int bits)
{
	if ((bits & EXTENT_DIRTY) && !(state->state & EXTENT_DIRTY)) {
		u64 range = state->end - state->start + 1;
		tree->dirty_bytes += range;
	}
	set_state_cb(tree, state, bits);
	state->state |= bits;
}

/*
 * set some bits on a range in the tree.  This may require allocations
 * or sleeping, so the gfp mask is used to indicate what is allowed.
 *
 * If 'exclusive' == 1, this will fail with -EEXIST if some part of the
 * range already has the desired bits set.  The start of the existing
 * range is returned in failed_start in this case.
 *
 * [start, end] is inclusive
 * This takes the tree lock.
 */
static int set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
			  int bits, int exclusive, u64 *failed_start,
			  gfp_t mask)
{
	struct extent_state *state;
	struct extent_state *prealloc = NULL;
	struct rb_node *node;
	int err = 0;
	int set;
	u64 last_start;
	u64 last_end;
again:
	if (!prealloc && (mask & __GFP_WAIT)) {
		prealloc = alloc_extent_state(mask);
		if (!prealloc)
			return -ENOMEM;
	}

	spin_lock(&tree->lock);
	/*
	 * this search will find all the extents that end after
	 * our range starts.
	 */
	node = tree_search(tree, start);
	if (!node) {
		err = insert_state(tree, prealloc, start, end, bits);
		prealloc = NULL;
		BUG_ON(err == -EEXIST);
		goto out;
	}

	state = rb_entry(node, struct extent_state, rb_node);
	last_start = state->start;
	last_end = state->end;

	/*
	 * | ---- desired range ---- |
	 * | state |
	 *
	 * Just lock what we found and keep going
	 */
	if (state->start == start && state->end <= end) {
		set = state->state & bits;
		if (set && exclusive) {
			*failed_start = state->start;
			err = -EEXIST;
			goto out;
		}
		set_state_bits(tree, state, bits);
		start = state->end + 1;
		merge_state(tree, state);
		goto search_again;
	}

	/*
	 *     | ---- desired range ---- |
	 * | state |
	 *   or
	 * | ------------- state -------------- |
	 *
	 * We need to split the extent we found, and may flip bits on
	 * second half.
	 *
	 * If the extent we found extends past our
	 * range, we just split and search again.  It'll get split
	 * again the next time though.
	 *
	 * If the extent we found is inside our range, we set the
	 * desired bit on it.
	 */
	if (state->start < start) {
		set = state->state & bits;
		if (exclusive && set) {
			*failed_start = start;
			err = -EEXIST;
			goto out;
		}
		err = split_state(tree, state, prealloc, start);
		BUG_ON(err == -EEXIST);
		prealloc = NULL;
		if (err)
			goto out;
		if (state->end <= end) {
			set_state_bits(tree, state, bits);
			start = state->end + 1;
			merge_state(tree, state);
		} else {
			start = state->start;
		}
		goto search_again;
	}
	/*
	 * | ---- desired range ---- |
	 *     | state | or               | state |
	 *
	 * There's a hole, we need to insert something in it and
	 * ignore the extent we found.
	 */
	if (state->start > start) {
		u64 this_end;
		if (end < last_start)
			this_end = end;
		else
			this_end = last_start - 1;
		err = insert_state(tree, prealloc, start, this_end,
				   bits);
		prealloc = NULL;
		BUG_ON(err == -EEXIST);
		if (err)
			goto out;
		start = this_end + 1;
		goto search_again;
	}
	/*
	 * | ---- desired range ---- |
	 *                        | state |
	 * We need to split the extent, and set the bit
	 * on the first half
	 */
	if (state->start <= end && state->end > end) {
		set = state->state & bits;
		if (exclusive && set) {
			*failed_start = start;
			err = -EEXIST;
			goto out;
		}
		err = split_state(tree, state, prealloc, end + 1);
		BUG_ON(err == -EEXIST);

		set_state_bits(tree, prealloc, bits);
		merge_state(tree, prealloc);
		prealloc = NULL;
		goto out;
	}

	goto search_again;

out:
	spin_unlock(&tree->lock);
	if (prealloc)
		free_extent_state(prealloc);

	return err;

search_again:
	if (start > end)
		goto out;
	spin_unlock(&tree->lock);
	if (mask & __GFP_WAIT)
		cond_resched();
	goto again;
}
EXPORT_SYMBOL(set_extent_bit);

/* wrappers around set/clear extent bit */
int set_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end,
		     gfp_t mask)
{
	return set_extent_bit(tree, start, end, EXTENT_DIRTY, 0, NULL,
			      mask);
}
EXPORT_SYMBOL(set_extent_dirty);

int set_extent_ordered(struct extent_io_tree *tree, u64 start, u64 end,
		       gfp_t mask)
{
	return set_extent_bit(tree, start, end, EXTENT_ORDERED, 0, NULL, mask);
}
EXPORT_SYMBOL(set_extent_ordered);

int set_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
		    int bits, gfp_t mask)
{
	return set_extent_bit(tree, start, end, bits, 0, NULL,
			      mask);
}
EXPORT_SYMBOL(set_extent_bits);

int clear_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
		      int bits, gfp_t mask)
{
	return clear_extent_bit(tree, start, end, bits, 0, 0, mask);
}
EXPORT_SYMBOL(clear_extent_bits);

int set_extent_delalloc(struct extent_io_tree *tree, u64 start, u64 end,
			gfp_t mask)
{
	return set_extent_bit(tree, start, end,
			      EXTENT_DELALLOC | EXTENT_DIRTY,
			      0, NULL, mask);
}
EXPORT_SYMBOL(set_extent_delalloc);

int clear_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end,
		       gfp_t mask)
{
	return clear_extent_bit(tree, start, end,
				EXTENT_DIRTY | EXTENT_DELALLOC, 0, 0, mask);
}
EXPORT_SYMBOL(clear_extent_dirty);

int clear_extent_ordered(struct extent_io_tree *tree, u64 start, u64 end,
			 gfp_t mask)
{
	return clear_extent_bit(tree, start, end, EXTENT_ORDERED, 1, 0, mask);
}
EXPORT_SYMBOL(clear_extent_ordered);

int set_extent_new(struct extent_io_tree *tree, u64 start, u64 end,
		   gfp_t mask)
{
	return set_extent_bit(tree, start, end, EXTENT_NEW, 0, NULL,
			      mask);
}
EXPORT_SYMBOL(set_extent_new);

static int clear_extent_new(struct extent_io_tree *tree, u64 start, u64 end,
			    gfp_t mask)
{
	return clear_extent_bit(tree, start, end, EXTENT_NEW, 0, 0, mask);
}

int set_extent_uptodate(struct extent_io_tree *tree, u64 start, u64 end,
			gfp_t mask)
{
	return set_extent_bit(tree, start, end, EXTENT_UPTODATE, 0, NULL,
			      mask);
}
EXPORT_SYMBOL(set_extent_uptodate);

static int clear_extent_uptodate(struct extent_io_tree *tree, u64 start,
				 u64 end, gfp_t mask)
{
	return clear_extent_bit(tree, start, end, EXTENT_UPTODATE, 0, 0, mask);
}

static int set_extent_writeback(struct extent_io_tree *tree, u64 start, u64 end,
				gfp_t mask)
{
	return set_extent_bit(tree, start, end, EXTENT_WRITEBACK,
			      0, NULL, mask);
}

static int clear_extent_writeback(struct extent_io_tree *tree, u64 start,
				  u64 end, gfp_t mask)
{
	return clear_extent_bit(tree, start, end, EXTENT_WRITEBACK, 1, 0, mask);
}

int wait_on_extent_writeback(struct extent_io_tree *tree, u64 start, u64 end)
{
	return wait_extent_bit(tree, start, end, EXTENT_WRITEBACK);
}
EXPORT_SYMBOL(wait_on_extent_writeback);

/*
 * either insert or lock state struct between start and end use mask to tell
 * us if waiting is desired.
 */
int lock_extent(struct extent_io_tree *tree, u64 start, u64 end, gfp_t mask)
{
	int err;
	u64 failed_start;
	while (1) {
		err = set_extent_bit(tree, start, end, EXTENT_LOCKED, 1,
				     &failed_start, mask);
		if (err == -EEXIST && (mask & __GFP_WAIT)) {
			wait_extent_bit(tree, failed_start, end, EXTENT_LOCKED);
			start = failed_start;
		} else {
			break;
		}
		WARN_ON(start > end);
	}
	return err;
}
EXPORT_SYMBOL(lock_extent);

int try_lock_extent(struct extent_io_tree *tree, u64 start, u64 end,
		    gfp_t mask)
{
	int err;
	u64 failed_start;

	err = set_extent_bit(tree, start, end, EXTENT_LOCKED, 1,
			     &failed_start, mask);
	if (err == -EEXIST) {
		if (failed_start > start)
			clear_extent_bit(tree, start, failed_start - 1,
					 EXTENT_LOCKED, 1, 0, mask);
		return 0;
	}
	return 1;
}
EXPORT_SYMBOL(try_lock_extent);

int unlock_extent(struct extent_io_tree *tree, u64 start, u64 end,
		  gfp_t mask)
{
	return clear_extent_bit(tree, start, end, EXTENT_LOCKED, 1, 0, mask);
}
EXPORT_SYMBOL(unlock_extent);
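
/*
 * Illustrative sketch (not part of this file): the usual calling pattern
 * is to lock an inclusive byte range around an I/O, do the work, then
 * unlock the same range, roughly:
 *
 *	lock_extent(tree, start, start + len - 1, GFP_NOFS);
 *	... read or write the range ...
 *	unlock_extent(tree, start, start + len - 1, GFP_NOFS);
 *
 * try_lock_extent() is the non-blocking variant: it returns 1 when the
 * whole range was locked, and 0 (after undoing any partial lock) when
 * part of the range was already EXTENT_LOCKED.
 */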

/*
 * helper function to set pages and extents in the tree dirty
 */
int set_range_dirty(struct extent_io_tree *tree, u64 start, u64 end)
{
	unsigned long index = start >> PAGE_CACHE_SHIFT;
	unsigned long end_index = end >> PAGE_CACHE_SHIFT;
	struct page *page;

	while (index <= end_index) {
		page = find_get_page(tree->mapping, index);
		BUG_ON(!page);
		__set_page_dirty_nobuffers(page);
		page_cache_release(page);
		index++;
	}
	set_extent_dirty(tree, start, end, GFP_NOFS);
	return 0;
}
EXPORT_SYMBOL(set_range_dirty);

/*
 * helper function to set both pages and extents in the tree writeback
 */
static int set_range_writeback(struct extent_io_tree *tree, u64 start, u64 end)
{
	unsigned long index = start >> PAGE_CACHE_SHIFT;
	unsigned long end_index = end >> PAGE_CACHE_SHIFT;
	struct page *page;

	while (index <= end_index) {
		page = find_get_page(tree->mapping, index);
		BUG_ON(!page);
		set_page_writeback(page);
		page_cache_release(page);
		index++;
	}
	set_extent_writeback(tree, start, end, GFP_NOFS);
	return 0;
}

/*
 * find the first offset in the io tree with 'bits' set. zero is
 * returned if we find something, and *start_ret and *end_ret are
 * set to reflect the state struct that was found.
 *
 * If nothing was found, 1 is returned, < 0 on error
 */
int find_first_extent_bit(struct extent_io_tree *tree, u64 start,
			  u64 *start_ret, u64 *end_ret, int bits)
{
	struct rb_node *node;
	struct extent_state *state;
	int ret = 1;

	spin_lock(&tree->lock);
	/*
	 * this search will find all the extents that end after
	 * our range starts.
	 */
	node = tree_search(tree, start);
	if (!node)
		goto out;

	while (1) {
		state = rb_entry(node, struct extent_state, rb_node);
		if (state->end >= start && (state->state & bits)) {
			*start_ret = state->start;
			*end_ret = state->end;
			ret = 0;
			break;
		}
		node = rb_next(node);
		if (!node)
			break;
	}
out:
	spin_unlock(&tree->lock);
	return ret;
}
EXPORT_SYMBOL(find_first_extent_bit);

/* find the first state struct with 'bits' set after 'start', and
 * return it.  tree->lock must be held.  NULL will be returned if
 * nothing was found after 'start'
 */
struct extent_state *find_first_extent_bit_state(struct extent_io_tree *tree,
						 u64 start, int bits)
{
	struct rb_node *node;
	struct extent_state *state;

	/*
	 * this search will find all the extents that end after
	 * our range starts.
	 */
	node = tree_search(tree, start);
	if (!node)
		goto out;

	while (1) {
		state = rb_entry(node, struct extent_state, rb_node);
		if (state->end >= start && (state->state & bits))
			return state;

		node = rb_next(node);
		if (!node)
			break;
	}
out:
	return NULL;
}
EXPORT_SYMBOL(find_first_extent_bit_state);

/*
 * find a contiguous range of bytes in the file marked as delalloc, not
 * more than 'max_bytes'.  start and end are used to return the range,
 *
 * 1 is returned if we find something, 0 if nothing was in the tree
 */
static noinline u64 find_delalloc_range(struct extent_io_tree *tree,
					u64 *start, u64 *end, u64 max_bytes)
{
	struct rb_node *node;
	struct extent_state *state;
	u64 cur_start = *start;
	u64 found = 0;
	u64 total_bytes = 0;

	spin_lock(&tree->lock);

	/*
	 * this search will find all the extents that end after
	 * our range starts.
	 */
	node = tree_search(tree, cur_start);
	if (!node) {
		if (!found)
			*end = (u64)-1;
		goto out;
	}

	while (1) {
		state = rb_entry(node, struct extent_state, rb_node);
		if (found && (state->start != cur_start ||
			      (state->state & EXTENT_BOUNDARY))) {
			goto out;
		}
		if (!(state->state & EXTENT_DELALLOC)) {
			if (!found)
				*end = state->end;
			goto out;
		}
		if (!found)
			*start = state->start;
		found++;
		*end = state->end;
		cur_start = state->end + 1;
		node = rb_next(node);
		if (!node)
			break;
		total_bytes += state->end - state->start + 1;
		if (total_bytes >= max_bytes)
			break;
	}
out:
	spin_unlock(&tree->lock);
	return found;
}

static noinline int __unlock_for_delalloc(struct inode *inode,
					  struct page *locked_page,
					  u64 start, u64 end)
{
	int ret;
	struct page *pages[16];
	unsigned long index = start >> PAGE_CACHE_SHIFT;
	unsigned long end_index = end >> PAGE_CACHE_SHIFT;
	unsigned long nr_pages = end_index - index + 1;
	int i;

	if (index == locked_page->index && end_index == index)
		return 0;

	while (nr_pages > 0) {
		ret = find_get_pages_contig(inode->i_mapping, index,
				     min_t(unsigned long, nr_pages,
				     ARRAY_SIZE(pages)), pages);
		for (i = 0; i < ret; i++) {
			if (pages[i] != locked_page)
				unlock_page(pages[i]);
			page_cache_release(pages[i]);
		}
		nr_pages -= ret;
		index += ret;
		cond_resched();
	}
	return 0;
}

static noinline int lock_delalloc_pages(struct inode *inode,
					struct page *locked_page,
					u64 delalloc_start,
					u64 delalloc_end)
{
	unsigned long index = delalloc_start >> PAGE_CACHE_SHIFT;
	unsigned long start_index = index;
	unsigned long end_index = delalloc_end >> PAGE_CACHE_SHIFT;
	unsigned long pages_locked = 0;
	struct page *pages[16];
	unsigned long nrpages;
	int ret;
	int i;

	/* the caller is responsible for locking the start index */
	if (index == locked_page->index && index == end_index)
		return 0;

	/* skip the page at the start index */
	nrpages = end_index - index + 1;
	while (nrpages > 0) {
		ret = find_get_pages_contig(inode->i_mapping, index,
				     min_t(unsigned long,
				     nrpages, ARRAY_SIZE(pages)), pages);
		if (ret == 0) {
			ret = -EAGAIN;
			goto done;
		}
		/* now we have an array of pages, lock them all */
		for (i = 0; i < ret; i++) {
			/*
			 * the caller is taking responsibility for
			 * locked_page
			 */
			if (pages[i] != locked_page) {
				lock_page(pages[i]);
				if (!PageDirty(pages[i]) ||
				    pages[i]->mapping != inode->i_mapping) {
					ret = -EAGAIN;
					unlock_page(pages[i]);
					page_cache_release(pages[i]);
					goto done;
				}
			}
			page_cache_release(pages[i]);
			pages_locked++;
		}
		nrpages -= ret;
		index += ret;
		cond_resched();
	}
	ret = 0;
done:
	if (ret && pages_locked) {
		__unlock_for_delalloc(inode, locked_page,
			      delalloc_start,
			      ((u64)(start_index + pages_locked - 1)) <<
			      PAGE_CACHE_SHIFT);
	}
	return ret;
}

/*
 * find a contiguous range of bytes in the file marked as delalloc, not
 * more than 'max_bytes'.  start and end are used to return the range,
 *
 * 1 is returned if we find something, 0 if nothing was in the tree
 */
static noinline u64 find_lock_delalloc_range(struct inode *inode,
					     struct extent_io_tree *tree,
					     struct page *locked_page,
					     u64 *start, u64 *end,
					     u64 max_bytes)
{
	u64 delalloc_start;
	u64 delalloc_end;
	u64 found;
	int ret;
	int loops = 0;

again:
	/* step one, find a bunch of delalloc bytes starting at start */
	delalloc_start = *start;
	delalloc_end = 0;
	found = find_delalloc_range(tree, &delalloc_start, &delalloc_end,
				    max_bytes);
	if (!found || delalloc_end <= *start) {
		*start = delalloc_start;
		*end = delalloc_end;
		return found;
	}

	/*
	 * start comes from the offset of locked_page.  We have to lock
	 * pages in order, so we can't process delalloc bytes before
	 * locked_page
	 */
	if (delalloc_start < *start)
		delalloc_start = *start;

	/*
	 * make sure to limit the number of pages we try to lock down
	 * if we're looping.
	 */
	if (delalloc_end + 1 - delalloc_start > max_bytes && loops)
		delalloc_end = delalloc_start + PAGE_CACHE_SIZE - 1;

	/* step two, lock all the pages after the page that has start */
	ret = lock_delalloc_pages(inode, locked_page,
				  delalloc_start, delalloc_end);
	if (ret == -EAGAIN) {
		/* some of the pages are gone, let's avoid looping by
		 * shortening the size of the delalloc range we're searching
		 */
		if (!loops) {
			unsigned long offset = (*start) & (PAGE_CACHE_SIZE - 1);
			max_bytes = PAGE_CACHE_SIZE - offset;
			loops = 1;
			goto again;
		} else {
			found = 0;
			goto out_failed;
		}
	}
	BUG_ON(ret);

	/* step three, lock the state bits for the whole range */
	lock_extent(tree, delalloc_start, delalloc_end, GFP_NOFS);

	/* then test to make sure it is all still delalloc */
	ret = test_range_bit(tree, delalloc_start, delalloc_end,
			     EXTENT_DELALLOC, 1);
	if (!ret) {
		unlock_extent(tree, delalloc_start, delalloc_end, GFP_NOFS);
		__unlock_for_delalloc(inode, locked_page,
			      delalloc_start, delalloc_end);
		cond_resched();
		goto again;
	}
	*start = delalloc_start;
	*end = delalloc_end;
out_failed:
	return found;
}

int extent_clear_unlock_delalloc(struct inode *inode,
				struct extent_io_tree *tree,
				u64 start, u64 end, struct page *locked_page,
				int unlock_pages,
				int clear_unlock,
				int clear_delalloc, int clear_dirty,
				int set_writeback,
				int end_writeback)
{
	int ret;
	struct page *pages[16];
	unsigned long index = start >> PAGE_CACHE_SHIFT;
	unsigned long end_index = end >> PAGE_CACHE_SHIFT;
	unsigned long nr_pages = end_index - index + 1;
	int i;
	int clear_bits = 0;

	if (clear_unlock)
		clear_bits |= EXTENT_LOCKED;
	if (clear_dirty)
		clear_bits |= EXTENT_DIRTY;

	if (clear_delalloc)
		clear_bits |= EXTENT_DELALLOC;

	clear_extent_bit(tree, start, end, clear_bits, 1, 0, GFP_NOFS);
	if (!(unlock_pages || clear_dirty || set_writeback || end_writeback))
		return 0;

	while (nr_pages > 0) {
		ret = find_get_pages_contig(inode->i_mapping, index,
				     min_t(unsigned long,
				     nr_pages, ARRAY_SIZE(pages)), pages);
		for (i = 0; i < ret; i++) {
			if (pages[i] == locked_page) {
				page_cache_release(pages[i]);
				continue;
			}
			if (clear_dirty)
				clear_page_dirty_for_io(pages[i]);
			if (set_writeback)
				set_page_writeback(pages[i]);
			if (end_writeback)
				end_page_writeback(pages[i]);
			if (unlock_pages)
				unlock_page(pages[i]);
			page_cache_release(pages[i]);
		}
		nr_pages -= ret;
		index += ret;
		cond_resched();
	}
	return 0;
}
EXPORT_SYMBOL(extent_clear_unlock_delalloc);

/*
 * count the number of bytes in the tree that have a given bit(s)
 * set.  This can be fairly slow, except for EXTENT_DIRTY which is
 * cached.  The total number found is returned.
 */
u64 count_range_bits(struct extent_io_tree *tree,
		     u64 *start, u64 search_end, u64 max_bytes,
		     unsigned long bits)
{
	struct rb_node *node;
	struct extent_state *state;
	u64 cur_start = *start;
	u64 total_bytes = 0;
	int found = 0;

	if (search_end <= cur_start) {
		WARN_ON(1);
		return 0;
	}

	spin_lock(&tree->lock);
	if (cur_start == 0 && bits == EXTENT_DIRTY) {
		total_bytes = tree->dirty_bytes;
		goto out;
	}
	/*
	 * this search will find all the extents that end after
	 * our range starts.
	 */
	node = tree_search(tree, cur_start);
	if (!node)
		goto out;

	while (1) {
		state = rb_entry(node, struct extent_state, rb_node);
		if (state->start > search_end)
			break;
		if (state->end >= cur_start && (state->state & bits)) {
			total_bytes += min(search_end, state->end) + 1 -
				       max(cur_start, state->start);
			if (total_bytes >= max_bytes)
				break;
			if (!found) {
				*start = state->start;
				found = 1;
			}
		}
		node = rb_next(node);
		if (!node)
			break;
	}
out:
	spin_unlock(&tree->lock);
	return total_bytes;
}

#if 0
/*
 * helper function to lock both pages and extents in the tree.
 * pages must be locked first.
 */
static int lock_range(struct extent_io_tree *tree, u64 start, u64 end)
{
	unsigned long index = start >> PAGE_CACHE_SHIFT;
	unsigned long end_index = end >> PAGE_CACHE_SHIFT;
	struct page *page;
	int err;

	while (index <= end_index) {
		page = grab_cache_page(tree->mapping, index);
		if (!page) {
			err = -ENOMEM;
			goto failed;
		}
		if (IS_ERR(page)) {
			err = PTR_ERR(page);
			goto failed;
		}
		index++;
	}
	lock_extent(tree, start, end, GFP_NOFS);
	return 0;

failed:
	/*
	 * we failed above in getting the page at 'index', so we undo here
	 * up to but not including the page at 'index'
	 */
	end_index = index;
	index = start >> PAGE_CACHE_SHIFT;
	while (index < end_index) {
		page = find_get_page(tree->mapping, index);
		unlock_page(page);
		page_cache_release(page);
		index++;
	}
	return err;
}

/*
 * helper function to unlock both pages and extents in the tree.
 */
static int unlock_range(struct extent_io_tree *tree, u64 start, u64 end)
{
	unsigned long index = start >> PAGE_CACHE_SHIFT;
	unsigned long end_index = end >> PAGE_CACHE_SHIFT;
	struct page *page;

	while (index <= end_index) {
		page = find_get_page(tree->mapping, index);
		unlock_page(page);
		page_cache_release(page);
		index++;
	}
	unlock_extent(tree, start, end, GFP_NOFS);
	return 0;
}
#endif

/*
 * set the private field for a given byte offset in the tree.  If there isn't
 * an extent_state there already, this does nothing.
 */
int set_state_private(struct extent_io_tree *tree, u64 start, u64 private)
{
	struct rb_node *node;
	struct extent_state *state;
	int ret = 0;

	spin_lock(&tree->lock);
	/*
	 * this search will find all the extents that end after
	 * our range starts.
	 */
	node = tree_search(tree, start);
	if (!node) {
		ret = -ENOENT;
		goto out;
	}
	state = rb_entry(node, struct extent_state, rb_node);
	if (state->start != start) {
		ret = -ENOENT;
		goto out;
	}
	state->private = private;
out:
	spin_unlock(&tree->lock);
	return ret;
}

int get_state_private(struct extent_io_tree *tree, u64 start, u64 *private)
{
	struct rb_node *node;
	struct extent_state *state;
	int ret = 0;

	spin_lock(&tree->lock);
	/*
	 * this search will find all the extents that end after
	 * our range starts.
	 */
	node = tree_search(tree, start);
	if (!node) {
		ret = -ENOENT;
		goto out;
	}
	state = rb_entry(node, struct extent_state, rb_node);
	if (state->start != start) {
		ret = -ENOENT;
		goto out;
	}
	*private = state->private;
out:
	spin_unlock(&tree->lock);
	return ret;
}
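
/*
 * Illustrative sketch (not part of this file): the private field gives a
 * caller one u64 of per-extent_state storage, keyed by the state's exact
 * start offset.  A hypothetical user might stash a checksum when a range
 * is submitted and fetch it back at end_io time, roughly:
 *
 *	set_state_private(tree, start, csum);
 *	...
 *	if (!get_state_private(tree, start, &private))
 *		csum = (u32)private;
 *
 * Both calls return -ENOENT if no extent_state starts exactly at 'start'.
 */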
1553
1554/*
1555 * searches a range in the state tree for a given mask.
Chris Mason70dec802008-01-29 09:59:12 -05001556 * If 'filled' == 1, this returns 1 only if every extent in the tree
Chris Masond1310b22008-01-24 16:13:08 -05001557 * has the bits set. Otherwise, 1 is returned if any bit in the
1558 * range is found set.
1559 */
1560int test_range_bit(struct extent_io_tree *tree, u64 start, u64 end,
1561 int bits, int filled)
1562{
1563 struct extent_state *state = NULL;
1564 struct rb_node *node;
1565 int bitset = 0;
Chris Masond1310b22008-01-24 16:13:08 -05001566
Chris Masoncad321a2008-12-17 14:51:42 -05001567 spin_lock(&tree->lock);
Chris Mason80ea96b2008-02-01 14:51:59 -05001568 node = tree_search(tree, start);
Chris Masond1310b22008-01-24 16:13:08 -05001569 while (node && start <= end) {
1570 state = rb_entry(node, struct extent_state, rb_node);
1571
1572 if (filled && state->start > start) {
1573 bitset = 0;
1574 break;
1575 }
1576
1577 if (state->start > end)
1578 break;
1579
1580 if (state->state & bits) {
1581 bitset = 1;
1582 if (!filled)
1583 break;
1584 } else if (filled) {
1585 bitset = 0;
1586 break;
1587 }
1588 start = state->end + 1;
1589 if (start > end)
1590 break;
1591 node = rb_next(node);
1592 if (!node) {
1593 if (filled)
1594 bitset = 0;
1595 break;
1596 }
1597 }
Chris Masoncad321a2008-12-17 14:51:42 -05001598 spin_unlock(&tree->lock);
Chris Masond1310b22008-01-24 16:13:08 -05001599 return bitset;
1600}
1601EXPORT_SYMBOL(test_range_bit);
1602
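/*
 * example of the 'filled' semantics: for a 4K page covering bytes
 * [0, 4095],
 *
 *	test_range_bit(tree, 0, 4095, EXTENT_UPTODATE, 1)
 *
 * returns 1 only when every byte of the page is covered by state
 * records that all carry EXTENT_UPTODATE (gaps count as a miss), while
 *
 *	test_range_bit(tree, 0, 4095, EXTENT_LOCKED, 0)
 *
 * returns 1 as soon as any part of the page is locked.  the
 * check_page_* helpers below rely on exactly these two modes.
 */
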
1603/*
1604 * helper function to set a given page up to date if all the
1605 * extents in the tree for that page are up to date
1606 */
1607static int check_page_uptodate(struct extent_io_tree *tree,
1608 struct page *page)
1609{
1610 u64 start = (u64)page->index << PAGE_CACHE_SHIFT;
1611 u64 end = start + PAGE_CACHE_SIZE - 1;
1612 if (test_range_bit(tree, start, end, EXTENT_UPTODATE, 1))
1613 SetPageUptodate(page);
1614 return 0;
1615}
1616
1617/*
1618 * helper function to unlock a page if all the extents in the tree
1619 * for that page are unlocked
1620 */
1621static int check_page_locked(struct extent_io_tree *tree,
1622 struct page *page)
1623{
1624 u64 start = (u64)page->index << PAGE_CACHE_SHIFT;
1625 u64 end = start + PAGE_CACHE_SIZE - 1;
1626 if (!test_range_bit(tree, start, end, EXTENT_LOCKED, 0))
1627 unlock_page(page);
1628 return 0;
1629}
1630
1631/*
1632 * helper function to end page writeback if all the extents
1633 * in the tree for that page are done with writeback
1634 */
1635static int check_page_writeback(struct extent_io_tree *tree,
1636 struct page *page)
1637{
1638 u64 start = (u64)page->index << PAGE_CACHE_SHIFT;
1639 u64 end = start + PAGE_CACHE_SIZE - 1;
1640 if (!test_range_bit(tree, start, end, EXTENT_WRITEBACK, 0))
1641 end_page_writeback(page);
1642 return 0;
1643}
1644
1645/* lots and lots of room for performance fixes in the end_bio funcs */
1646
1647/*
1648 * after a writepage IO is done, we need to:
1649 * clear the uptodate bits on error
1650 * clear the writeback bits in the extent tree for this IO
1651 * end_page_writeback if the page has no more pending IO
1652 *
1653 * Scheduling is not allowed, so the extent state tree is expected
1654 * to have one and only one object corresponding to this IO.
1655 */
Chris Masond1310b22008-01-24 16:13:08 -05001656static void end_bio_extent_writepage(struct bio *bio, int err)
Chris Masond1310b22008-01-24 16:13:08 -05001657{
Chris Mason1259ab72008-05-12 13:39:03 -04001658 int uptodate = err == 0;
Chris Masond1310b22008-01-24 16:13:08 -05001659 struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1;
David Woodhouse902b22f2008-08-20 08:51:49 -04001660 struct extent_io_tree *tree;
Chris Masond1310b22008-01-24 16:13:08 -05001661 u64 start;
1662 u64 end;
1663 int whole_page;
Chris Mason1259ab72008-05-12 13:39:03 -04001664 int ret;
Chris Masond1310b22008-01-24 16:13:08 -05001665
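	/*
	 * the io_vecs are walked from the last one in the bio back toward
	 * the first (bvec starts at bi_io_vec + bi_vcnt - 1 and is
	 * decremented below), and every page is finished individually:
	 * whole pages get end_page_writeback() directly, partial pages go
	 * through check_page_writeback() so writeback is only ended once
	 * every extent covering the page is done
	 */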
Chris Masond1310b22008-01-24 16:13:08 -05001666 do {
1667 struct page *page = bvec->bv_page;
David Woodhouse902b22f2008-08-20 08:51:49 -04001668 tree = &BTRFS_I(page->mapping->host)->io_tree;
1669
Chris Masond1310b22008-01-24 16:13:08 -05001670 start = ((u64)page->index << PAGE_CACHE_SHIFT) +
1671 bvec->bv_offset;
1672 end = start + bvec->bv_len - 1;
1673
1674 if (bvec->bv_offset == 0 && bvec->bv_len == PAGE_CACHE_SIZE)
1675 whole_page = 1;
1676 else
1677 whole_page = 0;
1678
1679 if (--bvec >= bio->bi_io_vec)
1680 prefetchw(&bvec->bv_page->flags);
Chris Mason1259ab72008-05-12 13:39:03 -04001681 if (tree->ops && tree->ops->writepage_end_io_hook) {
1682 ret = tree->ops->writepage_end_io_hook(page, start,
David Woodhouse902b22f2008-08-20 08:51:49 -04001683 end, NULL, uptodate);
Chris Mason1259ab72008-05-12 13:39:03 -04001684 if (ret)
1685 uptodate = 0;
1686 }
1687
1688 if (!uptodate && tree->ops &&
1689 tree->ops->writepage_io_failed_hook) {
1690 ret = tree->ops->writepage_io_failed_hook(bio, page,
David Woodhouse902b22f2008-08-20 08:51:49 -04001691 start, end, NULL);
Chris Mason1259ab72008-05-12 13:39:03 -04001692 if (ret == 0) {
Chris Mason1259ab72008-05-12 13:39:03 -04001693 uptodate = (err == 0);
1694 continue;
1695 }
1696 }
1697
Chris Masond1310b22008-01-24 16:13:08 -05001698 if (!uptodate) {
1699 clear_extent_uptodate(tree, start, end, GFP_ATOMIC);
1700 ClearPageUptodate(page);
1701 SetPageError(page);
1702 }
Chris Mason70dec802008-01-29 09:59:12 -05001703
David Woodhouse902b22f2008-08-20 08:51:49 -04001704 clear_extent_writeback(tree, start, end, GFP_ATOMIC);
Chris Masond1310b22008-01-24 16:13:08 -05001705
1706 if (whole_page)
1707 end_page_writeback(page);
1708 else
1709 check_page_writeback(tree, page);
Chris Masond1310b22008-01-24 16:13:08 -05001710 } while (bvec >= bio->bi_io_vec);
Chris Mason2b1f55b2008-09-24 11:48:04 -04001711
Chris Masond1310b22008-01-24 16:13:08 -05001712 bio_put(bio);
Chris Masond1310b22008-01-24 16:13:08 -05001713}
1714
1715/*
1716 * after a readpage IO is done, we need to:
1717 * clear the uptodate bits on error
1718 * set the uptodate bits if things worked
1719 * set the page up to date if all extents in the tree are uptodate
1720 * clear the lock bit in the extent tree
1721 * unlock the page if there are no other extents locked for it
1722 *
1723 * Scheduling is not allowed, so the extent state tree is expected
1724 * to have one and only one object corresponding to this IO.
1725 */
Chris Masond1310b22008-01-24 16:13:08 -05001726static void end_bio_extent_readpage(struct bio *bio, int err)
Chris Masond1310b22008-01-24 16:13:08 -05001727{
1728 int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
1729 struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1;
David Woodhouse902b22f2008-08-20 08:51:49 -04001730 struct extent_io_tree *tree;
Chris Masond1310b22008-01-24 16:13:08 -05001731 u64 start;
1732 u64 end;
1733 int whole_page;
1734 int ret;
1735
Chris Masond20f7042008-12-08 16:58:54 -05001736 if (err)
1737 uptodate = 0;
1738
Chris Masond1310b22008-01-24 16:13:08 -05001739 do {
1740 struct page *page = bvec->bv_page;
David Woodhouse902b22f2008-08-20 08:51:49 -04001741 tree = &BTRFS_I(page->mapping->host)->io_tree;
1742
Chris Masond1310b22008-01-24 16:13:08 -05001743 start = ((u64)page->index << PAGE_CACHE_SHIFT) +
1744 bvec->bv_offset;
1745 end = start + bvec->bv_len - 1;
1746
1747 if (bvec->bv_offset == 0 && bvec->bv_len == PAGE_CACHE_SIZE)
1748 whole_page = 1;
1749 else
1750 whole_page = 0;
1751
1752 if (--bvec >= bio->bi_io_vec)
1753 prefetchw(&bvec->bv_page->flags);
1754
1755 if (uptodate && tree->ops && tree->ops->readpage_end_io_hook) {
Chris Mason70dec802008-01-29 09:59:12 -05001756 ret = tree->ops->readpage_end_io_hook(page, start, end,
David Woodhouse902b22f2008-08-20 08:51:49 -04001757 NULL);
Chris Masond1310b22008-01-24 16:13:08 -05001758 if (ret)
1759 uptodate = 0;
1760 }
Chris Mason7e383262008-04-09 16:28:12 -04001761 if (!uptodate && tree->ops &&
1762 tree->ops->readpage_io_failed_hook) {
1763 ret = tree->ops->readpage_io_failed_hook(bio, page,
David Woodhouse902b22f2008-08-20 08:51:49 -04001764 start, end, NULL);
Chris Mason7e383262008-04-09 16:28:12 -04001765 if (ret == 0) {
Chris Mason3b951512008-04-17 11:29:12 -04001766 uptodate =
1767 test_bit(BIO_UPTODATE, &bio->bi_flags);
Chris Masond20f7042008-12-08 16:58:54 -05001768 if (err)
1769 uptodate = 0;
Chris Mason7e383262008-04-09 16:28:12 -04001770 continue;
1771 }
1772 }
Chris Mason70dec802008-01-29 09:59:12 -05001773
Chris Mason771ed682008-11-06 22:02:51 -05001774 if (uptodate) {
David Woodhouse902b22f2008-08-20 08:51:49 -04001775 set_extent_uptodate(tree, start, end,
1776 GFP_ATOMIC);
Chris Mason771ed682008-11-06 22:02:51 -05001777 }
David Woodhouse902b22f2008-08-20 08:51:49 -04001778 unlock_extent(tree, start, end, GFP_ATOMIC);
Chris Masond1310b22008-01-24 16:13:08 -05001779
Chris Mason70dec802008-01-29 09:59:12 -05001780 if (whole_page) {
1781 if (uptodate) {
1782 SetPageUptodate(page);
1783 } else {
1784 ClearPageUptodate(page);
1785 SetPageError(page);
1786 }
Chris Masond1310b22008-01-24 16:13:08 -05001787 unlock_page(page);
Chris Mason70dec802008-01-29 09:59:12 -05001788 } else {
1789 if (uptodate) {
1790 check_page_uptodate(tree, page);
1791 } else {
1792 ClearPageUptodate(page);
1793 SetPageError(page);
1794 }
Chris Masond1310b22008-01-24 16:13:08 -05001795 check_page_locked(tree, page);
Chris Mason70dec802008-01-29 09:59:12 -05001796 }
Chris Masond1310b22008-01-24 16:13:08 -05001797 } while (bvec >= bio->bi_io_vec);
1798
1799 bio_put(bio);
Chris Masond1310b22008-01-24 16:13:08 -05001800}
1801
1802/*
1803 * IO done from prepare_write is pretty simple, we just unlock
1804 * the structs in the extent tree when done, and set the uptodate bits
1805 * as appropriate.
1806 */
Chris Masond1310b22008-01-24 16:13:08 -05001807static void end_bio_extent_preparewrite(struct bio *bio, int err)
Chris Masond1310b22008-01-24 16:13:08 -05001808{
1809 const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
1810 struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1;
David Woodhouse902b22f2008-08-20 08:51:49 -04001811 struct extent_io_tree *tree;
Chris Masond1310b22008-01-24 16:13:08 -05001812 u64 start;
1813 u64 end;
1814
Chris Masond1310b22008-01-24 16:13:08 -05001815 do {
1816 struct page *page = bvec->bv_page;
David Woodhouse902b22f2008-08-20 08:51:49 -04001817 tree = &BTRFS_I(page->mapping->host)->io_tree;
1818
Chris Masond1310b22008-01-24 16:13:08 -05001819 start = ((u64)page->index << PAGE_CACHE_SHIFT) +
1820 bvec->bv_offset;
1821 end = start + bvec->bv_len - 1;
1822
1823 if (--bvec >= bio->bi_io_vec)
1824 prefetchw(&bvec->bv_page->flags);
1825
1826 if (uptodate) {
1827 set_extent_uptodate(tree, start, end, GFP_ATOMIC);
1828 } else {
1829 ClearPageUptodate(page);
1830 SetPageError(page);
1831 }
1832
1833 unlock_extent(tree, start, end, GFP_ATOMIC);
1834
1835 } while (bvec >= bio->bi_io_vec);
1836
1837 bio_put(bio);
Chris Masond1310b22008-01-24 16:13:08 -05001838}
1839
1840static struct bio *
1841extent_bio_alloc(struct block_device *bdev, u64 first_sector, int nr_vecs,
1842 gfp_t gfp_flags)
1843{
1844 struct bio *bio;
1845
1846 bio = bio_alloc(gfp_flags, nr_vecs);
1847
1848 if (bio == NULL && (current->flags & PF_MEMALLOC)) {
1849 while (!bio && (nr_vecs /= 2))
1850 bio = bio_alloc(gfp_flags, nr_vecs);
1851 }
1852
1853 if (bio) {
Chris Masone1c4b742008-04-22 13:26:46 -04001854 bio->bi_size = 0;
Chris Masond1310b22008-01-24 16:13:08 -05001855 bio->bi_bdev = bdev;
1856 bio->bi_sector = first_sector;
1857 }
1858 return bio;
1859}
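
/*
 * note on the fallback above: if the first bio_alloc() fails while we
 * are already in memory reclaim (PF_MEMALLOC is set), the vec count is
 * halved repeatedly until an allocation succeeds or nr_vecs hits zero,
 * accepting a smaller bio in exchange for forward progress under
 * memory pressure
 */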
1860
Chris Masonc8b97812008-10-29 14:49:59 -04001861static int submit_one_bio(int rw, struct bio *bio, int mirror_num,
1862 unsigned long bio_flags)
Chris Masond1310b22008-01-24 16:13:08 -05001863{
Chris Masond1310b22008-01-24 16:13:08 -05001864 int ret = 0;
Chris Mason70dec802008-01-29 09:59:12 -05001865 struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1;
1866 struct page *page = bvec->bv_page;
1867 struct extent_io_tree *tree = bio->bi_private;
Chris Mason70dec802008-01-29 09:59:12 -05001868 u64 start;
1869 u64 end;
1870
1871 start = ((u64)page->index << PAGE_CACHE_SHIFT) + bvec->bv_offset;
1872 end = start + bvec->bv_len - 1;
1873
David Woodhouse902b22f2008-08-20 08:51:49 -04001874 bio->bi_private = NULL;
Chris Masond1310b22008-01-24 16:13:08 -05001875
1876 bio_get(bio);
1877
Chris Mason065631f2008-02-20 12:07:25 -05001878 if (tree->ops && tree->ops->submit_bio_hook)
Chris Masonf1885912008-04-09 16:28:12 -04001879 tree->ops->submit_bio_hook(page->mapping->host, rw, bio,
Chris Masonc8b97812008-10-29 14:49:59 -04001880 mirror_num, bio_flags);
Chris Mason0b86a832008-03-24 15:01:56 -04001881 else
1882 submit_bio(rw, bio);
Chris Masond1310b22008-01-24 16:13:08 -05001883 if (bio_flagged(bio, BIO_EOPNOTSUPP))
1884 ret = -EOPNOTSUPP;
1885 bio_put(bio);
1886 return ret;
1887}
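
/*
 * sketch of the usual calling pattern in this file: callers that issue
 * many pages pass a struct bio ** into submit_extent_page() so that
 * contiguous pages keep piling into a single bio, then flush whatever
 * is left over themselves, roughly
 *
 *	struct bio *bio = NULL;
 *	unsigned long bio_flags = 0;
 *
 *	__extent_read_full_page(tree, page, get_extent, &bio, 0, &bio_flags);
 *	if (bio)
 *		submit_one_bio(READ, bio, 0, bio_flags);
 */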
1888
1889static int submit_extent_page(int rw, struct extent_io_tree *tree,
1890 struct page *page, sector_t sector,
1891 size_t size, unsigned long offset,
1892 struct block_device *bdev,
1893 struct bio **bio_ret,
1894 unsigned long max_pages,
Chris Masonf1885912008-04-09 16:28:12 -04001895 bio_end_io_t end_io_func,
Chris Masonc8b97812008-10-29 14:49:59 -04001896 int mirror_num,
1897 unsigned long prev_bio_flags,
1898 unsigned long bio_flags)
Chris Masond1310b22008-01-24 16:13:08 -05001899{
1900 int ret = 0;
1901 struct bio *bio;
1902 int nr;
Chris Masonc8b97812008-10-29 14:49:59 -04001903 int contig = 0;
1904 int this_compressed = bio_flags & EXTENT_BIO_COMPRESSED;
1905 int old_compressed = prev_bio_flags & EXTENT_BIO_COMPRESSED;
Chris Mason5b050f02008-11-11 09:34:41 -05001906 size_t page_size = min_t(size_t, size, PAGE_CACHE_SIZE);
Chris Masond1310b22008-01-24 16:13:08 -05001907
1908 if (bio_ret && *bio_ret) {
1909 bio = *bio_ret;
Chris Masonc8b97812008-10-29 14:49:59 -04001910 if (old_compressed)
1911 contig = bio->bi_sector == sector;
1912 else
1913 contig = bio->bi_sector + (bio->bi_size >> 9) ==
1914 sector;
1915
1916 if (prev_bio_flags != bio_flags || !contig ||
Chris Mason239b14b2008-03-24 15:02:07 -04001917 (tree->ops && tree->ops->merge_bio_hook &&
Chris Masonc8b97812008-10-29 14:49:59 -04001918 tree->ops->merge_bio_hook(page, offset, page_size, bio,
1919 bio_flags)) ||
1920 bio_add_page(bio, page, page_size, offset) < page_size) {
1921 ret = submit_one_bio(rw, bio, mirror_num,
1922 prev_bio_flags);
Chris Masond1310b22008-01-24 16:13:08 -05001923 bio = NULL;
1924 } else {
1925 return 0;
1926 }
1927 }
Chris Masonc8b97812008-10-29 14:49:59 -04001928 if (this_compressed)
1929 nr = BIO_MAX_PAGES;
1930 else
1931 nr = bio_get_nr_vecs(bdev);
1932
Chris Masond1310b22008-01-24 16:13:08 -05001933 bio = extent_bio_alloc(bdev, sector, nr, GFP_NOFS | __GFP_HIGH);
Chris Mason70dec802008-01-29 09:59:12 -05001934
Chris Masonc8b97812008-10-29 14:49:59 -04001935 bio_add_page(bio, page, page_size, offset);
Chris Masond1310b22008-01-24 16:13:08 -05001936 bio->bi_end_io = end_io_func;
1937 bio->bi_private = tree;
Chris Mason70dec802008-01-29 09:59:12 -05001938
Chris Masond3977122009-01-05 21:25:51 -05001939 if (bio_ret)
Chris Masond1310b22008-01-24 16:13:08 -05001940 *bio_ret = bio;
Chris Masond3977122009-01-05 21:25:51 -05001941 else
Chris Masonc8b97812008-10-29 14:49:59 -04001942 ret = submit_one_bio(rw, bio, mirror_num, bio_flags);
Chris Masond1310b22008-01-24 16:13:08 -05001943
1944 return ret;
1945}
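
/*
 * to summarise the merging logic above, a page is appended to the
 * caller's pending bio only when all of the following hold:
 * the bio flags (compressed or not) match the previous submission, the
 * sector is contiguous with the bio built so far (for compressed bios
 * the sector must match exactly), the merge_bio_hook (if any) does not
 * object, and bio_add_page() accepts the full page_size.  otherwise the
 * old bio is submitted and a fresh one is started for this page
 */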
1946
1947void set_page_extent_mapped(struct page *page)
1948{
1949 if (!PagePrivate(page)) {
1950 SetPagePrivate(page);
Chris Masond1310b22008-01-24 16:13:08 -05001951 page_cache_get(page);
Chris Mason6af118ce2008-07-22 11:18:07 -04001952 set_page_private(page, EXTENT_PAGE_PRIVATE);
Chris Masond1310b22008-01-24 16:13:08 -05001953 }
1954}
Chris Mason771ed682008-11-06 22:02:51 -05001955EXPORT_SYMBOL(set_page_extent_mapped);
Chris Masond1310b22008-01-24 16:13:08 -05001956
Christoph Hellwigb2950862008-12-02 09:54:17 -05001957static void set_page_extent_head(struct page *page, unsigned long len)
Chris Masond1310b22008-01-24 16:13:08 -05001958{
1959 set_page_private(page, EXTENT_PAGE_PRIVATE_FIRST_PAGE | len << 2);
1960}
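
/*
 * page->private encoding used by the two helpers above: ordinary data
 * pages are tagged with EXTENT_PAGE_PRIVATE, while the first page of an
 * extent_buffer stores EXTENT_PAGE_PRIVATE_FIRST_PAGE or'd with the
 * buffer length shifted left by two
 */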
1961
1962/*
1963 * basic readpage implementation. Locked extent state structs are inserted
 1964 * into the tree and are removed when the IO is done (by the end_io
1965 * handlers)
1966 */
1967static int __extent_read_full_page(struct extent_io_tree *tree,
1968 struct page *page,
1969 get_extent_t *get_extent,
Chris Masonc8b97812008-10-29 14:49:59 -04001970 struct bio **bio, int mirror_num,
1971 unsigned long *bio_flags)
Chris Masond1310b22008-01-24 16:13:08 -05001972{
1973 struct inode *inode = page->mapping->host;
1974 u64 start = (u64)page->index << PAGE_CACHE_SHIFT;
1975 u64 page_end = start + PAGE_CACHE_SIZE - 1;
1976 u64 end;
1977 u64 cur = start;
1978 u64 extent_offset;
1979 u64 last_byte = i_size_read(inode);
1980 u64 block_start;
1981 u64 cur_end;
1982 sector_t sector;
1983 struct extent_map *em;
1984 struct block_device *bdev;
1985 int ret;
1986 int nr = 0;
1987 size_t page_offset = 0;
1988 size_t iosize;
Chris Masonc8b97812008-10-29 14:49:59 -04001989 size_t disk_io_size;
Chris Masond1310b22008-01-24 16:13:08 -05001990 size_t blocksize = inode->i_sb->s_blocksize;
Chris Masonc8b97812008-10-29 14:49:59 -04001991 unsigned long this_bio_flag = 0;
Chris Masond1310b22008-01-24 16:13:08 -05001992
1993 set_page_extent_mapped(page);
1994
1995 end = page_end;
1996 lock_extent(tree, start, end, GFP_NOFS);
1997
Chris Masonc8b97812008-10-29 14:49:59 -04001998 if (page->index == last_byte >> PAGE_CACHE_SHIFT) {
1999 char *userpage;
2000 size_t zero_offset = last_byte & (PAGE_CACHE_SIZE - 1);
2001
2002 if (zero_offset) {
2003 iosize = PAGE_CACHE_SIZE - zero_offset;
2004 userpage = kmap_atomic(page, KM_USER0);
2005 memset(userpage + zero_offset, 0, iosize);
2006 flush_dcache_page(page);
2007 kunmap_atomic(userpage, KM_USER0);
2008 }
2009 }
Chris Masond1310b22008-01-24 16:13:08 -05002010 while (cur <= end) {
2011 if (cur >= last_byte) {
2012 char *userpage;
2013 iosize = PAGE_CACHE_SIZE - page_offset;
2014 userpage = kmap_atomic(page, KM_USER0);
2015 memset(userpage + page_offset, 0, iosize);
2016 flush_dcache_page(page);
2017 kunmap_atomic(userpage, KM_USER0);
2018 set_extent_uptodate(tree, cur, cur + iosize - 1,
2019 GFP_NOFS);
2020 unlock_extent(tree, cur, cur + iosize - 1, GFP_NOFS);
2021 break;
2022 }
2023 em = get_extent(inode, page, page_offset, cur,
2024 end - cur + 1, 0);
2025 if (IS_ERR(em) || !em) {
2026 SetPageError(page);
2027 unlock_extent(tree, cur, end, GFP_NOFS);
2028 break;
2029 }
Chris Masond1310b22008-01-24 16:13:08 -05002030 extent_offset = cur - em->start;
2031 BUG_ON(extent_map_end(em) <= cur);
2032 BUG_ON(end < cur);
2033
Chris Masonc8b97812008-10-29 14:49:59 -04002034 if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags))
2035 this_bio_flag = EXTENT_BIO_COMPRESSED;
2036
Chris Masond1310b22008-01-24 16:13:08 -05002037 iosize = min(extent_map_end(em) - cur, end - cur + 1);
2038 cur_end = min(extent_map_end(em) - 1, end);
2039 iosize = (iosize + blocksize - 1) & ~((u64)blocksize - 1);
Chris Masonc8b97812008-10-29 14:49:59 -04002040 if (this_bio_flag & EXTENT_BIO_COMPRESSED) {
2041 disk_io_size = em->block_len;
2042 sector = em->block_start >> 9;
2043 } else {
2044 sector = (em->block_start + extent_offset) >> 9;
2045 disk_io_size = iosize;
2046 }
Chris Masond1310b22008-01-24 16:13:08 -05002047 bdev = em->bdev;
2048 block_start = em->block_start;
Yan Zhengd899e052008-10-30 14:25:28 -04002049 if (test_bit(EXTENT_FLAG_PREALLOC, &em->flags))
2050 block_start = EXTENT_MAP_HOLE;
Chris Masond1310b22008-01-24 16:13:08 -05002051 free_extent_map(em);
2052 em = NULL;
2053
2054 /* we've found a hole, just zero and go on */
2055 if (block_start == EXTENT_MAP_HOLE) {
2056 char *userpage;
2057 userpage = kmap_atomic(page, KM_USER0);
2058 memset(userpage + page_offset, 0, iosize);
2059 flush_dcache_page(page);
2060 kunmap_atomic(userpage, KM_USER0);
2061
2062 set_extent_uptodate(tree, cur, cur + iosize - 1,
2063 GFP_NOFS);
2064 unlock_extent(tree, cur, cur + iosize - 1, GFP_NOFS);
2065 cur = cur + iosize;
2066 page_offset += iosize;
2067 continue;
2068 }
2069 /* the get_extent function already copied into the page */
2070 if (test_range_bit(tree, cur, cur_end, EXTENT_UPTODATE, 1)) {
Chris Masona1b32a52008-09-05 16:09:51 -04002071 check_page_uptodate(tree, page);
Chris Masond1310b22008-01-24 16:13:08 -05002072 unlock_extent(tree, cur, cur + iosize - 1, GFP_NOFS);
2073 cur = cur + iosize;
2074 page_offset += iosize;
2075 continue;
2076 }
Chris Mason70dec802008-01-29 09:59:12 -05002077 /* we have an inline extent but it didn't get marked up
2078 * to date. Error out
2079 */
2080 if (block_start == EXTENT_MAP_INLINE) {
2081 SetPageError(page);
2082 unlock_extent(tree, cur, cur + iosize - 1, GFP_NOFS);
2083 cur = cur + iosize;
2084 page_offset += iosize;
2085 continue;
2086 }
Chris Masond1310b22008-01-24 16:13:08 -05002087
2088 ret = 0;
2089 if (tree->ops && tree->ops->readpage_io_hook) {
2090 ret = tree->ops->readpage_io_hook(page, cur,
2091 cur + iosize - 1);
2092 }
2093 if (!ret) {
Chris Mason89642222008-07-24 09:41:53 -04002094 unsigned long pnr = (last_byte >> PAGE_CACHE_SHIFT) + 1;
2095 pnr -= page->index;
Chris Masond1310b22008-01-24 16:13:08 -05002096 ret = submit_extent_page(READ, tree, page,
Chris Masonc8b97812008-10-29 14:49:59 -04002097 sector, disk_io_size, page_offset,
Chris Mason89642222008-07-24 09:41:53 -04002098 bdev, bio, pnr,
Chris Masonc8b97812008-10-29 14:49:59 -04002099 end_bio_extent_readpage, mirror_num,
2100 *bio_flags,
2101 this_bio_flag);
Chris Mason89642222008-07-24 09:41:53 -04002102 nr++;
Chris Masonc8b97812008-10-29 14:49:59 -04002103 *bio_flags = this_bio_flag;
Chris Masond1310b22008-01-24 16:13:08 -05002104 }
2105 if (ret)
2106 SetPageError(page);
2107 cur = cur + iosize;
2108 page_offset += iosize;
Chris Masond1310b22008-01-24 16:13:08 -05002109 }
2110 if (!nr) {
2111 if (!PageError(page))
2112 SetPageUptodate(page);
2113 unlock_page(page);
2114 }
2115 return 0;
2116}
2117
2118int extent_read_full_page(struct extent_io_tree *tree, struct page *page,
2119 get_extent_t *get_extent)
2120{
2121 struct bio *bio = NULL;
Chris Masonc8b97812008-10-29 14:49:59 -04002122 unsigned long bio_flags = 0;
Chris Masond1310b22008-01-24 16:13:08 -05002123 int ret;
2124
Chris Masonc8b97812008-10-29 14:49:59 -04002125 ret = __extent_read_full_page(tree, page, get_extent, &bio, 0,
2126 &bio_flags);
Chris Masond1310b22008-01-24 16:13:08 -05002127 if (bio)
Chris Masonc8b97812008-10-29 14:49:59 -04002128 submit_one_bio(READ, bio, 0, bio_flags);
Chris Masond1310b22008-01-24 16:13:08 -05002129 return ret;
2130}
2131EXPORT_SYMBOL(extent_read_full_page);
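
/*
 * a filesystem ->readpage is expected to be a thin wrapper around this.
 * as a rough sketch of what btrfs does in fs/btrfs/inode.c:
 *
 *	static int btrfs_readpage(struct file *file, struct page *page)
 *	{
 *		struct extent_io_tree *tree;
 *		tree = &BTRFS_I(page->mapping->host)->io_tree;
 *		return extent_read_full_page(tree, page, btrfs_get_extent);
 *	}
 */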
2132
2133/*
2134 * the writepage semantics are similar to regular writepage. extent
2135 * records are inserted to lock ranges in the tree, and as dirty areas
2136 * are found, they are marked writeback. Then the lock bits are removed
2137 * and the end_io handler clears the writeback ranges
2138 */
2139static int __extent_writepage(struct page *page, struct writeback_control *wbc,
2140 void *data)
2141{
2142 struct inode *inode = page->mapping->host;
2143 struct extent_page_data *epd = data;
2144 struct extent_io_tree *tree = epd->tree;
2145 u64 start = (u64)page->index << PAGE_CACHE_SHIFT;
2146 u64 delalloc_start;
2147 u64 page_end = start + PAGE_CACHE_SIZE - 1;
2148 u64 end;
2149 u64 cur = start;
2150 u64 extent_offset;
2151 u64 last_byte = i_size_read(inode);
2152 u64 block_start;
2153 u64 iosize;
Chris Masone6dcd2d2008-07-17 12:53:50 -04002154 u64 unlock_start;
Chris Masond1310b22008-01-24 16:13:08 -05002155 sector_t sector;
2156 struct extent_map *em;
2157 struct block_device *bdev;
2158 int ret;
2159 int nr = 0;
Chris Mason7f3c74f2008-07-18 12:01:11 -04002160 size_t pg_offset = 0;
Chris Masond1310b22008-01-24 16:13:08 -05002161 size_t blocksize;
2162 loff_t i_size = i_size_read(inode);
2163 unsigned long end_index = i_size >> PAGE_CACHE_SHIFT;
2164 u64 nr_delalloc;
2165 u64 delalloc_end;
Chris Masonc8b97812008-10-29 14:49:59 -04002166 int page_started;
2167 int compressed;
Chris Mason771ed682008-11-06 22:02:51 -05002168 unsigned long nr_written = 0;
Chris Masond1310b22008-01-24 16:13:08 -05002169
2170 WARN_ON(!PageLocked(page));
Chris Mason7f3c74f2008-07-18 12:01:11 -04002171 pg_offset = i_size & (PAGE_CACHE_SIZE - 1);
Chris Mason211c17f2008-05-15 09:13:45 -04002172 if (page->index > end_index ||
Chris Mason7f3c74f2008-07-18 12:01:11 -04002173 (page->index == end_index && !pg_offset)) {
Chris Mason39be25c2008-11-10 11:50:50 -05002174 page->mapping->a_ops->invalidatepage(page, 0);
Chris Masond1310b22008-01-24 16:13:08 -05002175 unlock_page(page);
2176 return 0;
2177 }
2178
2179 if (page->index == end_index) {
2180 char *userpage;
2181
Chris Masond1310b22008-01-24 16:13:08 -05002182 userpage = kmap_atomic(page, KM_USER0);
Chris Mason7f3c74f2008-07-18 12:01:11 -04002183 memset(userpage + pg_offset, 0,
2184 PAGE_CACHE_SIZE - pg_offset);
Chris Masond1310b22008-01-24 16:13:08 -05002185 kunmap_atomic(userpage, KM_USER0);
Chris Mason211c17f2008-05-15 09:13:45 -04002186 flush_dcache_page(page);
Chris Masond1310b22008-01-24 16:13:08 -05002187 }
Chris Mason7f3c74f2008-07-18 12:01:11 -04002188 pg_offset = 0;
Chris Masond1310b22008-01-24 16:13:08 -05002189
2190 set_page_extent_mapped(page);
2191
2192 delalloc_start = start;
2193 delalloc_end = 0;
Chris Masonc8b97812008-10-29 14:49:59 -04002194 page_started = 0;
Chris Mason771ed682008-11-06 22:02:51 -05002195 if (!epd->extent_locked) {
Chris Masond3977122009-01-05 21:25:51 -05002196 while (delalloc_end < page_end) {
Chris Mason771ed682008-11-06 22:02:51 -05002197 nr_delalloc = find_lock_delalloc_range(inode, tree,
Chris Masonc8b97812008-10-29 14:49:59 -04002198 page,
2199 &delalloc_start,
Chris Masond1310b22008-01-24 16:13:08 -05002200 &delalloc_end,
2201 128 * 1024 * 1024);
Chris Mason771ed682008-11-06 22:02:51 -05002202 if (nr_delalloc == 0) {
2203 delalloc_start = delalloc_end + 1;
2204 continue;
2205 }
2206 tree->ops->fill_delalloc(inode, page, delalloc_start,
2207 delalloc_end, &page_started,
2208 &nr_written);
Chris Masond1310b22008-01-24 16:13:08 -05002209 delalloc_start = delalloc_end + 1;
Chris Masond1310b22008-01-24 16:13:08 -05002210 }
Chris Masonc8b97812008-10-29 14:49:59 -04002211
Chris Mason771ed682008-11-06 22:02:51 -05002212 /* did the fill delalloc function already unlock and start
2213 * the IO?
2214 */
2215 if (page_started) {
2216 ret = 0;
2217 goto update_nr_written;
2218 }
Chris Masonc8b97812008-10-29 14:49:59 -04002219 }
Chris Masond1310b22008-01-24 16:13:08 -05002220 lock_extent(tree, start, page_end, GFP_NOFS);
Chris Mason771ed682008-11-06 22:02:51 -05002221
Chris Masone6dcd2d2008-07-17 12:53:50 -04002222 unlock_start = start;
Chris Masond1310b22008-01-24 16:13:08 -05002223
Chris Mason247e7432008-07-17 12:53:51 -04002224 if (tree->ops && tree->ops->writepage_start_hook) {
Chris Masonc8b97812008-10-29 14:49:59 -04002225 ret = tree->ops->writepage_start_hook(page, start,
2226 page_end);
Chris Mason247e7432008-07-17 12:53:51 -04002227 if (ret == -EAGAIN) {
2228 unlock_extent(tree, start, page_end, GFP_NOFS);
2229 redirty_page_for_writepage(wbc, page);
2230 unlock_page(page);
Chris Mason771ed682008-11-06 22:02:51 -05002231 ret = 0;
2232 goto update_nr_written;
Chris Mason247e7432008-07-17 12:53:51 -04002233 }
2234 }
2235
Chris Mason771ed682008-11-06 22:02:51 -05002236 nr_written++;
2237
Chris Masond1310b22008-01-24 16:13:08 -05002238 end = page_end;
Chris Masond3977122009-01-05 21:25:51 -05002239 if (test_range_bit(tree, start, page_end, EXTENT_DELALLOC, 0))
2240 printk(KERN_ERR "btrfs delalloc bits after lock_extent\n");
Chris Masond1310b22008-01-24 16:13:08 -05002241
2242 if (last_byte <= start) {
2243 clear_extent_dirty(tree, start, page_end, GFP_NOFS);
Chris Masone6dcd2d2008-07-17 12:53:50 -04002244 unlock_extent(tree, start, page_end, GFP_NOFS);
2245 if (tree->ops && tree->ops->writepage_end_io_hook)
2246 tree->ops->writepage_end_io_hook(page, start,
2247 page_end, NULL, 1);
2248 unlock_start = page_end + 1;
Chris Masond1310b22008-01-24 16:13:08 -05002249 goto done;
2250 }
2251
2252 set_extent_uptodate(tree, start, page_end, GFP_NOFS);
2253 blocksize = inode->i_sb->s_blocksize;
2254
2255 while (cur <= end) {
2256 if (cur >= last_byte) {
2257 clear_extent_dirty(tree, cur, page_end, GFP_NOFS);
Chris Masone6dcd2d2008-07-17 12:53:50 -04002258 unlock_extent(tree, unlock_start, page_end, GFP_NOFS);
2259 if (tree->ops && tree->ops->writepage_end_io_hook)
2260 tree->ops->writepage_end_io_hook(page, cur,
2261 page_end, NULL, 1);
2262 unlock_start = page_end + 1;
Chris Masond1310b22008-01-24 16:13:08 -05002263 break;
2264 }
Chris Mason7f3c74f2008-07-18 12:01:11 -04002265 em = epd->get_extent(inode, page, pg_offset, cur,
Chris Masond1310b22008-01-24 16:13:08 -05002266 end - cur + 1, 1);
2267 if (IS_ERR(em) || !em) {
2268 SetPageError(page);
2269 break;
2270 }
2271
2272 extent_offset = cur - em->start;
2273 BUG_ON(extent_map_end(em) <= cur);
2274 BUG_ON(end < cur);
2275 iosize = min(extent_map_end(em) - cur, end - cur + 1);
2276 iosize = (iosize + blocksize - 1) & ~((u64)blocksize - 1);
2277 sector = (em->block_start + extent_offset) >> 9;
2278 bdev = em->bdev;
2279 block_start = em->block_start;
Chris Masonc8b97812008-10-29 14:49:59 -04002280 compressed = test_bit(EXTENT_FLAG_COMPRESSED, &em->flags);
Chris Masond1310b22008-01-24 16:13:08 -05002281 free_extent_map(em);
2282 em = NULL;
2283
Chris Masonc8b97812008-10-29 14:49:59 -04002284 /*
2285 * compressed and inline extents are written through other
2286 * paths in the FS
2287 */
2288 if (compressed || block_start == EXTENT_MAP_HOLE ||
Chris Masond1310b22008-01-24 16:13:08 -05002289 block_start == EXTENT_MAP_INLINE) {
2290 clear_extent_dirty(tree, cur,
2291 cur + iosize - 1, GFP_NOFS);
Chris Masone6dcd2d2008-07-17 12:53:50 -04002292
Chris Masond3977122009-01-05 21:25:51 -05002293 unlock_extent(tree, unlock_start, cur + iosize - 1,
Chris Masone6dcd2d2008-07-17 12:53:50 -04002294 GFP_NOFS);
Chris Mason7f3c74f2008-07-18 12:01:11 -04002295
Chris Masonc8b97812008-10-29 14:49:59 -04002296 /*
2297 * end_io notification does not happen here for
2298 * compressed extents
2299 */
2300 if (!compressed && tree->ops &&
2301 tree->ops->writepage_end_io_hook)
Chris Masone6dcd2d2008-07-17 12:53:50 -04002302 tree->ops->writepage_end_io_hook(page, cur,
2303 cur + iosize - 1,
2304 NULL, 1);
Chris Masonc8b97812008-10-29 14:49:59 -04002305 else if (compressed) {
2306 /* we don't want to end_page_writeback on
2307 * a compressed extent. this happens
2308 * elsewhere
2309 */
2310 nr++;
2311 }
2312
2313 cur += iosize;
Chris Mason7f3c74f2008-07-18 12:01:11 -04002314 pg_offset += iosize;
Chris Masone6dcd2d2008-07-17 12:53:50 -04002315 unlock_start = cur;
Chris Masond1310b22008-01-24 16:13:08 -05002316 continue;
2317 }
Chris Masond1310b22008-01-24 16:13:08 -05002318 /* leave this out until we have a page_mkwrite call */
2319 if (0 && !test_range_bit(tree, cur, cur + iosize - 1,
2320 EXTENT_DIRTY, 0)) {
2321 cur = cur + iosize;
Chris Mason7f3c74f2008-07-18 12:01:11 -04002322 pg_offset += iosize;
Chris Masond1310b22008-01-24 16:13:08 -05002323 continue;
2324 }
Chris Masonc8b97812008-10-29 14:49:59 -04002325
Chris Masond1310b22008-01-24 16:13:08 -05002326 clear_extent_dirty(tree, cur, cur + iosize - 1, GFP_NOFS);
2327 if (tree->ops && tree->ops->writepage_io_hook) {
2328 ret = tree->ops->writepage_io_hook(page, cur,
2329 cur + iosize - 1);
2330 } else {
2331 ret = 0;
2332 }
Chris Mason1259ab72008-05-12 13:39:03 -04002333 if (ret) {
Chris Masond1310b22008-01-24 16:13:08 -05002334 SetPageError(page);
Chris Mason1259ab72008-05-12 13:39:03 -04002335 } else {
Chris Masond1310b22008-01-24 16:13:08 -05002336 unsigned long max_nr = end_index + 1;
Chris Mason7f3c74f2008-07-18 12:01:11 -04002337
Chris Masond1310b22008-01-24 16:13:08 -05002338 set_range_writeback(tree, cur, cur + iosize - 1);
2339 if (!PageWriteback(page)) {
Chris Masond3977122009-01-05 21:25:51 -05002340 printk(KERN_ERR "btrfs warning page %lu not "
2341 "writeback, cur %llu end %llu\n",
2342 page->index, (unsigned long long)cur,
Chris Masond1310b22008-01-24 16:13:08 -05002343 (unsigned long long)end);
2344 }
2345
2346 ret = submit_extent_page(WRITE, tree, page, sector,
Chris Mason7f3c74f2008-07-18 12:01:11 -04002347 iosize, pg_offset, bdev,
Chris Masond1310b22008-01-24 16:13:08 -05002348 &epd->bio, max_nr,
Chris Masonc8b97812008-10-29 14:49:59 -04002349 end_bio_extent_writepage,
2350 0, 0, 0);
Chris Masond1310b22008-01-24 16:13:08 -05002351 if (ret)
2352 SetPageError(page);
2353 }
2354 cur = cur + iosize;
Chris Mason7f3c74f2008-07-18 12:01:11 -04002355 pg_offset += iosize;
Chris Masond1310b22008-01-24 16:13:08 -05002356 nr++;
2357 }
2358done:
2359 if (nr == 0) {
2360 /* make sure the mapping tag for page dirty gets cleared */
2361 set_page_writeback(page);
2362 end_page_writeback(page);
2363 }
Chris Masone6dcd2d2008-07-17 12:53:50 -04002364 if (unlock_start <= page_end)
2365 unlock_extent(tree, unlock_start, page_end, GFP_NOFS);
Chris Masond1310b22008-01-24 16:13:08 -05002366 unlock_page(page);
Chris Mason771ed682008-11-06 22:02:51 -05002367
2368update_nr_written:
2369 wbc->nr_to_write -= nr_written;
2370 if (wbc->range_cyclic || (wbc->nr_to_write > 0 &&
2371 wbc->range_start == 0 && wbc->range_end == LLONG_MAX))
2372 page->mapping->writeback_index = page->index + nr_written;
Chris Masond1310b22008-01-24 16:13:08 -05002373 return 0;
2374}
2375
Chris Masond1310b22008-01-24 16:13:08 -05002376/**
Chris Mason4bef0842008-09-08 11:18:08 -04002377 * extent_write_cache_pages - walk the list of dirty pages of the given address space and write all of them.
Chris Masond1310b22008-01-24 16:13:08 -05002378 * @mapping: address space structure to write
2379 * @wbc: subtract the number of written pages from *@wbc->nr_to_write
2380 * @writepage: function called for each page
2381 * @data: data passed to writepage function
2382 *
2383 * If a page is already under I/O, write_cache_pages() skips it, even
2384 * if it's dirty. This is desirable behaviour for memory-cleaning writeback,
2385 * but it is INCORRECT for data-integrity system calls such as fsync(). fsync()
2386 * and msync() need to guarantee that all the data which was dirty at the time
2387 * the call was made get new I/O started against them. If wbc->sync_mode is
2388 * WB_SYNC_ALL then we were called for data integrity and we must wait for
2389 * existing IO to complete.
2390 */
Christoph Hellwigb2950862008-12-02 09:54:17 -05002391static int extent_write_cache_pages(struct extent_io_tree *tree,
Chris Mason4bef0842008-09-08 11:18:08 -04002392 struct address_space *mapping,
2393 struct writeback_control *wbc,
Chris Masond2c3f4f2008-11-19 12:44:22 -05002394 writepage_t writepage, void *data,
2395 void (*flush_fn)(void *))
Chris Masond1310b22008-01-24 16:13:08 -05002396{
2397 struct backing_dev_info *bdi = mapping->backing_dev_info;
2398 int ret = 0;
2399 int done = 0;
2400 struct pagevec pvec;
2401 int nr_pages;
2402 pgoff_t index;
2403 pgoff_t end; /* Inclusive */
2404 int scanned = 0;
2405 int range_whole = 0;
2406
2407 if (wbc->nonblocking && bdi_write_congested(bdi)) {
2408 wbc->encountered_congestion = 1;
2409 return 0;
2410 }
2411
2412 pagevec_init(&pvec, 0);
2413 if (wbc->range_cyclic) {
2414 index = mapping->writeback_index; /* Start from prev offset */
2415 end = -1;
2416 } else {
2417 index = wbc->range_start >> PAGE_CACHE_SHIFT;
2418 end = wbc->range_end >> PAGE_CACHE_SHIFT;
2419 if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
2420 range_whole = 1;
2421 scanned = 1;
2422 }
2423retry:
2424 while (!done && (index <= end) &&
2425 (nr_pages = pagevec_lookup_tag(&pvec, mapping, &index,
Chris Masond3977122009-01-05 21:25:51 -05002426 PAGECACHE_TAG_DIRTY, min(end - index,
2427 (pgoff_t)PAGEVEC_SIZE-1) + 1))) {
Chris Masond1310b22008-01-24 16:13:08 -05002428 unsigned i;
2429
2430 scanned = 1;
2431 for (i = 0; i < nr_pages; i++) {
2432 struct page *page = pvec.pages[i];
2433
2434 /*
2435 * At this point we hold neither mapping->tree_lock nor
2436 * lock on the page itself: the page may be truncated or
2437 * invalidated (changing page->mapping to NULL), or even
2438 * swizzled back from swapper_space to tmpfs file
2439 * mapping
2440 */
Chris Mason4bef0842008-09-08 11:18:08 -04002441 if (tree->ops && tree->ops->write_cache_pages_lock_hook)
2442 tree->ops->write_cache_pages_lock_hook(page);
2443 else
2444 lock_page(page);
Chris Masond1310b22008-01-24 16:13:08 -05002445
2446 if (unlikely(page->mapping != mapping)) {
2447 unlock_page(page);
2448 continue;
2449 }
2450
2451 if (!wbc->range_cyclic && page->index > end) {
2452 done = 1;
2453 unlock_page(page);
2454 continue;
2455 }
2456
Chris Masond2c3f4f2008-11-19 12:44:22 -05002457 if (wbc->sync_mode != WB_SYNC_NONE) {
Chris Mason0e6bd952008-11-20 10:46:35 -05002458 if (PageWriteback(page))
2459 flush_fn(data);
Chris Masond1310b22008-01-24 16:13:08 -05002460 wait_on_page_writeback(page);
Chris Masond2c3f4f2008-11-19 12:44:22 -05002461 }
Chris Masond1310b22008-01-24 16:13:08 -05002462
2463 if (PageWriteback(page) ||
2464 !clear_page_dirty_for_io(page)) {
2465 unlock_page(page);
2466 continue;
2467 }
2468
2469 ret = (*writepage)(page, wbc, data);
2470
2471 if (unlikely(ret == AOP_WRITEPAGE_ACTIVATE)) {
2472 unlock_page(page);
2473 ret = 0;
2474 }
Chris Mason771ed682008-11-06 22:02:51 -05002475 if (ret || wbc->nr_to_write <= 0)
Chris Masond1310b22008-01-24 16:13:08 -05002476 done = 1;
2477 if (wbc->nonblocking && bdi_write_congested(bdi)) {
2478 wbc->encountered_congestion = 1;
2479 done = 1;
2480 }
2481 }
2482 pagevec_release(&pvec);
2483 cond_resched();
2484 }
2485 if (!scanned && !done) {
2486 /*
2487 * We hit the last page and there is more work to be done: wrap
2488 * back to the start of the file
2489 */
2490 scanned = 1;
2491 index = 0;
2492 goto retry;
2493 }
Chris Masond1310b22008-01-24 16:13:08 -05002494 return ret;
2495}
Chris Masond1310b22008-01-24 16:13:08 -05002496
Chris Masond2c3f4f2008-11-19 12:44:22 -05002497static noinline void flush_write_bio(void *data)
2498{
2499 struct extent_page_data *epd = data;
2500 if (epd->bio) {
2501 submit_one_bio(WRITE, epd->bio, 0, 0);
2502 epd->bio = NULL;
2503 }
2504}
2505
Chris Masond1310b22008-01-24 16:13:08 -05002506int extent_write_full_page(struct extent_io_tree *tree, struct page *page,
2507 get_extent_t *get_extent,
2508 struct writeback_control *wbc)
2509{
2510 int ret;
2511 struct address_space *mapping = page->mapping;
2512 struct extent_page_data epd = {
2513 .bio = NULL,
2514 .tree = tree,
2515 .get_extent = get_extent,
Chris Mason771ed682008-11-06 22:02:51 -05002516 .extent_locked = 0,
Chris Masond1310b22008-01-24 16:13:08 -05002517 };
2518 struct writeback_control wbc_writepages = {
2519 .bdi = wbc->bdi,
2520 .sync_mode = WB_SYNC_NONE,
2521 .older_than_this = NULL,
2522 .nr_to_write = 64,
2523 .range_start = page_offset(page) + PAGE_CACHE_SIZE,
2524 .range_end = (loff_t)-1,
2525 };
2526
2527
2528 ret = __extent_writepage(page, wbc, &epd);
2529
Chris Mason4bef0842008-09-08 11:18:08 -04002530 extent_write_cache_pages(tree, mapping, &wbc_writepages,
Chris Masond2c3f4f2008-11-19 12:44:22 -05002531 __extent_writepage, &epd, flush_write_bio);
Chris Masond3977122009-01-05 21:25:51 -05002532 if (epd.bio)
Chris Masonc8b97812008-10-29 14:49:59 -04002533 submit_one_bio(WRITE, epd.bio, 0, 0);
Chris Masond1310b22008-01-24 16:13:08 -05002534 return ret;
2535}
2536EXPORT_SYMBOL(extent_write_full_page);
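
/*
 * note on extent_write_full_page() above: after writing the requested
 * page it opportunistically writes back up to 64 more dirty pages that
 * start just past it (wbc_writepages.range_start is the byte after this
 * page), so nearby dirty data rides along in the same pass
 */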
2537
Chris Mason771ed682008-11-06 22:02:51 -05002538int extent_write_locked_range(struct extent_io_tree *tree, struct inode *inode,
2539 u64 start, u64 end, get_extent_t *get_extent,
2540 int mode)
2541{
2542 int ret = 0;
2543 struct address_space *mapping = inode->i_mapping;
2544 struct page *page;
2545 unsigned long nr_pages = (end - start + PAGE_CACHE_SIZE) >>
2546 PAGE_CACHE_SHIFT;
2547
2548 struct extent_page_data epd = {
2549 .bio = NULL,
2550 .tree = tree,
2551 .get_extent = get_extent,
2552 .extent_locked = 1,
2553 };
2554 struct writeback_control wbc_writepages = {
2555 .bdi = inode->i_mapping->backing_dev_info,
2556 .sync_mode = mode,
2557 .older_than_this = NULL,
2558 .nr_to_write = nr_pages * 2,
2559 .range_start = start,
2560 .range_end = end + 1,
2561 };
2562
Chris Masond3977122009-01-05 21:25:51 -05002563 while (start <= end) {
Chris Mason771ed682008-11-06 22:02:51 -05002564 page = find_get_page(mapping, start >> PAGE_CACHE_SHIFT);
2565 if (clear_page_dirty_for_io(page))
2566 ret = __extent_writepage(page, &wbc_writepages, &epd);
2567 else {
2568 if (tree->ops && tree->ops->writepage_end_io_hook)
2569 tree->ops->writepage_end_io_hook(page, start,
2570 start + PAGE_CACHE_SIZE - 1,
2571 NULL, 1);
2572 unlock_page(page);
2573 }
2574 page_cache_release(page);
2575 start += PAGE_CACHE_SIZE;
2576 }
2577
2578 if (epd.bio)
2579 submit_one_bio(WRITE, epd.bio, 0, 0);
2580 return ret;
2581}
2582EXPORT_SYMBOL(extent_write_locked_range);
2583
Chris Masond1310b22008-01-24 16:13:08 -05002584
2585int extent_writepages(struct extent_io_tree *tree,
2586 struct address_space *mapping,
2587 get_extent_t *get_extent,
2588 struct writeback_control *wbc)
2589{
2590 int ret = 0;
2591 struct extent_page_data epd = {
2592 .bio = NULL,
2593 .tree = tree,
2594 .get_extent = get_extent,
Chris Mason771ed682008-11-06 22:02:51 -05002595 .extent_locked = 0,
Chris Masond1310b22008-01-24 16:13:08 -05002596 };
2597
Chris Mason4bef0842008-09-08 11:18:08 -04002598 ret = extent_write_cache_pages(tree, mapping, wbc,
Chris Masond2c3f4f2008-11-19 12:44:22 -05002599 __extent_writepage, &epd,
2600 flush_write_bio);
Chris Masond3977122009-01-05 21:25:51 -05002601 if (epd.bio)
Chris Masonc8b97812008-10-29 14:49:59 -04002602 submit_one_bio(WRITE, epd.bio, 0, 0);
Chris Masond1310b22008-01-24 16:13:08 -05002603 return ret;
2604}
2605EXPORT_SYMBOL(extent_writepages);
2606
2607int extent_readpages(struct extent_io_tree *tree,
2608 struct address_space *mapping,
2609 struct list_head *pages, unsigned nr_pages,
2610 get_extent_t get_extent)
2611{
2612 struct bio *bio = NULL;
2613 unsigned page_idx;
2614 struct pagevec pvec;
Chris Masonc8b97812008-10-29 14:49:59 -04002615 unsigned long bio_flags = 0;
Chris Masond1310b22008-01-24 16:13:08 -05002616
2617 pagevec_init(&pvec, 0);
2618 for (page_idx = 0; page_idx < nr_pages; page_idx++) {
2619 struct page *page = list_entry(pages->prev, struct page, lru);
2620
2621 prefetchw(&page->flags);
2622 list_del(&page->lru);
2623 /*
2624 * what we want to do here is call add_to_page_cache_lru,
2625 * but that isn't exported, so we reproduce it here
2626 */
2627 if (!add_to_page_cache(page, mapping,
2628 page->index, GFP_KERNEL)) {
2629
2630 /* open coding of lru_cache_add, also not exported */
2631 page_cache_get(page);
2632 if (!pagevec_add(&pvec, page))
Chris Mason15916de2008-11-19 21:17:22 -05002633 __pagevec_lru_add_file(&pvec);
Chris Masonf1885912008-04-09 16:28:12 -04002634 __extent_read_full_page(tree, page, get_extent,
Chris Masonc8b97812008-10-29 14:49:59 -04002635 &bio, 0, &bio_flags);
Chris Masond1310b22008-01-24 16:13:08 -05002636 }
2637 page_cache_release(page);
2638 }
2639 if (pagevec_count(&pvec))
Chris Mason15916de2008-11-19 21:17:22 -05002640 __pagevec_lru_add_file(&pvec);
Chris Masond1310b22008-01-24 16:13:08 -05002641 BUG_ON(!list_empty(pages));
2642 if (bio)
Chris Masonc8b97812008-10-29 14:49:59 -04002643 submit_one_bio(READ, bio, 0, bio_flags);
Chris Masond1310b22008-01-24 16:13:08 -05002644 return 0;
2645}
2646EXPORT_SYMBOL(extent_readpages);
2647
2648/*
2649 * basic invalidatepage code, this waits on any locked or writeback
2650 * ranges corresponding to the page, and then deletes any extent state
2651 * records from the tree
2652 */
2653int extent_invalidatepage(struct extent_io_tree *tree,
2654 struct page *page, unsigned long offset)
2655{
2656 u64 start = ((u64)page->index << PAGE_CACHE_SHIFT);
2657 u64 end = start + PAGE_CACHE_SIZE - 1;
2658 size_t blocksize = page->mapping->host->i_sb->s_blocksize;
2659
Chris Masond3977122009-01-05 21:25:51 -05002660 start += (offset + blocksize - 1) & ~(blocksize - 1);
Chris Masond1310b22008-01-24 16:13:08 -05002661 if (start > end)
2662 return 0;
2663
2664 lock_extent(tree, start, end, GFP_NOFS);
2665 wait_on_extent_writeback(tree, start, end);
2666 clear_extent_bit(tree, start, end,
2667 EXTENT_LOCKED | EXTENT_DIRTY | EXTENT_DELALLOC,
2668 1, 1, GFP_NOFS);
2669 return 0;
2670}
2671EXPORT_SYMBOL(extent_invalidatepage);
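
/*
 * worked example of the rounding above, assuming a 4K block size: an
 * invalidate starting at offset 100 into the page rounds 100 up to
 * 4096, so the partially invalidated first block keeps its extent state
 * and only fully invalidated blocks are cleared; offset 0 rounds to 0
 * and the whole page range is cleared
 */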
2672
2673/*
 2674 * simple commit_write call, set_page_dirty is used to mark both
 2675 * the page and the extent records as dirty
2676 */
2677int extent_commit_write(struct extent_io_tree *tree,
2678 struct inode *inode, struct page *page,
2679 unsigned from, unsigned to)
2680{
2681 loff_t pos = ((loff_t)page->index << PAGE_CACHE_SHIFT) + to;
2682
2683 set_page_extent_mapped(page);
2684 set_page_dirty(page);
2685
2686 if (pos > inode->i_size) {
2687 i_size_write(inode, pos);
2688 mark_inode_dirty(inode);
2689 }
2690 return 0;
2691}
2692EXPORT_SYMBOL(extent_commit_write);
2693
2694int extent_prepare_write(struct extent_io_tree *tree,
2695 struct inode *inode, struct page *page,
2696 unsigned from, unsigned to, get_extent_t *get_extent)
2697{
2698 u64 page_start = (u64)page->index << PAGE_CACHE_SHIFT;
2699 u64 page_end = page_start + PAGE_CACHE_SIZE - 1;
2700 u64 block_start;
2701 u64 orig_block_start;
2702 u64 block_end;
2703 u64 cur_end;
2704 struct extent_map *em;
2705 unsigned blocksize = 1 << inode->i_blkbits;
2706 size_t page_offset = 0;
2707 size_t block_off_start;
2708 size_t block_off_end;
2709 int err = 0;
2710 int iocount = 0;
2711 int ret = 0;
2712 int isnew;
2713
2714 set_page_extent_mapped(page);
2715
2716 block_start = (page_start + from) & ~((u64)blocksize - 1);
2717 block_end = (page_start + to - 1) | (blocksize - 1);
2718 orig_block_start = block_start;
2719
2720 lock_extent(tree, page_start, page_end, GFP_NOFS);
Chris Masond3977122009-01-05 21:25:51 -05002721 while (block_start <= block_end) {
Chris Masond1310b22008-01-24 16:13:08 -05002722 em = get_extent(inode, page, page_offset, block_start,
2723 block_end - block_start + 1, 1);
Chris Masond3977122009-01-05 21:25:51 -05002724 if (IS_ERR(em) || !em)
Chris Masond1310b22008-01-24 16:13:08 -05002725 goto err;
Chris Masond3977122009-01-05 21:25:51 -05002726
Chris Masond1310b22008-01-24 16:13:08 -05002727 cur_end = min(block_end, extent_map_end(em) - 1);
2728 block_off_start = block_start & (PAGE_CACHE_SIZE - 1);
2729 block_off_end = block_off_start + blocksize;
2730 isnew = clear_extent_new(tree, block_start, cur_end, GFP_NOFS);
2731
2732 if (!PageUptodate(page) && isnew &&
2733 (block_off_end > to || block_off_start < from)) {
2734 void *kaddr;
2735
2736 kaddr = kmap_atomic(page, KM_USER0);
2737 if (block_off_end > to)
2738 memset(kaddr + to, 0, block_off_end - to);
2739 if (block_off_start < from)
2740 memset(kaddr + block_off_start, 0,
2741 from - block_off_start);
2742 flush_dcache_page(page);
2743 kunmap_atomic(kaddr, KM_USER0);
2744 }
2745 if ((em->block_start != EXTENT_MAP_HOLE &&
2746 em->block_start != EXTENT_MAP_INLINE) &&
2747 !isnew && !PageUptodate(page) &&
2748 (block_off_end > to || block_off_start < from) &&
2749 !test_range_bit(tree, block_start, cur_end,
2750 EXTENT_UPTODATE, 1)) {
2751 u64 sector;
2752 u64 extent_offset = block_start - em->start;
2753 size_t iosize;
2754 sector = (em->block_start + extent_offset) >> 9;
2755 iosize = (cur_end - block_start + blocksize) &
2756 ~((u64)blocksize - 1);
2757 /*
2758 * we've already got the extent locked, but we
2759 * need to split the state such that our end_bio
2760 * handler can clear the lock.
2761 */
2762 set_extent_bit(tree, block_start,
2763 block_start + iosize - 1,
2764 EXTENT_LOCKED, 0, NULL, GFP_NOFS);
2765 ret = submit_extent_page(READ, tree, page,
2766 sector, iosize, page_offset, em->bdev,
2767 NULL, 1,
Chris Masonc8b97812008-10-29 14:49:59 -04002768 end_bio_extent_preparewrite, 0,
2769 0, 0);
Chris Masond1310b22008-01-24 16:13:08 -05002770 iocount++;
2771 block_start = block_start + iosize;
2772 } else {
2773 set_extent_uptodate(tree, block_start, cur_end,
2774 GFP_NOFS);
2775 unlock_extent(tree, block_start, cur_end, GFP_NOFS);
2776 block_start = cur_end + 1;
2777 }
2778 page_offset = block_start & (PAGE_CACHE_SIZE - 1);
2779 free_extent_map(em);
2780 }
2781 if (iocount) {
2782 wait_extent_bit(tree, orig_block_start,
2783 block_end, EXTENT_LOCKED);
2784 }
2785 check_page_uptodate(tree, page);
2786err:
2787 /* FIXME, zero out newly allocated blocks on error */
2788 return err;
2789}
2790EXPORT_SYMBOL(extent_prepare_write);
2791
2792/*
Chris Mason7b13b7b2008-04-18 10:29:50 -04002793 * a helper for releasepage, this tests for areas of the page that
2794 * are locked or under IO and drops the related state bits if it is safe
2795 * to drop the page.
2796 */
2797int try_release_extent_state(struct extent_map_tree *map,
2798 struct extent_io_tree *tree, struct page *page,
2799 gfp_t mask)
2800{
2801 u64 start = (u64)page->index << PAGE_CACHE_SHIFT;
2802 u64 end = start + PAGE_CACHE_SIZE - 1;
2803 int ret = 1;
2804
Chris Mason211f90e2008-07-18 11:56:15 -04002805 if (test_range_bit(tree, start, end,
2806 EXTENT_IOBITS | EXTENT_ORDERED, 0))
Chris Mason7b13b7b2008-04-18 10:29:50 -04002807 ret = 0;
2808 else {
2809 if ((mask & GFP_NOFS) == GFP_NOFS)
2810 mask = GFP_NOFS;
2811 clear_extent_bit(tree, start, end, EXTENT_UPTODATE,
2812 1, 1, mask);
2813 }
2814 return ret;
2815}
2816EXPORT_SYMBOL(try_release_extent_state);
2817
2818/*
Chris Masond1310b22008-01-24 16:13:08 -05002819 * a helper for releasepage. As long as there are no locked extents
2820 * in the range corresponding to the page, both state records and extent
2821 * map records are removed
2822 */
2823int try_release_extent_mapping(struct extent_map_tree *map,
Chris Mason70dec802008-01-29 09:59:12 -05002824 struct extent_io_tree *tree, struct page *page,
2825 gfp_t mask)
Chris Masond1310b22008-01-24 16:13:08 -05002826{
2827 struct extent_map *em;
2828 u64 start = (u64)page->index << PAGE_CACHE_SHIFT;
2829 u64 end = start + PAGE_CACHE_SIZE - 1;
Chris Mason7b13b7b2008-04-18 10:29:50 -04002830
Chris Mason70dec802008-01-29 09:59:12 -05002831 if ((mask & __GFP_WAIT) &&
2832 page->mapping->host->i_size > 16 * 1024 * 1024) {
Yan39b56372008-02-15 10:40:50 -05002833 u64 len;
Chris Mason70dec802008-01-29 09:59:12 -05002834 while (start <= end) {
Yan39b56372008-02-15 10:40:50 -05002835 len = end - start + 1;
Chris Mason70dec802008-01-29 09:59:12 -05002836 spin_lock(&map->lock);
Yan39b56372008-02-15 10:40:50 -05002837 em = lookup_extent_mapping(map, start, len);
Chris Mason70dec802008-01-29 09:59:12 -05002838 if (!em || IS_ERR(em)) {
2839 spin_unlock(&map->lock);
2840 break;
2841 }
Chris Mason7f3c74f2008-07-18 12:01:11 -04002842 if (test_bit(EXTENT_FLAG_PINNED, &em->flags) ||
2843 em->start != start) {
Chris Mason70dec802008-01-29 09:59:12 -05002844 spin_unlock(&map->lock);
2845 free_extent_map(em);
2846 break;
2847 }
2848 if (!test_range_bit(tree, em->start,
2849 extent_map_end(em) - 1,
Chris Masonc8b97812008-10-29 14:49:59 -04002850 EXTENT_LOCKED | EXTENT_WRITEBACK |
2851 EXTENT_ORDERED,
2852 0)) {
Chris Mason70dec802008-01-29 09:59:12 -05002853 remove_extent_mapping(map, em);
2854 /* once for the rb tree */
2855 free_extent_map(em);
2856 }
2857 start = extent_map_end(em);
Chris Masond1310b22008-01-24 16:13:08 -05002858 spin_unlock(&map->lock);
Chris Mason70dec802008-01-29 09:59:12 -05002859
2860 /* once for us */
Chris Masond1310b22008-01-24 16:13:08 -05002861 free_extent_map(em);
2862 }
Chris Masond1310b22008-01-24 16:13:08 -05002863 }
Chris Mason7b13b7b2008-04-18 10:29:50 -04002864 return try_release_extent_state(map, tree, page, mask);
Chris Masond1310b22008-01-24 16:13:08 -05002865}
2866EXPORT_SYMBOL(try_release_extent_mapping);
2867
2868sector_t extent_bmap(struct address_space *mapping, sector_t iblock,
2869 get_extent_t *get_extent)
2870{
2871 struct inode *inode = mapping->host;
2872 u64 start = iblock << inode->i_blkbits;
2873 sector_t sector = 0;
Yan Zhengd899e052008-10-30 14:25:28 -04002874 size_t blksize = (1 << inode->i_blkbits);
Chris Masond1310b22008-01-24 16:13:08 -05002875 struct extent_map *em;
2876
Yan Zhengd899e052008-10-30 14:25:28 -04002877 lock_extent(&BTRFS_I(inode)->io_tree, start, start + blksize - 1,
2878 GFP_NOFS);
2879 em = get_extent(inode, NULL, 0, start, blksize, 0);
2880 unlock_extent(&BTRFS_I(inode)->io_tree, start, start + blksize - 1,
2881 GFP_NOFS);
Chris Masond1310b22008-01-24 16:13:08 -05002882 if (!em || IS_ERR(em))
2883 return 0;
2884
Yan Zhengd899e052008-10-30 14:25:28 -04002885 if (em->block_start > EXTENT_MAP_LAST_BYTE)
Chris Masond1310b22008-01-24 16:13:08 -05002886 goto out;
2887
2888 sector = (em->block_start + start - em->start) >> inode->i_blkbits;
Chris Masond1310b22008-01-24 16:13:08 -05002889out:
2890 free_extent_map(em);
2891 return sector;
2892}
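
/*
 * extent_bmap() only reports a sector for plain mapped extents: any
 * block_start above EXTENT_MAP_LAST_BYTE (holes, inline extents and
 * similar special values) falls through with sector still 0, which is
 * the traditional bmap way of saying "no block here"
 */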
2893
Chris Masond1310b22008-01-24 16:13:08 -05002894static inline struct page *extent_buffer_page(struct extent_buffer *eb,
2895 unsigned long i)
2896{
2897 struct page *p;
2898 struct address_space *mapping;
2899
2900 if (i == 0)
2901 return eb->first_page;
2902 i += eb->start >> PAGE_CACHE_SHIFT;
2903 mapping = eb->first_page->mapping;
Chris Mason33958dc2008-07-30 10:29:12 -04002904 if (!mapping)
2905 return NULL;
Sven Wegener0ee0fda2008-07-30 16:54:26 -04002906
2907 /*
2908 * extent_buffer_page is only called after pinning the page
2909 * by increasing the reference count. So we know the page must
2910 * be in the radix tree.
2911 */
Sven Wegener0ee0fda2008-07-30 16:54:26 -04002912 rcu_read_lock();
Chris Masond1310b22008-01-24 16:13:08 -05002913 p = radix_tree_lookup(&mapping->page_tree, i);
Sven Wegener0ee0fda2008-07-30 16:54:26 -04002914 rcu_read_unlock();
Chris Mason2b1f55b2008-09-24 11:48:04 -04002915
Chris Masond1310b22008-01-24 16:13:08 -05002916 return p;
2917}
2918
Chris Mason6af118ce2008-07-22 11:18:07 -04002919static inline unsigned long num_extent_pages(u64 start, u64 len)
Chris Masonce9adaa2008-04-09 16:28:12 -04002920{
Chris Mason6af118ce2008-07-22 11:18:07 -04002921 return ((start + len + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT) -
2922 (start >> PAGE_CACHE_SHIFT);
Chris Mason728131d2008-04-09 16:28:12 -04002923}
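
/*
 * quick example of the math above, assuming 4K pages: start = 8192,
 * len = 16384 gives (8192 + 16384 + 4095) >> 12 = 6 minus
 * (8192 >> 12) = 2, i.e. 4 pages, while an unaligned buffer such as
 * start = 6144, len = 4096 straddles a page boundary and counts as
 * (14335 >> 12) - (6144 >> 12) = 3 - 1 = 2 pages
 */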
2924
Chris Masond1310b22008-01-24 16:13:08 -05002925static struct extent_buffer *__alloc_extent_buffer(struct extent_io_tree *tree,
2926 u64 start,
2927 unsigned long len,
2928 gfp_t mask)
2929{
2930 struct extent_buffer *eb = NULL;
Chris Mason4bef0842008-09-08 11:18:08 -04002931#ifdef LEAK_DEBUG
Chris Mason2d2ae542008-03-26 16:24:23 -04002932 unsigned long flags;
Chris Mason4bef0842008-09-08 11:18:08 -04002933#endif
Chris Masond1310b22008-01-24 16:13:08 -05002934
Chris Masond1310b22008-01-24 16:13:08 -05002935 eb = kmem_cache_zalloc(extent_buffer_cache, mask);
 if (!eb)
 return NULL;
Chris Masond1310b22008-01-24 16:13:08 -05002936 eb->start = start;
2937 eb->len = len;
Chris Masona61e6f22008-07-22 11:18:08 -04002938 mutex_init(&eb->mutex);
Chris Mason4bef0842008-09-08 11:18:08 -04002939#ifdef LEAK_DEBUG
Chris Mason2d2ae542008-03-26 16:24:23 -04002940 spin_lock_irqsave(&leak_lock, flags);
2941 list_add(&eb->leak_list, &buffers);
2942 spin_unlock_irqrestore(&leak_lock, flags);
Chris Mason4bef0842008-09-08 11:18:08 -04002943#endif
Chris Masond1310b22008-01-24 16:13:08 -05002944 atomic_set(&eb->refs, 1);
2945
2946 return eb;
2947}
2948
2949static void __free_extent_buffer(struct extent_buffer *eb)
2950{
Chris Mason4bef0842008-09-08 11:18:08 -04002951#ifdef LEAK_DEBUG
Chris Mason2d2ae542008-03-26 16:24:23 -04002952 unsigned long flags;
2953 spin_lock_irqsave(&leak_lock, flags);
2954 list_del(&eb->leak_list);
2955 spin_unlock_irqrestore(&leak_lock, flags);
Chris Mason4bef0842008-09-08 11:18:08 -04002956#endif
Chris Masond1310b22008-01-24 16:13:08 -05002957 kmem_cache_free(extent_buffer_cache, eb);
2958}
2959
2960struct extent_buffer *alloc_extent_buffer(struct extent_io_tree *tree,
2961 u64 start, unsigned long len,
2962 struct page *page0,
2963 gfp_t mask)
2964{
2965 unsigned long num_pages = num_extent_pages(start, len);
2966 unsigned long i;
2967 unsigned long index = start >> PAGE_CACHE_SHIFT;
2968 struct extent_buffer *eb;
Chris Mason6af118ce2008-07-22 11:18:07 -04002969 struct extent_buffer *exists = NULL;
Chris Masond1310b22008-01-24 16:13:08 -05002970 struct page *p;
2971 struct address_space *mapping = tree->mapping;
2972 int uptodate = 1;
2973
Chris Mason6af118ce2008-07-22 11:18:07 -04002974 spin_lock(&tree->buffer_lock);
2975 eb = buffer_search(tree, start);
2976 if (eb) {
2977 atomic_inc(&eb->refs);
2978 spin_unlock(&tree->buffer_lock);
Josef Bacik0f9dd462008-09-23 13:14:11 -04002979 mark_page_accessed(eb->first_page);
Chris Mason6af118ce2008-07-22 11:18:07 -04002980 return eb;
2981 }
2982 spin_unlock(&tree->buffer_lock);
2983
Chris Masond1310b22008-01-24 16:13:08 -05002984 eb = __alloc_extent_buffer(tree, start, len, mask);
Peter2b114d12008-04-01 11:21:40 -04002985 if (!eb)
Chris Masond1310b22008-01-24 16:13:08 -05002986 return NULL;
2987
Chris Masond1310b22008-01-24 16:13:08 -05002988 if (page0) {
2989 eb->first_page = page0;
2990 i = 1;
2991 index++;
2992 page_cache_get(page0);
2993 mark_page_accessed(page0);
2994 set_page_extent_mapped(page0);
Chris Masond1310b22008-01-24 16:13:08 -05002995 set_page_extent_head(page0, len);
Chris Masonf1885912008-04-09 16:28:12 -04002996 uptodate = PageUptodate(page0);
Chris Masond1310b22008-01-24 16:13:08 -05002997 } else {
2998 i = 0;
2999 }
3000 for (; i < num_pages; i++, index++) {
3001 p = find_or_create_page(mapping, index, mask | __GFP_HIGHMEM);
3002 if (!p) {
3003 WARN_ON(1);
Chris Mason6af118ce2008-07-22 11:18:07 -04003004 goto free_eb;
Chris Masond1310b22008-01-24 16:13:08 -05003005 }
3006 set_page_extent_mapped(p);
3007 mark_page_accessed(p);
3008 if (i == 0) {
3009 eb->first_page = p;
3010 set_page_extent_head(p, len);
3011 } else {
3012 set_page_private(p, EXTENT_PAGE_PRIVATE);
3013 }
3014 if (!PageUptodate(p))
3015 uptodate = 0;
3016 unlock_page(p);
3017 }
3018 if (uptodate)
3019 eb->flags |= EXTENT_UPTODATE;
3020 eb->flags |= EXTENT_BUFFER_FILLED;
3021
Chris Mason6af118ce2008-07-22 11:18:07 -04003022 spin_lock(&tree->buffer_lock);
3023 exists = buffer_tree_insert(tree, start, &eb->rb_node);
3024 if (exists) {
3025 /* add one reference for the caller */
3026 atomic_inc(&exists->refs);
3027 spin_unlock(&tree->buffer_lock);
3028 goto free_eb;
3029 }
3030 spin_unlock(&tree->buffer_lock);
3031
3032 /* add one reference for the tree */
3033 atomic_inc(&eb->refs);
Chris Masond1310b22008-01-24 16:13:08 -05003034 return eb;
3035
Chris Mason6af118ce2008-07-22 11:18:07 -04003036free_eb:
Chris Masond1310b22008-01-24 16:13:08 -05003037 if (!atomic_dec_and_test(&eb->refs))
Chris Mason6af118ce2008-07-22 11:18:07 -04003038 return exists;
3039 for (index = 1; index < i; index++)
Chris Masond1310b22008-01-24 16:13:08 -05003040 page_cache_release(extent_buffer_page(eb, index));
Chris Mason6af118ce2008-07-22 11:18:07 -04003041 page_cache_release(extent_buffer_page(eb, 0));
Chris Masond1310b22008-01-24 16:13:08 -05003042 __free_extent_buffer(eb);
Chris Mason6af118ce2008-07-22 11:18:07 -04003043 return exists;
Chris Masond1310b22008-01-24 16:13:08 -05003044}
3045EXPORT_SYMBOL(alloc_extent_buffer);
3046
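/*
 * find_extent_buffer - look up an already cached extent_buffer at @start
 *
 * Returns the buffer with an extra reference held, or NULL if nothing is
 * cached for that offset.  No pages are allocated or read here; @len and
 * @mask are accepted for symmetry with alloc_extent_buffer() but unused.
 */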
3047struct extent_buffer *find_extent_buffer(struct extent_io_tree *tree,
3048 u64 start, unsigned long len,
3049 gfp_t mask)
3050{
Chris Masond1310b22008-01-24 16:13:08 -05003051 struct extent_buffer *eb;
Chris Masond1310b22008-01-24 16:13:08 -05003052
Chris Mason6af118ce2008-07-22 11:18:07 -04003053 spin_lock(&tree->buffer_lock);
3054 eb = buffer_search(tree, start);
3055 if (eb)
3056 atomic_inc(&eb->refs);
3057 spin_unlock(&tree->buffer_lock);
Chris Masond1310b22008-01-24 16:13:08 -05003058
Josef Bacik0f9dd462008-09-23 13:14:11 -04003059 if (eb)
3060 mark_page_accessed(eb->first_page);
3061
Chris Masond1310b22008-01-24 16:13:08 -05003062 return eb;
Chris Masond1310b22008-01-24 16:13:08 -05003063}
3064EXPORT_SYMBOL(find_extent_buffer);
3065
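/*
 * free_extent_buffer - drop one reference on @eb
 *
 * The last reference is meant to be dropped by try_release_extent_buffer()
 * under the tree's buffer_lock, so hitting zero here is a bug and only
 * trips the WARN_ON below; nothing is actually freed in that case.
 */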
3066void free_extent_buffer(struct extent_buffer *eb)
3067{
Chris Masond1310b22008-01-24 16:13:08 -05003068 if (!eb)
3069 return;
3070
3071 if (!atomic_dec_and_test(&eb->refs))
3072 return;
3073
Chris Mason6af118ce2008-07-22 11:18:07 -04003074 WARN_ON(1);
Chris Masond1310b22008-01-24 16:13:08 -05003075}
3076EXPORT_SYMBOL(free_extent_buffer);
3077
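/*
 * clear_extent_buffer_dirty - undo the dirtying of @eb
 *
 * Clears EXTENT_DIRTY for the buffer's range in the io tree, then walks the
 * backing pages, clearing the page dirty flag and the radix tree dirty tag.
 * A first or last page that is shared with a neighbouring, still-dirty
 * buffer is skipped (see the partial-page checks below).
 */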
3078int clear_extent_buffer_dirty(struct extent_io_tree *tree,
3079 struct extent_buffer *eb)
3080{
3081 int set;
3082 unsigned long i;
3083 unsigned long num_pages;
3084 struct page *page;
3085
3086 u64 start = eb->start;
3087 u64 end = start + eb->len - 1;
3088
3089 set = clear_extent_dirty(tree, start, end, GFP_NOFS);
3090 num_pages = num_extent_pages(eb->start, eb->len);
3091
3092 for (i = 0; i < num_pages; i++) {
3093 page = extent_buffer_page(eb, i);
Chris Masond2c3f4f2008-11-19 12:44:22 -05003094 if (!set && !PageDirty(page))
3095 continue;
3096
Chris Masona61e6f22008-07-22 11:18:08 -04003097 lock_page(page);
Chris Masond1310b22008-01-24 16:13:08 -05003098 if (i == 0)
3099 set_page_extent_head(page, eb->len);
3100 else
3101 set_page_private(page, EXTENT_PAGE_PRIVATE);
3102
3103 /*
3104 * if we're on the last page or the first page and the
3105 * block isn't aligned on a page boundary, do extra checks
3106		 * to make sure we don't clean a page that is partially dirty
3107 */
3108 if ((i == 0 && (eb->start & (PAGE_CACHE_SIZE - 1))) ||
3109 ((i == num_pages - 1) &&
3110 ((eb->start + eb->len) & (PAGE_CACHE_SIZE - 1)))) {
3111 start = (u64)page->index << PAGE_CACHE_SHIFT;
3112 end = start + PAGE_CACHE_SIZE - 1;
3113 if (test_range_bit(tree, start, end,
3114 EXTENT_DIRTY, 0)) {
Chris Masona61e6f22008-07-22 11:18:08 -04003115 unlock_page(page);
Chris Masond1310b22008-01-24 16:13:08 -05003116 continue;
3117 }
3118 }
3119 clear_page_dirty_for_io(page);
Sven Wegener0ee0fda2008-07-30 16:54:26 -04003120 spin_lock_irq(&page->mapping->tree_lock);
Chris Masond1310b22008-01-24 16:13:08 -05003121 if (!PageDirty(page)) {
3122 radix_tree_tag_clear(&page->mapping->page_tree,
3123 page_index(page),
3124 PAGECACHE_TAG_DIRTY);
3125 }
Sven Wegener0ee0fda2008-07-30 16:54:26 -04003126 spin_unlock_irq(&page->mapping->tree_lock);
Chris Masona61e6f22008-07-22 11:18:08 -04003127 unlock_page(page);
Chris Masond1310b22008-01-24 16:13:08 -05003128 }
3129 return 0;
3130}
3131EXPORT_SYMBOL(clear_extent_buffer_dirty);
3132
3133int wait_on_extent_buffer_writeback(struct extent_io_tree *tree,
3134 struct extent_buffer *eb)
3135{
3136 return wait_on_extent_writeback(tree, eb->start,
3137 eb->start + eb->len - 1);
3138}
3139EXPORT_SYMBOL(wait_on_extent_buffer_writeback);
3140
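/*
 * set_extent_buffer_dirty - mark every page backing @eb dirty
 *
 * page->private is re-established on each page first so writepage and
 * releasepage see a consistent first page, then the page is dirtied and
 * EXTENT_DIRTY is set in the io tree for the page's byte range.
 */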
3141int set_extent_buffer_dirty(struct extent_io_tree *tree,
3142 struct extent_buffer *eb)
3143{
3144 unsigned long i;
3145 unsigned long num_pages;
3146
3147 num_pages = num_extent_pages(eb->start, eb->len);
3148 for (i = 0; i < num_pages; i++) {
3149 struct page *page = extent_buffer_page(eb, i);
3150 /* writepage may need to do something special for the
3151	 * first page, so we have to make sure page->private is
3152 * properly set. releasepage may drop page->private
3153 * on us if the page isn't already dirty.
3154 */
Chris Masona1b32a52008-09-05 16:09:51 -04003155 lock_page(page);
Chris Masond1310b22008-01-24 16:13:08 -05003156 if (i == 0) {
Chris Masond1310b22008-01-24 16:13:08 -05003157 set_page_extent_head(page, eb->len);
3158 } else if (PagePrivate(page) &&
3159 page->private != EXTENT_PAGE_PRIVATE) {
Chris Masond1310b22008-01-24 16:13:08 -05003160 set_page_extent_mapped(page);
Chris Masond1310b22008-01-24 16:13:08 -05003161 }
3162 __set_page_dirty_nobuffers(extent_buffer_page(eb, i));
Chris Masona1b32a52008-09-05 16:09:51 -04003163 set_extent_dirty(tree, page_offset(page),
Chris Masond3977122009-01-05 21:25:51 -05003164 page_offset(page) + PAGE_CACHE_SIZE - 1,
Chris Masona1b32a52008-09-05 16:09:51 -04003165 GFP_NOFS);
3166 unlock_page(page);
Chris Masond1310b22008-01-24 16:13:08 -05003167 }
Chris Masona1b32a52008-09-05 16:09:51 -04003168 return 0;
Chris Masond1310b22008-01-24 16:13:08 -05003169}
3170EXPORT_SYMBOL(set_extent_buffer_dirty);
3171
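/*
 * clear_extent_buffer_uptodate - force @eb to be read again before reuse
 *
 * Drops EXTENT_UPTODATE from the buffer flags and from the io tree range,
 * and clears PageUptodate on every backing page that is present.
 */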
Chris Mason1259ab72008-05-12 13:39:03 -04003172int clear_extent_buffer_uptodate(struct extent_io_tree *tree,
3173 struct extent_buffer *eb)
3174{
3175 unsigned long i;
3176 struct page *page;
3177 unsigned long num_pages;
3178
3179 num_pages = num_extent_pages(eb->start, eb->len);
3180 eb->flags &= ~EXTENT_UPTODATE;
3181
3182 clear_extent_uptodate(tree, eb->start, eb->start + eb->len - 1,
3183 GFP_NOFS);
3184 for (i = 0; i < num_pages; i++) {
3185 page = extent_buffer_page(eb, i);
Chris Mason33958dc2008-07-30 10:29:12 -04003186 if (page)
3187 ClearPageUptodate(page);
Chris Mason1259ab72008-05-12 13:39:03 -04003188 }
3189 return 0;
3190}
3191
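/*
 * set_extent_buffer_uptodate - mark @eb and its pages uptodate
 *
 * A first or last page that the buffer only partially covers is run through
 * check_page_uptodate() instead of being flagged directly, so a shared page
 * is only marked uptodate when all of it really is.
 */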
Chris Masond1310b22008-01-24 16:13:08 -05003192int set_extent_buffer_uptodate(struct extent_io_tree *tree,
3193 struct extent_buffer *eb)
3194{
3195 unsigned long i;
3196 struct page *page;
3197 unsigned long num_pages;
3198
3199 num_pages = num_extent_pages(eb->start, eb->len);
3200
3201 set_extent_uptodate(tree, eb->start, eb->start + eb->len - 1,
3202 GFP_NOFS);
3203 for (i = 0; i < num_pages; i++) {
3204 page = extent_buffer_page(eb, i);
3205 if ((i == 0 && (eb->start & (PAGE_CACHE_SIZE - 1))) ||
3206 ((i == num_pages - 1) &&
3207 ((eb->start + eb->len) & (PAGE_CACHE_SIZE - 1)))) {
3208 check_page_uptodate(tree, page);
3209 continue;
3210 }
3211 SetPageUptodate(page);
3212 }
3213 return 0;
3214}
3215EXPORT_SYMBOL(set_extent_buffer_uptodate);
3216
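/*
 * extent_range_uptodate - return 1 if [start, end] is fully uptodate
 *
 * The io tree's EXTENT_UPTODATE bits are consulted first; on a miss each
 * page in the range is checked individually.  Note that the loop assumes
 * every page is present in the page cache: the result of find_get_page()
 * is dereferenced without a NULL check.
 */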
Chris Masonce9adaa2008-04-09 16:28:12 -04003217int extent_range_uptodate(struct extent_io_tree *tree,
3218 u64 start, u64 end)
3219{
3220 struct page *page;
3221 int ret;
3222 int pg_uptodate = 1;
3223 int uptodate;
3224 unsigned long index;
3225
3226 ret = test_range_bit(tree, start, end, EXTENT_UPTODATE, 1);
3227 if (ret)
3228 return 1;
Chris Masond3977122009-01-05 21:25:51 -05003229 while (start <= end) {
Chris Masonce9adaa2008-04-09 16:28:12 -04003230 index = start >> PAGE_CACHE_SHIFT;
3231 page = find_get_page(tree->mapping, index);
3232 uptodate = PageUptodate(page);
3233 page_cache_release(page);
3234 if (!uptodate) {
3235 pg_uptodate = 0;
3236 break;
3237 }
3238 start += PAGE_CACHE_SIZE;
3239 }
3240 return pg_uptodate;
3241}
3242
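/*
 * extent_buffer_uptodate - return 1 if @eb can be used without reading it
 *
 * Checks the cached EXTENT_UPTODATE flag, then the io tree bits, and
 * finally falls back to testing PageUptodate on every backing page.
 */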
Chris Masond1310b22008-01-24 16:13:08 -05003243int extent_buffer_uptodate(struct extent_io_tree *tree,
Chris Masonce9adaa2008-04-09 16:28:12 -04003244 struct extent_buffer *eb)
Chris Masond1310b22008-01-24 16:13:08 -05003245{
Chris Mason728131d2008-04-09 16:28:12 -04003246 int ret = 0;
Chris Masonce9adaa2008-04-09 16:28:12 -04003247 unsigned long num_pages;
3248 unsigned long i;
Chris Mason728131d2008-04-09 16:28:12 -04003249 struct page *page;
3250 int pg_uptodate = 1;
3251
Chris Masond1310b22008-01-24 16:13:08 -05003252 if (eb->flags & EXTENT_UPTODATE)
Chris Mason42352982008-04-28 16:40:52 -04003253 return 1;
Chris Mason728131d2008-04-09 16:28:12 -04003254
Chris Mason42352982008-04-28 16:40:52 -04003255 ret = test_range_bit(tree, eb->start, eb->start + eb->len - 1,
Chris Masond1310b22008-01-24 16:13:08 -05003256 EXTENT_UPTODATE, 1);
Chris Mason42352982008-04-28 16:40:52 -04003257 if (ret)
3258 return ret;
Chris Mason728131d2008-04-09 16:28:12 -04003259
3260 num_pages = num_extent_pages(eb->start, eb->len);
3261 for (i = 0; i < num_pages; i++) {
3262 page = extent_buffer_page(eb, i);
3263 if (!PageUptodate(page)) {
3264 pg_uptodate = 0;
3265 break;
3266 }
3267 }
Chris Mason42352982008-04-28 16:40:52 -04003268 return pg_uptodate;
Chris Masond1310b22008-01-24 16:13:08 -05003269}
3270EXPORT_SYMBOL(extent_buffer_uptodate);
3271
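/*
 * read_extent_buffer_pages - bring the pages backing @eb uptodate
 *
 * Pages from @start onward are locked (try-locked when @wait is zero) and
 * any that are not uptodate are submitted for read through
 * __extent_read_full_page(), sharing a bio where possible.  With @wait set
 * the caller sleeps until the reads finish.  Returns 0 on success, or -EIO
 * (or the submission error) if a page never became uptodate.
 */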
3272int read_extent_buffer_pages(struct extent_io_tree *tree,
3273 struct extent_buffer *eb,
Chris Masona86c12c2008-02-07 10:50:54 -05003274 u64 start, int wait,
Chris Masonf1885912008-04-09 16:28:12 -04003275 get_extent_t *get_extent, int mirror_num)
Chris Masond1310b22008-01-24 16:13:08 -05003276{
3277 unsigned long i;
3278 unsigned long start_i;
3279 struct page *page;
3280 int err;
3281 int ret = 0;
Chris Masonce9adaa2008-04-09 16:28:12 -04003282 int locked_pages = 0;
3283 int all_uptodate = 1;
3284 int inc_all_pages = 0;
Chris Masond1310b22008-01-24 16:13:08 -05003285 unsigned long num_pages;
Chris Masona86c12c2008-02-07 10:50:54 -05003286 struct bio *bio = NULL;
Chris Masonc8b97812008-10-29 14:49:59 -04003287 unsigned long bio_flags = 0;
Chris Masona86c12c2008-02-07 10:50:54 -05003288
Chris Masond1310b22008-01-24 16:13:08 -05003289 if (eb->flags & EXTENT_UPTODATE)
3290 return 0;
3291
Chris Masonce9adaa2008-04-09 16:28:12 -04003292 if (test_range_bit(tree, eb->start, eb->start + eb->len - 1,
Chris Masond1310b22008-01-24 16:13:08 -05003293 EXTENT_UPTODATE, 1)) {
3294 return 0;
3295 }
3296
3297 if (start) {
3298 WARN_ON(start < eb->start);
3299 start_i = (start >> PAGE_CACHE_SHIFT) -
3300 (eb->start >> PAGE_CACHE_SHIFT);
3301 } else {
3302 start_i = 0;
3303 }
3304
3305 num_pages = num_extent_pages(eb->start, eb->len);
3306 for (i = start_i; i < num_pages; i++) {
3307 page = extent_buffer_page(eb, i);
Chris Masond1310b22008-01-24 16:13:08 -05003308 if (!wait) {
David Woodhouse2db04962008-08-07 11:19:43 -04003309 if (!trylock_page(page))
Chris Masonce9adaa2008-04-09 16:28:12 -04003310 goto unlock_exit;
Chris Masond1310b22008-01-24 16:13:08 -05003311 } else {
3312 lock_page(page);
3313 }
Chris Masonce9adaa2008-04-09 16:28:12 -04003314 locked_pages++;
Chris Masond3977122009-01-05 21:25:51 -05003315 if (!PageUptodate(page))
Chris Masonce9adaa2008-04-09 16:28:12 -04003316 all_uptodate = 0;
Chris Masonce9adaa2008-04-09 16:28:12 -04003317 }
3318 if (all_uptodate) {
3319 if (start_i == 0)
3320 eb->flags |= EXTENT_UPTODATE;
3321 goto unlock_exit;
3322 }
3323
3324 for (i = start_i; i < num_pages; i++) {
3325 page = extent_buffer_page(eb, i);
3326 if (inc_all_pages)
3327 page_cache_get(page);
3328 if (!PageUptodate(page)) {
3329 if (start_i == 0)
3330 inc_all_pages = 1;
Chris Masonf1885912008-04-09 16:28:12 -04003331 ClearPageError(page);
Chris Masona86c12c2008-02-07 10:50:54 -05003332 err = __extent_read_full_page(tree, page,
Chris Masonf1885912008-04-09 16:28:12 -04003333 get_extent, &bio,
Chris Masonc8b97812008-10-29 14:49:59 -04003334 mirror_num, &bio_flags);
Chris Masond3977122009-01-05 21:25:51 -05003335 if (err)
Chris Masond1310b22008-01-24 16:13:08 -05003336 ret = err;
Chris Masond1310b22008-01-24 16:13:08 -05003337 } else {
3338 unlock_page(page);
3339 }
3340 }
3341
Chris Masona86c12c2008-02-07 10:50:54 -05003342 if (bio)
Chris Masonc8b97812008-10-29 14:49:59 -04003343 submit_one_bio(READ, bio, mirror_num, bio_flags);
Chris Masona86c12c2008-02-07 10:50:54 -05003344
Chris Masond3977122009-01-05 21:25:51 -05003345 if (ret || !wait)
Chris Masond1310b22008-01-24 16:13:08 -05003346 return ret;
Chris Masond3977122009-01-05 21:25:51 -05003347
Chris Masond1310b22008-01-24 16:13:08 -05003348 for (i = start_i; i < num_pages; i++) {
3349 page = extent_buffer_page(eb, i);
3350 wait_on_page_locked(page);
Chris Masond3977122009-01-05 21:25:51 -05003351 if (!PageUptodate(page))
Chris Masond1310b22008-01-24 16:13:08 -05003352 ret = -EIO;
Chris Masond1310b22008-01-24 16:13:08 -05003353 }
Chris Masond3977122009-01-05 21:25:51 -05003354
Chris Masond1310b22008-01-24 16:13:08 -05003355 if (!ret)
3356 eb->flags |= EXTENT_UPTODATE;
3357 return ret;
Chris Masonce9adaa2008-04-09 16:28:12 -04003358
3359unlock_exit:
3360 i = start_i;
Chris Masond3977122009-01-05 21:25:51 -05003361 while (locked_pages > 0) {
Chris Masonce9adaa2008-04-09 16:28:12 -04003362 page = extent_buffer_page(eb, i);
3363 i++;
3364 unlock_page(page);
3365 locked_pages--;
3366 }
3367 return ret;
Chris Masond1310b22008-01-24 16:13:08 -05003368}
3369EXPORT_SYMBOL(read_extent_buffer_pages);
3370
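/*
 * read_extent_buffer - copy @len bytes at offset @start of @eb into @dstv
 *
 * The copy walks the backing pages with kmap_atomic(), so the buffer must
 * already be uptodate.  A minimal usage sketch, not taken from this file
 * ('tree', 'blocknr', 'blocksize' and 'my_get_extent' are hypothetical
 * caller state):
 *
 *	struct extent_buffer *eb;
 *	u8 buf[32];
 *
 *	eb = alloc_extent_buffer(tree, blocknr, blocksize, NULL, GFP_NOFS);
 *	if (eb) {
 *		if (!read_extent_buffer_pages(tree, eb, 0, 1,
 *					      my_get_extent, 0))
 *			read_extent_buffer(eb, buf, 0, sizeof(buf));
 *		free_extent_buffer(eb);
 *	}
 */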
3371void read_extent_buffer(struct extent_buffer *eb, void *dstv,
3372 unsigned long start,
3373 unsigned long len)
3374{
3375 size_t cur;
3376 size_t offset;
3377 struct page *page;
3378 char *kaddr;
3379 char *dst = (char *)dstv;
3380 size_t start_offset = eb->start & ((u64)PAGE_CACHE_SIZE - 1);
3381 unsigned long i = (start_offset + start) >> PAGE_CACHE_SHIFT;
Chris Masond1310b22008-01-24 16:13:08 -05003382
3383 WARN_ON(start > eb->len);
3384 WARN_ON(start + len > eb->start + eb->len);
3385
3386 offset = (start_offset + start) & ((unsigned long)PAGE_CACHE_SIZE - 1);
3387
Chris Masond3977122009-01-05 21:25:51 -05003388 while (len > 0) {
Chris Masond1310b22008-01-24 16:13:08 -05003389 page = extent_buffer_page(eb, i);
Chris Masond1310b22008-01-24 16:13:08 -05003390
3391 cur = min(len, (PAGE_CACHE_SIZE - offset));
3392 kaddr = kmap_atomic(page, KM_USER1);
3393 memcpy(dst, kaddr + offset, cur);
3394 kunmap_atomic(kaddr, KM_USER1);
3395
3396 dst += cur;
3397 len -= cur;
3398 offset = 0;
3399 i++;
3400 }
3401}
3402EXPORT_SYMBOL(read_extent_buffer);
3403
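/*
 * map_private_extent_buffer - kmap the single page holding
 * [@start, @start + @min_len) and return a pointer into it
 *
 * Fails with -EINVAL if the range straddles a page boundary.  @token must
 * later be handed to unmap_extent_buffer(); @map points at the start of
 * the mapped stretch and @map_start/@map_len say which part of the buffer
 * that stretch covers.
 */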
3404int map_private_extent_buffer(struct extent_buffer *eb, unsigned long start,
3405 unsigned long min_len, char **token, char **map,
3406 unsigned long *map_start,
3407 unsigned long *map_len, int km)
3408{
3409 size_t offset = start & (PAGE_CACHE_SIZE - 1);
3410 char *kaddr;
3411 struct page *p;
3412 size_t start_offset = eb->start & ((u64)PAGE_CACHE_SIZE - 1);
3413 unsigned long i = (start_offset + start) >> PAGE_CACHE_SHIFT;
3414 unsigned long end_i = (start_offset + start + min_len - 1) >>
3415 PAGE_CACHE_SHIFT;
3416
3417 if (i != end_i)
3418 return -EINVAL;
3419
3420 if (i == 0) {
3421 offset = start_offset;
3422 *map_start = 0;
3423 } else {
3424 offset = 0;
3425 *map_start = ((u64)i << PAGE_CACHE_SHIFT) - start_offset;
3426 }
Chris Masond3977122009-01-05 21:25:51 -05003427
Chris Masond1310b22008-01-24 16:13:08 -05003428 if (start + min_len > eb->len) {
Chris Masond3977122009-01-05 21:25:51 -05003429 printk(KERN_ERR "btrfs bad mapping eb start %llu len %lu, "
3430 "wanted %lu %lu\n", (unsigned long long)eb->start,
3431 eb->len, start, min_len);
Chris Masond1310b22008-01-24 16:13:08 -05003432 WARN_ON(1);
3433 }
3434
3435 p = extent_buffer_page(eb, i);
Chris Masond1310b22008-01-24 16:13:08 -05003436 kaddr = kmap_atomic(p, km);
3437 *token = kaddr;
3438 *map = kaddr + offset;
3439 *map_len = PAGE_CACHE_SIZE - offset;
3440 return 0;
3441}
3442EXPORT_SYMBOL(map_private_extent_buffer);
3443
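/*
 * map_extent_buffer - map_private_extent_buffer() plus maintenance of the
 * buffer's cached mapping (eb->map_token, eb->kaddr, ...)
 *
 * Any existing cached mapping is dropped first and the new mapping is
 * cached in its place, which is why the caller is expected to hold
 * eb->mutex (see the WARN_ON below).  A sketch of the calling pattern,
 * with hypothetical 'off' and 'dst' (not from this file):
 *
 *	char *token, *map;
 *	unsigned long map_start, map_len;
 *
 *	if (!map_extent_buffer(eb, off, sizeof(u64), &token, &map,
 *			       &map_start, &map_len, KM_USER1)) {
 *		memcpy(dst, map + (off - map_start), sizeof(u64));
 *		unmap_extent_buffer(eb, token, KM_USER1);
 *	}
 */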
3444int map_extent_buffer(struct extent_buffer *eb, unsigned long start,
3445 unsigned long min_len,
3446 char **token, char **map,
3447 unsigned long *map_start,
3448 unsigned long *map_len, int km)
3449{
3450 int err;
3451 int save = 0;
3452 if (eb->map_token) {
3453 unmap_extent_buffer(eb, eb->map_token, km);
3454 eb->map_token = NULL;
3455 save = 1;
Chris Mason934d3752008-12-08 16:43:10 -05003456 WARN_ON(!mutex_is_locked(&eb->mutex));
Chris Masond1310b22008-01-24 16:13:08 -05003457 }
3458 err = map_private_extent_buffer(eb, start, min_len, token, map,
3459 map_start, map_len, km);
3460 if (!err && save) {
3461 eb->map_token = *token;
3462 eb->kaddr = *map;
3463 eb->map_start = *map_start;
3464 eb->map_len = *map_len;
3465 }
3466 return err;
3467}
3468EXPORT_SYMBOL(map_extent_buffer);
3469
3470void unmap_extent_buffer(struct extent_buffer *eb, char *token, int km)
3471{
3472 kunmap_atomic(token, km);
3473}
3474EXPORT_SYMBOL(unmap_extent_buffer);
3475
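/*
 * memcmp_extent_buffer - memcmp() @len bytes of @eb at @start against @ptrv,
 * page by page, returning the first non-zero comparison (0 on a match)
 */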
3476int memcmp_extent_buffer(struct extent_buffer *eb, const void *ptrv,
3477 unsigned long start,
3478 unsigned long len)
3479{
3480 size_t cur;
3481 size_t offset;
3482 struct page *page;
3483 char *kaddr;
3484 char *ptr = (char *)ptrv;
3485 size_t start_offset = eb->start & ((u64)PAGE_CACHE_SIZE - 1);
3486 unsigned long i = (start_offset + start) >> PAGE_CACHE_SHIFT;
3487 int ret = 0;
3488
3489 WARN_ON(start > eb->len);
3490 WARN_ON(start + len > eb->start + eb->len);
3491
3492 offset = (start_offset + start) & ((unsigned long)PAGE_CACHE_SIZE - 1);
3493
Chris Masond3977122009-01-05 21:25:51 -05003494 while (len > 0) {
Chris Masond1310b22008-01-24 16:13:08 -05003495 page = extent_buffer_page(eb, i);
Chris Masond1310b22008-01-24 16:13:08 -05003496
3497 cur = min(len, (PAGE_CACHE_SIZE - offset));
3498
3499 kaddr = kmap_atomic(page, KM_USER0);
3500 ret = memcmp(ptr, kaddr + offset, cur);
3501 kunmap_atomic(kaddr, KM_USER0);
3502 if (ret)
3503 break;
3504
3505 ptr += cur;
3506 len -= cur;
3507 offset = 0;
3508 i++;
3509 }
3510 return ret;
3511}
3512EXPORT_SYMBOL(memcmp_extent_buffer);
3513
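/*
 * write_extent_buffer - copy @len bytes from @srcv into @eb at offset @start
 *
 * The target pages must already be uptodate (note the WARN_ON below);
 * marking the buffer dirty afterwards is left to the caller.
 */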
3514void write_extent_buffer(struct extent_buffer *eb, const void *srcv,
3515 unsigned long start, unsigned long len)
3516{
3517 size_t cur;
3518 size_t offset;
3519 struct page *page;
3520 char *kaddr;
3521 char *src = (char *)srcv;
3522 size_t start_offset = eb->start & ((u64)PAGE_CACHE_SIZE - 1);
3523 unsigned long i = (start_offset + start) >> PAGE_CACHE_SHIFT;
3524
3525 WARN_ON(start > eb->len);
3526 WARN_ON(start + len > eb->start + eb->len);
3527
3528 offset = (start_offset + start) & ((unsigned long)PAGE_CACHE_SIZE - 1);
3529
Chris Masond3977122009-01-05 21:25:51 -05003530 while (len > 0) {
Chris Masond1310b22008-01-24 16:13:08 -05003531 page = extent_buffer_page(eb, i);
3532 WARN_ON(!PageUptodate(page));
3533
3534 cur = min(len, PAGE_CACHE_SIZE - offset);
3535 kaddr = kmap_atomic(page, KM_USER1);
3536 memcpy(kaddr + offset, src, cur);
3537 kunmap_atomic(kaddr, KM_USER1);
3538
3539 src += cur;
3540 len -= cur;
3541 offset = 0;
3542 i++;
3543 }
3544}
3545EXPORT_SYMBOL(write_extent_buffer);
3546
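/*
 * memset_extent_buffer - fill @len bytes of @eb starting at @start with @c
 */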
3547void memset_extent_buffer(struct extent_buffer *eb, char c,
3548 unsigned long start, unsigned long len)
3549{
3550 size_t cur;
3551 size_t offset;
3552 struct page *page;
3553 char *kaddr;
3554 size_t start_offset = eb->start & ((u64)PAGE_CACHE_SIZE - 1);
3555 unsigned long i = (start_offset + start) >> PAGE_CACHE_SHIFT;
3556
3557 WARN_ON(start > eb->len);
3558 WARN_ON(start + len > eb->start + eb->len);
3559
3560 offset = (start_offset + start) & ((unsigned long)PAGE_CACHE_SIZE - 1);
3561
Chris Masond3977122009-01-05 21:25:51 -05003562 while (len > 0) {
Chris Masond1310b22008-01-24 16:13:08 -05003563 page = extent_buffer_page(eb, i);
3564 WARN_ON(!PageUptodate(page));
3565
3566 cur = min(len, PAGE_CACHE_SIZE - offset);
3567 kaddr = kmap_atomic(page, KM_USER0);
3568 memset(kaddr + offset, c, cur);
3569 kunmap_atomic(kaddr, KM_USER0);
3570
3571 len -= cur;
3572 offset = 0;
3573 i++;
3574 }
3575}
3576EXPORT_SYMBOL(memset_extent_buffer);
3577
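/*
 * copy_extent_buffer - copy @len bytes from @src at @src_offset into @dst
 * at @dst_offset, one destination page at a time via read_extent_buffer()
 */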
3578void copy_extent_buffer(struct extent_buffer *dst, struct extent_buffer *src,
3579 unsigned long dst_offset, unsigned long src_offset,
3580 unsigned long len)
3581{
3582 u64 dst_len = dst->len;
3583 size_t cur;
3584 size_t offset;
3585 struct page *page;
3586 char *kaddr;
3587 size_t start_offset = dst->start & ((u64)PAGE_CACHE_SIZE - 1);
3588 unsigned long i = (start_offset + dst_offset) >> PAGE_CACHE_SHIFT;
3589
3590 WARN_ON(src->len != dst_len);
3591
3592 offset = (start_offset + dst_offset) &
3593 ((unsigned long)PAGE_CACHE_SIZE - 1);
3594
Chris Masond3977122009-01-05 21:25:51 -05003595 while (len > 0) {
Chris Masond1310b22008-01-24 16:13:08 -05003596 page = extent_buffer_page(dst, i);
3597 WARN_ON(!PageUptodate(page));
3598
3599 cur = min(len, (unsigned long)(PAGE_CACHE_SIZE - offset));
3600
3601 kaddr = kmap_atomic(page, KM_USER0);
3602 read_extent_buffer(src, kaddr + offset, src_offset, cur);
3603 kunmap_atomic(kaddr, KM_USER0);
3604
3605 src_offset += cur;
3606 len -= cur;
3607 offset = 0;
3608 i++;
3609 }
3610}
3611EXPORT_SYMBOL(copy_extent_buffer);
3612
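/*
 * move_pages - memmove()-style helper operating on one destination page
 *
 * When source and destination are the same page a plain memmove() is used;
 * otherwise the bytes are copied backwards by hand so overlapping ranges
 * that span two pages are still moved safely.
 */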
3613static void move_pages(struct page *dst_page, struct page *src_page,
3614 unsigned long dst_off, unsigned long src_off,
3615 unsigned long len)
3616{
3617 char *dst_kaddr = kmap_atomic(dst_page, KM_USER0);
3618 if (dst_page == src_page) {
3619 memmove(dst_kaddr + dst_off, dst_kaddr + src_off, len);
3620 } else {
3621 char *src_kaddr = kmap_atomic(src_page, KM_USER1);
3622 char *p = dst_kaddr + dst_off + len;
3623 char *s = src_kaddr + src_off + len;
3624
3625 while (len--)
3626 *--p = *--s;
3627
3628 kunmap_atomic(src_kaddr, KM_USER1);
3629 }
3630 kunmap_atomic(dst_kaddr, KM_USER0);
3631}
3632
3633static void copy_pages(struct page *dst_page, struct page *src_page,
3634 unsigned long dst_off, unsigned long src_off,
3635 unsigned long len)
3636{
3637 char *dst_kaddr = kmap_atomic(dst_page, KM_USER0);
3638 char *src_kaddr;
3639
3640 if (dst_page != src_page)
3641 src_kaddr = kmap_atomic(src_page, KM_USER1);
3642 else
3643 src_kaddr = dst_kaddr;
3644
3645 memcpy(dst_kaddr + dst_off, src_kaddr + src_off, len);
3646 kunmap_atomic(dst_kaddr, KM_USER0);
3647 if (dst_page != src_page)
3648 kunmap_atomic(src_kaddr, KM_USER1);
3649}
3650
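/*
 * memcpy_extent_buffer - forward copy of @len bytes inside @dst from
 * @src_offset to @dst_offset
 *
 * Copies page by page from the front, so it is only safe when the ranges
 * do not overlap with the destination above the source;
 * memmove_extent_buffer() below handles that case.
 */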
3651void memcpy_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset,
3652 unsigned long src_offset, unsigned long len)
3653{
3654 size_t cur;
3655 size_t dst_off_in_page;
3656 size_t src_off_in_page;
3657 size_t start_offset = dst->start & ((u64)PAGE_CACHE_SIZE - 1);
3658 unsigned long dst_i;
3659 unsigned long src_i;
3660
3661 if (src_offset + len > dst->len) {
Chris Masond3977122009-01-05 21:25:51 -05003662 printk(KERN_ERR "btrfs memmove bogus src_offset %lu move "
3663 "len %lu dst len %lu\n", src_offset, len, dst->len);
Chris Masond1310b22008-01-24 16:13:08 -05003664 BUG_ON(1);
3665 }
3666 if (dst_offset + len > dst->len) {
Chris Masond3977122009-01-05 21:25:51 -05003667 printk(KERN_ERR "btrfs memmove bogus dst_offset %lu move "
3668 "len %lu dst len %lu\n", dst_offset, len, dst->len);
Chris Masond1310b22008-01-24 16:13:08 -05003669 BUG_ON(1);
3670 }
3671
Chris Masond3977122009-01-05 21:25:51 -05003672 while (len > 0) {
Chris Masond1310b22008-01-24 16:13:08 -05003673 dst_off_in_page = (start_offset + dst_offset) &
3674 ((unsigned long)PAGE_CACHE_SIZE - 1);
3675 src_off_in_page = (start_offset + src_offset) &
3676 ((unsigned long)PAGE_CACHE_SIZE - 1);
3677
3678 dst_i = (start_offset + dst_offset) >> PAGE_CACHE_SHIFT;
3679 src_i = (start_offset + src_offset) >> PAGE_CACHE_SHIFT;
3680
3681 cur = min(len, (unsigned long)(PAGE_CACHE_SIZE -
3682 src_off_in_page));
3683 cur = min_t(unsigned long, cur,
3684 (unsigned long)(PAGE_CACHE_SIZE - dst_off_in_page));
3685
3686 copy_pages(extent_buffer_page(dst, dst_i),
3687 extent_buffer_page(dst, src_i),
3688 dst_off_in_page, src_off_in_page, cur);
3689
3690 src_offset += cur;
3691 dst_offset += cur;
3692 len -= cur;
3693 }
3694}
3695EXPORT_SYMBOL(memcpy_extent_buffer);
3696
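/*
 * memmove_extent_buffer - overlap-safe copy of @len bytes inside @dst
 *
 * When the destination sits below the source a forward
 * memcpy_extent_buffer() suffices; otherwise the range is walked backwards
 * from its end using move_pages().
 */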
3697void memmove_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset,
3698 unsigned long src_offset, unsigned long len)
3699{
3700 size_t cur;
3701 size_t dst_off_in_page;
3702 size_t src_off_in_page;
3703 unsigned long dst_end = dst_offset + len - 1;
3704 unsigned long src_end = src_offset + len - 1;
3705 size_t start_offset = dst->start & ((u64)PAGE_CACHE_SIZE - 1);
3706 unsigned long dst_i;
3707 unsigned long src_i;
3708
3709 if (src_offset + len > dst->len) {
Chris Masond3977122009-01-05 21:25:51 -05003710 printk(KERN_ERR "btrfs memmove bogus src_offset %lu move "
3711 "len %lu len %lu\n", src_offset, len, dst->len);
Chris Masond1310b22008-01-24 16:13:08 -05003712 BUG_ON(1);
3713 }
3714 if (dst_offset + len > dst->len) {
Chris Masond3977122009-01-05 21:25:51 -05003715 printk(KERN_ERR "btrfs memmove bogus dst_offset %lu move "
3716 "len %lu len %lu\n", dst_offset, len, dst->len);
Chris Masond1310b22008-01-24 16:13:08 -05003717 BUG_ON(1);
3718 }
3719 if (dst_offset < src_offset) {
3720 memcpy_extent_buffer(dst, dst_offset, src_offset, len);
3721 return;
3722 }
Chris Masond3977122009-01-05 21:25:51 -05003723 while (len > 0) {
Chris Masond1310b22008-01-24 16:13:08 -05003724 dst_i = (start_offset + dst_end) >> PAGE_CACHE_SHIFT;
3725 src_i = (start_offset + src_end) >> PAGE_CACHE_SHIFT;
3726
3727 dst_off_in_page = (start_offset + dst_end) &
3728 ((unsigned long)PAGE_CACHE_SIZE - 1);
3729 src_off_in_page = (start_offset + src_end) &
3730 ((unsigned long)PAGE_CACHE_SIZE - 1);
3731
3732 cur = min_t(unsigned long, len, src_off_in_page + 1);
3733 cur = min(cur, dst_off_in_page + 1);
3734 move_pages(extent_buffer_page(dst, dst_i),
3735 extent_buffer_page(dst, src_i),
3736 dst_off_in_page - cur + 1,
3737 src_off_in_page - cur + 1, cur);
3738
3739 dst_end -= cur;
3740 src_end -= cur;
3741 len -= cur;
3742 }
3743}
3744EXPORT_SYMBOL(memmove_extent_buffer);
Chris Mason6af118ce2008-07-22 11:18:07 -04003745
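/*
 * try_release_extent_buffer - drop the extent_buffer attached to @page if
 * nothing but the tree still references it
 *
 * Returns 1 after erasing the buffer from the rbtree, releasing its page
 * references and freeing it; returns 0 if extra references remain.
 */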
3746int try_release_extent_buffer(struct extent_io_tree *tree, struct page *page)
3747{
3748 u64 start = page_offset(page);
3749 struct extent_buffer *eb;
3750 int ret = 1;
3751 unsigned long i;
3752 unsigned long num_pages;
3753
3754 spin_lock(&tree->buffer_lock);
3755 eb = buffer_search(tree, start);
3756 if (!eb)
3757 goto out;
3758
3759 if (atomic_read(&eb->refs) > 1) {
3760 ret = 0;
3761 goto out;
3762 }
3763 /* at this point we can safely release the extent buffer */
3764 num_pages = num_extent_pages(eb->start, eb->len);
Christoph Hellwigb2141072008-09-05 16:43:31 -04003765 for (i = 0; i < num_pages; i++)
3766 page_cache_release(extent_buffer_page(eb, i));
Chris Mason6af118ce2008-07-22 11:18:07 -04003767 rb_erase(&eb->rb_node, &tree->buffer);
3768 __free_extent_buffer(eb);
3769out:
3770 spin_unlock(&tree->buffer_lock);
3771 return ret;
3772}
3773EXPORT_SYMBOL(try_release_extent_buffer);